summaryrefslogtreecommitdiffstats
path: root/intl/lwbrk/tools
diff options
context:
space:
mode:
Diffstat (limited to 'intl/lwbrk/tools')
-rw-r--r--intl/lwbrk/tools/anzx4051.html669
-rw-r--r--intl/lwbrk/tools/anzx4051.pl356
-rw-r--r--intl/lwbrk/tools/jisx4051class.txt159
-rw-r--r--intl/lwbrk/tools/jisx4051simp.txt24
-rw-r--r--intl/lwbrk/tools/spec_table.html127
5 files changed, 1335 insertions, 0 deletions
diff --git a/intl/lwbrk/tools/anzx4051.html b/intl/lwbrk/tools/anzx4051.html
new file mode 100644
index 000000000..d894ce811
--- /dev/null
+++ b/intl/lwbrk/tools/anzx4051.html
@@ -0,0 +1,669 @@
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this
+ - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<HTML>
+<HEAD>
+<TITLE>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</TITLE>
+</HEAD>
+<BODY>
+<H1>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</H1>
+<TABLE BORDER=3>
+<TR BGCOLOR=blue><TH><TH>
+<TD BGCOLOR=red>C</TD>
+<TD BGCOLOR=red>L</TD>
+<TD BGCOLOR=red>M</TD>
+<TD BGCOLOR=red>N</TD>
+<TD BGCOLOR=red>P</TD>
+<TD BGCOLOR=red>S</TD>
+<TD BGCOLOR=red>Z</TD>
+<TD BGCOLOR=white>Total</TD>
+<TD BGCOLOR=yellow>Cc</TD>
+<TD BGCOLOR=yellow>Cf</TD>
+<TD BGCOLOR=yellow>Co</TD>
+<TD BGCOLOR=yellow>Cs</TD>
+<TD BGCOLOR=yellow>Ll</TD>
+<TD BGCOLOR=yellow>Lm</TD>
+<TD BGCOLOR=yellow>Lo</TD>
+<TD BGCOLOR=yellow>Lt</TD>
+<TD BGCOLOR=yellow>Lu</TD>
+<TD BGCOLOR=yellow>Mc</TD>
+<TD BGCOLOR=yellow>Me</TD>
+<TD BGCOLOR=yellow>Mn</TD>
+<TD BGCOLOR=yellow>Nd</TD>
+<TD BGCOLOR=yellow>Nl</TD>
+<TD BGCOLOR=yellow>No</TD>
+<TD BGCOLOR=yellow>Pc</TD>
+<TD BGCOLOR=yellow>Pd</TD>
+<TD BGCOLOR=yellow>Pe</TD>
+<TD BGCOLOR=yellow>Pf</TD>
+<TD BGCOLOR=yellow>Pi</TD>
+<TD BGCOLOR=yellow>Po</TD>
+<TD BGCOLOR=yellow>Ps</TD>
+<TD BGCOLOR=yellow>Sc</TD>
+<TD BGCOLOR=yellow>Sk</TD>
+<TD BGCOLOR=yellow>Sm</TD>
+<TD BGCOLOR=yellow>So</TD>
+<TD BGCOLOR=yellow>Zl</TD>
+<TD BGCOLOR=yellow>Zp</TD>
+<TD BGCOLOR=yellow>Zs</TD>
+</TR>
+<TR><TH>00_1<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>14</TD>
+<TD>1</TD>
+<TD></TD>
+<TD BGCOLOR=white>15</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>2</TD>
+<TD>11</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>01_[a]<TH>
+<TD></TD>
+<TD>32</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>28</TD>
+<TD>3</TD>
+<TD></TD>
+<TD BGCOLOR=white>65</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>8</TD>
+<TD>24</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>12</TD>
+<TD>1</TD>
+<TD></TD>
+<TD>14</TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>02_7<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>03_8<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD BGCOLOR=white>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>04_9<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>5</TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>5</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>5</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>05_[b]<TH>
+<TD>33</TD>
+<TD>153</TD>
+<TD></TD>
+<TD>33</TD>
+<TD>2</TD>
+<TD>5</TD>
+<TD>13</TD>
+<TD BGCOLOR=white>239</TD>
+<TD>32</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>153</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>33</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>5</TD>
+<TD></TD>
+<TD></TD>
+<TD>13</TD>
+</TR>
+<TR><TH>06_15<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>30</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>30</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>30</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>07_18<TH>
+<TD>18</TD>
+<TD>157</TD>
+<TD></TD>
+<TD>33</TD>
+<TD>56</TD>
+<TD>125</TD>
+<TD>2</TD>
+<TD BGCOLOR=white>391</TD>
+<TD></TD>
+<TD>18</TD>
+<TD></TD>
+<TD></TD>
+<TD>64</TD>
+<TD>7</TD>
+<TD>5</TD>
+<TD></TD>
+<TD>81</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>30</TD>
+<TD>4</TD>
+<TD>5</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>5</TD>
+<TD>36</TD>
+<TD>4</TD>
+<TD></TD>
+<TD>3</TD>
+<TD>24</TD>
+<TD>98</TD>
+<TD>1</TD>
+<TD>1</TD>
+<TD></TD>
+</TR>
+<TR><TH>08_COMPLEX<TH>
+<TD></TD>
+<TD>54</TD>
+<TD>33</TD>
+<TD>20</TD>
+<TD>2</TD>
+<TD>1</TD>
+<TD></TD>
+<TD BGCOLOR=white>110</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>53</TD>
+<TD></TD>
+<TD></TD>
+<TD>11</TD>
+<TD></TD>
+<TD>22</TD>
+<TD>10</TD>
+<TD></TD>
+<TD>10</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>09_[c]<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>4</TD>
+<TD></TD>
+<TD BGCOLOR=white>7</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>0A_[d]<TH>
+<TD>1</TD>
+<TD>2</TD>
+<TD></TD>
+<TD>6</TD>
+<TD>28</TD>
+<TD>14</TD>
+<TD></TD>
+<TD BGCOLOR=white>51</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>6</TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+<TD>3</TD>
+<TD></TD>
+<TD>22</TD>
+<TD></TD>
+<TD>2</TD>
+<TD>3</TD>
+<TD>7</TD>
+<TD>2</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>0B_[e]<TH>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD>1</TD>
+<TD>3</TD>
+<TD BGCOLOR=white>6</TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>3</TD>
+</TR>
+<TR><TH>X<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD BGCOLOR=white>0</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+</TABLE>
+<TABLE BORDER=3>
+<TR BGCOLOR=blue><TH><TH>
+<TD BGCOLOR=red>00_1</TD>
+<TD BGCOLOR=red>01_[a]</TD>
+<TD BGCOLOR=red>02_7</TD>
+<TD BGCOLOR=red>03_8</TD>
+<TD BGCOLOR=red>04_9</TD>
+<TD BGCOLOR=red>05_[b]</TD>
+<TD BGCOLOR=red>06_15</TD>
+<TD BGCOLOR=red>07_18</TD>
+<TD BGCOLOR=red>08_COMPLEX</TD>
+<TD BGCOLOR=red>09_[c]</TD>
+<TD BGCOLOR=red>0A_[d]</TD>
+<TD BGCOLOR=red>0B_[e]</TD>
+<TD BGCOLOR=red>X</TD>
+</TR>
+<TR><TH>00<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>33</TD>
+<TD>10</TD>
+<TD>127</TD>
+<TD></TD>
+<TD>7</TD>
+<TD>44</TD>
+<TD>2</TD>
+<TD></TD>
+</TR>
+<TR><TH>0E<TH>
+<TD>1</TD>
+<TD>6</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>20</TD>
+<TD>1</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>17<TH>
+<TD>2</TD>
+<TD>4</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>110</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>20<TH>
+<TD>2</TD>
+<TD>8</TD>
+<TD>1</TD>
+<TD></TD>
+<TD>5</TD>
+<TD>13</TD>
+<TD></TD>
+<TD>100</TD>
+<TD></TD>
+<TD></TD>
+<TD>7</TD>
+<TD>4</TD>
+<TD></TD>
+</TR>
+<TR><TH>21<TH>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>1</TD>
+<TD></TD>
+<TD>32</TD>
+<TD></TD>
+<TD>163</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR><TH>30<TH>
+<TD>10</TD>
+<TD>47</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD>161</TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+</TABLE>
diff --git a/intl/lwbrk/tools/anzx4051.pl b/intl/lwbrk/tools/anzx4051.pl
new file mode 100644
index 000000000..b13315b38
--- /dev/null
+++ b/intl/lwbrk/tools/anzx4051.pl
@@ -0,0 +1,356 @@
+#!/usr/bin/perl
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+######################################################################
+#
+# Initialize global variables
+#
+######################################################################
+%utot = ();
+$ui=0;
+$li=0;
+
+######################################################################
+#
+# Open the unicode database file
+#
+######################################################################
+open ( UNICODATA , "< ../../unicharutil/tools/UnicodeData-Latest.txt")
+ || die "cannot find UnicodeData-Latest.txt";
+
+######################################################################
+#
+# Open the JIS X 4051 Class file
+#
+######################################################################
+open ( CLASS , "< jisx4051class.txt")
+ || die "cannot find jisx4051class.txt";
+
+######################################################################
+#
+# Open the JIS X 4051 Class simplified mapping
+#
+######################################################################
+open ( SIMP , "< jisx4051simp.txt")
+ || die "cannot find jisx4051simp.txt";
+
+######################################################################
+#
+# Open the output file
+#
+######################################################################
+open ( OUT , "> anzx4051.html")
+ || die "cannot open output anzx4051.html file";
+
+######################################################################
+#
+# Open the generated C header output file
+#
+######################################################################
+open ( HEADER , "> ../src/jisx4051class.h")
+ || die "cannot open output ../src/jisx4051class.h file";
+
+######################################################################
+#
+# Generate license and header
+#
+######################################################################
+$hthmlheader = <<END_OF_HTML;
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this
+ - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<HTML>
+<HEAD>
+<TITLE>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</TITLE>
+</HEAD>
+<BODY>
+<H1>
+Analysis of JIS X 4051 to Unicode General Category Mapping
+</H1>
+END_OF_HTML
+print OUT $hthmlheader;
+
+######################################################################
+#
+# Generate the license and do-not-edit notice for the C header
+#
+######################################################################
+$npl = <<END_OF_NPL;
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+ DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
+ mozilla/intl/lwbrk/tools/anzx4051.pl
+ */
+END_OF_NPL
+print HEADER $npl;
+
+%occ = ();
+%gcat = ();
+%dcat = ();
+%simp = ();
+%gcount = ();
+%dcount = ();
+%sccount = ();
+%rangecount = ();
+
+######################################################################
+#
+# Process the file line by line
+#
+######################################################################
+while(<UNICODATA>) {
+  chomp;   # strip only the line terminator; chop would eat a real character on an unterminated last line
+  ######################################################################
+  #
+  # Split the UnicodeData record into its ';'-separated fields
+  #
+  ######################################################################
+  @f = split(/;/ , $_);
+  $c = $f[0];            # code point as a hex string (GetClass looks it up via DecToHex)
+  $g = $f[2];            # General Category, e.g. "Lu"
+  $d = substr($g, 0, 1); # first letter of the category, e.g. "L"
+
+  $gcat{$c} = $g;        # per-code-point lookups used by GetClass/GetDClass
+  $dcat{$c} = $d;
+  $gcount{$g}++;         # category tallies; their sorted keys become report columns
+  $dcount{$d}++;
+}
+close(UNICODATA);        # the handle opened above is UNICODATA, not UNIDATA
+
+while(<SIMP>) {
+  chomp;   # was chop: without a trailing newline the last mapping would lose a character
+  ######################################################################
+  #
+  # Record format: JIS X 4051 class number; simplified class name
+  #
+  ######################################################################
+  @f = split(/;/ , $_);
+
+  $simp{$f[0]} = $f[1];   # class number -> simplified class
+  $sccount{$f[1]}++;      # sorted keys become the rows/columns of the report
+}
+close(SIMP);
+
+# Map code point $u (an integer) to its General Category string.
+# Lookup order: the %gcat table filled from UnicodeData, then the fixed
+# fallback ranges below ("Han" is returned for 3400-9FA5).
+# An unmapped code point prints a warning and yields "".
+sub GetClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  my $g = $gcat{$hex};   # lexical: do not clobber the script-wide $g
+  if(defined($g) && $g ne "") {
+    return $g;
+  } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) {
+    return "Han";
+  } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) {
+    return "Lo";
+  } elsif (( 0xd800 <= $u) && ( $u <= 0xdfff ) ) {
+    return "Cs";         # D800-DB7F, DB80-DBFF and DC00-DFFF all gave "Cs"
+  } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) {
+    return "Co";
+  }
+  printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
+  return "";             # explicit result instead of printf's return value
+}
+# Map code point $u (an integer) to the first letter of its General
+# Category. Lookup order: the %dcat table filled from UnicodeData, then
+# the fixed fallback ranges below ("Han" is returned for 3400-9FA5).
+# An unmapped code point prints a warning and yields "".
+sub GetDClass{
+  my ($u) = @_;
+  my $hex = DecToHex($u);
+  my $g = $dcat{$hex};   # lexical: do not clobber the script-wide $g
+  if(defined($g) && $g ne "") {
+    return $g;
+  } elsif (( 0x3400 <= $u) && ( $u <= 0x9fa5 ) ) {
+    return "Han";
+  } elsif (( 0xac00 <= $u) && ( $u <= 0xd7a3 ) ) {
+    return "L";
+  } elsif (( 0xd800 <= $u) && ( $u <= 0xdfff ) ) {
+    return "C";          # D800-DB7F, DB80-DBFF and DC00-DFFF all gave "C"
+  } elsif (( 0xe000 <= $u) && ( $u <= 0xf8ff ) ) {
+    return "C";
+  }
+  printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
+  return "";             # explicit result instead of printf's return value
+}
+sub DecToHex{   # integer -> upper-case hex, zero-padded to at least 4 digits
+  my $value = shift;
+  return sprintf("%04X", $value);
+}
+%gtotal = ();
+%dtotal = ();
+while(<CLASS>) {
+  chomp;   # was chop: an unterminated final line would lose the last digit of its class
+  ######################################################################
+  #
+  # Record format: first code point; optional last code point; class
+  #
+  ######################################################################
+  @f = split(/;/ , $_);
+
+  if( substr($f[2], 0, 1) ne "a")   # records whose class starts with "a" are skipped
+  {
+    $sc = $simp{$f[2]};             # simplified class for this class number
+    $l = hex($f[0]);
+    if($f[1] eq "")
+    {
+      $h = $l;                      # no end given: a range of one code point
+    } else {
+      $h = hex($f[1]);
+    }
+    for($k = $l; $k <= $h ; $k++)
+    {
+      if( exists($occ{$k}))
+      {
+        # First definition wins; later overlapping records are ignored.
+        # printf "WARNING !! Conflicting definition!!! U+%s -> [%s] [%s | %s]\n", DecToHex($k), $occ{$k} , $f[2] , $sc;
+      }
+      else
+      {
+        $occ{$k} = $sc . " | " . $f[2];
+        $gclass = GetClass($k);
+        $dclass = GetDClass($k);
+        $gtotal{$sc . $gclass}++;
+        $dtotal{$sc . $dclass}++;
+        $u = DecToHex($k);
+        $rk = " " . substr($u,0,2) . ":" . $sc;   # key: " <first two hex digits>:<class>"
+        $rangecount{$rk}++;
+      }
+    }
+  }
+}
+
+#print %gtotal;
+#print %dtotal;
+# Write two summary tables to OUT: simplified class x category, then code-point high byte x simplified class.
+sub printreport
+{
+ print OUT "<TABLE BORDER=3>\n";
+ print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+ # Header cells: one-letter categories (red), "Total" (white), full categories (yellow).
+ foreach $d (sort(keys %dcount)) {
+ print OUT "<TD BGCOLOR=red>$d</TD>\n";
+ }
+
+ print OUT "<TD BGCOLOR=white>Total</TD>\n";
+ foreach $g (sort(keys %gcount)) {
+ print OUT "<TD BGCOLOR=yellow>$g</TD>\n";
+ }
+ print OUT "</TR>\n";
+ foreach $sc (sort(keys %sccount)) {
+ # One row per simplified class.
+ print OUT "<TR><TH>$sc<TH>\n";
+ # A combination that never occurred has no hash entry and prints as an empty cell.
+ $total = 0;
+ foreach $d (sort (keys %dcount)) {
+ $count = $dtotal{$sc . $d};
+ $total += $count;
+ print OUT "<TD>$count</TD>\n";
+ }
+
+ print OUT "<TD BGCOLOR=white>$total</TD>\n";
+
+ foreach $g (sort(keys %gcount)) {
+ $count = $gtotal{$sc . $g};
+ print OUT "<TD>$count</TD>\n";
+ }
+
+
+ print OUT "</TR>\n";
+ }
+ print OUT "</TABLE>\n";
+
+ # Second table: one column per simplified class, one row per two-digit hex prefix of the code point.
+ print OUT "<TABLE BORDER=3>\n";
+ print OUT "<TR BGCOLOR=blue><TH><TH>\n";
+
+ foreach $sc (sort(keys %sccount))
+ {
+ print OUT "<TD BGCOLOR=red>$sc</TD>\n";
+ }
+
+ print OUT "</TR>\n";
+
+ # Only prefixes 00 through 4E are scanned; each row is buffered in $tmp first.
+ for($rr = 0; $rr < 0x4f; $rr++)
+ {
+ $empty = 0;
+ $r = sprintf("%02X" , $rr) ;
+ $tmp = "<TR><TH>" . $r . "<TH>\n";
+
+ foreach $sc (sort(keys %sccount)) {
+ $count = $rangecount{ " " .$r . ":" .$sc};
+ $tmp .= sprintf("<TD>%s</TD>\n", $count);
+ $empty += $count;
+ }
+
+ $tmp .= "</TR>\n";
+ # $empty holds the row's summed count; rows that sum to 0 are not printed.
+ if($empty ne 0)
+ {
+ print OUT $tmp;
+ }
+ }
+ print OUT "</TABLE>\n";
+
+}
+printreport();
+# printarray(R, DEF): write gLBClass<R>, a 32-entry uint32_t table covering code points R00-RFF, to HEADER.
+sub printarray
+{
+ my($r, $def) = @_;
+printf "[%s || %s]\n", $r, $def; # trace of the arguments on STDOUT
+ $k = hex($r) * 256; # first code point of the 256-code-point page
+ printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;
+ for($i = 0 ; $i < 256; $i+= 8) # one array entry per 8 code points
+ {
+ for($j = 7 ; $j >= 0; $j-- ) # highest code point first, so it becomes the leading hex digit
+ {
+ $v = $k + $i + $j;
+ if( exists($occ{$v}))
+ {
+ $p = substr($occ{$v}, 1,1); # second character of the stored "<class> | <number>" string
+ } else {
+ $p = $def; # code points without a class get the caller's default digit
+ }
+ # Only the first digit of each entry carries the "0x" prefix.
+ if($j eq 7 )
+ {
+ printf HEADER "0x%s" , $p;
+ } else {
+ printf HEADER "%s", $p ;
+ }
+ }
+ printf HEADER ", // U+%04X - U+%04X\n", $k + $i ,( $k + $i + 7);
+ }
+ print HEADER "};\n\n";
+}
+printarray("00", "7");
+printarray("20", "7");
+printarray("21", "7");
+printarray("30", "5");
+printarray("0E", "8");
+printarray("17", "7");
+
+#print %rangecount;
+
+######################################################################
+#
+# Close files
+#
+######################################################################
+close(HEADER);
+close(CLASS);
+close(OUT);
+
diff --git a/intl/lwbrk/tools/jisx4051class.txt b/intl/lwbrk/tools/jisx4051class.txt
new file mode 100644
index 000000000..5b26b7267
--- /dev/null
+++ b/intl/lwbrk/tools/jisx4051class.txt
@@ -0,0 +1,159 @@
+0000;001f;17
+0020;;17
+0024;;24
+0027;;18
+0028;;22
+002D;;18
+002F;;18
+0021;002F;23
+0030;0039;15
+003C;;22
+003A;003F;23
+0040;;18
+0041;005A;18
+005B;;22
+005E;;18
+005F;;18
+005B;005F;23
+0060;;18
+0061;007A;18
+007B;;22
+007B;007E;23
+00A0;;24
+00A3;;22
+00A5;;22
+00A9;;18
+00AA;;18
+00AB;;18
+00AC;;22
+00AE;;18
+00AF;;18
+00A1;00BF;23
+00B0;;18
+00F7;;23
+00C0;00FF;18
+0E3F;;1
+0E2F;;4
+0E46;;4
+0E5A;0E5B;4
+0E50;0E59;15
+0E4F;;18
+0EAF;;4
+0EC6;;4
+0ED0;0ED9;15
+1735;1736;1
+17D4;17D5;4
+17D8;;4
+17DA;;4
+1780;17DD;21
+17E0;17E9;21
+17F0;17F9;21
+2007;;24
+2000;200B;17
+200C;200F;18
+2010;;18
+2011;;24
+2012;2013;18
+2014;;7
+2015;;18
+2016;2017;18
+2019;;23
+201D;;23
+2018;201F;18
+2020;2023;18
+2024;2026;23
+2027;;23
+2028;202E;18
+202F;;24
+2030;2034;9
+2035;2038;18
+2039;;1
+203A;;2
+203B;;12
+203C;203D;3
+203E;;23
+203F;2043;18
+2044;;3
+2045;;1
+2046;;2
+2047;2049;3
+204A;205E;18
+205F;;17
+2060;;24
+2061;2063;18
+206A;206F;18
+2070;2071;18
+2074;208E;18
+2090;2094;18
+2116;;8
+2160;217F;12
+2190;21EA;a12
+2126;;18
+2100;2138;18
+2153;2182;18
+2190;21EA;18
+3008;;1
+300A;;1
+300C;;1
+300E;;1
+3010;;1
+3014;;1
+3016;;1
+3018;;1
+301A;;1
+301D;;1
+3001;;2
+3009;;2
+300B;;2
+300D;;2
+300F;;2
+3011;;2
+3015;;2
+3017;;2
+3019;;2
+301B;;2
+301E;;2
+301F;;2
+3005;;3
+301C;;3
+3041;;3
+3043;;3
+3045;;3
+3047;;3
+3049;;3
+3063;;3
+3083;;3
+3085;;3
+3087;;3
+308E;;3
+309D;;3
+309E;;3
+30A1;;3
+30A3;;3
+30A5;;3
+30A7;;3
+30A9;;3
+30C3;;3
+30E3;;3
+30E5;;3
+30E7;;3
+30EE;;3
+30F5;;3
+30F6;;3
+30FC;;3
+30FD;;3
+30FE;;3
+30FB;;5
+3002;;6
+3000;;10
+3042;3094;11
+3099;309E;3
+3003;;12
+3004;;12
+3006;;12
+3007;;12
+3012;;12
+3013;;12
+3020;;12
+3036;;12
+30A2;30FA;12
diff --git a/intl/lwbrk/tools/jisx4051simp.txt b/intl/lwbrk/tools/jisx4051simp.txt
new file mode 100644
index 000000000..e12a7fd80
--- /dev/null
+++ b/intl/lwbrk/tools/jisx4051simp.txt
@@ -0,0 +1,24 @@
+1;00_1
+2;01_[a]
+3;01_[a]
+4;01_[a]
+5;01_[a]
+6;01_[a]
+7;02_7
+8;03_8
+9;04_9
+10;05_[b]
+11;05_[b]
+12;05_[b]
+13;X
+14;X
+15;06_15
+16;X
+17;05_[b]
+18;07_18
+19;X
+20;X
+21;08_COMPLEX
+22;09_[c]
+23;0A_[d]
+24;0B_[e]
diff --git a/intl/lwbrk/tools/spec_table.html b/intl/lwbrk/tools/spec_table.html
new file mode 100644
index 000000000..519f98c53
--- /dev/null
+++ b/intl/lwbrk/tools/spec_table.html
@@ -0,0 +1,127 @@
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this
+ - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Line breaking specification table</title>
+<style type="text/css">
+table {
+ border: solid 1px;
+ border-collapse: collapse;
+}
+tbody, tfoot {
+ border-top: solid 2px;
+}
+td, th {
+ border: solid 1px;
+}
+td {
+ text-align: center;
+}
+</style>
+</head>
+<body>
+<p>This is a specification table for line breaking.</p>
+<p>In the IE7 and Opera9 columns, 'A' means that the line is breakable After the character, 'B' means Before it, and 'BA' means both Before and After.</p>
+<p>The (C) and (N) suffixes on the IE7 and Opera9 column names mean Character and Numeric.
+They indicate what surrounded the character in the test. E.g., "a$a" is a test case for (C), and "0$0" is a test case for (N).</p>
+<p>Gecko does not break lines in most Western-language contexts. However, in file paths, URLs and very long words joined by hyphens,
+some characters may be breakable; these are marked 'breakable' in the table. They are not always breakable, though:
+whether a break is allowed <em>depends on the context</em> within the word.</p>
+<table border="1">
+<thead>
+<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
+</thead>
+<tfoot>
+<tr><th colspan="2">character</th><th>Gecko</th><th>IE7(C)</th><th>IE7(N)</th><th>Opera9.2(C)</th><th>Opera9.2(N)</th></tr>
+</tfoot>
+<tbody>
+<tr><th>0x21</th><th>&#x21;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x22</th><th>&#x22;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x23</th><th>&#x23;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x24</th><th>&#x24;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0x25</th><th>&#x25;</th><td>breakable</td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x26</th><th>&#x26;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x27</th><th>&#x27;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x28</th><th>&#x28;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x29</th><th>&#x29;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x2A</th><th>&#x2A;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2B</th><th>&#x2B;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2C</th><th>&#x2C;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2D</th><th>&#x2D;</th><td>breakable</td><td>BA</td><td>BA</td><td>A</td><td>A</td></tr>
+<tr><th>0x2E</th><th>&#x2E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x2F</th><th>&#x2F;</th><td>breakable</td><td></td><td></td><td>A</td><td>A</td></tr>
+</tbody>
+<tbody>
+<tr><th>0x3A</th><th>&#x3A;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3B</th><th>&#x3B;</th><td>breakable</td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3C</th><th>&#x3C;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3D</th><th>&#x3D;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3E</th><th>&#x3E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x3F</th><th>&#x3F;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x40</th><th>&#x40;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x5B</th><th>&#x5B;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x5C</th><th>&#x5C;</th><td>breakable</td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0x5D</th><th>&#x5D;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x5E</th><th>&#x5E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0x5F</th><th>&#x5F;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x60</th><th>&#x60;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0x7B</th><th>&#x7B;</th><td></td><td>B</td><td>B</td><td></td><td></td></tr>
+<tr><th>0x7C</th><th>&#x7C;</th><td></td><td></td><td></td><td>A</td><td>A</td></tr>
+<tr><th>0x7D</th><th>&#x7D;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0x7E</th><th>&#x7E;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xA1</th><th>&#xA1;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA2</th><th>&#xA2;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0xA3</th><th>&#xA3;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0xA4</th><th>&#xA4;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA5</th><th>&#xA5;</th><td></td><td></td><td>B</td><td></td><td></td></tr>
+<tr><th>0xA6</th><th>&#xA6;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA7</th><th>&#xA7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA8</th><th>&#xA8;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xA9</th><th>&#xA9;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAA</th><th>&#xAA;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAB</th><th>&#xAB;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAC</th><th>&#xAC;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAE</th><th>&#xAE;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xAF</th><th>&#xAF;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xB0</th><th>&#xB0;</th><td></td><td>A</td><td>A</td><td></td><td></td></tr>
+<tr><th>0xB1</th><th>&#xB1;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB2</th><th>&#xB2;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB3</th><th>&#xB3;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB4</th><th>&#xB4;</th><td></td><td></td><td></td><td>B</td><td>B</td></tr>
+<tr><th>0xB5</th><th>&#xB5;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB6</th><th>&#xB6;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB7</th><th>&#xB7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB8</th><th>&#xB8;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xB9</th><th>&#xB9;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBA</th><th>&#xBA;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBB</th><th>&#xBB;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBC</th><th>&#xBC;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBD</th><th>&#xBD;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBE</th><th>&#xBE;</th><td></td><td></td><td></td><td></td><td></td></tr>
+<tr><th>0xBF</th><th>&#xBF;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xD7</th><th>&#xD7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+<tbody>
+<tr><th>0xF7</th><th>&#xF7;</th><td></td><td></td><td></td><td></td><td></td></tr>
+</tbody>
+</table>
+</body>
+</html>