diff options
Diffstat (limited to 'intl/lwbrk/jisx4051pairtable.txt')
-rw-r--r-- | intl/lwbrk/jisx4051pairtable.txt | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/intl/lwbrk/jisx4051pairtable.txt b/intl/lwbrk/jisx4051pairtable.txt new file mode 100644 index 000000000..2bae1b18f --- /dev/null +++ b/intl/lwbrk/jisx4051pairtable.txt @@ -0,0 +1,286 @@ + + + +/* + + Simplification of Pair Table in JIS X 4051 + + 1. The Original Table - in 4.1.3 + + In JIS X 4051, the pair table is defined as below + + Class of + Leading Class of Trailing Char Class + Char + + 1 2 3 4 5 6 7 8 9 10 11 12 13 13 14 14 15 16 17 18 19 20 + * # * # + 1 X X X X X X X X X X X X X X X X X X X X X E + 2 X X X X X X + 3 X X X X X X + 4 X X X X X X + 5 X X X X X X + 6 X X X X X X + 7 X X X X X X X + 8 X X X X X X E + 9 X X X X X X + 10 X X X X X X + 11 X X X X X X + 12 X X X X X X + 13 X X X X X X X + 14 X X X X X X X + 15 X X X X X X X X X + 16 X X X X X X X X + 17 X X X X X E + 18 X X X X X X X X X + 19 X E E E E E X X X X X X X X X X X X E X E E + 20 X X X X X E + + * Same Char + # Other Char + + 2. Simplified by removing the classes which we do not care about + + However, since we do not care about class 13(Subscript), 14(Ruby), + 19(split line note begin quote), and 20(split line note end quote) + we can simplify this pair table into the following + + Class of + Leading Class of Trailing Char Class + Char + + 1 2 3 4 5 6 7 8 9 10 11 12 15 16 17 18 + + 1 X X X X X X X X X X X X X X X X + 2 X X X X X + 3 X X X X X + 4 X X X X X + 5 X X X X X + 6 X X X X X + 7 X X X X X X + 8 X X X X X X + 9 X X X X X + 10 X X X X X + 11 X X X X X + 12 X X X X X + 15 X X X X X X X X + 16 X X X X X X X + 17 X X X X X + 18 X X X X X X X X + + 3. Simplified by merging classes + + After the second simplification, the pair table has some duplication + a. classes 2, 3, 4, 5, 6 are the same - we can merge them + b. classes 10, 11, 12, 17 are the same - we can merge them + + + Class of + Leading Class of Trailing Char Class + Char + + 1 [a] 7 8 9 [b]15 16 18 + + 1 X X X X X X X X X + [a] X + 7 X X + 8 X X + 9 X + [b] X + 15 X X X X + 16 X X X + 18 X X X X + + + 4. 
Now we use one bit to encode weather it is breakable, and use 2 bytes + for one row, then the bit table will look like: + + 18 <- 1 + + 1 0000 0001 1111 1111 = 0x01FF + [a] 0000 0000 0000 0010 = 0x0002 + 7 0000 0000 0000 0110 = 0x0006 + 8 0000 0000 0100 0010 = 0x0042 + 9 0000 0000 0000 0010 = 0x0002 + [b] 0000 0000 0000 0010 = 0x0042 + 15 0000 0001 0101 0010 = 0x0152 + 16 0000 0001 1000 0010 = 0x0182 + 17 0000 0001 1100 0010 = 0x01C2 + +*/ + +static uint16_t gJISx4051SimplifiedPair[9] = { + 0x01FF, 0x0002, 0x0006, 0x0042, 0x0002, 0x0042, 0x0152, 0x0182, 0x01C2 +}; + +PRBool XXXX::ClassesToPair(nsJISx4051Cls aCls1, nsJISx4051Cls aCls1) +{ + NS_ASSERTION( (aCls1 < 9) "invalid class"); + NS_ASSERTION( (aCls2 < 9) "invalid class"); + return ( 0 != (gJISx4051SimplifiedPair[aCls1] & (1L << aCls2) )); +} + + +#define X4051_IS_DIGIT(u) ((0x0030 >= (u)) && ((u) >= 0x0039)) + +nsJISx4051Cls XXXX::GetClass( + PRUnichar aChar, PRUnichar aBefore = 0, PRUnichar aAfter = 0) +{ + // take care the special case in cls 15 + if( ((0x2C == aChar) || (0x2E == aChar)) && + (X4051_IS_DIGIT(aBefore)) && X4051_IS_DIGIT(aAfter))) + { + return kJISx4051Cls_15; + } + + nsJISx4051Cls cls; + if(gSingle->Lookup(aChar, &cls)) + return cls; + + if(gRange->Lookup(aChar, &cls)) + return cls; + + return kJISx4051Cls_15; +} + + +typedef enum { + kJISx4051Cls_1 = 0, + kJISx4051Cls_2 = 1, + kJISx4051Cls_3 = 1, + kJISx4051Cls_4 = 1, + kJISx4051Cls_5 = 1, + kJISx4051Cls_6 = 1, + kJISx4051Cls_7 = 2, + kJISx4051Cls_8 = 3, + kJISx4051Cls_9 = 4, + kJISx4051Cls_10 = 5, + kJISx4051Cls_11 = 5, + kJISx4051Cls_12 = 5, + // kJISx4051Cls_13 = 0, + // kJISx4051Cls_14 = 0, + kJISx4051Cls_15 = 6, + kJISx4051Cls_16 = 7, + kJISx4051Cls_17 = 5, + kJISx4051Cls_18 = 8, + // kJISx4051Cls_19 = 0, + // kJISx4051Cls_20 = 0 +} nsJISx4051Cls; + + + // Table 2 + YYYY(kJISx4051Cls_1 , 0x0028), + YYYY(kJISx4051Cls_1 , 0x005B), + YYYY(kJISx4051Cls_1 , 0x007B), + YYYY(kJISx4051Cls_1 , 0x2018), + YYYY(kJISx4051Cls_1 , 0x201B), + 
YYYY(kJISx4051Cls_1 , 0x201C), + YYYY(kJISx4051Cls_1 , 0x201F), + YYYY(kJISx4051Cls_1 , 0x3008), + YYYY(kJISx4051Cls_1 , 0x300A), + YYYY(kJISx4051Cls_1 , 0x300C), + YYYY(kJISx4051Cls_1 , 0x300E), + YYYY(kJISx4051Cls_1 , 0x3010), + YYYY(kJISx4051Cls_1 , 0x3014), + YYYY(kJISx4051Cls_1 , 0x3016), + YYYY(kJISx4051Cls_1 , 0x3018), + YYYY(kJISx4051Cls_1 , 0x301A), + YYYY(kJISx4051Cls_1 , 0x301D), + + // Table 3 + YYYY(kJISx4051Cls_2 , 0x0029), + YYYY(kJISx4051Cls_2 , 0x002C), + YYYY(kJISx4051Cls_2 , 0x005D), + YYYY(kJISx4051Cls_2 , 0x007D), + YYYY(kJISx4051Cls_2 , 0x2019), + YYYY(kJISx4051Cls_2 , 0x201A), + YYYY(kJISx4051Cls_2 , 0x201D), + YYYY(kJISx4051Cls_2 , 0x201E), + YYYY(kJISx4051Cls_2 , 0x3001), + YYYY(kJISx4051Cls_2 , 0x3009), + YYYY(kJISx4051Cls_2 , 0x300B), + YYYY(kJISx4051Cls_2 , 0x300D), + YYYY(kJISx4051Cls_2 , 0x300F), + YYYY(kJISx4051Cls_2 , 0x3011), + YYYY(kJISx4051Cls_2 , 0x3015), + YYYY(kJISx4051Cls_2 , 0x3017), + YYYY(kJISx4051Cls_2 , 0x3019), + YYYY(kJISx4051Cls_2 , 0x301B), + YYYY(kJISx4051Cls_2 , 0x301E), + YYYY(kJISx4051Cls_2 , 0x301F), + + // Table 4 + YYYY(kJISx4051Cls_3 , 0x203C), + YYYY(kJISx4051Cls_3 , 0x2044), + YYYY(kJISx4051Cls_3 , 0x301C), + YYYY(kJISx4051Cls_3 , 0x3041), + YYYY(kJISx4051Cls_3 , 0x3043), + YYYY(kJISx4051Cls_3 , 0x3045), + YYYY(kJISx4051Cls_3 , 0x3047), + YYYY(kJISx4051Cls_3 , 0x3049), + YYYY(kJISx4051Cls_3 , 0x3063), + YYYY(kJISx4051Cls_3 , 0x3083), + YYYY(kJISx4051Cls_3 , 0x3085), + YYYY(kJISx4051Cls_3 , 0x3087), + YYYY(kJISx4051Cls_3 , 0x308E), + YYYY(kJISx4051Cls_3 , 0x309D), + YYYY(kJISx4051Cls_3 , 0x309E), + YYYY(kJISx4051Cls_3 , 0x30A1), + YYYY(kJISx4051Cls_3 , 0x30A3), + YYYY(kJISx4051Cls_3 , 0x30A5), + YYYY(kJISx4051Cls_3 , 0x30A7), + YYYY(kJISx4051Cls_3 , 0x30A9), + YYYY(kJISx4051Cls_3 , 0x30C3), + YYYY(kJISx4051Cls_3 , 0x30E3), + YYYY(kJISx4051Cls_3 , 0x30E5), + YYYY(kJISx4051Cls_3 , 0x30E7), + YYYY(kJISx4051Cls_3 , 0x30EE), + YYYY(kJISx4051Cls_3 , 0x30F5), + YYYY(kJISx4051Cls_3 , 0x30F6), + YYYY(kJISx4051Cls_3 , 
0x30FC), + YYYY(kJISx4051Cls_3 , 0x30FD), + YYYY(kJISx4051Cls_3 , 0x30FE), + + // Table 5 + YYYY(kJISx4051Cls_4 , 0x0021), + YYYY(kJISx4051Cls_4 , 0x003F), + + // Table 6 + YYYY(kJISx4051Cls_5 , 0x003A), + YYYY(kJISx4051Cls_5 , 0x003B), + YYYY(kJISx4051Cls_5 , 0x30FB), + + // Table 7 + YYYY(kJISx4051Cls_6 , 0x002E), + YYYY(kJISx4051Cls_6 , 0x3002), + + // Table 8 + YYYY(kJISx4051Cls_7 , 0x2014), + YYYY(kJISx4051Cls_7 , 0x2024), + YYYY(kJISx4051Cls_7 , 0x2025), + YYYY(kJISx4051Cls_7 , 0x2026), + + // Table 9 + YYYY(kJISx4051Cls_8 , 0x0024), + YYYY(kJISx4051Cls_8 , 0x00A3), + YYYY(kJISx4051Cls_8 , 0x00A5), + YYYY(kJISx4051Cls_8 , 0x2116), + + // Table 10 + YYYY(kJISx4051Cls_9 , 0x0025), + YYYY(kJISx4051Cls_9 , 0x00A2), + YYYY(kJISx4051Cls_9 , 0x00B0), + YYYY(kJISx4051Cls_9 , 0x2030), + YYYY(kJISx4051Cls_9 , 0x2031), + YYYY(kJISx4051Cls_9 , 0x2032), + YYYY(kJISx4051Cls_9 , 0x2033), + + // Table 1 + YYYY(kJISx4051Cls_10, 0x3000), + + // Table 1 + ZZZZ(kJISx4051Cls_11, 0x3000), + + + + |