diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /intl/chardet/tools/charfreqtostat.pl | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'intl/chardet/tools/charfreqtostat.pl')
-rw-r--r-- | intl/chardet/tools/charfreqtostat.pl | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/intl/chardet/tools/charfreqtostat.pl b/intl/chardet/tools/charfreqtostat.pl new file mode 100644 index 000000000..04af0c82c --- /dev/null +++ b/intl/chardet/tools/charfreqtostat.pl @@ -0,0 +1,95 @@ +#!/usr/bin/perl +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +sub GenNPL { + my($ret) = << "END_NPL"; +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +END_NPL + + return $ret; +} + +print GenNPL(); +$total=0; +@h; +@l; + +while(<STDIN>) +{ + @k = split(/\s+/, $_); + @i = unpack("CCCC", $k[0]); +# printf("%x %x %s",$i[0] , $i[1] , "[" . $k[0] . "] " . $i . " " . $j . " " . $k[1] ."\n"); + if((0xA1 <= $i[0]) && (0xA1 <= $i[1])){ + $total += $k[1]; + $v = $i[0] - 0x00A1; + $h[$v] += $k[1]; + $u = $i[1] - 0x00A1; + $l[$u] += $k[1]; +# print "hello $v $h[$v] $u $l[$u]\n"; + } +} + + +$ffh = 0.0; +$ffl = 0.0; +for($i=0x00A1;$i< 0x00FF ; $i++) +{ + $fh[$i - 0x00a1] = $h[$i- 0x00a1] / $total; + $ffh += $fh[$i - 0x00a1]; + + $fl[$i - 0x00a1] = $l[$i- 0x00a1] / $total; + $ffl += $fl[$i - 0x00a1]; +} +$mh = $ffh / 94.0; +$ml = $ffl / 94.0; + +$sumh=0.0; +$suml=0.0; +for($i=0x00A1;$i< 0x00FF ; $i++) +{ + $sh = $fh[$i - 0x00a1] - $mh; + $sh *= $sh; + $sumh += $sh; + + $sl = $fl[$i - 0x00a1] - $ml; + $sl *= $sl; + $suml += $sl; +} +$sumh /= 94.0; +$suml /= 94.0; +$stdh = sqrt($sumh); +$stdl = sqrt($suml); + +print "{\n"; +print " {\n"; +for($i=0x00A1;$i< 0x00FF ; $i++) +{ + if($i eq 0xfe) { + printf(" %.6ff \/\/ FreqH[%2x]\n", $fh[$i - 0x00a1] , $i); + } else { + printf(" %.6ff, \/\/ FreqH[%2x]\n", $fh[$i - 0x00a1] , $i); + } +} +print " },\n"; +printf ("%.6ff, \/\/ Lead Byte StdDev\n", $stdh); +printf ("%.6ff, \/\/ Lead Byte Mean\n", $mh); +printf ("%.6ff, \/\/ Lead Byte Weight\n", $stdh / ($stdh + $stdl)); +print " {\n"; +for($i=0x00A1;$i< 0x00FF ; $i++) +{ + if($i eq 0xfe) { + printf(" %.6ff \/\/ FreqL[%2x]\n", $fl[$i - 0x00a1] , $i); + } else { + printf(" %.6ff, \/\/ FreqL[%2x]\n", $fl[$i - 0x00a1] , $i); + } +} +print " },\n"; +printf ("%.6ff, \/\/ Trail Byte StdDev\n", $stdl); +printf ("%.6ff, \/\/ Trail Byte Mean\n", $ml); +printf ("%.6ff \/\/ Trial Byte Weight\n", $stdl / ($stdh + $stdl)); +print "};\n"; |