diff options
Diffstat (limited to 'intl/unicharutil/tests/genNormalizationData.pl')
-rw-r--r-- | intl/unicharutil/tests/genNormalizationData.pl | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/intl/unicharutil/tests/genNormalizationData.pl b/intl/unicharutil/tests/genNormalizationData.pl new file mode 100644 index 000000000..816ab94e7 --- /dev/null +++ b/intl/unicharutil/tests/genNormalizationData.pl @@ -0,0 +1,93 @@ +#!/usr/bin/perl +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +open ( TEXTFILE , "< NormalizationTest.txt") + || die "Cannot find NormalizationTest.txt. The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n"; + +open ( OUT , "> NormalizationData.h") +#open ( OUT , "> test.txt") + || die "Cannot create output file NormalizationData.h\n"; + +$mpl = <<END_OF_MPL; +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* + DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY + mozilla/intl/unicharutil/tools/genNormalizationData.pl + */ +END_OF_MPL + +print OUT $mpl; + +# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently +# true on Windows, Linux and Mac (with |g++ -fshort-wchar|). +# To make it work where that assumption doesn't hold, one could generate +# one huge array containing all the strings as 16-bit units (including +# the 0 terminator) and initialize the array of testcaseLine with pointers +# into the huge array. + +while(<TEXTFILE>) { + chop; + if (/^# NormalizationTest-(.+)\.txt/) { + print OUT "static char versionText[] = \"$1\";\n"; + } elsif (/^\@Part(.)/) { + if ($1 != "0") { + print OUT " {\n"; + print OUT " L\"\",\n"; + print OUT " L\"\",\n"; + print OUT " L\"\",\n"; + print OUT " L\"\",\n"; + print OUT " L\"\",\n"; + print OUT " \"\",\n"; + print OUT " },\n"; + print OUT "};\n"; + } + print OUT "\n"; + print OUT "static testcaseLine Part$1TestData[] = \n"; + print OUT "{\n"; + } else { + unless (/^\#/) { + @cases = split(/;/ , $_); + print OUT " {\n"; + for ($case = 0; $case < 5; ++$case) { + $c = $cases[$case]; + print OUT " L\""; + @codepoints = split(/ / , $c); + foreach (@codepoints) { + $cp = hex($_); + if ($cp < 0x10000) { + # BMP codepoint + printf OUT "\\x%04X", $cp; + } else { + # non-BMP codepoint, convert to surrogate pair + printf OUT "\\x%04X\\x%04X", + ($cp >> 10) + 0xD7C0, + ($cp & 0x03FF) | 0xDC00; + } + } + print OUT "\",\n"; + } + $description = $cases[10]; + $description =~ s/^ \) //; + print OUT " \"$description\"\n"; + print OUT " },\n"; + } + } +} + +print OUT " {\n"; +print OUT " L\"\",\n"; +print OUT " L\"\",\n"; +print OUT " L\"\",\n"; +print OUT " L\"\",\n"; +print OUT " L\"\",\n"; +print OUT " \"\",\n"; +print OUT " },\n"; +print OUT "};\n"; +close (OUT); +close (TEXTFILE); |