5 files changed, 401 insertions, 0 deletions
diff --git a/intl/unicharutil/tests/NormalizationData.h b/intl/unicharutil/tests/NormalizationData.h
new file mode 100644
index 000000000..c306d6acb
--- /dev/null
+++ b/intl/unicharutil/tests/NormalizationData.h
@@ -0,0 +1,12 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* dummy test data will be overwritten by generator */
+static char versionText[] = "\0";
+static testcaseLine Part0TestData[1];
+static testcaseLine Part1TestData[1];
+static testcaseLine Part2TestData[1];
+static testcaseLine Part3TestData[1];
+
diff --git a/intl/unicharutil/tests/NormalizationTest.cpp b/intl/unicharutil/tests/NormalizationTest.cpp
new file mode 100644
index 000000000..aaf775a8f
--- /dev/null
+++ b/intl/unicharutil/tests/NormalizationTest.cpp
@@ -0,0 +1,282 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include "gtest/gtest.h"
+#include "nsXPCOM.h"
+#include "nsIUnicodeNormalizer.h"
+#include "nsString.h"
+#include "nsCharTraits.h"
+#include "nsServiceManagerUtils.h"
+#include "mozilla/Sprintf.h"
+
+struct testcaseLine {
+  wchar_t* c1;
+  wchar_t* c2;
+  wchar_t* c3;
+  wchar_t* c4;
+  wchar_t* c5;
+  char* description;
+};
+
+#ifdef DEBUG_smontagu
+#define DEBUG_NAMED_TESTCASE(t, s) \
+  printf(t ": "); \
+  for (uint32_t i = 0; i < s.Length(); ++i) \
+    printf("%x ", s.CharAt(i)); \
+  printf("\n")
+#else
+#define DEBUG_NAMED_TESTCASE(t, s)
+#endif
+
+#define DEBUG_TESTCASE(x) DEBUG_NAMED_TESTCASE(#x, x)
+
+#define NORMALIZE_AND_COMPARE(base, comparison, form, description) \
+   normalized.Truncate();\
+   normalizer->NormalizeUnicode##form(comparison, normalized);\
+   DEBUG_NAMED_TESTCASE(#form "(" #comparison ")", normalized);\
+   if (!base.Equals(normalized)) {\
+     rv = false;\
+     showError(description, #base " != " #form "(" #comparison ")\n");\
+   }
+
+NS_DEFINE_CID(kUnicodeNormalizerCID, NS_UNICODE_NORMALIZER_CID);
+
+nsIUnicodeNormalizer *normalizer;
+
+#include "NormalizationData.h"
+
+void showError(const char* description, const char* errorText)
+{
+  printf("%s failed: %s", description, errorText);
+}
+
+bool TestInvariants(testcaseLine* testLine)
+{
+  nsAutoString c1, c2, c3, c4, c5, normalized;
+  c1 = nsDependentString((char16_t*)testLine->c1);
+  c2 = nsDependentString((char16_t*)testLine->c2);
+  c3 = nsDependentString((char16_t*)testLine->c3);
+  c4 = nsDependentString((char16_t*)testLine->c4);
+  c5 = nsDependentString((char16_t*)testLine->c5);
+  bool rv = true;
+ 
+  /*
+    1. The following invariants must be true for all conformant implementations
+
+    NFC
+      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
+  */
+  DEBUG_TESTCASE(c2);
+  NORMALIZE_AND_COMPARE(c2, c1, NFC, testLine->description);  
+  NORMALIZE_AND_COMPARE(c2, c2, NFC, testLine->description);
+  NORMALIZE_AND_COMPARE(c2, c3, NFC, testLine->description);
+
+  /*
+      c4 ==  NFC(c4) ==  NFC(c5)
+  */
+  DEBUG_TESTCASE(c4);
+  NORMALIZE_AND_COMPARE(c4, c4, NFC, testLine->description);
+  NORMALIZE_AND_COMPARE(c4, c5, NFC, testLine->description);
+
+  /*
+    NFD
+      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
+  */
+  DEBUG_TESTCASE(c3);
+  NORMALIZE_AND_COMPARE(c3, c1, NFD, testLine->description);
+  NORMALIZE_AND_COMPARE(c3, c2, NFD, testLine->description);
+  NORMALIZE_AND_COMPARE(c3, c3, NFD, testLine->description);
+  /*
+      c5 ==  NFD(c4) ==  NFD(c5)
+  */
+  DEBUG_TESTCASE(c5);
+  NORMALIZE_AND_COMPARE(c5, c4, NFD, testLine->description);
+  NORMALIZE_AND_COMPARE(c5, c5, NFD, testLine->description);
+
+  /*
+    NFKC
+      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
+  */
+  DEBUG_TESTCASE(c4);
+  NORMALIZE_AND_COMPARE(c4, c1, NFKC, testLine->description);
+  NORMALIZE_AND_COMPARE(c4, c2, NFKC, testLine->description);
+  NORMALIZE_AND_COMPARE(c4, c3, NFKC, testLine->description);
+  NORMALIZE_AND_COMPARE(c4, c4, NFKC, testLine->description);
+  NORMALIZE_AND_COMPARE(c4, c5, NFKC, testLine->description);
+
+  /*
+    NFKD
+      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
+  */
+  DEBUG_TESTCASE(c5);
+  NORMALIZE_AND_COMPARE(c5, c1, NFKD, testLine->description);
+  NORMALIZE_AND_COMPARE(c5, c2, NFKD, testLine->description);
+  NORMALIZE_AND_COMPARE(c5, c3, NFKD, testLine->description);
+  NORMALIZE_AND_COMPARE(c5, c4, NFKD, testLine->description);
+  NORMALIZE_AND_COMPARE(c5, c5, NFKD, testLine->description);
+
+  return rv;
+}
+
+uint32_t UTF32CodepointFromTestcase(testcaseLine* testLine)
+{
+  if (!IS_SURROGATE(testLine->c1[0]))
+    return testLine->c1[0];
+
+  NS_ASSERTION(NS_IS_HIGH_SURROGATE(testLine->c1[0]) &&
+               NS_IS_LOW_SURROGATE(testLine->c1[1]),
+               "Test data neither in BMP nor legal surrogate pair");
+  return SURROGATE_TO_UCS4(testLine->c1[0], testLine->c1[1]);
+}
+
+bool TestUnspecifiedCodepoint(uint32_t codepoint)
+{
+  bool rv = true;
+  char16_t unicharArray[3];
+  nsAutoString X, normalized;
+
+  if (IS_IN_BMP(codepoint)) {
+    unicharArray[0] = codepoint;
+    unicharArray[1] = 0;
+    X = nsDependentString(unicharArray);
+  }
+  else {
+    unicharArray[0] = H_SURROGATE(codepoint);
+    unicharArray[1] = L_SURROGATE(codepoint);
+    unicharArray[2] = 0;
+    X = nsDependentString(unicharArray);
+  }
+
+  /*
+ 2. For every code point X assigned in this version of Unicode that is not specifically
+    listed in Part 1, the following invariants must be true for all conformant
+    implementations:
+
+      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
+  */
+  static const size_t len = 9;
+  char description[len];
+
+  DEBUG_TESTCASE(X);
+  snprintf(description, len, "U+%04X", codepoint);
+  NORMALIZE_AND_COMPARE(X, X, NFC, description);
+  NORMALIZE_AND_COMPARE(X, X, NFD, description);
+  NORMALIZE_AND_COMPARE(X, X, NFKC, description);
+  NORMALIZE_AND_COMPARE(X, X, NFKD, description);
+  return rv;
+}
+
+void TestPart0()
+{
+  printf("Test Part0: Specific cases\n");
+
+  uint32_t i = 0;
+  uint32_t numFailed = 0;
+  uint32_t numPassed = 0;
+
+  while (Part0TestData[i].c1[0] != 0) {
+    if (TestInvariants(&Part0TestData[i++]))
+      ++numPassed;
+    else
+      ++numFailed;
+  }
+  printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+  EXPECT_EQ(0u, numFailed);
+}
+
+void TestPart1()
+{
+  printf("Test Part1: Character by character test\n");
+
+  uint32_t i = 0;
+  uint32_t numFailed = 0;
+  uint32_t numPassed = 0;
+  uint32_t codepoint;
+  uint32_t testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[i]);
+
+  for (codepoint = 1; codepoint < 0x110000; ++codepoint) {
+    if (testDataCodepoint == codepoint) {
+      if (TestInvariants(&Part1TestData[i]))
+        ++numPassed;
+      else
+        ++numFailed;
+      testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[++i]);
+    } else {
+      if (TestUnspecifiedCodepoint(codepoint))
+        ++numPassed;
+      else
+        ++numFailed;
+    }
+  }
+  printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+  EXPECT_EQ(0u, numFailed);
+}
+
+void TestPart2()
+{
+  printf("Test Part2: Canonical Order Test\n");
+
+  uint32_t i = 0;
+  uint32_t numFailed = 0;
+  uint32_t numPassed = 0;
+
+  while (Part2TestData[i].c1[0] != 0) {
+    if (TestInvariants(&Part2TestData[i++]))
+      ++numPassed;
+    else
+      ++numFailed;
+  }
+  printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+  EXPECT_EQ(0u, numFailed);
+}
+
+void TestPart3()
+{
+  printf("Test Part3: PRI #29 Test\n");
+
+  uint32_t i = 0;
+  uint32_t numFailed = 0;
+  uint32_t numPassed = 0;
+
+  while (Part3TestData[i].c1[0] != 0) {
+    if (TestInvariants(&Part3TestData[i++]))
+      ++numPassed;
+    else
+      ++numFailed;
+  }
+  printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+  EXPECT_EQ(0u, numFailed);
+}
+
+TEST(NormalizationTest, Main) {
+  if (sizeof(wchar_t) != 2) {
+    printf("This test can only be run where sizeof(wchar_t) == 2\n");
+    return;
+  }
+  if (strlen(versionText) == 0) {
+    printf("No testcases: to run the tests generate the header file using\n");
+    printf(" perl genNormalizationData.pl\n");
+    printf("in intl/unichar/tools and rebuild\n");
+    return;
+  }
+
+  printf("NormalizationTest: test nsIUnicodeNormalizer. UCD version: %s\n", 
+         versionText); 
+
+  normalizer = nullptr;
+  nsresult res;
+  res = CallGetService(kUnicodeNormalizerCID, &normalizer);
+  
+  ASSERT_FALSE(NS_FAILED(res)) << "GetService failed";
+  ASSERT_NE(nullptr, normalizer);
+
+  TestPart0();
+  TestPart1();
+  TestPart2();
+  TestPart3();
+  
+  NS_RELEASE(normalizer);
+}
diff --git a/intl/unicharutil/tests/genNormalizationData.pl b/intl/unicharutil/tests/genNormalizationData.pl
new file mode 100644
index 000000000..816ab94e7
--- /dev/null
+++ b/intl/unicharutil/tests/genNormalizationData.pl
@@ -0,0 +1,93 @@
+#!/usr/bin/perl 
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+open ( TEXTFILE , "< NormalizationTest.txt")
+    || die "Cannot find NormalizationTest.txt. The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";
+
+open ( OUT , "> NormalizationData.h")
+#open ( OUT , "> test.txt")
+    || die "Cannot create output file NormalizationData.h\n";
+
+$mpl = <<END_OF_MPL;
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* 
+    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
+    mozilla/intl/unicharutil/tools/genNormalizationData.pl
+ */
+END_OF_MPL
+
+print OUT $mpl;
+
+# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
+#      true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
+#      To make it work where that assumption doesn't hold, one could generate
+#      one huge array containing all the strings as 16-bit units (including
+#      the 0 terminator) and initialize the array of testcaseLine with pointers
+#      into the huge array.
+
+while(<TEXTFILE>) {
+    chop;
+    if (/^# NormalizationTest-(.+)\.txt/) {
+	print OUT "static char versionText[] = \"$1\";\n";
+    } elsif (/^\@Part(.)/) {
+	if ($1 != "0") {
+	    print OUT "  {\n";
+	    print OUT "    L\"\",\n";
+	    print OUT "    L\"\",\n";
+	    print OUT "    L\"\",\n";
+	    print OUT "    L\"\",\n";
+	    print OUT "    L\"\",\n";
+	    print OUT "    \"\",\n";
+	    print OUT "  },\n";
+	    print OUT "};\n";
+	}
+	print OUT "\n";
+	print OUT "static testcaseLine Part$1TestData[] = \n";
+	print OUT "{\n";
+    } else {
+	unless (/^\#/) {
+	    @cases = split(/;/ , $_);
+	    print OUT "  {\n";
+	    for ($case = 0; $case < 5; ++$case) {
+		$c = $cases[$case];
+		print OUT "    L\"";
+		@codepoints = split(/ / , $c);
+		foreach (@codepoints) {
+		    $cp = hex($_);
+		    if ($cp < 0x10000) {
+                      # BMP codepoint
+			printf OUT "\\x%04X", $cp;
+		    } else {
+                      # non-BMP codepoint, convert to surrogate pair
+			printf OUT "\\x%04X\\x%04X",
+			           ($cp >> 10) + 0xD7C0,
+			           ($cp & 0x03FF) | 0xDC00;
+		    }
+		}
+		print OUT "\",\n";
+	    }
+	    $description = $cases[10];
+	    $description =~ s/^ \) //;
+	    print OUT "    \"$description\"\n";
+	    print OUT "  },\n";
+	}
+    }
+}
+ 
+print OUT "  {\n";
+print OUT "    L\"\",\n";
+print OUT "    L\"\",\n";
+print OUT "    L\"\",\n";
+print OUT "    L\"\",\n";
+print OUT "    L\"\",\n";
+print OUT "    \"\",\n";
+print OUT "  },\n";
+print OUT "};\n";
+close (OUT);
+close (TEXTFILE);
diff --git a/intl/unicharutil/tests/moz.build b/intl/unicharutil/tests/moz.build
new file mode 100755
index 000000000..afd60160f
--- /dev/null
+++ b/intl/unicharutil/tests/moz.build
@@ -0,0 +1,11 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+SOURCES += [
+    'NormalizationTest.cpp',
+]
+
+FINAL_LIBRARY = 'xul-gtest'
diff --git a/intl/unicharutil/tests/unit/xpcshell.ini b/intl/unicharutil/tests/unit/xpcshell.ini
new file mode 100755
index 000000000..1de04e432
--- /dev/null
+++ b/intl/unicharutil/tests/unit/xpcshell.ini
@@ -0,0 +1,3 @@
+[DEFAULT]
+head = 
+tail =