summaryrefslogtreecommitdiffstats
path: root/intl/unicharutil/tests/NormalizationTest.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/unicharutil/tests/NormalizationTest.cpp')
-rw-r--r--intl/unicharutil/tests/NormalizationTest.cpp282
1 files changed, 282 insertions, 0 deletions
diff --git a/intl/unicharutil/tests/NormalizationTest.cpp b/intl/unicharutil/tests/NormalizationTest.cpp
new file mode 100644
index 000000000..aaf775a8f
--- /dev/null
+++ b/intl/unicharutil/tests/NormalizationTest.cpp
@@ -0,0 +1,282 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include "gtest/gtest.h"
+#include "nsXPCOM.h"
+#include "nsIUnicodeNormalizer.h"
+#include "nsString.h"
+#include "nsCharTraits.h"
+#include "nsServiceManagerUtils.h"
+#include "mozilla/Sprintf.h"
+
+struct testcaseLine {
+ wchar_t* c1;
+ wchar_t* c2;
+ wchar_t* c3;
+ wchar_t* c4;
+ wchar_t* c5;
+ char* description;
+};
+
+#ifdef DEBUG_smontagu
+#define DEBUG_NAMED_TESTCASE(t, s) \
+ printf(t ": "); \
+ for (uint32_t i = 0; i < s.Length(); ++i) \
+ printf("%x ", s.CharAt(i)); \
+ printf("\n")
+#else
+#define DEBUG_NAMED_TESTCASE(t, s)
+#endif
+
+#define DEBUG_TESTCASE(x) DEBUG_NAMED_TESTCASE(#x, x)
+
+#define NORMALIZE_AND_COMPARE(base, comparison, form, description) \
+ normalized.Truncate();\
+ normalizer->NormalizeUnicode##form(comparison, normalized);\
+ DEBUG_NAMED_TESTCASE(#form "(" #comparison ")", normalized);\
+ if (!base.Equals(normalized)) {\
+ rv = false;\
+ showError(description, #base " != " #form "(" #comparison ")\n");\
+ }
+
+NS_DEFINE_CID(kUnicodeNormalizerCID, NS_UNICODE_NORMALIZER_CID);
+
+nsIUnicodeNormalizer *normalizer;
+
+#include "NormalizationData.h"
+
+void showError(const char* description, const char* errorText)
+{
+ printf("%s failed: %s", description, errorText);
+}
+
+bool TestInvariants(testcaseLine* testLine)
+{
+ nsAutoString c1, c2, c3, c4, c5, normalized;
+ c1 = nsDependentString((char16_t*)testLine->c1);
+ c2 = nsDependentString((char16_t*)testLine->c2);
+ c3 = nsDependentString((char16_t*)testLine->c3);
+ c4 = nsDependentString((char16_t*)testLine->c4);
+ c5 = nsDependentString((char16_t*)testLine->c5);
+ bool rv = true;
+
+ /*
+ 1. The following invariants must be true for all conformant implementations
+
+ NFC
+ c2 == NFC(c1) == NFC(c2) == NFC(c3)
+ */
+ DEBUG_TESTCASE(c2);
+ NORMALIZE_AND_COMPARE(c2, c1, NFC, testLine->description);
+ NORMALIZE_AND_COMPARE(c2, c2, NFC, testLine->description);
+ NORMALIZE_AND_COMPARE(c2, c3, NFC, testLine->description);
+
+ /*
+ c4 == NFC(c4) == NFC(c5)
+ */
+ DEBUG_TESTCASE(c4);
+ NORMALIZE_AND_COMPARE(c4, c4, NFC, testLine->description);
+ NORMALIZE_AND_COMPARE(c4, c5, NFC, testLine->description);
+
+ /*
+ NFD
+ c3 == NFD(c1) == NFD(c2) == NFD(c3)
+ */
+ DEBUG_TESTCASE(c3);
+ NORMALIZE_AND_COMPARE(c3, c1, NFD, testLine->description);
+ NORMALIZE_AND_COMPARE(c3, c2, NFD, testLine->description);
+ NORMALIZE_AND_COMPARE(c3, c3, NFD, testLine->description);
+ /*
+ c5 == NFD(c4) == NFD(c5)
+ */
+ DEBUG_TESTCASE(c5);
+ NORMALIZE_AND_COMPARE(c5, c4, NFD, testLine->description);
+ NORMALIZE_AND_COMPARE(c5, c5, NFD, testLine->description);
+
+ /*
+ NFKC
+ c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
+ */
+ DEBUG_TESTCASE(c4);
+ NORMALIZE_AND_COMPARE(c4, c1, NFKC, testLine->description);
+ NORMALIZE_AND_COMPARE(c4, c2, NFKC, testLine->description);
+ NORMALIZE_AND_COMPARE(c4, c3, NFKC, testLine->description);
+ NORMALIZE_AND_COMPARE(c4, c4, NFKC, testLine->description);
+ NORMALIZE_AND_COMPARE(c4, c5, NFKC, testLine->description);
+
+ /*
+ NFKD
+ c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
+ */
+ DEBUG_TESTCASE(c5);
+ NORMALIZE_AND_COMPARE(c5, c1, NFKD, testLine->description);
+ NORMALIZE_AND_COMPARE(c5, c2, NFKD, testLine->description);
+ NORMALIZE_AND_COMPARE(c5, c3, NFKD, testLine->description);
+ NORMALIZE_AND_COMPARE(c5, c4, NFKD, testLine->description);
+ NORMALIZE_AND_COMPARE(c5, c5, NFKD, testLine->description);
+
+ return rv;
+}
+
+uint32_t UTF32CodepointFromTestcase(testcaseLine* testLine)
+{
+ if (!IS_SURROGATE(testLine->c1[0]))
+ return testLine->c1[0];
+
+ NS_ASSERTION(NS_IS_HIGH_SURROGATE(testLine->c1[0]) &&
+ NS_IS_LOW_SURROGATE(testLine->c1[1]),
+ "Test data neither in BMP nor legal surrogate pair");
+ return SURROGATE_TO_UCS4(testLine->c1[0], testLine->c1[1]);
+}
+
+bool TestUnspecifiedCodepoint(uint32_t codepoint)
+{
+ bool rv = true;
+ char16_t unicharArray[3];
+ nsAutoString X, normalized;
+
+ if (IS_IN_BMP(codepoint)) {
+ unicharArray[0] = codepoint;
+ unicharArray[1] = 0;
+ X = nsDependentString(unicharArray);
+ }
+ else {
+ unicharArray[0] = H_SURROGATE(codepoint);
+ unicharArray[1] = L_SURROGATE(codepoint);
+ unicharArray[2] = 0;
+ X = nsDependentString(unicharArray);
+ }
+
+ /*
+ 2. For every code point X assigned in this version of Unicode that is not specifically
+ listed in Part 1, the following invariants must be true for all conformant
+ implementations:
+
+ X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
+ */
+ static const size_t len = 9;
+ char description[len];
+
+ DEBUG_TESTCASE(X);
+ snprintf(description, len, "U+%04X", codepoint);
+ NORMALIZE_AND_COMPARE(X, X, NFC, description);
+ NORMALIZE_AND_COMPARE(X, X, NFD, description);
+ NORMALIZE_AND_COMPARE(X, X, NFKC, description);
+ NORMALIZE_AND_COMPARE(X, X, NFKD, description);
+ return rv;
+}
+
+void TestPart0()
+{
+ printf("Test Part0: Specific cases\n");
+
+ uint32_t i = 0;
+ uint32_t numFailed = 0;
+ uint32_t numPassed = 0;
+
+ while (Part0TestData[i].c1[0] != 0) {
+ if (TestInvariants(&Part0TestData[i++]))
+ ++numPassed;
+ else
+ ++numFailed;
+ }
+ printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+ EXPECT_EQ(0u, numFailed);
+}
+
+void TestPart1()
+{
+ printf("Test Part1: Character by character test\n");
+
+ uint32_t i = 0;
+ uint32_t numFailed = 0;
+ uint32_t numPassed = 0;
+ uint32_t codepoint;
+ uint32_t testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[i]);
+
+ for (codepoint = 1; codepoint < 0x110000; ++codepoint) {
+ if (testDataCodepoint == codepoint) {
+ if (TestInvariants(&Part1TestData[i]))
+ ++numPassed;
+ else
+ ++numFailed;
+ testDataCodepoint = UTF32CodepointFromTestcase(&Part1TestData[++i]);
+ } else {
+ if (TestUnspecifiedCodepoint(codepoint))
+ ++numPassed;
+ else
+ ++numFailed;
+ }
+ }
+ printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+ EXPECT_EQ(0u, numFailed);
+}
+
+void TestPart2()
+{
+ printf("Test Part2: Canonical Order Test\n");
+
+ uint32_t i = 0;
+ uint32_t numFailed = 0;
+ uint32_t numPassed = 0;
+
+ while (Part2TestData[i].c1[0] != 0) {
+ if (TestInvariants(&Part2TestData[i++]))
+ ++numPassed;
+ else
+ ++numFailed;
+ }
+ printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+ EXPECT_EQ(0u, numFailed);
+}
+
+void TestPart3()
+{
+ printf("Test Part3: PRI #29 Test\n");
+
+ uint32_t i = 0;
+ uint32_t numFailed = 0;
+ uint32_t numPassed = 0;
+
+ while (Part3TestData[i].c1[0] != 0) {
+ if (TestInvariants(&Part3TestData[i++]))
+ ++numPassed;
+ else
+ ++numFailed;
+ }
+ printf(" %d cases passed, %d failed\n\n", numPassed, numFailed);
+ EXPECT_EQ(0u, numFailed);
+}
+
+TEST(NormalizationTest, Main) {
+ if (sizeof(wchar_t) != 2) {
+ printf("This test can only be run where sizeof(wchar_t) == 2\n");
+ return;
+ }
+ if (strlen(versionText) == 0) {
+ printf("No testcases: to run the tests generate the header file using\n");
+ printf(" perl genNormalizationData.pl\n");
+ printf("in intl/unichar/tools and rebuild\n");
+ return;
+ }
+
+ printf("NormalizationTest: test nsIUnicodeNormalizer. UCD version: %s\n",
+ versionText);
+
+ normalizer = nullptr;
+ nsresult res;
+ res = CallGetService(kUnicodeNormalizerCID, &normalizer);
+
+ ASSERT_FALSE(NS_FAILED(res)) << "GetService failed";
+ ASSERT_NE(nullptr, normalizer);
+
+ TestPart0();
+ TestPart1();
+ TestPart2();
+ TestPart3();
+
+ NS_RELEASE(normalizer);
+}