summaryrefslogtreecommitdiffstats
path: root/intl/lwbrk/gtest/TestLineBreak.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/lwbrk/gtest/TestLineBreak.cpp')
-rw-r--r--intl/lwbrk/gtest/TestLineBreak.cpp323
1 files changed, 323 insertions, 0 deletions
diff --git a/intl/lwbrk/gtest/TestLineBreak.cpp b/intl/lwbrk/gtest/TestLineBreak.cpp
new file mode 100644
index 000000000..5824bf70f
--- /dev/null
+++ b/intl/lwbrk/gtest/TestLineBreak.cpp
@@ -0,0 +1,323 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include "nsXPCOM.h"
+#include "nsIComponentManager.h"
+#include "nsISupports.h"
+#include "nsServiceManagerUtils.h"
+#include "nsILineBreaker.h"
+#include "nsIWordBreaker.h"
+#include "nsLWBrkCIID.h"
+#include "nsString.h"
+#include "gtest/gtest.h"
+
+NS_DEFINE_CID(kLBrkCID, NS_LBRK_CID);
+NS_DEFINE_CID(kWBrkCID, NS_WBRK_CID);
+
+static char teng1[] =
+// 1 2 3 4 5 6 7
+//01234567890123456789012345678901234567890123456789012345678901234567890123456789
+ "This is a test to test(reasonable) line break. This 0.01123 = 45 x 48.";
+
+static uint32_t lexp1[] = {
+ 4,7,9,14,17,34,39,40,41,42,49,54,62,64,67,69,73
+};
+
+static uint32_t wexp1[] = {
+ 4,5,7,8,9,10,14,15,17,18,22,23,33,34,35,39,43,48,49,50,54,55,56,57,62,63,
+ 64,65,67,68,69,70,72
+};
+
+static char teng2[] =
+// 1 2 3 4 5 6 7
+//01234567890123456789012345678901234567890123456789012345678901234567890123456789
+ "()((reasonab(l)e) line break. .01123=45x48.";
+
+static uint32_t lexp2[] = {
+ 17,22,23,30,44
+};
+
+static uint32_t wexp2[] = {
+ 4,12,13,14,15,16,17,18,22,24,29,30,31,32,37,38,43
+};
+
+static char teng3[] =
+// 1 2 3 4 5 6 7
+//01234567890123456789012345678901234567890123456789012345678901234567890123456789
+ "It's a test to test(ronae ) line break....";
+
+static uint32_t lexp3[] = {
+ 4,6,11,14,25,27,32,42
+};
+
+static uint32_t wexp3[] = {
+ 2,3,4,5,6,7,11,12,14,15,19,20,25,26,27,28,32,33,38
+};
+
+static char ruler1[] =
+" 1 2 3 4 5 6 7 ";
+static char ruler2[] =
+"0123456789012345678901234567890123456789012345678901234567890123456789012";
+
+bool
+Check(const char* in, const uint32_t* out, uint32_t outlen, uint32_t i,
+ uint32_t res[256])
+{
+ bool ok = true;
+
+ if (i != outlen) {
+ ok = false;
+ printf("WARNING!!! return size wrong, expect %d but got %d \n",
+ outlen, i);
+ }
+
+ for (uint32_t j = 0; j < i; j++) {
+ if (j < outlen) {
+ if (res[j] != out[j]) {
+ ok = false;
+ printf("[%d] expect %d but got %d\n", j, out[j], res[j]);
+ }
+ } else {
+ ok = false;
+ printf("[%d] additional %d\n", j, res[j]);
+ }
+ }
+
+ if (!ok) {
+ printf("string = \n%s\n", in);
+ printf("%s\n", ruler1);
+ printf("%s\n", ruler2);
+
+ printf("Expect = \n");
+ for (uint32_t j = 0; j < outlen; j++) {
+ printf("%d,", out[j]);
+ }
+
+ printf("\nResult = \n");
+ for (uint32_t j = 0; j < i; j++) {
+ printf("%d,", res[j]);
+ }
+ printf("\n");
+ }
+
+ return ok;
+}
+
+bool
+TestASCIILB(nsILineBreaker *lb,
+ const char* in,
+ const uint32_t* out, uint32_t outlen)
+{
+ NS_ConvertASCIItoUTF16 eng1(in);
+ uint32_t i;
+ uint32_t res[256];
+ int32_t curr;
+
+ for (i = 0, curr = 0;
+ curr != NS_LINEBREAKER_NEED_MORE_TEXT && i < 256;
+ i++) {
+ curr = lb->Next(eng1.get(), eng1.Length(), curr);
+ res[i] = curr != NS_LINEBREAKER_NEED_MORE_TEXT ? curr : eng1.Length();
+ }
+
+ return Check(in, out, outlen, i, res);
+}
+
+bool
+TestASCIIWB(nsIWordBreaker *lb,
+ const char* in,
+ const uint32_t* out, uint32_t outlen)
+{
+ NS_ConvertASCIItoUTF16 eng1(in);
+
+ uint32_t i;
+ uint32_t res[256];
+ int32_t curr = 0;
+
+ for (i = 0, curr = lb->NextWord(eng1.get(), eng1.Length(), curr);
+ curr != NS_WORDBREAKER_NEED_MORE_TEXT && i < 256;
+ curr = lb->NextWord(eng1.get(), eng1.Length(), curr), i++) {
+ res [i] = curr != NS_WORDBREAKER_NEED_MORE_TEXT ? curr : eng1.Length();
+ }
+
+ return Check(in, out, outlen, i, res);
+}
+
+TEST(LineBreak, LineBreaker)
+{
+ nsILineBreaker *t = nullptr;
+ nsresult res = CallGetService(kLBrkCID, &t);
+ ASSERT_TRUE(NS_SUCCEEDED(res) && t);
+ NS_IF_RELEASE(t);
+
+ res = CallGetService(kLBrkCID, &t);
+ ASSERT_TRUE(NS_SUCCEEDED(res) && t);
+
+ ASSERT_TRUE(TestASCIILB(t, teng1, lexp1, sizeof(lexp1) / sizeof(uint32_t)));
+ ASSERT_TRUE(TestASCIILB(t, teng2, lexp2, sizeof(lexp2) / sizeof(uint32_t)));
+ ASSERT_TRUE(TestASCIILB(t, teng3, lexp3, sizeof(lexp3) / sizeof(uint32_t)));
+
+ NS_RELEASE(t);
+}
+
+TEST(LineBreak, WordBreaker)
+{
+ nsIWordBreaker *t = nullptr;
+ nsresult res = CallGetService(kWBrkCID, &t);
+ ASSERT_TRUE(NS_SUCCEEDED(res) && t);
+ NS_IF_RELEASE(t);
+
+ res = CallGetService(kWBrkCID, &t);
+ ASSERT_TRUE(NS_SUCCEEDED(res) && t);
+
+ ASSERT_TRUE(TestASCIIWB(t, teng1, wexp1, sizeof(wexp1) / sizeof(uint32_t)));
+ ASSERT_TRUE(TestASCIIWB(t, teng2, wexp2, sizeof(wexp2) / sizeof(uint32_t)));
+ ASSERT_TRUE(TestASCIIWB(t, teng3, wexp3, sizeof(wexp3) / sizeof(uint32_t)));
+
+ NS_RELEASE(t);
+}
+
+// 012345678901234
+static const char wb0[] = "T";
+static const char wb1[] = "h";
+static const char wb2[] = "is is a int";
+static const char wb3[] = "ernationali";
+static const char wb4[] = "zation work.";
+
+static const char* wb[] = { wb0, wb1, wb2, wb3, wb4 };
+
+void
+TestPrintWordWithBreak()
+{
+ uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
+ nsIWordBreaker* wbk = nullptr;
+
+ CallGetService(kWBrkCID, &wbk);
+
+ nsAutoString result;
+
+ for (uint32_t i = 0; i < numOfFragment; i++) {
+ NS_ConvertASCIItoUTF16 fragText(wb[i]);
+
+ int32_t cur = 0;
+ cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
+ uint32_t start = 0;
+ for (uint32_t j = 0; cur != NS_WORDBREAKER_NEED_MORE_TEXT; j++) {
+ result.Append(Substring(fragText, start, cur - start));
+ result.Append('^');
+ start = (cur >= 0 ? cur : cur - start);
+ cur = wbk->NextWord(fragText.get(), fragText.Length(), cur);
+ }
+
+ result.Append(Substring(fragText, fragText.Length() - start));
+
+ if (i != numOfFragment - 1) {
+ NS_ConvertASCIItoUTF16 nextFragText(wb[i+1]);
+
+ bool canBreak = true;
+ canBreak = wbk->BreakInBetween(fragText.get(),
+ fragText.Length(),
+ nextFragText.get(),
+ nextFragText.Length());
+ if (canBreak) {
+ result.Append('^');
+ }
+ fragText.Assign(nextFragText);
+ }
+ }
+ ASSERT_STREQ("is^ ^is^ ^a^ ^ is a intzation^ ^work^ation work.",
+ NS_ConvertUTF16toUTF8(result).get());
+
+ NS_IF_RELEASE(wbk);
+}
+
+void
+TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset,
+ const char* expected)
+{
+ uint32_t numOfFragment = sizeof(wb) / sizeof(char*);
+ nsIWordBreaker* wbk = nullptr;
+
+ CallGetService(kWBrkCID, &wbk);
+
+ NS_ConvertASCIItoUTF16 fragText(wb[fragN]);
+
+ nsWordRange res = wbk->FindWord(fragText.get(), fragText.Length(), offset);
+
+ bool canBreak;
+ nsAutoString result(Substring(fragText, res.mBegin, res.mEnd-res.mBegin));
+
+ if ((uint32_t)fragText.Length() == res.mEnd) {
+ // if we hit the end of the fragment
+ nsAutoString curFragText = fragText;
+ for(uint32_t p = fragN +1; p < numOfFragment ;p++)
+ {
+ NS_ConvertASCIItoUTF16 nextFragText(wb[p]);
+ canBreak = wbk->BreakInBetween(curFragText.get(),
+ curFragText.Length(),
+ nextFragText.get(),
+ nextFragText.Length());
+ if (canBreak) {
+ break;
+ }
+ nsWordRange r = wbk->FindWord(nextFragText.get(), nextFragText.Length(),
+ 0);
+
+ result.Append(Substring(nextFragText, r.mBegin, r.mEnd - r.mBegin));
+
+ if ((uint32_t)nextFragText.Length() != r.mEnd) {
+ break;
+ }
+ nextFragText.Assign(curFragText);
+ }
+ }
+
+ if (0 == res.mBegin) {
+ // if we hit the beginning of the fragment
+ nsAutoString curFragText = fragText;
+ for (uint32_t p = fragN; p > 0; p--) {
+ NS_ConvertASCIItoUTF16 prevFragText(wb[p-1]);
+ canBreak = wbk->BreakInBetween(prevFragText.get(),
+ prevFragText.Length(),
+ curFragText.get(),
+ curFragText.Length());
+ if (canBreak) {
+ break;
+ }
+ nsWordRange r = wbk->FindWord(prevFragText.get(), prevFragText.Length(),
+ prevFragText.Length());
+
+ result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0);
+
+ if (0 != r.mBegin) {
+ break;
+ }
+ prevFragText.Assign(curFragText);
+ }
+ }
+
+ ASSERT_STREQ(expected, NS_ConvertUTF16toUTF8(result).get())
+ << "FindWordBreakFromPosition(" << fragN << ", " << offset << ")";
+
+ NS_IF_RELEASE(wbk);
+}
+
+TEST(LineBreak, WordBreakUsage)
+{
+ TestPrintWordWithBreak();
+ TestFindWordBreakFromPosition(0, 0, "This");
+ TestFindWordBreakFromPosition(1, 0, "his");
+ TestFindWordBreakFromPosition(2, 0, "is");
+ TestFindWordBreakFromPosition(2, 1, "is");
+ TestFindWordBreakFromPosition(2, 9, " ");
+ TestFindWordBreakFromPosition(2, 10, "internationalization");
+ TestFindWordBreakFromPosition(3, 4, "ernationalization");
+ TestFindWordBreakFromPosition(3, 8, "ernationalization");
+ TestFindWordBreakFromPosition(4, 6, " ");
+ TestFindWordBreakFromPosition(4, 7, "work");
+}
+