diff options
Diffstat (limited to 'intl/lwbrk/gtest/TestLineBreak.cpp')
-rw-r--r-- | intl/lwbrk/gtest/TestLineBreak.cpp | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/intl/lwbrk/gtest/TestLineBreak.cpp b/intl/lwbrk/gtest/TestLineBreak.cpp new file mode 100644 index 000000000..5824bf70f --- /dev/null +++ b/intl/lwbrk/gtest/TestLineBreak.cpp @@ -0,0 +1,323 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdio.h> +#include "nsXPCOM.h" +#include "nsIComponentManager.h" +#include "nsISupports.h" +#include "nsServiceManagerUtils.h" +#include "nsILineBreaker.h" +#include "nsIWordBreaker.h" +#include "nsLWBrkCIID.h" +#include "nsString.h" +#include "gtest/gtest.h" + +NS_DEFINE_CID(kLBrkCID, NS_LBRK_CID); +NS_DEFINE_CID(kWBrkCID, NS_WBRK_CID); + +static char teng1[] = +// 1 2 3 4 5 6 7 +//01234567890123456789012345678901234567890123456789012345678901234567890123456789 + "This is a test to test(reasonable) line break. This 0.01123 = 45 x 48."; + +static uint32_t lexp1[] = { + 4,7,9,14,17,34,39,40,41,42,49,54,62,64,67,69,73 +}; + +static uint32_t wexp1[] = { + 4,5,7,8,9,10,14,15,17,18,22,23,33,34,35,39,43,48,49,50,54,55,56,57,62,63, + 64,65,67,68,69,70,72 +}; + +static char teng2[] = +// 1 2 3 4 5 6 7 +//01234567890123456789012345678901234567890123456789012345678901234567890123456789 + "()((reasonab(l)e) line break. .01123=45x48."; + +static uint32_t lexp2[] = { + 17,22,23,30,44 +}; + +static uint32_t wexp2[] = { + 4,12,13,14,15,16,17,18,22,24,29,30,31,32,37,38,43 +}; + +static char teng3[] = +// 1 2 3 4 5 6 7 +//01234567890123456789012345678901234567890123456789012345678901234567890123456789 + "It's a test to test(ronae ) line break...."; + +static uint32_t lexp3[] = { + 4,6,11,14,25,27,32,42 +}; + +static uint32_t wexp3[] = { + 2,3,4,5,6,7,11,12,14,15,19,20,25,26,27,28,32,33,38 +}; + +static char ruler1[] = +" 1 2 3 4 5 6 7 "; +static char ruler2[] = +"0123456789012345678901234567890123456789012345678901234567890123456789012"; + +bool +Check(const char* in, const uint32_t* out, uint32_t outlen, uint32_t i, + uint32_t res[256]) +{ + bool ok = true; + + if (i != outlen) { + ok = false; + printf("WARNING!!! return size wrong, expect %d but got %d \n", + outlen, i); + } + + for (uint32_t j = 0; j < i; j++) { + if (j < outlen) { + if (res[j] != out[j]) { + ok = false; + printf("[%d] expect %d but got %d\n", j, out[j], res[j]); + } + } else { + ok = false; + printf("[%d] additional %d\n", j, res[j]); + } + } + + if (!ok) { + printf("string = \n%s\n", in); + printf("%s\n", ruler1); + printf("%s\n", ruler2); + + printf("Expect = \n"); + for (uint32_t j = 0; j < outlen; j++) { + printf("%d,", out[j]); + } + + printf("\nResult = \n"); + for (uint32_t j = 0; j < i; j++) { + printf("%d,", res[j]); + } + printf("\n"); + } + + return ok; +} + +bool +TestASCIILB(nsILineBreaker *lb, + const char* in, + const uint32_t* out, uint32_t outlen) +{ + NS_ConvertASCIItoUTF16 eng1(in); + uint32_t i; + uint32_t res[256]; + int32_t curr; + + for (i = 0, curr = 0; + curr != NS_LINEBREAKER_NEED_MORE_TEXT && i < 256; + i++) { + curr = lb->Next(eng1.get(), eng1.Length(), curr); + res[i] = curr != NS_LINEBREAKER_NEED_MORE_TEXT ? curr : eng1.Length(); + } + + return Check(in, out, outlen, i, res); +} + +bool +TestASCIIWB(nsIWordBreaker *lb, + const char* in, + const uint32_t* out, uint32_t outlen) +{ + NS_ConvertASCIItoUTF16 eng1(in); + + uint32_t i; + uint32_t res[256]; + int32_t curr = 0; + + for (i = 0, curr = lb->NextWord(eng1.get(), eng1.Length(), curr); + curr != NS_WORDBREAKER_NEED_MORE_TEXT && i < 256; + curr = lb->NextWord(eng1.get(), eng1.Length(), curr), i++) { + res [i] = curr != NS_WORDBREAKER_NEED_MORE_TEXT ? curr : eng1.Length(); + } + + return Check(in, out, outlen, i, res); +} + +TEST(LineBreak, LineBreaker) +{ + nsILineBreaker *t = nullptr; + nsresult res = CallGetService(kLBrkCID, &t); + ASSERT_TRUE(NS_SUCCEEDED(res) && t); + NS_IF_RELEASE(t); + + res = CallGetService(kLBrkCID, &t); + ASSERT_TRUE(NS_SUCCEEDED(res) && t); + + ASSERT_TRUE(TestASCIILB(t, teng1, lexp1, sizeof(lexp1) / sizeof(uint32_t))); + ASSERT_TRUE(TestASCIILB(t, teng2, lexp2, sizeof(lexp2) / sizeof(uint32_t))); + ASSERT_TRUE(TestASCIILB(t, teng3, lexp3, sizeof(lexp3) / sizeof(uint32_t))); + + NS_RELEASE(t); +} + +TEST(LineBreak, WordBreaker) +{ + nsIWordBreaker *t = nullptr; + nsresult res = CallGetService(kWBrkCID, &t); + ASSERT_TRUE(NS_SUCCEEDED(res) && t); + NS_IF_RELEASE(t); + + res = CallGetService(kWBrkCID, &t); + ASSERT_TRUE(NS_SUCCEEDED(res) && t); + + ASSERT_TRUE(TestASCIIWB(t, teng1, wexp1, sizeof(wexp1) / sizeof(uint32_t))); + ASSERT_TRUE(TestASCIIWB(t, teng2, wexp2, sizeof(wexp2) / sizeof(uint32_t))); + ASSERT_TRUE(TestASCIIWB(t, teng3, wexp3, sizeof(wexp3) / sizeof(uint32_t))); + + NS_RELEASE(t); +} + +// 012345678901234 +static const char wb0[] = "T"; +static const char wb1[] = "h"; +static const char wb2[] = "is is a int"; +static const char wb3[] = "ernationali"; +static const char wb4[] = "zation work."; + +static const char* wb[] = { wb0, wb1, wb2, wb3, wb4 }; + +void +TestPrintWordWithBreak() +{ + uint32_t numOfFragment = sizeof(wb) / sizeof(char*); + nsIWordBreaker* wbk = nullptr; + + CallGetService(kWBrkCID, &wbk); + + nsAutoString result; + + for (uint32_t i = 0; i < numOfFragment; i++) { + NS_ConvertASCIItoUTF16 fragText(wb[i]); + + int32_t cur = 0; + cur = wbk->NextWord(fragText.get(), fragText.Length(), cur); + uint32_t start = 0; + for (uint32_t j = 0; cur != NS_WORDBREAKER_NEED_MORE_TEXT; j++) { + result.Append(Substring(fragText, start, cur - start)); + result.Append('^'); + start = (cur >= 0 ? cur : cur - start); + cur = wbk->NextWord(fragText.get(), fragText.Length(), cur); + } + + result.Append(Substring(fragText, fragText.Length() - start)); + + if (i != numOfFragment - 1) { + NS_ConvertASCIItoUTF16 nextFragText(wb[i+1]); + + bool canBreak = true; + canBreak = wbk->BreakInBetween(fragText.get(), + fragText.Length(), + nextFragText.get(), + nextFragText.Length()); + if (canBreak) { + result.Append('^'); + } + fragText.Assign(nextFragText); + } + } + ASSERT_STREQ("is^ ^is^ ^a^ ^ is a intzation^ ^work^ation work.", + NS_ConvertUTF16toUTF8(result).get()); + + NS_IF_RELEASE(wbk); +} + +void +TestFindWordBreakFromPosition(uint32_t fragN, uint32_t offset, + const char* expected) +{ + uint32_t numOfFragment = sizeof(wb) / sizeof(char*); + nsIWordBreaker* wbk = nullptr; + + CallGetService(kWBrkCID, &wbk); + + NS_ConvertASCIItoUTF16 fragText(wb[fragN]); + + nsWordRange res = wbk->FindWord(fragText.get(), fragText.Length(), offset); + + bool canBreak; + nsAutoString result(Substring(fragText, res.mBegin, res.mEnd-res.mBegin)); + + if ((uint32_t)fragText.Length() == res.mEnd) { + // if we hit the end of the fragment + nsAutoString curFragText = fragText; + for(uint32_t p = fragN +1; p < numOfFragment ;p++) + { + NS_ConvertASCIItoUTF16 nextFragText(wb[p]); + canBreak = wbk->BreakInBetween(curFragText.get(), + curFragText.Length(), + nextFragText.get(), + nextFragText.Length()); + if (canBreak) { + break; + } + nsWordRange r = wbk->FindWord(nextFragText.get(), nextFragText.Length(), + 0); + + result.Append(Substring(nextFragText, r.mBegin, r.mEnd - r.mBegin)); + + if ((uint32_t)nextFragText.Length() != r.mEnd) { + break; + } + nextFragText.Assign(curFragText); + } + } + + if (0 == res.mBegin) { + // if we hit the beginning of the fragment + nsAutoString curFragText = fragText; + for (uint32_t p = fragN; p > 0; p--) { + NS_ConvertASCIItoUTF16 prevFragText(wb[p-1]); + canBreak = wbk->BreakInBetween(prevFragText.get(), + prevFragText.Length(), + curFragText.get(), + curFragText.Length()); + if (canBreak) { + break; + } + nsWordRange r = wbk->FindWord(prevFragText.get(), prevFragText.Length(), + prevFragText.Length()); + + result.Insert(Substring(prevFragText, r.mBegin, r.mEnd - r.mBegin), 0); + + if (0 != r.mBegin) { + break; + } + prevFragText.Assign(curFragText); + } + } + + ASSERT_STREQ(expected, NS_ConvertUTF16toUTF8(result).get()) + << "FindWordBreakFromPosition(" << fragN << ", " << offset << ")"; + + NS_IF_RELEASE(wbk); +} + +TEST(LineBreak, WordBreakUsage) +{ + TestPrintWordWithBreak(); + TestFindWordBreakFromPosition(0, 0, "This"); + TestFindWordBreakFromPosition(1, 0, "his"); + TestFindWordBreakFromPosition(2, 0, "is"); + TestFindWordBreakFromPosition(2, 1, "is"); + TestFindWordBreakFromPosition(2, 9, " "); + TestFindWordBreakFromPosition(2, 10, "internationalization"); + TestFindWordBreakFromPosition(3, 4, "ernationalization"); + TestFindWordBreakFromPosition(3, 8, "ernationalization"); + TestFindWordBreakFromPosition(4, 6, " "); + TestFindWordBreakFromPosition(4, 7, "work"); +} + |