diff options
author | wolfbeast <mcwerewolf@gmail.com> | 2018-05-04 11:52:49 +0200 |
---|---|---|
committer | wolfbeast <mcwerewolf@gmail.com> | 2018-05-04 11:52:49 +0200 |
commit | 58751cc2ad952d73754fcf6bd894af865074ede6 (patch) | |
tree | 529b6476ff5a25dd570725e679f11e4b91d0173d /intl/unicharutil/nsUnicodeNormalizer.cpp | |
parent | 2eddf19168d5230877cff574ed548610e8eebc51 (diff) | |
download | UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.gz UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.lz UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.xz UXP-58751cc2ad952d73754fcf6bd894af865074ede6.zip |
Issue #325 Part 4: Remove obsolete and archaic Unicode normalization implementation.
Diffstat (limited to 'intl/unicharutil/nsUnicodeNormalizer.cpp')
-rw-r--r-- | intl/unicharutil/nsUnicodeNormalizer.cpp | 742 |
1 files changed, 68 insertions, 674 deletions
diff --git a/intl/unicharutil/nsUnicodeNormalizer.cpp b/intl/unicharutil/nsUnicodeNormalizer.cpp index 16c3a969c..6f6c3d2dd 100644 --- a/intl/unicharutil/nsUnicodeNormalizer.cpp +++ b/intl/unicharutil/nsUnicodeNormalizer.cpp @@ -1,66 +1,16 @@ /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* This file is modified from JPNIC's mDNKit, it is under both MPL and - * JPNIC's license. - */ - /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -/* - * Copyright (c) 2000,2002 Japan Network Information Center. - * All rights reserved. - * - * By using this file, you agree to the terms and conditions set forth bellow. - * - * LICENSE TERMS AND CONDITIONS - * - * The following License Terms and Conditions apply, unless a different - * license is obtained from Japan Network Information Center ("JPNIC"), - * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, - * Chiyoda-ku, Tokyo 101-0047, Japan. - * - * 1. Use, Modification and Redistribution (including distribution of any - * modified or derived work) in source and/or binary forms is permitted - * under this License Terms and Conditions. - * - * 2. Redistribution of source code must retain the copyright notices as they - * appear in each source code file, this License Terms and Conditions. - * - * 3. Redistribution in binary form must reproduce the Copyright Notice, - * this License Terms and Conditions, in the documentation and/or other - * materials provided with the distribution. For the purposes of binary - * distribution the "Copyright Notice" refers to the following language: - * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." - * - * 4. The name of JPNIC may not be used to endorse or promote products - * derived from this Software without specific prior written approval of - * JPNIC. - * - * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - */ - -#include <string.h> - -#include "nsMemory.h" #include "nsUnicodeNormalizer.h" -#include "nsString.h" -#include "mozilla/BinarySearch.h" +#include "ICUUtils.h" +#include "unicode/unorm2.h" +#include "unicode/utext.h" NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer) - nsUnicodeNormalizer::nsUnicodeNormalizer() { } @@ -69,636 +19,80 @@ nsUnicodeNormalizer::~nsUnicodeNormalizer() { } - - -#define END_BIT 0x80000000 - - -/* - * Some constants for Hangul decomposition/composition. - * These things were taken from unicode book. - */ -#define SBase 0xac00 -#define LBase 0x1100 -#define VBase 0x1161 -#define TBase 0x11a7 -#define LCount 19 -#define VCount 21 -#define TCount 28 -#define SLast (SBase + LCount * VCount * TCount) - -struct composition { - uint32_t c2; /* 2nd character */ - uint32_t comp; /* composed character */ -}; - - -#include "normalization_data.h" - -/* - * Macro for multi-level index table. - */ -#define LOOKUPTBL(vprefix, mprefix, v) \ - DMAP(vprefix)[\ - IMAP(vprefix)[\ - IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\ - ]\ - ].tbl[IDX2(mprefix, v)] - -#define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix)) -#define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix)) -#define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix)) - -#define IDX_0(v, bits1, bits2) ((v) >> ((bits1) + (bits2))) -#define IDX_1(v, bits1, bits2) (((v) >> (bits2)) & ((1 << (bits1)) - 1)) -#define IDX_2(v, bits1, bits2) ((v) & ((1 << (bits2)) - 1)) - -#define BITS1(mprefix) mprefix ## _BITS_1 -#define BITS2(mprefix) mprefix ## _BITS_2 - -#define IMAP(vprefix) vprefix ## _imap -#define DMAP(vprefix) vprefix ## _table -#define SEQ(vprefix) vprefix ## _seq - -static int32_t -canonclass(uint32_t c) { - /* Look up canonicalclass table. */ - return (LOOKUPTBL(canon_class, CANON_CLASS, c)); -} - -static int32_t -decompose_char(uint32_t c, const uint32_t **seqp) -{ - /* Look up decomposition table. */ - int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c); - *seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT); - return (seqidx); -} - -static int32_t -compose_char(uint32_t c, - const struct composition **compp) -{ - /* Look up composition table. */ - int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c); - *compp = SEQ(compose) + (seqidx & 0xffff); - return (seqidx >> 16); -} - -static nsresult -mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen, - uint32_t c, int32_t *decomp_lenp) -{ - uint32_t *vorg = v; - int32_t seqidx; - const uint32_t *seq; - - //assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr); - - /* - * First, check for Hangul. - */ - if (SBase <= c && c < SLast) { - int32_t idx, t_offset, v_offset, l_offset; - - idx = c - SBase; - t_offset = idx % TCount; - idx /= TCount; - v_offset = idx % VCount; - l_offset = idx / VCount; - if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3)) - return (NS_ERROR_UNORM_MOREOUTPUT); - *v++ = LBase + l_offset; - *v++ = VBase + v_offset; - if (t_offset > 0) - *v++ = TBase + t_offset; - *decomp_lenp = v - vorg; - return (NS_OK); - } - - /* - * Look up decomposition table. If no decomposition is defined - * or if it is a compatibility decomosition when canonical - * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'. - */ - seqidx = decompose_char(c, &seq); - if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0)) - return (NS_SUCCESS_UNORM_NOTFOUND); - - /* - * Copy the decomposed sequence. The end of the sequence are - * marked with END_BIT. - */ - do { - uint32_t c; - int32_t dlen; - nsresult r; - - c = *seq & ~END_BIT; - - /* Decompose recursively. */ - r = mdn__unicode_decompose(compat, v, vlen, c, &dlen); - if (r == NS_OK) { - v += dlen; - vlen -= dlen; - } else if (r == NS_SUCCESS_UNORM_NOTFOUND) { - if (vlen < 1) - return (NS_ERROR_UNORM_MOREOUTPUT); - *v++ = c; - vlen--; - } else { - return (r); - } - - } while ((*seq++ & END_BIT) == 0); - - *decomp_lenp = v - vorg; - - return (NS_OK); -} - -static int32_t -mdn__unicode_iscompositecandidate(uint32_t c) -{ - const struct composition *dummy; - - /* Check for Hangul */ - if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast)) - return (1); - - /* - * Look up composition table. If there are no composition - * that begins with the given character, it is not a - * composition candidate. - */ - if (compose_char(c, &dummy) == 0) - return (0); - else - return (1); -} - -namespace { - -struct SequenceAdaptor -{ - const composition* const mSequence; - explicit SequenceAdaptor(const composition* aSequence) : mSequence(aSequence) {} - uint32_t operator[](size_t aIdx) const { - return mSequence[aIdx].c2; - } -}; - -} // namespace - -static nsresult -mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp) -{ - int32_t n; - const struct composition *cseq; - - //assert(compp != nullptr); - - /* - * Check for Hangul. - */ - if (LBase <= c1 && c1 < LBase + LCount && - VBase <= c2 && c2 < VBase + VCount) { - /* - * Hangul L and V. - */ - *compp = SBase + - ((c1 - LBase) * VCount + (c2 - VBase)) * TCount; - return (NS_OK); - } else if (SBase <= c1 && c1 < SLast && - TBase <= c2 && c2 < TBase + TCount && - (c1 - SBase) % TCount == 0) { - /* - * Hangul LV and T. - */ - *compp = c1 + (c2 - TBase); - return (NS_OK); - } - - /* - * Look up composition table. If the result is 0, no composition - * is defined. Otherwise, upper 16bits of the result contains - * the number of composition that begins with 'c1', and the lower - * 16bits is the offset in 'compose_seq'. - */ - if ((n = compose_char(c1, &cseq)) == 0) - return (NS_SUCCESS_UNORM_NOTFOUND); - - /* - * The composite sequences are sorted by the 2nd character 'c2'. - * So we can use binary search. - */ - size_t idx; - if (mozilla::BinarySearch(SequenceAdaptor(cseq), 0, n, c2, &idx)) { - *compp = cseq[idx].comp; - return (NS_OK); - } - - return (NS_SUCCESS_UNORM_NOTFOUND); -} - - -#define WORKBUF_SIZE 128 -#define WORKBUF_SIZE_MAX 10000 - -typedef struct { - int32_t cur; /* pointing now processing character */ - int32_t last; /* pointing just after the last character */ - int32_t size; /* size of UCS and CLASS array */ - uint32_t *ucs; /* UCS-4 characters */ - int32_t *cclass; /* and their canonical classes */ - uint32_t ucs_buf[WORKBUF_SIZE]; /* local buffer */ - int32_t class_buf[WORKBUF_SIZE]; /* ditto */ -} workbuf_t; - -static nsresult decompose(workbuf_t *wb, uint32_t c, int32_t compat); -static void get_class(workbuf_t *wb); -static void reorder(workbuf_t *wb); -static void compose(workbuf_t *wb); -static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr); -static void workbuf_init(workbuf_t *wb); -static void workbuf_free(workbuf_t *wb); -static nsresult workbuf_extend(workbuf_t *wb); -static nsresult workbuf_append(workbuf_t *wb, uint32_t c); -static void workbuf_shift(workbuf_t *wb, int32_t shift); -static void workbuf_removevoid(workbuf_t *wb); - - -static nsresult -mdn_normalize(bool do_composition, bool compat, - const nsAString& aSrcStr, nsAString& aToStr) -{ - workbuf_t wb; - nsresult r = NS_OK; - /* - * Initialize working buffer. - */ - workbuf_init(&wb); - - nsAString::const_iterator start, end; - aSrcStr.BeginReading(start); - aSrcStr.EndReading(end); - - while (start != end) { - uint32_t c; - char16_t curChar; - - //assert(wb.cur == wb.last); - - /* - * Get one character from 'from'. - */ - curChar= *start++; - - if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) { - c = SURROGATE_TO_UCS4(curChar, *start); - ++start; - } else { - c = curChar; - } - - /* - * Decompose it. - */ - if ((r = decompose(&wb, c, compat)) != NS_OK) - break; - - /* - * Get canonical class. - */ - get_class(&wb); - - /* - * Reorder & compose. - */ - for (; wb.cur < wb.last; wb.cur++) { - if (wb.cur == 0) { - continue; - } else if (wb.cclass[wb.cur] > 0) { - /* - * This is not a starter. Try reordering. - * Note that characters up to it are - * already in canonical order. - */ - reorder(&wb); - continue; - } - - /* - * This is a starter character, and there are - * some characters before it. Those characters - * have been reordered properly, and - * ready for composition. - */ - if (do_composition && wb.cclass[0] == 0) - compose(&wb); - - /* - * If CUR points to a starter character, - * then process of characters before CUR are - * already finished, because any further - * reordering/composition for them are blocked - * by the starter CUR points. - */ - if (wb.cur > 0 && wb.cclass[wb.cur] == 0) { - /* Flush everything before CUR. */ - r = flush_before_cur(&wb, aToStr); - if (r != NS_OK) - break; - } - } - } - - if (r == NS_OK) { - if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) { - /* - * There is some characters left in WB. - * They are ordered, but not composed yet. - * Now CUR points just after the last character in WB, - * and since compose() tries to compose characters - * between top and CUR inclusive, we must make CUR - * one character back during compose(). - */ - wb.cur--; - compose(&wb); - wb.cur++; - } - /* - * Call this even when WB.CUR == 0, to make TO - * NUL-terminated. - */ - r = flush_before_cur(&wb, aToStr); - } - - workbuf_free(&wb); - - return (r); -} - -static nsresult -decompose(workbuf_t *wb, uint32_t c, int32_t compat) { - nsresult r; - int32_t dec_len; - -again: - r = mdn__unicode_decompose(compat, wb->ucs + wb->last, - wb->size - wb->last, c, &dec_len); - switch (r) { - case NS_OK: - wb->last += dec_len; - return (NS_OK); - case NS_SUCCESS_UNORM_NOTFOUND: - return (workbuf_append(wb, c)); - case NS_ERROR_UNORM_MOREOUTPUT: - if ((r = workbuf_extend(wb)) != NS_OK) - return (r); - if (wb->size > WORKBUF_SIZE_MAX) { - // "mdn__unormalize_form*: " "working buffer too large\n" - return (NS_ERROR_FAILURE); - } - goto again; - default: - return (r); - } - /* NOTREACHED */ -} - -static void -get_class(workbuf_t *wb) { - int32_t i; - - for (i = wb->cur; i < wb->last; i++) - wb->cclass[i] = canonclass(wb->ucs[i]); -} - -static void -reorder(workbuf_t *wb) { - uint32_t c; - int32_t i; - int32_t cclass; - - //assert(wb != nullptr); - - i = wb->cur; - c = wb->ucs[i]; - cclass = wb->cclass[i]; - - while (i > 0 && wb->cclass[i - 1] > cclass) { - wb->ucs[i] = wb->ucs[i - 1]; - wb->cclass[i] =wb->cclass[i - 1]; - i--; - wb->ucs[i] = c; - wb->cclass[i] = cclass; - } -} - -static void -compose(workbuf_t *wb) { - int32_t cur; - uint32_t *ucs; - int32_t *cclass; - int32_t last_class; - int32_t nvoids; - int32_t i; - - //assert(wb != nullptr && wb->cclass[0] == 0); - - cur = wb->cur; - ucs = wb->ucs; - cclass = wb->cclass; - - /* - * If there are no decomposition sequence that begins with - * the top character, composition is impossible. - */ - if (!mdn__unicode_iscompositecandidate(ucs[0])) - return; - - last_class = 0; - nvoids = 0; - for (i = 1; i <= cur; i++) { - uint32_t c; - int32_t cl = cclass[i]; - - if ((last_class < cl || cl == 0) && - mdn__unicode_compose(ucs[0], ucs[i], - &c) == NS_OK) { - /* - * Replace the top character with the composed one. - */ - ucs[0] = c; - cclass[0] = canonclass(c); - - cclass[i] = -1; /* void this character */ - nvoids++; - } else { - last_class = cl; - } - } - - /* Purge void characters, if any. */ - if (nvoids > 0) - workbuf_removevoid(wb); -} - -static nsresult -flush_before_cur(workbuf_t *wb, nsAString& aToStr) -{ - int32_t i; - - for (i = 0; i < wb->cur; i++) { - if (!IS_IN_BMP(wb->ucs[i])) { - aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i])); - aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i])); - } else { - aToStr.Append((char16_t)(wb->ucs[i])); - } - } - - workbuf_shift(wb, wb->cur); - - return (NS_OK); -} - -static void -workbuf_init(workbuf_t *wb) { - wb->cur = 0; - wb->last = 0; - wb->size = WORKBUF_SIZE; - wb->ucs = wb->ucs_buf; - wb->cclass = wb->class_buf; -} - -static void -workbuf_free(workbuf_t *wb) { - if (wb->ucs != wb->ucs_buf) { - free(wb->ucs); - free(wb->cclass); - } -} - static nsresult -workbuf_extend(workbuf_t *wb) { - int32_t newsize = wb->size * 3; - - if (wb->ucs == wb->ucs_buf) { - wb->ucs = (uint32_t*)moz_xmalloc(sizeof(wb->ucs[0]) * newsize); - if (!wb->ucs) - return NS_ERROR_OUT_OF_MEMORY; - wb->cclass = (int32_t*)moz_xmalloc(sizeof(wb->cclass[0]) * newsize); - if (!wb->cclass) { - free(wb->ucs); - wb->ucs = nullptr; - return NS_ERROR_OUT_OF_MEMORY; - } - } else { - void* buf = moz_xrealloc(wb->ucs, sizeof(wb->ucs[0]) * newsize); - if (!buf) - return NS_ERROR_OUT_OF_MEMORY; - wb->ucs = (uint32_t*)buf; - buf = moz_xrealloc(wb->cclass, sizeof(wb->cclass[0]) * newsize); - if (!buf) - return NS_ERROR_OUT_OF_MEMORY; - wb->cclass = (int32_t*)buf; - } - return (NS_OK); -} - -static nsresult -workbuf_append(workbuf_t *wb, uint32_t c) { - nsresult r; - - if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK) - return (r); - wb->ucs[wb->last++] = c; - return (NS_OK); -} - -static void -workbuf_shift(workbuf_t *wb, int32_t shift) { - int32_t nmove; - - //assert(wb != nullptr && wb->cur >= shift); - - nmove = wb->last - shift; - memmove(&wb->ucs[0], &wb->ucs[shift], - nmove * sizeof(wb->ucs[0])); - memmove(&wb->cclass[0], &wb->cclass[shift], - nmove * sizeof(wb->cclass[0])); - wb->cur -= shift; - wb->last -= shift; -} - -static void -workbuf_removevoid(workbuf_t *wb) { - int32_t i, j; - int32_t last = wb->last; - - for (i = j = 0; i < last; i++) { - if (wb->cclass[i] >= 0) { - if (j < i) { - wb->ucs[j] = wb->ucs[i]; - wb->cclass[j] = wb->cclass[i]; - } - j++; - } - } - wb->cur -= last - j; - wb->last = j; -} - -nsresult -nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest) -{ - return mdn_normalize(false, false, aSrc, aDest); -} - -nsresult -nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest) -{ - return mdn_normalize(true, false, aSrc, aDest); -} - -nsresult -nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest) -{ - return mdn_normalize(false, true, aSrc, aDest); +DoNormalization(const UNormalizer2* aNorm, const nsAString& aSrc, + nsAString& aDest) +{ + UErrorCode errorCode = U_ZERO_ERROR; + const int32_t length = aSrc.Length(); + const UChar* src = reinterpret_cast<const UChar*>(aSrc.BeginReading()); + // Initial guess for a capacity that is likely to be enough for most cases. + int32_t capacity = length + (length >> 8) + 8; + do { + aDest.SetLength(capacity); + UChar* dest = reinterpret_cast<UChar*>(aDest.BeginWriting()); + int32_t len = unorm2_normalize(aNorm, src, aSrc.Length(), dest, capacity, + &errorCode); + if (U_SUCCESS(errorCode)) { + aDest.SetLength(len); + break; + } + if (errorCode == U_BUFFER_OVERFLOW_ERROR) { + // Buffer wasn't big enough; adjust to the reported size and try again. + capacity = len; + errorCode = U_ZERO_ERROR; + continue; + } + } while (false); + return ICUUtils::UErrorToNsResult(errorCode); +} + +nsresult +nsUnicodeNormalizer::NormalizeUnicodeNFD(const nsAString& aSrc, + nsAString& aDest) +{ + // The unorm2_getNF*Instance functions return static singletons that should + // not be deleted, so we just get them once on first use. + static UErrorCode errorCode = U_ZERO_ERROR; + static const UNormalizer2* norm = unorm2_getNFDInstance(&errorCode); + if (U_SUCCESS(errorCode)) { + return DoNormalization(norm, aSrc, aDest); + } + return ICUUtils::UErrorToNsResult(errorCode); } -nsresult -nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest) +nsresult +nsUnicodeNormalizer::NormalizeUnicodeNFC(const nsAString& aSrc, + nsAString& aDest) { - return mdn_normalize(true, true, aSrc, aDest); + static UErrorCode errorCode = U_ZERO_ERROR; + static const UNormalizer2* norm = unorm2_getNFCInstance(&errorCode); + if (U_SUCCESS(errorCode)) { + return DoNormalization(norm, aSrc, aDest); + } + return ICUUtils::UErrorToNsResult(errorCode); } -bool -nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab) +nsresult +nsUnicodeNormalizer::NormalizeUnicodeNFKD(const nsAString& aSrc, + nsAString& aDest) { - return mdn__unicode_compose(a, b, ab) == NS_OK; + static UErrorCode errorCode = U_ZERO_ERROR; + static const UNormalizer2* norm = unorm2_getNFKDInstance(&errorCode); + if (U_SUCCESS(errorCode)) { + return DoNormalization(norm, aSrc, aDest); + } + return ICUUtils::UErrorToNsResult(errorCode); } -bool -nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2) +nsresult +nsUnicodeNormalizer::NormalizeUnicodeNFKC(const nsAString& aSrc, + nsAString& aDest) { - // We can't use mdn__unicode_decompose here, because that does a recursive - // decomposition that may yield more than two characters, but the harfbuzz - // callback wants just a single-step decomp that is guaranteed to produce - // no more than two characters. So we do a low-level lookup in the table - // of decomp sequences. - const uint32_t *seq; - uint32_t seqidx = decompose_char(c, &seq); - if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) { - return false; - } - *c1 = *seq & ~END_BIT; - if (*seq & END_BIT) { - *c2 = 0; - } else { - *c2 = *++seq & ~END_BIT; + static UErrorCode errorCode = U_ZERO_ERROR; + static const UNormalizer2* norm = unorm2_getNFKCInstance(&errorCode); + if (U_SUCCESS(errorCode)) { + return DoNormalization(norm, aSrc, aDest); } - return true; + return ICUUtils::UErrorToNsResult(errorCode); } |