Issue #325 Part 4: Remove obsolete and archaic Unicode normalization implementation.

author: wolfbeast <mcwerewolf@gmail.com> 2018-05-04 11:52:49 +0200
committer: wolfbeast <mcwerewolf@gmail.com> 2018-05-04 11:52:49 +0200
commit: 58751cc2ad952d73754fcf6bd894af865074ede6 (patch)
tree: 529b6476ff5a25dd570725e679f11e4b91d0173d /intl/unicharutil/nsUnicodeNormalizer.cpp
parent: 2eddf19168d5230877cff574ed548610e8eebc51 (diff)
download: UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar
UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.gz
UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.lz
UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.xz
UXP-58751cc2ad952d73754fcf6bd894af865074ede6.zip
1 files changed, 68 insertions, 674 deletions
diff --git a/intl/unicharutil/nsUnicodeNormalizer.cpp b/intl/unicharutil/nsUnicodeNormalizer.cpp
index 16c3a969c..6f6c3d2dd 100644
--- a/intl/unicharutil/nsUnicodeNormalizer.cpp
+++ b/intl/unicharutil/nsUnicodeNormalizer.cpp
@@ -1,66 +1,16 @@
 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
 
-/* This file is modified from JPNIC's mDNKit, it is under both MPL and 
- * JPNIC's license.
- */
-
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-/*
- * Copyright (c) 2000,2002 Japan Network Information Center.
- * All rights reserved.
- *  
- * By using this file, you agree to the terms and conditions set forth bellow.
- * 
- * 			LICENSE TERMS AND CONDITIONS 
- * 
- * The following License Terms and Conditions apply, unless a different
- * license is obtained from Japan Network Information Center ("JPNIC"),
- * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
- * Chiyoda-ku, Tokyo 101-0047, Japan.
- * 
- * 1. Use, Modification and Redistribution (including distribution of any
- *    modified or derived work) in source and/or binary forms is permitted
- *    under this License Terms and Conditions.
- * 
- * 2. Redistribution of source code must retain the copyright notices as they
- *    appear in each source code file, this License Terms and Conditions.
- * 
- * 3. Redistribution in binary form must reproduce the Copyright Notice,
- *    this License Terms and Conditions, in the documentation and/or other
- *    materials provided with the distribution.  For the purposes of binary
- *    distribution the "Copyright Notice" refers to the following language:
- *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
- * 
- * 4. The name of JPNIC may not be used to endorse or promote products
- *    derived from this Software without specific prior written approval of
- *    JPNIC.
- * 
- * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
- *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
- *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
- */
-
-#include <string.h>
-
-#include "nsMemory.h"
 #include "nsUnicodeNormalizer.h"
-#include "nsString.h"
-#include "mozilla/BinarySearch.h"
+#include "ICUUtils.h"
+#include "unicode/unorm2.h"
+#include "unicode/utext.h"
 
 NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
 
-
 nsUnicodeNormalizer::nsUnicodeNormalizer()
 {
 }
@@ -69,636 +19,80 @@ nsUnicodeNormalizer::~nsUnicodeNormalizer()
 {
 }
 
-
-
-#define END_BIT		0x80000000
-
-
-/*
- * Some constants for Hangul decomposition/composition.
- * These things were taken from unicode book. 
- */
-#define SBase		0xac00
-#define LBase		0x1100
-#define VBase		0x1161
-#define TBase		0x11a7
-#define LCount		19
-#define VCount		21
-#define TCount		28
-#define SLast		(SBase + LCount * VCount * TCount)
-
-struct composition {
-	uint32_t c2;	/* 2nd character */
-	uint32_t comp;	/* composed character */
-};
-
-
-#include "normalization_data.h"
-
-/*
- * Macro for multi-level index table.
- */
-#define LOOKUPTBL(vprefix, mprefix, v) \
-	DMAP(vprefix)[\
-		IMAP(vprefix)[\
-			IMAP(vprefix)[IDX0(mprefix, v)] + IDX1(mprefix, v)\
-		]\
-	].tbl[IDX2(mprefix, v)]
-
-#define IDX0(mprefix, v) IDX_0(v, BITS1(mprefix), BITS2(mprefix))
-#define IDX1(mprefix, v) IDX_1(v, BITS1(mprefix), BITS2(mprefix))
-#define IDX2(mprefix, v) IDX_2(v, BITS1(mprefix), BITS2(mprefix))
-
-#define IDX_0(v, bits1, bits2)	((v) >> ((bits1) + (bits2)))
-#define IDX_1(v, bits1, bits2)	(((v) >> (bits2)) & ((1 << (bits1)) - 1))
-#define IDX_2(v, bits1, bits2)	((v) & ((1 << (bits2)) - 1))
-
-#define BITS1(mprefix)	mprefix ## _BITS_1
-#define BITS2(mprefix)	mprefix ## _BITS_2
-
-#define IMAP(vprefix)	vprefix ## _imap
-#define DMAP(vprefix)	vprefix ## _table
-#define SEQ(vprefix)	vprefix ## _seq
-
-static int32_t
-canonclass(uint32_t c) {
-	/* Look up canonicalclass table. */
-	return (LOOKUPTBL(canon_class, CANON_CLASS, c));
-}
-
-static int32_t
-decompose_char(uint32_t c, const uint32_t **seqp)
-{
-	/* Look up decomposition table. */
-	int32_t seqidx = LOOKUPTBL(decompose, DECOMP, c);
-	*seqp = SEQ(decompose) + (seqidx & ~DECOMP_COMPAT);
-	return (seqidx);
-}
-
-static int32_t
-compose_char(uint32_t c,
-				const struct composition **compp)
-{
-	/* Look up composition table. */
-	int32_t seqidx = LOOKUPTBL(compose, CANON_COMPOSE, c);
-	*compp = SEQ(compose) + (seqidx & 0xffff);
-	return (seqidx >> 16);
-}
-
-static nsresult
-mdn__unicode_decompose(int32_t compat, uint32_t *v, size_t vlen,
-		       uint32_t c, int32_t *decomp_lenp)
-{
-	uint32_t *vorg = v;
-	int32_t seqidx;
-	const uint32_t *seq;
-
-	//assert(v != nullptr && vlen >= 0 && decomp_lenp != nullptr);
-
-	/*
-	 * First, check for Hangul.
-	 */
-	if (SBase <= c && c < SLast) {
-		int32_t idx, t_offset, v_offset, l_offset;
-
-		idx = c - SBase;
-		t_offset = idx % TCount;
-		idx /= TCount;
-		v_offset = idx % VCount;
-		l_offset = idx / VCount;
-		if ((t_offset == 0 && vlen < 2) || (t_offset > 0 && vlen < 3))
-			return (NS_ERROR_UNORM_MOREOUTPUT);
-		*v++ = LBase + l_offset;
-		*v++ = VBase + v_offset;
-		if (t_offset > 0)
-			*v++ = TBase + t_offset;
-		*decomp_lenp = v - vorg;
-		return (NS_OK);
-	}
-
-	/*
-	 * Look up decomposition table.  If no decomposition is defined
-	 * or if it is a compatibility decomosition when canonical
-	 * decomposition requested, return 'NS_SUCCESS_UNORM_NOTFOUND'.
-	 */
-	seqidx = decompose_char(c, &seq);
-	if (seqidx == 0 || (compat == 0 && (seqidx & DECOMP_COMPAT) != 0))
-		return (NS_SUCCESS_UNORM_NOTFOUND);
-	
-	/*
-	 * Copy the decomposed sequence.  The end of the sequence are
-	 * marked with END_BIT.
-	 */
-	do {
-		uint32_t c;
-		int32_t dlen;
-		nsresult r;
-
-		c = *seq & ~END_BIT;
-
-		/* Decompose recursively. */
-		r = mdn__unicode_decompose(compat, v, vlen, c, &dlen);
-		if (r == NS_OK) {
-			v += dlen;
-			vlen -= dlen;
-		} else if (r == NS_SUCCESS_UNORM_NOTFOUND) {
-			if (vlen < 1)
-				return (NS_ERROR_UNORM_MOREOUTPUT);
-			*v++ = c;
-			vlen--;
-		} else {
-			return (r);
-		}
-
-	} while ((*seq++ & END_BIT) == 0);
-	
-	*decomp_lenp = v - vorg;
-
-	return (NS_OK);
-}
-
-static int32_t
-mdn__unicode_iscompositecandidate(uint32_t c)
-{
-	const struct composition *dummy;
-
-	/* Check for Hangul */
-	if ((LBase <= c && c < LBase + LCount) || (SBase <= c && c < SLast))
-		return (1);
-
-	/*
-	 * Look up composition table.  If there are no composition
-	 * that begins with the given character, it is not a
-	 * composition candidate.
-	 */
-	if (compose_char(c, &dummy) == 0)
-		return (0);
-	else
-		return (1);
-}
-
-namespace {
-
-struct SequenceAdaptor
-{
-	const composition* const mSequence;
-	explicit SequenceAdaptor(const composition* aSequence) : mSequence(aSequence) {}
-	uint32_t operator[](size_t aIdx) const {
-		return mSequence[aIdx].c2;
-	}
-};
-
-} // namespace
-
-static nsresult
-mdn__unicode_compose(uint32_t c1, uint32_t c2, uint32_t *compp)
-{
-	int32_t n;
-	const struct composition *cseq;
-
-	//assert(compp != nullptr);
-
-	/*
-	 * Check for Hangul.
-	 */
-	if (LBase <= c1 && c1 < LBase + LCount &&
-	    VBase <= c2 && c2 < VBase + VCount) {
-		/*
-		 * Hangul L and V.
-		 */
-		*compp = SBase +
-			((c1 - LBase) * VCount + (c2 - VBase)) * TCount;
-		return (NS_OK);
-	} else if (SBase <= c1 && c1 < SLast &&
-		   TBase <= c2 && c2 < TBase + TCount &&
-		   (c1 - SBase) % TCount == 0) {
-		/*
-		 * Hangul LV and T.
-		 */
-		*compp = c1 + (c2 - TBase);
-		return (NS_OK);
-	}
-
-	/*
-	 * Look up composition table.  If the result is 0, no composition
-	 * is defined.  Otherwise, upper 16bits of the result contains
-	 * the number of composition that begins with 'c1', and the lower
-	 * 16bits is the offset in 'compose_seq'.
-	 */
-	if ((n = compose_char(c1, &cseq)) == 0)
-		return (NS_SUCCESS_UNORM_NOTFOUND);
-
-	/*
-	 * The composite sequences are sorted by the 2nd character 'c2'.
-	 * So we can use binary search.
-	 */
-	size_t idx;
-	if (mozilla::BinarySearch(SequenceAdaptor(cseq), 0, n, c2, &idx)) {
-		*compp = cseq[idx].comp;
-		return (NS_OK);
-	}
-
-	return (NS_SUCCESS_UNORM_NOTFOUND);
-}
-
-
-#define WORKBUF_SIZE		128
-#define WORKBUF_SIZE_MAX	10000
-
-typedef struct {
-	int32_t cur;		/* pointing now processing character */
-	int32_t last;		/* pointing just after the last character */
-	int32_t size;		/* size of UCS and CLASS array */
-	uint32_t *ucs;	/* UCS-4 characters */
-	int32_t *cclass;		/* and their canonical classes */
-	uint32_t ucs_buf[WORKBUF_SIZE];	/* local buffer */
-	int32_t class_buf[WORKBUF_SIZE];		/* ditto */
-} workbuf_t;
-
-static nsresult	decompose(workbuf_t *wb, uint32_t c, int32_t compat);
-static void		get_class(workbuf_t *wb);
-static void		reorder(workbuf_t *wb);
-static void		compose(workbuf_t *wb);
-static nsresult flush_before_cur(workbuf_t *wb, nsAString& aToStr);
-static void		workbuf_init(workbuf_t *wb);
-static void		workbuf_free(workbuf_t *wb);
-static nsresult	workbuf_extend(workbuf_t *wb);
-static nsresult	workbuf_append(workbuf_t *wb, uint32_t c);
-static void		workbuf_shift(workbuf_t *wb, int32_t shift);
-static void		workbuf_removevoid(workbuf_t *wb);
-
-
-static nsresult
-mdn_normalize(bool do_composition, bool compat,
-	  const nsAString& aSrcStr, nsAString& aToStr)
-{
-	workbuf_t wb;
-	nsresult r = NS_OK;
-	/*
-	 * Initialize working buffer.
-	 */
-	workbuf_init(&wb);
-
-	nsAString::const_iterator start, end;
-	aSrcStr.BeginReading(start); 
-	aSrcStr.EndReading(end); 
-
-	while (start != end) {
-		uint32_t c;
-		char16_t curChar;
-
-		//assert(wb.cur == wb.last);
-
-		/*
-		 * Get one character from 'from'.
-		 */
-		curChar= *start++;
-
-		if (NS_IS_HIGH_SURROGATE(curChar) && start != end && NS_IS_LOW_SURROGATE(*(start)) ) {
-			c = SURROGATE_TO_UCS4(curChar, *start);
-			++start;
-		} else {
-			c = curChar;
-		}
-
-		/*
-		 * Decompose it.
-		 */
-		if ((r = decompose(&wb, c, compat)) != NS_OK)
-			break;
-
-		/*
-		 * Get canonical class.
-		 */
-		get_class(&wb);
-
-		/*
-		 * Reorder & compose.
-		 */
-		for (; wb.cur < wb.last; wb.cur++) {
-			if (wb.cur == 0) {
-				continue;
-			} else if (wb.cclass[wb.cur] > 0) {
-				/*
-				 * This is not a starter. Try reordering.
-				 * Note that characters up to it are
-				 * already in canonical order.
-				 */
-				reorder(&wb);
-				continue;
-			}
-
-			/*
-			 * This is a starter character, and there are
-			 * some characters before it.  Those characters
-			 * have been reordered properly, and
-			 * ready for composition.
-			 */
-			if (do_composition && wb.cclass[0] == 0)
-				compose(&wb);
-
-			/*
-			 * If CUR points to a starter character,
-			 * then process of characters before CUR are
-			 * already finished, because any further
-			 * reordering/composition for them are blocked
-			 * by the starter CUR points.
-			 */
-			if (wb.cur > 0 && wb.cclass[wb.cur] == 0) {
-				/* Flush everything before CUR. */
-				r = flush_before_cur(&wb, aToStr);
-				if (r != NS_OK)
-					break;
-			}
-		}
-	}
-
-	if (r == NS_OK) {
-		if (do_composition && wb.cur > 0 && wb.cclass[0] == 0) {
-			/*
-			 * There is some characters left in WB.
-			 * They are ordered, but not composed yet.
-			 * Now CUR points just after the last character in WB,
-			 * and since compose() tries to compose characters
-			 * between top and CUR inclusive, we must make CUR
-			 * one character back during compose().
-			 */
-			wb.cur--;
-			compose(&wb);
-			wb.cur++;
-		}
-		/*
-		 * Call this even when WB.CUR == 0, to make TO
-		 * NUL-terminated.
-		 */
-		r = flush_before_cur(&wb, aToStr);
-	}
-
-	workbuf_free(&wb);
-
-	return (r);
-}
-
-static nsresult
-decompose(workbuf_t *wb, uint32_t c, int32_t compat) {
-	nsresult r;
-	int32_t dec_len;
-
-again:
-	r = mdn__unicode_decompose(compat, wb->ucs + wb->last,
-				   wb->size - wb->last, c, &dec_len);
-	switch (r) {
-	case NS_OK:
-		wb->last += dec_len;
-		return (NS_OK);
-	case NS_SUCCESS_UNORM_NOTFOUND:
-		return (workbuf_append(wb, c));
-	case NS_ERROR_UNORM_MOREOUTPUT:
-		if ((r = workbuf_extend(wb)) != NS_OK)
-			return (r);
-		if (wb->size > WORKBUF_SIZE_MAX) {
-			// "mdn__unormalize_form*: " "working buffer too large\n"
-			return (NS_ERROR_FAILURE);
-		}
-		goto again;
-	default:
-		return (r);
-	}
-	/* NOTREACHED */
-}
-
-static void		
-get_class(workbuf_t *wb) {
-	int32_t i;
-
-	for (i = wb->cur; i < wb->last; i++)
-		wb->cclass[i] = canonclass(wb->ucs[i]);
-}
-
-static void
-reorder(workbuf_t *wb) {
-	uint32_t c;
-	int32_t i;
-	int32_t cclass;
-
-	//assert(wb != nullptr);
-
-	i = wb->cur;
-	c = wb->ucs[i];
-	cclass = wb->cclass[i];
-
-	while (i > 0 && wb->cclass[i - 1] > cclass) {
-		wb->ucs[i] = wb->ucs[i - 1];
-		wb->cclass[i] =wb->cclass[i - 1];
-		i--;
-		wb->ucs[i] = c;
-		wb->cclass[i] = cclass;
-	}
-}
-
-static void
-compose(workbuf_t *wb) {
-	int32_t cur;
-	uint32_t *ucs;
-	int32_t *cclass;
-	int32_t last_class;
-	int32_t nvoids;
-	int32_t i;
-
-	//assert(wb != nullptr && wb->cclass[0] == 0);
-
-	cur = wb->cur;
-	ucs = wb->ucs;
-	cclass = wb->cclass;
-
-	/*
-	 * If there are no decomposition sequence that begins with
-	 * the top character, composition is impossible.
-	 */
-	if (!mdn__unicode_iscompositecandidate(ucs[0]))
-		return;
-
-	last_class = 0;
-	nvoids = 0;
-	for (i = 1; i <= cur; i++) {
-		uint32_t c;
-		int32_t cl = cclass[i];
-
-		if ((last_class < cl || cl == 0) &&
-		    mdn__unicode_compose(ucs[0], ucs[i],
-					 &c) == NS_OK) {
-			/*
-			 * Replace the top character with the composed one.
-			 */
-			ucs[0] = c;
-			cclass[0] = canonclass(c);
-
-			cclass[i] = -1;	/* void this character */
-			nvoids++;
-		} else {
-			last_class = cl;
-		}
-	}
-
-	/* Purge void characters, if any. */
-	if (nvoids > 0)
-		workbuf_removevoid(wb);
-}
-
-static nsresult
-flush_before_cur(workbuf_t *wb, nsAString& aToStr) 
-{
-	int32_t i;
-
-	for (i = 0; i < wb->cur; i++) {
-		if (!IS_IN_BMP(wb->ucs[i])) {
-			aToStr.Append((char16_t)H_SURROGATE(wb->ucs[i]));
-			aToStr.Append((char16_t)L_SURROGATE(wb->ucs[i]));
-		} else {
-			aToStr.Append((char16_t)(wb->ucs[i]));
-		}
-	}
-
-	workbuf_shift(wb, wb->cur);
-
-	return (NS_OK);
-}
-
-static void
-workbuf_init(workbuf_t *wb) {
-	wb->cur = 0;
-	wb->last = 0;
-	wb->size = WORKBUF_SIZE;
-	wb->ucs = wb->ucs_buf;
-	wb->cclass = wb->class_buf;
-}
-
-static void
-workbuf_free(workbuf_t *wb) {
-	if (wb->ucs != wb->ucs_buf) {
-		free(wb->ucs);
-		free(wb->cclass);
-	}
-}
-
 static nsresult
-workbuf_extend(workbuf_t *wb) {
-	int32_t newsize = wb->size * 3;
-
-	if (wb->ucs == wb->ucs_buf) {
-		wb->ucs = (uint32_t*)moz_xmalloc(sizeof(wb->ucs[0]) * newsize);
-		if (!wb->ucs)
-			return NS_ERROR_OUT_OF_MEMORY;
-		wb->cclass = (int32_t*)moz_xmalloc(sizeof(wb->cclass[0]) * newsize);
-		if (!wb->cclass) {
-			free(wb->ucs);
-			wb->ucs = nullptr;
-			return NS_ERROR_OUT_OF_MEMORY;
-		}
-	} else {
-		void* buf = moz_xrealloc(wb->ucs, sizeof(wb->ucs[0]) * newsize);
-		if (!buf)
-			return NS_ERROR_OUT_OF_MEMORY;
-		wb->ucs = (uint32_t*)buf;
-		buf = moz_xrealloc(wb->cclass, sizeof(wb->cclass[0]) * newsize);
-		if (!buf)
-			return NS_ERROR_OUT_OF_MEMORY;
-		wb->cclass = (int32_t*)buf;
-	}
-	return (NS_OK);
-}
-
-static nsresult
-workbuf_append(workbuf_t *wb, uint32_t c) {
-	nsresult r;
-
-	if (wb->last >= wb->size && (r = workbuf_extend(wb)) != NS_OK)
-		return (r);
-	wb->ucs[wb->last++] = c;
-	return (NS_OK);
-}
-
-static void
-workbuf_shift(workbuf_t *wb, int32_t shift) {
-	int32_t nmove;
-
-	//assert(wb != nullptr && wb->cur >= shift);
-
-	nmove = wb->last - shift;
-	memmove(&wb->ucs[0], &wb->ucs[shift],
-		      nmove * sizeof(wb->ucs[0]));
-	memmove(&wb->cclass[0], &wb->cclass[shift],
-		      nmove * sizeof(wb->cclass[0]));
-	wb->cur -= shift;
-	wb->last -= shift;
-}
-
-static void
-workbuf_removevoid(workbuf_t *wb) {
-	int32_t i, j;
-	int32_t last = wb->last;
-
-	for (i = j = 0; i < last; i++) {
-		if (wb->cclass[i] >= 0) {
-			if (j < i) {
-				wb->ucs[j] = wb->ucs[i];
-				wb->cclass[j] = wb->cclass[i];
-			}
-			j++;
-		}
-	}
-	wb->cur -= last - j;
-	wb->last = j;
-}
-
-nsresult  
-nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
-{
-  return mdn_normalize(false, false, aSrc, aDest);
-}
-
-nsresult  
-nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
-{
-  return mdn_normalize(true, false, aSrc, aDest);
-}
-
-nsresult  
-nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
-{
-  return mdn_normalize(false, true, aSrc, aDest);
+DoNormalization(const UNormalizer2* aNorm, const nsAString& aSrc,
+                nsAString& aDest)
+{
+  UErrorCode errorCode = U_ZERO_ERROR;
+  const int32_t length = aSrc.Length();
+  const UChar* src = reinterpret_cast<const UChar*>(aSrc.BeginReading());
+  // Initial guess for a capacity that is likely to be enough for most cases.
+  int32_t capacity = length + (length >> 8) + 8;
+  do {
+    aDest.SetLength(capacity);
+    UChar* dest = reinterpret_cast<UChar*>(aDest.BeginWriting());
+    int32_t len = unorm2_normalize(aNorm, src, aSrc.Length(), dest, capacity,
+                                   &errorCode);
+    if (U_SUCCESS(errorCode)) {
+      aDest.SetLength(len);
+      break;
+    }
+    if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
+      // Buffer wasn't big enough; adjust to the reported size and try again.
+      capacity = len;
+      errorCode = U_ZERO_ERROR;
+      continue;
+    }
+  } while (false);
+  return ICUUtils::UErrorToNsResult(errorCode);
+}
+
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFD(const nsAString& aSrc,
+                                         nsAString& aDest)
+{
+  // The unorm2_getNF*Instance functions return static singletons that should
+  // not be deleted, so we just get them once on first use.
+  static UErrorCode errorCode = U_ZERO_ERROR;
+  static const UNormalizer2* norm = unorm2_getNFDInstance(&errorCode);
+  if (U_SUCCESS(errorCode)) {
+    return DoNormalization(norm, aSrc, aDest);
+  }
+  return ICUUtils::UErrorToNsResult(errorCode);
 }
 
-nsresult  
-nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFC(const nsAString& aSrc,
+                                         nsAString& aDest)
 {
-  return mdn_normalize(true, true, aSrc, aDest);
+  static UErrorCode errorCode = U_ZERO_ERROR;
+  static const UNormalizer2* norm = unorm2_getNFCInstance(&errorCode);
+  if (U_SUCCESS(errorCode)) {
+    return DoNormalization(norm, aSrc, aDest);
+  }
+  return ICUUtils::UErrorToNsResult(errorCode);
 }
 
-bool
-nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab)
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFKD(const nsAString& aSrc,
+                                          nsAString& aDest)
 {
-  return mdn__unicode_compose(a, b, ab) == NS_OK;
+  static UErrorCode errorCode = U_ZERO_ERROR;
+  static const UNormalizer2* norm = unorm2_getNFKDInstance(&errorCode);
+  if (U_SUCCESS(errorCode)) {
+    return DoNormalization(norm, aSrc, aDest);
+  }
+  return ICUUtils::UErrorToNsResult(errorCode);
 }
 
-bool
-nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2)
+nsresult
+nsUnicodeNormalizer::NormalizeUnicodeNFKC(const nsAString& aSrc,
+                                          nsAString& aDest)
 {
-  // We can't use mdn__unicode_decompose here, because that does a recursive
-  // decomposition that may yield more than two characters, but the harfbuzz
-  // callback wants just a single-step decomp that is guaranteed to produce
-  // no more than two characters. So we do a low-level lookup in the table
-  // of decomp sequences.
-  const uint32_t *seq;
-  uint32_t seqidx = decompose_char(c, &seq);
-  if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
-    return false;
-  }
-  *c1 = *seq & ~END_BIT;
-  if (*seq & END_BIT) {
-    *c2 = 0;
-  } else {
-    *c2 = *++seq & ~END_BIT;
+  static UErrorCode errorCode = U_ZERO_ERROR;
+  static const UNormalizer2* norm = unorm2_getNFKCInstance(&errorCode);
+  if (U_SUCCESS(errorCode)) {
+    return DoNormalization(norm, aSrc, aDest);
   }
-  return true;
+  return ICUUtils::UErrorToNsResult(errorCode);
 }
author	wolfbeast <mcwerewolf@gmail.com>	2018-05-04 11:52:49 +0200
committer	wolfbeast <mcwerewolf@gmail.com>	2018-05-04 11:52:49 +0200
commit	58751cc2ad952d73754fcf6bd894af865074ede6 (patch)
tree	529b6476ff5a25dd570725e679f11e4b91d0173d /intl/unicharutil/nsUnicodeNormalizer.cpp
parent	2eddf19168d5230877cff574ed548610e8eebc51 (diff)
download	UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.gz UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.lz UXP-58751cc2ad952d73754fcf6bd894af865074ede6.tar.xz UXP-58751cc2ad952d73754fcf6bd894af865074ede6.zip