summary refs log tree commit diff stats
path: root/intl/icu/source/i18n/collationsettings.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/collationsettings.cpp')
-rw-r--r-- intl/icu/source/i18n/collationsettings.cpp 377
1 files changed, 377 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/collationsettings.cpp b/intl/icu/source/i18n/collationsettings.cpp
new file mode 100644
index 000000000..bc1d4e63f
--- /dev/null
+++ b/intl/icu/source/i18n/collationsettings.cpp
@@ -0,0 +1,377 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2013-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* collationsettings.cpp
+*
+* created on: 2013feb07
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/ucol.h"
+#include "cmemory.h"
+#include "collation.h"
+#include "collationdata.h"
+#include "collationsettings.h"
+#include "sharedobject.h"
+#include "uassert.h"
+#include "umutex.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Copy constructor.
+ * Scalar settings are copied directly. The reordering members start out empty
+ * and are then filled in by copyReorderingFrom(). The fast Latin primaries are
+ * copied only when the fast Latin options are non-negative.
+ */
+CollationSettings::CollationSettings(const CollationSettings &other)
+        : SharedObject(other),
+          options(other.options),
+          variableTop(other.variableTop),
+          reorderTable(NULL),
+          minHighNoReorder(other.minHighNoReorder),
+          reorderRanges(NULL),
+          reorderRangesLength(0),
+          reorderCodes(NULL),
+          reorderCodesLength(0),
+          reorderCodesCapacity(0),
+          fastLatinOptions(other.fastLatinOptions) {
+    if(other.fastLatinOptions >= 0) {
+        uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
+    }
+    // A constructor cannot report a failure, so the status is local and dropped.
+    // If the copy cannot allocate, setReorderArrays() resets this object
+    // to "no reordering".
+    UErrorCode status = U_ZERO_ERROR;
+    copyReorderingFrom(other, status);
+}
+
+/**
+ * Destructor.
+ * Frees the reorder codes block only when this object allocated it,
+ * which is indicated by a non-zero capacity. With a zero capacity,
+ * reorderCodes is NULL or an alias that is not freed here.
+ */
+CollationSettings::~CollationSettings() {
+    if(reorderCodesCapacity == 0) { return; }
+    uprv_free(const_cast<int32_t *>(reorderCodes));
+}
+
+/**
+ * Compares the options, the variableTop (only if one of the ALTERNATE_MASK
+ * option bits is set), and the list of reorder codes.
+ * @return TRUE if all of the compared settings are equal
+ */
+UBool
+CollationSettings::operator==(const CollationSettings &other) const {
+    if(options != other.options) {
+        return FALSE;
+    }
+    // variableTop only matters when an alternate-handling bit is set.
+    const bool usesVariableTop = (options & ALTERNATE_MASK) != 0;
+    if(usesVariableTop && variableTop != other.variableTop) {
+        return FALSE;
+    }
+    const int32_t count = reorderCodesLength;
+    if(count != other.reorderCodesLength) {
+        return FALSE;
+    }
+    int32_t index = 0;
+    while(index < count) {
+        if(reorderCodes[index] != other.reorderCodes[index]) {
+            return FALSE;
+        }
+        ++index;
+    }
+    return TRUE;
+}
+
+/**
+ * Computes a hash code from the same fields that operator==() compares:
+ * the options, the variableTop (only if one of the ALTERNATE_MASK option bits
+ * is set), and the reorder codes.
+ *
+ * All arithmetic is done on uint32_t and the shift count is limited to 0..31,
+ * so the function has defined behavior for reorder code lists with more than
+ * 32 entries and for values whose signed left shift would overflow.
+ * For inputs where the signed computation was well-defined, the result is unchanged.
+ * @return the hash code
+ */
+int32_t
+CollationSettings::hashCode() const {
+    uint32_t h = (uint32_t)options << 8;
+    if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
+    h ^= (uint32_t)reorderCodesLength;
+    for(int32_t i = 0; i < reorderCodesLength; ++i) {
+        // Shifting a 32-bit value by 32 or more bits is undefined; wrap the count.
+        h ^= (uint32_t)reorderCodes[i] << (i & 0x1f);
+    }
+    return (int32_t)h;
+}
+
+/**
+ * Turns off reordering.
+ * The permutation table is set to NULL rather than to a no-op permutation.
+ * reorderCodes and reorderCodesCapacity are left alone so that
+ * the memory block can be reused later.
+ */
+void
+CollationSettings::resetReordering() {
+    reorderCodesLength = 0;
+    reorderRangesLength = 0;
+    minHighNoReorder = 0;
+    reorderTable = NULL;
+}
+
+/**
+ * Points this object at caller-provided reordering data without copying it,
+ * if that data looks complete; otherwise recomputes the data from the codes
+ * via setReordering().
+ *
+ * The data is used as-is when the table is not NULL and either
+ * - there are no ranges and the table has no split bytes, or
+ * - there are at least two ranges, the first range offset is 0,
+ *   and the last range offset is not 0.
+ *
+ * @param data passed through to setReordering() when the data must be regenerated
+ * @param codes the reorder codes
+ * @param length the number of reorder codes
+ * @param ranges the (limit, offset) pairs
+ * @param rangesLength the number of pairs
+ * @param table the lead byte permutation table, or NULL
+ * @param errorCode in/out error code; nothing is done if it indicates a failure
+ */
+void
+CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
+                                   const uint32_t *ranges, int32_t rangesLength,
+                                   const uint8_t *table, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+
+    bool usable = false;
+    if(table != NULL) {
+        if(rangesLength == 0) {
+            usable = !reorderTableHasSplitBytes(table);
+        } else if(rangesLength >= 2) {
+            // The first offset must be 0. The last offset must not be 0.
+            usable = (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0;
+        }
+    }
+    if(!usable) {
+        // Regenerate missing data.
+        setReordering(data, codes, length, errorCode);
+        return;
+    }
+
+    // Owned memory has to be released before the pointer becomes an alias.
+    if(reorderCodesCapacity != 0) {
+        uprv_free(const_cast<int32_t *>(reorderCodes));
+        reorderCodesCapacity = 0;
+    }
+    reorderCodesLength = length;
+    reorderCodes = codes;
+    reorderTable = table;
+
+    // Skip the leading ranges whose limit has a zero second byte:
+    // the table already reorders them, and fewer ranges make reorderEx() faster.
+    int32_t skipped = 0;
+    for(; skipped < rangesLength && (ranges[skipped] & 0xff0000) == 0; ++skipped) {}
+
+    if(skipped < rangesLength) {
+        U_ASSERT(table[ranges[skipped] >> 24] == 0);
+        reorderRangesLength = rangesLength - skipped;
+        reorderRanges = ranges + skipped;
+        minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
+    } else {
+        // No range has a split byte: The table alone does the reordering.
+        U_ASSERT(!reorderTableHasSplitBytes(table));
+        reorderRangesLength = 0;
+        reorderRanges = NULL;
+        minHighNoReorder = 0;
+    }
+}
+
+// Computes and stores the reordering data for the given reorder codes.
+// - data: provides makeReorderRanges(), which turns the codes into (limit, offset) pairs.
+// - codes, codesLength: the requested reorder codes. An empty list, or the single
+//   code UCOL_REORDER_CODE_NONE, turns reordering off via resetReordering().
+// - errorCode: in/out error code; nothing is done if it already indicates a failure.
+// On success, the codes, the remaining ranges, and a 256-entry lead byte
+// permutation table are handed to setReorderArrays(), which copies them.
+void
+CollationSettings::setReordering(const CollationData &data,
+ const int32_t *codes, int32_t codesLength,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
+ if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
+ resetReordering();
+ return;
+ }
+ UVector32 rangesList(errorCode);
+ data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
+ if(U_FAILURE(errorCode)) { return; }
+ int32_t rangesLength = rangesList.size();
+ // No ranges means that there is nothing to reorder.
+ if(rangesLength == 0) {
+ resetReordering();
+ return;
+ }
+ // View the vector's buffer as unsigned pairs. The pointer is only valid while
+ // rangesList is alive; setReorderArrays() below copies what it needs.
+ const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
+ // ranges[] contains at least two (limit, offset) pairs.
+ // The first offset must be 0. The last offset must not be 0.
+ // Separators (at the low end) and trailing weights (at the high end)
+ // are never reordered.
+ U_ASSERT(rangesLength >= 2);
+ U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
+ // Primaries at or above the last limit (its upper 16 bits) are returned
+ // unchanged by reorderEx().
+ minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
+
+ // Write the lead byte permutation table.
+ // Set a 0 for each lead byte that has a range boundary in the middle.
+ uint8_t table[256];
+ // b is the next lead byte whose table entry has not been written yet.
+ int32_t b = 0;
+ int32_t firstSplitByteRangeIndex = -1;
+ for(int32_t i = 0; i < rangesLength; ++i) {
+ uint32_t pair = ranges[i];
+ // The lead byte of the limit is in the top 8 bits of the pair.
+ int32_t limit1 = (int32_t)(pair >> 24);
+ while(b < limit1) {
+ // Only the low 8 bits of the sum survive the cast:
+ // the lead byte plus the low byte of the pair.
+ table[b] = (uint8_t)(b + pair);
+ ++b;
+ }
+ // Check the second byte of the limit.
+ if((pair & 0xff0000) != 0) {
+ table[limit1] = 0;
+ b = limit1 + 1;
+ // Remember only the first range that ends in the middle of a lead byte.
+ if(firstSplitByteRangeIndex < 0) {
+ firstSplitByteRangeIndex = i;
+ }
+ }
+ }
+ // Lead bytes above the last limit map to themselves.
+ while(b <= 0xff) {
+ table[b] = (uint8_t)b;
+ ++b;
+ }
+ if(firstSplitByteRangeIndex < 0) {
+ // The lead byte permutation table alone suffices for reordering.
+ rangesLength = 0;
+ } else {
+ // Remove the ranges below the first split byte.
+ ranges += firstSplitByteRangeIndex;
+ rangesLength -= firstSplitByteRangeIndex;
+ }
+ setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
+}
+
+// Copies the reorder codes, the reorder ranges, and the 256-byte lead byte table
+// into one memory block owned by this object, and points the reorder members at the copies.
+// Block layout, in int32_t units from the start:
+//   [0, codesLength)                          the codes
+//   [codesLength, codesLength + rangesLength) the ranges
+//   [reorderCodesCapacity, ...)               the 256 table bytes
+// An existing owned block is reused if its capacity is large enough.
+// - errorCode: in/out error code; nothing is done if it already indicates a failure.
+//   Set to U_MEMORY_ALLOCATION_ERROR, after resetReordering(), if the allocation fails.
+void
+CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
+ const uint32_t *ranges, int32_t rangesLength,
+ const uint8_t *table, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
+ int32_t *ownedCodes;
+ int32_t totalLength = codesLength + rangesLength;
+ U_ASSERT(totalLength > 0);
+ // With a capacity of 0 (nothing owned) and the asserted totalLength > 0,
+ // this test fails and a new block is allocated.
+ if(totalLength <= reorderCodesCapacity) {
+ ownedCodes = const_cast<int32_t *>(reorderCodes);
+ } else {
+ // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
+ int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
+ // capacity ints of 4 bytes each, followed by the 256 table bytes
+ ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
+ if(ownedCodes == NULL) {
+ resetReordering();
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ // Free the old owned block only after the new allocation succeeded.
+ if(reorderCodesCapacity != 0) {
+ uprv_free(const_cast<int32_t *>(reorderCodes));
+ }
+ reorderCodes = ownedCodes;
+ reorderCodesCapacity = capacity;
+ }
+ // The table goes right after the capacity ints; the codes and ranges go at the start.
+ uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
+ uprv_memcpy(ownedCodes, codes, codesLength * 4);
+ uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
+ reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
+ reorderCodesLength = codesLength;
+ // The ranges are stored directly behind the codes, in the same int32_t array.
+ reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
+ reorderRangesLength = rangesLength;
+}
+
+/**
+ * Makes this object use the same reordering as the other one.
+ * If the other object has no reordering, this one is reset.
+ * If the other object does not own its arrays (capacity 0), this one aliases
+ * the same arrays; otherwise the arrays are copied via setReorderArrays().
+ * @param other the settings to copy the reordering from
+ * @param errorCode in/out error code; nothing is done if it indicates a failure
+ */
+void
+CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    if(!other.hasReordering()) {
+        resetReordering();
+        return;
+    }
+    minHighNoReorder = other.minHighNoReorder;
+    if(other.reorderCodesCapacity != 0) {
+        // The other object owns its arrays: make our own copy.
+        setReorderArrays(other.reorderCodes, other.reorderCodesLength,
+                         other.reorderRanges, other.reorderRangesLength,
+                         other.reorderTable, errorCode);
+        return;
+    }
+    // The reorder arrays are aliased to memory-mapped data.
+    reorderCodes = other.reorderCodes;
+    reorderCodesLength = other.reorderCodesLength;
+    reorderRanges = other.reorderRanges;
+    reorderRangesLength = other.reorderRangesLength;
+    reorderTable = other.reorderTable;
+}
+
+/**
+ * Checks whether any entry of the lead byte table other than table[0] is 0.
+ * table[0] itself is asserted to be 0.
+ * @param table the 256-entry lead byte permutation table
+ * @return TRUE if any of table[1..255] is 0
+ */
+UBool
+CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
+    U_ASSERT(table[0] == 0);
+    UBool foundSplitByte = FALSE;
+    int32_t leadByte = 1;
+    while(leadByte < 256) {
+        if(table[leadByte] == 0) {
+            foundSplitByte = TRUE;
+            break;
+        }
+        ++leadByte;
+    }
+    return foundSplitByte;
+}
+
+/**
+ * Reorders a primary weight using the reorder ranges.
+ * Primaries at or above minHighNoReorder are returned unchanged.
+ * Otherwise the first (limit, offset) pair that is greater than the
+ * rounded-up primary is found, and that pair, shifted left by 24 bits,
+ * is added to the primary.
+ * @param p the primary weight
+ * @return the reordered primary weight
+ */
+uint32_t
+CollationSettings::reorderEx(uint32_t p) const {
+    if(p >= minHighNoReorder) {
+        return p;
+    }
+    // Set the lower 16 bits so that they are >= any offset bits;
+    // then the value can be compared directly with whole pairs.
+    const uint32_t roundedUp = p | 0xffff;
+    const uint32_t *pair = reorderRanges;
+    while(roundedUp >= *pair) {
+        ++pair;
+    }
+    return p + (*pair << 24);
+}
+
+/**
+ * Sets the strength bits of the options.
+ * @param value UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY,
+ *              UCOL_IDENTICAL, or UCOL_DEFAULT to take the bits from defaultOptions
+ * @param defaultOptions the options that supply the strength for UCOL_DEFAULT
+ * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for any
+ *                  other value, in which case the options are not modified
+ */
+void
+CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    int32_t strengthBits;
+    switch(value) {
+    case UCOL_DEFAULT:
+        strengthBits = defaultOptions & STRENGTH_MASK;
+        break;
+    case UCOL_PRIMARY:
+    case UCOL_SECONDARY:
+    case UCOL_TERTIARY:
+    case UCOL_QUATERNARY:
+    case UCOL_IDENTICAL:
+        strengthBits = value << STRENGTH_SHIFT;
+        break;
+    default:
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    options = (options & ~STRENGTH_MASK) | strengthBits;
+}
+
+/**
+ * Sets, clears, or restores one on/off bit of the options.
+ * @param bit the option bit to modify
+ * @param value UCOL_ON, UCOL_OFF, or UCOL_DEFAULT to take the bit from defaultOptions
+ * @param defaultOptions the options that supply the bit for UCOL_DEFAULT
+ * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for any
+ *                  other value, in which case the options are not modified
+ */
+void
+CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
+                           int32_t defaultOptions, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    if(value == UCOL_ON) {
+        options |= bit;
+    } else if(value == UCOL_OFF) {
+        options &= ~bit;
+    } else if(value == UCOL_DEFAULT) {
+        options = (options & ~bit) | (defaultOptions & bit);
+    } else {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+/**
+ * Sets the case-first bits of the options.
+ * @param value UCOL_OFF, UCOL_LOWER_FIRST, UCOL_UPPER_FIRST,
+ *              or UCOL_DEFAULT to take the bits from defaultOptions
+ * @param defaultOptions the options that supply the bits for UCOL_DEFAULT
+ * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for any
+ *                  other value, in which case the options are not modified
+ */
+void
+CollationSettings::setCaseFirst(UColAttributeValue value,
+                                int32_t defaultOptions, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    int32_t caseFirstBits;
+    switch(value) {
+    case UCOL_DEFAULT:
+        caseFirstBits = defaultOptions & CASE_FIRST_AND_UPPER_MASK;
+        break;
+    case UCOL_UPPER_FIRST:
+        caseFirstBits = CASE_FIRST_AND_UPPER_MASK;
+        break;
+    case UCOL_LOWER_FIRST:
+        caseFirstBits = CASE_FIRST;
+        break;
+    case UCOL_OFF:
+        caseFirstBits = 0;
+        break;
+    default:
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    options = (options & ~CASE_FIRST_AND_UPPER_MASK) | caseFirstBits;
+}
+
+/**
+ * Sets the alternate-handling bits of the options.
+ * @param value UCOL_NON_IGNORABLE, UCOL_SHIFTED,
+ *              or UCOL_DEFAULT to take the bits from defaultOptions
+ * @param defaultOptions the options that supply the bits for UCOL_DEFAULT
+ * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for any
+ *                  other value, in which case the options are not modified
+ */
+void
+CollationSettings::setAlternateHandling(UColAttributeValue value,
+                                        int32_t defaultOptions, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    int32_t alternateBits;
+    switch(value) {
+    case UCOL_DEFAULT:
+        alternateBits = defaultOptions & ALTERNATE_MASK;
+        break;
+    case UCOL_SHIFTED:
+        alternateBits = SHIFTED;
+        break;
+    case UCOL_NON_IGNORABLE:
+        alternateBits = 0;
+        break;
+    default:
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    options = (options & ~ALTERNATE_MASK) | alternateBits;
+}
+
+/**
+ * Sets the max-variable bits of the options.
+ * @param value MAX_VAR_SPACE, MAX_VAR_PUNCT, MAX_VAR_SYMBOL, MAX_VAR_CURRENCY,
+ *              or UCOL_DEFAULT to take the bits from defaultOptions
+ * @param defaultOptions the options that supply the bits for UCOL_DEFAULT
+ * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for any
+ *                  other value, in which case the options are not modified
+ */
+void
+CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    int32_t maxVariableBits;
+    switch(value) {
+    case UCOL_DEFAULT:
+        maxVariableBits = defaultOptions & MAX_VARIABLE_MASK;
+        break;
+    case MAX_VAR_SPACE:
+    case MAX_VAR_PUNCT:
+    case MAX_VAR_SYMBOL:
+    case MAX_VAR_CURRENCY:
+        maxVariableBits = value << MAX_VARIABLE_SHIFT;
+        break;
+    default:
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    options = (options & ~MAX_VARIABLE_MASK) | maxVariableBits;
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_COLLATION