diff options
Diffstat (limited to 'intl/icu/source/i18n/compactdecimalformat.cpp')
-rw-r--r-- | intl/icu/source/i18n/compactdecimalformat.cpp | 1004 |
1 files changed, 1004 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/compactdecimalformat.cpp b/intl/icu/source/i18n/compactdecimalformat.cpp new file mode 100644 index 000000000..385b3a513 --- /dev/null +++ b/intl/icu/source/i18n/compactdecimalformat.cpp @@ -0,0 +1,1004 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1997-2015, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* File COMPACTDECIMALFORMAT.CPP +* +******************************************************************************** +*/ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "charstr.h" +#include "cstring.h" +#include "digitlst.h" +#include "mutex.h" +#include "unicode/compactdecimalformat.h" +#include "unicode/numsys.h" +#include "unicode/plurrule.h" +#include "unicode/ures.h" +#include "ucln_in.h" +#include "uhash.h" +#include "umutex.h" +#include "unicode/ures.h" +#include "uresimp.h" + +// Maps locale name to CDFLocaleData struct. +static UHashtable* gCompactDecimalData = NULL; +static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER; + +U_NAMESPACE_BEGIN + +static const int32_t MAX_DIGITS = 15; +static const char gOther[] = "other"; +static const char gLatnTag[] = "latn"; +static const char gNumberElementsTag[] = "NumberElements"; +static const char gDecimalFormatTag[] = "decimalFormat"; +static const char gPatternsShort[] = "patternsShort"; +static const char gPatternsLong[] = "patternsLong"; +static const char gLatnPath[] = "NumberElements/latn"; + +static const UChar u_0 = 0x30; +static const UChar u_apos = 0x27; + +static const UChar kZero[] = {u_0}; + +// Used to unescape single quotes. +enum QuoteState { + OUTSIDE, + INSIDE_EMPTY, + INSIDE_FULL +}; + +enum FallbackFlags { + ANY = 0, + MUST = 1, + NOT_ROOT = 2 + // Next one will be 4 then 6 etc. +}; + + +// CDFUnit represents a prefix-suffix pair for a particular variant +// and log10 value. +struct CDFUnit : public UMemory { + UnicodeString prefix; + UnicodeString suffix; + inline CDFUnit() : prefix(), suffix() { + prefix.setToBogus(); + } + inline ~CDFUnit() {} + inline UBool isSet() const { + return !prefix.isBogus(); + } + inline void markAsSet() { + prefix.remove(); + } +}; + +// CDFLocaleStyleData contains formatting data for a particular locale +// and style. +class CDFLocaleStyleData : public UMemory { + public: + // What to divide by for each log10 value when formatting. These values + // will be powers of 10. For English, would be: + // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ... + double divisors[MAX_DIGITS]; + // Maps plural variants to CDFUnit[MAX_DIGITS] arrays. + // To format a number x, + // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]). + // Compute the plural variant for displayNum + // (e.g zero, one, two, few, many, other). + // Compute cdfUnits = unitsByVariant[pluralVariant]. + // Prefix and suffix to use at cdfUnits[log10(x)] + UHashtable* unitsByVariant; + // A flag for whether or not this CDFLocaleStyleData was loaded from the + // Latin numbering system as a fallback from the locale numbering system. + // This value is meaningless if the object is bogus or empty. + UBool fromFallback; + inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) { + uprv_memset(divisors, 0, sizeof(divisors)); + } + ~CDFLocaleStyleData(); + // Init initializes this object. + void Init(UErrorCode& status); + inline UBool isBogus() const { + return unitsByVariant == NULL; + } + void setToBogus(); + UBool isEmpty() { + return unitsByVariant == NULL || unitsByVariant->count == 0; + } + private: + CDFLocaleStyleData(const CDFLocaleStyleData&); + CDFLocaleStyleData& operator=(const CDFLocaleStyleData&); +}; + +// CDFLocaleData contains formatting data for a particular locale. +struct CDFLocaleData : public UMemory { + CDFLocaleStyleData shortData; + CDFLocaleStyleData longData; + inline CDFLocaleData() : shortData(), longData() { } + inline ~CDFLocaleData() { } + // Init initializes this object. + void Init(UErrorCode& status); +}; + +U_NAMESPACE_END + +U_CDECL_BEGIN + +static UBool U_CALLCONV cdf_cleanup(void) { + if (gCompactDecimalData != NULL) { + uhash_close(gCompactDecimalData); + gCompactDecimalData = NULL; + } + return TRUE; +} + +static void U_CALLCONV deleteCDFUnits(void* ptr) { + delete [] (icu::CDFUnit*) ptr; +} + +static void U_CALLCONV deleteCDFLocaleData(void* ptr) { + delete (icu::CDFLocaleData*) ptr; +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +static UBool divisors_equal(const double* lhs, const double* rhs); +static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status); + +static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status); +static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status); +static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status); +static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status); +static double calculateDivisor(double power10, int32_t numZeros); +static UBool onlySpaces(UnicodeString u); +static void fixQuotes(UnicodeString& s); +static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status); +static void fillInMissing(CDFLocaleStyleData* result); +static int32_t computeLog10(double x, UBool inRange); +static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status); +static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value); + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat) + +CompactDecimalFormat::CompactDecimalFormat( + const DecimalFormat& decimalFormat, + const UHashtable* unitsByVariant, + const double* divisors, + PluralRules* pluralRules) + : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) { +} + +CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source) + : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) { +} + +CompactDecimalFormat* U_EXPORT2 +CompactDecimalFormat::createInstance( + const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) { + LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status)); + if (U_FAILURE(status)) { + return NULL; + } + LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status)); + if (U_FAILURE(status)) { + return NULL; + } + const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status); + if (U_FAILURE(status)) { + return NULL; + } + CompactDecimalFormat* result = + new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias()); + if (result == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + pluralRules.orphan(); + result->setMaximumSignificantDigits(3); + result->setSignificantDigitsUsed(TRUE); + result->setGroupingUsed(FALSE); + return result; +} + +CompactDecimalFormat& +CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) { + if (this != &rhs) { + DecimalFormat::operator=(rhs); + _unitsByVariant = rhs._unitsByVariant; + _divisors = rhs._divisors; + delete _pluralRules; + _pluralRules = rhs._pluralRules->clone(); + } + return *this; +} + +CompactDecimalFormat::~CompactDecimalFormat() { + delete _pluralRules; +} + + +Format* +CompactDecimalFormat::clone(void) const { + return new CompactDecimalFormat(*this); +} + +UBool +CompactDecimalFormat::operator==(const Format& that) const { + if (this == &that) { + return TRUE; + } + return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that)); +} + +UBool +CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const { + return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules); +} + +UnicodeString& +CompactDecimalFormat::format( + double number, + UnicodeString& appendTo, + FieldPosition& pos) const { + UErrorCode status = U_ZERO_ERROR; + return format(number, appendTo, pos, status); +} + +UnicodeString& +CompactDecimalFormat::format( + double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return appendTo; + } + DigitList orig, rounded; + orig.set(number); + UBool isNegative; + _round(orig, rounded, isNegative, status); + if (U_FAILURE(status)) { + return appendTo; + } + double roundedDouble = rounded.getDouble(); + if (isNegative) { + roundedDouble = -roundedDouble; + } + int32_t baseIdx = computeLog10(roundedDouble, TRUE); + double numberToFormat = roundedDouble / _divisors[baseIdx]; + UnicodeString variant = _pluralRules->select(numberToFormat); + if (isNegative) { + numberToFormat = -numberToFormat; + } + const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx); + appendTo += unit->prefix; + DecimalFormat::format(numberToFormat, appendTo, pos); + appendTo += unit->suffix; + return appendTo; +} + +UnicodeString& +CompactDecimalFormat::format( + double /* number */, + UnicodeString& appendTo, + FieldPositionIterator* /* posIter */, + UErrorCode& status) const { + status = U_UNSUPPORTED_ERROR; + return appendTo; +} + +UnicodeString& +CompactDecimalFormat::format( + int32_t number, + UnicodeString& appendTo, + FieldPosition& pos) const { + return format((double) number, appendTo, pos); +} + +UnicodeString& +CompactDecimalFormat::format( + int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const { + return format((double) number, appendTo, pos, status); +} + +UnicodeString& +CompactDecimalFormat::format( + int32_t /* number */, + UnicodeString& appendTo, + FieldPositionIterator* /* posIter */, + UErrorCode& status) const { + status = U_UNSUPPORTED_ERROR; + return appendTo; +} + +UnicodeString& +CompactDecimalFormat::format( + int64_t number, + UnicodeString& appendTo, + FieldPosition& pos) const { + return format((double) number, appendTo, pos); +} + +UnicodeString& +CompactDecimalFormat::format( + int64_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const { + return format((double) number, appendTo, pos, status); +} + +UnicodeString& +CompactDecimalFormat::format( + int64_t /* number */, + UnicodeString& appendTo, + FieldPositionIterator* /* posIter */, + UErrorCode& status) const { + status = U_UNSUPPORTED_ERROR; + return appendTo; +} + +UnicodeString& +CompactDecimalFormat::format( + StringPiece /* number */, + UnicodeString& appendTo, + FieldPositionIterator* /* posIter */, + UErrorCode& status) const { + status = U_UNSUPPORTED_ERROR; + return appendTo; +} + +UnicodeString& +CompactDecimalFormat::format( + const DigitList& /* number */, + UnicodeString& appendTo, + FieldPositionIterator* /* posIter */, + UErrorCode& status) const { + status = U_UNSUPPORTED_ERROR; + return appendTo; +} + +UnicodeString& +CompactDecimalFormat::format(const DigitList& /* number */, + UnicodeString& appendTo, + FieldPosition& /* pos */, + UErrorCode& status) const { + status = U_UNSUPPORTED_ERROR; + return appendTo; +} + +void +CompactDecimalFormat::parse( + const UnicodeString& /* text */, + Formattable& /* result */, + ParsePosition& /* parsePosition */) const { +} + +void +CompactDecimalFormat::parse( + const UnicodeString& /* text */, + Formattable& /* result */, + UErrorCode& status) const { + status = U_UNSUPPORTED_ERROR; +} + +CurrencyAmount* +CompactDecimalFormat::parseCurrency( + const UnicodeString& /* text */, + ParsePosition& /* pos */) const { + return NULL; +} + +void CDFLocaleStyleData::Init(UErrorCode& status) { + if (unitsByVariant != NULL) { + return; + } + unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); + if (U_FAILURE(status)) { + return; + } + uhash_setKeyDeleter(unitsByVariant, uprv_free); + uhash_setValueDeleter(unitsByVariant, deleteCDFUnits); +} + +CDFLocaleStyleData::~CDFLocaleStyleData() { + setToBogus(); +} + +void CDFLocaleStyleData::setToBogus() { + if (unitsByVariant != NULL) { + uhash_close(unitsByVariant); + unitsByVariant = NULL; + } +} + +void CDFLocaleData::Init(UErrorCode& status) { + shortData.Init(status); + if (U_FAILURE(status)) { + return; + } + longData.Init(status); +} + +// Helper method for operator= +static UBool divisors_equal(const double* lhs, const double* rhs) { + for (int32_t i = 0; i < MAX_DIGITS; ++i) { + if (lhs[i] != rhs[i]) { + return FALSE; + } + } + return TRUE; +} + +// getCDFLocaleStyleData returns pointer to formatting data for given locale and +// style within the global cache. On cache miss, getCDFLocaleStyleData loads +// the data from CLDR into the global cache before returning the pointer. If a +// UNUM_LONG data is requested for a locale, and that locale does not have +// UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for +// that locale. +static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + CDFLocaleData* result = NULL; + const char* key = inLocale.getName(); + { + Mutex lock(&gCompactDecimalMetaLock); + if (gCompactDecimalData == NULL) { + gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); + if (U_FAILURE(status)) { + return NULL; + } + uhash_setKeyDeleter(gCompactDecimalData, uprv_free); + uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData); + ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup); + } else { + result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key); + } + } + if (result != NULL) { + return extractDataByStyleEnum(*result, style, status); + } + + result = loadCDFLocaleData(inLocale, status); + if (U_FAILURE(status)) { + return NULL; + } + + { + Mutex lock(&gCompactDecimalMetaLock); + CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key); + if (temp != NULL) { + delete result; + result = temp; + } else { + uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status); + if (U_FAILURE(status)) { + return NULL; + } + } + } + return extractDataByStyleEnum(*result, style, status); +} + +static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) { + switch (style) { + case UNUM_SHORT: + return &data.shortData; + case UNUM_LONG: + if (!data.longData.isBogus()) { + return &data.longData; + } + return &data.shortData; + default: + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } +} + +// loadCDFLocaleData loads formatting data from CLDR for a given locale. The +// caller owns the returned pointer. +static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + CDFLocaleData* result = new CDFLocaleData; + if (result == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + result->Init(status); + if (U_FAILURE(status)) { + delete result; + return NULL; + } + + load(inLocale, result, status); + + if (U_FAILURE(status)) { + delete result; + return NULL; + } + return result; +} + +namespace { + +struct CmptDecDataSink : public ResourceSink { + + CDFLocaleData& dataBundle; // Where to save values when they are read + UBool isLatin; // Whether or not we are traversing the Latin tree + UBool isFallback; // Whether or not we are traversing the Latin tree as fallback + + enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG }; + enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT }; + + /* + * NumberElements{ <-- top (numbering system table) + * latn{ <-- patternsTable (one per numbering system) + * patternsLong{ <-- formatsTable (one per pattern) + * decimalFormat{ <-- powersOfTenTable (one per format) + * 1000{ <-- pluralVariantsTable (one per power of ten) + * one{"0 thousand"} <-- plural variant and template + */ + + CmptDecDataSink(CDFLocaleData& _dataBundle) + : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {} + virtual ~CmptDecDataSink(); + + virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) { + // SPECIAL CASE: Don't consume root in the non-Latin numbering system + if (isRoot && !isLatin) { return; } + + ResourceTable patternsTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) { + + // Check for patternsShort or patternsLong + EPatternsTableKey patternsTableKey; + if (uprv_strcmp(key, gPatternsShort) == 0) { + patternsTableKey = PATTERNS_SHORT; + } else if (uprv_strcmp(key, gPatternsLong) == 0) { + patternsTableKey = PATTERNS_LONG; + } else { + continue; + } + + // Traverse into the formats table + ResourceTable formatsTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) { + + // Check for decimalFormat or currencyFormat + EFormatsTableKey formatsTableKey; + if (uprv_strcmp(key, gDecimalFormatTag) == 0) { + formatsTableKey = DECIMAL_FORMAT; + // TODO: Enable this statement when currency support is added + // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) { + // formatsTableKey = CURRENCY_FORMAT; + } else { + continue; + } + + // Set the current style and destination based on the two keys + UNumberCompactStyle style; + CDFLocaleStyleData* destination = NULL; + if (patternsTableKey == PATTERNS_LONG + && formatsTableKey == DECIMAL_FORMAT) { + style = UNUM_LONG; + destination = &dataBundle.longData; + } else if (patternsTableKey == PATTERNS_SHORT + && formatsTableKey == DECIMAL_FORMAT) { + style = UNUM_SHORT; + destination = &dataBundle.shortData; + // TODO: Enable the following statements when currency support is added + // } else if (patternsTableKey == PATTERNS_SHORT + // && formatsTableKey == CURRENCY_FORMAT) { + // style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named + // destination = &dataBundle.shortCurrencyData; + // } else { + // // Silently ignore this case + // continue; + } + + // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE: + // 1) Don't consume longData if shortData was consumed from the non-Latin + // locale numbering system + // 2) Don't consume longData for the first time if this is the root bundle and + // shortData is already populated from a more specific locale. Note that if + // both longData and shortData are both only in root, longData will be + // consumed since it is alphabetically before shortData in the bundle. + if (isFallback + && style == UNUM_LONG + && !dataBundle.shortData.isEmpty() + && !dataBundle.shortData.fromFallback) { + continue; + } + if (isRoot + && style == UNUM_LONG + && dataBundle.longData.isEmpty() + && !dataBundle.shortData.isEmpty()) { + continue; + } + + // Set the "fromFallback" flag on the data object + destination->fromFallback = isFallback; + + // Traverse into the powers of ten table + ResourceTable powersOfTenTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) { + + // The key will always be some even power of 10. e.g 10000. + char* endPtr = NULL; + double power10 = uprv_strtod(key, &endPtr); + if (*endPtr != 0) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + return; + } + int32_t log10Value = computeLog10(power10, FALSE); + + // Silently ignore divisors that are too big. + if (log10Value >= MAX_DIGITS) continue; + + // Iterate over the plural variants ("one", "other", etc) + ResourceTable pluralVariantsTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { + const char* pluralVariant = key; + const UnicodeString formatStr = value.getUnicodeString(errorCode); + + // Copy the data into the in-memory data bundle (do not overwrite + // existing values) + int32_t numZeros = populatePrefixSuffix( + pluralVariant, log10Value, formatStr, + destination->unitsByVariant, FALSE, errorCode); + + // If populatePrefixSuffix returns -1, it means that this key has been + // encountered already. + if (numZeros < 0) { + continue; + } + + // Set the divisor, which is based on the number of zeros in the template + // string. If the divisor from here is different from the one previously + // stored, it means that the number of zeros in different plural variants + // differs; throw an exception. + // TODO: How should I check for floating-point errors here? + // Is there a good reason why "divisor" is double and not long like Java? + double divisor = calculateDivisor(power10, numZeros); + if (destination->divisors[log10Value] != 0.0 + && destination->divisors[log10Value] != divisor) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + return; + } + destination->divisors[log10Value] = divisor; + } + } + } + } + } +}; + +// Virtual destructors must be defined out of line. +CmptDecDataSink::~CmptDecDataSink() {} + +} // namespace + +static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) { + LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status)); + if (U_FAILURE(status)) { + return; + } + const char* nsName = ns->getName(); + + LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status)); + if (U_FAILURE(status)) { + return; + } + CmptDecDataSink sink(*result); + sink.isFallback = FALSE; + + // First load the number elements data if nsName is not Latin. + if (uprv_strcmp(nsName, gLatnTag) != 0) { + sink.isLatin = FALSE; + CharString path; + path.append(gNumberElementsTag, status) + .append('/', status) + .append(nsName, status); + ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status); + if (status == U_MISSING_RESOURCE_ERROR) { + // Silently ignore and use Latin + status = U_ZERO_ERROR; + } else if (U_FAILURE(status)) { + return; + } + sink.isFallback = TRUE; + } + + // Now load Latin. + sink.isLatin = TRUE; + ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status); + if (U_FAILURE(status)) return; + + // If longData is empty, default it to be equal to shortData + if (result->longData.isEmpty()) { + result->longData.setToBogus(); + } + + // Check for "other" variants in each of the three data classes, and resolve missing elements. + + if (!result->longData.isBogus()) { + checkForOtherVariants(&result->longData, status); + if (U_FAILURE(status)) return; + fillInMissing(&result->longData); + } + + checkForOtherVariants(&result->shortData, status); + if (U_FAILURE(status)) return; + fillInMissing(&result->shortData); + + // TODO: Enable this statement when currency support is added + // checkForOtherVariants(&result->shortCurrencyData, status); + // if (U_FAILURE(status)) return; + // fillInMissing(&result->shortCurrencyData); +} + +// populatePrefixSuffix Adds a specific prefix-suffix pair to result for a +// given variant and log10 value. +// variant is 'zero', 'one', 'two', 'few', 'many', or 'other'. +// formatStr is the format string from which the prefix and suffix are +// extracted. It is usually of form 'Pefix 000 suffix'. +// populatePrefixSuffix returns the number of 0's found in formatStr +// before the decimal point. +// In the special case that formatStr contains only spaces for prefix +// and suffix, populatePrefixSuffix returns log10Value + 1. +static int32_t populatePrefixSuffix( + const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) { + if (U_FAILURE(status)) { + return 0; + } + int32_t firstIdx = formatStr.indexOf(kZero, UPRV_LENGTHOF(kZero), 0); + // We must have 0's in format string. + if (firstIdx == -1) { + status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + int32_t lastIdx = formatStr.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx); + CDFUnit* unit = createCDFUnit(variant, log10Value, result, status); + if (U_FAILURE(status)) { + return 0; + } + + // Return -1 if we are not overwriting an existing value + if (unit->isSet() && !overwrite) { + return -1; + } + unit->markAsSet(); + + // Everything up to first 0 is the prefix + unit->prefix = formatStr.tempSubString(0, firstIdx); + fixQuotes(unit->prefix); + // Everything beyond the last 0 is the suffix + unit->suffix = formatStr.tempSubString(lastIdx + 1); + fixQuotes(unit->suffix); + + // If there is effectively no prefix or suffix, ignore the actual number of + // 0's and act as if the number of 0's matches the size of the number. + if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) { + return log10Value + 1; + } + + // Calculate number of zeros before decimal point + int32_t idx = firstIdx + 1; + while (idx <= lastIdx && formatStr.charAt(idx) == u_0) { + ++idx; + } + return (idx - firstIdx); +} + +// Calculate a divisor based on the magnitude and number of zeros in the +// template string. +static double calculateDivisor(double power10, int32_t numZeros) { + double divisor = power10; + for (int32_t i = 1; i < numZeros; ++i) { + divisor /= 10.0; + } + return divisor; +} + +static UBool onlySpaces(UnicodeString u) { + return u.trim().length() == 0; +} + +// fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j. +// Modifies s in place. +static void fixQuotes(UnicodeString& s) { + QuoteState state = OUTSIDE; + int32_t len = s.length(); + int32_t dest = 0; + for (int32_t i = 0; i < len; ++i) { + UChar ch = s.charAt(i); + if (ch == u_apos) { + if (state == INSIDE_EMPTY) { + s.setCharAt(dest, ch); + ++dest; + } + } else { + s.setCharAt(dest, ch); + ++dest; + } + + // Update state + switch (state) { + case OUTSIDE: + state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE; + break; + case INSIDE_EMPTY: + case INSIDE_FULL: + state = ch == u_apos ? OUTSIDE : INSIDE_FULL; + break; + default: + break; + } + } + s.truncate(dest); +} + +// Checks to make sure that an "other" variant is present in all +// powers of 10. +static void checkForOtherVariants(CDFLocaleStyleData* result, + UErrorCode& status) { + if (result == NULL || result->unitsByVariant == NULL) { + return; + } + + const CDFUnit* otherByBase = + (const CDFUnit*) uhash_get(result->unitsByVariant, gOther); + if (otherByBase == NULL) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + + // Check all other plural variants, and make sure that if + // any of them are populated, then other is also populated + int32_t pos = UHASH_FIRST; + const UHashElement* element; + while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) { + CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer; + if (variantsByBase == otherByBase) continue; + for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) { + if (variantsByBase[log10Value].isSet() + && !otherByBase[log10Value].isSet()) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + } + } +} + +// fillInMissing ensures that the data in result is complete. +// result data is complete if for each variant in result, there exists +// a prefix-suffix pair for each log10 value and there also exists +// a divisor for each log10 value. +// +// First this function figures out for which log10 values, the other +// variant already had data. These are the same log10 values defined +// in CLDR. +// +// For each log10 value not defined in CLDR, it uses the divisor for +// the last defined log10 value or 1. +// +// Then for each variant, it does the following. For each log10 +// value not defined in CLDR, copy the prefix-suffix pair from the +// previous log10 value. If log10 value is defined in CLDR but is +// missing from given variant, copy the prefix-suffix pair for that +// log10 value from the 'other' variant. +static void fillInMissing(CDFLocaleStyleData* result) { + const CDFUnit* otherUnits = + (const CDFUnit*) uhash_get(result->unitsByVariant, gOther); + UBool definedInCLDR[MAX_DIGITS]; + double lastDivisor = 1.0; + for (int32_t i = 0; i < MAX_DIGITS; ++i) { + if (!otherUnits[i].isSet()) { + result->divisors[i] = lastDivisor; + definedInCLDR[i] = FALSE; + } else { + lastDivisor = result->divisors[i]; + definedInCLDR[i] = TRUE; + } + } + // Iterate over each variant. + int32_t pos = UHASH_FIRST; + const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos); + for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) { + CDFUnit* units = (CDFUnit*) element->value.pointer; + for (int32_t i = 0; i < MAX_DIGITS; ++i) { + if (definedInCLDR[i]) { + if (!units[i].isSet()) { + units[i] = otherUnits[i]; + } + } else { + if (i == 0) { + units[0].markAsSet(); + } else { + units[i] = units[i - 1]; + } + } + } + } +} + +// computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest +// value computeLog10 will return MAX_DIGITS -1 even for +// numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return +// up to MAX_DIGITS. +static int32_t computeLog10(double x, UBool inRange) { + int32_t result = 0; + int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS; + while (x >= 10.0) { + x /= 10.0; + ++result; + if (result == max) { + break; + } + } + return result; +} + +// createCDFUnit returns a pointer to the prefix-suffix pair for a given +// variant and log10 value within table. If no such prefix-suffix pair is +// stored in table, one is created within table before returning pointer. +static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant); + if (cdfUnit == NULL) { + cdfUnit = new CDFUnit[MAX_DIGITS]; + if (cdfUnit == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uhash_put(table, uprv_strdup(variant), cdfUnit, &status); + if (U_FAILURE(status)) { + return NULL; + } + } + CDFUnit* result = &cdfUnit[log10Value]; + return result; +} + +// getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given +// variant and log10 value within table. If the given variant doesn't exist, it +// falls back to the OTHER variant. Therefore, this method will always return +// some non-NULL value. +static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) { + CharString cvariant; + UErrorCode status = U_ZERO_ERROR; + const CDFUnit *cdfUnit = NULL; + cvariant.appendInvariantChars(variant, status); + if (!U_FAILURE(status)) { + cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data()); + } + if (cdfUnit == NULL) { + cdfUnit = (const CDFUnit*) uhash_get(table, gOther); + } + return &cdfUnit[log10Value]; +} + +U_NAMESPACE_END +#endif |