diff options
Diffstat (limited to 'intl/icu/source/i18n/affixpatternparser.cpp')
-rw-r--r-- | intl/icu/source/i18n/affixpatternparser.cpp | 696 |
1 files changed, 696 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/affixpatternparser.cpp b/intl/icu/source/i18n/affixpatternparser.cpp new file mode 100644 index 000000000..2ca22a150 --- /dev/null +++ b/intl/icu/source/i18n/affixpatternparser.cpp @@ -0,0 +1,696 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + * Copyright (C) 2015, International Business Machines + * Corporation and others. All Rights Reserved. + * + * file name: affixpatternparser.cpp + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/dcfmtsym.h" +#include "unicode/plurrule.h" +#include "unicode/ucurr.h" +#include "affixpatternparser.h" +#include "charstr.h" +#include "precision.h" +#include "uassert.h" +#include "unistrappender.h" + + static UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4}; + +static UChar gPercent = 0x25; +static UChar gPerMill = 0x2030; +static UChar gNegative = 0x2D; +static UChar gPositive = 0x2B; + +#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | (l & 0xFF))) + +#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F)) + +#define UNPACK_LONG(c) (((c) >> 8) & 0x80) + +#define UNPACK_LENGTH(c) ((c) & 0xFF) + +U_NAMESPACE_BEGIN + +static int32_t +nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { + if (buffer[idx] != 0x27 || idx + 1 == len) { + *token = buffer[idx]; + return 1; + } + *token = buffer[idx + 1]; + if (buffer[idx + 1] == 0xA4) { + int32_t i = 2; + for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i) + ; + return i; + } + return 2; +} + +static int32_t +nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { + *token = buffer[idx]; + int32_t max; + switch (buffer[idx]) { + case 0x27: + max = 2; + break; + case 0xA4: + max = 3; + break; + default: + max = 1; + break; + } + int32_t i = 1; + for (; idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i) + ; + return i; +} + +CurrencyAffixInfo::CurrencyAffixInfo() + : fSymbol(gDefaultSymbols, 1), + fISO(gDefaultSymbols, 2), + fLong(DigitAffix(gDefaultSymbols, 3)), + fIsDefault(TRUE) { +} + +void +CurrencyAffixInfo::set( + const char *locale, + const PluralRules *rules, + const UChar *currency, + UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + fIsDefault = FALSE; + if (currency == NULL) { + fSymbol.setTo(gDefaultSymbols, 1); + fISO.setTo(gDefaultSymbols, 2); + fLong.remove(); + fLong.append(gDefaultSymbols, 3); + fIsDefault = TRUE; + return; + } + int32_t len; + UBool unusedIsChoice; + const UChar *symbol = ucurr_getName( + currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice, + &len, &status); + if (U_FAILURE(status)) { + return; + } + fSymbol.setTo(symbol, len); + fISO.setTo(currency, u_strlen(currency)); + fLong.remove(); + StringEnumeration* keywords = rules->getKeywords(status); + if (U_FAILURE(status)) { + return; + } + const UnicodeString* pluralCount; + while ((pluralCount = keywords->snext(status)) != NULL) { + CharString pCount; + pCount.appendInvariantChars(*pluralCount, status); + const UChar *pluralName = ucurr_getPluralName( + currency, locale, &unusedIsChoice, pCount.data(), + &len, &status); + fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status); + } + delete keywords; +} + +void +CurrencyAffixInfo::adjustPrecision( + const UChar *currency, const UCurrencyUsage usage, + FixedPrecision &precision, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + + int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage( + currency, usage, &status); + precision.fMin.setFracDigitCount(digitCount); + precision.fMax.setFracDigitCount(digitCount); + double increment = ucurr_getRoundingIncrementForUsage( + currency, usage, &status); + if (increment == 0.0) { + precision.fRoundingIncrement.clear(); + } else { + precision.fRoundingIncrement.set(increment); + // guard against round-off error + precision.fRoundingIncrement.round(6); + } +} + +void +AffixPattern::addLiteral( + const UChar *literal, int32_t start, int32_t len) { + char32Count += u_countChar32(literal + start, len); + literals.append(literal, start, len); + int32_t tlen = tokens.length(); + // Takes 4 UChars to encode maximum literal length. + UChar *tokenChars = tokens.getBuffer(tlen + 4); + + // find start of literal size. May be tlen if there is no literal. + // While finding start of literal size, compute literal length + int32_t literalLength = 0; + int32_t tLiteralStart = tlen; + while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) { + tLiteralStart--; + literalLength <<= 8; + literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]); + } + // Add number of chars we just added to literal + literalLength += len; + + // Now encode the new length starting at tLiteralStart + tlen = tLiteralStart; + tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF); + literalLength >>= 8; + while (literalLength) { + tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF); + literalLength >>= 8; + } + tokens.releaseBuffer(tlen); +} + +void +AffixPattern::add(ETokenType t) { + add(t, 1); +} + +void +AffixPattern::addCurrency(uint8_t count) { + add(kCurrency, count); +} + +void +AffixPattern::add(ETokenType t, uint8_t count) { + U_ASSERT(t != kLiteral); + char32Count += count; + switch (t) { + case kCurrency: + hasCurrencyToken = TRUE; + break; + case kPercent: + hasPercentToken = TRUE; + break; + case kPerMill: + hasPermillToken = TRUE; + break; + default: + // Do nothing + break; + } + tokens.append(PACK_TOKEN_AND_LENGTH(t, count)); +} + +AffixPattern & +AffixPattern::append(const AffixPattern &other) { + AffixPatternIterator iter; + other.iterator(iter); + UnicodeString literal; + while (iter.nextToken()) { + switch (iter.getTokenType()) { + case kLiteral: + iter.getLiteral(literal); + addLiteral(literal.getBuffer(), 0, literal.length()); + break; + case kCurrency: + addCurrency(iter.getTokenLength()); + break; + default: + add(iter.getTokenType()); + break; + } + } + return *this; +} + +void +AffixPattern::remove() { + tokens.remove(); + literals.remove(); + hasCurrencyToken = FALSE; + hasPercentToken = FALSE; + hasPermillToken = FALSE; + char32Count = 0; +} + +// escapes literals for strings where special characters are NOT escaped +// except for apostrophe. +static void escapeApostropheInLiteral( + const UnicodeString &literal, UnicodeStringAppender &appender) { + int32_t len = literal.length(); + const UChar *buffer = literal.getBuffer(); + for (int32_t i = 0; i < len; ++i) { + UChar ch = buffer[i]; + switch (ch) { + case 0x27: + appender.append((UChar) 0x27); + appender.append((UChar) 0x27); + break; + default: + appender.append(ch); + break; + } + } +} + + +// escapes literals for user strings where special characters in literals +// are escaped with apostrophe. +static void escapeLiteral( + const UnicodeString &literal, UnicodeStringAppender &appender) { + int32_t len = literal.length(); + const UChar *buffer = literal.getBuffer(); + for (int32_t i = 0; i < len; ++i) { + UChar ch = buffer[i]; + switch (ch) { + case 0x27: + appender.append((UChar) 0x27); + appender.append((UChar) 0x27); + break; + case 0x25: + appender.append((UChar) 0x27); + appender.append((UChar) 0x25); + appender.append((UChar) 0x27); + break; + case 0x2030: + appender.append((UChar) 0x27); + appender.append((UChar) 0x2030); + appender.append((UChar) 0x27); + break; + case 0xA4: + appender.append((UChar) 0x27); + appender.append((UChar) 0xA4); + appender.append((UChar) 0x27); + break; + case 0x2D: + appender.append((UChar) 0x27); + appender.append((UChar) 0x2D); + appender.append((UChar) 0x27); + break; + case 0x2B: + appender.append((UChar) 0x27); + appender.append((UChar) 0x2B); + appender.append((UChar) 0x27); + break; + default: + appender.append(ch); + break; + } + } +} + +UnicodeString & +AffixPattern::toString(UnicodeString &appendTo) const { + AffixPatternIterator iter; + iterator(iter); + UnicodeStringAppender appender(appendTo); + UnicodeString literal; + while (iter.nextToken()) { + switch (iter.getTokenType()) { + case kLiteral: + escapeApostropheInLiteral(iter.getLiteral(literal), appender); + break; + case kPercent: + appender.append((UChar) 0x27); + appender.append((UChar) 0x25); + break; + case kPerMill: + appender.append((UChar) 0x27); + appender.append((UChar) 0x2030); + break; + case kCurrency: + { + appender.append((UChar) 0x27); + int32_t cl = iter.getTokenLength(); + for (int32_t i = 0; i < cl; ++i) { + appender.append((UChar) 0xA4); + } + } + break; + case kNegative: + appender.append((UChar) 0x27); + appender.append((UChar) 0x2D); + break; + case kPositive: + appender.append((UChar) 0x27); + appender.append((UChar) 0x2B); + break; + default: + U_ASSERT(FALSE); + break; + } + } + return appendTo; +} + +UnicodeString & +AffixPattern::toUserString(UnicodeString &appendTo) const { + AffixPatternIterator iter; + iterator(iter); + UnicodeStringAppender appender(appendTo); + UnicodeString literal; + while (iter.nextToken()) { + switch (iter.getTokenType()) { + case kLiteral: + escapeLiteral(iter.getLiteral(literal), appender); + break; + case kPercent: + appender.append((UChar) 0x25); + break; + case kPerMill: + appender.append((UChar) 0x2030); + break; + case kCurrency: + { + int32_t cl = iter.getTokenLength(); + for (int32_t i = 0; i < cl; ++i) { + appender.append((UChar) 0xA4); + } + } + break; + case kNegative: + appender.append((UChar) 0x2D); + break; + case kPositive: + appender.append((UChar) 0x2B); + break; + default: + U_ASSERT(FALSE); + break; + } + } + return appendTo; +} + +class AffixPatternAppender : public UMemory { +public: + AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { } + + inline void append(UChar x) { + if (fIdx == UPRV_LENGTHOF(fBuffer)) { + fDest->addLiteral(fBuffer, 0, fIdx); + fIdx = 0; + } + fBuffer[fIdx++] = x; + } + + inline void append(UChar32 x) { + if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) { + fDest->addLiteral(fBuffer, 0, fIdx); + fIdx = 0; + } + U16_APPEND_UNSAFE(fBuffer, fIdx, x); + } + + inline void flush() { + if (fIdx) { + fDest->addLiteral(fBuffer, 0, fIdx); + } + fIdx = 0; + } + + /** + * flush the buffer when we go out of scope. + */ + ~AffixPatternAppender() { + flush(); + } +private: + AffixPattern *fDest; + int32_t fIdx; + UChar fBuffer[32]; + AffixPatternAppender(const AffixPatternAppender &other); + AffixPatternAppender &operator=(const AffixPatternAppender &other); +}; + + +AffixPattern & +AffixPattern::parseUserAffixString( + const UnicodeString &affixStr, + AffixPattern &appendTo, + UErrorCode &status) { + if (U_FAILURE(status)) { + return appendTo; + } + int32_t len = affixStr.length(); + const UChar *buffer = affixStr.getBuffer(); + // 0 = not quoted; 1 = quoted. + int32_t state = 0; + AffixPatternAppender appender(appendTo); + for (int32_t i = 0; i < len; ) { + UChar token; + int32_t tokenSize = nextUserToken(buffer, i, len, &token); + i += tokenSize; + if (token == 0x27 && tokenSize == 1) { // quote + state = 1 - state; + continue; + } + if (state == 0) { + switch (token) { + case 0x25: + appender.flush(); + appendTo.add(kPercent, 1); + break; + case 0x27: // double quote + appender.append((UChar) 0x27); + break; + case 0x2030: + appender.flush(); + appendTo.add(kPerMill, 1); + break; + case 0x2D: + appender.flush(); + appendTo.add(kNegative, 1); + break; + case 0x2B: + appender.flush(); + appendTo.add(kPositive, 1); + break; + case 0xA4: + appender.flush(); + appendTo.add(kCurrency, tokenSize); + break; + default: + appender.append(token); + break; + } + } else { + switch (token) { + case 0x27: // double quote + appender.append((UChar) 0x27); + break; + case 0xA4: // included b/c tokenSize can be > 1 + for (int32_t j = 0; j < tokenSize; ++j) { + appender.append((UChar) 0xA4); + } + break; + default: + appender.append(token); + break; + } + } + } + return appendTo; +} + +AffixPattern & +AffixPattern::parseAffixString( + const UnicodeString &affixStr, + AffixPattern &appendTo, + UErrorCode &status) { + if (U_FAILURE(status)) { + return appendTo; + } + int32_t len = affixStr.length(); + const UChar *buffer = affixStr.getBuffer(); + for (int32_t i = 0; i < len; ) { + UChar token; + int32_t tokenSize = nextToken(buffer, i, len, &token); + if (tokenSize == 1) { + int32_t literalStart = i; + ++i; + while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) { + ++i; + } + appendTo.addLiteral(buffer, literalStart, i - literalStart); + + // If we reached end of string, we are done + if (i == len) { + return appendTo; + } + } + i += tokenSize; + switch (token) { + case 0x25: + appendTo.add(kPercent, 1); + break; + case 0x2030: + appendTo.add(kPerMill, 1); + break; + case 0x2D: + appendTo.add(kNegative, 1); + break; + case 0x2B: + appendTo.add(kPositive, 1); + break; + case 0xA4: + { + if (tokenSize - 1 > 3) { + status = U_PARSE_ERROR; + return appendTo; + } + appendTo.add(kCurrency, tokenSize - 1); + } + break; + default: + appendTo.addLiteral(&token, 0, 1); + break; + } + } + return appendTo; +} + +AffixPatternIterator & +AffixPattern::iterator(AffixPatternIterator &result) const { + result.nextLiteralIndex = 0; + result.lastLiteralLength = 0; + result.nextTokenIndex = 0; + result.tokens = &tokens; + result.literals = &literals; + return result; +} + +UBool +AffixPatternIterator::nextToken() { + int32_t tlen = tokens->length(); + if (nextTokenIndex == tlen) { + return FALSE; + } + ++nextTokenIndex; + const UChar *tokenBuffer = tokens->getBuffer(); + if (UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]) == + AffixPattern::kLiteral) { + while (nextTokenIndex < tlen && + UNPACK_LONG(tokenBuffer[nextTokenIndex])) { + ++nextTokenIndex; + } + lastLiteralLength = 0; + int32_t i = nextTokenIndex - 1; + for (; UNPACK_LONG(tokenBuffer[i]); --i) { + lastLiteralLength <<= 8; + lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); + } + lastLiteralLength <<= 8; + lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); + nextLiteralIndex += lastLiteralLength; + } + return TRUE; +} + +AffixPattern::ETokenType +AffixPatternIterator::getTokenType() const { + return UNPACK_TOKEN(tokens->charAt(nextTokenIndex - 1)); +} + +UnicodeString & +AffixPatternIterator::getLiteral(UnicodeString &result) const { + const UChar *buffer = literals->getBuffer(); + result.setTo(buffer + (nextLiteralIndex - lastLiteralLength), lastLiteralLength); + return result; +} + +int32_t +AffixPatternIterator::getTokenLength() const { + const UChar *tokenBuffer = tokens->getBuffer(); + AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]); + return type == AffixPattern::kLiteral ? lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]); +} + +AffixPatternParser::AffixPatternParser() + : fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) { +} + +AffixPatternParser::AffixPatternParser( + const DecimalFormatSymbols &symbols) { + setDecimalFormatSymbols(symbols); +} + +void +AffixPatternParser::setDecimalFormatSymbols( + const DecimalFormatSymbols &symbols) { + fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); + fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); + fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); + fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); +} + +PluralAffix & +AffixPatternParser::parse( + const AffixPattern &affixPattern, + const CurrencyAffixInfo ¤cyAffixInfo, + PluralAffix &appendTo, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return appendTo; + } + AffixPatternIterator iter; + affixPattern.iterator(iter); + UnicodeString literal; + while (iter.nextToken()) { + switch (iter.getTokenType()) { + case AffixPattern::kPercent: + appendTo.append(fPercent, UNUM_PERCENT_FIELD); + break; + case AffixPattern::kPerMill: + appendTo.append(fPermill, UNUM_PERMILL_FIELD); + break; + case AffixPattern::kNegative: + appendTo.append(fNegative, UNUM_SIGN_FIELD); + break; + case AffixPattern::kPositive: + appendTo.append(fPositive, UNUM_SIGN_FIELD); + break; + case AffixPattern::kCurrency: + switch (iter.getTokenLength()) { + case 1: + appendTo.append( + currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD); + break; + case 2: + appendTo.append( + currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD); + break; + case 3: + appendTo.append( + currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status); + break; + default: + U_ASSERT(FALSE); + break; + } + break; + case AffixPattern::kLiteral: + appendTo.append(iter.getLiteral(literal)); + break; + default: + U_ASSERT(FALSE); + break; + } + } + return appendTo; +} + + +U_NAMESPACE_END +#endif /* #if !UCONFIG_NO_FORMATTING */ |