diff options
Diffstat (limited to 'intl/icu/source/i18n/affixpatternparser.h')
-rw-r--r-- | intl/icu/source/i18n/affixpatternparser.h | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/affixpatternparser.h b/intl/icu/source/i18n/affixpatternparser.h new file mode 100644 index 000000000..1e534c9f3 --- /dev/null +++ b/intl/icu/source/i18n/affixpatternparser.h @@ -0,0 +1,402 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* affixpatternparser.h +* +* created on: 2015jan06 +* created by: Travis Keep +*/ + +#ifndef __AFFIX_PATTERN_PARSER_H__ +#define __AFFIX_PATTERN_PARSER_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/unistr.h" +#include "unicode/uobject.h" +#include "pluralaffix.h" + +U_NAMESPACE_BEGIN + +class PluralRules; +class FixedPrecision; +class DecimalFormatSymbols; + +/** + * A representation of the various forms of a particular currency according + * to some locale and usage context. + * + * Includes the symbol, ISO code form, and long form(s) of the currency name + * for each plural variation. + */ +class U_I18N_API CurrencyAffixInfo : public UMemory { +public: + /** + * Symbol is \u00a4; ISO form is \u00a4\u00a4; + * long form is \u00a4\u00a4\u00a4. + */ + CurrencyAffixInfo(); + + const UnicodeString &getSymbol() const { return fSymbol; } + const UnicodeString &getISO() const { return fISO; } + const PluralAffix &getLong() const { return fLong; } + void setSymbol(const UnicodeString &symbol) { + fSymbol = symbol; + fIsDefault = FALSE; + } + void setISO(const UnicodeString &iso) { + fISO = iso; + fIsDefault = FALSE; + } + UBool + equals(const CurrencyAffixInfo &other) const { + return (fSymbol == other.fSymbol) + && (fISO == other.fISO) + && (fLong.equals(other.fLong)) + && (fIsDefault == other.fIsDefault); + } + + /** + * Intializes this instance. + * + * @param locale the locale for the currency forms. + * @param rules The plural rules for the locale. + * @param currency the null terminated, 3 character ISO code of the + * currency. If NULL, resets this instance as if it were just created. + * In this case, the first 2 parameters may be NULL as well. + * @param status any error returned here. + */ + void set( + const char *locale, const PluralRules *rules, + const UChar *currency, UErrorCode &status); + + /** + * Returns true if this instance is the default. That is has no real + * currency. For instance never initialized with set() + * or reset with set(NULL, NULL, NULL, status). + */ + UBool isDefault() const { return fIsDefault; } + + /** + * Adjusts the precision used for a particular currency. + * @param currency the null terminated, 3 character ISO code of the + * currency. + * @param usage the usage of the currency + * @param precision min/max fraction digits and rounding increment + * adjusted. + * @params status any error reported here. + */ + static void adjustPrecision( + const UChar *currency, const UCurrencyUsage usage, + FixedPrecision &precision, UErrorCode &status); + +private: + /** + * The symbol form of the currency. + */ + UnicodeString fSymbol; + + /** + * The ISO form of the currency, usually three letter abbreviation. + */ + UnicodeString fISO; + + /** + * The long forms of the currency keyed by plural variation. + */ + PluralAffix fLong; + + UBool fIsDefault; + +}; + +class AffixPatternIterator; + +/** + * A locale agnostic representation of an affix pattern. + */ +class U_I18N_API AffixPattern : public UMemory { +public: + + /** + * The token types that can appear in an affix pattern. + */ + enum ETokenType { + kLiteral, + kPercent, + kPerMill, + kCurrency, + kNegative, + kPositive + }; + + /** + * An empty affix pattern. + */ + AffixPattern() + : tokens(), literals(), hasCurrencyToken(FALSE), + hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) { + } + + /** + * Adds a string literal to this affix pattern. + */ + void addLiteral(const UChar *, int32_t start, int32_t len); + + /** + * Adds a token to this affix pattern. t must not be kLiteral as + * the addLiteral() method adds literals. + * @param t the token type to add + */ + void add(ETokenType t); + + /** + * Adds a currency token with specific count to this affix pattern. + * @param count the token count. Used to distinguish between + * one, two, or three currency symbols. Note that adding a currency + * token with count=2 (Use ISO code) is different than adding two + * currency tokens each with count=1 (two currency symbols). + */ + void addCurrency(uint8_t count); + + /** + * Makes this instance be an empty affix pattern. + */ + void remove(); + + /** + * Provides an iterator over the tokens in this instance. + * @param result this is initialized to point just before the + * first token of this instance. Caller must call nextToken() + * on the iterator once it is set up to have it actually point + * to the first token. This first call to nextToken() will return + * FALSE if the AffixPattern being iterated over is empty. + * @return result + */ + AffixPatternIterator &iterator(AffixPatternIterator &result) const; + + /** + * Returns TRUE if this instance has currency tokens in it. + */ + UBool usesCurrency() const { + return hasCurrencyToken; + } + + UBool usesPercent() const { + return hasPercentToken; + } + + UBool usesPermill() const { + return hasPermillToken; + } + + /** + * Returns the number of code points a string of this instance + * would have if none of the special tokens were escaped. + * Used to compute the padding size. + */ + int32_t countChar32() const { + return char32Count; + } + + /** + * Appends other to this instance mutating this instance in place. + * @param other The pattern appended to the end of this one. + * @return a reference to this instance for chaining. + */ + AffixPattern &append(const AffixPattern &other); + + /** + * Converts this AffixPattern back into a user string. + * It is the inverse of parseUserAffixString. + */ + UnicodeString &toUserString(UnicodeString &appendTo) const; + + /** + * Converts this AffixPattern back into a string. + * It is the inverse of parseAffixString. + */ + UnicodeString &toString(UnicodeString &appendTo) const; + + /** + * Parses an affix pattern string appending it to an AffixPattern. + * Parses affix pattern strings produced from using + * DecimalFormatPatternParser to parse a format pattern. Affix patterns + * include the positive prefix and suffix and the negative prefix + * and suffix. This method expects affix patterns strings to be in the + * same format that DecimalFormatPatternParser produces. Namely special + * characters in the affix that correspond to a field type must be + * prefixed with an apostrophe ('). These special character sequences + * inluce minus (-), percent (%), permile (U+2030), plus (+), + * short currency (U+00a4), medium currency (u+00a4 * 2), + * long currency (u+a4 * 3), and apostrophe (') + * (apostrophe does not correspond to a field type but has to be escaped + * because it itself is the escape character). + * Since the expansion of these special character + * sequences is locale dependent, these sequences are not expanded in + * an AffixPattern instance. + * If these special characters are not prefixed with an apostrophe in + * the affix pattern string, then they are treated verbatim just as + * any other character. If an apostrophe prefixes a non special + * character in the affix pattern, the apostrophe is simply ignored. + * + * @param affixStr the string from DecimalFormatPatternParser + * @param appendTo parsed result appended here. + * @param status any error parsing returned here. + */ + static AffixPattern &parseAffixString( + const UnicodeString &affixStr, + AffixPattern &appendTo, + UErrorCode &status); + + /** + * Parses an affix pattern string appending it to an AffixPattern. + * Parses affix pattern strings as the user would supply them. + * In this function, quoting makes special characters like normal + * characters whereas in parseAffixString, quoting makes special + * characters special. + * + * @param affixStr the string from the user + * @param appendTo parsed result appended here. + * @param status any error parsing returned here. + */ + static AffixPattern &parseUserAffixString( + const UnicodeString &affixStr, + AffixPattern &appendTo, + UErrorCode &status); + + UBool equals(const AffixPattern &other) const { + return (tokens == other.tokens) + && (literals == other.literals) + && (hasCurrencyToken == other.hasCurrencyToken) + && (hasPercentToken == other.hasPercentToken) + && (hasPermillToken == other.hasPermillToken) + && (char32Count == other.char32Count); + } + +private: + /* + * Tokens stored here. Each UChar generally stands for one token. A + * Each token is of form 'etttttttllllllll' llllllll is the length of + * the token and ranges from 0-255. ttttttt is the token type and ranges + * from 0-127. If e is set it means this is an extendo token (to be + * described later). To accomodate token lengths above 255, each normal + * token (e=0) can be followed by 0 or more extendo tokens (e=1) with + * the same type. Right now only kLiteral Tokens have extendo tokens. + * Each extendo token provides the next 8 higher bits for the length. + * If a kLiteral token is followed by 2 extendo tokens then, then the + * llllllll of the next extendo token contains bits 8-15 of the length + * and the last extendo token contains bits 16-23 of the length. + */ + UnicodeString tokens; + + /* + * The characters of the kLiteral tokens are concatenated together here. + * The first characters go with the first kLiteral token, the next + * characters go with the next kLiteral token etc. + */ + UnicodeString literals; + UBool hasCurrencyToken; + UBool hasPercentToken; + UBool hasPermillToken; + int32_t char32Count; + void add(ETokenType t, uint8_t count); + +}; + +/** + * An iterator over the tokens in an AffixPattern instance. + */ +class U_I18N_API AffixPatternIterator : public UMemory { +public: + + /** + * Using an iterator without first calling iterator on an AffixPattern + * instance to initialize the iterator results in + * undefined behavior. + */ + AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { } + /** + * Advances this iterator to the next token. Returns FALSE when there + * are no more tokens. Calling the other methods after nextToken() + * returns FALSE results in undefined behavior. + */ + UBool nextToken(); + + /** + * Returns the type of token. + */ + AffixPattern::ETokenType getTokenType() const; + + /** + * For literal tokens, returns the literal string. Calling this for + * other token types results in undefined behavior. + * @param result replaced with a read-only alias to the literal string. + * @return result + */ + UnicodeString &getLiteral(UnicodeString &result) const; + + /** + * Returns the token length. Usually 1, but for currency tokens may + * be 2 for ISO code and 3 for long form. + */ + int32_t getTokenLength() const; +private: + int32_t nextLiteralIndex; + int32_t lastLiteralLength; + int32_t nextTokenIndex; + const UnicodeString *tokens; + const UnicodeString *literals; + friend class AffixPattern; + AffixPatternIterator(const AffixPatternIterator &); + AffixPatternIterator &operator=(const AffixPatternIterator &); +}; + +/** + * A locale aware class that converts locale independent AffixPattern + * instances into locale dependent PluralAffix instances. + */ +class U_I18N_API AffixPatternParser : public UMemory { +public: +AffixPatternParser(); +AffixPatternParser(const DecimalFormatSymbols &symbols); +void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols); + +/** + * Parses affixPattern appending the result to appendTo. + * @param affixPattern The affix pattern. + * @param currencyAffixInfo contains the currency forms. + * @param appendTo The result of parsing affixPattern is appended here. + * @param status any error returned here. + * @return appendTo. + */ +PluralAffix &parse( + const AffixPattern &affixPattern, + const CurrencyAffixInfo ¤cyAffixInfo, + PluralAffix &appendTo, + UErrorCode &status) const; + +UBool equals(const AffixPatternParser &other) const { + return (fPercent == other.fPercent) + && (fPermill == other.fPermill) + && (fNegative == other.fNegative) + && (fPositive == other.fPositive); +} + +private: +UnicodeString fPercent; +UnicodeString fPermill; +UnicodeString fNegative; +UnicodeString fPositive; +}; + + +U_NAMESPACE_END +#endif /* #if !UCONFIG_NO_FORMATTING */ +#endif // __AFFIX_PATTERN_PARSER_H__ |