summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/affixpatternparser.h
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/affixpatternparser.h')
-rw-r--r--intl/icu/source/i18n/affixpatternparser.h402
1 files changed, 402 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/affixpatternparser.h b/intl/icu/source/i18n/affixpatternparser.h
new file mode 100644
index 000000000..1e534c9f3
--- /dev/null
+++ b/intl/icu/source/i18n/affixpatternparser.h
@@ -0,0 +1,402 @@
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* affixpatternparser.h
+*
+* created on: 2015jan06
+* created by: Travis Keep
+*/
+
+#ifndef __AFFIX_PATTERN_PARSER_H__
+#define __AFFIX_PATTERN_PARSER_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "pluralaffix.h"
+
+U_NAMESPACE_BEGIN
+
+class PluralRules;
+class FixedPrecision;
+class DecimalFormatSymbols;
+
+/**
+ * A representation of the various forms of a particular currency according
+ * to some locale and usage context.
+ *
+ * Includes the symbol, ISO code form, and long form(s) of the currency name
+ * for each plural variation.
+ */
+class U_I18N_API CurrencyAffixInfo : public UMemory {
+public:
+ /**
+ * Symbol is \u00a4; ISO form is \u00a4\u00a4;
+ * long form is \u00a4\u00a4\u00a4.
+ */
+ CurrencyAffixInfo();
+
+ const UnicodeString &getSymbol() const { return fSymbol; } // symbol form of the currency
+ const UnicodeString &getISO() const { return fISO; } // ISO form of the currency
+ const PluralAffix &getLong() const { return fLong; } // long forms keyed by plural variation
+ void setSymbol(const UnicodeString &symbol) {
+ fSymbol = symbol;
+ fIsDefault = FALSE; // an explicitly set symbol means this is no longer the default
+ }
+ void setISO(const UnicodeString &iso) {
+ fISO = iso;
+ fIsDefault = FALSE; // an explicitly set ISO form means this is no longer the default
+ }
+ UBool
+ equals(const CurrencyAffixInfo &other) const { // member-wise; the default flag is compared too
+ return (fSymbol == other.fSymbol)
+ && (fISO == other.fISO)
+ && (fLong.equals(other.fLong))
+ && (fIsDefault == other.fIsDefault);
+ }
+
+ /**
+ * Initializes this instance.
+ *
+ * @param locale the locale for the currency forms.
+ * @param rules The plural rules for the locale.
+ * @param currency the null terminated, 3 character ISO code of the
+ * currency. If NULL, resets this instance as if it were just created.
+ * In this case, the first 2 parameters may be NULL as well.
+ * @param status any error returned here.
+ */
+ void set(
+ const char *locale, const PluralRules *rules,
+ const UChar *currency, UErrorCode &status);
+
+ /**
+ * Returns true if this instance is the default, that is, it has no real
+ * currency: for instance, it was never initialized with set(), or it was
+ * reset with set(NULL, NULL, NULL, status).
+ */
+ UBool isDefault() const { return fIsDefault; }
+
+ /**
+ * Adjusts the precision used for a particular currency.
+ * @param currency the null terminated, 3 character ISO code of the
+ * currency.
+ * @param usage the usage of the currency
+ * @param precision min/max fraction digits and rounding increment
+ * adjusted.
+ * @param status any error reported here.
+ */
+ static void adjustPrecision(
+ const UChar *currency, const UCurrencyUsage usage,
+ FixedPrecision &precision, UErrorCode &status);
+
+private:
+ /**
+ * The symbol form of the currency.
+ */
+ UnicodeString fSymbol;
+
+ /**
+ * The ISO form of the currency, usually three letter abbreviation.
+ */
+ UnicodeString fISO;
+
+ /**
+ * The long forms of the currency keyed by plural variation.
+ */
+ PluralAffix fLong;
+
+ UBool fIsDefault; // reported by isDefault(); set to FALSE by setSymbol() and setISO()
+
+};
+
+class AffixPatternIterator;
+
+/**
+ * A locale agnostic representation of an affix pattern.
+ */
+class U_I18N_API AffixPattern : public UMemory {
+public:
+
+ /**
+ * The token types that can appear in an affix pattern.
+ */
+ enum ETokenType {
+ kLiteral,
+ kPercent,
+ kPerMill,
+ kCurrency,
+ kNegative,
+ kPositive
+ };
+
+ /**
+ * An empty affix pattern.
+ */
+ AffixPattern()
+ : tokens(), literals(), hasCurrencyToken(FALSE),
+ hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) {
+ }
+
+ /**
+ * Adds a string literal to this affix pattern.
+ */
+ void addLiteral(const UChar *, int32_t start, int32_t len);
+
+ /**
+ * Adds a token to this affix pattern. t must not be kLiteral as
+ * the addLiteral() method adds literals.
+ * @param t the token type to add
+ */
+ void add(ETokenType t);
+
+ /**
+ * Adds a currency token with specific count to this affix pattern.
+ * @param count the token count. Used to distinguish between
+ * one, two, or three currency symbols. Note that adding a currency
+ * token with count=2 (Use ISO code) is different than adding two
+ * currency tokens each with count=1 (two currency symbols).
+ */
+ void addCurrency(uint8_t count);
+
+ /**
+ * Makes this instance be an empty affix pattern.
+ */
+ void remove();
+
+ /**
+ * Provides an iterator over the tokens in this instance.
+ * @param result this is initialized to point just before the
+ * first token of this instance. Caller must call nextToken()
+ * on the iterator once it is set up to have it actually point
+ * to the first token. This first call to nextToken() will return
+ * FALSE if the AffixPattern being iterated over is empty.
+ * @return result
+ */
+ AffixPatternIterator &iterator(AffixPatternIterator &result) const;
+
+ /**
+ * Returns TRUE if this instance has currency tokens in it.
+ */
+ UBool usesCurrency() const {
+ return hasCurrencyToken;
+ }
+
+ UBool usesPercent() const { // percent counterpart of usesCurrency()
+ return hasPercentToken;
+ }
+
+ UBool usesPermill() const { // permill counterpart of usesCurrency()
+ return hasPermillToken;
+ }
+
+ /**
+ * Returns the number of code points a string of this instance
+ * would have if none of the special tokens were escaped.
+ * Used to compute the padding size.
+ */
+ int32_t countChar32() const {
+ return char32Count;
+ }
+
+ /**
+ * Appends other to this instance mutating this instance in place.
+ * @param other The pattern appended to the end of this one.
+ * @return a reference to this instance for chaining.
+ */
+ AffixPattern &append(const AffixPattern &other);
+
+ /**
+ * Converts this AffixPattern back into a user string.
+ * It is the inverse of parseUserAffixString.
+ */
+ UnicodeString &toUserString(UnicodeString &appendTo) const;
+
+ /**
+ * Converts this AffixPattern back into a string.
+ * It is the inverse of parseAffixString.
+ */
+ UnicodeString &toString(UnicodeString &appendTo) const;
+
+ /**
+ * Parses an affix pattern string appending it to an AffixPattern.
+ * Parses affix pattern strings produced from using
+ * DecimalFormatPatternParser to parse a format pattern. Affix patterns
+ * include the positive prefix and suffix and the negative prefix
+ * and suffix. This method expects affix patterns strings to be in the
+ * same format that DecimalFormatPatternParser produces. Namely special
+ * characters in the affix that correspond to a field type must be
+ * prefixed with an apostrophe ('). These special character sequences
+ * include minus (-), percent (%), per mille (U+2030), plus (+),
+ * short currency (U+00a4), medium currency (U+00a4 * 2),
+ * long currency (U+00a4 * 3), and apostrophe (')
+ * (apostrophe does not correspond to a field type but has to be escaped
+ * because it itself is the escape character).
+ * Since the expansion of these special character
+ * sequences is locale dependent, these sequences are not expanded in
+ * an AffixPattern instance.
+ * If these special characters are not prefixed with an apostrophe in
+ * the affix pattern string, then they are treated verbatim just as
+ * any other character. If an apostrophe prefixes a non special
+ * character in the affix pattern, the apostrophe is simply ignored.
+ *
+ * @param affixStr the string from DecimalFormatPatternParser
+ * @param appendTo parsed result appended here.
+ * @param status any error parsing returned here.
+ */
+ static AffixPattern &parseAffixString(
+ const UnicodeString &affixStr,
+ AffixPattern &appendTo,
+ UErrorCode &status);
+
+ /**
+ * Parses an affix pattern string appending it to an AffixPattern.
+ * Parses affix pattern strings as the user would supply them.
+ * In this function, quoting makes special characters like normal
+ * characters whereas in parseAffixString, quoting makes special
+ * characters special.
+ *
+ * @param affixStr the string from the user
+ * @param appendTo parsed result appended here.
+ * @param status any error parsing returned here.
+ */
+ static AffixPattern &parseUserAffixString(
+ const UnicodeString &affixStr,
+ AffixPattern &appendTo,
+ UErrorCode &status);
+
+ UBool equals(const AffixPattern &other) const { // member-wise comparison of all six data members
+ return (tokens == other.tokens)
+ && (literals == other.literals)
+ && (hasCurrencyToken == other.hasCurrencyToken)
+ && (hasPercentToken == other.hasPercentToken)
+ && (hasPermillToken == other.hasPermillToken)
+ && (char32Count == other.char32Count);
+ }
+
+private:
+ /*
+ * Tokens stored here. Each UChar generally stands for one token.
+ * Each token is of form 'etttttttllllllll'. llllllll is the length of
+ * the token and ranges from 0-255. ttttttt is the token type and ranges
+ * from 0-127. If e is set it means this is an extendo token (described
+ * in the following sentences). To accommodate token lengths above 255, each normal
+ * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
+ * the same type. Right now only kLiteral tokens have extendo tokens.
+ * Each extendo token provides the next 8 higher bits for the length.
+ * If a kLiteral token is followed by 2 extendo tokens, then the
+ * llllllll of the next extendo token contains bits 8-15 of the length
+ * and the last extendo token contains bits 16-23 of the length.
+ */
+ UnicodeString tokens;
+
+ /*
+ * The characters of the kLiteral tokens are concatenated together here.
+ * The first characters go with the first kLiteral token, the next
+ * characters go with the next kLiteral token etc.
+ */
+ UnicodeString literals;
+ UBool hasCurrencyToken; // reported by usesCurrency()
+ UBool hasPercentToken; // reported by usesPercent()
+ UBool hasPermillToken; // reported by usesPermill()
+ int32_t char32Count; // reported by countChar32()
+ void add(ETokenType t, uint8_t count); // NOTE(review): presumably the shared helper behind add(t) and addCurrency() - confirm in the .cpp
+
+};
+
+/**
+ * An iterator over the tokens in an AffixPattern instance.
+ */
+class U_I18N_API AffixPatternIterator : public UMemory {
+public:
+
+ /**
+ * Using an iterator without first calling iterator on an AffixPattern
+ * instance to initialize the iterator results in
+ * undefined behavior.
+ */
+ AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { } // starts with zeroed indices and NULL tokens/literals pointers
+ /**
+ * Advances this iterator to the next token. Returns FALSE when there
+ * are no more tokens. Calling the other methods after nextToken()
+ * returns FALSE results in undefined behavior.
+ */
+ UBool nextToken();
+
+ /**
+ * Returns the type of token.
+ */
+ AffixPattern::ETokenType getTokenType() const;
+
+ /**
+ * For literal tokens, returns the literal string. Calling this for
+ * other token types results in undefined behavior.
+ * @param result replaced with a read-only alias to the literal string.
+ * @return result
+ */
+ UnicodeString &getLiteral(UnicodeString &result) const;
+
+ /**
+ * Returns the token length. Usually 1, but for currency tokens may
+ * be 2 for ISO code and 3 for long form.
+ */
+ int32_t getTokenLength() const;
+private:
+ int32_t nextLiteralIndex;
+ int32_t lastLiteralLength;
+ int32_t nextTokenIndex;
+ const UnicodeString *tokens; // NULL after construction; NOTE(review): presumably set by AffixPattern::iterator() - confirm in the .cpp
+ const UnicodeString *literals; // NULL after construction; NOTE(review): presumably set by AffixPattern::iterator() - confirm in the .cpp
+ friend class AffixPattern; // gives AffixPattern access to the private members above
+ AffixPatternIterator(const AffixPatternIterator &); // private: no public copy construction
+ AffixPatternIterator &operator=(const AffixPatternIterator &); // private: no public copy assignment
+};
+
+/**
+ * A locale aware class that converts locale independent AffixPattern
+ * instances into locale dependent PluralAffix instances.
+ */
+class U_I18N_API AffixPatternParser : public UMemory {
+public:
+AffixPatternParser(); // NOTE(review): initial values of the symbol strings are set in the .cpp - not visible here
+AffixPatternParser(const DecimalFormatSymbols &symbols); // NOTE(review): not explicit, so a DecimalFormatSymbols converts implicitly
+void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols); // NOTE(review): presumably refreshes the four symbol strings below - confirm in the .cpp
+
+/**
+ * Parses affixPattern appending the result to appendTo.
+ * @param affixPattern The affix pattern.
+ * @param currencyAffixInfo contains the currency forms.
+ * @param appendTo The result of parsing affixPattern is appended here.
+ * @param status any error returned here.
+ * @return appendTo.
+ */
+PluralAffix &parse(
+ const AffixPattern &affixPattern,
+ const CurrencyAffixInfo &currencyAffixInfo,
+ PluralAffix &appendTo,
+ UErrorCode &status) const;
+
+UBool equals(const AffixPatternParser &other) const { // TRUE when all four symbol strings match
+ return (fPercent == other.fPercent)
+ && (fPermill == other.fPermill)
+ && (fNegative == other.fNegative)
+ && (fPositive == other.fPositive);
+}
+
+private:
+UnicodeString fPercent;
+UnicodeString fPermill;
+UnicodeString fNegative;
+UnicodeString fPositive;
+};
+
+
+U_NAMESPACE_END
+#endif /* #if !UCONFIG_NO_FORMATTING */
+#endif // __AFFIX_PATTERN_PARSER_H__