diff options
Diffstat (limited to 'intl/icu/source/i18n/unicode')
78 files changed, 52439 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/unicode/alphaindex.h b/intl/icu/source/i18n/unicode/alphaindex.h new file mode 100644 index 000000000..c7b361ff9 --- /dev/null +++ b/intl/icu/source/i18n/unicode/alphaindex.h @@ -0,0 +1,754 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2011-2014 International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +*/ + +#ifndef INDEXCHARS_H +#define INDEXCHARS_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/locid.h" + +#if !UCONFIG_NO_COLLATION + +/** + * \file + * \brief C++ API: Index Characters + */ + +U_CDECL_BEGIN + +/** + * Constants for Alphabetic Index Label Types. + * The form of these enum constants anticipates having a plain C API + * for Alphabetic Indexes that will also use them. + * @stable ICU 4.8 + */ +typedef enum UAlphabeticIndexLabelType { + /** + * Normal Label, typically the starting letter of the names + * in the bucket with this label. + * @stable ICU 4.8 + */ + U_ALPHAINDEX_NORMAL = 0, + + /** + * Underflow Label. The bucket with this label contains names + * in scripts that sort before any of the bucket labels in this index. + * @stable ICU 4.8 + */ + U_ALPHAINDEX_UNDERFLOW = 1, + + /** + * Inflow Label. The bucket with this label contains names + * in scripts that sort between two of the bucket labels in this index. + * Inflow labels are created when an index contains normal labels for + * multiple scripts, and skips other scripts that sort between some of the + * included scripts. + * @stable ICU 4.8 + */ + U_ALPHAINDEX_INFLOW = 2, + + /** + * Overflow Label. The bucket with this label contains names in scripts + * that sort after all of the bucket labels in this index. 
+ * @stable ICU 4.8 + */ + U_ALPHAINDEX_OVERFLOW = 3 +} UAlphabeticIndexLabelType; + + +struct UHashtable; +U_CDECL_END + +U_NAMESPACE_BEGIN + +// Forward Declarations + +class BucketList; +class Collator; +class RuleBasedCollator; +class StringEnumeration; +class UnicodeSet; +class UVector; + +/** + * AlphabeticIndex supports the creation of a UI index appropriate for a given language. + * It can support either direct use, or use with a client that doesn't support localized collation. + * The following is an example of what an index might look like in a UI: + * + * <pre> + * <b>... A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ...</b> + * + * <b>A</b> + * Addison + * Albertson + * Azensky + * <b>B</b> + * Baker + * ... + * </pre> + * + * The class can generate a list of labels for use as a UI "index", that is, a list of + * clickable characters (or character sequences) that allow the user to see a segment + * (bucket) of a larger "target" list. That is, each label corresponds to a bucket in + * the target list, where everything in the bucket is greater than or equal to the character + * (according to the locale's collation). Strings can be added to the index; + * they will be in sorted order in the right bucket. + * <p> + * The class also supports having buckets for strings before the first (underflow), + * after the last (overflow), and between scripts (inflow). For example, if the index + * is constructed with labels for Russian and English, Greek characters would fall + * into an inflow bucket between the other two scripts. + * <p> + * The AlphabeticIndex class is not intended for public subclassing. + * + * <p><em>Note:</em> If you expect to have a lot of ASCII or Latin characters + * as well as characters from the user's language, + * then it is a good idea to call addLabels(Locale::getEnglish(), status).</p> + * + * <h2>Direct Use</h2> + * <p>The following shows an example of building an index directly. + * The "show..." 
methods below are just to illustrate usage. + * + * <pre> + * // Create a simple index. "Item" is assumed to be an application + * // defined type that the application's UI and other processing knows about, + * // and that has a name. + * + * UErrorCode status = U_ZERO_ERROR; + * AlphabeticIndex *index = new AlphabeticIndex(desiredLocale, status); + * index->addLabels(additionalLocale, status); + * for (Item *item in some source of Items ) { + * index->addRecord(item->name(), item, status); + * } + * ... + * // Show index at top. We could skip or gray out empty buckets + * + * while (index->nextBucket(status)) { + * if (showAll || index->getBucketRecordCount() != 0) { + * showLabelAtTop(UI, index->getBucketLabel()); + * } + * } + * ... + * // Show the buckets with their contents, skipping empty buckets + * + * index->resetBucketIterator(status); + * while (index->nextBucket(status)) { + * if (index->getBucketRecordCount() != 0) { + * showLabelInList(UI, index->getBucketLabel()); + * while (index->nextRecord(status)) { + * showIndexedItem(UI, static_cast<const Item *>(index->getRecordData())) + * </pre> + * + * The caller can build different UIs using this class. + * For example, an index character could be omitted or grayed-out + * if its bucket is empty. Small buckets could also be combined based on size, such as: + * + * <pre> + * <b>... A-F G-N O-Z ...</b> + * </pre> + * + * <h2>Client Support</h2> + * <p>Callers can also use the AlphabeticIndex::ImmutableIndex, or the AlphabeticIndex itself, + * to support sorting on a client that doesn't support AlphabeticIndex functionality. + * + * <p>The ImmutableIndex is both immutable and thread-safe. + * The corresponding AlphabeticIndex methods are not thread-safe because + * they "lazily" build the index buckets. + * <ul> + * <li>ImmutableIndex.getBucket(index) provides random access to all + * buckets and their labels and label types. 
+ * <li>The AlphabeticIndex bucket iterator or ImmutableIndex.getBucket(0..getBucketCount-1) + * can be used to get a list of the labels, + * such as "...", "A", "B",..., and send that list to the client. + * <li>When the client has a new name, it sends that name to the server. + * The server needs to call the following methods, + * and communicate the bucketIndex and collationKey back to the client. + * + * <pre> + * int32_t bucketIndex = index.getBucketIndex(name, status); + * const UnicodeString &label = immutableIndex.getBucket(bucketIndex)->getLabel(); // optional + * int32_t skLength = collator.getSortKey(name, sk, skCapacity); + * </pre> + * + * <li>The client would put the name (and associated information) into its bucket for bucketIndex. The sort key sk is a + * sequence of bytes that can be compared with a binary compare, and produce the right localized result.</li> + * </ul> + * + * @stable ICU 4.8 + */ +class U_I18N_API AlphabeticIndex: public UObject { +public: + /** + * An index "bucket" with a label string and type. + * It is referenced by getBucketIndex(), + * and returned by ImmutableIndex.getBucket(). + * + * The Bucket class is not intended for public subclassing. + * @stable ICU 51 + */ + class U_I18N_API Bucket : public UObject { + public: + /** + * Destructor. + * @stable ICU 51 + */ + virtual ~Bucket(); + + /** + * Returns the label string. + * + * @return the label string for the bucket + * @stable ICU 51 + */ + const UnicodeString &getLabel() const { return label_; } + /** + * Returns whether this bucket is a normal, underflow, overflow, or inflow bucket. 
+ * + * @return the bucket label type + * @stable ICU 51 + */ + UAlphabeticIndexLabelType getLabelType() const { return labelType_; } + + private: + friend class AlphabeticIndex; + friend class BucketList; + + UnicodeString label_; + UnicodeString lowerBoundary_; + UAlphabeticIndexLabelType labelType_; + Bucket *displayBucket_; + int32_t displayIndex_; + UVector *records_; // Records are owned by the inputList_ vector. + + Bucket(const UnicodeString &label, // Parameter strings are copied. + const UnicodeString &lowerBoundary, + UAlphabeticIndexLabelType type); + }; + + /** + * Immutable, thread-safe version of AlphabeticIndex. + * This class provides thread-safe methods for bucketing, + * and random access to buckets and their properties, + * but does not offer adding records to the index. + * + * The ImmutableIndex class is not intended for public subclassing. + * + * @stable ICU 51 + */ + class U_I18N_API ImmutableIndex : public UObject { + public: + /** + * Destructor. + * @stable ICU 51 + */ + virtual ~ImmutableIndex(); + + /** + * Returns the number of index buckets and labels, including underflow/inflow/overflow. + * + * @return the number of index buckets + * @stable ICU 51 + */ + int32_t getBucketCount() const; + + /** + * Finds the index bucket for the given name and returns the number of that bucket. + * Use getBucket() to get the bucket's properties. + * + * @param name the string to be sorted into an index bucket + * @return the bucket number for the name + * @stable ICU 51 + */ + int32_t getBucketIndex(const UnicodeString &name, UErrorCode &errorCode) const; + + /** + * Returns the index-th bucket. Returns NULL if the index is out of range. 
+ * + * @param index bucket number + * @return the index-th bucket + * @stable ICU 51 + */ + const Bucket *getBucket(int32_t index) const; + + private: + friend class AlphabeticIndex; + + ImmutableIndex(BucketList *bucketList, Collator *collatorPrimaryOnly) + : buckets_(bucketList), collatorPrimaryOnly_(collatorPrimaryOnly) {} + + BucketList *buckets_; + Collator *collatorPrimaryOnly_; + }; + + /** + * Construct an AlphabeticIndex object for the specified locale. If the locale's + * data does not include index characters, a set of them will be + * synthesized based on the locale's exemplar characters. The locale + * determines the sorting order for both the index characters and the + * user item names appearing under each Index character. + * + * @param locale the desired locale. + * @param status Error code, will be set with the reason if the construction + * of the AlphabeticIndex object fails. + * @stable ICU 4.8 + */ + AlphabeticIndex(const Locale &locale, UErrorCode &status); + + /** + * Construct an AlphabeticIndex that uses a specific collator. + * + * The index will be created with no labels; the addLabels() function must be called + * after creation to add the desired labels to the index. + * + * The index adopts the collator, and is responsible for deleting it. + * The caller should make no further use of the collator after creating the index. + * + * @param collator The collator to use to order the contents of this index. + * @param status Error code, will be set with the reason if the + * operation fails. + * @stable ICU 51 + */ + AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status); + + /** + * Add Labels to this Index. The labels are additions to those + * that are already in the index; they do not replace the existing + * ones. + * @param additions The additional characters to add to the index, such as A-Z. + * @param status Error code, will be set with the reason if the + * operation fails. 
+ * @return this, for chaining + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status); + + /** + * Add the index characters from a Locale to the index. The labels + * are added to those that are already in the index; they do not replace the + * existing index characters. The collation order for this index is not + * changed; it remains that of the locale that was originally specified + * when creating this Index. + * + * @param locale The locale whose index characters are to be added. + * @param status Error code, will be set with the reason if the + * operation fails. + * @return this, for chaining + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status); + + /** + * Destructor + * @stable ICU 4.8 + */ + virtual ~AlphabeticIndex(); + + /** + * Builds an immutable, thread-safe version of this instance, without data records. + * + * @return an immutable index instance + * @stable ICU 51 + */ + ImmutableIndex *buildImmutableIndex(UErrorCode &errorCode); + + /** + * Get the Collator that establishes the ordering of the items in this index. + * Ownership of the collator remains with the AlphabeticIndex instance. + * + * The returned collator is a reference to the internal collator used by this + * index. It may be safely used to compare the names of items or to get + * sort keys for names. However if any settings need to be changed, + * or other non-const methods called, a cloned copy must be made first. + * + * @return The collator + * @stable ICU 4.8 + */ + virtual const RuleBasedCollator &getCollator() const; + + + /** + * Get the default label used for abbreviated buckets <i>between</i> other index characters. + * For example, consider the labels when Latin and Greek are used: + * X Y Z ... Α Β Γ. 
+ * + * @return inflow label + * @stable ICU 4.8 + */ + virtual const UnicodeString &getInflowLabel() const; + + /** + * Set the default label used for abbreviated buckets <i>between</i> other index characters. + * An inflow label will be automatically inserted if two otherwise-adjacent label characters + * are from different scripts, e.g. Latin and Cyrillic, and a third script, e.g. Greek, + * sorts between the two. The default inflow character is an ellipsis (...) + * + * @param inflowLabel the new Inflow label. + * @param status Error code, will be set with the reason if the operation fails. + * @return this + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status); + + + /** + * Get the special label used for items that sort after the last normal label, + * and that would not otherwise have an appropriate label. + * + * @return the overflow label + * @stable ICU 4.8 + */ + virtual const UnicodeString &getOverflowLabel() const; + + + /** + * Set the label used for items that sort after the last normal label, + * and that would not otherwise have an appropriate label. + * + * @param overflowLabel the new overflow label. + * @param status Error code, will be set with the reason if the operation fails. + * @return this + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status); + + /** + * Get the special label used for items that sort before the first normal label, + * and that would not otherwise have an appropriate label. + * + * @return underflow label + * @stable ICU 4.8 + */ + virtual const UnicodeString &getUnderflowLabel() const; + + /** + * Set the label used for items that sort before the first normal label, + * and that would not otherwise have an appropriate label. + * + * @param underflowLabel the new underflow label. + * @param status Error code, will be set with the reason if the operation fails. 
+ * @return this + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status); + + + /** + * Get the limit on the number of labels permitted in the index. + * The number does not include over, under and inflow labels. + * + * @return maxLabelCount maximum number of labels. + * @stable ICU 4.8 + */ + virtual int32_t getMaxLabelCount() const; + + /** + * Set a limit on the number of labels permitted in the index. + * The number does not include over, under and inflow labels. + * Currently, if the number is exceeded, then every + * nth item is removed to bring the count down. + * A more sophisticated mechanism may be available in the future. + * + * @param maxLabelCount the maximum number of labels. + * @param status error code + * @return This, for chaining + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status); + + + /** + * Add a record to the index. Each record will be associated with an index Bucket + * based on the record's name. The list of records for each bucket will be sorted + * based on the collation ordering of the names in the index's locale. + * Records with duplicate names are permitted; they will be kept in the order + * that they were added. + * + * @param name The display name for the Record. The Record will be placed in + * a bucket based on this name. + * @param data An optional pointer to user data associated with this + * item. When iterating the contents of a bucket, both the + * data pointer and the name will be available for each Record. + * @param status Error code, will be set with the reason if the operation fails. + * @return This, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status); + + /** + * Remove all Records from the Index. The set of Buckets, which define the headings under + * which records are classified, is not altered. 
+ * + * @param status Error code, will be set with the reason if the operation fails. + * @return This, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &clearRecords(UErrorCode &status); + + + /** Get the number of labels in this index. + * Note: may trigger lazy index construction. + * + * @param status Error code, will be set with the reason if the operation fails. + * @return The number of labels in this index, including any under, over or + * in-flow labels. + * @stable ICU 4.8 + */ + virtual int32_t getBucketCount(UErrorCode &status); + + + /** Get the total number of Records in this index, that is, the number + * of <name, data> pairs added. + * + * @param status Error code, will be set with the reason if the operation fails. + * @return The number of records in this index, that is, the total number + * of (name, data) items added with addRecord(). + * @stable ICU 4.8 + */ + virtual int32_t getRecordCount(UErrorCode &status); + + + + /** + * Given the name of a record, return the zero-based index of the Bucket + * in which the item should appear. The name need not be in the index. + * A Record will not be added to the index by this function. + * Bucket numbers are zero-based, in Bucket iteration order. + * + * @param itemName The name whose bucket position in the index is to be determined. + * @param status Error code, will be set with the reason if the operation fails. + * @return The bucket number for this name. + * @stable ICU 4.8 + * + */ + virtual int32_t getBucketIndex(const UnicodeString &itemName, UErrorCode &status); + + + /** + * Get the zero based index of the current Bucket from an iteration + * over the Buckets of this index. Return -1 if no iteration is in process. + * @return the index of the current Bucket + * @stable ICU 4.8 + */ + virtual int32_t getBucketIndex() const; + + + /** + * Advance the iteration over the Buckets of this index. Return FALSE if + * there are no more Buckets. 
+ * + * @param status Error code, will be set with the reason if the operation fails. + * U_ENUM_OUT_OF_SYNC_ERROR will be reported if the index is modified while + * an enumeration of its contents is in process. + * + * @return TRUE if success, FALSE if at end of iteration + * @stable ICU 4.8 + */ + virtual UBool nextBucket(UErrorCode &status); + + /** + * Return the name of the Label of the current bucket from an iteration over the buckets. + * If the iteration is before the first Bucket (nextBucket() has not been called), + * or after the last, return an empty string. + * + * @return the bucket label. + * @stable ICU 4.8 + */ + virtual const UnicodeString &getBucketLabel() const; + + /** + * Return the type of the label for the current Bucket (selected by the + * iteration over Buckets.) + * + * @return the label type. + * @stable ICU 4.8 + */ + virtual UAlphabeticIndexLabelType getBucketLabelType() const; + + /** + * Get the number of <name, data> Records in the current Bucket. + * If the current bucket iteration position is before the first label or after the + * last, return 0. + * + * @return the number of Records. + * @stable ICU 4.8 + */ + virtual int32_t getBucketRecordCount() const; + + + /** + * Reset the Bucket iteration for this index. The next call to nextBucket() + * will restart the iteration at the first label. + * + * @param status Error code, will be set with the reason if the operation fails. + * @return this, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status); + + /** + * Advance to the next record in the current Bucket. + * When nextBucket() is called, Record iteration is reset to just before the + * first Record in the new Bucket. + * + * @param status Error code, will be set with the reason if the operation fails. + * U_ENUM_OUT_OF_SYNC_ERROR will be reported if the index is modified while + * an enumeration of its contents is in process. 
+ * @return TRUE if successful, FALSE when the iteration advances past the last item. + * @stable ICU 4.8 + */ + virtual UBool nextRecord(UErrorCode &status); + + /** + * Get the name of the current Record. + * Return an empty string if the Record iteration position is before the first + * or after the last. + * + * @return The name of the current index item. + * @stable ICU 4.8 + */ + virtual const UnicodeString &getRecordName() const; + + + /** + * Return the data pointer of the Record currently being iterated over. + * Return NULL if the current iteration position is before the first item in this Bucket, + * or after the last. + * + * @return The current Record's data pointer. + * @stable ICU 4.8 + */ + virtual const void *getRecordData() const; + + + /** + * Reset the Record iterator position to before the first Record in the current Bucket. + * + * @return This, for chaining. + * @stable ICU 4.8 + */ + virtual AlphabeticIndex &resetRecordIterator(); + +private: + /** + * No Copy constructor. + * @internal + */ + AlphabeticIndex(const AlphabeticIndex &other); + + /** + * No assignment. + */ + AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;}; + + /** + * No Equality operators. + * @internal + */ + virtual UBool operator==(const AlphabeticIndex& other) const; + + /** + * Inequality operator. + * @internal + */ + virtual UBool operator!=(const AlphabeticIndex& other) const; + + // Common initialization, for use from all constructors. + void init(const Locale *locale, UErrorCode &status); + + /** + * This method is called to get the index exemplars. Normally these come from the locale directly, + * but if they aren't available, we have to synthesize them. + */ + void addIndexExemplars(const Locale &locale, UErrorCode &status); + /** + * Add Chinese index characters from the tailoring. 
+ */ + UBool addChineseIndexCharacters(UErrorCode &errorCode); + + UVector *firstStringsInScript(UErrorCode &status); + + static UnicodeString separated(const UnicodeString &item); + + /** + * Determine the best labels to use. + * This is based on the exemplars, but we also process to make sure that they are unique, + * and sort differently, and that the overall list is small enough. + */ + void initLabels(UVector &indexCharacters, UErrorCode &errorCode) const; + BucketList *createBucketList(UErrorCode &errorCode) const; + void initBuckets(UErrorCode &errorCode); + void clearBuckets(); + void internalResetBucketIterator(); + +public: + + // The Record is declared public only to allow access from + // implementation code written in plain C. + // It is not intended for public use. + +#ifndef U_HIDE_INTERNAL_API + /** + * A (name, data) pair, to be sorted by name into one of the index buckets. + * The user data is not used by the index implementation. + * @internal + */ + struct Record: public UMemory { + const UnicodeString name_; + const void *data_; + Record(const UnicodeString &name, const void *data); + ~Record(); + }; +#endif /* U_HIDE_INTERNAL_API */ + +private: + + /** + * Holds all user records before they are distributed into buckets. + * Type of contents is (Record *) + * @internal + */ + UVector *inputList_; + + int32_t labelsIterIndex_; // Index of next item to return. + int32_t itemsIterIndex_; + Bucket *currentBucket_; // While an iteration of the index is underway, + // points to the bucket for the current label. + // NULL when no iteration is underway. + + int32_t maxLabelCount_; // Limit on # of labels permitted in the index. + + UnicodeSet *initialLabels_; // Initial (unprocessed) set of Labels. Union + // of those explicitly set by the user plus + // those from locales. Raw values, before + // crunching into bucket labels. + + UVector *firstCharsInScripts_; // The first character from each script, + // in collation order. 
+ + RuleBasedCollator *collator_; + RuleBasedCollator *collatorPrimaryOnly_; + + // Lazy evaluated: null means that we have not built yet. + BucketList *buckets_; + + UnicodeString inflowLabel_; + UnicodeString overflowLabel_; + UnicodeString underflowLabel_; + UnicodeString overflowComparisonString_; + + UnicodeString emptyString_; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_COLLATION +#endif diff --git a/intl/icu/source/i18n/unicode/basictz.h b/intl/icu/source/i18n/unicode/basictz.h new file mode 100644 index 000000000..90073b23a --- /dev/null +++ b/intl/icu/source/i18n/unicode/basictz.h @@ -0,0 +1,216 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2013, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ +#ifndef BASICTZ_H +#define BASICTZ_H + +/** + * \file + * \brief C++ API: ICU TimeZone base class + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/timezone.h" +#include "unicode/tzrule.h" +#include "unicode/tztrans.h" + +U_NAMESPACE_BEGIN + +// forward declarations +class UVector; + +/** + * <code>BasicTimeZone</code> is an abstract class extending <code>TimeZone</code>. + * This class provides some additional methods to access time zone transitions and rules. + * All ICU <code>TimeZone</code> concrete subclasses extend this class. + * @stable ICU 3.8 + */ +class U_I18N_API BasicTimeZone: public TimeZone { +public: + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~BasicTimeZone(); + + /** + * Gets the first time zone transition after the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first transition after the base time. 
+ * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const = 0; + + /** + * Gets the most recent time zone transition before the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent transition before the base time. + * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const = 0; + + /** + * Checks if the time zone has equivalent transitions in the time range. + * This method returns true when all of transition times, from/to standard + * offsets and DST savings used by this time zone match the other in the + * time range. + * @param tz The <code>BasicTimeZone</code> object to be compared with. + * @param start The start time of the evaluated time range (inclusive) + * @param end The end time of the evaluated time range (inclusive) + * @param ignoreDstAmount + * When true, any transitions with only daylight saving amount + * changes will be ignored, except when either of them is zero. + * For example, a transition from rawoffset 3:00/dstsavings 1:00 + * to rawoffset 2:00/dstsavings 2:00 is excluded from the comparison, + * but a transition from rawoffset 2:00/dstsavings 1:00 to + * rawoffset 3:00/dstsavings 0:00 is included. + * @param ec Output param to be filled in with a success or an error. + * @return true if the other time zone has the equivalent transitions in the + * time range. + * @stable ICU 3.8 + */ + virtual UBool hasEquivalentTransitions(const BasicTimeZone& tz, UDate start, UDate end, + UBool ignoreDstAmount, UErrorCode& ec) const; + + /** + * Returns the number of <code>TimeZoneRule</code>s which represents time transitions, + * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except + * <code>InitialTimeZoneRule</code>. 
The return value range is 0 or any positive value. + * @param status Receives error status code. + * @return The number of <code>TimeZoneRule</code>s representing time transitions. + * @stable ICU 3.8 + */ + virtual int32_t countTransitionRules(UErrorCode& status) const = 0; + + /** + * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code> + * which represent time transitions for this time zone. On successful return, + * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and + * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code> + * instances up to the size specified by trscount. The results are referencing the + * rule instance held by this time zone instance. Therefore, after this time zone + * is destructed, they are no longer available. + * @param initial Receives the initial timezone rule + * @param trsrules Receives the timezone transition rules + * @param trscount On input, specify the size of the array 'trsrules' receiving + * the timezone transition rules. On output, actual number of + * rules filled in the array will be set. + * @param status Receives error status code. + * @stable ICU 3.8 + */ + virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const = 0; + + /** + * Gets the set of time zone rules valid at the specified time. Some known external time zone + * implementations are not capable of handling historic time zone rule changes. Also some + * implementations can only handle certain type of rule definitions. + * If this time zone does not use any daylight saving time within about 1 year from the specified + * time, only the <code>InitialTimeZoneRule</code> is returned. Otherwise, the rule for standard + * time and daylight saving time transitions are returned in addition to the + * <code>InitialTimeZoneRule</code>. 
The standard and daylight saving time transition rules are + * represented by <code>AnnualTimeZoneRule</code> with <code>DateTimeRule::DOW</code> for its date + * rule and <code>DateTimeRule::WALL_TIME</code> for its time rule. Because daylight saving time + * rule is changing time to time in many time zones and also mapping a transition time rule to + * different type is lossy transformation, the set of rules returned by this method may be valid + * for short period of time. + * The time zone rule objects returned by this method are owned by the caller, so the caller is + * responsible for deleting them after use. + * @param date The date used for extracting time zone rules. + * @param initial Receives the <code>InitialTimeZoneRule</code>, always not NULL. + * @param std Receives the <code>AnnualTimeZoneRule</code> for standard time transitions. + * When this time zone does not observe daylight saving times around the + * specified date, NULL is set. + * @param dst Receives the <code>AnnualTimeZoneRule</code> for daylight saving time + * transitions. When this time zone does not observe daylight saving times + * around the specified date, NULL is set. + * @param status Receives error status code. + * @stable ICU 3.8 + */ + virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial, + AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) const; + + +#ifndef U_HIDE_INTERNAL_API + /** + * The time type option bit flags used by getOffsetFromLocal + * @internal + */ + enum { + kStandard = 0x01, + kDaylight = 0x03, + kFormer = 0x04, + kLatter = 0x0C + }; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Get time zone offsets from local wall time. 
+ * @internal + */ + virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; + +protected: + +#ifndef U_HIDE_INTERNAL_API + /** + * The time type option bit masks used by getOffsetFromLocal + * @internal + */ + enum { + kStdDstMask = kDaylight, + kFormerLatterMask = kLatter + }; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Default constructor. + * @stable ICU 3.8 + */ + BasicTimeZone(); + + /** + * Construct a timezone with a given ID. + * @param id a system time zone ID + * @stable ICU 3.8 + */ + BasicTimeZone(const UnicodeString &id); + + /** + * Copy constructor. + * @param source the object to be copied. + * @stable ICU 3.8 + */ + BasicTimeZone(const BasicTimeZone& source); + + /** + * Gets the set of TimeZoneRule instances applicable to the specified time and after. + * @param start The start date used for extracting time zone rules + * @param initial Receives the InitialTimeZone, always not NULL + * @param transitionRules Receives the transition rules, could be NULL + * @param status Receives error status code + */ + void getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules, + UErrorCode& status) const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // BASICTZ_H + +//eof diff --git a/intl/icu/source/i18n/unicode/calendar.h b/intl/icu/source/i18n/unicode/calendar.h new file mode 100644 index 000000000..8776e21d5 --- /dev/null +++ b/intl/icu/source/i18n/unicode/calendar.h @@ -0,0 +1,2551 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. 
+******************************************************************************** +* +* File CALENDAR.H +* +* Modification History: +* +* Date Name Description +* 04/22/97 aliu Expanded and corrected comments and other header +* contents. +* 05/01/97 aliu Made equals(), before(), after() arguments const. +* 05/20/97 aliu Replaced fAreFieldsSet with fAreFieldsInSync and +* fAreAllFieldsSet. +* 07/27/98 stephen Sync up with JDK 1.2 +* 11/15/99 weiv added YEAR_WOY and DOW_LOCAL +* to EDateFields +* 8/19/2002 srl Removed Javaisms +* 11/07/2003 srl Update, clean up documentation. +******************************************************************************** +*/ + +#ifndef CALENDAR_H +#define CALENDAR_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Calendar object + */ +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" +#include "unicode/locid.h" +#include "unicode/timezone.h" +#include "unicode/ucal.h" +#include "unicode/umisc.h" + +U_NAMESPACE_BEGIN + +class ICUServiceFactory; + +/** + * @internal + */ +typedef int32_t UFieldResolutionTable[12][8]; + +class BasicTimeZone; +/** + * <code>Calendar</code> is an abstract base class for converting between + * a <code>UDate</code> object and a set of integer fields such as + * <code>YEAR</code>, <code>MONTH</code>, <code>DAY</code>, <code>HOUR</code>, + * and so on. (A <code>UDate</code> object represents a specific instant in + * time with millisecond precision. See UDate + * for information about the <code>UDate</code> class.) + * + * <p> + * Subclasses of <code>Calendar</code> interpret a <code>UDate</code> + * according to the rules of a specific calendar system. + * The most commonly used subclass of <code>Calendar</code> is + * <code>GregorianCalendar</code>. Other subclasses could represent + * the various types of lunar calendars in use in many parts of the world. + * + * <p> + * <b>NOTE</b>: (ICU 2.6) The subclass interface should be considered unstable + * - it WILL change. 
+ * + * <p> + * Like other locale-sensitive classes, <code>Calendar</code> provides a + * static method, <code>createInstance</code>, for getting a generally useful + * object of this type. <code>Calendar</code>'s <code>createInstance</code> method + * returns the appropriate <code>Calendar</code> subclass whose + * time fields have been initialized with the current date and time: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * Calendar *rightNow = Calendar::createInstance(errCode); + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * <p> + * A <code>Calendar</code> object can produce all the time field values + * needed to implement the date-time formatting for a particular language + * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional). + * + * <p> + * When computing a <code>UDate</code> from time fields, some special circumstances + * may arise: there may be insufficient information to compute the + * <code>UDate</code> (such as only year and month but no day in the month), + * there may be inconsistent information (such as "Tuesday, July 15, 1996" + * -- July 15, 1996 is actually a Monday), or the input time might be ambiguous + * because of time zone transition. + * + * <p> + * <strong>Insufficient information.</strong> The calendar will use default + * information to specify the missing fields. This may vary by calendar; for + * the Gregorian calendar, the default for a field is the same as that of the + * start of the epoch: i.e., YEAR = 1970, MONTH = JANUARY, DATE = 1, etc. + * + * <p> + * <strong>Inconsistent information.</strong> If fields conflict, the calendar + * will give preference to fields set more recently. For example, when + * determining the day, the calendar will look for one of the following + * combinations of fields. The most recent combination, as determined by the + * most recently set single field, will be used. 
+ * + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * MONTH + DAY_OF_MONTH + * MONTH + WEEK_OF_MONTH + DAY_OF_WEEK + * MONTH + DAY_OF_WEEK_IN_MONTH + DAY_OF_WEEK + * DAY_OF_YEAR + * DAY_OF_WEEK + WEEK_OF_YEAR + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * For the time of day: + * + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * HOUR_OF_DAY + * AM_PM + HOUR + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * <p> + * <strong>Ambiguous Wall Clock Time.</strong> When the time offset from UTC has + * changed, it produces an ambiguous time slot around the transition. For example, + * many US locations observe daylight saving time. On the date switching to daylight + * saving time in the US, wall clock time jumps from 12:59 AM (standard) to 2:00 AM + * (daylight). Therefore, wall clock time from 1:00 AM to 1:59 AM does not exist on + * the date. When the input wall time falls into this missing time slot, the ICU + * Calendar resolves the time using the UTC offset before the transition by default. + * In this example, 1:30 AM is interpreted as 1:30 AM standard time (non-existent), + * so the final result will be 2:30 AM daylight time. + * + * <p>On the date switching back to standard time, wall clock time is moved back one + * hour at 2:00 AM. So wall clock time from 1:00 AM to 1:59 AM occurs twice. In this + * case, the ICU Calendar resolves the time using the UTC offset after the transition + * by default. For example, 1:30 AM on the date is resolved as 1:30 AM standard time. + * + * <p>Ambiguous wall clock time resolution behaviors can be customized by Calendar APIs + * {@link #setRepeatedWallTimeOption} and {@link #setSkippedWallTimeOption}. + * These methods are available in ICU 49 or later versions. + * + * <p> + * <strong>Note:</strong> for some non-Gregorian calendars, different + * fields may be necessary for complete disambiguation. 
For example, a full + * specification of the historical Arabic astronomical calendar requires year, + * month, day-of-month <em>and</em> day-of-week in some cases. + * + * <p> + * <strong>Note:</strong> There are certain possible ambiguities in + * interpretation of certain singular times, which are resolved in the + * following ways: + * <ol> + * <li> 24:00:00 "belongs" to the following day. That is, + * 23:59 on Dec 31, 1969 < 24:00 on Jan 1, 1970 < 24:01:00 on Jan 1, 1970 + * + * <li> Although historically not precise, midnight also belongs to "am", + * and noon belongs to "pm", so on the same day, + * 12:00 am (midnight) < 12:01 am, and 12:00 pm (noon) < 12:01 pm + * </ol> + * + * <p> + * The date or time format strings are not part of the definition of a + * calendar, as those must be modifiable or overridable by the user at + * runtime. Use {@link DateFormat} + * to format dates. + * + * <p> + * <code>Calendar</code> provides an API for field "rolling", where fields + * can be incremented or decremented, but wrap around. For example, rolling the + * month up in the date <code>December 12, <b>1996</b></code> results in + * <code>January 12, <b>1996</b></code>. + * + * <p> + * <code>Calendar</code> also provides a date arithmetic function for + * adding the specified (signed) amount of time to a particular time field. + * For example, subtracting 5 days from the date <code>September 12, 1996</code> + * results in <code>September 7, 1996</code>. + * + * <p><big><b>Supported range</b></big> + * + * <p>The allowable range of <code>Calendar</code> has been + * narrowed. <code>GregorianCalendar</code> used to attempt to support + * the range of dates with millisecond values from + * <code>Long.MIN_VALUE</code> to <code>Long.MAX_VALUE</code>. + * The new <code>Calendar</code> protocol specifies the + * maximum range of supportable dates as those having Julian day numbers + * of <code>-0x7F000000</code> to <code>+0x7F000000</code>. 
This + * corresponds to years from ~5,800,000 BCE to ~5,800,000 CE. Programmers + * should use the protected constants in <code>Calendar</code> to + * specify an extremely early or extremely late date.</p> + * + * @stable ICU 2.0 + */ +class U_I18N_API Calendar : public UObject { +public: + + /** + * Field IDs for date and time. Used to specify date/time fields. ERA is calendar + * specific. Example ranges given are for illustration only; see specific Calendar + * subclasses for actual ranges. + * @deprecated ICU 2.6. Use C enum UCalendarDateFields defined in ucal.h + */ + enum EDateFields { +#ifndef U_HIDE_DEPRECATED_API +/* + * ERA may be defined on other platforms. To avoid any potential problems, undefine it here. + */ +#ifdef ERA +#undef ERA +#endif + ERA, // Example: 0..1 + YEAR, // Example: 1..big number + MONTH, // Example: 0..11 + WEEK_OF_YEAR, // Example: 1..53 + WEEK_OF_MONTH, // Example: 1..4 + DATE, // Example: 1..31 + DAY_OF_YEAR, // Example: 1..365 + DAY_OF_WEEK, // Example: 1..7 + DAY_OF_WEEK_IN_MONTH, // Example: 1..4, may be specified as -1 + AM_PM, // Example: 0..1 + HOUR, // Example: 0..11 + HOUR_OF_DAY, // Example: 0..23 + MINUTE, // Example: 0..59 + SECOND, // Example: 0..59 + MILLISECOND, // Example: 0..999 + ZONE_OFFSET, // Example: -12*U_MILLIS_PER_HOUR..12*U_MILLIS_PER_HOUR + DST_OFFSET, // Example: 0 or U_MILLIS_PER_HOUR + YEAR_WOY, // 'Y' Example: 1..big number - Year of Week of Year + DOW_LOCAL, // 'e' Example: 1..7 - Day of Week / Localized + + EXTENDED_YEAR, + JULIAN_DAY, + MILLISECONDS_IN_DAY, + IS_LEAP_MONTH, + + FIELD_COUNT = UCAL_FIELD_COUNT // See ucal.h for other fields. +#endif /* U_HIDE_DEPRECATED_API */ + }; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients + * who create locale resources for the field of first-day-of-week should be aware of + * this. For instance, in US locale, first-day-of-week is set to 1, i.e., SUNDAY. + * @deprecated ICU 2.6. 
Use C enum UCalendarDaysOfWeek defined in ucal.h + */ + enum EDaysOfWeek { + SUNDAY = 1, + MONDAY, + TUESDAY, + WEDNESDAY, + THURSDAY, + FRIDAY, + SATURDAY + }; + + /** + * Useful constants for month. Note: Calendar month is 0-based. + * @deprecated ICU 2.6. Use C enum UCalendarMonths defined in ucal.h + */ + enum EMonths { + JANUARY, + FEBRUARY, + MARCH, + APRIL, + MAY, + JUNE, + JULY, + AUGUST, + SEPTEMBER, + OCTOBER, + NOVEMBER, + DECEMBER, + UNDECIMBER + }; + + /** + * Useful constants for hour in 12-hour clock. Used in GregorianCalendar. + * @deprecated ICU 2.6. Use C enum UCalendarAMPMs defined in ucal.h + */ + enum EAmpm { + AM, + PM + }; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * destructor + * @stable ICU 2.0 + */ + virtual ~Calendar(); + + /** + * Create and return a polymorphic copy of this calendar. + * + * @return a polymorphic copy of this calendar. + * @stable ICU 2.0 + */ + virtual Calendar* clone(void) const = 0; + + /** + * Creates a Calendar using the default timezone and locale. Clients are responsible + * for deleting the object returned. + * + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. U_MISSING_RESOURCE_ERROR will be returned if the resource data + * requests a calendar type which has not been installed. + * @return A Calendar if created successfully. NULL otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(UErrorCode& success); + + /** + * Creates a Calendar using the given timezone and the default locale. + * The Calendar takes ownership of zoneToAdopt; the + * client must not delete it. + * + * @param zoneToAdopt The given timezone to be adopted. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. NULL otherwise. 
+ * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, UErrorCode& success); + + /** + * Creates a Calendar using the given timezone and the default locale. The TimeZone + * is _not_ adopted; the client is still responsible for deleting it. + * + * @param zone The timezone. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. NULL otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(const TimeZone& zone, UErrorCode& success); + + /** + * Creates a Calendar using the default timezone and the given locale. + * + * @param aLocale The given locale. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. NULL otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(const Locale& aLocale, UErrorCode& success); + + /** + * Creates a Calendar using the given timezone and given locale. + * The Calendar takes ownership of zoneToAdopt; the + * client must not delete it. + * + * @param zoneToAdopt The given timezone to be adopted. + * @param aLocale The given locale. + * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. NULL otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(TimeZone* zoneToAdopt, const Locale& aLocale, UErrorCode& success); + + /** + * Gets a Calendar using the given timezone and given locale. The TimeZone + * is _not_ adopted; the client is still responsible for deleting it. + * + * @param zone The given timezone. + * @param aLocale The given locale. 
+ * @param success Indicates the success/failure of Calendar creation. Filled in + * with U_ZERO_ERROR if created successfully, set to a failure result + * otherwise. + * @return A Calendar if created successfully. NULL otherwise. + * @stable ICU 2.0 + */ + static Calendar* U_EXPORT2 createInstance(const TimeZone& zone, const Locale& aLocale, UErrorCode& success); + + /** + * Returns a list of the locales for which Calendars are installed. + * + * @param count Number of locales returned. + * @return An array of Locale objects representing the set of locales for which + * Calendars are installed. The system retains ownership of this list; + * the caller must NOT delete it. Does not include user-registered Calendars. + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + + /** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param key one of the keys supported by this service. For now, only + * "calendar" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @param status ICU Error Code + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ + static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* key, + const Locale& locale, UBool commonlyUsed, UErrorCode& status); + + /** + * Returns the current UTC (GMT) time measured in milliseconds since 0:00:00 on 1/1/70 + * (derived from the system time). + * + * @return The current UTC time in milliseconds. 
+ * @stable ICU 2.0 + */ + static UDate U_EXPORT2 getNow(void); + + /** + * Gets this Calendar's time as milliseconds. May involve recalculation of time due + * to previous calls to set time field values. The time specified is non-local UTC + * (GMT) time. Although this method is const, this object may actually be changed + * (semantically const). + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return The current time in UTC (GMT) time, or zero if the operation + * failed. + * @stable ICU 2.0 + */ + inline UDate getTime(UErrorCode& status) const { return getTimeInMillis(status); } + + /** + * Sets this Calendar's current time with the given UDate. The time specified should + * be in non-local UTC (GMT) time. + * + * @param date The given UDate in UTC (GMT) time. + * @param status Output param set to success/failure code on exit. If any value + * set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + inline void setTime(UDate date, UErrorCode& status) { setTimeInMillis(date, status); } + + /** + * Compares the equality of two Calendar objects. Objects of different subclasses + * are considered unequal. This comparison is very exacting; two Calendar objects + * must be in exactly the same state to be considered equal. To compare based on the + * represented time, use equals() instead. + * + * @param that The Calendar object to be compared with. + * @return True if the given Calendar is the same as this Calendar; false + * otherwise. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Calendar& that) const; + + /** + * Compares the inequality of two Calendar objects. + * + * @param that The Calendar object to be compared with. + * @return True if the given Calendar is not the same as this Calendar; false + * otherwise. 
+ * @stable ICU 2.0 + */ + UBool operator!=(const Calendar& that) const {return !operator==(that);} + + /** + * Returns TRUE if the given Calendar object is equivalent to this + * one. An equivalent Calendar will behave exactly as this one + * does, but it may be set to a different time. By contrast, for + * the operator==() method to return TRUE, the other Calendar must + * be set to the same time. + * + * @param other the Calendar to be compared with this Calendar + * @stable ICU 2.4 + */ + virtual UBool isEquivalentTo(const Calendar& other) const; + + /** + * Compares the Calendar time, whereas Calendar::operator== compares the equality of + * Calendar objects. + * + * @param when The Calendar to be compared with this Calendar. Although this is a + * const parameter, the object may be modified physically + * (semantically const). + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return True if the current time of this Calendar is equal to the time of + * Calendar when; false otherwise. + * @stable ICU 2.0 + */ + UBool equals(const Calendar& when, UErrorCode& status) const; + + /** + * Returns true if this Calendar's current time is before "when"'s current time. + * + * @param when The Calendar to be compared with this Calendar. Although this is a + * const parameter, the object may be modified physically + * (semantically const). + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return True if the current time of this Calendar is before the time of + * Calendar when; false otherwise. + * @stable ICU 2.0 + */ + UBool before(const Calendar& when, UErrorCode& status) const; + + /** + * Returns true if this Calendar's current time is after "when"'s current time. 
+ * + * @param when The Calendar to be compared with this Calendar. Although this is a + * const parameter, the object may be modified physically + * (semantically const). + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return True if the current time of this Calendar is after the time of + * Calendar when; false otherwise. + * @stable ICU 2.0 + */ + UBool after(const Calendar& when, UErrorCode& status) const; + + /** + * UDate Arithmetic function. Adds the specified (signed) amount of time to the given + * time field, based on the calendar's rules. For example, to subtract 5 days from + * the current time of the calendar, call add(Calendar::DATE, -5). When adding on + * the month or Calendar::MONTH field, other fields like date might conflict and + * need to be changed. For instance, adding 1 month on the date 01/31/96 will result + * in 02/29/96. + * Adding a positive value always means moving forward in time, so for the Gregorian calendar, + * starting with 100 BC and adding +1 to year results in 99 BC (even though this actually reduces + * the numeric value of the field itself). + * + * @param field Specifies which date field to modify. + * @param amount The amount of time to be added to the field, in the natural unit + * for that field (e.g., days for the day fields, hours for the hour + * field.) + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @deprecated ICU 2.6. use add(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead. + */ + virtual void add(EDateFields field, int32_t amount, UErrorCode& status); + + /** + * UDate Arithmetic function. 
Adds the specified (signed) amount of time to the given + * time field, based on the calendar's rules. For example, to subtract 5 days from + * the current time of the calendar, call add(Calendar::DATE, -5). When adding on + * the month or Calendar::MONTH field, other fields like date might conflict and + * need to be changed. For instance, adding 1 month on the date 01/31/96 will result + * in 02/29/96. + * Adding a positive value always means moving forward in time, so for the Gregorian calendar, + * starting with 100 BC and adding +1 to year results in 99 BC (even though this actually reduces + * the numeric value of the field itself). + * + * @param field Specifies which date field to modify. + * @param amount The amount of time to be added to the field, in the natural unit + * for that field (e.g., days for the day fields, hours for the hour + * field.) + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.6. + */ + virtual void add(UCalendarDateFields field, int32_t amount, UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Time Field Rolling function. Rolls (up/down) a single unit of time on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, true). When rolling on the year or Calendar::YEAR field, it + * will roll the year value in the range between getMinimum(Calendar::YEAR) and the + * value returned by getMaximum(Calendar::YEAR). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. 
Rolling up always means rolling forward in time (unless the limit of the + * field is reached, in which case it may pin or wrap), so for Gregorian calendar, + * starting with 100 BC and rolling the year up results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). + * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + * <P> + * NOTE: Do not use this method -- use roll(EDateFields, int, UErrorCode&) instead. + * + * @param field The time field. + * @param up Indicates if the value of the specified time field is to be rolled + * up or rolled down. Use true if rolling up, false otherwise. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, UBool up, UErrorCode& status) instead. + */ + inline void roll(EDateFields field, UBool up, UErrorCode& status); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Time Field Rolling function. Rolls (up/down) a single unit of time on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, true). 
When rolling on the year or Calendar::YEAR field, it + * will roll the year value in the range between getMinimum(Calendar::YEAR) and the + * value returned by getMaximum(Calendar::YEAR). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. Rolling up always means rolling forward in time (unless the limit of the + * field is reached, in which case it may pin or wrap), so for Gregorian calendar, + * starting with 100 BC and rolling the year up results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). + * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + * <P> + * NOTE: Do not use this method -- use roll(UCalendarDateFields, int, UErrorCode&) instead. + * + * @param field The time field. + * @param up Indicates if the value of the specified time field is to be rolled + * up or rolled down. Use true if rolling up, false otherwise. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.6. 
+ */ + inline void roll(UCalendarDateFields field, UBool up, UErrorCode& status); + + /** + * Time Field Rolling function. Rolls by the given amount on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, +1, status). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. Rolling by a positive value always means rolling forward in time (unless + * the limit of the field is reached, in which case it may pin or wrap), so for + * Gregorian calendar, starting with 100 BC and rolling the year by + 1 results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). + * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + * <P> + * The only difference between roll() and add() is that roll() does not change + * the value of more significant fields when it reaches the minimum or maximum + * of its range, whereas add() does. + * + * @param field The time field. + * @param amount Indicates amount to roll. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @deprecated ICU 2.6. 
Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead. + */ + virtual void roll(EDateFields field, int32_t amount, UErrorCode& status); + + /** + * Time Field Rolling function. Rolls by the given amount on the given + * time field. For example, to roll the current date up by one day, call + * roll(Calendar::DATE, +1, status). When rolling on the month or + * Calendar::MONTH field, other fields like date might conflict and, need to be + * changed. For instance, rolling the month up on the date 01/31/96 will result in + * 02/29/96. Rolling by a positive value always means rolling forward in time (unless + * the limit of the field is reached, in which case it may pin or wrap), so for + * Gregorian calendar, starting with 100 BC and rolling the year by + 1 results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in + * most eras in the Japanese calendar) then rolling the year past either limit of the + * era will cause the year to wrap around. When eras only have a limit at one end, + * then attempting to roll the year past that limit will result in pinning the year + * at that limit. Note that for most calendars in which era 0 years move forward in + * time (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to + * result in negative years for era 0 (that is the only way to represent years before + * the calendar epoch). + * When rolling on the hour-in-day or Calendar::HOUR_OF_DAY field, it will roll the + * hour value in the range between 0 and 23, which is zero-based. + * <P> + * The only difference between roll() and add() is that roll() does not change + * the value of more significant fields when it reaches the minimum or maximum + * of its range, whereas add() does. + * + * @param field The time field. + * @param amount Indicates amount to roll. + * @param status Output param set to success/failure code on exit. 
If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @stable ICU 2.6. + */ + virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status); + + /** + * Return the difference between the given time and the time this + * calendar object is set to. If this calendar is set + * <em>before</em> the given time, the returned value will be + * positive. If this calendar is set <em>after</em> the given + * time, the returned value will be negative. The + * <code>field</code> parameter specifies the units of the return + * value. For example, if <code>fieldDifference(when, + * Calendar::MONTH)</code> returns 3, then this calendar is set to + * 3 months before <code>when</code>, and possibly some additional + * time less than one month. + * + * <p>As a side effect of this call, this calendar is advanced + * toward <code>when</code> by the given amount. That is, calling + * this method has the side effect of calling <code>add(field, + * n)</code>, where <code>n</code> is the return value. + * + * <p>Usage: To use this method, call it first with the largest + * field of interest, then with progressively smaller fields. For + * example: + * + * <pre> + * int y = cal->fieldDifference(when, Calendar::YEAR, err); + * int m = cal->fieldDifference(when, Calendar::MONTH, err); + * int d = cal->fieldDifference(when, Calendar::DATE, err);</pre> + * + * computes the difference between <code>cal</code> and + * <code>when</code> in years, months, and days. + * + * <p>Note: <code>fieldDifference()</code> is + * <em>asymmetrical</em>.
That is, in the following code: + * + * <pre> + * cal->setTime(date1, err); + * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err); + * int d1 = cal->fieldDifference(date2, Calendar::DATE, err); + * cal->setTime(date2, err); + * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err); + * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);</pre> + * + * one might expect that <code>m1 == -m2 && d1 == -d2</code>. + * However, this is not generally the case, because of + * irregularities in the underlying calendar system (e.g., the + * Gregorian calendar has a varying number of days per month). + * + * @param when the date to compare this calendar's time to + * @param field the field in which to compute the result + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @return the difference, either positive or negative, between + * this calendar's time and <code>when</code>, in terms of + * <code>field</code>. + * @deprecated ICU 2.6. Use fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status). + */ + virtual int32_t fieldDifference(UDate when, EDateFields field, UErrorCode& status); + + /** + * Return the difference between the given time and the time this + * calendar object is set to. If this calendar is set + * <em>before</em> the given time, the returned value will be + * positive. If this calendar is set <em>after</em> the given + * time, the returned value will be negative. The + * <code>field</code> parameter specifies the units of the return + * value. For example, if <code>fieldDifference(when, + * Calendar::MONTH)</code> returns 3, then this calendar is set to + * 3 months before <code>when</code>, and possibly some additional + * time less than one month. + * + * <p>As a side effect of this call, this calendar is advanced + * toward <code>when</code> by the given amount.
That is, calling + * this method has the side effect of calling <code>add(field, + * n)</code>, where <code>n</code> is the return value. + * + * <p>Usage: To use this method, call it first with the largest + * field of interest, then with progressively smaller fields. For + * example: + * + * <pre> + * int y = cal->fieldDifference(when, Calendar::YEAR, err); + * int m = cal->fieldDifference(when, Calendar::MONTH, err); + * int d = cal->fieldDifference(when, Calendar::DATE, err);</pre> + * + * computes the difference between <code>cal</code> and + * <code>when</code> in years, months, and days. + * + * <p>Note: <code>fieldDifference()</code> is + * <em>asymmetrical</em>. That is, in the following code: + * + * <pre> + * cal->setTime(date1, err); + * int m1 = cal->fieldDifference(date2, Calendar::MONTH, err); + * int d1 = cal->fieldDifference(date2, Calendar::DATE, err); + * cal->setTime(date2, err); + * int m2 = cal->fieldDifference(date1, Calendar::MONTH, err); + * int d2 = cal->fieldDifference(date1, Calendar::DATE, err);</pre> + * + * one might expect that <code>m1 == -m2 && d1 == -d2</code>. + * However, this is not generally the case, because of + * irregularities in the underlying calendar system (e.g., the + * Gregorian calendar has a varying number of days per month). + * + * @param when the date to compare this calendar's time to + * @param field the field in which to compute the result + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @return the difference, either positive or negative, between + * this calendar's time and <code>when</code>, in terms of + * <code>field</code>. + * @stable ICU 2.6. + */ + virtual int32_t fieldDifference(UDate when, UCalendarDateFields field, UErrorCode& status); + + /** + * Sets the calendar's time zone to be the one passed in. 
The Calendar takes ownership + * of the TimeZone; the caller is no longer responsible for deleting it. If the + * given time zone is NULL, this function has no effect. + * + * @param value The given time zone. + * @stable ICU 2.0 + */ + void adoptTimeZone(TimeZone* value); + + /** + * Sets the calendar's time zone to be the same as the one passed in. The TimeZone + * passed in is _not_ adopted; the client is still responsible for deleting it. + * + * @param zone The given time zone. + * @stable ICU 2.0 + */ + void setTimeZone(const TimeZone& zone); + + /** + * Returns a reference to the time zone owned by this calendar. The returned reference + * is only valid until clients make another call to adoptTimeZone or setTimeZone, + * or this Calendar is destroyed. + * + * @return The time zone object associated with this calendar. + * @stable ICU 2.0 + */ + const TimeZone& getTimeZone(void) const; + + /** + * Returns the time zone owned by this calendar. The caller owns the returned object + * and must delete it when done. After this call, the new time zone associated + * with this Calendar is the default TimeZone as returned by TimeZone::createDefault(). + * + * @return The time zone object which was associated with this calendar. + * @stable ICU 2.0 + */ + TimeZone* orphanTimeZone(void); + + /** + * Queries if the current date for this Calendar is in Daylight Savings Time. + * + * @param status Fill-in parameter which receives the status of this operation. + * @return True if the current date for this Calendar is in Daylight Savings Time, + * false, otherwise. + * @stable ICU 2.0 + */ + virtual UBool inDaylightTime(UErrorCode& status) const = 0; + + /** + * Specifies whether or not date/time interpretation is to be lenient. With lenient + * interpretation, a date such as "February 942, 1996" will be treated as being + * equivalent to the 941st day after February 1, 1996. 
With strict interpretation, + * such dates will cause an error when computing time from the time field values + * representing the dates. + * + * @param lenient True specifies date/time interpretation to be lenient. + * + * @see DateFormat#setLenient + * @stable ICU 2.0 + */ + void setLenient(UBool lenient); + + /** + * Tells whether date/time interpretation is to be lenient. + * + * @return True tells that date/time interpretation is to be lenient. + * @stable ICU 2.0 + */ + UBool isLenient(void) const; + + /** + * Sets the behavior for handling wall time repeating multiple times + * at negative time zone offset transitions. For example, 1:30 AM on + * November 6, 2011 in US Eastern time (America/New_York) occurs twice; + * 1:30 AM EDT, then 1:30 AM EST one hour later. When <code>UCAL_WALLTIME_FIRST</code> + * is used, the wall time 1:30AM in this example will be interpreted as 1:30 AM EDT + * (first occurrence). When <code>UCAL_WALLTIME_LAST</code> is used, it will be + * interpreted as 1:30 AM EST (last occurrence). The default value is + * <code>UCAL_WALLTIME_LAST</code>. + * <p> + * <b>Note:</b> <code>UCAL_WALLTIME_NEXT_VALID</code> is not a valid + * option for this method. When the argument is neither <code>UCAL_WALLTIME_FIRST</code> + * nor <code>UCAL_WALLTIME_LAST</code>, this method has no effect and will keep + * the current setting. + * + * @param option the behavior for handling repeating wall time, either + * <code>UCAL_WALLTIME_FIRST</code> or <code>UCAL_WALLTIME_LAST</code>. + * @see #getRepeatedWallTimeOption + * @stable ICU 49 + */ + void setRepeatedWallTimeOption(UCalendarWallTimeOption option); + + /** + * Gets the behavior for handling wall time repeating multiple times + * at negative time zone offset transitions. + * + * @return the behavior for handling repeating wall time, either + * <code>UCAL_WALLTIME_FIRST</code> or <code>UCAL_WALLTIME_LAST</code>.
+ * @see #setRepeatedWallTimeOption + * @stable ICU 49 + */ + UCalendarWallTimeOption getRepeatedWallTimeOption(void) const; + + /** + * Sets the behavior for handling skipped wall time at positive time zone offset + * transitions. For example, 2:30 AM on March 13, 2011 in US Eastern time (America/New_York) + * does not exist because the wall time jumps from 1:59 AM EST to 3:00 AM EDT. When + * <code>UCAL_WALLTIME_FIRST</code> is used, 2:30 AM is interpreted as 30 minutes before 3:00 AM + * EDT, therefore, it will be resolved as 1:30 AM EST. When <code>UCAL_WALLTIME_LAST</code> + * is used, 2:30 AM is interpreted as 31 minutes after 1:59 AM EST, therefore, it will be + * resolved as 3:30 AM EDT. When <code>UCAL_WALLTIME_NEXT_VALID</code> is used, 2:30 AM will + * be resolved as the next valid wall time, that is 3:00 AM EDT. The default value is + * <code>UCAL_WALLTIME_LAST</code>. + * <p> + * <b>Note:</b> This option is effective only when this calendar is lenient. + * When the calendar is strict, such non-existing wall time will cause an error. + * + * @param option the behavior for handling skipped wall time at positive time zone + * offset transitions, one of <code>UCAL_WALLTIME_FIRST</code>, <code>UCAL_WALLTIME_LAST</code> and + * <code>UCAL_WALLTIME_NEXT_VALID</code>. + * @see #getSkippedWallTimeOption + * + * @stable ICU 49 + */ + void setSkippedWallTimeOption(UCalendarWallTimeOption option); + + /** + * Gets the behavior for handling skipped wall time at positive time zone offset + * transitions. + * + * @return the behavior for handling skipped wall time, one of + * <code>UCAL_WALLTIME_FIRST</code>, <code>UCAL_WALLTIME_LAST</code> + * and <code>UCAL_WALLTIME_NEXT_VALID</code>. + * @see #setSkippedWallTimeOption + * @stable ICU 49 + */ + UCalendarWallTimeOption getSkippedWallTimeOption(void) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Sets what the first day of the week is; e.g., Sunday in US, Monday in France.
+ * + * @param value The given first day of the week. + * @deprecated ICU 2.6. Use setFirstDayOfWeek(UCalendarDaysOfWeek value) instead. + */ + void setFirstDayOfWeek(EDaysOfWeek value); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Sets what the first day of the week is; e.g., Sunday in US, Monday in France. + * + * @param value The given first day of the week. + * @stable ICU 2.6. + */ + void setFirstDayOfWeek(UCalendarDaysOfWeek value); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Gets what the first day of the week is; e.g., Sunday in US, Monday in France. + * + * @return The first day of the week. + * @deprecated ICU 2.6 use the overload with error code + */ + EDaysOfWeek getFirstDayOfWeek(void) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Gets what the first day of the week is; e.g., Sunday in US, Monday in France. + * + * @param status error code + * @return The first day of the week. + * @stable ICU 2.6 + */ + UCalendarDaysOfWeek getFirstDayOfWeek(UErrorCode &status) const; + + /** + * Sets what the minimal days required in the first week of the year are; For + * example, if the first week is defined as one that contains the first day of the + * first month of a year, call the method with value 1. If it must be a full week, + * use value 7. + * + * @param value The given minimal days required in the first week of the year. + * @stable ICU 2.0 + */ + void setMinimalDaysInFirstWeek(uint8_t value); + + /** + * Gets what the minimal days required in the first week of the year are; e.g., if + * the first week is defined as one that contains the first day of the first month + * of a year, getMinimalDaysInFirstWeek returns 1. If the minimal days required must + * be a full week, getMinimalDaysInFirstWeek returns 7. + * + * @return The minimal days required in the first week of the year. + * @stable ICU 2.0 + */ + uint8_t getMinimalDaysInFirstWeek(void) const; + + /** + * Gets the minimum value for the given time field. 
e.g., for Gregorian + * DAY_OF_MONTH, 1. + * + * @param field The given time field. + * @return The minimum value for the given time field. + * @deprecated ICU 2.6. Use getMinimum(UCalendarDateFields field) instead. + */ + virtual int32_t getMinimum(EDateFields field) const; + + /** + * Gets the minimum value for the given time field. e.g., for Gregorian + * DAY_OF_MONTH, 1. + * + * @param field The given time field. + * @return The minimum value for the given time field. + * @stable ICU 2.6. + */ + virtual int32_t getMinimum(UCalendarDateFields field) const; + + /** + * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH, + * 31. + * + * @param field The given time field. + * @return The maximum value for the given time field. + * @deprecated ICU 2.6. Use getMaximum(UCalendarDateFields field) instead. + */ + virtual int32_t getMaximum(EDateFields field) const; + + /** + * Gets the maximum value for the given time field. e.g. for Gregorian DAY_OF_MONTH, + * 31. + * + * @param field The given time field. + * @return The maximum value for the given time field. + * @stable ICU 2.6. + */ + virtual int32_t getMaximum(UCalendarDateFields field) const; + + /** + * Gets the highest minimum value for the given field if varies. Otherwise same as + * getMinimum(). For Gregorian, no difference. + * + * @param field The given time field. + * @return The highest minimum value for the given time field. + * @deprecated ICU 2.6. Use getGreatestMinimum(UCalendarDateFields field) instead. + */ + virtual int32_t getGreatestMinimum(EDateFields field) const; + + /** + * Gets the highest minimum value for the given field if varies. Otherwise same as + * getMinimum(). For Gregorian, no difference. + * + * @param field The given time field. + * @return The highest minimum value for the given time field. + * @stable ICU 2.6. 
+ */ + virtual int32_t getGreatestMinimum(UCalendarDateFields field) const; + + /** + * Gets the lowest maximum value for the given field if varies. Otherwise same as + * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28. + * + * @param field The given time field. + * @return The lowest maximum value for the given time field. + * @deprecated ICU 2.6. Use getLeastMaximum(UCalendarDateFields field) instead. + */ + virtual int32_t getLeastMaximum(EDateFields field) const; + + /** + * Gets the lowest maximum value for the given field if varies. Otherwise same as + * getMaximum(). e.g., for Gregorian DAY_OF_MONTH, 28. + * + * @param field The given time field. + * @return The lowest maximum value for the given time field. + * @stable ICU 2.6. + */ + virtual int32_t getLeastMaximum(UCalendarDateFields field) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Return the minimum value that this field could have, given the current date. + * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum(). + * + * The version of this function on Calendar uses an iterative algorithm to determine the + * actual minimum value for the field. There is almost always a more efficient way to + * accomplish this (in most cases, you can simply return getMinimum()). GregorianCalendar + * overrides this function with a more efficient implementation. + * + * @param field the field to determine the minimum of + * @param status Fill-in parameter which receives the status of this operation. + * @return the minimum of the given field for the current date of this Calendar + * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field, UErrorCode& status) instead. + */ + int32_t getActualMinimum(EDateFields field, UErrorCode& status) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Return the minimum value that this field could have, given the current date. + * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum(). 
+ * + * The version of this function on Calendar uses an iterative algorithm to determine the + * actual minimum value for the field. There is almost always a more efficient way to + * accomplish this (in most cases, you can simply return getMinimum()). GregorianCalendar + * overrides this function with a more efficient implementation. + * + * @param field the field to determine the minimum of + * @param status Fill-in parameter which receives the status of this operation. + * @return the minimum of the given field for the current date of this Calendar + * @stable ICU 2.6. + */ + virtual int32_t getActualMinimum(UCalendarDateFields field, UErrorCode& status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Return the maximum value that this field could have, given the current date. + * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual + * maximum would be 28; for "Feb 3, 1996" it is 29. Similarly for a Hebrew calendar, + * for some years the actual maximum for MONTH is 12, and for others 13. + * + * The version of this function on Calendar uses an iterative algorithm to determine the + * actual maximum value for the field. There is almost always a more efficient way to + * accomplish this (in most cases, you can simply return getMaximum()). GregorianCalendar + * overrides this function with a more efficient implementation. + * + * @param field the field to determine the maximum of + * @param status Fill-in parameter which receives the status of this operation. + * @return the maximum of the given field for the current date of this Calendar + * @deprecated ICU 2.6. Use getActualMaximum(UCalendarDateFields field, UErrorCode& status) instead. + */ + int32_t getActualMaximum(EDateFields field, UErrorCode& status) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Return the maximum value that this field could have, given the current date.
+ * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual + * maximum would be 28; for "Feb 3, 1996" it is 29. Similarly for a Hebrew calendar, + * for some years the actual maximum for MONTH is 12, and for others 13. + * + * The version of this function on Calendar uses an iterative algorithm to determine the + * actual maximum value for the field. There is almost always a more efficient way to + * accomplish this (in most cases, you can simply return getMaximum()). GregorianCalendar + * overrides this function with a more efficient implementation. + * + * @param field the field to determine the maximum of + * @param status Fill-in parameter which receives the status of this operation. + * @return the maximum of the given field for the current date of this Calendar + * @stable ICU 2.6. + */ + virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Gets the value for a given time field. Recalculate the current time field values + * if the time value has been changed by a call to setTime(). Return zero for unset + * fields if any fields have been explicitly set by a call to set(). To force a + * recomputation of all fields regardless of the previous state, call complete(). + * This method is semantically const, but may alter the object in memory. + * + * @param field The given time field. + * @param status Fill-in parameter which receives the status of the operation. + * @return The value for the given time field, or zero if the field is unset, + * and set() has been called for any other field. + * @deprecated ICU 2.6. Use get(UCalendarDateFields field, UErrorCode& status) instead. + */ + int32_t get(EDateFields field, UErrorCode& status) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Gets the value for a given time field. Recalculate the current time field values + * if the time value has been changed by a call to setTime().
Return zero for unset + * fields if any fields have been explicitly set by a call to set(). To force a + * recomputation of all fields regardless of the previous state, call complete(). + * This method is semantically const, but may alter the object in memory. + * + * @param field The given time field. + * @param status Fill-in parameter which receives the status of the operation. + * @return The value for the given time field, or zero if the field is unset, + * and set() has been called for any other field. + * @stable ICU 2.6. + */ + int32_t get(UCalendarDateFields field, UErrorCode& status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Determines if the given time field has a value set. This can affect the + * resolving of time in Calendar. Unset fields have a value of zero, by definition. + * + * @param field The given time field. + * @return True if the given time field has a value set; false otherwise. + * @deprecated ICU 2.6. Use isSet(UCalendarDateFields field) instead. + */ + UBool isSet(EDateFields field) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Determines if the given time field has a value set. This can affect the + * resolving of time in Calendar. Unset fields have a value of zero, by definition. + * + * @param field The given time field. + * @return True if the given time field has a value set; false otherwise. + * @stable ICU 2.6. + */ + UBool isSet(UCalendarDateFields field) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Sets the given time field with the given value. + * + * @param field The given time field. + * @param value The value to be set for the given time field. + * @deprecated ICU 2.6. Use set(UCalendarDateFields field, int32_t value) instead. + */ + void set(EDateFields field, int32_t value); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Sets the given time field with the given value. + * + * @param field The given time field. + * @param value The value to be set for the given time field. + * @stable ICU 2.6.
+ */ + void set(UCalendarDateFields field, int32_t value); + + /** + * Sets the values for the fields YEAR, MONTH, and DATE. Other field values are + * retained; call clear() first if this is not desired. + * + * @param year The value used to set the YEAR time field. + * @param month The value used to set the MONTH time field. Month value is 0-based. + * e.g., 0 for January. + * @param date The value used to set the DATE time field. + * @stable ICU 2.0 + */ + void set(int32_t year, int32_t month, int32_t date); + + /** + * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, and MINUTE. Other + * field values are retained; call clear() first if this is not desired. + * + * @param year The value used to set the YEAR time field. + * @param month The value used to set the MONTH time field. Month value is + * 0-based. E.g., 0 for January. + * @param date The value used to set the DATE time field. + * @param hour The value used to set the HOUR_OF_DAY time field. + * @param minute The value used to set the MINUTE time field. + * @stable ICU 2.0 + */ + void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute); + + /** + * Sets the values for the fields YEAR, MONTH, DATE, HOUR_OF_DAY, MINUTE, and SECOND. + * Other field values are retained; call clear() first if this is not desired. + * + * @param year The value used to set the YEAR time field. + * @param month The value used to set the MONTH time field. Month value is + * 0-based. E.g., 0 for January. + * @param date The value used to set the DATE time field. + * @param hour The value used to set the HOUR_OF_DAY time field. + * @param minute The value used to set the MINUTE time field. + * @param second The value used to set the SECOND time field. + * @stable ICU 2.0 + */ + void set(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, int32_t second); + + /** + * Clears the values of all the time fields, making them both unset and assigning + * them a value of zero. 
The field values will be determined during the next + * resolving of time into time fields. + * @stable ICU 2.0 + */ + void clear(void); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Clears the value in the given time field, both making it unset and assigning it a + * value of zero. This field value will be determined during the next resolving of + * time into time fields. + * + * @param field The time field to be cleared. + * @deprecated ICU 2.6. Use clear(UCalendarDateFields field) instead. + */ + void clear(EDateFields field); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Clears the value in the given time field, both making it unset and assigning it a + * value of zero. This field value will be determined during the next resolving of + * time into time fields. + * + * @param field The time field to be cleared. + * @stable ICU 2.6. + */ + void clear(UCalendarDateFields field); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. This method is to + * implement a simple version of RTTI, since not all C++ compilers support genuine + * RTTI. Polymorphic operator==() and clone() methods call this method. + * <P> + * Concrete subclasses of Calendar must implement getDynamicClassID() and also a + * static method and data member: + * + * static UClassID getStaticClassID() { return (UClassID)&fgClassID; } + * static char fgClassID; + * + * @return The class ID for this object. All objects of a given class have the + * same class ID. Objects of other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Returns the calendar type name string for this Calendar object. + * The returned string is the legacy ICU calendar attribute value, + * for example, "gregorian" or "japanese". 
+ * + * See type="old type name" for the calendar attribute of locale IDs + * at http://www.unicode.org/reports/tr35/#Key_Type_Definitions + * + * Sample code for getting the LDML/BCP 47 calendar key value: + * \code + * const char *calType = cal->getType(); + * if (0 == strcmp(calType, "unknown")) { + * // deal with unknown calendar type + * } else { + * string localeID("root@calendar="); + * localeID.append(calType); + * char langTag[100]; + * UErrorCode errorCode = U_ZERO_ERROR; + * int32_t length = uloc_toLanguageTag(localeID.c_str(), langTag, (int32_t)sizeof(langTag), TRUE, &errorCode); + * if (U_FAILURE(errorCode)) { + * // deal with errors & overflow + * } + * string lang(langTag, length); + * size_t caPos = lang.find("-ca-"); + * lang.erase(0, caPos + 4); + * // lang now contains the LDML calendar type + * } + * \endcode + * + * @return legacy calendar type name string + * @stable ICU 49 + */ + virtual const char * getType() const = 0; + + /** + * Returns whether the given day of the week is a weekday, a weekend day, + * or a day that transitions from one to the other, for the locale and + * calendar system associated with this Calendar (the locale's region is + * often the most determinant factor). If a transition occurs at midnight, + * then the days before and after the transition will have the + * type UCAL_WEEKDAY or UCAL_WEEKEND. If a transition occurs at a time + * other than midnight, then the day of the transition will have + * the type UCAL_WEEKEND_ONSET or UCAL_WEEKEND_CEASE. In this case, the + * method getWeekendTransition() will return the point of + * transition. + * @param dayOfWeek The day of the week whose type is desired (UCAL_SUNDAY..UCAL_SATURDAY). + * @param status The error code for the operation. + * @return The UCalendarWeekdayType for the day of the week. 
+ * @stable ICU 4.4 + */ + virtual UCalendarWeekdayType getDayOfWeekType(UCalendarDaysOfWeek dayOfWeek, UErrorCode &status) const; + + /** + * Returns the time during the day at which the weekend begins or ends in + * this calendar system. If getDayOfWeekType() returns UCAL_WEEKEND_ONSET + * for the specified dayOfWeek, return the time at which the weekend begins. + * If getDayOfWeekType() returns UCAL_WEEKEND_CEASE for the specified dayOfWeek, + * return the time at which the weekend ends. If getDayOfWeekType() returns + * some other UCalendarWeekdayType for the specified dayOfWeek, it is an error condition + * (U_ILLEGAL_ARGUMENT_ERROR). + * @param dayOfWeek The day of the week for which the weekend transition time is + * desired (UCAL_SUNDAY..UCAL_SATURDAY). + * @param status The error code for the operation. + * @return The milliseconds after midnight at which the weekend begins or ends. + * @stable ICU 4.4 + */ + virtual int32_t getWeekendTransition(UCalendarDaysOfWeek dayOfWeek, UErrorCode &status) const; + + /** + * Returns TRUE if the given UDate is in the weekend in + * this calendar system. + * @param date The UDate in question. + * @param status The error code for the operation. + * @return TRUE if the given UDate is in the weekend in + * this calendar system, FALSE otherwise. + * @stable ICU 4.4 + */ + virtual UBool isWeekend(UDate date, UErrorCode &status) const; + + /** + * Returns TRUE if this Calendar's current date-time is in the weekend in + * this calendar system. + * @return TRUE if this Calendar's current date-time is in the weekend in + * this calendar system, FALSE otherwise. + * @stable ICU 4.4 + */ + virtual UBool isWeekend(void) const; + +protected: + + /** + * Constructs a Calendar with the default time zone as returned by + * TimeZone::createInstance(), and the default locale. + * + * @param success Indicates the status of Calendar object construction. Returns + * U_ZERO_ERROR if constructed successfully.
+ * @stable ICU 2.0 + */ + Calendar(UErrorCode& success); + + /** + * Copy constructor + * + * @param source Calendar object to be copied from + * @stable ICU 2.0 + */ + Calendar(const Calendar& source); + + /** + * Default assignment operator + * + * @param right Calendar object to be copied + * @stable ICU 2.0 + */ + Calendar& operator=(const Calendar& right); + + /** + * Constructs a Calendar with the given time zone and locale. Clients are no longer + * responsible for deleting the given time zone object after it's adopted. + * + * @param zone The given time zone. + * @param aLocale The given locale. + * @param success Indicates the status of Calendar object construction. Returns + * U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + Calendar(TimeZone* zone, const Locale& aLocale, UErrorCode& success); + + /** + * Constructs a Calendar with the given time zone and locale. + * + * @param zone The given time zone. + * @param aLocale The given locale. + * @param success Indicates the status of Calendar object construction. Returns + * U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + Calendar(const TimeZone& zone, const Locale& aLocale, UErrorCode& success); + + /** + * Converts Calendar's time field values to GMT as milliseconds. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + virtual void computeTime(UErrorCode& status); + + /** + * Converts GMT as milliseconds to time field values. This allows you to sync up the + * time field values with a new time that is set for the calendar. This method + * does NOT recompute the time first; to recompute the time, then the fields, use + * the method complete(). + * + * @param status Output param set to success/failure code on exit. 
If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + virtual void computeFields(UErrorCode& status); + + /** + * Gets this Calendar's current time as a double. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @return the current time as UTC milliseconds from the epoch. + * @stable ICU 2.0 + */ + double getTimeInMillis(UErrorCode& status) const; + + /** + * Sets this Calendar's current time from the given double value. + * @param millis the new time in UTC milliseconds from the epoch. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + void setTimeInMillis( double millis, UErrorCode& status ); + + /** + * Recomputes the current time from currently set fields, and then fills in any + * unset fields in the time field list. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + * @stable ICU 2.0 + */ + void complete(UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Gets the value for a given time field. Subclasses can use this function to get + * field values without forcing recomputation of time. + * + * @param field The given time field. + * @return The value for the given time field. + * @deprecated ICU 2.6. Use internalGet(UCalendarDateFields field) instead. + */ + inline int32_t internalGet(EDateFields field) const {return fFields[field];} +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_INTERNAL_API + /** + * Gets the value for a given time field. 
Subclasses can use this function to get + * field values without forcing recomputation of time. If the field's stamp is UNSET, + * the defaultValue is used. + * + * @param field The given time field. + * @param defaultValue a default value used if the field is unset. + * @return The value for the given time field. + * @internal + */ + inline int32_t internalGet(UCalendarDateFields field, int32_t defaultValue) const {return fStamp[field]>kUnset ? fFields[field] : defaultValue;} + + /** + * Gets the value for a given time field. Subclasses can use this function to get + * field values without forcing recomputation of time. + * + * @param field The given time field. + * @return The value for the given time field. + * @internal + */ + inline int32_t internalGet(UCalendarDateFields field) const {return fFields[field];} +#endif /* U_HIDE_INTERNAL_API */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * Sets the value for a given time field. This is a fast internal method for + * subclasses. It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet + * flags. + * + * @param field The given time field. + * @param value The value for the given time field. + * @deprecated ICU 2.6. Use internalSet(UCalendarDateFields field, int32_t value) instead. + */ + void internalSet(EDateFields field, int32_t value); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Sets the value for a given time field. This is a fast internal method for + * subclasses. It does not affect the areFieldsInSync, isTimeSet, or areAllFieldsSet + * flags. + * + * @param field The given time field. + * @param value The value for the given time field. + * @stable ICU 2.6. + */ + inline void internalSet(UCalendarDateFields field, int32_t value); + + /** + * Prepare this calendar for computing the actual minimum or maximum. + * This method modifies this calendar's fields; it is called on a + * temporary calendar. 
+ * @internal + */ + virtual void prepareGetActual(UCalendarDateFields field, UBool isMinimum, UErrorCode &status); + + /** + * Limit enums. Not in sync with UCalendarLimitType (refers to internal fields). + * @internal + */ + enum ELimitType { +#ifndef U_HIDE_INTERNAL_API + UCAL_LIMIT_MINIMUM = 0, + UCAL_LIMIT_GREATEST_MINIMUM, + UCAL_LIMIT_LEAST_MAXIMUM, + UCAL_LIMIT_MAXIMUM, + UCAL_LIMIT_COUNT +#endif /* U_HIDE_INTERNAL_API */ + }; + + /** + * Subclass API for defining limits of different types. + * Subclasses must implement this method to return limits for the + * following fields: + * + * <pre>UCAL_ERA + * UCAL_YEAR + * UCAL_MONTH + * UCAL_WEEK_OF_YEAR + * UCAL_WEEK_OF_MONTH + * UCAL_DATE (DAY_OF_MONTH on Java) + * UCAL_DAY_OF_YEAR + * UCAL_DAY_OF_WEEK_IN_MONTH + * UCAL_YEAR_WOY + * UCAL_EXTENDED_YEAR</pre> + * + * @param field one of the above field numbers + * @param limitType one of <code>MINIMUM</code>, <code>GREATEST_MINIMUM</code>, + * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code> + * @internal + */ + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const = 0; + + /** + * Return a limit for a field. + * @param field the field, from <code>0..UCAL_MAX_FIELD</code> + * @param limitType the type specifier for the limit + * @see #ELimitType + * @internal + */ + virtual int32_t getLimit(UCalendarDateFields field, ELimitType limitType) const; + + + /** + * Return the Julian day number of day before the first day of the + * given month in the given extended year. Subclasses should override + * this method to implement their calendar system. 
+ * @param eyear the extended year + * @param month the zero-based month, or 0 if useMonth is false + * @param useMonth if false, compute the day before the first day of + * the given year, otherwise, compute the day before the first day of + * the given month + * @return the Julian day number of the day before the first + * day of the given month and year + * @internal + */ + virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, + UBool useMonth) const = 0; + + /** + * Return the number of days in the given month of the given extended + * year of this calendar system. Subclasses should override this + * method if they can provide a more correct or more efficient + * implementation than the default implementation in Calendar. + * @internal + */ + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const ; + + /** + * Return the number of days in the given extended year of this + * calendar system. Subclasses should override this method if they can + * provide a more correct or more efficient implementation than the + * default implementation in Calendar. + * @stable ICU 2.0 + */ + virtual int32_t handleGetYearLength(int32_t eyear) const; + + + /** + * Return the extended year defined by the current fields. This will + * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such + * as UCAL_ERA) specific to the calendar system, depending on which set of + * fields is newer. + * @return the extended year + * @internal + */ + virtual int32_t handleGetExtendedYear() = 0; + + /** + * Subclasses may override this. This method calls + * handleGetMonthLength() to obtain the calendar-specific month + * length. + * @param bestField which field to use to calculate the date + * @return julian day specified by calendar fields. 
+ * @internal + */ + virtual int32_t handleComputeJulianDay(UCalendarDateFields bestField); + + /** + * Subclasses must override this to convert from week fields + * (YEAR_WOY and WEEK_OF_YEAR) to an extended year in the case + * where YEAR, EXTENDED_YEAR are not set. + * The Calendar implementation assumes yearWoy is in extended gregorian form + * @return the extended year, UCAL_EXTENDED_YEAR + * @internal + */ + virtual int32_t handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy); + + /** + * Validate a single field of this calendar. Subclasses should + * override this method to validate any calendar-specific fields. + * Generic fields can be handled by + * <code>Calendar::validateField()</code>. + * @see #validateField(int, int, int, int&) + * @internal + */ + virtual void validateField(UCalendarDateFields field, UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Compute the Julian day from fields. Will determine whether to use + * the JULIAN_DAY field directly, or other fields. + * @return the julian day + * @internal + */ + int32_t computeJulianDay(); + + /** + * Compute the milliseconds in the day from the fields. This is a + * value from 0 to 23:59:59.999 inclusive, unless fields are out of + * range, in which case it can be an arbitrary value. This value + * reflects local zone wall time. + * @internal + */ + int32_t computeMillisInDay(); + + /** + * This method can assume EXTENDED_YEAR has been set. + * @param millis milliseconds of the date fields + * @param millisInDay milliseconds of the time fields; may be out + * of range. + * @param ec Output param set to failure code on function return + * when this function fails. + * @internal + */ + int32_t computeZoneOffset(double millis, int32_t millisInDay, UErrorCode &ec); + + + /** + * Determine the best stamp in a range. 
+ * @param start first enum to look at + * @param end last enum to look at + * @param bestSoFar stamp prior to function call + * @return the stamp value of the best stamp + * @internal + */ + int32_t newestStamp(UCalendarDateFields start, UCalendarDateFields end, int32_t bestSoFar) const; + + /** + * Values for field resolution tables + * @see #resolveFields + * @internal + */ + enum { + /** Marker for end of resolve set (row or group). */ + kResolveSTOP = -1, + /** Value to be bitwise "ORed" against resolve table field values for remapping. Example: (UCAL_DATE | kResolveRemap) in 1st column will cause 'UCAL_DATE' to be returned, but will not examine the value of UCAL_DATE. */ + kResolveRemap = 32 + }; + + /** + * Precedence table for Dates + * @see #resolveFields + * @internal + */ + static const UFieldResolutionTable kDatePrecedence[]; + + /** + * Precedence table for Year + * @see #resolveFields + * @internal + */ + static const UFieldResolutionTable kYearPrecedence[]; + + /** + * Precedence table for Day of Week + * @see #resolveFields + * @internal + */ + static const UFieldResolutionTable kDOWPrecedence[]; + + /** + * Given a precedence table, return the newest field combination in + * the table, or UCAL_FIELD_COUNT if none is found. + * + * <p>The precedence table is a 3-dimensional array of integers. It + * may be thought of as an array of groups. Each group is an array of + * lines. Each line is an array of field numbers. Within a line, if + * all fields are set, then the time stamp of the line is taken to be + * the stamp of the most recently set field. If any field of a line is + * unset, then the line fails to match. Within a group, the line with + * the newest time stamp is selected. The first field of the line is + * returned to indicate which line matched. + * + * <p>In some cases, it may be desirable to map a line to a field + * whose stamp is NOT examined. 
For example, if the best field is + * DAY_OF_WEEK then the DAY_OF_WEEK_IN_MONTH algorithm may be used. In + * order to do this, insert the value <code>kResolveRemap | F</code> at + * the start of the line, where <code>F</code> is the desired return + * field value. This field will NOT be examined; it only determines + * the return value if the other fields in the line are the newest. + * + * <p>If all lines of a group contain at least one unset field, then no + * line will match, and the group as a whole will fail to match. In + * that case, the next group will be processed. If all groups fail to + * match, then UCAL_FIELD_COUNT is returned. + * @internal + */ + UCalendarDateFields resolveFields(const UFieldResolutionTable *precedenceTable); +#endif /* U_HIDE_INTERNAL_API */ + + + /** + * @internal + */ + virtual const UFieldResolutionTable* getFieldResolutionTable() const; + +#ifndef U_HIDE_INTERNAL_API + /** + * Return the field that is newer, either defaultField, or + * alternateField. If neither is newer or neither is set, return defaultField. + * @internal + */ + UCalendarDateFields newerField(UCalendarDateFields defaultField, UCalendarDateFields alternateField) const; +#endif /* U_HIDE_INTERNAL_API */ + + +private: + /** + * Helper function for calculating limits by trial and error + * @param field The field being investigated + * @param startValue starting (least max) value of field + * @param endValue ending (greatest max) value of field + * @param status return type + * @internal + */ + int32_t getActualHelper(UCalendarDateFields field, int32_t startValue, int32_t endValue, UErrorCode &status) const; + + +protected: + /** + * The flag which indicates if the current time is set in the calendar. + * @stable ICU 2.0 + */ + UBool fIsTimeSet; + + /** + * True if the fields are in sync with the currently set time of this Calendar. 
+ * If false, then the next attempt to get the value of a field will + * force a recomputation of all fields from the current value of the time + * field. + * <P> + * This should really be named areFieldsInSync, but the old name is retained + * for backward compatibility. + * @stable ICU 2.0 + */ + UBool fAreFieldsSet; + + /** + * True if all of the fields have been set. This is initially false, and set to + * true by computeFields(). + * @stable ICU 2.0 + */ + UBool fAreAllFieldsSet; + + /** + * True if all fields have been virtually set, but have not yet been + * computed. This occurs only in setTimeInMillis(). A calendar set + * to this state will compute all fields from the time if it becomes + * necessary, but otherwise will delay such computation. + * @stable ICU 3.0 + */ + UBool fAreFieldsVirtuallySet; + + /** + * Get the current time without recomputing. + * + * @return the current time without recomputing. + * @stable ICU 2.0 + */ + UDate internalGetTime(void) const { return fTime; } + + /** + * Set the current time without affecting flags or fields. + * + * @param time The time to be set + * (No return value; this only assigns fTime.) + * @stable ICU 2.0 + */ + void internalSetTime(UDate time) { fTime = time; } + + /** + * The time fields containing values into which the millis is computed. + * @stable ICU 2.0 + */ + int32_t fFields[UCAL_FIELD_COUNT]; + + /** + * The flags which tell if a specified time field for the calendar is set. + * @deprecated ICU 2.8 use (fStamp[n]!=kUnset) + */ + UBool fIsSet[UCAL_FIELD_COUNT]; + + /** Special values of stamp[] + * @stable ICU 2.0 + */ + enum { + kUnset = 0, + kInternallySet, + kMinimumUserStamp + }; + + /** + * Pseudo-time-stamps which specify when each field was set. There + * are two special values, kUnset and kInternallySet. Values from + * kMinimumUserStamp to the maximum int32_t value are legal user set values. 
+ * @stable ICU 2.0 + */ + int32_t fStamp[UCAL_FIELD_COUNT]; + + /** + * Subclasses may override this method to compute several fields + * specific to each calendar system. These are: + * + * <ul><li>ERA + * <li>YEAR + * <li>MONTH + * <li>DAY_OF_MONTH + * <li>DAY_OF_YEAR + * <li>EXTENDED_YEAR</ul> + * + * Subclasses can refer to the DAY_OF_WEEK and DOW_LOCAL fields, which + * will be set when this method is called. Subclasses can also call + * the getGregorianXxx() methods to obtain Gregorian calendar + * equivalents for the given Julian day. + * + * <p>In addition, subclasses should compute any subclass-specific + * fields, that is, fields from BASE_FIELD_COUNT to + * getFieldCount() - 1. + * + * <p>The default implementation in <code>Calendar</code> implements + * a pure proleptic Gregorian calendar. + * @internal + */ + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Return the extended year on the Gregorian calendar as computed by + * <code>computeGregorianFields()</code>. + * @internal + */ + int32_t getGregorianYear() const { + return fGregorianYear; + } + + /** + * Return the month (0-based) on the Gregorian calendar as computed by + * <code>computeGregorianFields()</code>. + * @internal + */ + int32_t getGregorianMonth() const { + return fGregorianMonth; + } + + /** + * Return the day of year (1-based) on the Gregorian calendar as + * computed by <code>computeGregorianFields()</code>. + * @internal + */ + int32_t getGregorianDayOfYear() const { + return fGregorianDayOfYear; + } + + /** + * Return the day of month (1-based) on the Gregorian calendar as + * computed by <code>computeGregorianFields()</code>. + * @internal + */ + int32_t getGregorianDayOfMonth() const { + return fGregorianDayOfMonth; + } +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Called by computeJulianDay. Returns the default month (0-based) for the year, + * taking year and era into account. 
Defaults to 0 for Gregorian, which doesn't care. + * @param eyear The extended year + * @internal + */ + virtual int32_t getDefaultMonthInYear(int32_t eyear) ; + + + /** + * Called by computeJulianDay. Returns the default day (1-based) for the month, + * taking currently-set year and era into account. Defaults to 1 for Gregorian. + * @param eyear the extended year + * @param month the month in the year + * @internal + */ + virtual int32_t getDefaultDayInMonth(int32_t eyear, int32_t month); + + //------------------------------------------------------------------------- + // Protected utility methods for use by subclasses. These are very handy + // for implementing add, roll, and computeFields. + //------------------------------------------------------------------------- + + /** + * Adjust the specified field so that it is within + * the allowable range for the date to which this calendar is set. + * For example, in a Gregorian calendar pinning the {@link #UCalendarDateFields DAY_OF_MONTH} + * field for a calendar set to April 31 would cause it to be set + * to April 30. + * <p> + * <b>Subclassing:</b> + * <br> + * This utility method is intended for use by subclasses that need to implement + * their own overrides of {@link #roll roll} and {@link #add add}. + * <p> + * <b>Note:</b> + * <code>pinField</code> is implemented in terms of + * {@link #getActualMinimum getActualMinimum} + * and {@link #getActualMaximum getActualMaximum}. If either of those methods uses + * a slow, iterative algorithm for a particular field, it would be + * unwise to attempt to call <code>pinField</code> for that field. If you + * really do need to do so, you should override this method to do + * something more efficient for that field. + * <p> + * @param field The calendar field whose value should be pinned. + * @param status Output param set to failure code on function return + * when this function fails. 
+ * + * @see #getActualMinimum + * @see #getActualMaximum + * @stable ICU 2.0 + */ + virtual void pinField(UCalendarDateFields field, UErrorCode& status); + + /** + * Return the week number of a day, within a period. This may be the week number in + * a year or the week number in a month. Usually this will be a value >= 1, but if + * some initial days of the period are excluded from week 1, because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1, then + * the week number will be zero for those + * initial days. This method requires the day number and day of week for some + * known date in the period in order to determine the day of week + * on the desired day. + * <p> + * <b>Subclassing:</b> + * <br> + * This method is intended for use by subclasses in implementing their + * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods. + * It is often useful in {@link #getActualMinimum getActualMinimum} and + * {@link #getActualMaximum getActualMaximum} as well. + * <p> + * This variant is handy for computing the week number of some other + * day of a period (often the first or last day of the period) when its day + * of the week is not known but the day number and day of week for some other + * day in the period (e.g. the current date) <em>is</em> known. + * <p> + * @param desiredDay The {@link #UCalendarDateFields DAY_OF_YEAR} or + * {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired. + * Should be 1 for the first day of the period. + * + * @param dayOfPeriod The {@link #UCalendarDateFields DAY_OF_YEAR} + * or {@link #UCalendarDateFields DAY_OF_MONTH} for a day in the period whose + * {@link #UCalendarDateFields DAY_OF_WEEK} is specified by the + * <code>dayOfWeek</code> parameter. + * Should be 1 for first day of period. + * + * @param dayOfWeek The {@link #UCalendarDateFields DAY_OF_WEEK} for the day + * corresponding to the <code>dayOfPeriod</code> parameter. 
+ * 1-based with 1=Sunday. + * + * @return The week number (one-based), or zero if the day falls before + * the first week because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} + * is more than one. + * + * @stable ICU 2.8 + */ + int32_t weekNumber(int32_t desiredDay, int32_t dayOfPeriod, int32_t dayOfWeek); + + +#ifndef U_HIDE_INTERNAL_API + /** + * Return the week number of a day, within a period. This may be the week number in + * a year, or the week number in a month. Usually this will be a value >= 1, but if + * some initial days of the period are excluded from week 1, because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} is > 1, + * then the week number will be zero for those + * initial days. This method requires the day of week for the given date in order to + * determine the result. + * <p> + * <b>Subclassing:</b> + * <br> + * This method is intended for use by subclasses in implementing their + * {@link #computeTime computeTime} and/or {@link #computeFields computeFields} methods. + * It is often useful in {@link #getActualMinimum getActualMinimum} and + * {@link #getActualMaximum getActualMaximum} as well. + * <p> + * @param dayOfPeriod The {@link #UCalendarDateFields DAY_OF_YEAR} or + * {@link #UCalendarDateFields DAY_OF_MONTH} whose week number is desired. + * Should be 1 for the first day of the period. + * + * @param dayOfWeek The {@link #UCalendarDateFields DAY_OF_WEEK} for the day + * corresponding to the <code>dayOfPeriod</code> parameter. + * 1-based with 1=Sunday. + * + * @return The week number (one-based), or zero if the day falls before + * the first week because + * {@link #getMinimalDaysInFirstWeek getMinimalDaysInFirstWeek} + * is more than one. 
+ * @internal + */ + inline int32_t weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek); + + /** + * returns the local DOW, valid range 0..6 + * @internal + */ + int32_t getLocalDOW(); +#endif /* U_HIDE_INTERNAL_API */ + +private: + + /** + * The next available value for fStamp[] + */ + int32_t fNextStamp;// = MINIMUM_USER_STAMP; + + /** + * Recalculates the time stamp array (fStamp). + * Resets fNextStamp to lowest next stamp value. + */ + void recalculateStamp(); + + /** + * The current time set for the calendar. + */ + UDate fTime; + + /** + * @see #setLenient + */ + UBool fLenient; + + /** + * Time zone affects the time calculation done by Calendar. Calendar subclasses use + * the time zone data to produce the local time. Always set; never NULL. + */ + TimeZone* fZone; + + /** + * Option for repeated wall time + * @see #setRepeatedWallTimeOption + */ + UCalendarWallTimeOption fRepeatedWallTime; + + /** + * Option for skipped wall time + * @see #setSkippedWallTimeOption + */ + UCalendarWallTimeOption fSkippedWallTime; + + /** + * Both firstDayOfWeek and minimalDaysInFirstWeek are locale-dependent. They are + * used to figure out the week count for a specific date for a given locale. These + * must be set when a Calendar is constructed. For example, in US locale, + * firstDayOfWeek is SUNDAY; minimalDaysInFirstWeek is 1. They are used to figure + * out the week count for a specific date for a given locale. These must be set when + * a Calendar is constructed. + */ + UCalendarDaysOfWeek fFirstDayOfWeek; + uint8_t fMinimalDaysInFirstWeek; + UCalendarDaysOfWeek fWeekendOnset; + int32_t fWeekendOnsetMillis; + UCalendarDaysOfWeek fWeekendCease; + int32_t fWeekendCeaseMillis; + + /** + * Sets firstDayOfWeek and minimalDaysInFirstWeek. Called at Calendar construction + * time. + * + * @param desiredLocale The given locale. + * @param type The calendar type identifier, e.g: gregorian, buddhist, etc. 
+ * @param success Indicates the status of setting the week count data from + * the resource for the given locale. Returns U_ZERO_ERROR if + * constructed successfully. + */ + void setWeekData(const Locale& desiredLocale, const char *type, UErrorCode& success); + + /** + * Recompute the time and update the status fields isTimeSet + * and areFieldsSet. Callers should check isTimeSet and only + * call this method if isTimeSet is false. + * + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid or restricted by + * leniency, this will be set to an error status. + */ + void updateTime(UErrorCode& status); + + /** + * The Gregorian year, as computed by computeGregorianFields() and + * returned by getGregorianYear(). + * @see #computeGregorianFields + */ + int32_t fGregorianYear; + + /** + * The Gregorian month, as computed by computeGregorianFields() and + * returned by getGregorianMonth(). + * @see #computeGregorianFields + */ + int32_t fGregorianMonth; + + /** + * The Gregorian day of the year, as computed by + * computeGregorianFields() and returned by getGregorianDayOfYear(). + * @see #computeGregorianFields + */ + int32_t fGregorianDayOfYear; + + /** + * The Gregorian day of the month, as computed by + * computeGregorianFields() and returned by getGregorianDayOfMonth(). + * @see #computeGregorianFields + */ + int32_t fGregorianDayOfMonth; + + /* calculations */ + + /** + * Compute the Gregorian calendar year, month, and day of month from + * the given Julian day. These values are not stored in fields, but in + * member variables gregorianXxx. Also compute the DAY_OF_WEEK and + * DOW_LOCAL fields. + */ + void computeGregorianAndDOWFields(int32_t julianDay, UErrorCode &ec); + +protected: + + /** + * Compute the Gregorian calendar year, month, and day of month from the + * Julian day. These values are not stored in fields, but in member + * variables gregorianXxx. 
They are used for time zone computations and by + * subclasses that are Gregorian derivatives. Subclasses may call this + * method to perform a Gregorian calendar millis->fields computation. + */ + void computeGregorianFields(int32_t julianDay, UErrorCode &ec); + +private: + + /** + * Compute the fields WEEK_OF_YEAR, YEAR_WOY, WEEK_OF_MONTH, + * DAY_OF_WEEK_IN_MONTH, and DOW_LOCAL from EXTENDED_YEAR, YEAR, + * DAY_OF_WEEK, and DAY_OF_YEAR. The latter fields are computed by the + * subclass based on the calendar system. + * + * <p>The YEAR_WOY field is computed simplistically. It is equal to YEAR + * most of the time, but at the year boundary it may be adjusted to YEAR-1 + * or YEAR+1 to reflect the overlap of a week into an adjacent year. In + * this case, a simple increment or decrement is performed on YEAR, even + * though this may yield an invalid YEAR value. For instance, if the YEAR + * is part of a calendar system with an N-year cycle field CYCLE, then + * incrementing the YEAR may involve incrementing CYCLE and setting YEAR + * back to 0 or 1. This is not handled by this code, and in fact cannot be + * simply handled without having subclasses define an entire parallel set of + * fields for fields larger than or equal to a year. This additional + * complexity is not warranted, since the intention of the YEAR_WOY field is + * to support ISO 8601 notation, so it will typically be used with a + * proleptic Gregorian calendar, which has no field larger than a year. + */ + void computeWeekFields(UErrorCode &ec); + + + /** + * Ensure that each field is within its valid range by calling {@link + * #validateField(int, int&)} on each field that has been set. This method + * should only be called if this calendar is not lenient. + * @see #isLenient + * @see #validateField(int, int&) + * @internal + */ + void validateFields(UErrorCode &status); + + /** + * Validate a single field of this calendar given its minimum and + * maximum allowed value. 
If the field is out of range, + * <code>U_ILLEGAL_ARGUMENT_ERROR</code> will be set. Subclasses may + * use this method in their implementation of {@link + * #validateField(int, int&)}. + * @internal + */ + void validateField(UCalendarDateFields field, int32_t min, int32_t max, UErrorCode& status); + + protected: +#ifndef U_HIDE_INTERNAL_API + /** + * Convert a quasi Julian date to the day of the week. The Julian date used here is + * not a true Julian date, since it is measured from midnight, not noon. Return + * value is one-based. + * + * @param julian The given Julian date number. + * @return Day number from 1..7 (SUN..SAT). + * @internal + */ + static uint8_t julianDayToDayOfWeek(double julian); +#endif /* U_HIDE_INTERNAL_API */ + + private: + char validLocale[ULOC_FULLNAME_CAPACITY]; + char actualLocale[ULOC_FULLNAME_CAPACITY]; + + public: +#if !UCONFIG_NO_SERVICE + /** + * INTERNAL FOR 2.6 -- Registration. + */ + +#ifndef U_HIDE_INTERNAL_API + /** + * Return a StringEnumeration over the locales available at the time of the call, + * including registered locales. + * @return a StringEnumeration over the locales available at the time of the call + * @internal + */ + static StringEnumeration* getAvailableLocales(void); + + /** + * Register a new Calendar factory. The factory will be adopted. + * INTERNAL in 2.6 + * + * Because ICU may choose to cache Calendars internally, this must + * be called at application startup, prior to any calls to + * Calendar::createInstance to avoid undefined behavior. + * + * @param toAdopt the factory instance to be adopted + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this factory + * @internal + */ + static URegistryKey registerFactory(ICUServiceFactory* toAdopt, UErrorCode& status); + + /** + * Unregister a previously-registered CalendarFactory using the key returned from the + * register call. 
Key becomes invalid after a successful call and should not be used again. + * The CalendarFactory corresponding to the key will be deleted. + * INTERNAL in 2.6 + * + * Because ICU may choose to cache Calendars internally, this should + * be called during application shutdown, after all calls to + * Calendar::createInstance to avoid undefined behavior. + * + * @param key the registry key returned by a previous call to registerFactory + * @param status the in/out status code, no special meanings are assigned + * @return TRUE if the factory for the key was successfully unregistered + * @internal + */ + static UBool unregister(URegistryKey key, UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Multiple Calendar Implementation + * @internal + */ + friend class CalendarFactory; + + /** + * Multiple Calendar Implementation + * @internal + */ + friend class CalendarService; + + /** + * Multiple Calendar Implementation + * @internal + */ + friend class DefaultCalendarFactory; +#endif /* !UCONFIG_NO_SERVICE */ + + /** + * @return TRUE if this calendar has a default century (i.e. 03 -> 2003) + * @internal + */ + virtual UBool haveDefaultCentury() const = 0; + + /** + * @return the start of the default century, as a UDate + * @internal + */ + virtual UDate defaultCenturyStart() const = 0; + /** + * @return the beginning year of the default century, as a year + * @internal + */ + virtual int32_t defaultCenturyStartYear() const = 0; + + /** Get the locale for this calendar object. You can choose between valid and actual locale. 
+ * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale + * @stable ICU 2.8 + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const; + + /** + * @return The related Gregorian year; will be obtained by modifying the value + * obtained by get from UCAL_EXTENDED_YEAR field + * @internal + */ + virtual int32_t getRelatedYear(UErrorCode &status) const; + + /** + * @param year The related Gregorian year to set; will be modified as necessary then + * set in UCAL_EXTENDED_YEAR field + * @internal + */ + virtual void setRelatedYear(int32_t year); + +#ifndef U_HIDE_INTERNAL_API + /** Get the locale for this calendar object. You can choose between valid and actual locale. + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale + * @internal + */ + const char* getLocaleID(ULocDataLocaleType type, UErrorCode &status) const; +#endif /* U_HIDE_INTERNAL_API */ + +private: + /** + * Cast TimeZone used by this object to BasicTimeZone, or NULL if the TimeZone + * is not an instance of BasicTimeZone. + */ + BasicTimeZone* getBasicTimeZone() const; + + /** + * Find the previous zone transition near the given time. + * @param base The base time, inclusive + * @param transitionTime Receives the result time + * @param status The error status + * @return TRUE if a transition is found. + */ + UBool getImmediatePreviousZoneTransition(UDate base, UDate *transitionTime, UErrorCode& status) const; + +public: +#ifndef U_HIDE_INTERNAL_API + /** + * Creates a new Calendar from a Locale for the cache. + * This method does not set the time or timezone in returned calendar. + * @param locale the locale. + * @param status any error returned here. + * @return the new Calendar object with no time or timezone set. + * @internal For ICU use only. 
+ */ + static Calendar * U_EXPORT2 makeInstance( + const Locale &locale, UErrorCode &status); + + /** + * Get the calendar type for given locale. + * @param locale the locale + * @param typeBuffer calendar type returned here + * @param typeBufferSize The size of typeBuffer in bytes. If the type + * can't fit in the buffer, this method sets status to + * U_BUFFER_OVERFLOW_ERROR + * @param status error, if any, returned here. + * @internal For ICU use only. + */ + static void U_EXPORT2 getCalendarTypeFromLocale( + const Locale &locale, + char *typeBuffer, + int32_t typeBufferSize, + UErrorCode &status); +#endif /* U_HIDE_INTERNAL_API */ +}; + +// ------------------------------------- + +inline Calendar* +Calendar::createInstance(TimeZone* zone, UErrorCode& errorCode) +{ + // since the Locale isn't specified, use the default locale + return createInstance(zone, Locale::getDefault(), errorCode); +} + +// ------------------------------------- + +inline void +Calendar::roll(UCalendarDateFields field, UBool up, UErrorCode& status) +{ + roll(field, (int32_t)(up ? +1 : -1), status); +} + +#ifndef U_HIDE_DEPRECATED_API +inline void +Calendar::roll(EDateFields field, UBool up, UErrorCode& status) +{ + roll((UCalendarDateFields) field, up, status); +} +#endif /* U_HIDE_DEPRECATED_API */ + + +// ------------------------------------- + +/** + * Fast method for subclasses. The caller must maintain fUserSetDSTOffset and + * fUserSetZoneOffset, as well as the isSet[] array. 
+ */ + +inline void +Calendar::internalSet(UCalendarDateFields field, int32_t value) +{ + fFields[field] = value; + fStamp[field] = kInternallySet; + fIsSet[field] = TRUE; // Remove later +} + + +#ifndef U_HIDE_INTERNAL_API +inline int32_t Calendar::weekNumber(int32_t dayOfPeriod, int32_t dayOfWeek) +{ + return weekNumber(dayOfPeriod, dayOfPeriod, dayOfWeek); +} +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _CALENDAR diff --git a/intl/icu/source/i18n/unicode/choicfmt.h b/intl/icu/source/i18n/unicode/choicfmt.h new file mode 100644 index 000000000..ab3c28fe0 --- /dev/null +++ b/intl/icu/source/i18n/unicode/choicfmt.h @@ -0,0 +1,596 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File CHOICFMT.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 03/20/97 helena Finished first cut of implementation and got rid +* of nextDouble/previousDouble and replaced with +* boolean array. +* 4/10/97 aliu Clean up. Modified to work on AIX. +* 8/6/97 nos Removed overloaded constructor, member var 'buffer'. +* 07/22/98 stephen Removed operator!= (implemented in Format) +******************************************************************************** +*/ + +#ifndef CHOICFMT_H +#define CHOICFMT_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Choice Format. 
+ */ + +#if !UCONFIG_NO_FORMATTING +#ifndef U_HIDE_DEPRECATED_API + +#include "unicode/fieldpos.h" +#include "unicode/format.h" +#include "unicode/messagepattern.h" +#include "unicode/numfmt.h" +#include "unicode/unistr.h" + +U_NAMESPACE_BEGIN + +class MessageFormat; + +/** + * ChoiceFormat converts between ranges of numeric values and strings for those ranges. + * The strings must conform to the MessageFormat pattern syntax. + * + * <p><em><code>ChoiceFormat</code> is probably not what you need. + * Please use <code>MessageFormat</code> + * with <code>plural</code> arguments for proper plural selection, + * and <code>select</code> arguments for simple selection among a fixed set of choices!</em></p> + * + * <p>A <code>ChoiceFormat</code> splits + * the real number line \htmlonly<code>-∞</code> to + * <code>+∞</code>\endhtmlonly into two + * or more contiguous ranges. Each range is mapped to a + * string.</p> + * + * <p><code>ChoiceFormat</code> was originally intended + * for displaying grammatically correct + * plurals such as "There is one file." vs. "There are 2 files." + * <em>However,</em> plural rules for many languages + * are too complex for the capabilities of ChoiceFormat, + * and its requirement of specifying the precise rules for each message + * is unmanageable for translators.</p> + * + * <p>There are two methods of defining a <code>ChoiceFormat</code>; both + * are equivalent. The first is by using a string pattern. This is the + * preferred method in most cases. The second method is through direct + * specification of the arrays that logically make up the + * <code>ChoiceFormat</code>.</p> + * + * <p>Note: Typically, choice formatting is done (if done at all) via <code>MessageFormat</code> + * with a <code>choice</code> argument type, + * rather than using a stand-alone <code>ChoiceFormat</code>.</p> + * + * <h5>Patterns and Their Interpretation</h5> + * + * <p>The pattern string defines the range boundaries and the strings for each number range. 
+ * Syntax: + * <pre> + * choiceStyle = number separator message ('|' number separator message)* + * number = normal_number | ['-'] \htmlonly∞\endhtmlonly (U+221E, infinity) + * normal_number = double value (unlocalized ASCII string) + * separator = less_than | less_than_or_equal + * less_than = '<' + * less_than_or_equal = '#' | \htmlonly≤\endhtmlonly (U+2264) + * message: see {@link MessageFormat} + * </pre> + * Pattern_White_Space between syntax elements is ignored, except + * around each range's sub-message.</p> + * + * <p>Each numeric sub-range extends from the current range's number + * to the next range's number. + * The number itself is included in its range if a <code>less_than_or_equal</code> sign is used, + * and excluded from its range (and instead included in the previous range) + * if a <code>less_than</code> sign is used.</p> + * + * <p>When a <code>ChoiceFormat</code> is constructed from + * arrays of numbers, closure flags and strings, + * they are interpreted just like + * the sequence of <code>(number separator string)</code> in an equivalent pattern string. + * <code>closure[i]==TRUE</code> corresponds to a <code>less_than</code> separator sign. + * The equivalent pattern string will be constructed automatically.</p> + * + * <p>During formatting, a number is mapped to the first range + * where the number is not greater than the range's upper limit. + * That range's message string is returned. A NaN maps to the very first range.</p> + * + * <p>During parsing, a range is selected for the longest match of + * any range's message. That range's number is returned, ignoring the separator/closure. 
+ * Only a simple string match is performed, without parsing of arguments that + * might be specified in the message strings.</p> + * + * <p>Note that the first range's number is ignored in formatting + * but may be returned from parsing.</p> + * + * <h5>Examples</h5> + * + * <p>Here is an example of two arrays that map the number + * <code>1..7</code> to the English day of the week abbreviations + * <code>Sun..Sat</code>. No closures array is given; this is the same as + * specifying all closures to be <code>FALSE</code>.</p> + * + * <pre> {1,2,3,4,5,6,7}, + * {"Sun","Mon","Tue","Wed","Thur","Fri","Sat"}</pre> + * + * <p>Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1, + * +Inf] to three strings. That is, the number line is split into three + * ranges: x < 1.0, x = 1.0, and x > 1.0. + * (The round parentheses in the notation above indicate an exclusive boundary, + * like the turned bracket in European notation: [-Inf, 1) == [-Inf, 1[ )</p> + * + * <pre> {0, 1, 1}, + * {FALSE, FALSE, TRUE}, + * {"no files", "one file", "many files"}</pre> + * + * <p>Here is an example that shows formatting and parsing: </p> + * + * \code + * #include <unicode/choicfmt.h> + * #include <unicode/unistr.h> + * #include <iostream.h> + * + * int main(int argc, char *argv[]) { + * double limits[] = {1,2,3,4,5,6,7}; + * UnicodeString monthNames[] = { + * "Sun","Mon","Tue","Wed","Thu","Fri","Sat"}; + * ChoiceFormat fmt(limits, monthNames, 7); + * UnicodeString str; + * char buf[256]; + * for (double x = 1.0; x <= 8.0; x += 1.0) { + * fmt.format(x, str); + * str.extract(0, str.length(), buf, 256, ""); + * str.truncate(0); + * cout << x << " -> " + * << buf << endl; + * } + * cout << endl; + * return 0; + * } + * \endcode + * + * <p><em>User subclasses are not supported.</em> While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. 
+ * + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ +class U_I18N_API ChoiceFormat: public NumberFormat { +public: + /** + * Constructs a new ChoiceFormat from the pattern string. + * + * @param pattern Pattern used to construct object. + * @param status Output param to receive success code. If the + * pattern cannot be parsed, set to failure code. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + ChoiceFormat(const UnicodeString& pattern, + UErrorCode& status); + + + /** + * Constructs a new ChoiceFormat with the given limits and message strings. + * All closure flags default to <code>FALSE</code>, + * equivalent to <code>less_than_or_equal</code> separators. + * + * Copies the limits and formats instead of adopting them. + * + * @param limits Array of limit values. + * @param formats Array of formats. + * @param count Size of 'limits' and 'formats' arrays. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + ChoiceFormat(const double* limits, + const UnicodeString* formats, + int32_t count ); + + /** + * Constructs a new ChoiceFormat with the given limits, closure flags and message strings. + * + * Copies the limits and formats instead of adopting them. + * + * @param limits Array of limit values + * @param closures Array of booleans specifying whether each + * element of 'limits' is open or closed. If FALSE, then the + * corresponding limit number is a member of its range. + * If TRUE, then the limit number belongs to the previous range instead. + * @param formats Array of formats + * @param count Size of 'limits', 'closures', and 'formats' arrays + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + ChoiceFormat(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t count); + + /** + * Copy constructor.
+ * + * @param that ChoiceFormat object to be copied from + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + ChoiceFormat(const ChoiceFormat& that); + + /** + * Assignment operator. + * + * @param that ChoiceFormat object to be copied + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + const ChoiceFormat& operator=(const ChoiceFormat& that); + + /** + * Destructor. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual ~ChoiceFormat(); + + /** + * Clones this Format object. The caller owns the + * result and must delete it when done. + * + * @return a copy of this object + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual Format* clone(void) const; + + /** + * Returns true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * + * @param other ChoiceFormat object to be compared + * @return true if other is the same as this. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UBool operator==(const Format& other) const; + + /** + * Sets the pattern. + * @param pattern The pattern to be applied. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void applyPattern(const UnicodeString& pattern, + UErrorCode& status); + + /** + * Sets the pattern. + * @param pattern The pattern to be applied. + * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. 
+ * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void applyPattern(const UnicodeString& pattern, + UParseError& parseError, + UErrorCode& status); + /** + * Gets the pattern. + * + * @param pattern Output param which will receive the pattern + * Previous contents are deleted. + * @return A reference to 'pattern' + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& toPattern(UnicodeString &pattern) const; + + /** + * Sets the choices to be used in formatting. + * For details see the constructor with the same parameter list. + * + * @param limitsToCopy Contains the top value that you want + * parsed with that format, and should be in + * ascending sorted order. When formatting X, + * the choice will be the i, where limit[i] + * <= X < limit[i+1]. + * @param formatsToCopy The format strings you want to use for each limit. + * @param count The size of the above arrays. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void setChoices(const double* limitsToCopy, + const UnicodeString* formatsToCopy, + int32_t count ); + + /** + * Sets the choices to be used in formatting. + * For details see the constructor with the same parameter list. + * + * @param limits Array of limits + * @param closures Array of limit booleans + * @param formats Array of format strings + * @param count The size of the above arrays + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual void setChoices(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t count); + + /** + * Returns NULL and 0. + * Before ICU 4.8, this used to return the choice limits array. + * + * @param count Will be set to 0. + * @return NULL + * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
+ */ + virtual const double* getLimits(int32_t& count) const; + + /** + * Returns NULL and 0. + * Before ICU 4.8, this used to return the limit booleans array. + * + * @param count Will be set to 0. + * @return NULL + * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern. + */ + virtual const UBool* getClosures(int32_t& count) const; + + /** + * Returns NULL and 0. + * Before ICU 4.8, this used to return the array of choice strings. + * + * @param count Will be set to 0. + * @return NULL + * @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern. + */ + virtual const UnicodeString* getFormats(int32_t& count) const; + + + using NumberFormat::format; + + /** + * Formats a double number using this object's choices. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos) const; + /** + * Formats an int32_t number using this object's choices. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Formats an int64_t number using this object's choices. + * + * @param number The value to be formatted. 
+ * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Formats an array of objects using this object's choices. + * + * @param objs The array of objects to be formatted. + * @param cnt The size of objs. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param success Output param set to success/failure code on + * exit. + * @return Reference to 'appendTo' parameter. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UnicodeString& format(const Formattable* objs, + int32_t cnt, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& success) const; + + using NumberFormat::parse; + + /** + * Looks for the longest match of any message string on the input text and, + * if there is a match, sets the result object to the corresponding range's number. + * + * If no string matches, then the parsePosition is unchanged. + * + * @param text The text to be parsed. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parsePosition The position to start parsing at on input. + * On output, moved to after the last successfully + * parsed character. On parse failure, does not change. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments.
+ */ + virtual void parse(const UnicodeString& text, + Formattable& result, + ParsePosition& parsePosition) const; + + /** + * Returns a unique class ID POLYMORPHICALLY. Part of ICU's "poor man's RTTI". + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Returns the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @deprecated ICU 49 Use MessageFormat instead, with plural and select arguments. + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +private: + /** + * Converts a double value to a string. + * @param value the double number to be converted. + * @param string the result string. + * @return the converted string. + */ + static UnicodeString& dtos(double value, UnicodeString& string); + + ChoiceFormat(); // default constructor not implemented + + /** + * Construct a new ChoiceFormat with the limits and the corresponding formats + * based on the pattern. + * + * @param newPattern Pattern used to construct object. + * @param parseError Struct to receive information on position + * of error if an error is encountered. + * @param status Output param to receive success code. If the + * pattern cannot be parsed, set to failure code. 
+ */ + ChoiceFormat(const UnicodeString& newPattern, + UParseError& parseError, + UErrorCode& status); + + friend class MessageFormat; + + virtual void setChoices(const double* limits, + const UBool* closures, + const UnicodeString* formats, + int32_t count, + UErrorCode &errorCode); + + /** + * Finds the ChoiceFormat sub-message for the given number. + * @param pattern A MessagePattern. + * @param partIndex the index of the first ChoiceFormat argument style part. + * @param number a number to be mapped to one of the ChoiceFormat argument's intervals + * @return the sub-message start part index. + */ + static int32_t findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number); + + static double parseArgument( + const MessagePattern &pattern, int32_t partIndex, + const UnicodeString &source, ParsePosition &pos); + + /** + * Matches the pattern string from the end of the partIndex to + * the beginning of the limitPartIndex, + * including all syntax except SKIP_SYNTAX, + * against the source string starting at sourceOffset. + * If they match, returns the length of the source string match. + * Otherwise returns -1. + */ + static int32_t matchStringUntilLimitPart( + const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex, + const UnicodeString &source, int32_t sourceOffset); + + /** + * Some of the ChoiceFormat constructors do not have a UErrorCode parameter. + * We need _some_ way to provide one for the MessagePattern constructor. + * Alternatively, the MessagePattern could be a pointer field, but that is + * not nice either. + */ + UErrorCode constructorErrorCode; + + /** + * The MessagePattern which contains the parsed structure of the pattern string. + * + * Starting with ICU 4.8, the MessagePattern contains a sequence of + * numeric/selector/message parts corresponding to the parsed pattern. + * For details see the MessagePattern class API docs.
+ */ + MessagePattern msgPattern; + + /** + * Docs & fields from before ICU 4.8, before MessagePattern was used. + * Commented out, and left only for explanation of semantics. + * -------- + * Each ChoiceFormat divides the range -Inf..+Inf into fCount + * intervals. The intervals are: + * + * 0: fChoiceLimits[0]..fChoiceLimits[1] + * 1: fChoiceLimits[1]..fChoiceLimits[2] + * ... + * fCount-2: fChoiceLimits[fCount-2]..fChoiceLimits[fCount-1] + * fCount-1: fChoiceLimits[fCount-1]..+Inf + * + * Interval 0 is special; during formatting (mapping numbers to + * strings), it also contains all numbers less than + * fChoiceLimits[0], as well as NaN values. + * + * Interval i maps to and from string fChoiceFormats[i]. When + * parsing (mapping strings to numbers), then intervals map to + * their lower limit, that is, interval i maps to fChoiceLimits[i]. + * + * The intervals may be closed, half open, or open. This affects + * formatting but does not affect parsing. Interval i is affected + * by fClosures[i] and fClosures[i+1]. If fClosures[i] + * is FALSE, then the value fChoiceLimits[i] is in interval i. + * That is, intervals i-1 and i are: + * + * i-1: ... x < fChoiceLimits[i] + * i: fChoiceLimits[i] <= x ... + * + * If fClosures[i] is TRUE, then the value fChoiceLimits[i] is + * in interval i-1. That is, intervals i-1 and i are: + * + * i-1: ... x <= fChoiceLimits[i] + * i: fChoiceLimits[i] < x ... + * + * Because of the nature of interval 0, fClosures[0] has no + * effect.
+ */ + // double* fChoiceLimits; + // UBool* fClosures; + // UnicodeString* fChoiceFormats; + // int32_t fCount; +}; + + +U_NAMESPACE_END + +#endif // U_HIDE_DEPRECATED_API +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // CHOICFMT_H +//eof diff --git a/intl/icu/source/i18n/unicode/coleitr.h b/intl/icu/source/i18n/unicode/coleitr.h new file mode 100644 index 000000000..4feaa1b55 --- /dev/null +++ b/intl/icu/source/i18n/unicode/coleitr.h @@ -0,0 +1,406 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ****************************************************************************** + * Copyright (C) 1997-2014, International Business Machines + * Corporation and others. All Rights Reserved. + ****************************************************************************** + */ + +/** + * \file + * \brief C++ API: Collation Element Iterator. + */ + +/** +* File coleitr.h +* +* Created by: Helena Shih +* +* Modification History: +* +* Date Name Description +* +* 8/18/97 helena Added internal API documentation. +* 08/03/98 erm Synched with 1.2 version CollationElementIterator.java +* 12/10/99 aliu Ported Thai collation support from Java. +* 01/25/01 swquek Modified into a C++ wrapper calling C APIs (ucoliter.h) +* 02/19/01 swquek Removed CollationElementsIterator() since it is +* private constructor and no calls are made to it +* 2012-2014 markus Rewritten in C++ again. +*/ + +#ifndef COLEITR_H +#define COLEITR_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/unistr.h" +#include "unicode/uobject.h" + +struct UCollationElements; +struct UHashtable; + +U_NAMESPACE_BEGIN + +struct CollationData; + +class CollationIterator; +class RuleBasedCollator; +class UCollationPCE; +class UVector32; + +/** +* The CollationElementIterator class is used as an iterator to walk through +* each character of an international string. 
Use the iterator to return the +* ordering priority of the positioned character. The ordering priority of a +* character, which we refer to as a key, defines how a character is collated in +* the given collation object. +* For example, consider the following in Slovak and in traditional Spanish collation: +* <pre> +* "ca" -> the first key is key('c') and second key is key('a'). +* "cha" -> the first key is key('ch') and second key is key('a').</pre> +* And in German phonebook collation, +* <pre> \htmlonly "æb"-> the first key is key('a'), the second key is key('e'), and +* the third key is key('b'). \endhtmlonly </pre> +* The key of a character, is an integer composed of primary order(short), +* secondary order(char), and tertiary order(char). Java strictly defines the +* size and signedness of its primitive data types. Therefore, the static +* functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return +* int32_t to ensure the correctness of the key value. +* <p>Example of the iterator usage: (without error checking) +* <pre> +* \code +* void CollationElementIterator_Example() +* { +* UnicodeString str = "This is a test"; +* UErrorCode success = U_ZERO_ERROR; +* RuleBasedCollator* rbc = +* (RuleBasedCollator*) RuleBasedCollator::createInstance(success); +* CollationElementIterator* c = +* rbc->createCollationElementIterator( str ); +* int32_t order = c->next(success); +* c->reset(); +* order = c->previous(success); +* delete c; +* delete rbc; +* } +* \endcode +* </pre> +* <p> +* The method next() returns the collation order of the next character based on +* the comparison level of the collator. The method previous() returns the +* collation order of the previous character based on the comparison level of +* the collator. The Collation Element Iterator moves only in one direction +* between calls to reset(), setOffset(), or setText(). That is, next() +* and previous() can not be inter-used. 
Whenever previous() is to be called after +* next() or vice versa, reset(), setOffset() or setText() has to be called first +* to reset the status, shifting pointers to either the end or the start of +* the string (reset() or setText()), or the specified position (setOffset()). +* Hence at the next call of next() or previous(), the first or last collation order, +* or collation order at the specified position will be returned. If a change of +* direction is done without one of these calls, the result is undefined. +* <p> +* The result of a forward iterate (next()) and reversed result of the backward +* iterate (previous()) on the same string are equivalent, if collation orders +* with the value 0 are ignored. +* Character based on the comparison level of the collator. A collation order +* consists of primary order, secondary order and tertiary order. The data +* type of the collation order is <strong>int32_t</strong>. +* +* Note, CollationElementIterator should not be subclassed. +* @see Collator +* @see RuleBasedCollator +* @version 1.8 Jan 16 2001 +*/ +class U_I18N_API CollationElementIterator U_FINAL : public UObject { +public: + + // CollationElementIterator public data member ------------------------------ + + enum { + /** + * NULLORDER indicates that an error has occurred while processing + * @stable ICU 2.0 + */ + NULLORDER = (int32_t)0xffffffff + }; + + // CollationElementIterator public constructor/destructor ------------------- + + /** + * Copy constructor.
+ * + * @param other the object to be copied from + * @stable ICU 2.0 + */ + CollationElementIterator(const CollationElementIterator& other); + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~CollationElementIterator(); + + // CollationElementIterator public methods ---------------------------------- + + /** + * Returns true if "other" is the same as "this" + * + * @param other the object to be compared + * @return true if "other" is the same as "this" + * @stable ICU 2.0 + */ + UBool operator==(const CollationElementIterator& other) const; + + /** + * Returns true if "other" is not the same as "this". + * + * @param other the object to be compared + * @return true if "other" is not the same as "this" + * @stable ICU 2.0 + */ + UBool operator!=(const CollationElementIterator& other) const; + + /** + * Resets the cursor to the beginning of the string. + * @stable ICU 2.0 + */ + void reset(void); + + /** + * Gets the ordering priority of the next character in the string. + * @param status the error code status. + * @return the next character's ordering. otherwise returns NULLORDER if an + * error has occurred or if the end of string has been reached + * @stable ICU 2.0 + */ + int32_t next(UErrorCode& status); + + /** + * Get the ordering priority of the previous collation element in the string. + * @param status the error code status. + * @return the previous element's ordering. otherwise returns NULLORDER if an + * error has occurred or if the start of string has been reached + * @stable ICU 2.0 + */ + int32_t previous(UErrorCode& status); + + /** + * Gets the primary order of a collation order. + * @param order the collation order + * @return the primary order of a collation order. + * @stable ICU 2.0 + */ + static inline int32_t primaryOrder(int32_t order); + + /** + * Gets the secondary order of a collation order. + * @param order the collation order + * @return the secondary order of a collation order.
+ * @stable ICU 2.0 + */ + static inline int32_t secondaryOrder(int32_t order); + + /** + * Gets the tertiary order of a collation order. + * @param order the collation order + * @return the tertiary order of a collation order. + * @stable ICU 2.0 + */ + static inline int32_t tertiaryOrder(int32_t order); + + /** + * Return the maximum length of any expansion sequences that end with the + * specified comparison order. + * @param order a collation order returned by previous or next. + * @return maximum size of the expansion sequences ending with the collation + * element or 1 if collation element does not occur at the end of any + * expansion sequence + * @stable ICU 2.0 + */ + int32_t getMaxExpansion(int32_t order) const; + + /** + * Gets the comparison order in the desired strength. Ignore the other + * differences. + * @param order The order value + * @stable ICU 2.0 + */ + int32_t strengthOrder(int32_t order) const; + + /** + * Sets the source string. + * @param str the source string. + * @param status the error code status. + * @stable ICU 2.0 + */ + void setText(const UnicodeString& str, UErrorCode& status); + + /** + * Sets the source string. + * @param str the source character iterator. + * @param status the error code status. + * @stable ICU 2.0 + */ + void setText(CharacterIterator& str, UErrorCode& status); + + /** + * Checks if a comparison order is ignorable. + * @param order the collation order. + * @return TRUE if a character is ignorable, FALSE otherwise. + * @stable ICU 2.0 + */ + static inline UBool isIgnorable(int32_t order); + + /** + * Gets the offset of the currently processed character in the source string. + * @return the offset of the character. + * @stable ICU 2.0 + */ + int32_t getOffset(void) const; + + /** + * Sets the offset of the currently processed character in the source string. + * @param newOffset the new offset. + * @param status the error code status. + * @return the offset of the character. 
+ * @stable ICU 2.0 + */ + void setOffset(int32_t newOffset, UErrorCode& status); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) { + return reinterpret_cast<CollationElementIterator *>(uc); + } + /** @internal */ + static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) { + return reinterpret_cast<const CollationElementIterator *>(uc); + } + /** @internal */ + inline UCollationElements *toUCollationElements() { + return reinterpret_cast<UCollationElements *>(this); + } + /** @internal */ + inline const UCollationElements *toUCollationElements() const { + return reinterpret_cast<const UCollationElements *>(this); + } +#endif // U_HIDE_INTERNAL_API + +private: + friend class RuleBasedCollator; + friend class UCollationPCE; + + /** + * CollationElementIterator constructor. This takes the source string and the + * collation object. The cursor will walk thru the source string based on the + * predefined collation rules. If the source string is empty, NULLORDER will + * be returned on the calls to next(). + * @param sourceText the source string. + * @param order the collation object. + * @param status the error code status. + */ + CollationElementIterator(const UnicodeString& sourceText, + const RuleBasedCollator* order, UErrorCode& status); + // Note: The constructors should take settings & tailoring, not a collator, + // to avoid circular dependencies. + // However, for operator==() we would need to be able to compare tailoring data for equality + // without making CollationData or CollationTailoring depend on TailoredSet. 
+ // (See the implementation of RuleBasedCollator::operator==().) + // That might require creating an intermediate class that would be used + // by both CollationElementIterator and RuleBasedCollator + // but only contain the part of RBC== related to data and rules. + + /** + * CollationElementIterator constructor. This takes the source string and the + * collation object. The cursor will walk thru the source string based on the + * predefined collation rules. If the source string is empty, NULLORDER will + * be returned on the calls to next(). + * @param sourceText the source string. + * @param order the collation object. + * @param status the error code status. + */ + CollationElementIterator(const CharacterIterator& sourceText, + const RuleBasedCollator* order, UErrorCode& status); + + /** + * Assignment operator + * + * @param other the object to be copied + */ + const CollationElementIterator& + operator=(const CollationElementIterator& other); + + CollationElementIterator(); // default constructor not implemented + + /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */ + inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; } + + static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode); + + static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order); + + // CollationElementIterator private data members ---------------------------- + + CollationIterator *iter_; // owned + const RuleBasedCollator *rbc_; // aliased + uint32_t otherHalf_; + /** + * <0: backwards; 0: just after reset() (previous() begins from end); + * 1: just after setOffset(); >1: forward + */ + int8_t dir_; + /** + * Stores offsets from expansions and from unsafe-backwards iteration, + * so that getOffset() returns intermediate offsets for the CEs + * that are consistent with forward iteration. 
+ */ + UVector32 *offsets_; + + UnicodeString string_; +}; + +// CollationElementIterator inline method definitions -------------------------- + +inline int32_t CollationElementIterator::primaryOrder(int32_t order) +{ + return (order >> 16) & 0xffff; // bits 16..31 of order, shifted down to a 16-bit value +} + +inline int32_t CollationElementIterator::secondaryOrder(int32_t order) +{ + return (order >> 8) & 0xff; // bits 8..15 of order +} + +inline int32_t CollationElementIterator::tertiaryOrder(int32_t order) +{ + return order & 0xff; // bits 0..7 of order +} + +inline UBool CollationElementIterator::isIgnorable(int32_t order) +{ + return (order & 0xffff0000) == 0; // true when the upper 16 bits (the part primaryOrder() extracts) are all zero +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/coll.h b/intl/icu/source/i18n/unicode/coll.h new file mode 100644 index 000000000..e41be2ee8 --- /dev/null +++ b/intl/icu/source/i18n/unicode/coll.h @@ -0,0 +1,1274 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1996-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +*/ + +/** + * \file + * \brief C++ API: Collation Service. + */ + +/** +* File coll.h +* +* Created by: Helena Shih +* +* Modification History: +* +* Date Name Description +* 02/5/97 aliu Modified createDefault to load collation data from +* binary files when possible. Added related methods +* createCollationFromFile, chopLocale, createPathName. +* 02/11/97 aliu Added members addToCache, findInCache, and fgCache. +* 02/12/97 aliu Modified to create objects from RuleBasedCollator cache. +* Moved cache out of Collation class. +* 02/13/97 aliu Moved several methods out of this class and into +* RuleBasedCollator, with modifications. Modified +* createDefault() to call new RuleBasedCollator(Locale&) +* constructor. 
General clean up and documentation. +* 02/20/97 helena Added clone, operator==, operator!=, operator=, copy +* constructor and getDynamicClassID. +* 03/25/97 helena Updated with platform independent data types. +* 05/06/97 helena Added memory allocation error detection. +* 06/20/97 helena Java class name change. +* 09/03/97 helena Added createCollationKeyValues(). +* 02/10/98 damiba Added compare() with length as parameter. +* 04/23/99 stephen Removed EDecompositionMode, merged with +* Normalizer::EMode. +* 11/02/99 helena Collator performance enhancements. Eliminates the +* UnicodeString construction and special case for NO_OP. +* 11/23/99 srl More performance enhancements. Inlining of +* critical accessors. +* 05/15/00 helena Added version information API. +* 01/29/01 synwee Modified into a C++ wrapper which calls C apis +* (ucol.h). +* 2012-2014 markus Rewritten in C++ again. +*/ + +#ifndef COLL_H +#define COLL_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/uobject.h" +#include "unicode/ucol.h" +#include "unicode/normlzr.h" +#include "unicode/locid.h" +#include "unicode/uniset.h" +#include "unicode/umisc.h" +#include "unicode/uiter.h" +#include "unicode/stringpiece.h" + +U_NAMESPACE_BEGIN + +class StringEnumeration; + +#if !UCONFIG_NO_SERVICE +/** + * @stable ICU 2.6 + */ +class CollatorFactory; +#endif + +/** +* @stable ICU 2.0 +*/ +class CollationKey; + +/** +* The <code>Collator</code> class performs locale-sensitive string +* comparison.<br> +* You use this class to build searching and sorting routines for natural +* language text. +* <p> +* <code>Collator</code> is an abstract base class. Subclasses implement +* specific collation strategies. One subclass, +* <code>RuleBasedCollator</code>, is currently provided and is applicable +* to a wide set of languages. Other subclasses may be created to handle more +* specialized needs. 
+* <p> +* Like other locale-sensitive classes, you can use the static factory method, +* <code>createInstance</code>, to obtain the appropriate +* <code>Collator</code> object for a given locale. You will only need to +* look at the subclasses of <code>Collator</code> if you need to +* understand the details of a particular collation strategy or if you need to +* modify that strategy. +* <p> +* The following example shows how to compare two strings using the +* <code>Collator</code> for the default locale. +* \htmlonly<blockquote>\endhtmlonly +* <pre> +* \code +* // Compare two strings in the default locale +* UErrorCode success = U_ZERO_ERROR; +* Collator* myCollator = Collator::createInstance(success); +* if (myCollator->compare("abc", "ABC") < 0) +* cout << "abc is less than ABC" << endl; +* else +* cout << "abc is greater than or equal to ABC" << endl; +* \endcode +* </pre> +* \htmlonly</blockquote>\endhtmlonly +* <p> +* You can set a <code>Collator</code>'s <em>strength</em> attribute to +* determine the level of difference considered significant in comparisons. +* Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>, +* <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>. +* The exact assignment of strengths to language features is locale dependent. +* For example, in Czech, "e" and "f" are considered primary differences, +* while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary +* differences and "e" and "e" are identical. The following shows how both case +* and accents could be ignored for US English. 
+* \htmlonly<blockquote>\endhtmlonly +* <pre> +* \code +* //Get the Collator for US English and set its strength to PRIMARY +* UErrorCode success = U_ZERO_ERROR; +* Collator* usCollator = Collator::createInstance(Locale::getUS(), success); +* usCollator->setStrength(Collator::PRIMARY); +* if (usCollator->compare("abc", "ABC") == 0) +* cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl; +* \endcode +* </pre> +* \htmlonly</blockquote>\endhtmlonly +* +* The <code>getSortKey</code> methods +* convert a string to a series of bytes that can be compared bitwise against +* other sort keys using <code>strcmp()</code>. Sort keys are written as +* zero-terminated byte strings. +* +* Another set of APIs returns a <code>CollationKey</code> object that wraps +* the sort key bytes instead of returning the bytes themselves. +* </p> +* <p> +* <strong>Note:</strong> <code>Collator</code>s with different Locale, +* and CollationStrength settings will return different sort +* orders for the same set of strings. Locales have specific collation rules, +* and the way in which secondary and tertiary differences are taken into +* account, for example, will result in a different sorting order for same +* strings. +* </p> +* @see RuleBasedCollator +* @see CollationKey +* @see CollationElementIterator +* @see Locale +* @see Normalizer +* @version 2.0 11/15/01 +*/ + +class U_I18N_API Collator : public UObject { +public: + + // Collator public enums ----------------------------------------------- + + /** + * Base letter represents a primary difference. Set comparison level to + * PRIMARY to ignore secondary and tertiary differences.<br> + * Use this to set the strength of a Collator object.<br> + * Example of primary difference, "abc" < "abd" + * + * Diacritical differences on the same base letter represent a secondary + * difference. Set comparison level to SECONDARY to ignore tertiary + * differences. 
Use this to set the strength of a Collator object.<br> + * Example of secondary difference, "ä" >> "a". + * + * Uppercase and lowercase versions of the same character represents a + * tertiary difference. Set comparison level to TERTIARY to include all + * comparison differences. Use this to set the strength of a Collator + * object.<br> + * Example of tertiary difference, "abc" <<< "ABC". + * + * Two characters are considered "identical" when they have the same unicode + * spellings.<br> + * For example, "ä" == "ä". + * + * UCollationStrength is also used to determine the strength of sort keys + * generated from Collator objects. + * @stable ICU 2.0 + */ + enum ECollationStrength + { + PRIMARY = UCOL_PRIMARY, // 0 + SECONDARY = UCOL_SECONDARY, // 1 + TERTIARY = UCOL_TERTIARY, // 2 + QUATERNARY = UCOL_QUATERNARY, // 3 + IDENTICAL = UCOL_IDENTICAL // 15 + }; + + + // Cannot use #ifndef U_HIDE_DEPRECATED_API for the following, it is + // used by virtual methods that cannot have that conditional. + /** + * LESS is returned if source string is compared to be less than target + * string in the compare() method. + * EQUAL is returned if source string is compared to be equal to target + * string in the compare() method. + * GREATER is returned if source string is compared to be greater than + * target string in the compare() method. + * @see Collator#compare + * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h + */ + enum EComparisonResult + { + LESS = UCOL_LESS, // -1 + EQUAL = UCOL_EQUAL, // 0 + GREATER = UCOL_GREATER // 1 + }; + + // Collator public destructor ----------------------------------------- + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~Collator(); + + // Collator public methods -------------------------------------------- + + /** + * Returns TRUE if "other" is the same as "this". + * + * The base class implementation returns TRUE if "other" has the same type/class as "this": + * <code>typeid(*this) == typeid(other)</code>. 
+ * + * Subclass implementations should do something like the following: + * <pre> + * if (this == &other) { return TRUE; } + * if (!Collator::operator==(other)) { return FALSE; } // not the same class + * + * const MyCollator &o = (const MyCollator&)other; + * (compare this vs. o's subclass fields) + * </pre> + * @param other Collator object to be compared + * @return TRUE if other is the same as this. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Collator& other) const; + + /** + * Returns true if "other" is not the same as "this". + * Calls ! operator==(const Collator&) const which works for all subclasses. + * @param other Collator object to be compared + * @return TRUE if other is not the same as this. + * @stable ICU 2.0 + */ + virtual UBool operator!=(const Collator& other) const; + + /** + * Makes a copy of this object. + * @return a copy of this object, owned by the caller + * @stable ICU 2.0 + */ + virtual Collator* clone(void) const = 0; + + /** + * Creates the Collator object for the current default locale. + * The default locale is determined by Locale::getDefault. + * The UErrorCode& err parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check the + * value of U_SUCCESS(err). If you wish more detailed information, you can + * check for informational error results which still indicate success. + * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_ERROR indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * + * @param err the error code status. 
+ * @return the collation object of the default locale.(for example, en_US) + * @see Locale#getDefault + * @stable ICU 2.0 + */ + static Collator* U_EXPORT2 createInstance(UErrorCode& err); + + /** + * Gets the collation object for the desired locale. The + * resource of the desired locale will be loaded. + * + * Locale::getRoot() is the base collation table and all other languages are + * built on top of it with additional language-specific modifications. + * + * For some languages, multiple collation types are available; + * for example, "de@collation=phonebook". + * Starting with ICU 54, collation attributes can be specified via locale keywords as well, + * in the old locale extension syntax ("el@colCaseFirst=upper") + * or in language tag syntax ("el-u-kf-upper"). + * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>. + * + * The UErrorCode& err parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_ERROR indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * + * The caller owns the returned object and is responsible for deleting it. + * @param loc The locale ID for which to open a collator. + * @param err the error code status. + * @return the created table-based collation object based on the desired + * locale. 
+ * @see Locale + * @see ResourceLoader + * @stable ICU 2.2 + */ + static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err); + + /** + * The comparison function compares the character data stored in two + * different strings. Returns information about whether a string is less + * than, greater than or equal to another string. + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @return Returns a byte value. GREATER if source is greater + * than target; EQUAL if source is equal to target; LESS if source is less + * than target + * @deprecated ICU 2.6 use the overload with UErrorCode & + */ + virtual EComparisonResult compare(const UnicodeString& source, + const UnicodeString& target) const; + + /** + * The comparison function compares the character data stored in two + * different strings. Returns information about whether a string is less + * than, greater than or equal to another string. + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + UErrorCode &status) const = 0; + + /** + * Does the same thing as compare but limits the comparison to a specified + * length + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param length the length the comparison is limited to + * @return Returns a byte value. 
GREATER if source (up to the specified + * length) is greater than target; EQUAL if source (up to specified + * length) is equal to target; LESS if source (up to the specified + * length) is less than target. + * @deprecated ICU 2.6 use the overload with UErrorCode & + */ + virtual EComparisonResult compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length) const; + + /** + * Does the same thing as compare but limits the comparison to a specified + * length + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param length the length the comparison is limited to + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source (up to the specified + * length) is greater than target; UCOL_EQUAL if source (up to specified + * length) is equal to target; UCOL_LESS if source (up to the specified + * length) is less than target. + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length, + UErrorCode &status) const = 0; + + /** + * The comparison function compares the character data stored in two + * different string arrays. Returns information about whether a string array + * is less than, greater than or equal to another string array. + * <p>Example of use: + * <pre> + * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC" + * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc" + * . UErrorCode status = U_ZERO_ERROR; + * . Collator *myCollation = + * . Collator::createInstance(Locale::getUS(), status); + * . if (U_FAILURE(status)) return; + * . myCollation->setStrength(Collator::PRIMARY); + * . // result would be Collator::EQUAL ("abc" == "ABC") + * . // (no primary difference between "abc" and "ABC") + * . Collator::EComparisonResult result = + * . myCollation->compare(abc, 3, ABC, 3); + * . myCollation->setStrength(Collator::TERTIARY); + * . 
// result would be Collator::LESS ("abc" <<< "ABC") + * . // (with tertiary difference between "abc" and "ABC") + * . result = myCollation->compare(abc, 3, ABC, 3); + * </pre> + * @param source the source string array to be compared with. + * @param sourceLength the length of the source string array. If this value + * is equal to -1, the string array is null-terminated. + * @param target the string that is to be compared with the source string. + * @param targetLength the length of the target string array. If this value + * is equal to -1, the string array is null-terminated. + * @return Returns a byte value. GREATER if source is greater than target; + * EQUAL if source is equal to target; LESS if source is less than + * target + * @deprecated ICU 2.6 use the overload with UErrorCode & + */ + virtual EComparisonResult compare(const UChar* source, int32_t sourceLength, + const UChar* target, int32_t targetLength) + const; + + /** + * The comparison function compares the character data stored in two + * different string arrays. Returns information about whether a string array + * is less than, greater than or equal to another string array. + * @param source the source string array to be compared with. + * @param sourceLength the length of the source string array. If this value + * is equal to -1, the string array is null-terminated. + * @param target the string that is to be compared with the source string. + * @param targetLength the length of the target string array. If this value + * is equal to -1, the string array is null-terminated. + * @param status possible error code + * @return Returns an enum value. 
UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UChar* source, int32_t sourceLength, + const UChar* target, int32_t targetLength, + UErrorCode &status) const = 0; + + /** + * Compares two strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. + * This version takes UCharIterator input. + * @param sIter the first ("source") string iterator + * @param tIter the second ("target") string iterator + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 4.2 + */ + virtual UCollationResult compare(UCharIterator &sIter, + UCharIterator &tIter, + UErrorCode &status) const; + + /** + * Compares two UTF-8 strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. + * This version takes UTF-8 input. + * Note that a StringPiece can be implicitly constructed + * from a std::string or a NUL-terminated const char * string. + * @param source the first UTF-8 string + * @param target the second UTF-8 string + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 4.2 + */ + virtual UCollationResult compareUTF8(const StringPiece &source, + const StringPiece &target, + UErrorCode &status) const; + + /** + * Transforms the string into a series of characters that can be compared + * with CollationKey::compareTo. It is not possible to restore the original + * string from the chars in the sort key. + * <p>Use CollationKey::equals or CollationKey::compare to compare the + * generated sort keys. + * If the source string is null, a null collation key will be returned. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. 
+ * + * @param source the source string to be transformed into a sort key. + * @param key the collation key to be filled in + * @param status the error code status. + * @return the collation key of the string based on the collation rules. + * @see CollationKey#compare + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const UnicodeString& source, + CollationKey& key, + UErrorCode& status) const = 0; + + /** + * Transforms the string into a series of characters that can be compared + * with CollationKey::compareTo. It is not possible to restore the original + * string from the chars in the sort key. + * <p>Use CollationKey::equals or CollationKey::compare to compare the + * generated sort keys. + * <p>If the source string is null, a null collation key will be returned. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source the source string to be transformed into a sort key. + * @param sourceLength length of the source string + * @param key the collation key to be filled in + * @param status the error code status. + * @return the collation key of the string based on the collation rules. + * @see CollationKey#compare + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const UChar*source, + int32_t sourceLength, + CollationKey& key, + UErrorCode& status) const = 0; + /** + * Generates the hash code for the collation object + * @stable ICU 2.0 + */ + virtual int32_t hashCode(void) const = 0; + + /** + * Gets the locale of the Collator + * + * @param type can be either requested, valid or actual locale. For more + * information see the definition of ULocDataLocaleType in + * uloc.h + * @param status the error code status. + * @return locale where the collation data lives. If the collator + * was instantiated from rules, locale is empty. + * @deprecated ICU 2.8 This API is under consideration for revision + * in ICU 3.0. 
+ */ + virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0; + + /** + * Convenience method for comparing two strings based on the collation rules. + * @param source the source string to be compared with. + * @param target the target string to be compared with. + * @return true if the first string is greater than the second one, + * according to the collation rules. false, otherwise. + * @see Collator#compare + * @stable ICU 2.0 + */ + UBool greater(const UnicodeString& source, const UnicodeString& target) + const; + + /** + * Convenience method for comparing two strings based on the collation rules. + * @param source the source string to be compared with. + * @param target the target string to be compared with. + * @return true if the first string is greater than or equal to the second + * one, according to the collation rules. false, otherwise. + * @see Collator#compare + * @stable ICU 2.0 + */ + UBool greaterOrEqual(const UnicodeString& source, + const UnicodeString& target) const; + + /** + * Convenience method for comparing two strings based on the collation rules. + * @param source the source string to be compared with. + * @param target the target string to be compared with. + * @return true if the strings are equal according to the collation rules. + * false, otherwise. + * @see Collator#compare + * @stable ICU 2.0 + */ + UBool equals(const UnicodeString& source, const UnicodeString& target) const; + + /** + * Determines the minimum strength that will be used in comparison or + * transformation. + * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored + * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference + * are ignored. + * @return the current comparison level. + * @see Collator#setStrength + * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) 
instead + */ + virtual ECollationStrength getStrength(void) const; + + /** + * Sets the minimum strength to be used in comparison or transformation. + * <p>Example of use: + * <pre> + * \code + * UErrorCode status = U_ZERO_ERROR; + * Collator*myCollation = Collator::createInstance(Locale::getUS(), status); + * if (U_FAILURE(status)) return; + * myCollation->setStrength(Collator::PRIMARY); + * // result will be "abc" == "ABC" + * // tertiary differences will be ignored + * Collator::EComparisonResult result = myCollation->compare("abc", "ABC"); + * \endcode + * </pre> + * @see Collator#getStrength + * @param newStrength the new comparison level. + * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead + */ + virtual void setStrength(ECollationStrength newStrength); + + /** + * Retrieves the reordering codes for this collator. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function + * will only return the length of the result without writing any codes (pre-flighting). + * @param status A reference to an error code value, which must not indicate + * a failure before the function call. + * @return The length of the script ordering array. + * @see ucol_setReorderCodes + * @see Collator#getEquivalentReorderCodes + * @see Collator#setReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ + virtual int32_t getReorderCodes(int32_t *dest, + int32_t destCapacity, + UErrorCode& status) const; + + /** + * Sets the ordering of scripts for this collator. + * + * <p>The reordering codes are a combination of script codes and reorder codes. + * @param reorderCodes An array of script codes in the new order. This can be NULL if the + * length is also set to 0. An empty array will clear any reordering codes on the collator. + * @param reorderCodesLength The length of reorderCodes. 
+ * @param status error code + * @see ucol_setReorderCodes + * @see Collator#getReorderCodes + * @see Collator#getEquivalentReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ + virtual void setReorderCodes(const int32_t* reorderCodes, + int32_t reorderCodesLength, + UErrorCode& status) ; + + /** + * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder + * codes will be grouped and must reorder together. + * Beginning with ICU 55, scripts only reorder together if they are primary-equal, + * for example Hiragana and Katakana. + * + * @param reorderCode The reorder code to determine equivalence for. + * @param dest The array to fill with the script equivalence reordering codes. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the + * function will only return the length of the result without writing any codes (pre-flighting). + * @param status A reference to an error code value, which must not indicate + * a failure before the function call. + * @return The length of the reordering code equivalence array. 
+ * @see ucol_setReorderCodes + * @see Collator#getReorderCodes + * @see Collator#setReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ + static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode, + int32_t* dest, + int32_t destCapacity, + UErrorCode& status); + + /** + * Get name of the object for the desired Locale, in the desired language + * @param objectLocale must be from getAvailableLocales + * @param displayLocale specifies the desired locale for output + * @param name the fill-in parameter of the return value + * @return display-able name of the object for the object locale in the + * desired language + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + const Locale& displayLocale, + UnicodeString& name); + + /** + * Get name of the object for the desired Locale, in the language of the + * default locale. + * @param objectLocale must be from getAvailableLocales + * @param name the fill-in parameter of the return value + * @return name of the object for the desired locale in the default language + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + UnicodeString& name); + + /** + * Get the set of Locales for which Collations are installed. + * + * <p>Note this does not include locales supported by registered collators. + * If collators might have been registered, use the overload of getAvailableLocales + * that returns a StringEnumeration.</p> + * + * @param count the output parameter of number of elements in the locale list + * @return the list of available locales for which collations are installed + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + /** + * Return a StringEnumeration over the locales available at the time of the call, + * including registered locales. If a severe error occurs (such as out of memory + * condition) this will return null. 
If there is no locale data, an empty enumeration + * will be returned. + * @return a StringEnumeration over the locales available at the time of the call + * @stable ICU 2.6 + */ + static StringEnumeration* U_EXPORT2 getAvailableLocales(void); + + /** + * Create a string enumerator of all possible keywords that are relevant to + * collation. At this point, the only recognized keyword for this + * service is "collation". + * @param status input-output error code + * @return a string enumeration over locale strings. The caller is + * responsible for closing the result. + * @stable ICU 3.0 + */ + static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status); + + /** + * Given a keyword, create a string enumeration of all values + * for that keyword that are currently in use. + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. If any other keyword is passed in, status is set + * to U_ILLEGAL_ARGUMENT_ERROR. + * @param status input-output error code + * @return a string enumeration over collation keyword values, or NULL + * upon error. The caller is responsible for deleting the result. + * @stable ICU 3.0 + */ + static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status); + + /** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param keyword one of the keys supported by this service. For now, only + * "collation" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. 
+ * @param status ICU status + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ + static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale, + UBool commonlyUsed, UErrorCode& status); + + /** + * Return the functionally equivalent locale for the given + * requested locale, with respect to given keyword, for the + * collation service. If two locales return the same result, then + * collators instantiated for these locales will behave + * equivalently. The converse is not always true; two collators + * may in fact be equivalent, but return different results, due to + * internal details. The return result has no other meaning than + * that stated above, and implies nothing as to the relationship + * between the two locales. This is intended for use by + * applications who wish to cache collators, or otherwise reuse + * collators when possible. The functional equivalent may change + * over time. For more information, please see the <a + * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services"> + * Locales and Services</a> section of the ICU User Guide. + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. + * @param locale the requested locale + * @param isAvailable reference to a fillin parameter that + * indicates whether the requested locale was 'available' to the + * collation service. A locale is defined as 'available' if it + * physically exists within the collation locale data. + * @param status reference to input-output error code + * @return the functionally equivalent collation locale, or the root + * locale upon error. + * @stable ICU 3.0 + */ + static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale, + UBool& isAvailable, UErrorCode& status); + +#if !UCONFIG_NO_SERVICE + /** + * Register a new Collator. The collator will be adopted. 
+ * Because ICU may choose to cache collators internally, this must be + * called at application startup, prior to any calls to + * Collator::createInstance to avoid undefined behavior. + * @param toAdopt the Collator instance to be adopted + * @param locale the locale with which the collator will be associated + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this collator + * @stable ICU 2.6 + */ + static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status); + + /** + * Register a new CollatorFactory. The factory will be adopted. + * Because ICU may choose to cache collators internally, this must be + * called at application startup, prior to any calls to + * Collator::createInstance to avoid undefined behavior. + * @param toAdopt the CollatorFactory instance to be adopted + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this collator + * @stable ICU 2.6 + */ + static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status); + + /** + * Unregister a previously-registered Collator or CollatorFactory + * using the key returned from the register call. Key becomes + * invalid after a successful call and should not be used again. + * The object corresponding to the key will be deleted. + * Because ICU may choose to cache collators internally, this should + * be called during application shutdown, after all calls to + * Collator::createInstance to avoid undefined behavior. 
+ * @param key the registry key returned by a previous call to registerInstance + * @param status the in/out status code, no special meanings are assigned + * @return TRUE if the collator for the key was successfully unregistered + * @stable ICU 2.6 + */ + static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); +#endif /* UCONFIG_NO_SERVICE */ + + /** + * Gets the version information for a Collator. + * @param info the version # information, the result will be filled in + * @stable ICU 2.0 + */ + virtual void getVersion(UVersionInfo info) const = 0; + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. + * This method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * @return The class ID for this object. All objects of a given class have + * the same class ID. Objects of other classes have different class + * IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Universal attribute setter + * @param attr attribute type + * @param value attribute value + * @param status to indicate whether the operation went on smoothly or + * there were errors + * @stable ICU 2.2 + */ + virtual void setAttribute(UColAttribute attr, UColAttributeValue value, + UErrorCode &status) = 0; + + /** + * Universal attribute getter + * @param attr attribute type + * @param status to indicate whether the operation went on smoothly or + * there were errors + * @return attribute value + * @stable ICU 2.2 + */ + virtual UColAttributeValue getAttribute(UColAttribute attr, + UErrorCode &status) const = 0; + + /** + * Sets the variable top to the top of the specified reordering group. + * The variable top determines the highest-sorting character + * which is affected by UCOL_ALTERNATE_HANDLING. + * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. 
+ * + * The base class implementation sets U_UNSUPPORTED_ERROR. + * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, + * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; + * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @see getMaxVariable + * @stable ICU 53 + */ + virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode); + + /** + * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. + * + * The base class implementation returns UCOL_REORDER_CODE_PUNCTUATION. + * @return the maximum variable reordering group. + * @see setMaxVariable + * @stable ICU 53 + */ + virtual UColReorderCode getMaxVariable() const; + + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param len length of variable top string. If -1 it is considered to be zero terminated. + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> + * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. 
+ */ + virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0; + + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> + * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0; + + /** + * Sets the variable top to the specified primary weight. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop + * @param status error code + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0; + + /** + * Gets the variable top value of a Collator. + * @param status error code (not changed by function). If error code is set, the return value is undefined. + * @return the variable top primary weight + * @see getMaxVariable + * @stable ICU 2.0 + */ + virtual uint32_t getVariableTop(UErrorCode &status) const = 0; + + /** + * Get a UnicodeSet that contains all the characters and sequences + * tailored in this collator. 
+ * @param status error code of the operation + * @return a pointer to a UnicodeSet object containing all the + * code points and sequences that may sort differently than + * in the root collator. The object must be disposed of by using delete + * @stable ICU 2.4 + */ + virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; + + /** + * Same as clone(). + * The base class implementation simply calls clone(). + * @return a copy of this object, owned by the caller + * @see clone() + * @deprecated ICU 50 no need to have two methods for cloning + */ + virtual Collator* safeClone(void) const; + + /** + * Get the sort key as an array of bytes from a UnicodeString. + * Sort key byte arrays are zero-terminated and can be compared using + * strcmp(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param result buffer to store result in. If NULL, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. If it is not large enough, the + * buffer will be filled to capacity. + * @return Number of bytes needed for storing the sort key + * @stable ICU 2.2 + */ + virtual int32_t getSortKey(const UnicodeString& source, + uint8_t* result, + int32_t resultLength) const = 0; + + /** + * Get the sort key as an array of bytes from a UChar buffer. + * Sort key byte arrays are zero-terminated and can be compared using + * strcmp(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param sourceLength length of string to be processed. + * If -1, the string is 0 terminated and length will be decided by the + * function. + * @param result buffer to store result in. If NULL, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. 
If it is not large enough, the + * buffer will be filled to capacity. + * @return Number of bytes needed for storing the sort key + * @stable ICU 2.2 + */ + virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, + uint8_t*result, int32_t resultLength) const = 0; + + /** + * Produce a bound for a given sortkey and a number of levels. + * Return value is always the number of bytes needed, regardless of + * whether the result buffer was big enough or even valid.<br> + * Resulting bounds can be used to produce a range of strings that are + * between upper and lower bounds. For example, if bounds are produced + * for a sortkey of string "smith", strings between upper and lower + * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> + * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER + * is produced, strings matched would be as above. However, if bound + * produced using UCOL_BOUND_UPPER_LONG is used, the above example will + * also match "Smithsonian" and similar.<br> + * For more on usage, see example in cintltst/capitst.c in procedure + * TestBounds. + * Sort keys may be compared using <TT>strcmp</TT>. + * @param source The source sortkey. + * @param sourceLength The length of source, or -1 if null-terminated. + * (If an unmodified sortkey is passed, it is always null + * terminated). + * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which + * produces a lower inclusive bound, UCOL_BOUND_UPPER, that + * produces upper bound that matches strings of the same length + * or UCOL_BOUND_UPPER_LONG that matches strings that have the + * same starting substring as the source string. + * @param noOfLevels Number of levels required in the resulting bound (for most + * uses, the recommended value is 1). See users guide for + * explanation on number of levels a sortkey can have. + * @param result A pointer to a buffer to receive the resulting sortkey. + * @param resultLength The maximum size of result. 
+ * @param status Used for returning error code if something went wrong. If the + * number of levels requested is higher than the number of levels + * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is + * issued. + * @return The size needed to fully store the bound. + * @see ucol_keyHashCode + * @stable ICU 2.1 + */ + static int32_t U_EXPORT2 getBound(const uint8_t *source, + int32_t sourceLength, + UColBoundMode boundType, + uint32_t noOfLevels, + uint8_t *result, + int32_t resultLength, + UErrorCode &status); + + +protected: + + // Collator protected constructors ------------------------------------- + + /** + * Default constructor. + * Constructor is different from the old default Collator constructor. + * The task for determining the default collation strength and normalization + * mode is left to the child class. + * @stable ICU 2.0 + */ + Collator(); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Constructor. + * Empty constructor, does not handle the arguments. + * This constructor is done for backward compatibility with 1.7 and 1.8. + * The task for handling the argument collation strength and normalization + * mode is left to the child class. + * @param collationStrength collation strength + * @param decompositionMode + * @deprecated ICU 2.4. Subclasses should use the default constructor + * instead and handle the strength and normalization mode themselves. + */ + Collator(UCollationStrength collationStrength, + UNormalizationMode decompositionMode); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Copy constructor. + * @param other Collator object to be copied from + * @stable ICU 2.0 + */ + Collator(const Collator& other); + +public: + /** + * Used internally by registration to define the requested and valid locales. 
+ * @param requestedLocale the requested locale + * @param validLocale the valid locale + * @param actualLocale the actual locale + * @internal + */ + virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); + + /** Get the short definition string for a collator. This internal API harvests the collator's + * locale and the attribute set and produces a string that can be used for opening + * a collator with the same attributes using the ucol_openFromShortString API. + * This string will be normalized. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme + * This function supports preflighting. + * + * This is internal, and intended to be used with delegate converters. + * + * @param locale a locale that will appear as a collators locale in the resulting + * short string definition. If NULL, the locale will be harvested + * from the collator. + * @param buffer space to hold the resulting string + * @param capacity capacity of the buffer + * @param status for returning errors. All the preflighting errors are featured + * @return length of the resulting string + * @see ucol_openFromShortString + * @see ucol_normalizeShortDefinitionString + * @see ucol_getShortDefinitionString + * @internal + */ + virtual int32_t internalGetShortDefinitionString(const char *locale, + char *buffer, + int32_t capacity, + UErrorCode &status) const; + + /** + * Implements ucol_strcollUTF8(). + * @internal + */ + virtual UCollationResult internalCompareUTF8( + const char *left, int32_t leftLength, + const char *right, int32_t rightLength, + UErrorCode &errorCode) const; + + /** + * Implements ucol_nextSortKeyPart(). 
+ * @internal + */ + virtual int32_t + internalNextSortKeyPart( + UCharIterator *iter, uint32_t state[2], + uint8_t *dest, int32_t count, UErrorCode &errorCode) const; + +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static inline Collator *fromUCollator(UCollator *uc) { + return reinterpret_cast<Collator *>(uc); + } + /** @internal */ + static inline const Collator *fromUCollator(const UCollator *uc) { + return reinterpret_cast<const Collator *>(uc); + } + /** @internal */ + inline UCollator *toUCollator() { + return reinterpret_cast<UCollator *>(this); + } + /** @internal */ + inline const UCollator *toUCollator() const { + return reinterpret_cast<const UCollator *>(this); + } +#endif // U_HIDE_INTERNAL_API + +private: + /** + * Assignment operator. Private for now. + */ + Collator& operator=(const Collator& other); + + friend class CFactory; + friend class SimpleCFactory; + friend class ICUCollatorFactory; + friend class ICUCollatorService; + static Collator* makeInstance(const Locale& desiredLocale, + UErrorCode& status); +}; + +#if !UCONFIG_NO_SERVICE +/** + * A factory, used with registerFactory, that creates multiple collators and provides + * display names for them. A factory supports some number of locales-- these are the + * locales for which it can create collators. The factory can be visible, in which + * case the supported locales will be enumerated by getAvailableLocales, or invisible, + * in which case they are not. Invisible locales are still supported, they are just not + * listed by getAvailableLocales. 
+ * <p> + * If standard locale display names are sufficient, Collator instances can + * be registered using registerInstance instead.</p> + * <p> + * Note: if the collators are to be used from C APIs, they must be instances + * of RuleBasedCollator.</p> + * + * @stable ICU 2.6 + */ +class U_I18N_API CollatorFactory : public UObject { +public: + + /** + * Destructor + * @stable ICU 3.0 + */ + virtual ~CollatorFactory(); + + /** + * Return true if this factory is visible. Default is true. + * If not visible, the locales supported by this factory will not + * be listed by getAvailableLocales. + * @return true if the factory is visible. + * @stable ICU 2.6 + */ + virtual UBool visible(void) const; + + /** + * Return a collator for the provided locale. If the locale + * is not supported, return NULL. + * @param loc the locale identifying the collator to be created. + * @return a new collator if the locale is supported, otherwise NULL. + * @stable ICU 2.6 + */ + virtual Collator* createCollator(const Locale& loc) = 0; + + /** + * Return the name of the collator for the objectLocale, localized for the displayLocale. + * If objectLocale is not supported, or the factory is not visible, set the result string + * to bogus. + * @param objectLocale the locale identifying the collator + * @param displayLocale the locale for which the display name of the collator should be localized + * @param result an output parameter for the display name, set to bogus if not supported. + * @return the display name + * @stable ICU 2.6 + */ + virtual UnicodeString& getDisplayName(const Locale& objectLocale, + const Locale& displayLocale, + UnicodeString& result); + + /** + * Return an array of all the locale names directly supported by this factory. + * The number of names is returned in count. This array is owned by the factory. + * Its contents must never change. 
+ * @param count output parameter for the number of locales supported by the factory + * @param status the in/out error code + * @return a pointer to an array of count UnicodeStrings. + * @stable ICU 2.6 + */ + virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0; +}; +#endif /* UCONFIG_NO_SERVICE */ + +// Collator inline methods ----------------------------------------------- + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/compactdecimalformat.h b/intl/icu/source/i18n/unicode/compactdecimalformat.h new file mode 100644 index 000000000..1fcc5c581 --- /dev/null +++ b/intl/icu/source/i18n/unicode/compactdecimalformat.h @@ -0,0 +1,415 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 2012-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File COMPACTDECIMALFORMAT.H +******************************************************************************** +*/ + +#ifndef __COMPACT_DECIMAL_FORMAT_H__ +#define __COMPACT_DECIMAL_FORMAT_H__ + +#include "unicode/utypes.h" +/** + * \file + * \brief C++ API: Formats decimal numbers in compact form. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/decimfmt.h" + +struct UHashtable; + +U_NAMESPACE_BEGIN + +class PluralRules; + +/** + * The CompactDecimalFormat produces abbreviated numbers, suitable for display in + * environments with limited real estate. For example, 'Hits: 1.2B' instead of + * 'Hits: 1,200,000,000'. The format will be appropriate for the given language, + * such as "1,2 Mrd." for German. 
+ * <p> + * For numbers under 1000 trillion (under 10^15, such as 123,456,789,012,345), + * the result will be short for supported languages. However, the result may + * sometimes exceed 7 characters, such as when there are combining marks or thin + * characters. In such cases, the visual width in fonts should still be short. + * <p> + * By default, there are 3 significant digits. After creation, if more than + * three significant digits are set (with setMaximumSignificantDigits), or if a + * fixed number of digits are set (with setMaximumIntegerDigits or + * setMaximumFractionDigits), then result may be wider. + * <p> + * At this time, parsing is not supported, and will produce a U_UNSUPPORTED_ERROR. + * Resetting the pattern prefixes or suffixes is not supported; the method calls + * are ignored. + * <p> + * @stable ICU 51 + */ +class U_I18N_API CompactDecimalFormat : public DecimalFormat { +public: + + /** + * Returns a compact decimal instance for specified locale. + * @param inLocale the given locale. + * @param style whether to use short or long style. + * @param status error code returned here. + * @stable ICU 51 + */ + static CompactDecimalFormat* U_EXPORT2 createInstance( + const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status); + + /** + * Copy constructor. + * + * @param source the DecimalFormat object to be copied from. + * @stable ICU 51 + */ + CompactDecimalFormat(const CompactDecimalFormat& source); + + /** + * Destructor. + * @stable ICU 51 + */ + virtual ~CompactDecimalFormat(); + + /** + * Assignment operator. + * + * @param rhs the DecimalFormat object to be copied. + * @stable ICU 51 + */ + CompactDecimalFormat& operator=(const CompactDecimalFormat& rhs); + + /** + * Clone this Format object polymorphically. The caller owns the + * result and should delete it when done. + * + * @return a polymorphic copy of this CompactDecimalFormat. 
+ * @stable ICU 51 + */ + virtual Format* clone() const; + + /** + * Return TRUE if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * + * @param other the object to be compared with. + * @return TRUE if the given Format objects are semantically equal. + * @stable ICU 51 + */ + virtual UBool operator==(const Format& other) const; + + + using DecimalFormat::format; + + /** + * Format a double or long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 51 + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Format a double or long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + + /** + * Format a double or long number using base-10 representation. + * Currently sets status to U_UNSUPPORTED_ERROR. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. 
+ * @internal + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 56 + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Format a long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + + /** + * Format a long number using base-10 representation. + * Currently sets status to U_UNSUPPORTED_ERROR + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format an int64 number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. 
+ * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 51 + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Format an int64 number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + + /** + * Format an int64 number using base-10 representation. + * Currently sets status to U_UNSUPPORTED_ERROR + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. Currently sets status to U_UNSUPPORTED_ERROR + * The syntax of the unformatted number is a "numeric string" + * as defined in the Decimal Arithmetic Specification, available at + * http://speleotrove.com/decimal + * + * @param number The unformatted number, as a string. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. 
+ * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(StringPiece number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. Currently sets status to U_UNSUPPORTED_ERROR + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. Currently sets status to U_UNSUPPORTED_ERROR. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. 
+ * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * CompactDecimalFormat does not support parsing. This implementation + * does nothing. + * @param text Unused. + * @param result Does not change. + * @param parsePosition Does not change. + * @see Formattable + * @stable ICU 51 + */ + virtual void parse(const UnicodeString& text, + Formattable& result, + ParsePosition& parsePosition) const; + + /** + * CompactDecimalFormat does not support parsing. This implementation + * sets status to U_UNSUPPORTED_ERROR + * + * @param text Unused. + * @param result Does not change. + * @param status Always set to U_UNSUPPORTED_ERROR. + * @stable ICU 51 + */ + virtual void parse(const UnicodeString& text, + Formattable& result, + UErrorCode& status) const; + + /** + * Parses text from the given string as a currency amount. Unlike + * the parse() method, this method will attempt to parse a generic + * currency name, searching for a match of this object's locale's + * currency display names, or for a 3-letter ISO currency code. + * This method will fail if this format is not a currency format, + * that is, if it does not contain the currency pattern symbol + * (U+00A4) in its prefix or suffix. This implementation always returns + * NULL. + * + * @param text the string to parse + * @param pos input-output position; on input, the position within text + * to match; must have 0 <= pos.getIndex() < text.length(); + * on output, the position after the last matched character. + * If the parse fails, the position is unchanged upon output. + * @return if parse succeeds, a pointer to a newly-created CurrencyAmount + * object (owned by the caller) containing information about + * the parsed currency; if parse fails, this is NULL.
+ * @internal + */ + virtual CurrencyAmount* parseCurrency(const UnicodeString& text, + ParsePosition& pos) const; + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 51 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. + * This method is to implement a simple version of RTTI, since not all + * C++ compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 51 + */ + virtual UClassID getDynamicClassID() const; + +private: + + const UHashtable* _unitsByVariant; + const double* _divisors; + PluralRules* _pluralRules; + + // Default constructor not implemented. + CompactDecimalFormat(const DecimalFormat &, const UHashtable* unitsByVariant, const double* divisors, PluralRules* pluralRules); + + UBool eqHelper(const CompactDecimalFormat& that) const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // __COMPACT_DECIMAL_FORMAT_H__ +//eof diff --git a/intl/icu/source/i18n/unicode/curramt.h b/intl/icu/source/i18n/unicode/curramt.h new file mode 100644 index 000000000..65c5b39a7 --- /dev/null +++ b/intl/icu/source/i18n/unicode/curramt.h @@ -0,0 +1,132 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2004-2006, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: April 26, 2004 +* Since: ICU 3.0 +********************************************************************** +*/ +#ifndef __CURRENCYAMOUNT_H__ +#define __CURRENCYAMOUNT_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/measure.h" +#include "unicode/currunit.h" + +/** + * \file + * \brief C++ API: Currency Amount Object. + */ + +U_NAMESPACE_BEGIN + +/** + * + * A currency together with a numeric amount, such as 200 USD. + * + * @author Alan Liu + * @stable ICU 3.0 + */ +class U_I18N_API CurrencyAmount: public Measure { + public: + /** + * Construct an object with the given numeric amount and the given + * ISO currency code. + * @param amount a numeric object; amount.isNumeric() must be TRUE + * @param isoCode the 3-letter ISO 4217 currency code; must not be + * NULL and must have length 3 + * @param ec input-output error code. If the amount or the isoCode + * is invalid, then this will be set to a failing value. + * @stable ICU 3.0 + */ + CurrencyAmount(const Formattable& amount, const UChar* isoCode, + UErrorCode &ec); + + /** + * Construct an object with the given numeric amount and the given + * ISO currency code. + * @param amount the amount of the given currency + * @param isoCode the 3-letter ISO 4217 currency code; must not be + * NULL and must have length 3 + * @param ec input-output error code. If the isoCode is invalid, + * then this will be set to a failing value. 
+ * @stable ICU 3.0 + */ + CurrencyAmount(double amount, const UChar* isoCode, + UErrorCode &ec); + + /** + * Copy constructor + * @stable ICU 3.0 + */ + CurrencyAmount(const CurrencyAmount& other); + + /** + * Assignment operator + * @stable ICU 3.0 + */ + CurrencyAmount& operator=(const CurrencyAmount& other); + + /** + * Return a polymorphic clone of this object. The result will + * have the same class as returned by getDynamicClassID(). + * @stable ICU 3.0 + */ + virtual UObject* clone() const; + + /** + * Destructor + * @stable ICU 3.0 + */ + virtual ~CurrencyAmount(); + + /** + * Returns a unique class ID for this object POLYMORPHICALLY. + * This method implements a simple form of RTTI used by ICU. + * @return The class ID for this object. All objects of a given + * class have the same class ID. Objects of other classes have + * different class IDs. + * @stable ICU 3.0 + */ + virtual UClassID getDynamicClassID() const; + + /** + * Returns the class ID for this class. This is used to compare to + * the return value of getDynamicClassID(). + * @return The class ID for all objects of this class. + * @stable ICU 3.0 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * Return the currency unit object of this object. + * @stable ICU 3.0 + */ + inline const CurrencyUnit& getCurrency() const; + + /** + * Return the ISO currency code of this object. 
+ * @stable ICU 3.0 + */ + inline const UChar* getISOCurrency() const; +}; + +inline const CurrencyUnit& CurrencyAmount::getCurrency() const { + return (const CurrencyUnit&) getUnit(); +} + +inline const UChar* CurrencyAmount::getISOCurrency() const { + return getCurrency().getISOCurrency(); +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING +#endif // __CURRENCYAMOUNT_H__ diff --git a/intl/icu/source/i18n/unicode/currpinf.h b/intl/icu/source/i18n/unicode/currpinf.h new file mode 100644 index 000000000..acf8b5326 --- /dev/null +++ b/intl/icu/source/i18n/unicode/currpinf.h @@ -0,0 +1,261 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2009-2015, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +#ifndef CURRPINF_H +#define CURRPINF_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Currency Plural Information used by Decimal Format + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/unistr.h" + +U_NAMESPACE_BEGIN + +class Locale; +class PluralRules; +class Hashtable; + +/** + * This class represents the information needed by + * DecimalFormat to format currency plural, + * such as "3.00 US dollars" or "1.00 US dollar". + * DecimalFormat creates for itself an instance of + * CurrencyPluralInfo from its locale data. + * If you need to change any of these symbols, you can get the + * CurrencyPluralInfo object from your + * DecimalFormat and modify it. + * + * Following are the information needed for currency plural format and parse: + * locale information, + * plural rule of the locale, + * currency plural pattern of the locale. 
+ * + * @stable ICU 4.2 + */ +class U_I18N_API CurrencyPluralInfo : public UObject { +public: + + /** + * Create a CurrencyPluralInfo object for the default locale. + * @param status output param set to success/failure code on exit + * @stable ICU 4.2 + */ + CurrencyPluralInfo(UErrorCode& status); + + /** + * Create a CurrencyPluralInfo object for the given locale. + * @param locale the locale + * @param status output param set to success/failure code on exit + * @stable ICU 4.2 + */ + CurrencyPluralInfo(const Locale& locale, UErrorCode& status); + + /** + * Copy constructor + * + * @stable ICU 4.2 + */ + CurrencyPluralInfo(const CurrencyPluralInfo& info); + + + /** + * Assignment operator + * + * @stable ICU 4.2 + */ + CurrencyPluralInfo& operator=(const CurrencyPluralInfo& info); + + + /** + * Destructor + * + * @stable ICU 4.2 + */ + virtual ~CurrencyPluralInfo(); + + + /** + * Equal operator. + * + * @stable ICU 4.2 + */ + UBool operator==(const CurrencyPluralInfo& info) const; + + + /** + * Not equal operator + * + * @stable ICU 4.2 + */ + UBool operator!=(const CurrencyPluralInfo& info) const; + + + /** + * Clone + * + * @stable ICU 4.2 + */ + CurrencyPluralInfo* clone() const; + + + /** + * Gets plural rules of this locale, used for currency plural format + * + * @return plural rule + * @stable ICU 4.2 + */ + const PluralRules* getPluralRules() const; + + /** + * Given a plural count, gets currency plural pattern of this locale, + * used for currency plural format + * + * @param pluralCount currency plural count + * @param result output param to receive the pattern + * @return a currency plural pattern based on plural count + * @stable ICU 4.2 + */ + UnicodeString& getCurrencyPluralPattern(const UnicodeString& pluralCount, + UnicodeString& result) const; + + /** + * Get locale + * + * @return locale + * @stable ICU 4.2 + */ + const Locale& getLocale() const; + + /** + * Set plural rules. 
+ * The plural rule is set when CurrencyPluralInfo + * instance is created. + * You can call this method to reset plural rules only if you want + * to modify the default plural rule of the locale. + * + * @param ruleDescription new plural rule description + * @param status output param set to success/failure code on exit + * @stable ICU 4.2 + */ + void setPluralRules(const UnicodeString& ruleDescription, + UErrorCode& status); + + /** + * Set currency plural pattern. + * The currency plural pattern is set when CurrencyPluralInfo + * instance is created. + * You can call this method to reset currency plural pattern only if + * you want to modify the default currency plural pattern of the locale. + * + * @param pluralCount the plural count for which the currency pattern will + * be overridden. + * @param pattern the new currency plural pattern + * @param status output param set to success/failure code on exit + * @stable ICU 4.2 + */ + void setCurrencyPluralPattern(const UnicodeString& pluralCount, + const UnicodeString& pattern, + UErrorCode& status); + + /** + * Set locale + * + * @param loc the new locale to set + * @param status output param set to success/failure code on exit + * @stable ICU 4.2 + */ + void setLocale(const Locale& loc, UErrorCode& status); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 4.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. 
+ * + * @stable ICU 4.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + friend class DecimalFormat; + friend class DecimalFormatImpl; + + void initialize(const Locale& loc, UErrorCode& status); + + void setupCurrencyPluralPattern(const Locale& loc, UErrorCode& status); + + /* + * delete hash table + * + * @param hTable hash table to be deleted + */ + void deleteHash(Hashtable* hTable); + + + /* + * initialize hash table + * + * @param status output param set to success/failure code on exit + * @return hash table initialized + */ + Hashtable* initHash(UErrorCode& status); + + + + /** + * copy hash table + * + * @param source the source to copy from + * @param target the target to copy to + * @param status error code + */ + void copyHash(const Hashtable* source, Hashtable* target, UErrorCode& status); + + //-------------------- private data member --------------------- + // map from plural count to currency plural pattern, for example + // a plural pattern defined in "CurrencyUnitPatterns" is + // "one{{0} {1}}", in which "one" is a plural count + // and "{0} {1}" is a currency plural pattern. + // The currency plural pattern saved in this mapping is the pattern + // defined in "CurrencyUnitPatterns" by replacing + // {0} with the number format pattern, + // and {1} with 3 currency signs. + Hashtable* fPluralCountToCurrencyUnitPattern; + + /* + * The plural rule is used to format currency plural name, + * for example: "3.00 US Dollars". + * If there are 3 currency signs in the currency pattern, + * the 3 currency signs will be replaced by currency plural name.
+ */ + PluralRules* fPluralRules; + + // locale + Locale* fLocale; +}; + + +inline UBool +CurrencyPluralInfo::operator!=(const CurrencyPluralInfo& info) const { return !operator==(info); } + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // CURRPINF_H +//eof diff --git a/intl/icu/source/i18n/unicode/currunit.h b/intl/icu/source/i18n/unicode/currunit.h new file mode 100644 index 000000000..9ca4dc591 --- /dev/null +++ b/intl/icu/source/i18n/unicode/currunit.h @@ -0,0 +1,112 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2004-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: April 26, 2004 +* Since: ICU 3.0 +********************************************************************** +*/ +#ifndef __CURRENCYUNIT_H__ +#define __CURRENCYUNIT_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/measunit.h" + +/** + * \file + * \brief C++ API: Currency Unit Information. + */ + +U_NAMESPACE_BEGIN + +/** + * A unit of currency, such as USD (U.S. dollars) or JPY (Japanese + * yen). This class is a thin wrapper over a UChar string that + * subclasses MeasureUnit, for use with Measure and MeasureFormat. + * + * @author Alan Liu + * @stable ICU 3.0 + */ +class U_I18N_API CurrencyUnit: public MeasureUnit { + public: + /** + * Construct an object with the given ISO currency code. + * @param isoCode the 3-letter ISO 4217 currency code; must not be + * NULL and must have length 3 + * @param ec input-output error code. If the isoCode is invalid, + * then this will be set to a failing value.
+ * @stable ICU 3.0 + */ + CurrencyUnit(const UChar* isoCode, UErrorCode &ec); + + /** + * Copy constructor + * @stable ICU 3.0 + */ + CurrencyUnit(const CurrencyUnit& other); + + /** + * Assignment operator + * @stable ICU 3.0 + */ + CurrencyUnit& operator=(const CurrencyUnit& other); + + /** + * Return a polymorphic clone of this object. The result will + * have the same class as returned by getDynamicClassID(). + * @stable ICU 3.0 + */ + virtual UObject* clone() const; + + /** + * Destructor + * @stable ICU 3.0 + */ + virtual ~CurrencyUnit(); + + /** + * Returns a unique class ID for this object POLYMORPHICALLY. + * This method implements a simple form of RTTI used by ICU. + * @return The class ID for this object. All objects of a given + * class have the same class ID. Objects of other classes have + * different class IDs. + * @stable ICU 3.0 + */ + virtual UClassID getDynamicClassID() const; + + /** + * Returns the class ID for this class. This is used to compare to + * the return value of getDynamicClassID(). + * @return The class ID for all objects of this class. + * @stable ICU 3.0 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * Return the ISO currency code of this object. + * @stable ICU 3.0 + */ + inline const UChar* getISOCurrency() const; + + private: + /** + * The ISO 4217 code of this object. + */ + UChar isoCode[4]; +}; + +inline const UChar* CurrencyUnit::getISOCurrency() const { + return isoCode; +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING +#endif // __CURRENCYUNIT_H__ diff --git a/intl/icu/source/i18n/unicode/datefmt.h b/intl/icu/source/i18n/unicode/datefmt.h new file mode 100644 index 000000000..311412651 --- /dev/null +++ b/intl/icu/source/i18n/unicode/datefmt.h @@ -0,0 +1,952 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************** + * Copyright (C) 1997-2016, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************************** + * + * File DATEFMT.H + * + * Modification History: + * + * Date Name Description + * 02/19/97 aliu Converted from java. + * 04/01/97 aliu Added support for centuries. + * 07/23/98 stephen JDK 1.2 sync + * 11/15/99 weiv Added support for week of year/day of week formatting + ******************************************************************************** + */ + +#ifndef DATEFMT_H +#define DATEFMT_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/udat.h" +#include "unicode/calendar.h" +#include "unicode/numfmt.h" +#include "unicode/format.h" +#include "unicode/locid.h" +#include "unicode/enumset.h" +#include "unicode/udisplaycontext.h" + +/** + * \file + * \brief C++ API: Abstract class for converting dates. + */ + +U_NAMESPACE_BEGIN + +class TimeZone; +class DateTimePatternGenerator; + +// explicit template instantiation. see digitlst.h +#if defined (_MSC_VER) +template class U_I18N_API EnumSet<UDateFormatBooleanAttribute, + 0, + UDAT_BOOLEAN_ATTRIBUTE_COUNT>; +#endif + +/** + * DateFormat is an abstract class for a family of classes that convert dates and + * times from their internal representations to textual form and back again in a + * language-independent manner. Converting from the internal representation (milliseconds + * since midnight, January 1, 1970) to text is known as "formatting," and converting + * from text to millis is known as "parsing." We currently define only one concrete + * subclass of DateFormat: SimpleDateFormat, which can handle pretty much all normal + * date formatting and parsing actions. + * <P> + * DateFormat helps you to format and parse dates for any locale. 
Your code can + * be completely independent of the locale conventions for months, days of the + * week, or even the calendar format: lunar vs. solar. + * <P> + * To format a date for the current Locale, use one of the static factory + * methods: + * <pre> + * \code + * DateFormat* dfmt = DateFormat::createDateInstance(); + * UDate myDate = Calendar::getNow(); + * UnicodeString myString; + * myString = dfmt->format( myDate, myString ); + * \endcode + * </pre> + * If you are formatting multiple dates, it is more efficient to get the + * format and use it multiple times so that the system doesn't have to fetch the + * information about the local language and country conventions multiple times. + * <pre> + * \code + * DateFormat* df = DateFormat::createDateInstance(); + * UnicodeString myString; + * UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values + * for (int32_t i = 0; i < 3; ++i) { + * myString.remove(); + * cout << df->format( myDateArr[i], myString ) << endl; + * } + * \endcode + * </pre> + * To find where a specific field of a date falls in the formatted text, + * pass a FieldPosition for that field to format(). + * <pre> + * \code + * DateFormat* dfmt = DateFormat::createDateInstance(); + * FieldPosition pos(DateFormat::YEAR_FIELD); + * UnicodeString myString; + * myString = dfmt->format( myDate, myString, pos ); + * cout << myString << endl; + * cout << pos.getBeginIndex() << "," << pos.getEndIndex() << endl; + * \endcode + * </pre> + * To format a date for a different Locale, specify it in the call to + * createDateInstance(). + * <pre> + * \code + * DateFormat* df = + * DateFormat::createDateInstance( DateFormat::SHORT, Locale::getFrance()); + * \endcode + * </pre> + * You can use a DateFormat to parse also. + * <pre> + * \code + * UErrorCode status = U_ZERO_ERROR; + * UDate myDate = df->parse(myString, status); + * \endcode + * </pre> + * Use createDateInstance() to produce the normal date format for that country. + * There are other static factory methods available.
Use createTimeInstance() + * to produce the normal time format for that country. Use createDateTimeInstance() + * to produce a DateFormat that formats both date and time. You can pass in + * different options to these factory methods to control the length of the + * result; from SHORT to MEDIUM to LONG to FULL. The exact result depends on the + * locale, but generally: + * <ul type=round> + * <li> SHORT is completely numeric, such as 12/13/52 or 3:30pm + * <li> MEDIUM is longer, such as Jan 12, 1952 + * <li> LONG is longer, such as January 12, 1952 or 3:30:32pm + * <li> FULL is pretty completely specified, such as + * Tuesday, April 12, 1952 AD or 3:30:42pm PST. + * </ul> + * You can also set the time zone on the format if you wish. If you want even + * more control over the format or parsing, (or want to give your users more + * control), you can try casting the DateFormat you get from the factory methods + * to a SimpleDateFormat. This will work for the majority of countries; just + * remember to check getDynamicClassID() before carrying out the cast. + * <P> + * You can also use forms of the parse and format methods with ParsePosition and + * FieldPosition to allow you to + * <ul type=round> + * <li> Progressively parse through pieces of a string. + * <li> Align any particular field, or find out where it is for selection + * on the screen. + * </ul> + * + * <p><em>User subclasses are not supported.</em> While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. + */ +class U_I18N_API DateFormat : public Format { +public: + + /** + * Constants for various style patterns. These reflect the order of items in + * the DateTimePatterns resource. There are 4 time patterns, 4 date patterns, + * the default date-time pattern, and 4 date-time patterns. Each block of 4 values + * in the resource occurs in the order full, long, medium, short.
+ * @stable ICU 2.4 + */ + enum EStyle + { + kNone = -1, + + kFull = 0, + kLong = 1, + kMedium = 2, + kShort = 3, + + kDateOffset = kShort + 1, + // kFull + kDateOffset = 4 + // kLong + kDateOffset = 5 + // kMedium + kDateOffset = 6 + // kShort + kDateOffset = 7 + + kDateTime = 8, + // Default DateTime + + kDateTimeOffset = kDateTime + 1, + // kFull + kDateTimeOffset = 9 + // kLong + kDateTimeOffset = 10 + // kMedium + kDateTimeOffset = 11 + // kShort + kDateTimeOffset = 12 + + // relative dates + kRelative = (1 << 7), + + kFullRelative = (kFull | kRelative), + + kLongRelative = kLong | kRelative, + + kMediumRelative = kMedium | kRelative, + + kShortRelative = kShort | kRelative, + + + kDefault = kMedium, + + + + /** + * These constants are provided for backwards compatibility only. + * Please use the C++ style constants defined above. + */ + FULL = kFull, + LONG = kLong, + MEDIUM = kMedium, + SHORT = kShort, + DEFAULT = kDefault, + DATE_OFFSET = kDateOffset, + NONE = kNone, + DATE_TIME = kDateTime + }; + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~DateFormat(); + + /** + * Equality operator. Returns true if the two formats have the same behavior. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Format&) const; + + + using Format::format; + + /** + * Format an object to produce a string. This method handles Formattable + * objects with a UDate type. If the Formattable object type is not a Date, + * then it returns a failing UErrorCode. + * + * @param obj The object to format. Must be a Date. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter.
+ * @stable ICU 2.0 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Format an object to produce a string. This method handles Formattable + * objects with a UDate type. If the Formattable object type is not a Date, + * then it returns a failing UErrorCode. + * + * @param obj The object to format. Must be a Date. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. Field values + * are defined in UDateFormatField. Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + /** + * Formats a date into a date/time string. This is an abstract method which + * concrete subclasses must implement. + * <P> + * On input, the FieldPosition parameter may have its "field" member filled with + * an enum value specifying a field. On output, the FieldPosition will be filled + * in with the text offsets for that field. + * <P> For example, given a time text + * "1996.07.10 AD at 15:08:56 PDT", if the given fieldPosition.field is + * UDAT_YEAR_FIELD, the offsets fieldPosition.beginIndex and + * fieldPosition.getEndIndex will be set to 0 and 4, respectively. + * <P> Notice + * that if the same time field appears more than once in a pattern, the fieldPosition will + * be set for the first occurrence of that time field.
For instance, + * formatting a UDate to the time string "1 PM PDT (Pacific Daylight Time)" + * using the pattern "h a z (zzzz)" and the alignment field + * DateFormat::TIMEZONE_FIELD, the offsets fieldPosition.beginIndex and + * fieldPosition.getEndIndex will be set to 5 and 8, respectively, for the first + * occurrence of the timezone pattern character 'z'. + * + * @param cal Calendar set to the date and time to be formatted + * into a date/time string. When the calendar type is + * different from the internal calendar held by this + * DateFormat instance, the date and the time zone will + * be inherited from the input calendar, but other calendar + * field values will be calculated by the internal calendar. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param fieldPosition On input: an alignment field, if desired (see examples above) + * On output: the offsets of the alignment field (see examples above) + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.1 + */ + virtual UnicodeString& format( Calendar& cal, + UnicodeString& appendTo, + FieldPosition& fieldPosition) const = 0; + + /** + * Formats a date into a date/time string. Subclasses should implement this method. + * + * @param cal Calendar set to the date and time to be formatted + * into a date/time string. When the calendar type is + * different from the internal calendar held by this + * DateFormat instance, the date and the time zone will + * be inherited from the input calendar, but other calendar + * field values will be calculated by the internal calendar. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. Field values + * are defined in UDateFormatField. Can be NULL. + * @param status error status. + * @return Reference to 'appendTo' parameter.
+ * @stable ICU 4.4 + */ + virtual UnicodeString& format(Calendar& cal, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + /** + * Formats a UDate into a date/time string. + * <P> + * On input, the FieldPosition parameter may have its "field" member filled with + * an enum value specifying a field. On output, the FieldPosition will be filled + * in with the text offsets for that field. + * <P> For example, given a time text + * "1996.07.10 AD at 15:08:56 PDT", if the given fieldPosition.field is + * UDAT_YEAR_FIELD, the offsets fieldPosition.beginIndex and + * fieldPosition.getEndIndex will be set to 0 and 4, respectively. + * <P> Notice + * that if the same time field appears more than once in a pattern, the fieldPosition will + * be set for the first occurrence of that time field. For instance, + * formatting a UDate to the time string "1 PM PDT (Pacific Daylight Time)" + * using the pattern "h a z (zzzz)" and the alignment field + * DateFormat::TIMEZONE_FIELD, the offsets fieldPosition.beginIndex and + * fieldPosition.getEndIndex will be set to 5 and 8, respectively, for the first + * occurrence of the timezone pattern character 'z'. + * + * @param date UDate to be formatted into a date/time string. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param fieldPosition On input: an alignment field, if desired (see examples above) + * On output: the offsets of the alignment field (see examples above) + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + UnicodeString& format( UDate date, + UnicodeString& appendTo, + FieldPosition& fieldPosition) const; + + /** + * Formats a UDate into a date/time string. + * + * @param date UDate to be formatted into a date/time string. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents.
+ * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. Field values + * are defined in UDateFormatField. Can be NULL. + * @param status error status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + UnicodeString& format(UDate date, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + /** + * Formats a UDate into a date/time string. If there is a problem, you won't + * know, using this method. Use the overloaded format() method which takes a + * FieldPosition& to detect formatting problems. + * + * @param date The UDate value to be formatted into a string. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + UnicodeString& format(UDate date, UnicodeString& appendTo) const; + + /** + * Parse a date/time string. For example, a time text "07/10/96 4:5 PM, PDT" + * will be parsed into a UDate that is equivalent to Date(837039928046). + * Parsing begins at the beginning of the string and proceeds as far as + * possible. Assuming no parse errors were encountered, this function + * doesn't return any information about how much of the string was consumed + * by the parsing. If you need that information, use the version of + * parse() that takes a ParsePosition. + * <P> + * By default, parsing is lenient: If the input is not in the form used by + * this object's format method but can still be parsed as a date, then the + * parse succeeds. Clients may insist on strict adherence to the format by + * calling setLenient(false). + * @see DateFormat::setLenient(boolean) + * <P> + * Note that the normal date formats associated with some calendars - such + * as the Chinese lunar calendar - do not specify enough fields to enable + * dates to be parsed unambiguously. 
In the case of the Chinese lunar + * calendar, while the year within the current 60-year cycle is specified, + * the number of such cycles since the start date of the calendar (in the + * ERA field of the Calendar object) is not normally part of the format, + * and parsing may assume the wrong era. For cases such as this it is + * recommended that clients parse using the method + * parse(const UnicodeString&, Calendar& cal, ParsePosition&) + * with the Calendar passed in set to the current date, or to a date + * within the era/cycle that should be assumed if absent in the format. + * + * @param text The date/time string to be parsed into a UDate value. + * @param status Output param to be set to success/failure code. If + * 'text' cannot be parsed, it will be set to a failure + * code. + * @return The parsed UDate value, if successful. + * @stable ICU 2.0 + */ + virtual UDate parse( const UnicodeString& text, + UErrorCode& status) const; + + /** + * Parse a date/time string beginning at the given parse position. For + * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date + * that is equivalent to Date(837039928046). + * <P> + * By default, parsing is lenient: If the input is not in the form used by + * this object's format method but can still be parsed as a date, then the + * parse succeeds. Clients may insist on strict adherence to the format by + * calling setLenient(false). + * @see DateFormat::setLenient(boolean) + * + * @param text The date/time string to be parsed. + * @param cal A Calendar set on input to the date and time to be used for + * missing values in the date/time string being parsed, and set + * on output to the parsed date/time. 
When the calendar type is + * different from the internal calendar held by this DateFormat + * instance, the internal calendar will be cloned to a work + * calendar set to the same milliseconds and time zone as the + * cal parameter, field values will be parsed based on the work + * calendar, then the result (milliseconds and time zone) will + * be set in this calendar. + * @param pos On input, the position at which to start parsing; on + * output, the position at which parsing terminated, or the + * start position if the parse failed. + * @stable ICU 2.1 + */ + virtual void parse( const UnicodeString& text, + Calendar& cal, + ParsePosition& pos) const = 0; + + /** + * Parse a date/time string beginning at the given parse position. For + * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date + * that is equivalent to Date(837039928046). + * <P> + * By default, parsing is lenient: If the input is not in the form used by + * this object's format method but can still be parsed as a date, then the + * parse succeeds. Clients may insist on strict adherence to the format by + * calling setLenient(false). + * @see DateFormat::setLenient(boolean) + * <P> + * Note that the normal date formats associated with some calendars - such + * as the Chinese lunar calendar - do not specify enough fields to enable + * dates to be parsed unambiguously. In the case of the Chinese lunar + * calendar, while the year within the current 60-year cycle is specified, + * the number of such cycles since the start date of the calendar (in the + * ERA field of the Calendar object) is not normally part of the format, + * and parsing may assume the wrong era. For cases such as this it is + * recommended that clients parse using the method + * parse(const UnicodeString&, Calendar& cal, ParsePosition&) + * with the Calendar passed in set to the current date, or to a date + * within the era/cycle that should be assumed if absent in the format. 
+ * + * @param text The date/time string to be parsed into a UDate value. + * @param pos On input, the position at which to start parsing; on + * output, the position at which parsing terminated, or the + * start position if the parse failed. + * @return A valid UDate if the input could be parsed. + * @stable ICU 2.0 + */ + UDate parse( const UnicodeString& text, + ParsePosition& pos) const; + + /** + * Parse a string to produce an object. This method handles parsing of + * date/time strings into Formattable objects with UDate types. + * <P> + * Before calling, set parse_pos.index to the offset you want to start + * parsing at in the source. After calling, parse_pos.index is the end of + * the text you parsed. If an error occurs, index is unchanged. + * <P> + * When parsing, leading whitespace is discarded (with a successful parse), + * while trailing whitespace is left as is. + * <P> + * See Format::parseObject() for more. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. Upon return + * this param is set to the position after the + * last character successfully parsed. If the + * source is not parsed successfully, this param + * will remain unchanged. + * @stable ICU 2.0 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& parse_pos) const; + + /** + * Create a default date/time formatter that uses the SHORT style for both + * the date and the time. + * + * @return A date/time formatter which the caller owns. + * @stable ICU 2.0 + */ + static DateFormat* U_EXPORT2 createInstance(void); + + /** + * Creates a time formatter with the given formatting style for the given + * locale. + * + * @param style The given formatting style. For example, + * SHORT for "h:mm a" in the US locale. Relative + * time styles are not currently supported. 
+ * @param aLocale The given locale. + * @return A time formatter which the caller owns. + * @stable ICU 2.0 + */ + static DateFormat* U_EXPORT2 createTimeInstance(EStyle style = kDefault, + const Locale& aLocale = Locale::getDefault()); + + /** + * Creates a date formatter with the given formatting style for the given + * const locale. + * + * @param style The given formatting style. For example, SHORT for "M/d/yy" in the + * US locale. As currently implemented, relative date formatting only + * affects a limited range of calendar days before or after the + * current date, based on the CLDR <field type="day">/<relative> data: + * For example, in English, "Yesterday", "Today", and "Tomorrow". + * Outside of this range, dates are formatted using the corresponding + * non-relative style. + * @param aLocale The given locale. + * @return A date formatter which the caller owns. + * @stable ICU 2.0 + */ + static DateFormat* U_EXPORT2 createDateInstance(EStyle style = kDefault, + const Locale& aLocale = Locale::getDefault()); + + /** + * Creates a date/time formatter with the given formatting styles for the + * given locale. + * + * @param dateStyle The given formatting style for the date portion of the result. + * For example, SHORT for "M/d/yy" in the US locale. As currently + * implemented, relative date formatting only affects a limited range + * of calendar days before or after the current date, based on the + * CLDR <field type="day">/<relative> data: For example, in English, + * "Yesterday", "Today", and "Tomorrow". Outside of this range, dates + * are formatted using the corresponding non-relative style. + * @param timeStyle The given formatting style for the time portion of the result. + * For example, SHORT for "h:mm a" in the US locale. Relative + * time styles are not currently supported. + * @param aLocale The given locale. + * @return A date/time formatter which the caller owns. 
+ * @stable ICU 2.0 + */ + static DateFormat* U_EXPORT2 createDateTimeInstance(EStyle dateStyle = kDefault, + EStyle timeStyle = kDefault, + const Locale& aLocale = Locale::getDefault()); + +#ifndef U_HIDE_INTERNAL_API + /** + * Returns the best pattern given a skeleton and locale. + * @param locale the locale + * @param skeleton the skeleton + * @param status ICU error returned here + * @return the best pattern. + * @internal For ICU use only. + */ + static UnicodeString getBestPattern( + const Locale &locale, + const UnicodeString &skeleton, + UErrorCode &status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Creates a date/time formatter for the given skeleton and + * default locale. + * + * @param skeleton The skeleton e.g "yMMMMd." Fields in the skeleton can + * be in any order, and this method uses the locale to + * map the skeleton to a pattern that includes locale + * specific separators with the fields in the appropriate + * order for that locale. + * @param status Any error returned here. + * @return A date/time formatter which the caller owns. + * @stable ICU 55 + */ + static DateFormat* U_EXPORT2 createInstanceForSkeleton( + const UnicodeString& skeleton, + UErrorCode &status); + + /** + * Creates a date/time formatter for the given skeleton and locale. + * + * @param skeleton The skeleton e.g "yMMMMd." Fields in the skeleton can + * be in any order, and this method uses the locale to + * map the skeleton to a pattern that includes locale + * specific separators with the fields in the appropriate + * order for that locale. + * @param locale The given locale. + * @param status Any error returned here. + * @return A date/time formatter which the caller owns. + * @stable ICU 55 + */ + static DateFormat* U_EXPORT2 createInstanceForSkeleton( + const UnicodeString& skeleton, + const Locale &locale, + UErrorCode &status); + + /** + * Creates a date/time formatter for the given skeleton and locale. 
+ * + * @param calendarToAdopt the calendar returned DateFormat is to use. + * @param skeleton The skeleton e.g "yMMMMd." Fields in the skeleton can + * be in any order, and this method uses the locale to + * map the skeleton to a pattern that includes locale + * specific separators with the fields in the appropriate + * order for that locale. + * @param locale The given locale. + * @param status Any error returned here. + * @return A date/time formatter which the caller owns. + * @stable ICU 55 + */ + static DateFormat* U_EXPORT2 createInstanceForSkeleton( + Calendar *calendarToAdopt, + const UnicodeString& skeleton, + const Locale &locale, + UErrorCode &status); + + + /** + * Gets the set of locales for which DateFormats are installed. + * @param count Filled in with the number of locales in the list that is returned. + * @return the set of locales for which DateFormats are installed. The caller + * does NOT own this list and must not delete it. + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + /** + * Returns whether both date/time parsing in the encapsulated Calendar object and DateFormat whitespace & + * numeric processing is lenient. + * @stable ICU 2.0 + */ + virtual UBool isLenient(void) const; + + /** + * Specifies whether date/time parsing is to be lenient. With + * lenient parsing, the parser may use heuristics to interpret inputs that + * do not precisely match this object's format. Without lenient parsing, + * inputs must match this object's format more closely. + * + * Note: ICU 53 introduced finer grained control of leniency (and added + * new control points) making the preferred method a combination of + * setCalendarLenient() & setBooleanAttribute() calls. + * This method supports prior functionality but may not support all + * future leniency control & behavior of DateFormat. For control of pre 53 leniency, + * Calendar and DateFormat whitespace & numeric tolerance, this method is safe to + * use. 
However, mixing leniency control via this method and modification of the + * newer attributes via setBooleanAttribute() may produce undesirable + * results. + * + * @param lenient True specifies date/time interpretation to be lenient. + * @see Calendar::setLenient + * @stable ICU 2.0 + */ + virtual void setLenient(UBool lenient); + + + /** + * Returns whether date/time parsing in the encapsulated Calendar object processing is lenient. + * @stable ICU 53 + */ + virtual UBool isCalendarLenient(void) const; + + + /** + * Specifies whether encapsulated Calendar date/time parsing is to be lenient. With + * lenient parsing, the parser may use heuristics to interpret inputs that + * do not precisely match this object's format. Without lenient parsing, + * inputs must match this object's format more closely. + * @param lenient when true, parsing is lenient + * @see com.ibm.icu.util.Calendar#setLenient + * @stable ICU 53 + */ + virtual void setCalendarLenient(UBool lenient); + + + /** + * Gets the calendar associated with this date/time formatter. + * The calendar is owned by the formatter and must not be modified. + * Also, the calendar does not reflect the results of a parse operation. + * To parse to a calendar, use {@link #parse(const UnicodeString&, Calendar& cal, ParsePosition&) const parse(const UnicodeString&, Calendar& cal, ParsePosition&)} + * @return the calendar associated with this date/time formatter. + * @stable ICU 2.0 + */ + virtual const Calendar* getCalendar(void) const; + + /** + * Set the calendar to be used by this date format. Initially, the default + * calendar for the specified or default locale is used. The caller should + * not delete the Calendar object after it is adopted by this call. + * Adopting a new calendar will change to the default symbols. + * + * @param calendarToAdopt Calendar object to be adopted. 
+ * @stable ICU 2.0 + */ + virtual void adoptCalendar(Calendar* calendarToAdopt); + + /** + * Set the calendar to be used by this date format. Initially, the default + * calendar for the specified or default locale is used. + * + * @param newCalendar Calendar object to be set. + * @stable ICU 2.0 + */ + virtual void setCalendar(const Calendar& newCalendar); + + + /** + * Gets the number formatter which this date/time formatter uses to format + * and parse the numeric portions of the pattern. + * @return the number formatter which this date/time formatter uses. + * @stable ICU 2.0 + */ + virtual const NumberFormat* getNumberFormat(void) const; + + /** + * Allows you to set the number formatter. The caller should + * not delete the NumberFormat object after it is adopted by this call. + * @param formatToAdopt NumberFormat object to be adopted. + * @stable ICU 2.0 + */ + virtual void adoptNumberFormat(NumberFormat* formatToAdopt); + + /** + * Allows you to set the number formatter. + * @param newNumberFormat NumberFormat object to be set. + * @stable ICU 2.0 + */ + virtual void setNumberFormat(const NumberFormat& newNumberFormat); + + /** + * Returns a reference to the TimeZone used by this DateFormat's calendar. + * @return the time zone associated with the calendar of DateFormat. + * @stable ICU 2.0 + */ + virtual const TimeZone& getTimeZone(void) const; + + /** + * Sets the time zone for the calendar of this DateFormat object. The caller + * no longer owns the TimeZone object and should not delete it after this call. + * @param zoneToAdopt the TimeZone to be adopted. + * @stable ICU 2.0 + */ + virtual void adoptTimeZone(TimeZone* zoneToAdopt); + + /** + * Sets the time zone for the calendar of this DateFormat object. + * @param zone the new time zone. + * @stable ICU 2.0 + */ + virtual void setTimeZone(const TimeZone& zone); + + /** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. 
+ * @param value The UDisplayContext value to set. + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @stable ICU 53 + */ + virtual void setContext(UDisplayContext value, UErrorCode& status); + + /** + * Get the formatter's UDisplayContext value for the specified UDisplayContextType, + * such as UDISPCTX_TYPE_CAPITALIZATION. + * @param type The UDisplayContextType whose value to return + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @return The UDisplayContext value for the specified type. + * @stable ICU 53 + */ + virtual UDisplayContext getContext(UDisplayContextType type, UErrorCode& status) const; + + /** + * Sets a boolean attribute on this DateFormat. + * May return U_UNSUPPORTED_ERROR if this instance does not support + * the specified attribute. + * @param attr the attribute to set + * @param newvalue new value + * @param status the error type + * @return *this - for chaining (example: format.setBooleanAttribute(...).setBooleanAttribute(...) ) + * @stable ICU 53 + */ + + virtual DateFormat& U_EXPORT2 setBooleanAttribute(UDateFormatBooleanAttribute attr, + UBool newvalue, + UErrorCode &status); + + /** + * Returns a boolean attribute of this DateFormat. + * May return U_UNSUPPORTED_ERROR if this instance does not support + * the specified attribute. + * @param attr the attribute to get + * @param status the error type + * @return the attribute value. Undefined if there is an error. + * @stable ICU 53 + */ + virtual UBool U_EXPORT2 getBooleanAttribute(UDateFormatBooleanAttribute attr, UErrorCode &status) const; + +protected: + /** + * Default constructor. Creates a DateFormat with no Calendar or NumberFormat + * associated with it. 
This constructor depends on the subclasses to fill in + * the calendar and numberFormat fields. + * @stable ICU 2.0 + */ + DateFormat(); + + /** + * Copy constructor. + * @stable ICU 2.0 + */ + DateFormat(const DateFormat&); + + /** + * Default assignment operator. + * @stable ICU 2.0 + */ + DateFormat& operator=(const DateFormat&); + + /** + * The calendar that DateFormat uses to produce the time field values needed + * to implement date/time formatting. Subclasses should generally initialize + * this to the default calendar for the locale associated with this DateFormat. + * @stable ICU 2.4 + */ + Calendar* fCalendar; + + /** + * The number formatter that DateFormat uses to format numbers in dates and + * times. Subclasses should generally initialize this to the default number + * format for the locale associated with this DateFormat. + * @stable ICU 2.4 + */ + NumberFormat* fNumberFormat; + + +private: + + /** + * Gets the date/time formatter with the given formatting styles for the + * given locale. + * @param dateStyle the given date formatting style. + * @param timeStyle the given time formatting style. + * @param inLocale the given locale. + * @return a date/time formatter, or 0 on failure. + */ + static DateFormat* U_EXPORT2 create(EStyle timeStyle, EStyle dateStyle, const Locale& inLocale); + + + /** + * enum set of active boolean attributes for this instance + */ + EnumSet<UDateFormatBooleanAttribute, 0, UDAT_BOOLEAN_ATTRIBUTE_COUNT> fBoolFlags; + + + UDisplayContext fCapitalizationContext; + friend class DateFmtKeyByStyle; + +public: +#ifndef U_HIDE_OBSOLETE_API + /** + * Field selector for FieldPosition for DateFormat fields. 
+ * @obsolete ICU 3.4 use UDateFormatField instead, since this API will be + * removed in that release + */ + enum EField + { + // Obsolete; use UDateFormatField instead + kEraField = UDAT_ERA_FIELD, + kYearField = UDAT_YEAR_FIELD, + kMonthField = UDAT_MONTH_FIELD, + kDateField = UDAT_DATE_FIELD, + kHourOfDay1Field = UDAT_HOUR_OF_DAY1_FIELD, + kHourOfDay0Field = UDAT_HOUR_OF_DAY0_FIELD, + kMinuteField = UDAT_MINUTE_FIELD, + kSecondField = UDAT_SECOND_FIELD, + kMillisecondField = UDAT_FRACTIONAL_SECOND_FIELD, + kDayOfWeekField = UDAT_DAY_OF_WEEK_FIELD, + kDayOfYearField = UDAT_DAY_OF_YEAR_FIELD, + kDayOfWeekInMonthField = UDAT_DAY_OF_WEEK_IN_MONTH_FIELD, + kWeekOfYearField = UDAT_WEEK_OF_YEAR_FIELD, + kWeekOfMonthField = UDAT_WEEK_OF_MONTH_FIELD, + kAmPmField = UDAT_AM_PM_FIELD, + kHour1Field = UDAT_HOUR1_FIELD, + kHour0Field = UDAT_HOUR0_FIELD, + kTimezoneField = UDAT_TIMEZONE_FIELD, + kYearWOYField = UDAT_YEAR_WOY_FIELD, + kDOWLocalField = UDAT_DOW_LOCAL_FIELD, + kExtendedYearField = UDAT_EXTENDED_YEAR_FIELD, + kJulianDayField = UDAT_JULIAN_DAY_FIELD, + kMillisecondsInDayField = UDAT_MILLISECONDS_IN_DAY_FIELD, + + // Obsolete; use UDateFormatField instead + ERA_FIELD = UDAT_ERA_FIELD, + YEAR_FIELD = UDAT_YEAR_FIELD, + MONTH_FIELD = UDAT_MONTH_FIELD, + DATE_FIELD = UDAT_DATE_FIELD, + HOUR_OF_DAY1_FIELD = UDAT_HOUR_OF_DAY1_FIELD, + HOUR_OF_DAY0_FIELD = UDAT_HOUR_OF_DAY0_FIELD, + MINUTE_FIELD = UDAT_MINUTE_FIELD, + SECOND_FIELD = UDAT_SECOND_FIELD, + MILLISECOND_FIELD = UDAT_FRACTIONAL_SECOND_FIELD, + DAY_OF_WEEK_FIELD = UDAT_DAY_OF_WEEK_FIELD, + DAY_OF_YEAR_FIELD = UDAT_DAY_OF_YEAR_FIELD, + DAY_OF_WEEK_IN_MONTH_FIELD = UDAT_DAY_OF_WEEK_IN_MONTH_FIELD, + WEEK_OF_YEAR_FIELD = UDAT_WEEK_OF_YEAR_FIELD, + WEEK_OF_MONTH_FIELD = UDAT_WEEK_OF_MONTH_FIELD, + AM_PM_FIELD = UDAT_AM_PM_FIELD, + HOUR1_FIELD = UDAT_HOUR1_FIELD, + HOUR0_FIELD = UDAT_HOUR0_FIELD, + TIMEZONE_FIELD = UDAT_TIMEZONE_FIELD + }; +#endif /* U_HIDE_OBSOLETE_API */ +}; + +U_NAMESPACE_END + +#endif /* #if 
!UCONFIG_NO_FORMATTING */ + +#endif // _DATEFMT +//eof diff --git a/intl/icu/source/i18n/unicode/dcfmtsym.h b/intl/icu/source/i18n/unicode/dcfmtsym.h new file mode 100644 index 000000000..946227add --- /dev/null +++ b/intl/icu/source/i18n/unicode/dcfmtsym.h @@ -0,0 +1,505 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File DCFMTSYM.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 03/18/97 clhuang Updated per C++ implementation. +* 03/27/97 helena Updated to pass the simple test after code review. +* 08/26/97 aliu Added currency/intl currency symbol support. +* 07/22/98 stephen Changed to match C++ style +* currencySymbol -> fCurrencySymbol +* Constants changed from CAPS to kCaps +* 06/24/99 helena Integrated Alan's NF enhancements and Java2 bug fixes +* 09/22/00 grhoten Marked deprecation tags with a pointer to replacement +* functions. +******************************************************************************** +*/ + +#ifndef DCFMTSYM_H +#define DCFMTSYM_H + +#include "unicode/utypes.h" +#include "unicode/uchar.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" +#include "unicode/locid.h" +#include "unicode/unum.h" + +/** + * \file + * \brief C++ API: Symbols for formatting numbers. + */ + + +U_NAMESPACE_BEGIN + +/** + * This class represents the set of symbols needed by DecimalFormat + * to format numbers. DecimalFormat creates for itself an instance of + * DecimalFormatSymbols from its locale data. If you need to change any + * of these symbols, you can get the DecimalFormatSymbols object from + * your DecimalFormat and modify it. 
+ * <P> + * Here are the special characters used in the parts of the + * subpattern, with notes on their usage. + * <pre> + * \code + * Symbol Meaning + * 0 a digit + * # a digit, zero shows as absent + * . placeholder for decimal separator + * , placeholder for grouping separator. + * ; separates formats. + * - default negative prefix. + * % divide by 100 and show as percentage + * X any other characters can be used in the prefix or suffix + * ' used to quote special characters in a prefix or suffix. + * \endcode + * </pre> + * [Notes] + * <P> + * If there is no explicit negative subpattern, - is prefixed to the + * positive form. That is, "0.00" alone is equivalent to "0.00;-0.00". + * <P> + * The grouping separator is commonly used for thousands, but in some + * countries for ten-thousands. The interval is a constant number of + * digits between the grouping characters, such as 100,000,000 or 1,0000,0000. + * If you supply a pattern with multiple grouping characters, the interval + * between the last one and the end of the integer is the one that is + * used. So "#,##,###,####" == "######,####" == "##,####,####". + * <P> + * This class only handles localized digits where the 10 digits are + * contiguous in Unicode, from 0 to 9. Other digits sets (such as + * superscripts) would need a different subclass. + */ +class U_I18N_API DecimalFormatSymbols : public UObject { +public: + /** + * Constants for specifying a number format symbol. 
+ * @stable ICU 2.0 + */ + enum ENumberFormatSymbol { + /** The decimal separator */ + kDecimalSeparatorSymbol, + /** The grouping separator */ + kGroupingSeparatorSymbol, + /** The pattern separator */ + kPatternSeparatorSymbol, + /** The percent sign */ + kPercentSymbol, + /** Zero*/ + kZeroDigitSymbol, + /** Character representing a digit in the pattern */ + kDigitSymbol, + /** The minus sign */ + kMinusSignSymbol, + /** The plus sign */ + kPlusSignSymbol, + /** The currency symbol */ + kCurrencySymbol, + /** The international currency symbol */ + kIntlCurrencySymbol, + /** The monetary separator */ + kMonetarySeparatorSymbol, + /** The exponential symbol */ + kExponentialSymbol, + /** Per mill symbol - replaces kPermillSymbol */ + kPerMillSymbol, + /** Escape padding character */ + kPadEscapeSymbol, + /** Infinity symbol */ + kInfinitySymbol, + /** Nan symbol */ + kNaNSymbol, + /** Significant digit symbol + * @stable ICU 3.0 */ + kSignificantDigitSymbol, + /** The monetary grouping separator + * @stable ICU 3.6 + */ + kMonetaryGroupingSeparatorSymbol, + /** One + * @stable ICU 4.6 + */ + kOneDigitSymbol, + /** Two + * @stable ICU 4.6 + */ + kTwoDigitSymbol, + /** Three + * @stable ICU 4.6 + */ + kThreeDigitSymbol, + /** Four + * @stable ICU 4.6 + */ + kFourDigitSymbol, + /** Five + * @stable ICU 4.6 + */ + kFiveDigitSymbol, + /** Six + * @stable ICU 4.6 + */ + kSixDigitSymbol, + /** Seven + * @stable ICU 4.6 + */ + kSevenDigitSymbol, + /** Eight + * @stable ICU 4.6 + */ + kEightDigitSymbol, + /** Nine + * @stable ICU 4.6 + */ + kNineDigitSymbol, + /** Multiplication sign. + * @stable ICU 54 + */ + kExponentMultiplicationSymbol, + /** count symbol constants */ + kFormatSymbolCount = kNineDigitSymbol + 2 + }; + + /** + * Create a DecimalFormatSymbols object for the given locale. + * + * @param locale The locale to get symbols for. + * @param status Input/output parameter, set to success or + * failure code upon return. 
+ * @stable ICU 2.0 + */ + DecimalFormatSymbols(const Locale& locale, UErrorCode& status); + + /** + * Create a DecimalFormatSymbols object for the default locale. + * This constructor will not fail. If the resource file data is + * not available, it will use hard-coded last-resort data and + * set status to U_USING_FALLBACK_ERROR. + * + * @param status Input/output parameter, set to success or + * failure code upon return. + * @stable ICU 2.0 + */ + DecimalFormatSymbols(UErrorCode& status); + + /** + * Creates a DecimalFormatSymbols object with last-resort data. + * Intended for callers who cache the symbols data and + * set all symbols on the resulting object. + * + * The last-resort symbols are similar to those for the root data, + * except that the grouping separators are empty, + * the NaN symbol is U+FFFD rather than "NaN", + * and the CurrencySpacing patterns are empty. + * + * @param status Input/output parameter, set to success or + * failure code upon return. + * @return last-resort symbols + * @stable ICU 52 + */ + static DecimalFormatSymbols* createWithLastResortData(UErrorCode& status); + + /** + * Copy constructor. + * @stable ICU 2.0 + */ + DecimalFormatSymbols(const DecimalFormatSymbols&); + + /** + * Assignment operator. + * @stable ICU 2.0 + */ + DecimalFormatSymbols& operator=(const DecimalFormatSymbols&); + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~DecimalFormatSymbols(); + + /** + * Return true if another object is semantically equal to this one. + * + * @param other the object to be compared with. + * @return true if another object is semantically equal to this one. + * @stable ICU 2.0 + */ + UBool operator==(const DecimalFormatSymbols& other) const; + + /** + * Return true if another object is semantically unequal to this one. + * + * @param other the object to be compared with. + * @return true if another object is semantically unequal to this one. 
+ * @stable ICU 2.0 + */ + UBool operator!=(const DecimalFormatSymbols& other) const { return !operator==(other); } + + /** + * Get one of the format symbols by its enum constant. + * Each symbol is stored as a string so that graphemes + * (characters with modifier letters) can be used. + * + * @param symbol Constant to indicate a number format symbol. + * @return the format symbols by the param 'symbol' + * @stable ICU 2.0 + */ + inline UnicodeString getSymbol(ENumberFormatSymbol symbol) const; + + /** + * Set one of the format symbols by its enum constant. + * Each symbol is stored as a string so that graphemes + * (characters with modifier letters) can be used. + * + * @param symbol Constant to indicate a number format symbol. + * @param value value of the format symbol + * @param propogateDigits If false, setting the zero digit will not automatically set 1-9. + * The default behavior is to automatically set 1-9 if zero is being set and the value + * it is being set to corresponds to a known Unicode zero digit. + * @stable ICU 2.0 + */ + void setSymbol(ENumberFormatSymbol symbol, const UnicodeString &value, const UBool propogateDigits); + + /** + * Returns the locale for which this object was constructed. + * @stable ICU 2.6 + */ + inline Locale getLocale() const; + + /** + * Returns the locale for this object. Two flavors are available: + * valid and actual locale. + * @stable ICU 2.8 + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + + /** + * Get pattern string for 'CurrencySpacing' that can be applied to + * currency format. + * This API gets the CurrencySpacing data from ResourceBundle. The pattern can + * be empty if there is no data from current locale and its parent locales. + * + * @param type : UNUM_CURRENCY_MATCH, UNUM_CURRENCY_SURROUNDING_MATCH or UNUM_CURRENCY_INSERT. + * @param beforeCurrency : true if the pattern is for before currency symbol. + * false if the pattern is for after currency symbol. 
+ * @param status: Input/output parameter, set to success or + * failure code upon return. + * @return pattern string for currencyMatch, surroundingMatch or spaceInsert. + * Return empty string if there is no data for this locale and its parent + * locales. + * @stable ICU 4.8 + */ + const UnicodeString& getPatternForCurrencySpacing(UCurrencySpacing type, + UBool beforeCurrency, + UErrorCode& status) const; + /** + * Set pattern string for 'CurrencySpacing' that can be applied to + * currency format. + * + * @param type : UNUM_CURRENCY_MATCH, UNUM_CURRENCY_SURROUNDING_MATCH or UNUM_CURRENCY_INSERT. + * @param beforeCurrency : true if the pattern is for before currency symbol. + * false if the pattern is for after currency symbol. + * @param pattern : pattern string to override current setting. + * @stable ICU 4.8 + */ + void setPatternForCurrencySpacing(UCurrencySpacing type, + UBool beforeCurrency, + const UnicodeString& pattern); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + DecimalFormatSymbols(); + + /** + * Initializes the symbols from the LocaleElements resource bundle. + * Note: The organization of LocaleElements badly needs to be + * cleaned up. + * + * @param locale The locale to get symbols for. + * @param success Input/output parameter, set to success or + * failure code upon return. + * @param useLastResortData determine if use last resort data + */ + void initialize(const Locale& locale, UErrorCode& success, UBool useLastResortData = FALSE); + + /** + * Initialize the symbols with default values. 
+ */ + void initialize(); + + void setCurrencyForSymbols(); + +public: + +#ifndef U_HIDE_INTERNAL_API + /** + * Returns the fIsCustomCurrencySymbol flag, which the inline setSymbol() + * definition sets to TRUE when kCurrencySymbol is assigned. + * @internal For ICU use only + */ + inline UBool isCustomCurrencySymbol() const { + return fIsCustomCurrencySymbol; + } + + /** + * Returns the fIsCustomIntlCurrencySymbol flag, which the inline setSymbol() + * definition sets to TRUE when kIntlCurrencySymbol is assigned. + * @internal For ICU use only + */ + inline UBool isCustomIntlCurrencySymbol() const { + return fIsCustomIntlCurrencySymbol; + } +#endif /* U_HIDE_INTERNAL_API */ + + /** + * _Internal_ function - more efficient version of getSymbol, + * returning a const reference to one of the symbol strings. + * The returned reference becomes invalid when the symbol is changed + * or when the DecimalFormatSymbols are destroyed. + * ### TODO markus 2002oct11: Consider proposing getConstSymbol() to be really public. + * Note: moved #ifndef U_HIDE_INTERNAL_API after this, since this is needed for inline in DecimalFormat + * + * @param symbol Constant to indicate a number format symbol. + * @return the format symbol by the param 'symbol' + * @internal + */ + inline const UnicodeString &getConstSymbol(ENumberFormatSymbol symbol) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * Returns the pattern stored in currency info. Internal API for use by NumberFormat API. + * @internal + */ + inline const UChar* getCurrencyPattern(void) const; +#endif /* U_HIDE_INTERNAL_API */ + +private: + /** + * Private symbol strings. + * They are either loaded from a resource bundle or otherwise owned. + * setSymbol() clones the symbol string. + * Readonly aliases can only come from a resource bundle, so that we can always + * use fastCopyFrom() with them. + * + * If DecimalFormatSymbols becomes subclassable and the status of fSymbols changes + * from private to protected, + * or when fSymbols can be set any other way that allows them to be readonly aliases + * to non-resource bundle strings, + * then regular UnicodeString copies must be used instead of fastCopyFrom(). 
+ * + * @internal + */ + UnicodeString fSymbols[kFormatSymbolCount]; + + /** + * Non-symbol variable for getConstSymbol(). Always empty. + * @internal + */ + UnicodeString fNoSymbol; + + Locale locale; + + char actualLocale[ULOC_FULLNAME_CAPACITY]; + char validLocale[ULOC_FULLNAME_CAPACITY]; + const UChar* currPattern; + + UnicodeString currencySpcBeforeSym[UNUM_CURRENCY_SPACING_COUNT]; + UnicodeString currencySpcAfterSym[UNUM_CURRENCY_SPACING_COUNT]; + UBool fIsCustomCurrencySymbol; + UBool fIsCustomIntlCurrencySymbol; +}; + +// ------------------------------------- +/** + * Returns, by value, a copy of fSymbols[symbol] when symbol is below + * kFormatSymbolCount; otherwise a copy of fNoSymbol, the member documented + * as always empty. + */ +inline UnicodeString +DecimalFormatSymbols::getSymbol(ENumberFormatSymbol symbol) const { + const UnicodeString *strPtr; + if(symbol < kFormatSymbolCount) { + strPtr = &fSymbols[symbol]; + } else { + strPtr = &fNoSymbol; + } + return *strPtr; +} +/* Same selection as getSymbol(), but the chosen string is returned by const reference, not copied. */ +// See comments above for this function. Not hidden with #ifndef U_HIDE_INTERNAL_API +inline const UnicodeString & +DecimalFormatSymbols::getConstSymbol(ENumberFormatSymbol symbol) const { + const UnicodeString *strPtr; + if(symbol < kFormatSymbolCount) { + strPtr = &fSymbols[symbol]; + } else { + strPtr = &fNoSymbol; + } + return *strPtr; +} + +// ------------------------------------- +/** + * Stores value in fSymbols[symbol] when symbol is below kFormatSymbolCount. + * Assigning kCurrencySymbol or kIntlCurrencySymbol also sets the matching + * fIsCustomCurrencySymbol / fIsCustomIntlCurrencySymbol flag to TRUE. + * When propogateDigits is TRUE, symbol is kZeroDigitSymbol, and value is a + * single code point whose u_charDigitValue() is 0, the nine entries starting + * at kOneDigitSymbol are filled with the next nine consecutive code points. + * The default argument (TRUE) for propogateDigits is supplied here, on the + * inline definition, not on the in-class declaration. + */ +inline void +DecimalFormatSymbols::setSymbol(ENumberFormatSymbol symbol, const UnicodeString &value, const UBool propogateDigits = TRUE) { + if (symbol == kCurrencySymbol) { + fIsCustomCurrencySymbol = TRUE; + } + else if (symbol == kIntlCurrencySymbol) { + fIsCustomIntlCurrencySymbol = TRUE; + } + if(symbol<kFormatSymbolCount) { + fSymbols[symbol]=value; + } + + // If the zero digit is being set to a known zero digit according to Unicode, + // then we automatically set the corresponding 1-9 digits + if ( propogateDigits && symbol == kZeroDigitSymbol && value.countChar32() == 1 ) { + UChar32 sym = value.char32At(0); + if ( u_charDigitValue(sym) == 0 ) { + for ( int8_t i = 1 ; i<= 9 ; i++ ) { + sym++; + fSymbols[(int)kOneDigitSymbol+i-1] = UnicodeString(sym); + } + } + } 
+} + +// ------------------------------------- +/* Returns a copy of the locale member, by value. */ +inline Locale +DecimalFormatSymbols::getLocale() const { + return locale; +} +/* Returns the currPattern member pointer unchanged. */ +#ifndef U_HIDE_INTERNAL_API +inline const UChar* +DecimalFormatSymbols::getCurrencyPattern() const { + return currPattern; +} +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _DCFMTSYM +//eof diff --git a/intl/icu/source/i18n/unicode/decimfmt.h b/intl/icu/source/i18n/unicode/decimfmt.h new file mode 100644 index 000000000..7339399f7 --- /dev/null +++ b/intl/icu/source/i18n/unicode/decimfmt.h @@ -0,0 +1,2304 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File DECIMFMT.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 03/20/97 clhuang Updated per C++ implementation. +* 04/03/97 aliu Rewrote parsing and formatting completely, and +* cleaned up and debugged. Actually works now. +* 04/17/97 aliu Changed DigitCount to int per code review. +* 07/10/97 helena Made ParsePosition a class and get rid of the function +* hiding problems. +* 09/09/97 aliu Ported over support for exponential formats. +* 07/20/98 stephen Changed documentation +* 01/30/13 emmons Added Scaling methods +******************************************************************************** +*/ + +#ifndef DECIMFMT_H +#define DECIMFMT_H + +#include "unicode/utypes.h" +/** + * \file + * \brief C++ API: Formats decimal numbers. 
+ */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/dcfmtsym.h" +#include "unicode/numfmt.h" +#include "unicode/locid.h" +#include "unicode/fpositer.h" +#include "unicode/stringpiece.h" +#include "unicode/curramt.h" +#include "unicode/enumset.h" + +#ifndef U_HIDE_INTERNAL_API +/** + * \def UNUM_DECIMALFORMAT_INTERNAL_SIZE + * @internal + */ +#if UCONFIG_FORMAT_FASTPATHS_49 +#define UNUM_DECIMALFORMAT_INTERNAL_SIZE 16 +#endif +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_BEGIN + +class DigitList; +class CurrencyPluralInfo; +class Hashtable; +class UnicodeSet; +class FieldPositionHandler; +class DecimalFormatStaticSets; +class FixedDecimal; +class DecimalFormatImpl; +class PluralRules; +class VisibleDigitsWithExponent; + +// explicit template instantiation. see digitlst.h +#if defined (_MSC_VER) +template class U_I18N_API EnumSet<UNumberFormatAttribute, + UNUM_MAX_NONBOOLEAN_ATTRIBUTE+1, + UNUM_LIMIT_BOOLEAN_ATTRIBUTE>; +#endif + +/** + * DecimalFormat is a concrete subclass of NumberFormat that formats decimal + * numbers. It has a variety of features designed to make it possible to parse + * and format numbers in any locale, including support for Western, Arabic, or + * Indic digits. It also supports different flavors of numbers, including + * integers ("123"), fixed-point numbers ("123.4"), scientific notation + * ("1.23E4"), percentages ("12%"), and currency amounts ("$123", "USD123", + * "123 US dollars"). All of these flavors can be easily localized. + * + * <p>To obtain a NumberFormat for a specific locale (including the default + * locale) call one of NumberFormat's factory methods such as + * createInstance(). Do not call the DecimalFormat constructors directly, unless + * you know what you are doing, since the NumberFormat factory methods may + * return subclasses other than DecimalFormat. 
+ * + * <p><strong>Example Usage</strong> + * + * \code + * // Normally we would have a GUI with a menu for this + * int32_t locCount; + * const Locale* locales = NumberFormat::getAvailableLocales(locCount); + * + * double myNumber = -1234.56; + * UErrorCode success = U_ZERO_ERROR; + * NumberFormat* form; + * + * // Print out a number with the localized number, currency and percent + * // format for each locale. + * UnicodeString countryName; + * UnicodeString displayName; + * UnicodeString str; + * UnicodeString pattern; + * Formattable fmtable; + * for (int32_t j = 0; j < 3; ++j) { + * cout << endl << "FORMAT " << j << endl; + * for (int32_t i = 0; i < locCount; ++i) { + * if (locales[i].getCountry(countryName).size() == 0) { + * // skip language-only + * continue; + * } + * switch (j) { + * case 0: + * form = NumberFormat::createInstance(locales[i], success ); break; + * case 1: + * form = NumberFormat::createCurrencyInstance(locales[i], success ); break; + * default: + * form = NumberFormat::createPercentInstance(locales[i], success ); break; + * } + * if (form) { + * str.remove(); + * pattern = ((DecimalFormat*)form)->toPattern(pattern); + * cout << locales[i].getDisplayName(displayName) << ": " << pattern; + * cout << " -> " << form->format(myNumber,str) << endl; + * form->parse(form->format(myNumber,str), fmtable, success); + * delete form; + * } + * } + * } + * \endcode + * <P> + * Another example use createInstance(style) + * <P> + * <pre> + * <strong>// Print out a number using the localized number, currency, + * // percent, scientific, integer, iso currency, and plural currency + * // format for each locale</strong> + * Locale* locale = new Locale("en", "US"); + * double myNumber = 1234.56; + * UErrorCode success = U_ZERO_ERROR; + * UnicodeString str; + * Formattable fmtable; + * for (int j=NumberFormat::kNumberStyle; + * j<=NumberFormat::kPluralCurrencyStyle; + * ++j) { + * NumberFormat* format = NumberFormat::createInstance(locale, j, success); + * 
str.remove(); + * cout << "format result " << format->format(myNumber, str) << endl; + * format->parse(format->format(myNumber, str), fmtable, success); + * }</pre> + * + * + * <p><strong>Patterns</strong> + * + * <p>A DecimalFormat consists of a <em>pattern</em> and a set of + * <em>symbols</em>. The pattern may be set directly using + * applyPattern(), or indirectly using other API methods which + * manipulate aspects of the pattern, such as the minimum number of integer + * digits. The symbols are stored in a DecimalFormatSymbols + * object. When using the NumberFormat factory methods, the + * pattern and symbols are read from ICU's locale data. + * + * <p><strong>Special Pattern Characters</strong> + * + * <p>Many characters in a pattern are taken literally; they are matched during + * parsing and output unchanged during formatting. Special characters, on the + * other hand, stand for other characters, strings, or classes of characters. + * For example, the '#' character is replaced by a localized digit. Often the + * replacement character is the same as the pattern character; in the U.S. locale, + * the ',' grouping character is replaced by ','. However, the replacement is + * still happening, and if the symbols are modified, the grouping character + * changes. Some special characters affect the behavior of the formatter by + * their presence; for example, if the percent character is seen, then the + * value is multiplied by 100 before being displayed. + * + * <p>To insert a special character in a pattern as a literal, that is, without + * any special meaning, the character must be quoted. There are some exceptions to + * this which are noted below. + * + * <p>The characters listed here are used in non-localized patterns. Localized + * patterns use the corresponding characters taken from this formatter's + * DecimalFormatSymbols object instead, and these characters lose + * their special status. 
Two exceptions are the currency sign and quote, which + * are not localized. + * + * <table border=0 cellspacing=3 cellpadding=0> + * <tr bgcolor="#ccccff"> + * <td align=left><strong>Symbol</strong> + * <td align=left><strong>Location</strong> + * <td align=left><strong>Localized?</strong> + * <td align=left><strong>Meaning</strong> + * <tr valign=top> + * <td><code>0</code> + * <td>Number + * <td>Yes + * <td>Digit + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>1-9</code> + * <td>Number + * <td>Yes + * <td>'1' through '9' indicate rounding. + * <tr valign=top> + * <td><code>\htmlonly@\endhtmlonly</code> <!--doxygen doesn't like @--> + * <td>Number + * <td>No + * <td>Significant digit + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>#</code> + * <td>Number + * <td>Yes + * <td>Digit, zero shows as absent + * <tr valign=top> + * <td><code>.</code> + * <td>Number + * <td>Yes + * <td>Decimal separator or monetary decimal separator + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>-</code> + * <td>Number + * <td>Yes + * <td>Minus sign + * <tr valign=top> + * <td><code>,</code> + * <td>Number + * <td>Yes + * <td>Grouping separator + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>E</code> + * <td>Number + * <td>Yes + * <td>Separates mantissa and exponent in scientific notation. + * <em>Need not be quoted in prefix or suffix.</em> + * <tr valign=top> + * <td><code>+</code> + * <td>Exponent + * <td>Yes + * <td>Prefix positive exponents with localized plus sign. 
+ * <em>Need not be quoted in prefix or suffix.</em> + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>;</code> + * <td>Subpattern boundary + * <td>Yes + * <td>Separates positive and negative subpatterns + * <tr valign=top> + * <td><code>\%</code> + * <td>Prefix or suffix + * <td>Yes + * <td>Multiply by 100 and show as percentage + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>\\u2030</code> + * <td>Prefix or suffix + * <td>Yes + * <td>Multiply by 1000 and show as per mille + * <tr valign=top> + * <td><code>\htmlonly¤\endhtmlonly</code> (<code>\\u00A4</code>) + * <td>Prefix or suffix + * <td>No + * <td>Currency sign, replaced by currency symbol. If + * doubled, replaced by international currency symbol. + * If tripled, replaced by currency plural names, for example, + * "US dollar" or "US dollars" for America. + * If present in a pattern, the monetary decimal separator + * is used instead of the decimal separator. + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>'</code> + * <td>Prefix or suffix + * <td>No + * <td>Used to quote special characters in a prefix or suffix, + * for example, <code>"'#'#"</code> formats 123 to + * <code>"#123"</code>. To create a single quote + * itself, use two in a row: <code>"# o''clock"</code>. + * <tr valign=top> + * <td><code>*</code> + * <td>Prefix or suffix boundary + * <td>Yes + * <td>Pad escape, precedes pad character + * </table> + * + * <p>A DecimalFormat pattern contains a positive and negative + * subpattern, for example, "#,##0.00;(#,##0.00)". Each subpattern has a + * prefix, a numeric part, and a suffix. If there is no explicit negative + * subpattern, the negative subpattern is the localized minus sign prefixed to the + * positive subpattern. That is, "0.00" alone is equivalent to "0.00;-0.00". If there + * is an explicit negative subpattern, it serves only to specify the negative + * prefix and suffix; the number of digits, minimal digits, and other + * characteristics are ignored in the negative subpattern. 
That means that + * "#,##0.0#;(#)" has precisely the same result as "#,##0.0#;(#,##0.0#)". + * + * <p>The prefixes, suffixes, and various symbols used for infinity, digits, + * thousands separators, decimal separators, etc. may be set to arbitrary + * values, and they will appear properly during formatting. However, care must + * be taken that the symbols and strings do not conflict, or parsing will be + * unreliable. For example, either the positive and negative prefixes or the + * suffixes must be distinct for parse() to be able + * to distinguish positive from negative values. Another example is that the + * decimal separator and thousands separator should be distinct characters, or + * parsing will be impossible. + * + * <p>The <em>grouping separator</em> is a character that separates clusters of + * integer digits to make large numbers more legible. It is commonly used for + * thousands, but in some locales it separates ten-thousands. The <em>grouping + * size</em> is the number of digits between the grouping separators, such as 3 + * for "100,000,000" or 4 for "1 0000 0000". There are actually two different + * grouping sizes: One used for the least significant integer digits, the + * <em>primary grouping size</em>, and one used for all others, the + * <em>secondary grouping size</em>. In most locales these are the same, but + * sometimes they are different. For example, if the primary grouping interval + * is 3, and the secondary is 2, then this corresponds to the pattern + * "#,##,##0", and the number 123456789 is formatted as "12,34,56,789". If a + * pattern contains multiple grouping separators, the interval between the last + * one and the end of the integer defines the primary grouping size, and the + * interval between the last two defines the secondary grouping size. All others + * are ignored, so "#,##,###,####" == "###,###,####" == "##,#,###,####". 
+ * + * <p>Illegal patterns, such as "#.#.#" or "#.###,###", will cause + * DecimalFormat to set a failing UErrorCode. + * + * <p><strong>Pattern BNF</strong> + * + * <pre> + * pattern := subpattern (';' subpattern)? + * subpattern := prefix? number exponent? suffix? + * number := (integer ('.' fraction)?) | sigDigits + * prefix := '\\u0000'..'\\uFFFD' - specialCharacters + * suffix := '\\u0000'..'\\uFFFD' - specialCharacters + * integer := '#'* '0'* '0' + * fraction := '0'* '#'* + * sigDigits := '#'* '@' '@'* '#'* + * exponent := 'E' '+'? '0'* '0' + * padSpec := '*' padChar + * padChar := '\\u0000'..'\\uFFFD' - quote + * + * Notation: + * X* 0 or more instances of X + * X? 0 or 1 instances of X + * X|Y either X or Y + * C..D any character from C up to D, inclusive + * S-T characters in S, except those in T + * </pre> + * The first subpattern is for positive numbers. The second (optional) + * subpattern is for negative numbers. + * + * <p>Not indicated in the BNF syntax above: + * + * <ul><li>The grouping separator ',' can occur inside the integer and + * sigDigits elements, between any two pattern characters of that + * element, as long as the integer or sigDigits element is not + * followed by the exponent element. + * + * <li>Two grouping intervals are recognized: That between the + * decimal point and the first grouping symbol, and that + * between the first and second grouping symbols. These + * intervals are identical in most locales, but in some + * locales they differ. For example, the pattern + * "#,##,###" formats the number 123456789 as + * "12,34,56,789".</li> + * + * <li>The pad specifier <code>padSpec</code> may appear before the prefix, + * after the prefix, before the suffix, after the suffix, or not at all. + * + * <li>In place of '0', the digits '1' through '9' may be used to + * indicate a rounding increment. 
+ * </ul> + * + * <p><strong>Parsing</strong> + * + * <p>DecimalFormat parses all Unicode characters that represent + * decimal digits, as defined by u_charDigitValue(). In addition, + * DecimalFormat also recognizes as digits the ten consecutive + * characters starting with the localized zero digit defined in the + * DecimalFormatSymbols object. During formatting, the + * DecimalFormatSymbols-based digits are output. + * + * <p>During parsing, grouping separators are ignored if in lenient mode; + * otherwise, if present, they must be in appropriate positions. + * + * <p>For currency parsing, the formatter is able to parse every currency + * style formats no matter which style the formatter is constructed with. + * For example, a formatter instance gotten from + * NumberFormat.getInstance(ULocale, NumberFormat.CURRENCYSTYLE) can parse + * formats such as "USD1.00" and "3.00 US dollars". + * + * <p>If parse(UnicodeString&,Formattable&,ParsePosition&) + * fails to parse a string, it leaves the parse position unchanged. + * The convenience method parse(UnicodeString&,Formattable&,UErrorCode&) + * indicates parse failure by setting a failing + * UErrorCode. + * + * <p><strong>Formatting</strong> + * + * <p>Formatting is guided by several parameters, all of which can be + * specified either using a pattern or using the API. The following + * description applies to formats that do not use <a href="#sci">scientific + * notation</a> or <a href="#sigdig">significant digits</a>. + * + * <ul><li>If the number of actual integer digits exceeds the + * <em>maximum integer digits</em>, then only the least significant + * digits are shown. For example, 1997 is formatted as "97" if the + * maximum integer digits is set to 2. + * + * <li>If the number of actual integer digits is less than the + * <em>minimum integer digits</em>, then leading zeros are added. For + * example, 1997 is formatted as "01997" if the minimum integer digits + * is set to 5. 
+ * + * <li>If the number of actual fraction digits exceeds the <em>maximum + * fraction digits</em>, then rounding is performed to the + * maximum fraction digits. For example, 0.125 is formatted as "0.12" + * if the maximum fraction digits is 2. This behavior can be changed + * by specifying a rounding increment and/or a rounding mode. + * + * <li>If the number of actual fraction digits is less than the + * <em>minimum fraction digits</em>, then trailing zeros are added. + * For example, 0.125 is formatted as "0.1250" if the minimum fraction + * digits is set to 4. + * + * <li>Trailing fractional zeros are not displayed if they occur + * <em>j</em> positions after the decimal, where <em>j</em> is less + * than the maximum fraction digits. For example, 0.10004 is + * formatted as "0.1" if the maximum fraction digits is four or less. + * </ul> + * + * <p><strong>Special Values</strong> + * + * <p><code>NaN</code> is represented as a single character, typically + * <code>\\uFFFD</code>. This character is determined by the + * DecimalFormatSymbols object. This is the only value for which + * the prefixes and suffixes are not used. + * + * <p>Infinity is represented as a single character, typically + * <code>\\u221E</code>, with the positive or negative prefixes and suffixes + * applied. The infinity character is determined by the + * DecimalFormatSymbols object. + * + * <a name="sci"><strong>Scientific Notation</strong></a> + * + * <p>Numbers in scientific notation are expressed as the product of a mantissa + * and a power of ten, for example, 1234 can be expressed as 1.234 x 10<sup>3</sup>. The + * mantissa is typically in the half-open interval [1.0, 10.0) or sometimes [0.0, 1.0), + * but it need not be. DecimalFormat supports arbitrary mantissas. + * DecimalFormat can be instructed to use scientific + * notation through the API or through the pattern. 
In a pattern, the exponent + * character immediately followed by one or more digit characters indicates + * scientific notation. Example: "0.###E0" formats the number 1234 as + * "1.234E3". + * + * <ul> + * <li>The number of digit characters after the exponent character gives the + * minimum exponent digit count. There is no maximum. Negative exponents are + * formatted using the localized minus sign, <em>not</em> the prefix and suffix + * from the pattern. This allows patterns such as "0.###E0 m/s". To prefix + * positive exponents with a localized plus sign, specify '+' between the + * exponent and the digits: "0.###E+0" will produce formats "1E+1", "1E+0", + * "1E-1", etc. (In localized patterns, use the localized plus sign rather than + * '+'.) + * + * <li>The minimum number of integer digits is achieved by adjusting the + * exponent. Example: 0.00123 formatted with "00.###E0" yields "12.3E-4". This + * only happens if there is no maximum number of integer digits. If there is a + * maximum, then the minimum number of integer digits is fixed at one. + * + * <li>The maximum number of integer digits, if present, specifies the exponent + * grouping. The most common use of this is to generate <em>engineering + * notation</em>, in which the exponent is a multiple of three, e.g., + * "##0.###E0". The number 12345 is formatted using "##0.####E0" as "12.345E3". + * + * <li>When using scientific notation, the formatter controls the + * digit counts using significant digits logic. The maximum number of + * significant digits limits the total number of integer and fraction + * digits that will be shown in the mantissa; it does not affect + * parsing. For example, 12345 formatted with "##0.##E0" is "12.3E3". + * See the section on significant digits for more details. 
+ * + * <li>The number of significant digits shown is determined as + * follows: If areSignificantDigitsUsed() returns false, then the + * minimum number of significant digits shown is one, and the maximum + * number of significant digits shown is the sum of the <em>minimum + * integer</em> and <em>maximum fraction</em> digits, and is + * unaffected by the maximum integer digits. If this sum is zero, + * then all significant digits are shown. If + * areSignificantDigitsUsed() returns true, then the significant digit + * counts are specified by getMinimumSignificantDigits() and + * getMaximumSignificantDigits(). In this case, the number of + * integer digits is fixed at one, and there is no exponent grouping. + * + * <li>Exponential patterns may not contain grouping separators. + * </ul> + * + * <a name="sigdig"><strong>Significant Digits</strong></a> + * + * <code>DecimalFormat</code> has two ways of controlling how many + * digits are shown: (a) significant digits counts, or (b) integer and + * fraction digit counts. Integer and fraction digit counts are + * described above. When a formatter is using significant digits + * counts, the number of integer and fraction digits is not specified + * directly, and the formatter settings for these counts are ignored. + * Instead, the formatter uses however many integer and fraction + * digits are required to display the specified number of significant + * digits. 
Examples: + * + * <table border=0 cellspacing=3 cellpadding=0> + * <tr bgcolor="#ccccff"> + * <td align=left>Pattern + * <td align=left>Minimum significant digits + * <td align=left>Maximum significant digits + * <td align=left>Number + * <td align=left>Output of format() + * <tr valign=top> + * <td><code>\@\@\@</code> + * <td>3 + * <td>3 + * <td>12345 + * <td><code>12300</code> + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>\@\@\@</code> + * <td>3 + * <td>3 + * <td>0.12345 + * <td><code>0.123</code> + * <tr valign=top> + * <td><code>\@\@##</code> + * <td>2 + * <td>4 + * <td>3.14159 + * <td><code>3.142</code> + * <tr valign=top bgcolor="#eeeeff"> + * <td><code>\@\@##</code> + * <td>2 + * <td>4 + * <td>1.23004 + * <td><code>1.23</code> + * </table> + * + * <ul> + * <li>Significant digit counts may be expressed using patterns that + * specify a minimum and maximum number of significant digits. These + * are indicated by the <code>'@'</code> and <code>'#'</code> + * characters. The minimum number of significant digits is the number + * of <code>'@'</code> characters. The maximum number of significant + * digits is the number of <code>'@'</code> characters plus the number + * of <code>'#'</code> characters following on the right. For + * example, the pattern <code>"@@@"</code> indicates exactly 3 + * significant digits. The pattern <code>"@##"</code> indicates from + * 1 to 3 significant digits. Trailing zero digits to the right of + * the decimal separator are suppressed after the minimum number of + * significant digits have been shown. For example, the pattern + * <code>"@##"</code> formats the number 0.1203 as + * <code>"0.12"</code>. + * + * <li>If a pattern uses significant digits, it may not contain a + * decimal separator, nor the <code>'0'</code> pattern character. + * Patterns such as <code>"@00"</code> or <code>"@.###"</code> are + * disallowed. 
+ * + * <li>Any number of <code>'#'</code> characters may be prepended to + * the left of the leftmost <code>'@'</code> character. These have no + * effect on the minimum and maximum significant digits counts, but + * may be used to position grouping separators. For example, + * <code>"#,#@#"</code> indicates a minimum of one significant digit, + * a maximum of two significant digits, and a grouping size of three. + * + * <li>In order to enable significant digits formatting, use a pattern + * containing the <code>'@'</code> pattern character. Alternatively, + * call setSignificantDigitsUsed(TRUE). + * + * <li>In order to disable significant digits formatting, use a + * pattern that does not contain the <code>'@'</code> pattern + * character. Alternatively, call setSignificantDigitsUsed(FALSE). + * + * <li>The number of significant digits has no effect on parsing. + * + * <li>Significant digits may be used together with exponential notation. Such + * patterns are equivalent to a normal exponential pattern with a minimum and + * maximum integer digit count of one, a minimum fraction digit count of + * <code>getMinimumSignificantDigits() - 1</code>, and a maximum fraction digit + * count of <code>getMaximumSignificantDigits() - 1</code>. For example, the + * pattern <code>"@@###E0"</code> is equivalent to <code>"0.0###E0"</code>. + * + * <li>If significant digits are in use, then the integer and fraction + * digit counts, as set via the API, are ignored. If significant + * digits are not in use, then the significant digit counts, as set via + * the API, are ignored. + * + * </ul> + * + * <p><strong>Padding</strong> + * + * <p>DecimalFormat supports padding the result of + * format() to a specific width. Padding may be specified either + * through the API or through the pattern syntax. In a pattern the pad escape + * character, followed by a single pad character, causes padding to be parsed + * and formatted. 
The pad escape character is '*' in unlocalized patterns, and + * can be localized using DecimalFormatSymbols::setSymbol() with a + * DecimalFormatSymbols::kPadEscapeSymbol + * selector. For example, <code>"$*x#,##0.00"</code> formats 123 to + * <code>"$xx123.00"</code>, and 1234 to <code>"$1,234.00"</code>. + * + * <ul> + * <li>When padding is in effect, the width of the positive subpattern, + * including prefix and suffix, determines the format width. For example, in + * the pattern <code>"* #0 o''clock"</code>, the format width is 10. + * + * <li>The width is counted in 16-bit code units (UChars). + * + * <li>Some parameters which usually do not matter have meaning when padding is + * used, because the pattern width is significant with padding. In the pattern + * "* ##,##,#,##0.##", the format width is 14. The initial characters "##,##," + * do not affect the grouping size or maximum integer digits, but they do affect + * the format width. + * + * <li>Padding may be inserted at one of four locations: before the prefix, + * after the prefix, before the suffix, or after the suffix. If padding is + * specified in any other location, applyPattern() + * sets a failing UErrorCode. If there is no prefix, + * before the prefix and after the prefix are equivalent, likewise for the + * suffix. + * + * <li>When specified in a pattern, the 32-bit code point immediately + * following the pad escape is the pad character. This may be any character, + * including a special pattern character. That is, the pad escape + * <em>escapes</em> the following character. If there is no character after + * the pad escape, then the pattern is illegal. + * + * </ul> + * + * <p><strong>Rounding</strong> + * + * <p>DecimalFormat supports rounding to a specific increment. For + * example, 1230 rounded to the nearest 50 is 1250. 1.234 rounded to the + * nearest 0.65 is 1.3. The rounding increment may be specified through the API + * or in a pattern. 
To specify a rounding increment in a pattern, include the + * increment in the pattern itself. "#,#50" specifies a rounding increment of + * 50. "#,##0.05" specifies a rounding increment of 0.05. + * + * <p>In the absence of an explicit rounding increment, numbers are + * rounded to their formatted width. + * + * <ul> + * <li>Rounding only affects the string produced by formatting. It does + * not affect parsing or change any numerical values. + * + * <li>A <em>rounding mode</em> determines how values are rounded; see + * DecimalFormat::ERoundingMode. The default rounding mode is + * DecimalFormat::kRoundHalfEven. The rounding mode can only be set + * through the API; it cannot be set with a pattern. + * + * <li>Some locales use rounding in their currency formats to reflect the + * smallest currency denomination. + * + * <li>In a pattern, digits '1' through '9' specify rounding, but otherwise + * behave identically to digit '0'. + * </ul> + * + * <p><strong>Synchronization</strong> + * + * <p>DecimalFormat objects are not synchronized. Multiple + * threads should not access one formatter concurrently. + * + * <p><strong>Subclassing</strong> + * + * <p><em>User subclasses are not supported.</em> While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. + */ +class U_I18N_API DecimalFormat: public NumberFormat { +public: + /** + * Rounding mode. 
+ * @stable ICU 2.4 + */ + enum ERoundingMode { + kRoundCeiling, /**< Round towards positive infinity */ + kRoundFloor, /**< Round towards negative infinity */ + kRoundDown, /**< Round towards zero */ + kRoundUp, /**< Round away from zero */ + kRoundHalfEven, /**< Round towards the nearest integer, or + towards the nearest even integer if equidistant */ + kRoundHalfDown, /**< Round towards the nearest integer, or + towards zero if equidistant */ + kRoundHalfUp, /**< Round towards the nearest integer, or + away from zero if equidistant */ + /** + * Return U_FORMAT_INEXACT_ERROR if number does not format exactly. + * @stable ICU 4.8 + */ + kRoundUnnecessary + }; + + /** + * Pad position. + * @stable ICU 2.4 + */ + enum EPadPosition { + kPadBeforePrefix, + kPadAfterPrefix, + kPadBeforeSuffix, + kPadAfterSuffix + }; + + /** + * Create a DecimalFormat using the default pattern and symbols + * for the default locale. This is a convenient way to obtain a + * DecimalFormat when internationalization is not the main concern. + * <P> + * To obtain standard formats for a given locale, use the factory methods + * on NumberFormat such as createInstance. These factories will + * return the most appropriate sub-class of NumberFormat for a given + * locale. + * @param status Output param set to success/failure code. If the + * pattern is invalid this will be set to a failure code. + * @stable ICU 2.0 + */ + DecimalFormat(UErrorCode& status); + + /** + * Create a DecimalFormat from the given pattern and the symbols + * for the default locale. This is a convenient way to obtain a + * DecimalFormat when internationalization is not the main concern. + * <P> + * To obtain standard formats for a given locale, use the factory methods + * on NumberFormat such as createInstance. These factories will + * return the most appropriate sub-class of NumberFormat for a given + * locale. + * @param pattern A non-localized pattern string. + * @param status Output param set to success/failure code. 
If the + * pattern is invalid this will be set to a failure code. + * @stable ICU 2.0 + */ + DecimalFormat(const UnicodeString& pattern, + UErrorCode& status); + + /** + * Create a DecimalFormat from the given pattern and symbols. + * Use this constructor when you need to completely customize the + * behavior of the format. + * <P> + * To obtain standard formats for a given + * locale, use the factory methods on NumberFormat such as + * createInstance or createCurrencyInstance. If you need only minor adjustments + * to a standard format, you can modify the format returned by + * a NumberFormat factory method. + * + * @param pattern a non-localized pattern string + * @param symbolsToAdopt the set of symbols to be used. The caller should not + * delete this object after making this call. + * @param status Output param set to success/failure code. If the + * pattern is invalid this will be set to a failure code. + * @stable ICU 2.0 + */ + DecimalFormat( const UnicodeString& pattern, + DecimalFormatSymbols* symbolsToAdopt, + UErrorCode& status); + +#ifndef U_HIDE_INTERNAL_API + /** + * This API is for ICU use only. + * Create a DecimalFormat from the given pattern, symbols, and style. + * + * @param pattern a non-localized pattern string + * @param symbolsToAdopt the set of symbols to be used. The caller should not + * delete this object after making this call. + * @param style style of decimal format + * @param status Output param set to success/failure code. If the + * pattern is invalid this will be set to a failure code. + * @internal + */ + DecimalFormat( const UnicodeString& pattern, + DecimalFormatSymbols* symbolsToAdopt, + UNumberFormatStyle style, + UErrorCode& status); + +#if UCONFIG_HAVE_PARSEALLINPUT + /** + * @internal + */ + void setParseAllInput(UNumberFormatAttributeValue value); +#endif + +#endif /* U_HIDE_INTERNAL_API */ + + + /** + * Set an integer attribute on this DecimalFormat. 
+ * May return U_UNSUPPORTED_ERROR if this instance does not support + * the specified attribute. + * @param attr the attribute to set + * @param newvalue new value + * @param status the error type + * @return *this - for chaining (example: format.setAttribute(...).setAttribute(...) ) + * @stable ICU 51 + */ + virtual DecimalFormat& setAttribute( UNumberFormatAttribute attr, + int32_t newvalue, + UErrorCode &status); + + /** + * Get an integer attribute from this DecimalFormat. + * May return U_UNSUPPORTED_ERROR if this instance does not support + * the specified attribute. + * @param attr the attribute to get + * @param status the error type + * @return the attribute value. Undefined if there is an error. + * @stable ICU 51 + */ + virtual int32_t getAttribute( UNumberFormatAttribute attr, + UErrorCode &status) const; + + + /** + * Set whether or not grouping will be used in this format. + * @param newValue True, grouping will be used in this format. + * @see isGroupingUsed + * @stable ICU 53 + */ + virtual void setGroupingUsed(UBool newValue); + + /** + * Sets whether or not numbers should be parsed as integers only. + * @param value set True, this format will parse numbers as integers + * only. + * @see isParseIntegerOnly + * @stable ICU 53 + */ + virtual void setParseIntegerOnly(UBool value); + + /** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. + * @param value The UDisplayContext value to set. + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @stable ICU 53 + */ + virtual void setContext(UDisplayContext value, UErrorCode& status); + + /** + * Create a DecimalFormat from the given pattern and symbols. + * Use this constructor when you need to completely customize the + * behavior of the format. 
+ * <P> + * To obtain standard formats for a given + * locale, use the factory methods on NumberFormat such as + * createInstance or createCurrencyInstance. If you need only minor adjustments + * to a standard format, you can modify the format returned by + * a NumberFormat factory method. + * + * @param pattern a non-localized pattern string + * @param symbolsToAdopt the set of symbols to be used. The caller should not + * delete this object after making this call. + * @param parseError Output param to receive errors that occurred during parsing + * @param status Output param set to success/failure code. If the + * pattern is invalid this will be set to a failure code. + * @stable ICU 2.0 + */ + DecimalFormat( const UnicodeString& pattern, + DecimalFormatSymbols* symbolsToAdopt, + UParseError& parseError, + UErrorCode& status); + /** + * Create a DecimalFormat from the given pattern and symbols. + * Use this constructor when you need to completely customize the + * behavior of the format. + * <P> + * To obtain standard formats for a given + * locale, use the factory methods on NumberFormat such as + * createInstance or createCurrencyInstance. If you need only minor adjustments + * to a standard format, you can modify the format returned by + * a NumberFormat factory method. + * + * @param pattern a non-localized pattern string + * @param symbols the set of symbols to be used + * @param status Output param set to success/failure code. If the + * pattern is invalid this will be set to a failure code. + * @stable ICU 2.0 + */ + DecimalFormat( const UnicodeString& pattern, + const DecimalFormatSymbols& symbols, + UErrorCode& status); + + /** + * Copy constructor. + * + * @param source the DecimalFormat object to be copied from. + * @stable ICU 2.0 + */ + DecimalFormat(const DecimalFormat& source); + + /** + * Assignment operator. + * + * @param rhs the DecimalFormat object to be copied. 
+ * @stable ICU 2.0 + */ + DecimalFormat& operator=(const DecimalFormat& rhs); + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~DecimalFormat(); + + /** + * Clone this Format object polymorphically. The caller owns the + * result and should delete it when done. + * + * @return a polymorphic copy of this DecimalFormat. + * @stable ICU 2.0 + */ + virtual Format* clone(void) const; + + /** + * Return true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * + * @param other the object to be compared with. + * @return true if the given Format objects are semantically equal. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Format& other) const; + + + using NumberFormat::format; + + /** + * Format a double or long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + + /** + * Format a double or long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + + /** + * Format a double or long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. 
+ * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Format a long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + + /** + * Format a long number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. 
+ * @stable ICU 4.4 + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format an int64 number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.8 + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Format an int64 number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + + /** + * Format an int64 number using base-10 representation. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. 
+ * The syntax of the unformatted number is a "numeric string" + * as defined in the Decimal Arithmetic Specification, available at + * http://speleotrove.com/decimal + * + * @param number The unformatted number, as a string. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(StringPiece number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. + * @param number The number + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. 
+ * @internal + */ + virtual UnicodeString& format( + const VisibleDigitsWithExponent &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Format a decimal number. + * @param number The number + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format( + const VisibleDigitsWithExponent &number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + using NumberFormat::parse; + + /** + * Parse the given string using this object's choices. The method + * does string comparisons to try to find an optimal match. + * If no object can be parsed, index is unchanged, and NULL is + * returned. The result is returned as the most parsimonious + * type of Formattable that will accommodate all of the + * necessary precision. For example, if the result is exactly 12, + * it will be returned as a long. 
However, if it is 1.5, it will + * be returned as a double. + * + * @param text The text to be parsed. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parsePosition The position to start parsing at on input. + * On output, moved to after the last successfully + * parsed character. On parse failure, does not change. + * @see Formattable + * @stable ICU 2.0 + */ + virtual void parse(const UnicodeString& text, + Formattable& result, + ParsePosition& parsePosition) const; + + /** + * Parses text from the given string as a currency amount. Unlike + * the parse() method, this method will attempt to parse a generic + * currency name, searching for a match of this object's locale's + * currency display names, or for a 3-letter ISO currency code. + * This method will fail if this format is not a currency format, + * that is, if it does not contain the currency pattern symbol + * (U+00A4) in its prefix or suffix. + * + * @param text the string to parse + * @param pos input-output position; on input, the position within text + * to match; must have 0 <= pos.getIndex() < text.length(); + * on output, the position after the last matched character. + * If the parse fails, the position is unchanged upon output. + * @return if parse succeeds, a pointer to a newly-created CurrencyAmount + * object (owned by the caller) containing information about + * the parsed currency; if parse fails, this is NULL. + * @stable ICU 49 + */ + virtual CurrencyAmount* parseCurrency(const UnicodeString& text, + ParsePosition& pos) const; + + /** + * Returns the decimal format symbols, which is generally not changed + * by the programmer or user. + * @return desired DecimalFormatSymbols + * @see DecimalFormatSymbols + * @stable ICU 2.0 + */ + virtual const DecimalFormatSymbols* getDecimalFormatSymbols(void) const; + + /** + * Sets the decimal format symbols, which is generally not changed + * by the programmer or user. 
+ * @param symbolsToAdopt DecimalFormatSymbols to be adopted. + * @stable ICU 2.0 + */ + virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt); + + /** + * Sets the decimal format symbols, which is generally not changed + * by the programmer or user. + * @param symbols DecimalFormatSymbols. + * @stable ICU 2.0 + */ + virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols); + + + /** + * Returns the currency plural format information, + * which is generally not changed by the programmer or user. + * @return desired CurrencyPluralInfo + * @stable ICU 4.2 + */ + virtual const CurrencyPluralInfo* getCurrencyPluralInfo(void) const; + + /** + * Sets the currency plural format information, + * which is generally not changed by the programmer or user. + * @param toAdopt CurrencyPluralInfo to be adopted. + * @stable ICU 4.2 + */ + virtual void adoptCurrencyPluralInfo(CurrencyPluralInfo* toAdopt); + + /** + * Sets the currency plural format information, + * which is generally not changed by the programmer or user. + * @param info Currency Plural Info. + * @stable ICU 4.2 + */ + virtual void setCurrencyPluralInfo(const CurrencyPluralInfo& info); + + + /** + * Get the positive prefix. + * + * @param result Output param which will receive the positive prefix. + * @return A reference to 'result'. + * Examples: +123, $123, sFr123 + * @stable ICU 2.0 + */ + UnicodeString& getPositivePrefix(UnicodeString& result) const; + + /** + * Set the positive prefix. + * + * @param newValue the new value of the positive prefix to be set. + * Examples: +123, $123, sFr123 + * @stable ICU 2.0 + */ + virtual void setPositivePrefix(const UnicodeString& newValue); + + /** + * Get the negative prefix. + * + * @param result Output param which will receive the negative prefix. + * @return A reference to 'result'. 
+ * Examples: -123, ($123) (with negative suffix), sFr-123 + * @stable ICU 2.0 + */ + UnicodeString& getNegativePrefix(UnicodeString& result) const; + + /** + * Set the negative prefix. + * + * @param newValue the new value of the negative prefix to be set. + * Examples: -123, ($123) (with negative suffix), sFr-123 + * @stable ICU 2.0 + */ + virtual void setNegativePrefix(const UnicodeString& newValue); + + /** + * Get the positive suffix. + * + * @param result Output param which will receive the positive suffix. + * @return A reference to 'result'. + * Example: 123% + * @stable ICU 2.0 + */ + UnicodeString& getPositiveSuffix(UnicodeString& result) const; + + /** + * Set the positive suffix. + * + * @param newValue the new value of the positive suffix to be set. + * Example: 123% + * @stable ICU 2.0 + */ + virtual void setPositiveSuffix(const UnicodeString& newValue); + + /** + * Get the negative suffix. + * + * @param result Output param which will receive the negative suffix. + * @return A reference to 'result'. + * Examples: -123%, ($123) (with positive suffixes) + * @stable ICU 2.0 + */ + UnicodeString& getNegativeSuffix(UnicodeString& result) const; + + /** + * Set the negative suffix. + * + * @param newValue the new value of the negative suffix to be set. + * Examples: 123% + * @stable ICU 2.0 + */ + virtual void setNegativeSuffix(const UnicodeString& newValue); + + /** + * Get the multiplier for use in percent, permill, etc. + * For a percentage, set the suffixes to have "%" and the multiplier to be 100. + * (For Arabic, use arabic percent symbol). + * For a permill, set the suffixes to have "\\u2030" and the multiplier to be 1000. + * + * @return the multiplier for use in percent, permill, etc. + * Examples: with 100, 1.23 -> "123", and "123" -> 1.23 + * @stable ICU 2.0 + */ + int32_t getMultiplier(void) const; + + /** + * Set the multiplier for use in percent, permill, etc. 
+ * For a percentage, set the suffixes to have "%" and the multiplier to be 100. + * (For Arabic, use arabic percent symbol). + * For a permill, set the suffixes to have "\\u2030" and the multiplier to be 1000. + * + * @param newValue the new value of the multiplier for use in percent, permill, etc. + * Examples: with 100, 1.23 -> "123", and "123" -> 1.23 + * @stable ICU 2.0 + */ + virtual void setMultiplier(int32_t newValue); + + /** + * Get the rounding increment. + * @return A positive rounding increment, or 0.0 if a custom rounding + * increment is not in effect. + * @see #setRoundingIncrement + * @see #getRoundingMode + * @see #setRoundingMode + * @stable ICU 2.0 + */ + virtual double getRoundingIncrement(void) const; + + /** + * Set the rounding increment. In the absence of a rounding increment, + * numbers will be rounded to the number of digits displayed. + * @param newValue A positive rounding increment, or 0.0 to + * use the default rounding increment. + * Negative increments are equivalent to 0.0. + * @see #getRoundingIncrement + * @see #getRoundingMode + * @see #setRoundingMode + * @stable ICU 2.0 + */ + virtual void setRoundingIncrement(double newValue); + + /** + * Get the rounding mode. + * @return A rounding mode + * @see #setRoundingIncrement + * @see #getRoundingIncrement + * @see #setRoundingMode + * @stable ICU 2.0 + */ + virtual ERoundingMode getRoundingMode(void) const; + + /** + * Set the rounding mode. + * @param roundingMode A rounding mode + * @see #setRoundingIncrement + * @see #getRoundingIncrement + * @see #getRoundingMode + * @stable ICU 2.0 + */ + virtual void setRoundingMode(ERoundingMode roundingMode); + + /** + * Get the width to which the output of format() is padded. + * The width is counted in 16-bit code units. 
+ * @return the format width, or zero if no padding is in effect + * @see #setFormatWidth + * @see #getPadCharacterString + * @see #setPadCharacter + * @see #getPadPosition + * @see #setPadPosition + * @stable ICU 2.0 + */ + virtual int32_t getFormatWidth(void) const; + + /** + * Set the width to which the output of format() is padded. + * The width is counted in 16-bit code units. + * This method also controls whether padding is enabled. + * @param width the width to which to pad the result of + * format(), or zero to disable padding. A negative + * width is equivalent to 0. + * @see #getFormatWidth + * @see #getPadCharacterString + * @see #setPadCharacter + * @see #getPadPosition + * @see #setPadPosition + * @stable ICU 2.0 + */ + virtual void setFormatWidth(int32_t width); + + /** + * Get the pad character used to pad to the format width. The + * default is ' '. + * @return a string containing the pad character. This will always + * have a length of one 32-bit code point. + * @see #setFormatWidth + * @see #getFormatWidth + * @see #setPadCharacter + * @see #getPadPosition + * @see #setPadPosition + * @stable ICU 2.0 + */ + virtual UnicodeString getPadCharacterString() const; + + /** + * Set the character used to pad to the format width. If padding + * is not enabled, then this will take effect if padding is later + * enabled. + * @param padChar a string containing the pad character. If the string + * has length 0, then the pad character is set to ' '. Otherwise + * padChar.char32At(0) will be used as the pad character. + * @see #setFormatWidth + * @see #getFormatWidth + * @see #getPadCharacterString + * @see #getPadPosition + * @see #setPadPosition + * @stable ICU 2.0 + */ + virtual void setPadCharacter(const UnicodeString &padChar); + + /** + * Get the position at which padding will take place. This is the location + * at which padding will be inserted if the result of format() + * is shorter than the format width. 
+ * @return the pad position, one of kPadBeforePrefix, + * kPadAfterPrefix, kPadBeforeSuffix, or + * kPadAfterSuffix. + * @see #setFormatWidth + * @see #getFormatWidth + * @see #setPadCharacter + * @see #getPadCharacterString + * @see #setPadPosition + * @see #EPadPosition + * @stable ICU 2.0 + */ + virtual EPadPosition getPadPosition(void) const; + + /** + * Set the position at which padding will take place. This is the location + * at which padding will be inserted if the result of format() + * is shorter than the format width. This has no effect unless padding is + * enabled. + * @param padPos the pad position, one of kPadBeforePrefix, + * kPadAfterPrefix, kPadBeforeSuffix, or + * kPadAfterSuffix. + * @see #setFormatWidth + * @see #getFormatWidth + * @see #setPadCharacter + * @see #getPadCharacterString + * @see #getPadPosition + * @see #EPadPosition + * @stable ICU 2.0 + */ + virtual void setPadPosition(EPadPosition padPos); + + /** + * Return whether or not scientific notation is used. + * @return TRUE if this object formats and parses scientific notation + * @see #setScientificNotation + * @see #getMinimumExponentDigits + * @see #setMinimumExponentDigits + * @see #isExponentSignAlwaysShown + * @see #setExponentSignAlwaysShown + * @stable ICU 2.0 + */ + virtual UBool isScientificNotation(void) const; + + /** + * Set whether or not scientific notation is used. When scientific notation + * is used, the effective maximum number of integer digits is <= 8. If the + * maximum number of integer digits is set to more than 8, the effective + * maximum will be 1. This allows this call to generate a 'default' scientific + * number format without additional changes. 
+ * @param useScientific TRUE if this object formats and parses scientific + * notation + * @see #isScientificNotation + * @see #getMinimumExponentDigits + * @see #setMinimumExponentDigits + * @see #isExponentSignAlwaysShown + * @see #setExponentSignAlwaysShown + * @stable ICU 2.0 + */ + virtual void setScientificNotation(UBool useScientific); + + /** + * Return the minimum exponent digits that will be shown. + * @return the minimum exponent digits that will be shown + * @see #setScientificNotation + * @see #isScientificNotation + * @see #setMinimumExponentDigits + * @see #isExponentSignAlwaysShown + * @see #setExponentSignAlwaysShown + * @stable ICU 2.0 + */ + virtual int8_t getMinimumExponentDigits(void) const; + + /** + * Set the minimum exponent digits that will be shown. This has no + * effect unless scientific notation is in use. + * @param minExpDig a value >= 1 indicating the fewest exponent digits + * that will be shown. Values less than 1 will be treated as 1. + * @see #setScientificNotation + * @see #isScientificNotation + * @see #getMinimumExponentDigits + * @see #isExponentSignAlwaysShown + * @see #setExponentSignAlwaysShown + * @stable ICU 2.0 + */ + virtual void setMinimumExponentDigits(int8_t minExpDig); + + /** + * Return whether the exponent sign is always shown. + * @return TRUE if the exponent is always prefixed with either the + * localized minus sign or the localized plus sign, false if only negative + * exponents are prefixed with the localized minus sign. + * @see #setScientificNotation + * @see #isScientificNotation + * @see #setMinimumExponentDigits + * @see #getMinimumExponentDigits + * @see #setExponentSignAlwaysShown + * @stable ICU 2.0 + */ + virtual UBool isExponentSignAlwaysShown(void) const; + + /** + * Set whether the exponent sign is always shown. This has no effect + * unless scientific notation is in use. 
+ * @param expSignAlways TRUE if the exponent is always prefixed with either + * the localized minus sign or the localized plus sign, false if only + * negative exponents are prefixed with the localized minus sign. + * @see #setScientificNotation + * @see #isScientificNotation + * @see #setMinimumExponentDigits + * @see #getMinimumExponentDigits + * @see #isExponentSignAlwaysShown + * @stable ICU 2.0 + */ + virtual void setExponentSignAlwaysShown(UBool expSignAlways); + + /** + * Return the grouping size. Grouping size is the number of digits between + * grouping separators in the integer portion of a number. For example, + * in the number "123,456.78", the grouping size is 3. + * + * @return the grouping size. + * @see setGroupingSize + * @see NumberFormat::isGroupingUsed + * @see DecimalFormatSymbols::getGroupingSeparator + * @stable ICU 2.0 + */ + int32_t getGroupingSize(void) const; + + /** + * Set the grouping size. Grouping size is the number of digits between + * grouping separators in the integer portion of a number. For example, + * in the number "123,456.78", the grouping size is 3. + * + * @param newValue the new value of the grouping size. + * @see getGroupingSize + * @see NumberFormat::setGroupingUsed + * @see DecimalFormatSymbols::setGroupingSeparator + * @stable ICU 2.0 + */ + virtual void setGroupingSize(int32_t newValue); + + /** + * Return the secondary grouping size. In some locales one + * grouping interval is used for the least significant integer + * digits (the primary grouping size), and another is used for all + * others (the secondary grouping size). A formatter supporting a + * secondary grouping size will return a positive integer unequal + * to the primary grouping size returned by + * getGroupingSize(). For example, if the primary + * grouping size is 4, and the secondary grouping size is 2, then + * the number 123456789 formats as "1,23,45,6789", and the pattern + * appears as "#,##,###0". 
+ * @return the secondary grouping size, or a value less than + * one if there is none + * @see setSecondaryGroupingSize + * @see NumberFormat::isGroupingUsed + * @see DecimalFormatSymbols::getGroupingSeparator + * @stable ICU 2.4 + */ + int32_t getSecondaryGroupingSize(void) const; + + /** + * Set the secondary grouping size. If set to a value less than 1, + * then secondary grouping is turned off, and the primary grouping + * size is used for all intervals, not just the least significant. + * + * @param newValue the new value of the secondary grouping size. + * @see getSecondaryGroupingSize + * @see NumberFormat#setGroupingUsed + * @see DecimalFormatSymbols::setGroupingSeparator + * @stable ICU 2.4 + */ + virtual void setSecondaryGroupingSize(int32_t newValue); + +#ifndef U_HIDE_INTERNAL_API + + /** + * Returns the minimum number of grouping digits. + * Grouping separators are output if there are at least this many + * digits to the left of the first (rightmost) grouping separator, + * that is, there are at least (minimum grouping + grouping size) integer digits. + * (Subject to isGroupingUsed().) + * + * For example, if this value is 2, and the grouping size is 3, then + * 9999 -> "9999" and 10000 -> "10,000" + * + * This is a technology preview. This API may change behavior or may be removed. + * + * The default value for this attribute is 0. + * A value of 1, 0, or lower, means that the use of grouping separators + * only depends on the grouping size (and on isGroupingUsed()). + * Currently, the corresponding CLDR data is not used; this is likely to change. + * + * @see setMinimumGroupingDigits + * @see getGroupingSize + * @internal technology preview + */ + int32_t getMinimumGroupingDigits() const; + +#endif /* U_HIDE_INTERNAL_API */ + + /* Cannot use #ifndef U_HIDE_INTERNAL_API for the following draft method since it is virtual. */ + /** + * Sets the minimum grouping digits. Setting to a value less than or + * equal to 1 turns off minimum grouping digits. 
+ * + * @param newValue the new value of minimum grouping digits. + * @see getMinimumGroupingDigits + * @internal technology preview + */ + virtual void setMinimumGroupingDigits(int32_t newValue); + + + /** + * Allows you to get the behavior of the decimal separator with integers. + * (The decimal separator will always appear with decimals.) + * + * @return TRUE if the decimal separator always appears with integers. + * Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345 + * @stable ICU 2.0 + */ + UBool isDecimalSeparatorAlwaysShown(void) const; + + /** + * Allows you to set the behavior of the decimal separator with integers. + * (The decimal separator will always appear with decimals.) + * + * @param newValue set TRUE if the decimal separator will always appear with integers. + * Example: Decimal ON: 12345 -> 12345.; OFF: 12345 -> 12345 + * @stable ICU 2.0 + */ + virtual void setDecimalSeparatorAlwaysShown(UBool newValue); + + /** + * Allows you to get the parse behavior of the pattern decimal mark. + * + * @return TRUE if input must contain a match to decimal mark in pattern + * @stable ICU 54 + */ + UBool isDecimalPatternMatchRequired(void) const; + + /** + * Allows you to set the parse behavior of the pattern decimal mark. + * + * If TRUE, the input must have a decimal mark if one was specified in the pattern. When + * FALSE, the decimal mark may be omitted from the input. + * + * @param newValue set TRUE if input must contain a match to decimal mark in pattern + * @stable ICU 54 + */ + virtual void setDecimalPatternMatchRequired(UBool newValue); + + + /** + * Synthesizes a pattern string that represents the current state + * of this Format object. + * + * @param result Output param which will receive the pattern. + * Previous contents are deleted. + * @return A reference to 'result'. 
+ * @see applyPattern + * @stable ICU 2.0 + */ + virtual UnicodeString& toPattern(UnicodeString& result) const; + + /** + * Synthesizes a localized pattern string that represents the current + * state of this Format object. + * + * @param result Output param which will receive the localized pattern. + * Previous contents are deleted. + * @return A reference to 'result'. + * @see applyPattern + * @stable ICU 2.0 + */ + virtual UnicodeString& toLocalizedPattern(UnicodeString& result) const; + + /** + * Apply the given pattern to this Format object. A pattern is a + * short-hand specification for the various formatting properties. + * These properties can also be changed individually through the + * various setter methods. + * <P> + * No limit on integer digits is set + * by this routine, since that is the typical end-user desire; + * use setMaximumIntegerDigits if you want to set a real value. + * For negative numbers, use a second pattern, separated by a semicolon. + * <pre> + * . Example "#,#00.0#" -> 1,234.56 + * </pre> + * This means a minimum of 2 integer digits, 1 fraction digit, and + * a maximum of 2 fraction digits. + * <pre> + * . Example: "#,#00.0#;(#,#00.0#)" for negatives in parentheses. + * </pre> + * In negative patterns, the minimum and maximum counts are ignored; + * these are presumed to be set in the positive pattern. + * + * @param pattern The pattern to be applied. + * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @stable ICU 2.0 + */ + virtual void applyPattern(const UnicodeString& pattern, + UParseError& parseError, + UErrorCode& status); + /** + * Sets the pattern. + * @param pattern The pattern to be applied. + * @param status Output param set to success/failure code on + * exit. 
If the pattern is invalid, this will be + * set to a failure result. + * @stable ICU 2.0 + */ + virtual void applyPattern(const UnicodeString& pattern, + UErrorCode& status); + + /** + * Apply the given pattern to this Format object. The pattern + * is assumed to be in a localized notation. A pattern is a + * short-hand specification for the various formatting properties. + * These properties can also be changed individually through the + * various setter methods. + * <P> + * No limit on integer digits is set + * by this routine, since that is the typical end-user desire; + * use setMaximumIntegerDigits if you want to set a real value. + * For negative numbers, use a second pattern, separated by a semicolon. + * <pre> + * . Example "#,#00.0#" -> 1,234.56 + * </pre> + * This means a minimum of 2 integer digits, 1 fraction digit, and + * a maximum of 2 fraction digits. + * + * Example: "#,#00.0#;(#,#00.0#)" for negatives in parentheses. + * + * In negative patterns, the minimum and maximum counts are ignored; + * these are presumed to be set in the positive pattern. + * + * @param pattern The localized pattern to be applied. + * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @stable ICU 2.0 + */ + virtual void applyLocalizedPattern(const UnicodeString& pattern, + UParseError& parseError, + UErrorCode& status); + + /** + * Apply the given pattern to this Format object. + * + * @param pattern The localized pattern to be applied. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. 
+ * @stable ICU 2.0 + */ + virtual void applyLocalizedPattern(const UnicodeString& pattern, + UErrorCode& status); + + + /** + * Sets the maximum number of digits allowed in the integer portion of a + * number. This override limits the integer digit count to 309. + * + * @param newValue the new value of the maximum number of digits + * allowed in the integer portion of a number. + * @see NumberFormat#setMaximumIntegerDigits + * @stable ICU 2.0 + */ + virtual void setMaximumIntegerDigits(int32_t newValue); + + /** + * Sets the minimum number of digits allowed in the integer portion of a + * number. This override limits the integer digit count to 309. + * + * @param newValue the new value of the minimum number of digits + * allowed in the integer portion of a number. + * @see NumberFormat#setMinimumIntegerDigits + * @stable ICU 2.0 + */ + virtual void setMinimumIntegerDigits(int32_t newValue); + + /** + * Sets the maximum number of digits allowed in the fraction portion of a + * number. This override limits the fraction digit count to 340. + * + * @param newValue the new value of the maximum number of digits + * allowed in the fraction portion of a number. + * @see NumberFormat#setMaximumFractionDigits + * @stable ICU 2.0 + */ + virtual void setMaximumFractionDigits(int32_t newValue); + + /** + * Sets the minimum number of digits allowed in the fraction portion of a + * number. This override limits the fraction digit count to 340. + * + * @param newValue the new value of the minimum number of digits + * allowed in the fraction portion of a number. + * @see NumberFormat#setMinimumFractionDigits + * @stable ICU 2.0 + */ + virtual void setMinimumFractionDigits(int32_t newValue); + + /** + * Returns the minimum number of significant digits that will be + * displayed. This value has no effect unless areSignificantDigitsUsed() + * returns true. 
+ * @return the fewest significant digits that will be shown + * @stable ICU 3.0 + */ + int32_t getMinimumSignificantDigits() const; + + /** + * Returns the maximum number of significant digits that will be + * displayed. This value has no effect unless areSignificantDigitsUsed() + * returns true. + * @return the most significant digits that will be shown + * @stable ICU 3.0 + */ + int32_t getMaximumSignificantDigits() const; + + /** + * Sets the minimum number of significant digits that will be + * displayed. If <code>min</code> is less than one then it is set + * to one. If the maximum significant digits count is less than + * <code>min</code>, then it is set to <code>min</code>. + * This function also enables the use of significant digits + * by this formatter - areSignificantDigitsUsed() will return TRUE. + * @see #areSignificantDigitsUsed + * @param min the fewest significant digits to be shown + * @stable ICU 3.0 + */ + void setMinimumSignificantDigits(int32_t min); + + /** + * Sets the maximum number of significant digits that will be + * displayed. If <code>max</code> is less than one then it is set + * to one. If the minimum significant digits count is greater + * than <code>max</code>, then it is set to <code>max</code>. + * This function also enables the use of significant digits + * by this formatter - areSignificantDigitsUsed() will return TRUE. + * @see #areSignificantDigitsUsed + * @param max the most significant digits to be shown + * @stable ICU 3.0 + */ + void setMaximumSignificantDigits(int32_t max); + + /** + * Returns true if significant digits are in use, or false if + * integer and fraction digit counts are in use. + * @return true if significant digits are in use + * @stable ICU 3.0 + */ + UBool areSignificantDigitsUsed() const; + + /** + * Sets whether significant digits are in use, or integer and + * fraction digit counts are in use. 
+ * @param useSignificantDigits true to use significant digits, or + * false to use integer and fraction digit counts + * @stable ICU 3.0 + */ + void setSignificantDigitsUsed(UBool useSignificantDigits); + + public: + /** + * Sets the currency used to display currency + * amounts. This takes effect immediately, if this format is a + * currency format. If this format is not a currency format, then + * the currency is used if and when this object becomes a + * currency format through the application of a new pattern. + * @param theCurrency a 3-letter ISO code indicating new currency + * to use. It need not be null-terminated. May be the empty + * string or NULL to indicate no currency. + * @param ec input-output error code + * @stable ICU 3.0 + */ + virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + + /** + * Sets the currency used to display currency amounts. See + * setCurrency(const UChar*, UErrorCode&). + * @deprecated ICU 3.0. Use setCurrency(const UChar*, UErrorCode&). + */ + virtual void setCurrency(const UChar* theCurrency); + + /** + * Sets the <tt>Currency Usage</tt> object used to display currency. + * This takes effect immediately, if this format is a + * currency format. + * @param newUsage new currency usage object to use. + * @param ec pointer to the error code + * @stable ICU 54 + */ + void setCurrencyUsage(UCurrencyUsage newUsage, UErrorCode* ec); + + /** + * Returns the <tt>Currency Usage</tt> object used to display currency. + * @stable ICU 54 + */ + UCurrencyUsage getCurrencyUsage() const; + + +#ifndef U_HIDE_DEPRECATED_API + /** + * The resource tags we use to retrieve decimal format data from + * locale resource bundles. + * @deprecated ICU 3.4. This string has no public purpose. Please don't use it. + */ + static const char fgNumberPatterns[]; +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_INTERNAL_API + /** + * Get a FixedDecimal corresponding to a double as it would be + * formatted by this DecimalFormat. 
+ * Internal, not intended for public use. + * @internal + */ + FixedDecimal getFixedDecimal(double number, UErrorCode &status) const; + + /** + * Get a FixedDecimal corresponding to a formattable as it would be + * formatted by this DecimalFormat. + * Internal, not intended for public use. + * @internal + */ + FixedDecimal getFixedDecimal(const Formattable &number, UErrorCode &status) const; + + /** + * Get a FixedDecimal corresponding to a DigitList as it would be + * formatted by this DecimalFormat. Note: the DigitList may be modified. + * Internal, not intended for public use. + * @internal + */ + FixedDecimal getFixedDecimal(DigitList &number, UErrorCode &status) const; + + /** + * Get a VisibleDigitsWithExponent corresponding to a double + * as it would be formatted by this DecimalFormat. + * Internal, not intended for public use. + * @internal + */ + VisibleDigitsWithExponent &initVisibleDigitsWithExponent( + double number, + VisibleDigitsWithExponent &digits, + UErrorCode &status) const; + + /** + * Get a VisibleDigitsWithExponent corresponding to a formattable + * as it would be formatted by this DecimalFormat. + * Internal, not intended for public use. + * @internal + */ + VisibleDigitsWithExponent &initVisibleDigitsWithExponent( + const Formattable &number, + VisibleDigitsWithExponent &digits, + UErrorCode &status) const; + + /** + * Get a VisibleDigitsWithExponent corresponding to a DigitList + * as it would be formatted by this DecimalFormat. + * Note: the DigitList may be modified. + * Internal, not intended for public use. + * @internal + */ + VisibleDigitsWithExponent &initVisibleDigitsWithExponent( + DigitList &number, + VisibleDigitsWithExponent &digits, + UErrorCode &status) const; + +#endif /* U_HIDE_INTERNAL_API */ + +public: + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . 
if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. + * This method is to implement a simple version of RTTI, since not all + * C++ compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + +private: + + DecimalFormat(); // default constructor not implemented + + /** + * Initialize all fields of a new DecimalFormatter to a safe default value. + * Common code for use by constructors. + */ + void init(); + + /** + * Do real work of constructing a new DecimalFormat. + */ + void construct(UErrorCode& status, + UParseError& parseErr, + const UnicodeString* pattern = 0, + DecimalFormatSymbols* symbolsToAdopt = 0 + ); + + void handleCurrencySignInPattern(UErrorCode& status); + + void parse(const UnicodeString& text, + Formattable& result, + ParsePosition& pos, + UChar* currency) const; + + enum { + fgStatusInfinite, + fgStatusLength // Leave last in list. + } StatusFlags; + + UBool subparse(const UnicodeString& text, + const UnicodeString* negPrefix, + const UnicodeString* negSuffix, + const UnicodeString* posPrefix, + const UnicodeString* posSuffix, + UBool complexCurrencyParsing, + int8_t type, + ParsePosition& parsePosition, + DigitList& digits, UBool* status, + UChar* currency) const; + + // Mixed style parsing for currency. 
+ // It parses against the current currency pattern + // using complex affix comparison + // parses against the currency plural patterns using complex affix comparison, + // and parses against the current pattern using simple affix comparison. + UBool parseForCurrency(const UnicodeString& text, + ParsePosition& parsePosition, + DigitList& digits, + UBool* status, + UChar* currency) const; + + int32_t skipPadding(const UnicodeString& text, int32_t position) const; + + int32_t compareAffix(const UnicodeString& input, + int32_t pos, + UBool isNegative, + UBool isPrefix, + const UnicodeString* affixPat, + UBool complexCurrencyParsing, + int8_t type, + UChar* currency) const; + + static UnicodeString& trimMarksFromAffix(const UnicodeString& affix, UnicodeString& trimmedAffix); + + UBool equalWithSignCompatibility(UChar32 lhs, UChar32 rhs) const; + + int32_t compareSimpleAffix(const UnicodeString& affix, + const UnicodeString& input, + int32_t pos, + UBool lenient) const; + + static int32_t skipPatternWhiteSpace(const UnicodeString& text, int32_t pos); + + static int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos); + + static int32_t skipUWhiteSpaceAndMarks(const UnicodeString& text, int32_t pos); + + static int32_t skipBidiMarks(const UnicodeString& text, int32_t pos); + + int32_t compareComplexAffix(const UnicodeString& affixPat, + const UnicodeString& input, + int32_t pos, + int8_t type, + UChar* currency) const; + + static int32_t match(const UnicodeString& text, int32_t pos, UChar32 ch); + + static int32_t match(const UnicodeString& text, int32_t pos, const UnicodeString& str); + + static UBool matchSymbol(const UnicodeString &text, int32_t position, int32_t length, const UnicodeString &symbol, + UnicodeSet *sset, UChar32 schar); + + static UBool matchDecimal(UChar32 symbolChar, + UBool sawDecimal, UChar32 sawDecimalChar, + const UnicodeSet *sset, UChar32 schar); + + static UBool matchGrouping(UChar32 groupingChar, + UBool sawGrouping, UChar32 
sawGroupingChar, + const UnicodeSet *sset, + UChar32 decimalChar, const UnicodeSet *decimalSet, + UChar32 schar); + + // set up currency affix patterns for mix parsing. + // The patterns saved here are the affix patterns of default currency + // pattern and the unique affix patterns of the plural currency patterns. + // Those patterns are used by parseForCurrency(). + void setupCurrencyAffixPatterns(UErrorCode& status); + + // get the currency rounding with respect to currency usage + double getCurrencyRounding(const UChar* currency, + UErrorCode* ec) const; + + // get the currency fraction with respect to currency usage + int getCurrencyFractionDigits(const UChar* currency, + UErrorCode* ec) const; + + // hashtable operations + Hashtable* initHashForAffixPattern(UErrorCode& status); + + void deleteHashForAffixPattern(); + + void copyHashForAffixPattern(const Hashtable* source, + Hashtable* target, UErrorCode& status); + + DecimalFormatImpl *fImpl; + + /** + * Constants. + */ + + + EnumSet<UNumberFormatAttribute, + UNUM_MAX_NONBOOLEAN_ATTRIBUTE+1, + UNUM_LIMIT_BOOLEAN_ATTRIBUTE> + fBoolFlags; + + + // style is only valid when decimal formatter is constructed by + // DecimalFormat(pattern, decimalFormatSymbol, style) + int fStyle; + + + // Affix pattern set for currency. + // It is a set of AffixPatternsForCurrency, + // each element of the set saves the negative prefix pattern, + // negative suffix pattern, positive prefix pattern, + // and positive suffix pattern of a pattern. + // It is used for currency mixed style parsing. + // It is actually a set. + // The set contains the default currency pattern from the locale, + // and the currency plural patterns. + // Since it is a set, it does not contain duplicated items. + // For example, if 2 currency plural patterns are the same, only one pattern + // is included in the set. When parsing, we do not check whether the plural + // count matches or not. 
+ Hashtable* fAffixPatternsForCurrency; + + // Information needed for DecimalFormat to format/parse currency plural. + CurrencyPluralInfo* fCurrencyPluralInfo; + +#if UCONFIG_HAVE_PARSEALLINPUT + UNumberFormatAttributeValue fParseAllInput; +#endif + + // Decimal Format Static Sets singleton. + const DecimalFormatStaticSets *fStaticSets; + +protected: + +#ifndef U_HIDE_INTERNAL_API + /** + * Rounds a value according to the rules of this object. + * @internal + */ + DigitList& _round(const DigitList& number, DigitList& adjustedNum, UBool& isNegative, UErrorCode& status) const; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Returns the currency in effect for this formatter. Subclasses + * should override this method as needed. Unlike getCurrency(), + * this method should never return "". + * @param result output parameter for null-terminated result, which must + * have a capacity of at least 4 + * @internal + */ + virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + + /** number of integer digits + * @stable ICU 2.4 + */ + static const int32_t kDoubleIntegerDigits; + /** number of fraction digits + * @stable ICU 2.4 + */ + static const int32_t kDoubleFractionDigits; + + /** + * When someone turns on scientific mode, we assume that more than this + * number of digits is due to flipping from some other mode that didn't + * restrict the maximum, and so we force 1 integer digit. We don't bother + * to track and see if someone is using exponential notation with more than + * this number, it wouldn't make sense anyway, and this is just to make sure + * that someone turning on scientific mode with default settings doesn't + * end up with lots of zeroes. 
+ * @stable ICU 2.8 + */ + static const int32_t kMaxScientificIntegerDigits; + +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _DECIMFMT +//eof diff --git a/intl/icu/source/i18n/unicode/dtfmtsym.h b/intl/icu/source/i18n/unicode/dtfmtsym.h new file mode 100644 index 000000000..757347881 --- /dev/null +++ b/intl/icu/source/i18n/unicode/dtfmtsym.h @@ -0,0 +1,1014 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File DTFMTSYM.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 07/21/98 stephen Added getZoneIndex() +* Changed to match C++ conventions +******************************************************************************** +*/ + +#ifndef DTFMTSYM_H +#define DTFMTSYM_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/calendar.h" +#include "unicode/uobject.h" +#include "unicode/locid.h" +#include "unicode/udat.h" +#include "unicode/ures.h" + +/** + * \file + * \brief C++ API: Symbols for formatting dates. + */ + +U_NAMESPACE_BEGIN + +/* forward declaration */ +class SimpleDateFormat; +class Hashtable; + +/** + * DateFormatSymbols is a public class for encapsulating localizable date-time + * formatting data -- including timezone data. DateFormatSymbols is used by + * DateFormat and SimpleDateFormat. + * <P> + * Rather than first creating a DateFormatSymbols to get a date-time formatter + * by using a SimpleDateFormat constructor, clients are encouraged to create a + * date-time formatter using the getTimeInstance(), getDateInstance(), or + * getDateTimeInstance() method in DateFormat. 
Each of these methods can return a + * date/time formatter initialized with a default format pattern along with the + * date-time formatting data for a given or default locale. After a formatter is + * created, clients may modify the format pattern using the setPattern function + * as so desired. For more information on using these formatter factory + * functions, see DateFormat. + * <P> + * If clients decide to create a date-time formatter with a particular format + * pattern and locale, they can do so with new SimpleDateFormat(aPattern, + * new DateFormatSymbols(aLocale)). This will load the appropriate date-time + * formatting data from the locale. + * <P> + * DateFormatSymbols objects are clonable. When clients obtain a + * DateFormatSymbols object, they can feel free to modify the date-time + * formatting data as necessary. For instance, clients can + * replace the localized date-time format pattern characters with the ones that + * they feel easy to remember. Or they can change the representative cities + * originally picked by default to using their favorite ones. + * <P> + * DateFormatSymbols are not expected to be subclassed. Data for a calendar is + * loaded out of resource bundles. The 'type' parameter indicates the type of + * calendar, for example, "gregorian" or "japanese". If the type is not gregorian + * (or NULL, or an empty string) then the type is appended to the resource name, + * for example, 'Eras_japanese' instead of 'Eras'. If the resource 'Eras_japanese' did + * not exist (even in root), then this class will fall back to just 'Eras', that is, + * Gregorian data. Therefore, the calendar implementor MUST ensure that the root + * locale at least contains any resources that are to be particularized for the + * calendar type. 
+ */ +class U_I18N_API DateFormatSymbols U_FINAL : public UObject { +public: + /** + * Construct a DateFormatSymbols object by loading format data from + * resources for the default locale, in the default calendar (Gregorian). + * <P> + * NOTE: This constructor will never fail; if it cannot get resource + * data for the default locale, it will return a last-resort object + * based on hard-coded strings. + * + * @param status Status code. Failure + * results if the resources for the default cannot be + * found or cannot be loaded + * @stable ICU 2.0 + */ + DateFormatSymbols(UErrorCode& status); + + /** + * Construct a DateFormatSymbols object by loading format data from + * resources for the given locale, in the default calendar (Gregorian). + * + * @param locale Locale to load format data from. + * @param status Status code. Failure + * results if the resources for the locale cannot be + * found or cannot be loaded + * @stable ICU 2.0 + */ + DateFormatSymbols(const Locale& locale, + UErrorCode& status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Construct a DateFormatSymbols object by loading format data from + * resources for the default locale, in the default calendar (Gregorian). + * <P> + * NOTE: This constructor will never fail; if it cannot get resource + * data for the default locale, it will return a last-resort object + * based on hard-coded strings. + * + * @param type Type of calendar (as returned by Calendar::getType). + * Will be used to access the correct set of strings. + * (NULL or empty string defaults to "gregorian".) + * @param status Status code. Failure + * results if the resources for the default cannot be + * found or cannot be loaded + * @internal + */ + DateFormatSymbols(const char *type, UErrorCode& status); + + /** + * Construct a DateFormatSymbols object by loading format data from + * resources for the given locale, in the default calendar (Gregorian). + * + * @param locale Locale to load format data from. 
+ * @param type Type of calendar (as returned by Calendar::getType). + * Will be used to access the correct set of strings. + * (NULL or empty string defaults to "gregorian".) + * @param status Status code. Failure + * results if the resources for the locale cannot be + * found or cannot be loaded + * @internal + */ + DateFormatSymbols(const Locale& locale, + const char *type, + UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Copy constructor. + * @stable ICU 2.0 + */ + DateFormatSymbols(const DateFormatSymbols&); + + /** + * Assignment operator. + * @stable ICU 2.0 + */ + DateFormatSymbols& operator=(const DateFormatSymbols&); + + /** + * Destructor. It is declared virtual, although this class is not designed to be + * subclassed. + * @stable ICU 2.0 + */ + virtual ~DateFormatSymbols(); + + /** + * Return true if another object is semantically equal to this one. + * + * @param other the DateFormatSymbols object to be compared with. + * @return true if other is semantically equal to this. + * @stable ICU 2.0 + */ + UBool operator==(const DateFormatSymbols& other) const; + + /** + * Return true if another object is semantically unequal to this one. + * + * @param other the DateFormatSymbols object to be compared with. + * @return true if other is semantically unequal to this. + * @stable ICU 2.0 + */ + UBool operator!=(const DateFormatSymbols& other) const { return !operator==(other); } + + /** + * Gets abbreviated era strings. For example: "AD" and "BC". + * + * @param count Filled in with length of the array. + * @return the era strings. + * @stable ICU 2.0 + */ + const UnicodeString* getEras(int32_t& count) const; + + /** + * Sets abbreviated era strings. For example: "AD" and "BC". + * @param eras Array of era strings (DateFormatSymbols retains ownership.) + * @param count The length of the array. + * @stable ICU 2.0 + */ + void setEras(const UnicodeString* eras, int32_t count); + + /** + * Gets era name strings. 
For example: "Anno Domini" and "Before Christ". + * + * @param count Filled in with length of the array. + * @return the era name strings. + * @stable ICU 3.4 + */ + const UnicodeString* getEraNames(int32_t& count) const; + + /** + * Sets era name strings. For example: "Anno Domini" and "Before Christ". + * @param eraNames Array of era name strings (DateFormatSymbols retains ownership.) + * @param count The length of the array. + * @stable ICU 3.6 + */ + void setEraNames(const UnicodeString* eraNames, int32_t count); + + /** + * Gets narrow era strings. For example: "A" and "B". + * + * @param count Filled in with length of the array. + * @return the narrow era strings. + * @stable ICU 4.2 + */ + const UnicodeString* getNarrowEras(int32_t& count) const; + + /** + * Sets narrow era strings. For example: "A" and "B". + * @param narrowEras Array of narrow era strings (DateFormatSymbols retains ownership.) + * @param count The length of the array. + * @stable ICU 4.2 + */ + void setNarrowEras(const UnicodeString* narrowEras, int32_t count); + + /** + * Gets month strings. For example: "January", "February", etc. + * @param count Filled in with length of the array. + * @return the month strings. (DateFormatSymbols retains ownership.) + * @stable ICU 2.0 + */ + const UnicodeString* getMonths(int32_t& count) const; + + /** + * Sets month strings. For example: "January", "February", etc. + * + * @param months the new month strings. (not adopted; caller retains ownership) + * @param count The length of the array. + * @stable ICU 2.0 + */ + void setMonths(const UnicodeString* months, int32_t count); + + /** + * Gets short month strings. For example: "Jan", "Feb", etc. + * + * @param count Filled in with length of the array. + * @return the short month strings. (DateFormatSymbols retains ownership.) + * @stable ICU 2.0 + */ + const UnicodeString* getShortMonths(int32_t& count) const; + + /** + * Sets short month strings. 
For example: "Jan", "Feb", etc. + * @param count The length of the array. + * @param shortMonths the new short month strings. (not adopted; caller retains ownership) + * @stable ICU 2.0 + */ + void setShortMonths(const UnicodeString* shortMonths, int32_t count); + + /** + * Selector for date formatting context + * @stable ICU 3.6 + */ + enum DtContextType { + FORMAT, + STANDALONE, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal DtContextType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + DT_CONTEXT_COUNT +#endif // U_HIDE_DEPRECATED_API + }; + + /** + * Selector for date formatting width + * @stable ICU 3.6 + */ + enum DtWidthType { + ABBREVIATED, + WIDE, + NARROW, + /** + * Short width is currently only supported for weekday names. + * @stable ICU 51 + */ + SHORT, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal DtWidthType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + DT_WIDTH_COUNT = 4 +#endif // U_HIDE_DEPRECATED_API + }; + + /** + * Gets month strings by width and context. For example: "January", "February", etc. + * @param count Filled in with length of the array. + * @param context The formatting context, either FORMAT or STANDALONE + * @param width The width of returned strings, either WIDE, ABBREVIATED, or NARROW. + * @return the month strings. (DateFormatSymbols retains ownership.) + * @stable ICU 3.4 + */ + const UnicodeString* getMonths(int32_t& count, DtContextType context, DtWidthType width) const; + + /** + * Sets month strings by width and context. For example: "January", "February", etc. + * + * @param months The new month strings. (not adopted; caller retains ownership) + * @param count The length of the array. + * @param context The formatting context, either FORMAT or STANDALONE + * @param width The width of the strings being set, either WIDE, ABBREVIATED, or NARROW. 
+ * @stable ICU 3.6 + */ + void setMonths(const UnicodeString* months, int32_t count, DtContextType context, DtWidthType width); + + /** + * Gets wide weekday strings. For example: "Sunday", "Monday", etc. + * @param count Filled in with length of the array. + * @return the weekday strings. (DateFormatSymbols retains ownership.) + * @stable ICU 2.0 + */ + const UnicodeString* getWeekdays(int32_t& count) const; + + + /** + * Sets wide weekday strings. For example: "Sunday", "Monday", etc. + * @param weekdays the new weekday strings. (not adopted; caller retains ownership) + * @param count The length of the array. + * @stable ICU 2.0 + */ + void setWeekdays(const UnicodeString* weekdays, int32_t count); + + /** + * Gets abbreviated weekday strings. For example: "Sun", "Mon", etc. (Note: The method name is + * misleading; it does not get the CLDR-style "short" weekday strings, e.g. "Su", "Mo", etc.) + * @param count Filled in with length of the array. + * @return the abbreviated weekday strings. (DateFormatSymbols retains ownership.) + * @stable ICU 2.0 + */ + const UnicodeString* getShortWeekdays(int32_t& count) const; + + /** + * Sets abbreviated weekday strings. For example: "Sun", "Mon", etc. (Note: The method name is + * misleading; it does not set the CLDR-style "short" weekday strings, e.g. "Su", "Mo", etc.) + * @param abbrevWeekdays the new abbreviated weekday strings. (not adopted; caller retains ownership) + * @param count The length of the array. + * @stable ICU 2.0 + */ + void setShortWeekdays(const UnicodeString* abbrevWeekdays, int32_t count); + + /** + * Gets weekday strings by width and context. For example: "Sunday", "Monday", etc. + * @param count Filled in with length of the array. + * @param context The formatting context, either FORMAT or STANDALONE + * @param width The width of returned strings, either WIDE, ABBREVIATED, SHORT, or NARROW + * @return the weekday strings. (DateFormatSymbols retains ownership.) 
+ * @stable ICU 3.4 + */ + const UnicodeString* getWeekdays(int32_t& count, DtContextType context, DtWidthType width) const; + + /** + * Sets weekday strings by width and context. For example: "Sunday", "Monday", etc. + * @param weekdays The new weekday strings. (not adopted; caller retains ownership) + * @param count The length of the array. + * @param context The formatting context, either FORMAT or STANDALONE + * @param width The width of the strings being set, either WIDE, ABBREVIATED, SHORT, or NARROW + * @stable ICU 3.6 + */ + void setWeekdays(const UnicodeString* weekdays, int32_t count, DtContextType context, DtWidthType width); + + /** + * Gets quarter strings by width and context. For example: "1st Quarter", "2nd Quarter", etc. + * @param count Filled in with length of the array. + * @param context The formatting context, either FORMAT or STANDALONE + * @param width The width of returned strings, either WIDE or ABBREVIATED. There + * are no NARROW quarters. + * @return the quarter strings. (DateFormatSymbols retains ownership.) + * @stable ICU 3.6 + */ + const UnicodeString* getQuarters(int32_t& count, DtContextType context, DtWidthType width) const; + + /** + * Sets quarter strings by width and context. For example: "1st Quarter", "2nd Quarter", etc. + * + * @param quarters The new quarter strings. (not adopted; caller retains ownership) + * @param count The length of the array. + * @param context The formatting context, either FORMAT or STANDALONE + * @param width The width of the strings being set, either WIDE or ABBREVIATED. There + * are no NARROW quarters. + * @stable ICU 3.6 + */ + void setQuarters(const UnicodeString* quarters, int32_t count, DtContextType context, DtWidthType width); + + /** + * Gets AM/PM strings. For example: "AM" and "PM". + * @param count Filled in with length of the array. + * @return the AM/PM strings. (DateFormatSymbols retains ownership.) 
+ * @stable ICU 2.0 + */ + const UnicodeString* getAmPmStrings(int32_t& count) const; + + /** + * Sets AM/PM strings. For example: "AM" and "PM". + * @param ampms the new AM/PM strings. (not adopted; caller retains ownership) + * @param count The length of the array. + * @stable ICU 2.0 + */ + void setAmPmStrings(const UnicodeString* ampms, int32_t count); + +#ifndef U_HIDE_INTERNAL_API + /** + * This default time separator is used for formatting when the locale + * doesn't specify any time separator, and is always recognized when parsing. + * @internal + */ + static const UChar DEFAULT_TIME_SEPARATOR = 0x003a; // ':' + + /** + * This alternate time separator is always recognized when parsing. + * @internal + */ + static const UChar ALTERNATE_TIME_SEPARATOR = 0x002e; // '.' + + /** + * Gets the time separator string. For example: ":". + * @param result Output param which will receive the time separator string. + * @return A reference to 'result'. + * @internal + */ + UnicodeString& getTimeSeparatorString(UnicodeString& result) const; + + /** + * Sets the time separator string. For example: ":". + * @param newTimeSeparator the new time separator string. + * @internal + */ + void setTimeSeparatorString(const UnicodeString& newTimeSeparator); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Gets cyclic year name strings if the calendar has them, by width and context. + * For example: "jia-zi", "yi-chou", etc. + * @param count Filled in with length of the array. + * @param context The usage context: FORMAT, STANDALONE. + * @param width The requested name width: WIDE, ABBREVIATED, NARROW. + * @return The year name strings (DateFormatSymbols retains ownership), + * or null if they are not available for this calendar. + * @stable ICU 54 + */ + const UnicodeString* getYearNames(int32_t& count, + DtContextType context, DtWidthType width) const; + + /** + * Sets cyclic year name strings by width and context. For example: "jia-zi", "yi-chou", etc. 
+ * + * @param yearNames The new cyclic year name strings (not adopted; caller retains ownership). + * @param count The length of the array. + * @param context The usage context: FORMAT, STANDALONE (currently only FORMAT is supported). + * @param width The name width: WIDE, ABBREVIATED, NARROW (currently only ABBREVIATED is supported). + * @stable ICU 54 + */ + void setYearNames(const UnicodeString* yearNames, int32_t count, + DtContextType context, DtWidthType width); + + /** + * Gets calendar zodiac name strings if the calendar has them, by width and context. + * For example: "Rat", "Ox", "Tiger", etc. + * @param count Filled in with length of the array. + * @param context The usage context: FORMAT, STANDALONE. + * @param width The requested name width: WIDE, ABBREVIATED, NARROW. + * @return The zodiac name strings (DateFormatSymbols retains ownership), + * or null if they are not available for this calendar. + * @stable ICU 54 + */ + const UnicodeString* getZodiacNames(int32_t& count, + DtContextType context, DtWidthType width) const; + + /** + * Sets calendar zodiac name strings by width and context. For example: "Rat", "Ox", "Tiger", etc. + * + * @param zodiacNames The new zodiac name strings (not adopted; caller retains ownership). + * @param count The length of the array. + * @param context The usage context: FORMAT, STANDALONE (currently only FORMAT is supported). + * @param width The name width: WIDE, ABBREVIATED, NARROW (currently only ABBREVIATED is supported). + * @stable ICU 54 + */ + void setZodiacNames(const UnicodeString* zodiacNames, int32_t count, + DtContextType context, DtWidthType width); + +#ifndef U_HIDE_INTERNAL_API + /** + * Somewhat temporary constants for leap month pattern types, adequate for supporting + * just leap month patterns as needed for the Chinese lunar calendar. 
+ * Eventually we will add full support for different month pattern types (needed for + * other calendars such as Hindu) at which point this approach will be replaced by a + * more complete approach. + * @internal + */ + enum EMonthPatternType + { + kLeapMonthPatternFormatWide, + kLeapMonthPatternFormatAbbrev, + kLeapMonthPatternFormatNarrow, + kLeapMonthPatternStandaloneWide, + kLeapMonthPatternStandaloneAbbrev, + kLeapMonthPatternStandaloneNarrow, + kLeapMonthPatternNumeric, + kMonthPatternsCount + }; + + /** + * Somewhat temporary function for getting the complete set of leap month patterns for all + * contexts & widths, indexed by EMonthPatternType values. Returns NULL if the calendar + * does not have leap month patterns. Note, there is currently no setter for this. + * Eventually we will add full support for different month pattern types (needed for + * other calendars such as Hindu) at which point this approach will be replaced by a + * more complete approach. + * @param count Filled in with length of the array (may be 0). + * @return The leap month patterns (DateFormatSymbols retains ownership). + * May be NULL if there are no leap month patterns for this calendar. + * @internal + */ + const UnicodeString* getLeapMonthPatterns(int32_t& count) const; + +#endif /* U_HIDE_INTERNAL_API */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * Gets timezone strings. These strings are stored in a 2-dimensional array. + * @param rowCount Output param to receive number of rows. + * @param columnCount Output param to receive number of columns. + * @return The timezone strings as a 2-d array. (DateFormatSymbols retains ownership.) + * @deprecated ICU 3.6 + */ + const UnicodeString** getZoneStrings(int32_t& rowCount, int32_t& columnCount) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Sets timezone strings. These strings are stored in a 2-dimensional array. + * <p><b>Note:</b> SimpleDateFormat no longer uses the zone strings stored in + * a DateFormatSymbols. 
Therefore, the time zone strings set by this method + * have no effect in an instance of SimpleDateFormat for formatting time + * zones. + * @param strings The timezone strings as a 2-d array to be copied. (not adopted; caller retains ownership) + * @param rowCount The number of rows (count of first index). + * @param columnCount The number of columns (count of second index). + * @stable ICU 2.0 + */ + void setZoneStrings(const UnicodeString* const* strings, int32_t rowCount, int32_t columnCount); + + /** + * Get the non-localized date-time pattern characters. + * @return the non-localized date-time pattern characters + * @stable ICU 2.0 + */ + static const UChar * U_EXPORT2 getPatternUChars(void); + + /** + * Gets localized date-time pattern characters. For example: 'u', 't', etc. + * <p> + * Note: ICU no longer provides localized date-time pattern characters for a locale + * starting with ICU 3.8. This method returns the non-localized date-time pattern + * characters unless user-defined localized data is set by setLocalPatternChars. + * @param result Output param which will receive the localized date-time pattern characters. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + UnicodeString& getLocalPatternChars(UnicodeString& result) const; + + /** + * Sets localized date-time pattern characters. For example: 'u', 't', etc. + * @param newLocalPatternChars the new localized date-time + * pattern characters. + * @stable ICU 2.0 + */ + void setLocalPatternChars(const UnicodeString& newLocalPatternChars); + + /** + * Returns the locale for this object. Two flavors are available: + * valid and actual locale. + * @stable ICU 2.8 + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + + /* The following type and kCapContextUsageTypeCount cannot be #ifndef U_HIDE_INTERNAL_API, + they are needed for .h file declarations. */ + /** + * Constants for capitalization context usage types. 
+ * @internal + */ + enum ECapitalizationContextUsageType + { +#ifndef U_HIDE_INTERNAL_API + kCapContextUsageOther = 0, + kCapContextUsageMonthFormat, /* except narrow */ + kCapContextUsageMonthStandalone, /* except narrow */ + kCapContextUsageMonthNarrow, + kCapContextUsageDayFormat, /* except narrow */ + kCapContextUsageDayStandalone, /* except narrow */ + kCapContextUsageDayNarrow, + kCapContextUsageEraWide, + kCapContextUsageEraAbbrev, + kCapContextUsageEraNarrow, + kCapContextUsageZoneLong, + kCapContextUsageZoneShort, + kCapContextUsageMetazoneLong, + kCapContextUsageMetazoneShort, +#endif /* U_HIDE_INTERNAL_API */ + kCapContextUsageTypeCount = 14 /* hard-coded: must equal the number of usage types above (values 0..13), which are hidden when U_HIDE_INTERNAL_API is defined */ + }; + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + + friend class SimpleDateFormat; + friend class DateFormatSymbolsSingleSetter; // see udat.cpp + + /** + * Abbreviated era strings. For example: "AD" and "BC". + */ + UnicodeString* fEras; + int32_t fErasCount; + + /** + * Era name strings. For example: "Anno Domini" and "Before Christ". + */ + UnicodeString* fEraNames; + int32_t fEraNamesCount; + + /** + * Narrow era strings. For example: "A" and "B". + */ + UnicodeString* fNarrowEras; + int32_t fNarrowErasCount; + + /** + * Month strings. For example: "January", "February", etc. + */ + UnicodeString* fMonths; + int32_t fMonthsCount; + + /** + * Short month strings. For example: "Jan", "Feb", etc. + */ + UnicodeString* fShortMonths; + int32_t fShortMonthsCount; + + /** + * Narrow month strings. For example: "J", "F", etc. + */ + UnicodeString* fNarrowMonths; + int32_t fNarrowMonthsCount; + + /** + * Standalone Month strings. For example: "January", "February", etc. 
+ */ + UnicodeString* fStandaloneMonths; + int32_t fStandaloneMonthsCount; + + /** + * Standalone Short month strings. For example: "Jan", "Feb", etc. + */ + UnicodeString* fStandaloneShortMonths; + int32_t fStandaloneShortMonthsCount; + + /** + * Standalone Narrow month strings. For example: "J", "F", etc. + */ + UnicodeString* fStandaloneNarrowMonths; + int32_t fStandaloneNarrowMonthsCount; + + /** + * CLDR-style format wide weekday strings. For example: "Sunday", "Monday", etc. + */ + UnicodeString* fWeekdays; + int32_t fWeekdaysCount; + + /** + * CLDR-style format abbreviated (not short) weekday strings. For example: "Sun", "Mon", etc. + */ + UnicodeString* fShortWeekdays; + int32_t fShortWeekdaysCount; + + /** + * CLDR-style format short weekday strings. For example: "Su", "Mo", etc. + */ + UnicodeString* fShorterWeekdays; + int32_t fShorterWeekdaysCount; + + /** + * CLDR-style format narrow weekday strings. For example: "S", "M", etc. + */ + UnicodeString* fNarrowWeekdays; + int32_t fNarrowWeekdaysCount; + + /** + * CLDR-style standalone wide weekday strings. For example: "Sunday", "Monday", etc. + */ + UnicodeString* fStandaloneWeekdays; + int32_t fStandaloneWeekdaysCount; + + /** + * CLDR-style standalone abbreviated (not short) weekday strings. For example: "Sun", "Mon", etc. + */ + UnicodeString* fStandaloneShortWeekdays; + int32_t fStandaloneShortWeekdaysCount; + + /** + * CLDR-style standalone short weekday strings. For example: "Su", "Mo", etc. + */ + UnicodeString* fStandaloneShorterWeekdays; + int32_t fStandaloneShorterWeekdaysCount; + + /** + * CLDR-style standalone narrow weekday strings. For example: "S", "M", etc. + */ + UnicodeString* fStandaloneNarrowWeekdays; + int32_t fStandaloneNarrowWeekdaysCount; + + /** + * Ampm strings. For example: "AM" and "PM". + */ + UnicodeString* fAmPms; + int32_t fAmPmsCount; + + /** + * Narrow Ampm strings. For example: "a" and "p". 
+ */ + UnicodeString* fNarrowAmPms; + int32_t fNarrowAmPmsCount; + + /** + * Time separator string. For example: ":". + */ + UnicodeString fTimeSeparator; + + /** + * Quarter strings. For example: "1st quarter", "2nd quarter", etc. + */ + UnicodeString *fQuarters; + int32_t fQuartersCount; + + /** + * Short quarters. For example: "Q1", "Q2", etc. + */ + UnicodeString *fShortQuarters; + int32_t fShortQuartersCount; + + /** + * Standalone quarter strings. For example: "1st quarter", "2nd quarter", etc. + */ + UnicodeString *fStandaloneQuarters; + int32_t fStandaloneQuartersCount; + + /** + * Standalone short quarter strings. For example: "Q1", "Q2", etc. + */ + UnicodeString *fStandaloneShortQuarters; + int32_t fStandaloneShortQuartersCount; + + /** + * All leap month patterns, for example "{0}bis". + */ + UnicodeString *fLeapMonthPatterns; + int32_t fLeapMonthPatternsCount; + + /** + * Cyclic year names, for example: "jia-zi", "yi-chou", ... "gui-hai"; + * currently we only have data for format/abbreviated. + * For the others, just get from format/abbreviated, ignore set. + */ + UnicodeString *fShortYearNames; + int32_t fShortYearNamesCount; + + /** + * Cyclic zodiac names, for example "Rat", "Ox", "Tiger", etc.; + * currently we only have data for format/abbreviated. + * For the others, just get from format/abbreviated, ignore set. + */ + UnicodeString *fShortZodiacNames; + int32_t fShortZodiacNamesCount; + + /** + * Localized names of time zones in this locale. This is a + * two-dimensional array of strings of size n by m, + * where m is at least 5 and up to 7. Each of the n rows is an + * entry containing the localized names for a single TimeZone. + * NOTE(review): the row layout documented for this field lists column + * indices 0 through 7 (eight entries); confirm the "up to 7" bound. 
+ * + * Each such row contains (with i ranging from 0..n-1): + * + * zoneStrings[i][0] - time zone ID + * example: America/Los_Angeles + * zoneStrings[i][1] - long name of zone in standard time + * example: Pacific Standard Time + * zoneStrings[i][2] - short name of zone in standard time + * example: PST + * zoneStrings[i][3] - long name of zone in daylight savings time + * example: Pacific Daylight Time + * zoneStrings[i][4] - short name of zone in daylight savings time + * example: PDT + * zoneStrings[i][5] - location name of zone + * example: United States (Los Angeles) + * zoneStrings[i][6] - long generic name of zone + * example: Pacific Time + * zoneStrings[i][7] - short generic name of zone + * example: PT + * + * The zone ID is not localized; it corresponds to the ID + * value associated with a system time zone object. All other entries + * are localized names. If a zone does not implement daylight savings + * time, the daylight savings time names are ignored. + * + * Note: CLDR 1.5 introduced metazone and its historical mappings. + * This simple two-dimensional array is no longer sufficient to represent + * localized names and their historic changes. Since ICU 3.8.1, localized + * zone names extracted from ICU locale data are stored in a ZoneStringFormat + * instance. But we still need to support the old way of customizing + * localized zone names, so we keep this field for that purpose. + */ + UnicodeString **fZoneStrings; // Zone string array set by setZoneStrings + UnicodeString **fLocaleZoneStrings; // Zone string array created by the locale + int32_t fZoneStringsRowCount; + int32_t fZoneStringsColCount; + + Locale fZSFLocale; // Locale used for getting ZoneStringFormat + + /** + * Localized date-time pattern characters. For example: use 'u' as 'y'. + */ + UnicodeString fLocalPatternChars; + + /** + * Capitalization transforms. 
For each usage type, the first array element indicates + * whether to titlecase for uiListOrMenu context, the second indicates whether to + * titlecase for stand-alone context. + */ + UBool fCapitalization[kCapContextUsageTypeCount][2]; + + /** + * Abbreviated (== short) day period strings. + */ + UnicodeString *fAbbreviatedDayPeriods; + int32_t fAbbreviatedDayPeriodsCount; + + /** + * Wide day period strings. + */ + UnicodeString *fWideDayPeriods; + int32_t fWideDayPeriodsCount; + + /** + * Narrow day period strings. + */ + UnicodeString *fNarrowDayPeriods; + int32_t fNarrowDayPeriodsCount; + + /** + * Stand-alone abbreviated (== short) day period strings. + */ + UnicodeString *fStandaloneAbbreviatedDayPeriods; + int32_t fStandaloneAbbreviatedDayPeriodsCount; + + /** + * Stand-alone wide day period strings. + */ + UnicodeString *fStandaloneWideDayPeriods; + int32_t fStandaloneWideDayPeriodsCount; + + /** + * Stand-alone narrow day period strings. + */ + UnicodeString *fStandaloneNarrowDayPeriods; + int32_t fStandaloneNarrowDayPeriodsCount; + +private: + /** valid/actual locale information + * these are always ICU locales, so the length should not be a problem + */ + char validLocale[ULOC_FULLNAME_CAPACITY]; + char actualLocale[ULOC_FULLNAME_CAPACITY]; + + DateFormatSymbols(); // default constructor not implemented + + /** + * Called by the constructors to actually load data from the resources + * + * @param locale The locale to get symbols for. + * @param type Calendar Type (as from Calendar::getType()) + * @param status Input/output parameter, set to success or + * failure code upon return. + * @param useLastResortData whether to use last-resort data + */ + void initializeData(const Locale& locale, const char *type, UErrorCode& status, UBool useLastResortData = FALSE); + + /** + * Copy or alias an array in another object, as appropriate. + * + * @param dstArray the copy destination array. + * @param dstCount fill in with the length of 'dstArray'. 
+ * @param srcArray the source array to be copied. + * @param srcCount the length of items to be copied from the 'srcArray'. + */ + static void assignArray(UnicodeString*& dstArray, + int32_t& dstCount, + const UnicodeString* srcArray, + int32_t srcCount); + + /** + * Return true if the given arrays' contents are equal, or if the arrays are + * identical (pointers are equal). + * + * @param array1 one array to be compared with. + * @param array2 another array to be compared with. + * @param count the number of items to be compared. + * @return true if the given arrays' contents are equal, or if the arrays are + * identical (pointers are equal). + */ + static UBool arrayCompare(const UnicodeString* array1, + const UnicodeString* array2, + int32_t count); + + /** + * Create a copy, in fZoneStrings, of the given zone strings array. The + * member variables fZoneStringsRowCount and fZoneStringsColCount should be + * set already by the caller. + */ + void createZoneStrings(const UnicodeString *const * otherStrings); + + /** + * Delete all the storage owned by this object. + */ + void dispose(void); + + /** + * Copy all of the other's data to this. + * @param other the object to be copied. + */ + void copyData(const DateFormatSymbols& other); + + /** + * Create zone strings array by locale if not yet available + */ + void initZoneStringsArray(void); + + /** + * Delete just the zone strings. + */ + void disposeZoneStrings(void); + + /** + * Returns the date format field index of the pattern character c, + * or UDAT_FIELD_COUNT if c is not a pattern character. + */ + static UDateFormatField U_EXPORT2 getPatternCharIndex(UChar c); + + /** + * Returns TRUE if f (with its pattern character repeated count times) is a numeric field. + */ + static UBool U_EXPORT2 isNumericField(UDateFormatField f, int32_t count); + + /** + * Returns TRUE if c (repeated count times) is the pattern character for a numeric field. 
+ */ + static UBool U_EXPORT2 isNumericPatternChar(UChar c, int32_t count); +public: +#ifndef U_HIDE_INTERNAL_API + /** + * Gets a DateFormatSymbols by locale. + * Unlike the constructors which always use gregorian calendar, this + * method uses the calendar in the locale. If the locale contains no + * explicit calendar, this method uses the default calendar for that + * locale. + * @param locale the locale. + * @param status error returned here. + * @return the new DateFormatSymbols which the caller owns. + * @internal For ICU use only. + */ + static DateFormatSymbols * U_EXPORT2 createForLocale( + const Locale &locale, UErrorCode &status); +#endif /* U_HIDE_INTERNAL_API */ +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _DTFMTSYM +//eof diff --git a/intl/icu/source/i18n/unicode/dtitvfmt.h b/intl/icu/source/i18n/unicode/dtitvfmt.h new file mode 100644 index 000000000..68360b87d --- /dev/null +++ b/intl/icu/source/i18n/unicode/dtitvfmt.h @@ -0,0 +1,1046 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************************** +* Copyright (C) 2008-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File DTITVFMT.H +* +******************************************************************************* +*/ + +#ifndef __DTITVFMT_H__ +#define __DTITVFMT_H__ + + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Format date intervals in a language-independent manner. 
+ */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/ucal.h" +#include "unicode/smpdtfmt.h" +#include "unicode/dtintrv.h" +#include "unicode/dtitvinf.h" +#include "unicode/dtptngen.h" + +U_NAMESPACE_BEGIN + + + +/** + * DateIntervalFormat is a class for formatting and parsing date + * intervals in a language-independent manner. + * Only formatting is supported, parsing is not supported. + * + * <P> + * Date interval means from one date to another date, + * for example, from "Jan 11, 2008" to "Jan 18, 2008". + * We introduced class DateInterval to represent it. + * DateInterval is a pair of UDate, which is + * the standard milliseconds since 00:00 GMT, Jan 1, 1970. + * + * <P> + * DateIntervalFormat formats a DateInterval into + * text as compactly as possible. + * For example, the date interval format from "Jan 11, 2008" to "Jan 18, 2008" + * is "Jan 11-18, 2008" for English. + * And it parses text into DateInterval, + * although initially, parsing is not supported. + * + * <P> + * There is no structural information in date time patterns. + * For any punctuation or string literal inside a date time pattern, + * we do not know whether it is just a separator, or a prefix, or a suffix. + * Without such information, it is difficult to generate a sub-pattern + * (or super-pattern) by algorithm. + * So, formatting a DateInterval is pattern-driven. It is very + * similar to formatting in SimpleDateFormat. + * We introduce class DateIntervalInfo to save date interval + * patterns, similar to date time pattern in SimpleDateFormat. + * + * <P> + * Logically, the interval patterns are mappings + * from (skeleton, the_largest_different_calendar_field) + * to (date_interval_pattern). + * + * <P> + * A skeleton + * <ol> + * <li> + * only keeps the field pattern letter and ignores all other parts + * in a pattern, such as spaces, punctuation, and string literals. + * </li> + * <li> + * hides the order of fields. 
+ * </li> + * <li> + * might hide a field's pattern letter length. + * </li> + * </ol> + * + * For those non-digit calendar fields, the pattern letter length is + * important, such as MMM, MMMM, and MMMMM; EEE and EEEE, + * and the field's pattern letter length is honored. + * + * For the digit calendar fields, such as M or MM, d or dd, yy or yyyy, + * the field pattern length is ignored and the best match, which is defined + * in date time patterns, will be returned without honoring the field pattern + * letter length in skeleton. + * + * <P> + * The calendar fields we support for interval formatting are: + * year, month, date, day-of-week, am-pm, hour, hour-of-day, minute, and second + * (though we do not currently have specific intervalFormat data for skeletons + * with seconds). + * Those calendar fields can be defined in the following order: + * year > month > date > hour (in day) > minute > second + * + * The largest different calendar field between 2 calendars is the + * first different calendar field in the above order. + * + * For example: the largest different calendar field between "Jan 10, 2007" + * and "Feb 20, 2008" is year. + * + * <P> + * For other calendar fields, the compact interval formatting is not + * supported. And the interval format will fall back to the fall-back + * pattern, which is mostly "{date0} - {date1}". + * + * <P> + * There is a set of pre-defined static skeleton strings. + * There are pre-defined interval patterns for those pre-defined skeletons + * in locales' resource files. + * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is "yMMMd", + * in en_US, if the largest different calendar field between date1 and date2 + * is "year", the date interval pattern is "MMM d, yyyy - MMM d, yyyy", + * such as "Jan 10, 2007 - Jan 10, 2008". + * If the largest different calendar field between date1 and date2 is "month", + * the date interval pattern is "MMM d - MMM d, yyyy", + * such as "Jan 10 - Feb 10, 2007". 
+ * If the largest different calendar field between date1 and date2 is "day", + * the date interval pattern is "MMM d-d, yyyy", such as "Jan 10-20, 2007". + * + * For date skeleton, the interval patterns when year, or month, or date is + * different are defined in resource files. + * For time skeleton, the interval patterns when am/pm, or hour, or minute is + * different are defined in resource files. + * + * <P> + * If a skeleton is not found in a locale's DateIntervalInfo, which means + * the interval patterns for the skeleton are not defined in the resource file, + * the interval pattern will fall back to the interval "fallback" pattern + * defined in the resource file. + * If the interval "fallback" pattern is not defined, the default fall-back + * is "{date0} - {date1}". + * + * <P> + * For the combination of date and time, + * the rules to generate interval patterns are: + * <ol> + * <li> + * when the year, month, or day differs, fall back to the fall-back + * interval pattern, which mostly is the concatenation of the two original + * expressions with a separator between. + * For example, interval pattern from "Jan 10, 2007 10:10 am" + * to "Jan 11, 2007 10:10am" is + * "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am" + * </li> + * <li> + * otherwise, present the date followed by the range expression + * for the time. + * For example, interval pattern from "Jan 10, 2007 10:10 am" + * to "Jan 10, 2007 11:10am" is "Jan 10, 2007 10:10 am - 11:10am" + * </li> + * </ol> + * + * + * <P> + * If two dates are the same, the interval pattern is the single date pattern. + * For example, interval pattern from "Jan 10, 2007" to "Jan 10, 2007" is + * "Jan 10, 2007". + * + * Or if the presenting fields between 2 dates have the exact same values, + * the interval pattern is the single date pattern. + * For example, if user only requests year and month, + * the interval pattern from "Jan 10, 2007" to "Jan 20, 2007" is "Jan 2007". 
+ * + * <P> + * DateIntervalFormat needs the following information for correct + * formatting: time zone, calendar type, pattern, date format symbols, + * and date interval patterns. + * It can be instantiated in 2 ways: + * <ol> + * <li> + * create an instance using default or given locale plus given skeleton. + * Users are encouraged to created date interval formatter this way and + * to use the pre-defined skeleton macros, such as + * UDAT_YEAR_NUM_MONTH, which consists the calendar fields and + * the format style. + * </li> + * <li> + * create an instance using default or given locale plus given skeleton + * plus a given DateIntervalInfo. + * This factory method is for powerful users who want to provide their own + * interval patterns. + * Locale provides the timezone, calendar, and format symbols information. + * Local plus skeleton provides full pattern information. + * DateIntervalInfo provides the date interval patterns. + * </li> + * </ol> + * + * <P> + * For the calendar field pattern letter, such as G, y, M, d, a, h, H, m, s etc. + * DateIntervalFormat uses the same syntax as that of + * DateTime format. + * + * <P> + * Code Sample: general usage + * <pre> + * \code + * // the date interval object which the DateIntervalFormat formats on + * // and parses into + * DateInterval* dtInterval = new DateInterval(1000*3600*24, 1000*3600*24*2); + * UErrorCode status = U_ZERO_ERROR; + * DateIntervalFormat* dtIntervalFmt = DateIntervalFormat::createInstance( + * UDAT_YEAR_MONTH_DAY, + * Locale("en", "GB", ""), status); + * UnicodeString dateIntervalString; + * FieldPosition pos = 0; + * // formatting + * dtIntervalFmt->format(dtInterval, dateIntervalString, pos, status); + * delete dtIntervalFmt; + * \endcode + * </pre> + */ + +class U_I18N_API DateIntervalFormat : public Format { +public: + + /** + * Construct a DateIntervalFormat from skeleton and the default locale. 
+ * + * This is a convenience overload of + * createInstance(const UnicodeString& skeleton, const Locale& locale, + * UErrorCode&) + * with the value of locale as default locale. + * + * @param skeleton the skeleton on which the interval format is based. + * @param status output param set to success/failure code on exit + * @return a date time interval formatter which the caller owns. + * @stable ICU 4.0 + */ + static DateIntervalFormat* U_EXPORT2 createInstance( + const UnicodeString& skeleton, + UErrorCode& status); + + /** + * Construct a DateIntervalFormat from skeleton and a given locale. + * <P> + * In this factory method, + * the date interval pattern information is loaded from resource files. + * Users are encouraged to create date interval formatters this way and + * to use the pre-defined skeleton macros. + * + * <P> + * There are pre-defined skeletons (defined in udate.h) having predefined + * interval patterns in resource files. + * Users are encouraged to use those macros. + * For example: + * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status) + * + * The given Locale provides the interval patterns. + * For example, for en_GB, if skeleton is UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY, + * which is "yMMMEEEd", + * the interval patterns defined in resource file to above skeleton are: + * "EEE, d MMM, yyyy - EEE, d MMM, yyyy" for year differs, + * "EEE, d MMM - EEE, d MMM, yyyy" for month differs, + * "EEE, d - EEE, d MMM, yyyy" for day differs, + * @param skeleton the skeleton on which the interval format is based. + * @param locale the given locale + * @param status output param set to success/failure code on exit + * @return a date time interval formatter which the caller owns. 
+ * @stable ICU 4.0 + * <p> + * <h4>Sample code</h4> + * \snippet samples/dtitvfmtsample/dtitvfmtsample.cpp dtitvfmtPreDefined1 + * \snippet samples/dtitvfmtsample/dtitvfmtsample.cpp dtitvfmtPreDefined + * <p> + */ + + static DateIntervalFormat* U_EXPORT2 createInstance( + const UnicodeString& skeleton, + const Locale& locale, + UErrorCode& status); + + /** + * Construct a DateIntervalFormat from skeleton, + * DateIntervalInfo, and default locale. + * + * This is a convenience overload of + * createInstance(const UnicodeString& skeleton, const Locale& locale, + * const DateIntervalInfo& dtitvinf, UErrorCode&) + * with the locale value as default locale. + * + * @param skeleton the skeleton on which the interval format is based. + * @param dtitvinf the DateIntervalInfo object. + * @param status output param set to success/failure code on exit + * @return a date time interval formatter which the caller owns. + * @stable ICU 4.0 + */ + static DateIntervalFormat* U_EXPORT2 createInstance( + const UnicodeString& skeleton, + const DateIntervalInfo& dtitvinf, + UErrorCode& status); + + /** + * Construct a DateIntervalFormat from skeleton, + * a DateIntervalInfo, and the given locale. + * + * <P> + * In this factory method, user provides its own date interval pattern + * information, instead of using those pre-defined data in resource file. + * This factory method is for powerful users who want to provide their own + * interval patterns. + * <P> + * There are pre-defined skeletons (defined in udate.h) having predefined + * interval patterns in resource files. + * Users are encouraged to use those macros. + * For example: + * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status) + * + * The DateIntervalInfo provides the interval patterns, + * and ownership of the DateIntervalInfo remains with the caller. 
+ * + * Users are encouraged to set default interval pattern in DateIntervalInfo + * as well, if they want to set other interval patterns ( instead of + * reading the interval patterns from resource files). + * When the corresponding interval pattern for a largest calendar different + * field is not found (if the user has not set it), the interval format falls back to + * the default interval pattern. + * If user does not provide default interval pattern, it falls back to + * "{date0} - {date1}" + * + * @param skeleton the skeleton on which the interval format is based. + * @param locale the given locale + * @param dtitvinf the DateIntervalInfo object. + * @param status output param set to success/failure code on exit + * @return a date time interval formatter which the caller owns. + * @stable ICU 4.0 + * <p> + * <h4>Sample code</h4> + * \snippet samples/dtitvfmtsample/dtitvfmtsample.cpp dtitvfmtPreDefined1 + * \snippet samples/dtitvfmtsample/dtitvfmtsample.cpp dtitvfmtCustomized + * <p> + */ + static DateIntervalFormat* U_EXPORT2 createInstance( + const UnicodeString& skeleton, + const Locale& locale, + const DateIntervalInfo& dtitvinf, + UErrorCode& status); + + /** + * Destructor. + * @stable ICU 4.0 + */ + virtual ~DateIntervalFormat(); + + /** + * Clone this Format object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 4.0 + */ + virtual Format* clone(void) const; + + /** + * Return true if the given Format objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param other the object to be compared with. + * @return true if the given Format objects are semantically equal. + * @stable ICU 4.0 + */ + virtual UBool operator==(const Format& other) const; + + /** + * Return true if the given Format objects are not semantically equal. + * Objects of different subclasses are considered unequal. + * @param other the object to be compared with. 
+ * @return true if the given Format objects are not semantically equal. + * @stable ICU 4.0 + */ + UBool operator!=(const Format& other) const; + + + using Format::format; + + /** + * Format an object to produce a string. This method handles Formattable + * objects with a DateInterval type. + * If the Formattable object type is not a DateInterval, + * then a failing UErrorCode is reported through status. + * + * @param obj The object to format. + * Must be a DateInterval. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param fieldPosition On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * There may be multiple instances of a given field type + * in an interval format; in this case the fieldPosition + * offsets refer to the first instance. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.0 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& fieldPosition, + UErrorCode& status) const ; + + + + /** + * Format a DateInterval to produce a string. + * + * @param dtInterval DateInterval to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param fieldPosition On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * There may be multiple instances of a given field type + * in an interval format; in this case the fieldPosition + * offsets refer to the first instance. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.0 + */ + UnicodeString& format(const DateInterval* dtInterval, + UnicodeString& appendTo, + FieldPosition& fieldPosition, + UErrorCode& status) const ; + + + /** + * Format 2 Calendars to produce a string. 
+ * + * Note: "fromCalendar" and "toCalendar" are not const, + * since calendar is not const in SimpleDateFormat::format(Calendar&). + * + * @param fromCalendar calendar set to the from date in date interval + * to be formatted into date interval string + * @param toCalendar calendar set to the to date in date interval + * to be formatted into date interval string + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param fieldPosition On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * There may be multiple instances of a given field type + * in an interval format; in this case the fieldPosition + * offsets refer to the first instance. + * @param status Output param filled with success/failure status. + * Caller needs to make sure it is SUCCESS + * at the function entrance + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.0 + */ + UnicodeString& format(Calendar& fromCalendar, + Calendar& toCalendar, + UnicodeString& appendTo, + FieldPosition& fieldPosition, + UErrorCode& status) const ; + + /** + * Date interval parsing is not supported. Please do not use. + * <P> + * This method should handle parsing of + * date time interval strings into Formattable objects with + * DateInterval type, which is a pair of UDate. + * <P> + * Before calling, set parse_pos.index to the offset you want to start + * parsing at in the source. After calling, parse_pos.index is the end of + * the text you parsed. If error occurs, index is unchanged. + * <P> + * When parsing, leading whitespace is discarded (with a successful parse), + * while trailing whitespace is left as is. + * <P> + * See Format::parseObject() for more. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. 
Since no parsing + * is supported, upon return this param is unchanged. + * @note The method is declared void and returns nothing; a parse + * result, if any, would be delivered through the result + * parameter. + * @internal ICU 4.0 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& parse_pos) const; + + + /** + * Gets the date time interval patterns. + * @return the date time interval patterns associated with + * this date interval formatter. + * @stable ICU 4.0 + */ + const DateIntervalInfo* getDateIntervalInfo(void) const; + + + /** + * Set the date time interval patterns. + * @param newIntervalPatterns the given interval patterns to copy. + * @param status output param set to success/failure code on exit + * @stable ICU 4.0 + */ + void setDateIntervalInfo(const DateIntervalInfo& newIntervalPatterns, + UErrorCode& status); + + + /** + * Gets the date formatter. The DateIntervalFormat instance continues to own + * the returned DateFormat object, and will use and possibly modify it + * during format operations. In a multi-threaded environment, the returned + * DateFormat can only be used if it is certain that no other threads are + * concurrently using this DateIntervalFormat, even for nominally const + * functions. + * + * @return the date formatter associated with this date interval formatter. + * @stable ICU 4.0 + */ + const DateFormat* getDateFormat(void) const; + + /** + * Returns a reference to the TimeZone used by this DateIntervalFormat's calendar. + * @return the time zone associated with the calendar of DateIntervalFormat. + * @stable ICU 4.8 + */ + virtual const TimeZone& getTimeZone(void) const; + + /** + * Sets the time zone for the calendar used by this DateIntervalFormat object. The + * caller no longer owns the TimeZone object and should not delete it after this call. + * @param zoneToAdopt the TimeZone to be adopted. 
+ * @stable ICU 4.8 + */ + virtual void adoptTimeZone(TimeZone* zoneToAdopt); + + /** + * Sets the time zone for the calendar used by this DateIntervalFormat object. + * @param zone the new time zone. + * @stable ICU 4.8 + */ + virtual void setTimeZone(const TimeZone& zone); + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 4.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 4.0 + */ + virtual UClassID getDynamicClassID(void) const; + +protected: + + /** + * Copy constructor. + * @stable ICU 4.0 + */ + DateIntervalFormat(const DateIntervalFormat&); + + /** + * Assignment operator. + * @stable ICU 4.0 + */ + DateIntervalFormat& operator=(const DateIntervalFormat&); + +private: + + /* + * This is for ICU internal use only. Please do not use. + * Save the interval pattern information. + * Interval pattern consists of 2 single date patterns and the separator. + * For example, interval pattern "MMM d - MMM d, yyyy" consists of + * a single date pattern "MMM d", another single date pattern "MMM d, yyyy", + * and a separator "-". + * The pattern is divided into 2 parts. For above example, + * the first part is "MMM d - ", and the second part is "MMM d, yyyy". 
+ * Also, the first date that appears in an interval pattern could be + * the earlier date or the later date. + * And such information is saved in the interval pattern as well. + */ + struct PatternInfo { + UnicodeString firstPart; + UnicodeString secondPart; + /** + * Whether the first date in interval pattern is later date or not. + * Fallback format set the default ordering. + * And for a particular interval pattern, the order can be + * overridden by prefixing the interval pattern with "latestFirst:" or + * "earliestFirst:" + * For example, given 2 dates, Jan 10, 2007 to Feb 10, 2007. + * if the fallback format is "{0} - {1}", + * and the pattern is "d MMM - d MMM yyyy", the interval format is + * "10 Jan - 10 Feb, 2007". + * If the pattern is "latestFirst:d MMM - d MMM yyyy", + * the interval format is "10 Feb - 10 Jan, 2007" + */ + UBool laterDateFirst; + }; + + + /** + * default constructor + * @internal (private) + */ + DateIntervalFormat(); + + /** + * Construct a DateIntervalFormat from DateFormat, + * a DateIntervalInfo, and skeleton. + * DateFormat provides the timezone, calendar, + * full pattern, and date format symbols information. + * It should be a SimpleDateFormat object which + * has a pattern in it. + * the DateIntervalInfo provides the interval patterns. + * + * Note: the DateIntervalFormat takes ownership of both + * DateFormat and DateIntervalInfo objects. + * Caller should not delete them. + * + * @param locale the locale of this date interval formatter. + * @param dtItvInfo the DateIntervalInfo object to be adopted. + * @param skeleton the skeleton of the date formatter + * @param status output param set to success/failure code on exit + */ + DateIntervalFormat(const Locale& locale, DateIntervalInfo* dtItvInfo, + const UnicodeString* skeleton, UErrorCode& status); + + + /** + * Construct a DateIntervalFormat from DateFormat + * and a DateIntervalInfo. + * + * It is a wrapper of the constructor. 
+ * + * @param locale the locale of this date interval formatter. + * @param dtitvinf the DateIntervalInfo object to be adopted. + * @param skeleton the skeleton of this formatter. + * @param status Output param set to success/failure code. + * @return a date time interval formatter which the caller owns. + */ + static DateIntervalFormat* U_EXPORT2 create(const Locale& locale, + DateIntervalInfo* dtitvinf, + const UnicodeString* skeleton, + UErrorCode& status); + + /** + * Below are for generating interval patterns local to the formatter + */ + + /** + * Provide an updated FieldPosition posResult based on two formats, + * the FieldPosition values for each of them, and the pattern used + * to combine them. The idea is for posResult to indicate the first + * instance (if any) of the specified field in the combined result, + * with correct offsets. + * + * @param combiningPattern Pattern used to combine pat0 and pat1 + * @param pat0 Formatted date/time value to replace {0} + * @param pos0 FieldPosition within pat0 + * @param pat1 Formatted date/time value to replace {1} + * @param pos1 FieldPosition within pat1 + * @param posResult FieldPosition to be set to the correct + * position of the first field instance when + * pat0 and pat1 are combined using combiningPattern + */ + static void + adjustPosition(UnicodeString& combiningPattern, // has {0} and {1} in it + UnicodeString& pat0, FieldPosition& pos0, // pattern and pos corresponding to {0} + UnicodeString& pat1, FieldPosition& pos1, // pattern and pos corresponding to {1} + FieldPosition& posResult); + + + /** + * Format 2 Calendars using fall-back interval pattern + * + * The full pattern used in this fall-back format is the + * full pattern of the date formatter. + * + * gFormatterMutex must already be locked when calling this function. 
+ * + * @param fromCalendar calendar set to the from date in date interval + * to be formatted into date interval string + * @param toCalendar calendar set to the to date in date interval + * to be formatted into date interval string + * @param fromToOnSameDay TRUE iff from and to dates are on the same day + * (any difference is in ampm/hours or below) + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status output param set to success/failure code on exit + * @return Reference to 'appendTo' parameter. + * @internal (private) + */ + UnicodeString& fallbackFormat(Calendar& fromCalendar, + Calendar& toCalendar, + UBool fromToOnSameDay, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + + + /** + * Initialize interval patterns local to this formatter + * + * This code is a bit complicated since + * 1. the interval patterns saved in resource bundle files are interval + * patterns based on date or time only. + * It does not have interval patterns based on both date and time. + * Interval patterns on both date and time are algorithm generated. + * + * For example, it has interval patterns on skeleton "dMy" and "hm", + * but it does not have interval patterns on skeleton "dMyhm". + * + * The rule to generate interval patterns for both date and time skeleton are + * 1) when the year, month, or day differs, concatenate the two original + * expressions with a separator between, + * For example, interval pattern from "Jan 10, 2007 10:10 am" + * to "Jan 11, 2007 10:10am" is + * "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am" + * + * 2) otherwise, present the date followed by the range expression + * for the time. + * For example, interval pattern from "Jan 10, 2007 10:10 am" + * to "Jan 10, 2007 11:10am" is + * "Jan 10, 2007 10:10 am - 11:10am" + * + * 2. 
even if a pattern does not request a certain calendar field, + * the interval pattern needs to include such field if such fields are + * different between 2 dates. + * For example, a pattern/skeleton is "hm", but the interval pattern + * includes year, month, and date when year, month, and date differ. + * + * + * @param status output param set to success/failure code on exit + */ + void initializePattern(UErrorCode& status); + + + + /** + * Set fall back interval pattern given a calendar field + * and a skeleton. + * @param field the largest different calendar field + * @param skeleton a skeleton + * @param status output param set to success/failure code on exit + */ + void setFallbackPattern(UCalendarDateFields field, + const UnicodeString& skeleton, + UErrorCode& status); + + + + /** + * get separated date and time skeleton from a combined skeleton. + * + * The difference between date skeleton and normalizedDateSkeleton are: + * 1. both 'y' and 'd' appear only once in normalizedDateSkeleton + * 2. 'E' and 'EE' are normalized into 'EEE' + * 3. 'MM' is normalized into 'M' + * + ** the difference between time skeleton and normalizedTimeSkeleton are: + * 1. both 'H' and 'h' are normalized as 'h' in normalized time skeleton, + * 2. 'a' is omitted in normalized time skeleton. + * 3. there is only one appearance for 'h', 'm','v', 'z' in normalized time + * skeleton + * + * + * @param skeleton given combined skeleton. + * @param date Output parameter for date only skeleton. + * @param normalizedDate Output parameter for normalized date only + * skeleton. + * @param time Output parameter for time only skeleton. + * @param normalizedTime Output parameter for normalized time only + * skeleton. 
+ * + */ + static void U_EXPORT2 getDateTimeSkeleton(const UnicodeString& skeleton, + UnicodeString& date, + UnicodeString& normalizedDate, + UnicodeString& time, + UnicodeString& normalizedTime); + + + + /** + * Generate date or time interval pattern from resource, + * and set them into the interval pattern local to this formatter. + * + * It needs to handle the following: + * 1. need to adjust field width. + * For example, the interval patterns saved in DateIntervalInfo + * includes "dMMMy", but not "dMMMMy". + * Need to get interval patterns for dMMMMy from dMMMy. + * Another example, the interval patterns saved in DateIntervalInfo + * includes "hmv", but not "hmz". + * Need to get interval patterns for "hmz" from "hmv" + * + * 2. there might be no pattern for 'y' differ for skeleton "Md", + * in order to get interval patterns for 'y' differ, + * need to look for it from skeleton 'yMd' + * + * @param dateSkeleton normalized date skeleton + * @param timeSkeleton normalized time skeleton + * @return whether the resource is found for the skeleton. + * TRUE if interval pattern found for the skeleton, + * FALSE otherwise. + */ + UBool setSeparateDateTimePtn(const UnicodeString& dateSkeleton, + const UnicodeString& timeSkeleton); + + + + + /** + * Generate interval pattern from existing resource + * + * It not only saves the interval patterns, + * but also returns the extended skeleton and its best match skeleton. 
+ * + * @param field largest different calendar field + * @param skeleton skeleton + * @param bestSkeleton the best match skeleton which has interval pattern + * defined in resource + * @param differenceInfo the difference between skeleton and best skeleton + * 0 means the best matched skeleton is the same as input skeleton + * 1 means the fields are the same, but field width are different + * 2 means the only difference between fields are v/z, + * -1 means there are other fields difference + * + * @param extendedSkeleton extended skeleton + * @param extendedBestSkeleton extended best match skeleton + * @return whether the interval pattern is found + * through extending skeleton or not. + * TRUE if interval pattern is found by + * extending skeleton, FALSE otherwise. + */ + UBool setIntervalPattern(UCalendarDateFields field, + const UnicodeString* skeleton, + const UnicodeString* bestSkeleton, + int8_t differenceInfo, + UnicodeString* extendedSkeleton = NULL, + UnicodeString* extendedBestSkeleton = NULL); + + /** + * Adjust field width in best match interval pattern to match + * the field width in input skeleton. + * + * TODO (xji) make a general solution + * The adjusting rule can be: + * 1. always adjust + * 2. never adjust + * 3. default adjust, which means adjust according to the following rules + * 3.1 always adjust string, such as MMM and MMMM + * 3.2 never adjust between string and numeric, such as MM and MMM + * 3.3 always adjust year + * 3.4 do not adjust 'd', 'h', or 'm' if h is present + * 3.5 do not adjust 'M' if it is numeric(?) + * + * Since date interval format is well-formed format, + * date and time skeletons are normalized previously, + * till this stage, the adjust here is only "adjust strings", such as MMM + * and MMMM, EEE and EEEE. 
+ * + * @param inputSkeleton the input skeleton + * @param bestMatchSkeleton the best match skeleton + * @param bestMatchIntervalPattern the best match interval pattern + * @param differenceInfo the difference between 2 skeletons + * 1 means only field width differs + * 2 means v/z exchange + * @param adjustedIntervalPattern adjusted interval pattern + */ + static void U_EXPORT2 adjustFieldWidth( + const UnicodeString& inputSkeleton, + const UnicodeString& bestMatchSkeleton, + const UnicodeString& bestMatchIntervalPattern, + int8_t differenceInfo, + UnicodeString& adjustedIntervalPattern); + + /** + * Concat a single date pattern with a time interval pattern, + * set it into the intervalPatterns, while field is time field. + * This is used to handle time interval patterns on skeleton with + * both time and date. Present the date followed by + * the range expression for the time. + * @param format date and time format + * @param datePattern date pattern + * @param field time calendar field: AM_PM, HOUR, MINUTE + * @param status output param set to success/failure code on exit + */ + void concatSingleDate2TimeInterval(UnicodeString& format, + const UnicodeString& datePattern, + UCalendarDateFields field, + UErrorCode& status); + + /** + * Check whether a calendar field is present in a skeleton. + * @param field calendar field to check + * @param skeleton given skeleton on which to check the calendar field + * @return true if the field is present in the skeleton. + */ + static UBool U_EXPORT2 fieldExistsInSkeleton(UCalendarDateFields field, + const UnicodeString& skeleton); + + + /** + * Split interval patterns into 2 parts. + * @param intervalPattern interval pattern + * @return the index in interval pattern which splits the pattern into 2 parts + */ + static int32_t U_EXPORT2 splitPatternInto2Part(const UnicodeString& intervalPattern); + + + /** + * Break interval patterns into 2 parts and save them into pattern info. 
+ * @param field calendar field + * @param intervalPattern interval pattern + */ + void setIntervalPattern(UCalendarDateFields field, + const UnicodeString& intervalPattern); + + + /** + * Break interval patterns into 2 parts and save them into pattern info. + * @param field calendar field + * @param intervalPattern interval pattern + * @param laterDateFirst whether the later date appears first in interval pattern + */ + void setIntervalPattern(UCalendarDateFields field, + const UnicodeString& intervalPattern, + UBool laterDateFirst); + + + /** + * Set pattern information. + * + * @param field calendar field + * @param firstPart the first part in interval pattern + * @param secondPart the second part in interval pattern + * @param laterDateFirst whether the first date in intervalPattern + * is earlier date or later date + */ + void setPatternInfo(UCalendarDateFields field, + const UnicodeString* firstPart, + const UnicodeString* secondPart, + UBool laterDateFirst); + + /** + * Format 2 Calendars to produce a string. + * Implementation of the similar public format function. + * Must be called with gFormatterMutex already locked. + * + * Note: "fromCalendar" and "toCalendar" are not const, + * since calendar is not const in SimpleDateFormat::format(Calendar&). + * + * @param fromCalendar calendar set to the from date in date interval + * to be formatted into date interval string + * @param toCalendar calendar set to the to date in date interval + * to be formatted into date interval string + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param fieldPosition On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * There may be multiple instances of a given field type + * in an interval format; in this case the fieldPosition + * offsets refer to the first instance. + * @param status Output param filled with success/failure status. 
+ * Caller needs to make sure it is SUCCESS + * at the function entrance + * @return Reference to 'appendTo' parameter. + * @internal (private) + */ + UnicodeString& formatImpl(Calendar& fromCalendar, + Calendar& toCalendar, + UnicodeString& appendTo, + FieldPosition& fieldPosition, + UErrorCode& status) const ; + + + // from calendar field to pattern letter + static const UChar fgCalendarFieldToPatternLetter[]; + + + /** + * The interval patterns for this locale. + */ + DateIntervalInfo* fInfo; + + /** + * The DateFormat object used to format single pattern + */ + SimpleDateFormat* fDateFormat; + + /** + * The 2 calendars with the from and to date. + * could re-use the calendar in fDateFormat, + * but keeping 2 calendars make it clear and clean. + */ + Calendar* fFromCalendar; + Calendar* fToCalendar; + + Locale fLocale; + + /** + * Following are interval information relevant (locale) to this formatter. + */ + UnicodeString fSkeleton; + PatternInfo fIntervalPatterns[DateIntervalInfo::kIPI_MAX_INDEX]; + + /** + * Patterns for fallback formatting. + */ + UnicodeString* fDatePattern; + UnicodeString* fTimePattern; + UnicodeString* fDateTimeFormat; +}; + +inline UBool +DateIntervalFormat::operator!=(const Format& other) const { + return !operator==(other); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _DTITVFMT_H__ +//eof diff --git a/intl/icu/source/i18n/unicode/dtitvinf.h b/intl/icu/source/i18n/unicode/dtitvinf.h new file mode 100644 index 000000000..325eca66d --- /dev/null +++ b/intl/icu/source/i18n/unicode/dtitvinf.h @@ -0,0 +1,522 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2008-2016, International Business Machines Corporation and + * others. All Rights Reserved. 
+ ******************************************************************************* + * + * File DTITVINF.H + * + ******************************************************************************* + */ + +#ifndef __DTITVINF_H__ +#define __DTITVINF_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Date/Time interval patterns for formatting date/time interval + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/udat.h" +#include "unicode/locid.h" +#include "unicode/ucal.h" +#include "unicode/dtptngen.h" + +U_NAMESPACE_BEGIN + +/** + * DateIntervalInfo is a public class for encapsulating localizable + * date time interval patterns. It is used by DateIntervalFormat. + * + * <P> + * For most users, ordinary use of DateIntervalFormat does not need to create + * DateIntervalInfo object directly. + * DateIntervalFormat will take care of it when creating a date interval + * formatter when user pass in skeleton and locale. + * + * <P> + * For power users, who want to create their own date interval patterns, + * or want to re-set date interval patterns, they could do so by + * directly creating DateIntervalInfo and manupulating it. + * + * <P> + * Logically, the interval patterns are mappings + * from (skeleton, the_largest_different_calendar_field) + * to (date_interval_pattern). + * + * <P> + * A skeleton + * <ol> + * <li> + * only keeps the field pattern letter and ignores all other parts + * in a pattern, such as space, punctuations, and string literals. + * <li> + * hides the order of fields. + * <li> + * might hide a field's pattern letter length. + * + * For those non-digit calendar fields, the pattern letter length is + * important, such as MMM, MMMM, and MMMMM; EEE and EEEE, + * and the field's pattern letter length is honored. 
+ * + * For the digit calendar fields, such as M or MM, d or dd, yy or yyyy, + * the field pattern length is ignored and the best match, which is defined + * in date time patterns, will be returned without honoring the field pattern + * letter length in skeleton. + * </ol> + * + * <P> + * The calendar fields we support for interval formatting are: + * year, month, date, day-of-week, am-pm, hour, hour-of-day, and minute. + * Those calendar fields can be defined in the following order: + * year > month > date > am-pm > hour > minute + * + * The largest different calendar field between 2 calendars is the + * first different calendar field in above order. + * + * For example: the largest different calendar field between "Jan 10, 2007" + * and "Feb 20, 2008" is year. + * + * <P> + * There is a set of pre-defined static skeleton strings. + * There are pre-defined interval patterns for those pre-defined skeletons + * in locales' resource files. + * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is "yMMMd", + * in en_US, if the largest different calendar field between date1 and date2 + * is "year", the date interval pattern is "MMM d, yyyy - MMM d, yyyy", + * such as "Jan 10, 2007 - Jan 10, 2008". + * If the largest different calendar field between date1 and date2 is "month", + * the date interval pattern is "MMM d - MMM d, yyyy", + * such as "Jan 10 - Feb 10, 2007". + * If the largest different calendar field between date1 and date2 is "day", + * the date interval pattern is "MMM d-d, yyyy", such as "Jan 10-20, 2007". + * + * For date skeleton, the interval patterns when year, or month, or date is + * different are defined in resource files. + * For time skeleton, the interval patterns when am/pm, or hour, or minute is + * different are defined in resource files. + * + * + * <P> + * There are 2 dates in interval pattern. For most locales, the first date + * in an interval pattern is the earlier date.
There might be a locale in which + * the first date in an interval pattern is the later date. + * We use fallback format for the default order for the locale. + * For example, if the fallback format is "{0} - {1}", it means + * the first date in the interval pattern for this locale is the earlier date. + * If the fallback format is "{1} - {0}", it means the first date is the + * later date. + * For a particular interval pattern, the default order can be overridden + * by prefixing "latestFirst:" or "earliestFirst:" to the interval pattern. + * For example, if the fallback format is "{0}-{1}", + * but for skeleton "yMMMd", the interval pattern when day is different is + * "latestFirst:d-d MMM yy", it means by default, the first date in interval + * pattern is the earlier date. But for skeleton "yMMMd", when day is different, + * the first date in "d-d MMM yy" is the later date. + * + * <P> + * The recommended way to create a DateIntervalFormat object is to pass in + * the locale. + * By using a Locale parameter, the DateIntervalFormat object is + * initialized with the pre-defined interval patterns for a given or + * default locale. + * <P> + * Users can also create DateIntervalFormat object + * by supplying their own interval patterns. + * It provides flexibility for power users. + * + * <P> + * After a DateIntervalInfo object is created, clients may modify + * the interval patterns using setIntervalPattern function as so desired. + * Currently, users can only set interval patterns when the following + * calendar fields are different: ERA, YEAR, MONTH, DATE, DAY_OF_MONTH, + * DAY_OF_WEEK, AM_PM, HOUR, HOUR_OF_DAY, and MINUTE. + * Interval patterns when other calendar fields are different are not supported. + * <P> + * DateIntervalInfo objects are cloneable. + * When clients obtain a DateIntervalInfo object, + * they can feel free to modify it as necessary. + * <P> + * DateIntervalInfo is not expected to be subclassed.
+ * Data for a calendar is loaded out of resource bundles. + * Through ICU 4.4, date interval patterns are only supported in the Gregorian + * calendar; non-Gregorian calendars are supported from ICU 4.4.1. + * @stable ICU 4.0 +**/ + +class U_I18N_API DateIntervalInfo U_FINAL : public UObject { +public: + // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API + // or else the compiler will create a public default constructor. + /** + * Default constructor. + * It does not initialize any interval patterns except + * that it initializes the default fall-back pattern as "{0} - {1}", + * which can be reset by setFallbackIntervalPattern(). + * It should be followed by setFallbackIntervalPattern() and + * setIntervalPattern(), + * and is recommended to be used only by power users who + * want to create their own interval patterns and use them to create + * a date interval formatter. + * @param status output param set to success/failure code on exit + * @internal ICU 4.0 + */ + DateIntervalInfo(UErrorCode& status); + + + /** + * Construct DateIntervalInfo for the given locale. + * @param locale the interval patterns are loaded from the appropriate calendar + * data (specified calendar or default calendar) in this locale. + * @param status output param set to success/failure code on exit + * @stable ICU 4.0 + */ + DateIntervalInfo(const Locale& locale, UErrorCode& status); + + + /** + * Copy constructor. + * @stable ICU 4.0 + */ + DateIntervalInfo(const DateIntervalInfo&); + + /** + * Assignment operator. + * @stable ICU 4.0 + */ + DateIntervalInfo& operator=(const DateIntervalInfo&); + + /** + * Clone this object polymorphically. + * The caller owns the result and should delete it when done. + * @return a copy of the object + * @stable ICU 4.0 + */ + virtual DateIntervalInfo* clone(void) const; + + /** + * Destructor. + * It is virtual to be safe, but it is not designed to be subclassed.
+ * @stable ICU 4.0 + */ + virtual ~DateIntervalInfo(); + + + /** + * Return true if another object is semantically equal to this one. + * + * @param other the DateIntervalInfo object to be compared with. + * @return true if other is semantically equal to this. + * @stable ICU 4.0 + */ + virtual UBool operator==(const DateIntervalInfo& other) const; + + /** + * Return true if another object is semantically unequal to this one. + * + * @param other the DateIntervalInfo object to be compared with. + * @return true if other is semantically unequal to this. + * @stable ICU 4.0 + */ + UBool operator!=(const DateIntervalInfo& other) const; + + + + /** + * Provides a way for clients to build interval patterns. + * Users can construct a DateIntervalInfo by providing a list of skeletons + * and their patterns. + * <P> + * For example: + * <pre> + * UErrorCode status = U_ZERO_ERROR; + * DateIntervalInfo* dIntervalInfo = new DateIntervalInfo(status); + * dIntervalInfo->setFallbackIntervalPattern("{0} ~ {1}", status); + * dIntervalInfo->setIntervalPattern("yMd", UCAL_YEAR, "'from' yyyy-M-d 'to' yyyy-M-d", status); + * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_MONTH, "'from' yyyy MMM d 'to' MMM d", status); + * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_DATE, "yyyy MMM d-d", status); + * </pre> + * + * Restriction: + * Currently, users can only set interval patterns when the following + * calendar fields are different: ERA, YEAR, MONTH, DATE, DAY_OF_MONTH, + * DAY_OF_WEEK, AM_PM, HOUR, HOUR_OF_DAY, and MINUTE. + * Interval patterns when other calendar fields are different are + * not supported. + * + * @param skeleton the skeleton on which the interval pattern is based + * @param lrgDiffCalUnit the largest different calendar unit. + * @param intervalPattern the interval pattern on the largest different + * calendar unit. + * For example, if lrgDiffCalUnit is + * "year", the interval pattern for en_US when year + * is different could be "'from' yyyy 'to' yyyy".
+ * @param status output param set to success/failure code on exit + * @stable ICU 4.0 + */ + void setIntervalPattern(const UnicodeString& skeleton, + UCalendarDateFields lrgDiffCalUnit, + const UnicodeString& intervalPattern, + UErrorCode& status); + + /** + * Get the interval pattern given skeleton and + * the largest different calendar field. + * @param skeleton the skeleton + * @param field the largest different calendar field + * @param result output param to receive the pattern + * @param status output param set to success/failure code on exit + * @return a reference to 'result' + * @stable ICU 4.0 + */ + UnicodeString& getIntervalPattern(const UnicodeString& skeleton, + UCalendarDateFields field, + UnicodeString& result, + UErrorCode& status) const; + + /** + * Get the fallback interval pattern. + * @param result output param to receive the pattern + * @return a reference to 'result' + * @stable ICU 4.0 + */ + UnicodeString& getFallbackIntervalPattern(UnicodeString& result) const; + + + /** + * Re-set the fallback interval pattern. + * + * In construction, the default fallback pattern is set as "{0} - {1}". + * And the constructor taking locale as parameter will set the + * fallback pattern to what is defined in the locale resource file. + * + * This method provides a way for the user to replace the fallback pattern. + * + * @param fallbackPattern fall-back interval pattern. + * @param status output param set to success/failure code on exit + * @stable ICU 4.0 + */ + void setFallbackIntervalPattern(const UnicodeString& fallbackPattern, + UErrorCode& status); + + + /** Get default order -- whether the first date in pattern is the later date + or not. + * @return default date ordering in interval pattern. TRUE if the first date + * in pattern is the later date, FALSE otherwise. + * @stable ICU 4.0 + */ + UBool getDefaultOrder() const; + + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * + * @stable ICU 4.0 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 4.0 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + +private: + /** + * DateIntervalFormat will need access to + * getBestSkeleton(), parseSkeleton(), enum IntervalPatternIndex, + * and calendarFieldToIntervalIndex(). + * + * Instead of making above public, + * make DateIntervalFormat a friend of DateIntervalInfo. + */ + friend class DateIntervalFormat; + + /** + * Internal struct used to load resource bundle data. + */ + struct DateIntervalSink; + + /** + * Following is for saving the interval patterns. + * The index has one slot each for + * ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE, and SECOND + */ + enum IntervalPatternIndex + { + kIPI_ERA, + kIPI_YEAR, + kIPI_MONTH, + kIPI_DATE, + kIPI_AM_PM, + kIPI_HOUR, + kIPI_MINUTE, + kIPI_SECOND, + kIPI_MAX_INDEX + }; +public: +#ifndef U_HIDE_INTERNAL_API + /** + * Max index for stored interval patterns + * @internal ICU 4.4 + */ + enum { + kMaxIntervalPatternIndex = kIPI_MAX_INDEX + }; +#endif /* U_HIDE_INTERNAL_API */ +private: + + + /** + * Initialize the DateIntervalInfo from locale + * @param locale the given locale. + * @param status output param set to success/failure code on exit + */ + void initializeData(const Locale& locale, UErrorCode& status); + + + /* Set Interval pattern. + * + * It sets interval pattern into the hash map. + * + * @param skeleton skeleton on which the interval pattern is based + * @param lrgDiffCalUnit the largest different calendar unit. + * @param intervalPattern the interval pattern on the largest different + * calendar unit.
+ * @param status output param set to success/failure code on exit + */ + void setIntervalPatternInternally(const UnicodeString& skeleton, + UCalendarDateFields lrgDiffCalUnit, + const UnicodeString& intervalPattern, + UErrorCode& status); + + + /** Given an input skeleton, get the best match skeleton + * which has pre-defined interval pattern in resource file. + * Also return the difference between the input skeleton + * and the best match skeleton. + * + * TODO (xji): set field weight or + * isolate the functionality in DateTimePatternGenerator + * @param skeleton input skeleton + * @param bestMatchDistanceInfo the difference between input skeleton + * and best match skeleton. + * 0, if there is exact match for input skeleton + * 1, if there is only field width difference between + * the best match and the input skeleton + * 2, the only field difference is 'v' and 'z' + * -1, if there is calendar field difference between + * the best match and the input skeleton + * @return best match skeleton + */ + const UnicodeString* getBestSkeleton(const UnicodeString& skeleton, + int8_t& bestMatchDistanceInfo) const; + + + /** + * Parse skeleton, save each field's width. + * It is used for looking for the best match skeleton, + * and for adjusting pattern field width. + * @param skeleton skeleton to be parsed + * @param skeletonFieldWidth parsed skeleton field width + */ + static void U_EXPORT2 parseSkeleton(const UnicodeString& skeleton, + int32_t* skeletonFieldWidth); + + + /** + * Check whether one field width is numeric while the other is string. + * + * TODO (xji): make it general + * + * @param fieldWidth one field width + * @param anotherFieldWidth another field width + * @param patternLetter pattern letter char + * @return true if one field width is numeric and the other is string, + * false otherwise.
+ */ + static UBool U_EXPORT2 stringNumeric(int32_t fieldWidth, + int32_t anotherFieldWidth, + char patternLetter); + + + /** + * Convert calendar field to the interval pattern index in + * hash table. + * + * Since we only support the following calendar fields: + * ERA, YEAR, MONTH, DATE, DAY_OF_MONTH, DAY_OF_WEEK, + * AM_PM, HOUR, HOUR_OF_DAY, and MINUTE, + * we reserve only kIPI_MAX_INDEX interval patterns for a skeleton. + * + * @param field calendar field + * @param status output param set to success/failure code on exit + * @return interval pattern index in hash table + */ + static IntervalPatternIndex U_EXPORT2 calendarFieldToIntervalIndex( + UCalendarDateFields field, + UErrorCode& status); + + + /** + * Delete hash table (of type fIntervalPatterns). + * + * @param hTable hash table to be deleted + */ + void deleteHash(Hashtable* hTable); + + + /** + * Initialize hash table (of type fIntervalPatterns). + * + * @param status output param set to success/failure code on exit + * @return hash table initialized + */ + Hashtable* initHash(UErrorCode& status); + + + + /** + * copy hash table (of type fIntervalPatterns).
+ * + * @param source the source to copy from + * @param target the target to copy to + * @param status output param set to success/failure code on exit + */ + void copyHash(const Hashtable* source, Hashtable* target, UErrorCode& status); + + + // data members + // fallback interval pattern + UnicodeString fFallbackIntervalPattern; + // default order + UBool fFirstDateInPtnIsLaterDate; + + // HashMap<UnicodeString, UnicodeString[kIPI_MAX_INDEX]> + // HashMap( skeleton, pattern[largest_different_field] ) + Hashtable* fIntervalPatterns; + +};// end class DateIntervalInfo + + +inline UBool +DateIntervalInfo::operator!=(const DateIntervalInfo& other) const { + return !operator==(other); +} + + +U_NAMESPACE_END + +#endif + +#endif + diff --git a/intl/icu/source/i18n/unicode/dtptngen.h b/intl/icu/source/i18n/unicode/dtptngen.h new file mode 100644 index 000000000..fd617ce3c --- /dev/null +++ b/intl/icu/source/i18n/unicode/dtptngen.h @@ -0,0 +1,567 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File DTPTNGEN.H +* +******************************************************************************* +*/ + +#ifndef __DTPTNGEN_H__ +#define __DTPTNGEN_H__ + +#include "unicode/datefmt.h" +#include "unicode/locid.h" +#include "unicode/udat.h" +#include "unicode/udatpg.h" + +U_NAMESPACE_BEGIN + +/** + * \file + * \brief C++ API: Date/Time Pattern Generator + */ + + +class Hashtable; +class FormatParser; +class DateTimeMatcher; +class DistanceInfo; +class PatternMap; +class PtnSkeleton; +class SharedDateTimePatternGenerator; + +/** + * This class provides flexible generation of date format patterns, like "yy-MM-dd". 
+ * The user can build up the generator by adding successive patterns. Once that + * is done, a query can be made using a "skeleton", which is a pattern which just + * includes the desired fields and lengths. The generator will return the "best fit" + * pattern corresponding to that skeleton. + * <p>The main method people will use is getBestPattern(String skeleton), + * since normally this class is pre-built with data from a particular locale. + * However, generators can be built directly from other data as well. + * <p><i>Issue: may be useful to also have a function that returns the list of + * fields in a pattern, in order, since we have that internally. + * That would be useful for getting the UI order of field elements.</i> + * @stable ICU 3.8 +**/ +class U_I18N_API DateTimePatternGenerator : public UObject { +public: + /** + * Construct a flexible generator according to default locale. + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @stable ICU 3.8 + */ + static DateTimePatternGenerator* U_EXPORT2 createInstance(UErrorCode& status); + + /** + * Construct a flexible generator according to data for a given locale. + * @param uLocale + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @stable ICU 3.8 + */ + static DateTimePatternGenerator* U_EXPORT2 createInstance(const Locale& uLocale, UErrorCode& status); + +#ifndef U_HIDE_INTERNAL_API + + /** + * For ICU use only + * + * @internal + */ + static DateTimePatternGenerator* U_EXPORT2 internalMakeInstance(const Locale& uLocale, UErrorCode& status); + +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Create an empty generator, to be constructed with addPattern(...) etc. + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. 
+ * @stable ICU 3.8 + */ + static DateTimePatternGenerator* U_EXPORT2 createEmptyInstance(UErrorCode& status); + + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~DateTimePatternGenerator(); + + /** + * Clone DateTimePatternGenerator object. Clients are responsible for + * deleting the DateTimePatternGenerator object cloned. + * @stable ICU 3.8 + */ + DateTimePatternGenerator* clone() const; + + /** + * Return true if another object is semantically equal to this one. + * + * @param other the DateTimePatternGenerator object to be compared with. + * @return true if other is semantically equal to this. + * @stable ICU 3.8 + */ + UBool operator==(const DateTimePatternGenerator& other) const; + + /** + * Return true if another object is semantically unequal to this one. + * + * @param other the DateTimePatternGenerator object to be compared with. + * @return true if other is semantically unequal to this. + * @stable ICU 3.8 + */ + UBool operator!=(const DateTimePatternGenerator& other) const; + + /** + * Utility to return a unique skeleton from a given pattern. For example, + * both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd". + * + * @param pattern Input pattern, such as "dd/MMM" + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return skeleton such as "MMMdd" + * @stable ICU 56 + */ + static UnicodeString staticGetSkeleton(const UnicodeString& pattern, UErrorCode& status); + + /** + * Utility to return a unique skeleton from a given pattern. For example, + * both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd". + * getSkeleton() works exactly like staticGetSkeleton(). + * Use staticGetSkeleton() instead of getSkeleton(). + * + * @param pattern Input pattern, such as "dd/MMM" + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. 
+ * @return skeleton such as "MMMdd" + * @stable ICU 3.8 + */ + UnicodeString getSkeleton(const UnicodeString& pattern, UErrorCode& status); /* { + The function is commented out because it is a stable API calling a draft API. + After staticGetSkeleton becomes stable, staticGetSkeleton can be used and + these comments and the definition of getSkeleton in dtptngen.cpp should be removed. + return staticGetSkeleton(pattern, status); + }*/ + + /** + * Utility to return a unique base skeleton from a given pattern. This is + * the same as the skeleton, except that differences in length are minimized + * so as to only preserve the difference between string and numeric form. So + * for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd" + * (notice the single d). + * + * @param pattern Input pattern, such as "dd/MMM" + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return base skeleton, such as "MMMd" + * @stable ICU 56 + */ + static UnicodeString staticGetBaseSkeleton(const UnicodeString& pattern, UErrorCode& status); + + /** + * Utility to return a unique base skeleton from a given pattern. This is + * the same as the skeleton, except that differences in length are minimized + * so as to only preserve the difference between string and numeric form. So + * for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd" + * (notice the single d). + * getBaseSkeleton() works exactly like staticGetBaseSkeleton(). + * Use staticGetBaseSkeleton() instead of getBaseSkeleton(). + * + * @param pattern Input pattern, such as "dd/MMM" + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. 
+ * @return base skeleton, such as "MMMd" + * @stable ICU 3.8 + */ + UnicodeString getBaseSkeleton(const UnicodeString& pattern, UErrorCode& status); /* { + The function is commented out because it is a stable API calling a draft API. + After staticGetBaseSkeleton becomes stable, staticGetBaseSkeleton can be used and + these comments and the definition of getBaseSkeleton in dtptngen.cpp should be removed. + return staticGetBaseSkeleton(pattern, status); + }*/ + + /** + * Adds a pattern to the generator. If the pattern has the same skeleton as + * an existing pattern, and the override parameter is set, then the previous + * value is overridden. Otherwise, the previous value is retained. In either + * case, the conflicting status is set and the previous value is stored in + * conflicting pattern. + * <p> + * Note that single-field patterns (like "MMM") are automatically added, and + * don't need to be added explicitly! + * + * @param pattern Input pattern, such as "dd/MMM" + * @param override When existing values are to be overridden use true, + * otherwise use false. + * @param conflictingPattern Previous pattern with the same skeleton. + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return conflicting status. The value could be UDATPG_NO_CONFLICT, + * UDATPG_BASE_CONFLICT or UDATPG_CONFLICT. + * @stable ICU 3.8 + * <p> + * <h4>Sample code</h4> + * \snippet samples/dtptngsample/dtptngsample.cpp getBestPatternExample1 + * \snippet samples/dtptngsample/dtptngsample.cpp addPatternExample + * <p> + */ + UDateTimePatternConflict addPattern(const UnicodeString& pattern, + UBool override, + UnicodeString& conflictingPattern, + UErrorCode& status); + + /** + * An AppendItem format is a pattern used to append a field if there is no + * good match.
For example, suppose that the input skeleton is "GyyyyMMMd", + * and there is no matching pattern internally, but there is a pattern + * matching "yyyyMMMd", say "d-MM-yyyy". Then that pattern is used, plus the + * G. The way these two are conjoined is by using the AppendItemFormat for G + * (era). So if that value is, say "{0}, {1}" then the final resulting + * pattern is "d-MM-yyyy, G". + * <p> + * There are actually three available variables: {0} is the pattern so far, + * {1} is the element we are adding, and {2} is the name of the element. + * <p> + * This reflects the way that the CLDR data is organized. + * + * @param field such as UDATPG_ERA_FIELD. + * @param value pattern, such as "{0}, {1}" + * @stable ICU 3.8 + */ + void setAppendItemFormat(UDateTimePatternField field, const UnicodeString& value); + + /** + * Getter corresponding to setAppendItemFormat. Values below 0 or at or + * above UDATPG_FIELD_COUNT are illegal arguments. + * + * @param field such as UDATPG_ERA_FIELD. + * @return append pattern for field + * @stable ICU 3.8 + */ + const UnicodeString& getAppendItemFormat(UDateTimePatternField field) const; + + /** + * Sets the names of field, eg "era" in English for ERA. These are only + * used if the corresponding AppendItemFormat is used, and if it contains a + * {2} variable. + * <p> + * This reflects the way that the CLDR data is organized. + * + * @param field such as UDATPG_ERA_FIELD. + * @param value name of the field + * @stable ICU 3.8 + */ + void setAppendItemName(UDateTimePatternField field, const UnicodeString& value); + + /** + * Getter corresponding to setAppendItemNames. Values below 0 or at or above + * UDATPG_FIELD_COUNT are illegal arguments. + * + * @param field such as UDATPG_ERA_FIELD. + * @return name for field + * @stable ICU 3.8 + */ + const UnicodeString& getAppendItemName(UDateTimePatternField field) const; + + /** + * The DateTimeFormat is a message format pattern used to compose date and + * time patterns. 
The default pattern in the root locale is "{1} {0}", where + * {1} will be replaced by the date pattern and {0} will be replaced by the + * time pattern; however, other locales may specify patterns such as + * "{1}, {0}" or "{1} 'at' {0}", etc. + * <p> + * This is used when the input skeleton contains both date and time fields, + * but there is not a close match among the added patterns. For example, + * suppose that this object was created by adding "dd-MMM" and "hh:mm", and + * its datetimeFormat is the default "{1} {0}". Then if the input skeleton + * is "MMMdhmm", there is not an exact match, so the input skeleton is + * broken up into two components "MMMd" and "hmm". There are close matches + * for those two skeletons, so the result is put together with this pattern, + * resulting in "d-MMM h:mm". + * + * @param dateTimeFormat + * message format pattern, here {1} will be replaced by the date + * pattern and {0} will be replaced by the time pattern. + * @stable ICU 3.8 + */ + void setDateTimeFormat(const UnicodeString& dateTimeFormat); + + /** + * Getter corresponding to setDateTimeFormat. + * @return DateTimeFormat. + * @stable ICU 3.8 + */ + const UnicodeString& getDateTimeFormat() const; + + /** + * Return the best pattern matching the input skeleton. It is guaranteed to + * have all of the fields in the skeleton. + * + * @param skeleton + * The skeleton is a pattern containing only the variable fields. + * For example, "MMMdd" and "mmhh" are skeletons. + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return bestPattern + * The best pattern found from the given skeleton. 
+ * @stable ICU 3.8 + * <p> + * <h4>Sample code</h4> + * \snippet samples/dtptngsample/dtptngsample.cpp getBestPatternExample1 + * \snippet samples/dtptngsample/dtptngsample.cpp getBestPatternExample + * <p> + */ + UnicodeString getBestPattern(const UnicodeString& skeleton, UErrorCode& status); + + + /** + * Return the best pattern matching the input skeleton. It is guaranteed to + * have all of the fields in the skeleton. + * + * @param skeleton + * The skeleton is a pattern containing only the variable fields. + * For example, "MMMdd" and "mmhh" are skeletons. + * @param options + * Options for forcing the length of specified fields in the + * returned pattern to match those in the skeleton (when this + * would not happen otherwise). For default behavior, use + * UDATPG_MATCH_NO_OPTIONS. + * @param status + * Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return bestPattern + * The best pattern found from the given skeleton. + * @stable ICU 4.4 + */ + UnicodeString getBestPattern(const UnicodeString& skeleton, + UDateTimePatternMatchOptions options, + UErrorCode& status); + + + /** + * Adjusts the field types (width and subtype) of a pattern to match what is + * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a + * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be + * "dd-MMMM hh:mm". This is used internally to get the best match for the + * input skeleton, but can also be used externally. + * + * @param pattern Input pattern + * @param skeleton + * The skeleton is a pattern containing only the variable fields. + * For example, "MMMdd" and "mmhh" are skeletons. + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return pattern adjusted to match the skeleton fields widths and subtypes. 
+ * @stable ICU 3.8 + * <p> + * <h4>Sample code</h4> + * \snippet samples/dtptngsample/dtptngsample.cpp getBestPatternExample1 + * \snippet samples/dtptngsample/dtptngsample.cpp replaceFieldTypesExample + * <p> + */ + UnicodeString replaceFieldTypes(const UnicodeString& pattern, + const UnicodeString& skeleton, + UErrorCode& status); + + /** + * Adjusts the field types (width and subtype) of a pattern to match what is + * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a + * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be + * "dd-MMMM hh:mm". This is used internally to get the best match for the + * input skeleton, but can also be used externally. + * + * @param pattern Input pattern + * @param skeleton + * The skeleton is a pattern containing only the variable fields. + * For example, "MMMdd" and "mmhh" are skeletons. + * @param options + * Options controlling whether the length of specified fields in the + * pattern are adjusted to match those in the skeleton (when this + * would not happen otherwise). For default behavior, use + * UDATPG_MATCH_NO_OPTIONS. + * @param status + * Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return pattern adjusted to match the skeleton fields widths and subtypes. + * @stable ICU 4.4 + */ + UnicodeString replaceFieldTypes(const UnicodeString& pattern, + const UnicodeString& skeleton, + UDateTimePatternMatchOptions options, + UErrorCode& status); + + /** + * Return a list of all the skeletons (in canonical form) from this class. + * + * Call getPatternForSkeleton() to get the corresponding pattern. + * + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return StringEnumeration with the skeletons. + * The caller must delete the object. 
+ * @stable ICU 3.8 + */ + StringEnumeration* getSkeletons(UErrorCode& status) const; + + /** + * Get the pattern corresponding to a given skeleton. + * @param skeleton + * @return pattern corresponding to a given skeleton. + * @stable ICU 3.8 + */ + const UnicodeString& getPatternForSkeleton(const UnicodeString& skeleton) const; + + /** + * Return a list of all the base skeletons (in canonical form) from this class. + * + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return a StringEnumeration with the base skeletons. + * The caller must delete the object. + * @stable ICU 3.8 + */ + StringEnumeration* getBaseSkeletons(UErrorCode& status) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * Return a list of redundant patterns are those which if removed, make no + * difference in the resulting getBestPattern values. This method returns a + * list of them, to help check the consistency of the patterns used to build + * this generator. + * + * @param status Output param set to success/failure code on exit, + * which must not indicate a failure before the function call. + * @return a StringEnumeration with the redundant pattern. + * The caller must delete the object. + * @internal ICU 3.8 + */ + StringEnumeration* getRedundants(UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * The decimal value is used in formatting fractions of seconds. If the + * skeleton contains fractional seconds, then this is used with the + * fractional seconds. For example, suppose that the input pattern is + * "hhmmssSSSS", and the best matching pattern internally is "H:mm:ss", and + * the decimal string is ",". Then the resulting pattern is modified to be + * "H:mm:ss,SSSS" + * + * @param decimal + * @stable ICU 3.8 + */ + void setDecimal(const UnicodeString& decimal); + + /** + * Getter corresponding to setDecimal. 
+ * @return UnicodeString corresponding to the decimal point + * @stable ICU 3.8 + */ + const UnicodeString& getDecimal() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +private: + /** + * Constructor. + * @stable ICU 3.8 + */ + DateTimePatternGenerator(UErrorCode & status); + + /** + * Constructor. + * @stable ICU 3.8 + */ + DateTimePatternGenerator(const Locale& locale, UErrorCode & status); + + /** + * Copy constructor. + * @param other DateTimePatternGenerator to copy + * @stable ICU 3.8 + */ + DateTimePatternGenerator(const DateTimePatternGenerator& other); + + /** + * Default assignment operator. + * @param other DateTimePatternGenerator to copy + * @stable ICU 3.8 + */ + DateTimePatternGenerator& operator=(const DateTimePatternGenerator& other); + + Locale pLocale; // pattern locale + FormatParser *fp; + DateTimeMatcher* dtMatcher; + DistanceInfo *distanceInfo; + PatternMap *patternMap; + UnicodeString appendItemFormats[UDATPG_FIELD_COUNT]; + UnicodeString appendItemNames[UDATPG_FIELD_COUNT]; + UnicodeString dateTimeFormat; + UnicodeString decimal; + DateTimeMatcher *skipMatcher; + Hashtable *fAvailableFormatKeyHash; + UnicodeString emptyString; + UChar fDefaultHourFormatChar; + + int32_t fAllowedHourFormats[7]; // Actually an array of AllowedHourFormat enum type, ending with UNKNOWN. + + /* internal flags masks for adjustFieldTypes etc. 
*/ + enum { + kDTPGNoFlags = 0, + kDTPGFixFractionalSeconds = 1, + kDTPGSkeletonUsesCapJ = 2, + kDTPGSkeletonUsesLowB = 3, + kDTPGSkeletonUsesCapB = 4 + }; + + void initData(const Locale &locale, UErrorCode &status); + void addCanonicalItems(UErrorCode &status); + void addICUPatterns(const Locale& locale, UErrorCode& status); + void hackTimes(const UnicodeString& hackPattern, UErrorCode& status); + void getCalendarTypeToUse(const Locale& locale, CharString& destination, UErrorCode& err); + void consumeShortTimePattern(const UnicodeString& shortTimePattern, UErrorCode& status); + void addCLDRData(const Locale& locale, UErrorCode& status); + UDateTimePatternConflict addPatternWithSkeleton(const UnicodeString& pattern, const UnicodeString * skeletonToUse, UBool override, UnicodeString& conflictingPattern, UErrorCode& status); + void initHashtable(UErrorCode& status); + void setDateTimeFromCalendar(const Locale& locale, UErrorCode& status); + void setDecimalSymbols(const Locale& locale, UErrorCode& status); + UDateTimePatternField getAppendFormatNumber(const char* field) const; + UDateTimePatternField getAppendNameNumber(const char* field) const; + UnicodeString& getMutableAppendItemName(UDateTimePatternField field); + void getAppendName(UDateTimePatternField field, UnicodeString& value); + int32_t getCanonicalIndex(const UnicodeString& field); + const UnicodeString* getBestRaw(DateTimeMatcher& source, int32_t includeMask, DistanceInfo* missingFields, const PtnSkeleton** specifiedSkeletonPtr = 0); + UnicodeString adjustFieldTypes(const UnicodeString& pattern, const PtnSkeleton* specifiedSkeleton, int32_t flags, UDateTimePatternMatchOptions options = UDATPG_MATCH_NO_OPTIONS); + UnicodeString getBestAppending(int32_t missingFields, int32_t flags, UDateTimePatternMatchOptions options = UDATPG_MATCH_NO_OPTIONS); + int32_t getTopBitNumber(int32_t foundMask); + void setAvailableFormat(const UnicodeString &key, UErrorCode& status); + UBool isAvailableFormatSet(const 
UnicodeString &key) const; + void copyHashtable(Hashtable *other, UErrorCode &status); + UBool isCanonicalItem(const UnicodeString& item) const; + static void U_CALLCONV loadAllowedHourFormatsData(UErrorCode &status); + void getAllowedHourFormats(const Locale &locale, UErrorCode &status); + + struct AppendItemFormatsSink; + struct AppendItemNamesSink; + struct AvailableFormatsSink; +} ;// end class DateTimePatternGenerator + +U_NAMESPACE_END + +#endif diff --git a/intl/icu/source/i18n/unicode/dtrule.h b/intl/icu/source/i18n/unicode/dtrule.h new file mode 100644 index 000000000..fa1294e95 --- /dev/null +++ b/intl/icu/source/i18n/unicode/dtrule.h @@ -0,0 +1,252 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +#ifndef DTRULE_H +#define DTRULE_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Rule for specifying date and time in a year + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN +/** + * <code>DateTimeRule</code> is a class representing a time in a year by + * a rule specified by month, day of month, day of week and + * time in the day. + * + * @stable ICU 3.8 + */ +class U_I18N_API DateTimeRule : public UObject { +public: + + /** + * Date rule type constants. + * @stable ICU 3.8 + */ + enum DateRuleType { + DOM = 0, /**< The exact day of month, + for example, March 11. */ + DOW, /**< The Nth occurrence of the day of week, + for example, 2nd Sunday in March. */ + DOW_GEQ_DOM, /**< The first occurrence of the day of week on or after the day of month, + for example, first Sunday on or after March 8. 
*/ + DOW_LEQ_DOM /**< The last occurrence of the day of week on or before the day of month, + for example, last Sunday on or before March 14. */ + }; + + /** + * Time rule type constants. + * @stable ICU 3.8 + */ + enum TimeRuleType { + WALL_TIME = 0, /**< The local wall clock time */ + STANDARD_TIME, /**< The local standard time */ + UTC_TIME /**< The UTC time */ + }; + + /** + * Constructs a <code>DateTimeRule</code> by the day of month and + * the time rule. The date rule type for an instance created by + * this constructor is <code>DOM</code>. + * + * @param month The rule month, for example, <code>Calendar::JANUARY</code> + * @param dayOfMonth The day of month, 1-based. + * @param millisInDay The milliseconds in the rule date. + * @param timeType The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code> + * or <code>UTC_TIME</code>. + * @stable ICU 3.8 + */ + DateTimeRule(int32_t month, int32_t dayOfMonth, + int32_t millisInDay, TimeRuleType timeType); + + /** + * Constructs a <code>DateTimeRule</code> by the day of week and its ordinal + * number and the time rule. The date rule type for an instance created + * by this constructor is <code>DOW</code>. + * + * @param month The rule month, for example, <code>Calendar::JANUARY</code>. + * @param weekInMonth The ordinal number of the day of week. Negative number + * may be used for specifying a rule date counted from the + * end of the rule month. + * @param dayOfWeek The day of week, for example, <code>Calendar::SUNDAY</code>. + * @param millisInDay The milliseconds in the rule date. + * @param timeType The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code> + * or <code>UTC_TIME</code>. + * @stable ICU 3.8 + */ + DateTimeRule(int32_t month, int32_t weekInMonth, int32_t dayOfWeek, + int32_t millisInDay, TimeRuleType timeType); + + /** + * Constructs a <code>DateTimeRule</code> by the first/last day of week + * on or after/before the day of month and the time rule. 
The date rule + * type for an instance created by this constructor is either + * <code>DOW_GEQ_DOM</code> or <code>DOW_LEQ_DOM</code>. + * + * @param month The rule month, for example, <code>Calendar::JANUARY</code> + * @param dayOfMonth The day of month, 1-based. + * @param dayOfWeek The day of week, for example, <code>Calendar::SUNDAY</code>. + * @param after true if the rule date is on or after the day of month. + * @param millisInDay The milliseconds in the rule date. + * @param timeType The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code> + * or <code>UTC_TIME</code>. + * @stable ICU 3.8 + */ + DateTimeRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, UBool after, + int32_t millisInDay, TimeRuleType timeType); + + /** + * Copy constructor. + * @param source The DateTimeRule object to be copied. + * @stable ICU 3.8 + */ + DateTimeRule(const DateTimeRule& source); + + /** + * Destructor. + * @stable ICU 3.8 + */ + ~DateTimeRule(); + + /** + * Clone this DateTimeRule object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 3.8 + */ + DateTimeRule* clone(void) const; + + /** + * Assignment operator. + * @param right The object to be copied. + * @stable ICU 3.8 + */ + DateTimeRule& operator=(const DateTimeRule& right); + + /** + * Return true if the given DateTimeRule objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given DateTimeRule objects are semantically equal. + * @stable ICU 3.8 + */ + UBool operator==(const DateTimeRule& that) const; + + /** + * Return true if the given DateTimeRule objects are semantically unequal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given DateTimeRule objects are semantically unequal. 
+ * @stable ICU 3.8 + */ + UBool operator!=(const DateTimeRule& that) const; + + /** + * Gets the date rule type, such as <code>DOM</code> + * @return The date rule type. + * @stable ICU 3.8 + */ + DateRuleType getDateRuleType(void) const; + + /** + * Gets the time rule type + * @return The time rule type, either <code>WALL_TIME</code> or <code>STANDARD_TIME</code> + * or <code>UTC_TIME</code>. + * @stable ICU 3.8 + */ + TimeRuleType getTimeRuleType(void) const; + + /** + * Gets the rule month. + * @return The rule month. + * @stable ICU 3.8 + */ + int32_t getRuleMonth(void) const; + + /** + * Gets the rule day of month. When the date rule type + * is <code>DOW</code>, the value is always 0. + * @return The rule day of month + * @stable ICU 3.8 + */ + int32_t getRuleDayOfMonth(void) const; + + /** + * Gets the rule day of week. When the date rule type + * is <code>DOM</code>, the value is always 0. + * @return The rule day of week. + * @stable ICU 3.8 + */ + int32_t getRuleDayOfWeek(void) const; + + /** + * Gets the ordinal number of the occurrence of the day of week + * in the month. When the date rule type is not <code>DOW</code>, + * the value is always 0. + * @return The rule day of week ordinal number in the month. + * @stable ICU 3.8 + */ + int32_t getRuleWeekInMonth(void) const; + + /** + * Gets the rule time in the rule day. + * @return The time in the rule day in milliseconds. + * @stable ICU 3.8 + */ + int32_t getRuleMillisInDay(void) const; + +private: + int32_t fMonth; + int32_t fDayOfMonth; + int32_t fDayOfWeek; + int32_t fWeekInMonth; + int32_t fMillisInDay; + DateRuleType fDateRuleType; + TimeRuleType fTimeRuleType; + +public: + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... 
+ * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID(void) const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // DTRULE_H +//eof diff --git a/intl/icu/source/i18n/unicode/fieldpos.h b/intl/icu/source/i18n/unicode/fieldpos.h new file mode 100644 index 000000000..ad75b77f8 --- /dev/null +++ b/intl/icu/source/i18n/unicode/fieldpos.h @@ -0,0 +1,294 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************** + * Copyright (C) 1997-2006, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************************** + * + * File FIELDPOS.H + * + * Modification History: + * + * Date Name Description + * 02/25/97 aliu Converted from java. + * 03/17/97 clhuang Updated per Format implementation. 
+ * 07/17/98 stephen Added default/copy ctors, and operators =, ==, != + ******************************************************************************** + */ + +// ***************************************************************************** +// This file was generated from the java source file FieldPosition.java +// ***************************************************************************** + +#ifndef FIELDPOS_H +#define FIELDPOS_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: FieldPosition identifies the fields in a formatted output. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +/** + * <code>FieldPosition</code> is a simple class used by <code>Format</code> + * and its subclasses to identify fields in formatted output. Fields are + * identified by constants, whose names typically end with <code>_FIELD</code>, + * defined in the various subclasses of <code>Format</code>. See + * <code>ERA_FIELD</code> and its friends in <code>DateFormat</code> for + * an example. + * + * <p> + * <code>FieldPosition</code> keeps track of the position of the + * field within the formatted output with two indices: the index + * of the first character of the field and the index of the last + * character of the field. + * + * <p> + * One version of the <code>format</code> method in the various + * <code>Format</code> classes requires a <code>FieldPosition</code> + * object as an argument. You use this <code>format</code> method + * to perform partial formatting or to get information about the + * formatted output (such as the position of a field). + * + * The FieldPosition class is not intended for public subclassing. 
+ * + * <p> + * Below is an example of using <code>FieldPosition</code> to aid + * alignment of an array of formatted floating-point numbers on + * their decimal points: + * <pre> + * \code + * double doubleNum[] = {123456789.0, -12345678.9, 1234567.89, -123456.789, + * 12345.6789, -1234.56789, 123.456789, -12.3456789, 1.23456789}; + * int dNumSize = (int)(sizeof(doubleNum)/sizeof(double)); + * + * UErrorCode status = U_ZERO_ERROR; + * DecimalFormat* fmt = (DecimalFormat*) NumberFormat::createInstance(status); + * fmt->setDecimalSeparatorAlwaysShown(true); + * + * const int tempLen = 20; + * char temp[tempLen]; + * + * for (int i=0; i<dNumSize; i++) { + * FieldPosition pos(NumberFormat::INTEGER_FIELD); + * UnicodeString buf; + * char fmtText[tempLen]; + * ToCharString(fmt->format(doubleNum[i], buf, pos), fmtText); + * for (int j=0; j<tempLen; j++) temp[j] = ' '; // clear with spaces + * temp[__min(tempLen, tempLen-pos.getEndIndex())] = '\0'; + * cout << temp << fmtText << endl; + * } + * delete fmt; + * \endcode + * </pre> + * <p> + * The code will generate the following output: + * <pre> + * \code + * 123,456,789.000 + * -12,345,678.900 + * 1,234,567.890 + * -123,456.789 + * 12,345.678 + * -1,234.567 + * 123.456 + * -12.345 + * 1.234 + * \endcode + * </pre> + */ +class U_I18N_API FieldPosition : public UObject { +public: + /** + * DONT_CARE may be specified as the field to indicate that the + * caller doesn't need to specify a field. + * @stable ICU 2.0 + */ + enum { DONT_CARE = -1 }; + + /** + * Creates a FieldPosition object with a non-specified field. + * @stable ICU 2.0 + */ + FieldPosition() + : UObject(), fField(DONT_CARE), fBeginIndex(0), fEndIndex(0) {} + + /** + * Creates a FieldPosition object for the given field. Fields are + * identified by constants, whose names typically end with _FIELD, + * in the various subclasses of Format. 
+ * + * @see NumberFormat#INTEGER_FIELD + * @see NumberFormat#FRACTION_FIELD + * @see DateFormat#YEAR_FIELD + * @see DateFormat#MONTH_FIELD + * @stable ICU 2.0 + */ + FieldPosition(int32_t field) + : UObject(), fField(field), fBeginIndex(0), fEndIndex(0) {} + + /** + * Copy constructor + * @param copy the object to be copied from. + * @stable ICU 2.0 + */ + FieldPosition(const FieldPosition& copy) + : UObject(copy), fField(copy.fField), fBeginIndex(copy.fBeginIndex), fEndIndex(copy.fEndIndex) {} + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~FieldPosition(); + + /** + * Assignment operator + * @param copy the object to be copied from. + * @stable ICU 2.0 + */ + FieldPosition& operator=(const FieldPosition& copy); + + /** + * Equality operator. + * @param that the object to be compared with. + * @return TRUE if the two field positions are equal, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool operator==(const FieldPosition& that) const; + + /** + * Inequality operator. + * @param that the object to be compared with. + * @return TRUE if the two field positions are not equal, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool operator!=(const FieldPosition& that) const; + + /** + * Clone this object. + * Clones can be used concurrently in multiple threads. + * If an error occurs, then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + FieldPosition *clone() const; + + /** + * Retrieve the field identifier. + * @return the field identifier. + * @stable ICU 2.0 + */ + int32_t getField(void) const { return fField; } + + /** + * Retrieve the index of the first character in the requested field. + * @return the index of the first character in the requested field. + * @stable ICU 2.0 + */ + int32_t getBeginIndex(void) const { return fBeginIndex; } + + /** + * Retrieve the index of the character following the last character in the + * requested field. 
+ * @return the index of the character following the last character in the + * requested field. + * @stable ICU 2.0 + */ + int32_t getEndIndex(void) const { return fEndIndex; } + + /** + * Set the field. + * @param f the new value of the field. + * @stable ICU 2.0 + */ + void setField(int32_t f) { fField = f; } + + /** + * Set the begin index. For use by subclasses of Format. + * @param bi the new value of the begin index + * @stable ICU 2.0 + */ + void setBeginIndex(int32_t bi) { fBeginIndex = bi; } + + /** + * Set the end index. For use by subclasses of Format. + * @param ei the new value of the end index + * @stable ICU 2.0 + */ + void setEndIndex(int32_t ei) { fEndIndex = ei; } + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + /** + * Input: Desired field to determine start and end offsets for. + * The meaning depends on the subclass of Format. + */ + int32_t fField; + + /** + * Output: Start offset of field in text. + * If the field does not occur in the text, 0 is returned. + */ + int32_t fBeginIndex; + + /** + * Output: End offset of field in text. + * If the field does not occur in the text, 0 is returned. 
*/ + int32_t fEndIndex; +}; + +inline FieldPosition& +FieldPosition::operator=(const FieldPosition& copy) +{ + fField = copy.fField; + fEndIndex = copy.fEndIndex; + fBeginIndex = copy.fBeginIndex; + return *this; +} + +inline UBool +FieldPosition::operator==(const FieldPosition& copy) const +{ + return (fField == copy.fField && + fEndIndex == copy.fEndIndex && + fBeginIndex == copy.fBeginIndex); +} + +inline UBool +FieldPosition::operator!=(const FieldPosition& copy) const +{ + return !operator==(copy); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // FIELDPOS_H +//eof diff --git a/intl/icu/source/i18n/unicode/fmtable.h b/intl/icu/source/i18n/unicode/fmtable.h new file mode 100644 index 000000000..ac5daba89 --- /dev/null +++ b/intl/icu/source/i18n/unicode/fmtable.h @@ -0,0 +1,762 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File FMTABLE.H +* +* Modification History: +* +* Date Name Description +* 02/29/97 aliu Creation. 
+******************************************************************************** +*/ +#ifndef FMTABLE_H +#define FMTABLE_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Formattable is a thin wrapper for primitive types used for formatting and parsing + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/unistr.h" +#include "unicode/stringpiece.h" +#include "unicode/uformattable.h" + +U_NAMESPACE_BEGIN + +class CharString; +class DigitList; + +/** + * \def UNUM_INTERNAL_STACKARRAY_SIZE + * @internal + */ +#if U_PLATFORM == U_PF_OS400 +#define UNUM_INTERNAL_STACKARRAY_SIZE 144 +#else +#define UNUM_INTERNAL_STACKARRAY_SIZE 128 +#endif + +/** + * Formattable objects can be passed to the Format class or + * its subclasses for formatting. Formattable is a thin wrapper + * class which interconverts between the primitive numeric types + * (double, long, etc.) as well as UDate and UnicodeString. + * + * <p>Internally, a Formattable object is a union of primitive types. + * As such, it can only store one flavor of data at a time. To + * determine what flavor of data it contains, use the getType method. + * + * <p>As of ICU 3.0, Formattable may also wrap a UObject pointer, + * which it owns. This allows an instance of any ICU class to be + * encapsulated in a Formattable. For legacy reasons and for + * efficiency, primitive numeric types are still stored directly + * within a Formattable. + * + * <p>The Formattable class is not suitable for subclassing. + * + * <p>See UFormattable for a C wrapper. + */ +class U_I18N_API Formattable : public UObject { +public: + /** + * This enum is only used to let callers distinguish between + * the Formattable(UDate) constructor and the Formattable(double) + * constructor; the compiler cannot distinguish the signatures, + * since UDate is currently typedefed to be either double or long. + * If UDate is changed later to be a bonafide class + * or struct, then we no longer need this enum. 
+ * @stable ICU 2.4 + */ + enum ISDATE { kIsDate }; + + /** + * Default constructor + * @stable ICU 2.4 + */ + Formattable(); // Type kLong, value 0 + + /** + * Creates a Formattable object with a UDate instance. + * @param d the UDate instance. + * @param flag the flag to indicate this is a date. Always set it to kIsDate + * @stable ICU 2.0 + */ + Formattable(UDate d, ISDATE flag); + + /** + * Creates a Formattable object with a double number. + * @param d the double number. + * @stable ICU 2.0 + */ + Formattable(double d); + + /** + * Creates a Formattable object with a long number. + * @param l the long number. + * @stable ICU 2.0 + */ + Formattable(int32_t l); + + /** + * Creates a Formattable object with an int64_t number + * @param ll the int64_t number. + * @stable ICU 2.8 + */ + Formattable(int64_t ll); + +#if !UCONFIG_NO_CONVERSION + /** + * Creates a Formattable object with a char string pointer. + * Assumes that the char string is null terminated. + * @param strToCopy the char string. + * @stable ICU 2.0 + */ + Formattable(const char* strToCopy); +#endif + + /** + * Creates a Formattable object of an appropriate numeric type from a + * a decimal number in string form. The Formattable will retain the + * full precision of the input in decimal format, even when it exceeds + * what can be represented by a double or int64_t. + * + * @param number the unformatted (not localized) string representation + * of the Decimal number. + * @param status the error code. Possible errors include U_INVALID_FORMAT_ERROR + * if the format of the string does not conform to that of a + * decimal number. + * @stable ICU 4.4 + */ + Formattable(StringPiece number, UErrorCode &status); + + /** + * Creates a Formattable object with a UnicodeString object to copy from. + * @param strToCopy the UnicodeString string. + * @stable ICU 2.0 + */ + Formattable(const UnicodeString& strToCopy); + + /** + * Creates a Formattable object with a UnicodeString object to adopt from. 
+ * @param strToAdopt the UnicodeString string. + * @stable ICU 2.0 + */ + Formattable(UnicodeString* strToAdopt); + + /** + * Creates a Formattable object with an array of Formattable objects. + * @param arrayToCopy the Formattable object array. + * @param count the array count. + * @stable ICU 2.0 + */ + Formattable(const Formattable* arrayToCopy, int32_t count); + + /** + * Creates a Formattable object that adopts the given UObject. + * @param objectToAdopt the UObject to set this object to + * @stable ICU 3.0 + */ + Formattable(UObject* objectToAdopt); + + /** + * Copy constructor. + * @stable ICU 2.0 + */ + Formattable(const Formattable&); + + /** + * Assignment operator. + * @param rhs The Formattable object to copy into this object. + * @stable ICU 2.0 + */ + Formattable& operator=(const Formattable &rhs); + + /** + * Equality comparison. + * @param other the object to be compared with. + * @return TRUE if other are equal to this, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool operator==(const Formattable &other) const; + + /** + * Equality operator. + * @param other the object to be compared with. + * @return TRUE if other are unequal to this, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool operator!=(const Formattable& other) const + { return !operator==(other); } + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~Formattable(); + + /** + * Clone this object. + * Clones can be used concurrently in multiple threads. + * If an error occurs, then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + Formattable *clone() const; + + /** + * Selector for flavor of data type contained within a + * Formattable object. Formattable is a union of several + * different types, and at any time contains exactly one type. + * @stable ICU 2.4 + */ + enum Type { + /** + * Selector indicating a UDate value. Use getDate to retrieve + * the value. 
+ * @stable ICU 2.4 + */ + kDate, + + /** + * Selector indicating a double value. Use getDouble to + * retrieve the value. + * @stable ICU 2.4 + */ + kDouble, + + /** + * Selector indicating a 32-bit integer value. Use getLong to + * retrieve the value. + * @stable ICU 2.4 + */ + kLong, + + /** + * Selector indicating a UnicodeString value. Use getString + * to retrieve the value. + * @stable ICU 2.4 + */ + kString, + + /** + * Selector indicating an array of Formattables. Use getArray + * to retrieve the value. + * @stable ICU 2.4 + */ + kArray, + + /** + * Selector indicating a 64-bit integer value. Use getInt64 + * to retrieve the value. + * @stable ICU 2.8 + */ + kInt64, + + /** + * Selector indicating a UObject value. Use getObject to + * retrieve the value. + * @stable ICU 3.0 + */ + kObject + }; + + /** + * Gets the data type of this Formattable object. + * @return the data type of this Formattable object. + * @stable ICU 2.0 + */ + Type getType(void) const; + + /** + * Returns TRUE if the data type of this Formattable object + * is kDouble, kLong, or kInt64 + * @return TRUE if this is a pure numeric object + * @stable ICU 3.0 + */ + UBool isNumeric() const; + + /** + * Gets the double value of this object. If this object is not of type + * kDouble then the result is undefined. + * @return the double value of this object. + * @stable ICU 2.0 + */ + double getDouble(void) const { return fValue.fDouble; } + + /** + * Gets the double value of this object. If this object is of type + * long, int64 or Decimal Number then a conversion is peformed, with + * possible loss of precision. If the type is kObject and the + * object is a Measure, then the result of + * getNumber().getDouble(status) is returned. If this object is + * neither a numeric type nor a Measure, then 0 is returned and + * the status is set to U_INVALID_FORMAT_ERROR. + * @param status the error code + * @return the double value of this object. 
+ * @stable ICU 3.0 + */ + double getDouble(UErrorCode& status) const; + + /** + * Gets the long value of this object. If this object is not of type + * kLong then the result is undefined. + * @return the long value of this object. + * @stable ICU 2.0 + */ + int32_t getLong(void) const { return (int32_t)fValue.fInt64; } + + /** + * Gets the long value of this object. If the magnitude is too + * large to fit in a long, then the maximum or minimum long value, + * as appropriate, is returned and the status is set to + * U_INVALID_FORMAT_ERROR. If this object is of type kInt64 and + * it fits within a long, then no precision is lost. If it is of + * type kDouble, then a conversion is peformed, with + * truncation of any fractional part. If the type is kObject and + * the object is a Measure, then the result of + * getNumber().getLong(status) is returned. If this object is + * neither a numeric type nor a Measure, then 0 is returned and + * the status is set to U_INVALID_FORMAT_ERROR. + * @param status the error code + * @return the long value of this object. + * @stable ICU 3.0 + */ + int32_t getLong(UErrorCode& status) const; + + /** + * Gets the int64 value of this object. If this object is not of type + * kInt64 then the result is undefined. + * @return the int64 value of this object. + * @stable ICU 2.8 + */ + int64_t getInt64(void) const { return fValue.fInt64; } + + /** + * Gets the int64 value of this object. If this object is of a numeric + * type and the magnitude is too large to fit in an int64, then + * the maximum or minimum int64 value, as appropriate, is returned + * and the status is set to U_INVALID_FORMAT_ERROR. If the + * magnitude fits in an int64, then a casting conversion is + * peformed, with truncation of any fractional part. If the type + * is kObject and the object is a Measure, then the result of + * getNumber().getDouble(status) is returned. 
If this object is + * neither a numeric type nor a Measure, then 0 is returned and + * the status is set to U_INVALID_FORMAT_ERROR. + * @param status the error code + * @return the int64 value of this object. + * @stable ICU 3.0 + */ + int64_t getInt64(UErrorCode& status) const; + + /** + * Gets the Date value of this object. If this object is not of type + * kDate then the result is undefined. + * @return the Date value of this object. + * @stable ICU 2.0 + */ + UDate getDate() const { return fValue.fDate; } + + /** + * Gets the Date value of this object. If the type is not a date, + * status is set to U_INVALID_FORMAT_ERROR and the return value is + * undefined. + * @param status the error code. + * @return the Date value of this object. + * @stable ICU 3.0 + */ + UDate getDate(UErrorCode& status) const; + + /** + * Gets the string value of this object. If this object is not of type + * kString then the result is undefined. + * @param result Output param to receive the Date value of this object. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + UnicodeString& getString(UnicodeString& result) const + { result=*fValue.fString; return result; } + + /** + * Gets the string value of this object. If the type is not a + * string, status is set to U_INVALID_FORMAT_ERROR and a bogus + * string is returned. + * @param result Output param to receive the Date value of this object. + * @param status the error code. + * @return A reference to 'result'. + * @stable ICU 3.0 + */ + UnicodeString& getString(UnicodeString& result, UErrorCode& status) const; + + /** + * Gets a const reference to the string value of this object. If + * this object is not of type kString then the result is + * undefined. + * @return a const reference to the string value of this object. + * @stable ICU 2.0 + */ + inline const UnicodeString& getString(void) const; + + /** + * Gets a const reference to the string value of this object. 
If + * the type is not a string, status is set to + * U_INVALID_FORMAT_ERROR and the result is a bogus string. + * @param status the error code. + * @return a const reference to the string value of this object. + * @stable ICU 3.0 + */ + const UnicodeString& getString(UErrorCode& status) const; + + /** + * Gets a reference to the string value of this object. If this + * object is not of type kString then the result is undefined. + * @return a reference to the string value of this object. + * @stable ICU 2.0 + */ + inline UnicodeString& getString(void); + + /** + * Gets a reference to the string value of this object. If the + * type is not a string, status is set to U_INVALID_FORMAT_ERROR + * and the result is a bogus string. + * @param status the error code. + * @return a reference to the string value of this object. + * @stable ICU 3.0 + */ + UnicodeString& getString(UErrorCode& status); + + /** + * Gets the array value and count of this object. If this object + * is not of type kArray then the result is undefined. + * @param count fill-in with the count of this object. + * @return the array value of this object; this is the internally stored pointer, not a copy. + * @stable ICU 2.0 + */ + const Formattable* getArray(int32_t& count) const + { count=fValue.fArrayAndCount.fCount; return fValue.fArrayAndCount.fArray; } + + /** + * Gets the array value and count of this object. If the type is + * not an array, status is set to U_INVALID_FORMAT_ERROR, count is + * set to 0, and the result is NULL. + * @param count fill-in with the count of this object. + * @param status the error code. + * @return the array value of this object. + * @stable ICU 3.0 + */ + const Formattable* getArray(int32_t& count, UErrorCode& status) const; + + /** + * Accesses the specified element in the array value of this + * Formattable object. If this object is not of type kArray then + * the result is undefined. + * @param index the specified index. + * @return the accessed element in the array.
+ * @stable ICU 2.0 + */ + Formattable& operator[](int32_t index) { return fValue.fArrayAndCount.fArray[index]; } + + /** + * Returns a pointer to the UObject contained within this + * formattable, or NULL if this object does not contain a UObject. + * @return a UObject pointer, or NULL + * @stable ICU 3.0 + */ + const UObject* getObject() const; + + /** + * Returns a numeric string representation of the number contained within this + * formattable, or NULL if this object does not contain a numeric type. + * For values obtained by parsing, the returned decimal number retains + * the full precision and range of the original input, unconstrained by + * the limits of a double floating point or a 64 bit int. + * + * This function is not thread safe, and therefore is not declared const, + * even though it is logically const. + * + * Possible errors include U_MEMORY_ALLOCATION_ERROR, and + * U_INVALID_STATE if the formattable object has not been set to + * a numeric type. + * + * @param status the error code. + * @return the unformatted string representation of a number. + * @stable ICU 4.4 + */ + StringPiece getDecimalNumber(UErrorCode &status); + + /** + * Sets the double value of this object and changes the type to + * kDouble. + * @param d the new double value to be set. + * @stable ICU 2.0 + */ + void setDouble(double d); + + /** + * Sets the long value of this object and changes the type to + * kLong. + * @param l the new long value to be set. + * @stable ICU 2.0 + */ + void setLong(int32_t l); + + /** + * Sets the int64 value of this object and changes the type to + * kInt64. + * @param ll the new int64 value to be set. + * @stable ICU 2.8 + */ + void setInt64(int64_t ll); + + /** + * Sets the Date value of this object and changes the type to + * kDate. + * @param d the new Date value to be set. + * @stable ICU 2.0 + */ + void setDate(UDate d); + + /** + * Sets the string value of this object and changes the type to + * kString.
+ * @param stringToCopy the new string value to be set. + * @stable ICU 2.0 + */ + void setString(const UnicodeString& stringToCopy); + + /** + * Sets the array value and count of this object and changes the + * type to kArray. + * @param array the array value. + * @param count the number of array elements to be copied. + * @stable ICU 2.0 + */ + void setArray(const Formattable* array, int32_t count); + + /** + * Sets and adopts the string value of this object and + * changes the type to kString. + * @param stringToAdopt the new string value to be adopted. + * @stable ICU 2.0 + */ + void adoptString(UnicodeString* stringToAdopt); + + /** + * Sets and adopts the array value and count of this object and + * changes the type to kArray. + * @param array the array value to be adopted. + * @param count the number of array elements. + * @stable ICU 2.0 + */ + void adoptArray(Formattable* array, int32_t count); + + /** + * Sets and adopts the UObject value of this object and changes + * the type to kObject. After this call, the caller must not + * delete the given object. + * @param objectToAdopt the UObject value to be adopted + * @stable ICU 3.0 + */ + void adoptObject(UObject* objectToAdopt); + + /** + * Sets the numeric value from a decimal number string, and changes + * the type to a numeric type appropriate for the number. + * The syntax of the number is a "numeric string" + * as defined in the Decimal Arithmetic Specification, available at + * http://speleotrove.com/decimal + * The full precision and range of the input number will be retained, + * even when it exceeds what can be represented by a double or an int64. + * + * @param numberString a string representation of the unformatted decimal number. + * @param status the error code. Set to U_INVALID_FORMAT_ERROR if the + * incoming string is not a valid decimal number. + * @stable ICU 4.4 + */ + void setDecimalNumber(StringPiece numberString, + UErrorCode &status); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * Convert the UFormattable to a Formattable. Internally, this is a reinterpret_cast. + * @param fmt a valid UFormattable + * @return the UFormattable as a Formattable object pointer. This is an alias to the original + * UFormattable, and so is only valid while the original argument remains in scope. + * @stable ICU 52 + */ + static inline Formattable *fromUFormattable(UFormattable *fmt); + + /** + * Convert the const UFormattable to a const Formattable. Internally, this is a reinterpret_cast. + * @param fmt a valid const UFormattable + * @return the const UFormattable as a const Formattable object pointer. This is an alias to the original + * UFormattable, and so is only valid while the original argument remains in scope. + * @stable ICU 52 + */ + static inline const Formattable *fromUFormattable(const UFormattable *fmt); + + /** + * Convert this object pointer to a UFormattable. + * @return this object as a UFormattable pointer. This is an alias to this object, + * and so is only valid while this object remains in scope. + * @stable ICU 52 + */ + inline UFormattable *toUFormattable(); + + /** + * Convert this const object pointer to a const UFormattable. + * @return this object as a const UFormattable pointer. This is an alias to this object, + * and so is only valid while this object remains in scope. + * @stable ICU 52 + */ + inline const UFormattable *toUFormattable() const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Deprecated variant of getLong(UErrorCode&). + * @param status pointer to the error code; must not be NULL, because the + * inline definition dereferences it unconditionally + * @return the long value of this object. + * @deprecated ICU 3.0 use getLong(UErrorCode&) instead + */ + inline int32_t getLong(UErrorCode* status) const; +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_INTERNAL_API + /** + * Internal function, do not use.
+ * TODO: figure out how to make this be non-public. + * NumberFormat::format(Formattable, ... + * needs to get at the DigitList, if it exists, for + * big decimal formatting. + * @internal + */ + DigitList *getDigitList() const { return fDecimalNum;} + + /** + * Non-const accessor that returns a DigitList pointer for this object. + * NOTE(review): presumably creates the DigitList on demand when none exists yet; confirm against the implementation. + * @internal + */ + DigitList *getInternalDigitList(); + + /** + * Adopt, and set value from, a DigitList + * Internal Function, do not use. + * @param dl the Digit List to be adopted + * @internal + */ + void adoptDigitList(DigitList *dl); + + /** + * Internal function to return the CharString pointer. + * @param status error code + * @return pointer to the CharString - may become invalid if the object is modified + * @internal + */ + CharString *internalGetCharString(UErrorCode &status); + +#endif /* U_HIDE_INTERNAL_API */ + +private: + /** + * Cleans up the memory for unwanted values. For example, the adopted + * string or array objects. + */ + void dispose(void); + + /** + * Common initialization, for use by constructors. + */ + void init(); + + UnicodeString* getBogus() const; + + union { + UObject* fObject; + UnicodeString* fString; + double fDouble; + int64_t fInt64; + UDate fDate; + struct { + Formattable* fArray; + int32_t fCount; + } fArrayAndCount; + } fValue; + + CharString *fDecimalStr; + + DigitList *fDecimalNum; + + char fStackData[UNUM_INTERNAL_STACKARRAY_SIZE]; // must be big enough for DigitList + + Type fType; + UnicodeString fBogus; // Bogus string when it's needed.
+}; +/** Checked date accessor: returns fValue.fDate when fType is kDate; otherwise returns 0 and, only if U_SUCCESS(status) holds, sets status to U_INVALID_FORMAT_ERROR. */ +inline UDate Formattable::getDate(UErrorCode& status) const { + if (fType != kDate) { + if (U_SUCCESS(status)) { + status = U_INVALID_FORMAT_ERROR; + } + return 0; + } + return fValue.fDate; +} +/** Unchecked const accessor: dereferences fValue.fString without testing fType. */ +inline const UnicodeString& Formattable::getString(void) const { + return *fValue.fString; +} +/** Unchecked non-const accessor: dereferences fValue.fString without testing fType. */ +inline UnicodeString& Formattable::getString(void) { + return *fValue.fString; +} +/** Deprecated pointer-taking getLong(): forwards to getLong(UErrorCode&). status is dereferenced unconditionally, so it must not be NULL. */ +#ifndef U_HIDE_DEPRECATED_API +inline int32_t Formattable::getLong(UErrorCode* status) const { + return getLong(*status); +} +#endif /* U_HIDE_DEPRECATED_API */ +/** The four conversions below are plain reinterpret_casts between Formattable and UFormattable pointers; no object is created or copied. */ +inline UFormattable* Formattable::toUFormattable() { + return reinterpret_cast<UFormattable*>(this); +} + +inline const UFormattable* Formattable::toUFormattable() const { + return reinterpret_cast<const UFormattable*>(this); +} + +inline Formattable* Formattable::fromUFormattable(UFormattable *fmt) { + return reinterpret_cast<Formattable *>(fmt); +} + +inline const Formattable* Formattable::fromUFormattable(const UFormattable *fmt) { + return reinterpret_cast<const Formattable *>(fmt); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif //_FMTABLE +//eof diff --git a/intl/icu/source/i18n/unicode/format.h b/intl/icu/source/i18n/unicode/format.h new file mode 100644 index 000000000..bce5251cb --- /dev/null +++ b/intl/icu/source/i18n/unicode/format.h @@ -0,0 +1,307 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2011, International Business Machines Corporation and others. +* All Rights Reserved. +******************************************************************************** +* +* File FORMAT.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 03/17/97 clhuang Updated per C++ implementation. +* 03/27/97 helena Updated to pass the simple test after code review.
+******************************************************************************** +*/ +// ***************************************************************************** +// This file was generated from the java source file Format.java +// ***************************************************************************** + +#ifndef FORMAT_H +#define FORMAT_H + + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Base class for all formats. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/unistr.h" +#include "unicode/fmtable.h" +#include "unicode/fieldpos.h" +#include "unicode/fpositer.h" +#include "unicode/parsepos.h" +#include "unicode/parseerr.h" +#include "unicode/locid.h" + +U_NAMESPACE_BEGIN + +/** + * Base class for all formats. This is an abstract base class which + * specifies the protocol for classes which convert other objects or + * values, such as numeric values and dates, and their string + * representations. In some cases these representations may be + * localized or contain localized characters or strings. For example, + * a numeric formatter such as DecimalFormat may convert a numeric + * value such as 12345 to the string "$12,345". It may also parse + * the string back into a numeric value. A date and time formatter + * like SimpleDateFormat may represent a specific date, encoded + * numerically, as a string such as "Wednesday, February 26, 1997 AD". + * <P> + * Many of the concrete subclasses of Format employ the notion of + * a pattern. A pattern is a string representation of the rules which + * govern the interconversion between values and strings. For example, + * a DecimalFormat object may be associated with the pattern + * "$#,##0.00;($#,##0.00)", which is a common US English format for + * currency values, yielding strings such as "$1,234.45" for 1234.45, + * and "($987.65)" for 987.6543. The specific syntax of a pattern + * is defined by each subclass. 
+ * <P> + * Even though many subclasses use patterns, the notion of a pattern + * is not inherent to Format classes in general, and is not part of + * the explicit base class protocol. + * <P> + * Two complex formatting classes bear mentioning. These are + * MessageFormat and ChoiceFormat. ChoiceFormat is a subclass of + * NumberFormat which allows the user to format different number ranges + * as strings. For instance, 0 may be represented as "no files", 1 as + * "one file", and any number greater than 1 as "many files". + * MessageFormat is a formatter which utilizes other Format objects to + * format a string containing multiple values. For instance, + * a MessageFormat object might produce the string "There are no files + * on the disk MyDisk on February 27, 1997." given the arguments 0, + * "MyDisk", and the date value of 2/27/97. See the ChoiceFormat + * and MessageFormat headers for further information. + * <P> + * If formatting is unsuccessful, a failing UErrorCode is returned when + * the Format cannot format the type of object; otherwise, if there is + * something ill-formed about the object, the Unicode replacement character + * 0xFFFD is returned. + * <P> + * If there is no match when parsing, a parse failure UErrorCode is + * returned for methods which take no ParsePosition. For the method + * that takes a ParsePosition, the index parameter is left unchanged. + * <P> + * <em>User subclasses are not supported.</em> While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. + */ +class U_I18N_API Format : public UObject { +public: + + /** Destructor + * @stable ICU 2.4 + */ + virtual ~Format(); + + /** + * Return true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * @param other the object to be compared with. + * @return Return true if the given Format objects are semantically equal.
+ * Objects of different subclasses are considered unequal. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Format& other) const = 0; + + /** + * Return true if the given Format objects are not semantically + * equal. + * @param other the object to be compared with. + * @return Return true if the given Format objects are not semantically equal; this is the negation of operator==. + * @stable ICU 2.0 + */ + UBool operator!=(const Format& other) const { return !operator==(other); } + + /** + * Clone this object polymorphically. The caller is responsible + * for deleting the result when done. + * @return A copy of the object + * @stable ICU 2.0 + */ + virtual Format* clone() const = 0; + + /** + * Formats an object to produce a string. + * + * @param obj The object to format. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param status Output parameter filled in with success or failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + UErrorCode& status) const; + + /** + * Format an object to produce a string. This is a pure virtual method which + * subclasses must implement. This method allows polymorphic formatting + * of Formattable objects. If a subclass of Format receives a Formattable + * object type it doesn't handle (e.g., if a numeric Formattable is passed + * to a DateFormat object) then it returns a failing UErrorCode. + * + * @param obj The object to format. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. 
+ * @stable ICU 2.0 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const = 0; + /** + * Format an object to produce a string. Subclasses should override this + * method (it is declared virtual but, unlike the FieldPosition overload, it is not pure virtual). This method allows polymorphic formatting of Formattable objects. + * If a subclass of Format receives a Formattable object type it doesn't + * handle (e.g., if a numeric Formattable is passed to a DateFormat object) + * then it returns a failing UErrorCode. + * + * @param obj The object to format. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Parse a string to produce an object. This is a pure virtual + * method which subclasses must implement. This method allows + * polymorphic parsing of strings into Formattable objects. + * <P> + * Before calling, set parse_pos.index to the offset you want to + * start parsing at in the source. After calling, parse_pos.index + * is the end of the text you parsed. If an error occurs, index is + * unchanged. + * <P> + * When parsing, leading whitespace is discarded (with successful + * parse), while trailing whitespace is left as is. + * <P> + * Example: + * <P> + * Parsing "_12_xy" (where _ represents a space) for a number, + * with index == 0 will result in the number 12, with + * parse_pos.index updated to 3 (just before the second space). + * Parsing a second time will result in a failing UErrorCode since + * "xy" is not a number, and leave index at 3. 
+ * <P> + * Subclasses will typically supply specific parse methods that + * return different types of values. Since methods can't overload + * on return types, these will typically be named "parse", while + * this polymorphic method will always be called parseObject. Any + * parse method that does not take a parse_pos should set status + * to an error value when no text in the required format is at the + * start position. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. Upon return + * this param is set to the position after the + * last character successfully parsed. If the + * source is not parsed successfully, this param + * will remain unchanged. + * @stable ICU 2.0 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& parse_pos) const = 0; + + /** + * Parses a string to produce an object. This is a convenience method + * which calls the pure virtual parseObject() method, and returns a + * failure UErrorCode if the ParsePosition indicates failure. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param status Output param to be filled with success/failure + * result code. + * @stable ICU 2.0 + */ + void parseObject(const UnicodeString& source, + Formattable& result, + UErrorCode& status) const; + + /** Get the locale for this format object. You can choose between valid and actual locale. + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale + * @stable ICU 2.8 + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + +#ifndef U_HIDE_INTERNAL_API + /** Get the locale for this format object. 
You can choose between valid and actual locale. + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale ID as a C string + * @internal + */ + const char* getLocaleID(ULocDataLocaleType type, UErrorCode &status) const; +#endif /* U_HIDE_INTERNAL_API */ + + protected: + /** @stable ICU 2.8 */ + void setLocaleIDs(const char* valid, const char* actual); + +protected: + /** + * Default constructor for subclass use only. Does nothing. + * @stable ICU 2.0 + */ + Format(); + + /** + * @stable ICU 2.0 + */ + Format(const Format&); // Does nothing; for subclasses only + + /** + * @stable ICU 2.0 + */ + Format& operator=(const Format&); // Does nothing; for subclasses + + + /** + * Simple function for initializing a UParseError from a UnicodeString. + * + * @param pattern The pattern to copy into the parseError + * @param pos The position in pattern where the error occurred + * @param parseError The UParseError object to fill in + * @stable ICU 2.4 + */ + static void syntaxError(const UnicodeString& pattern, + int32_t pos, + UParseError& parseError); + + private: + char actualLocale[ULOC_FULLNAME_CAPACITY]; + char validLocale[ULOC_FULLNAME_CAPACITY]; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // FORMAT_H +//eof diff --git a/intl/icu/source/i18n/unicode/fpositer.h b/intl/icu/source/i18n/unicode/fpositer.h new file mode 100644 index 000000000..59c36380d --- /dev/null +++ b/intl/icu/source/i18n/unicode/fpositer.h @@ -0,0 +1,119 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved.
+******************************************************************************** +* +* File fpositer.h +* +* Modification History: +* +* Date Name Description +* 12/15/2009 dougfelt Created +******************************************************************************** +*/ + +#ifndef FPOSITER_H +#define FPOSITER_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: FieldPosition Iterator. + */ + +#if UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + +/* + * Allow the declaration of APIs with pointers to FieldPositionIterator + * even when formatting is removed from the build. + */ +class FieldPositionIterator; + +U_NAMESPACE_END + +#else + +#include "unicode/fieldpos.h" +#include "unicode/umisc.h" + +U_NAMESPACE_BEGIN + +class UVector32; + +/** + * FieldPositionIterator returns the field ids and their start/limit positions generated + * by a call to Format::format. See Format, NumberFormat, DecimalFormat. + * @stable ICU 4.4 + */ +class U_I18N_API FieldPositionIterator : public UObject { +public: + /** + * Destructor. + * @stable ICU 4.4 + */ + ~FieldPositionIterator(); + + /** + * Constructs a new, empty iterator. + * @stable ICU 4.4 + */ + FieldPositionIterator(void); + + /** + * Copy constructor. If the copy failed for some reason, the new iterator will + * be empty. + * @stable ICU 4.4 + */ + FieldPositionIterator(const FieldPositionIterator&); + + /** + * Return true if another object is semantically equal to this + * one. + * <p> + * Return true if this FieldPositionIterator is at the same position in an + * equal array of run values.
+ * @stable ICU 4.4 + */ + UBool operator==(const FieldPositionIterator&) const; + + /** + * Returns the complement of the result of operator== + * @param rhs The FieldPositionIterator to be compared for inequality + * @return the complement of the result of operator== + * @stable ICU 4.4 + */ + UBool operator!=(const FieldPositionIterator& rhs) const { return !operator==(rhs); } + + /** + * If the current position is valid, updates the FieldPosition values, advances the iterator, + * and returns TRUE, otherwise returns FALSE. + * @param fp the FieldPosition whose values are updated when TRUE is returned + * @return TRUE if the position was valid and fp was updated, FALSE otherwise + * @stable ICU 4.4 + */ + UBool next(FieldPosition& fp); + +private: + friend class FieldPositionIteratorHandler; + + /** + * Sets the data used by the iterator, and resets the position. + * Returns U_ILLEGAL_ARGUMENT_ERROR in status if the data is not valid + * (length is not a multiple of 3, or start >= limit for any run). + * @param adopt the data vector. NOTE(review): the parameter name suggests ownership passes to the iterator; confirm against the implementation. + * @param status the error code + */ + void setData(UVector32 *adopt, UErrorCode& status); + + UVector32 *data; + int32_t pos; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // FPOSITER_H diff --git a/intl/icu/source/i18n/unicode/gender.h b/intl/icu/source/i18n/unicode/gender.h new file mode 100644 index 000000000..943d44544 --- /dev/null +++ b/intl/icu/source/i18n/unicode/gender.h @@ -0,0 +1,113 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2008-2013, International Business Machines Corporation and +* others. All Rights Reserved.
+******************************************************************************* +* +* +* File GENDER.H +* +* Modification History:* +* Date Name Description +* +******************************************************************************** +*/ + +#ifndef _GENDER +#define _GENDER + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/locid.h" +#include "unicode/ugender.h" +#include "unicode/uobject.h" + +class GenderInfoTest; + +U_NAMESPACE_BEGIN + +// Forward Declaration +void U_CALLCONV GenderInfo_initCache(UErrorCode &status); + +/** + * GenderInfo computes the gender of a list as a whole given the gender of + * each element. + * @stable ICU 50 + */ +class U_I18N_API GenderInfo : public UObject { +public: + + /** + * Provides access to the predefined GenderInfo object for a given + * locale. + * + * @param locale The locale for which a <code>GenderInfo</code> object is + * returned. + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return The predefined <code>GenderInfo</code> object pointer for + * this locale. The returned object is immutable, so it is + * declared as const. Caller does not own the returned + * pointer, so it must not attempt to free it. + * @stable ICU 50 + */ + static const GenderInfo* U_EXPORT2 getInstance(const Locale& locale, UErrorCode& status); + + /** + * Determines the gender of a list as a whole given the gender of each + * of the elements. + * + * @param genders the gender of each element in the list. + * @param length the number of elements in the genders array. + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return the gender of the whole list. + * @stable ICU 50 + */ + UGender getListGender(const UGender* genders, int32_t length, UErrorCode& status) const; + + /** + * Destructor. 
+ * + * @stable ICU 50 + */ + virtual ~GenderInfo(); + +private: + int32_t _style; + + /** + * Copy constructor. One object per locale invariant. Clients + * must never copy GenderInfo objects; the constructor is declared private. + */ + GenderInfo(const GenderInfo& other); + + /** + * Assignment operator. Not applicable to immutable objects; declared private. + */ + GenderInfo& operator=(const GenderInfo&); + + GenderInfo(); + + static const GenderInfo* getNeutralInstance(); + + static const GenderInfo* getMixedNeutralInstance(); + + static const GenderInfo* getMaleTaintsInstance(); + + static const GenderInfo* loadInstance(const Locale& locale, UErrorCode& status); + + friend class ::GenderInfoTest; + friend void U_CALLCONV GenderInfo_initCache(UErrorCode &status); +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _GENDER +//eof diff --git a/intl/icu/source/i18n/unicode/gregocal.h b/intl/icu/source/i18n/unicode/gregocal.h new file mode 100644 index 000000000..756bba1b4 --- /dev/null +++ b/intl/icu/source/i18n/unicode/gregocal.h @@ -0,0 +1,779 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +* Copyright (C) 1997-2013, International Business Machines Corporation and others. +* All Rights Reserved. +******************************************************************************** +* +* File GREGOCAL.H +* +* Modification History: +* +* Date Name Description +* 04/22/97 aliu Overhauled header. +* 07/28/98 stephen Sync with JDK 1.2 +* 09/04/98 stephen Re-sync with JDK 8/31 putback +* 09/14/98 stephen Changed type of kOneDay, kOneWeek to double. +* Fixed bug in roll() +* 10/15/99 aliu Fixed j31, incorrect WEEK_OF_YEAR computation. +* Added documentation of WEEK_OF_YEAR computation. +* 10/15/99 aliu Fixed j32, cannot set date to Feb 29 2000 AD. +* {JDK bug 4210209 4209272} +* 11/07/2003 srl Update, clean up documentation.
+******************************************************************************** +*/ + +#ifndef GREGOCAL_H +#define GREGOCAL_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/calendar.h" + +/** + * \file + * \brief C++ API: Concrete class which provides the standard calendar. + */ + +U_NAMESPACE_BEGIN + +/** + * Concrete class which provides the standard calendar used by most of the world. + * <P> + * The standard (Gregorian) calendar has 2 eras, BC and AD. + * <P> + * This implementation handles a single discontinuity, which corresponds by default to + * the date the Gregorian calendar was originally instituted (October 15, 1582). Not all + * countries adopted the Gregorian calendar then, so this cutover date may be changed by + * the caller. + * <P> + * Prior to the institution of the Gregorian Calendar, New Year's Day was March 25. To + * avoid confusion, this Calendar always uses January 1. A manual adjustment may be made + * if desired for dates that are prior to the Gregorian changeover and which fall + * between January 1 and March 24. + * + * <p>Values calculated for the <code>WEEK_OF_YEAR</code> field range from 1 to + * 53. Week 1 for a year is the first week that contains at least + * <code>getMinimalDaysInFirstWeek()</code> days from that year. It thus + * depends on the values of <code>getMinimalDaysInFirstWeek()</code>, + * <code>getFirstDayOfWeek()</code>, and the day of the week of January 1. + * Weeks between week 1 of one year and week 1 of the following year are + * numbered sequentially from 2 to 52 or 53 (as needed). + * + * <p>For example, January 1, 1998 was a Thursday. If + * <code>getFirstDayOfWeek()</code> is <code>MONDAY</code> and + * <code>getMinimalDaysInFirstWeek()</code> is 4 (these are the values + * reflecting ISO 8601 and many national standards), then week 1 of 1998 starts + * on December 29, 1997, and ends on January 4, 1998. 
If, however, + * <code>getFirstDayOfWeek()</code> is <code>SUNDAY</code>, then week 1 of 1998 + * starts on January 4, 1998, and ends on January 10, 1998; the first three days + * of 1998 then are part of week 53 of 1997. + * + * <p>Example for using GregorianCalendar: + * <pre> + * \code + * // get the supported ids for GMT-08:00 (Pacific Standard Time) + * UErrorCode success = U_ZERO_ERROR; + * const StringEnumeration *ids = TimeZone::createEnumeration(-8 * 60 * 60 * 1000); + * // if no ids were returned, something is wrong. get out. + * if (ids == 0 || ids->count(success) == 0) { + * return; + * } + * + * // begin output + * cout << "Current Time" << endl; + * + * // create a Pacific Standard Time time zone + * SimpleTimeZone* pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, ids->unext(NULL, success)); + * + * // set up rules for daylight savings time + * pdt->setStartRule(UCAL_MARCH, 1, UCAL_SUNDAY, 2 * 60 * 60 * 1000); + * pdt->setEndRule(UCAL_NOVEMBER, 2, UCAL_SUNDAY, 2 * 60 * 60 * 1000); + * + * // create a GregorianCalendar with the Pacific Daylight time zone + * // and the current date and time + * Calendar* calendar = new GregorianCalendar( pdt, success ); + * + * // print out a bunch of interesting things + * cout << "ERA: " << calendar->get( UCAL_ERA, success ) << endl; + * cout << "YEAR: " << calendar->get( UCAL_YEAR, success ) << endl; + * cout << "MONTH: " << calendar->get( UCAL_MONTH, success ) << endl; + * cout << "WEEK_OF_YEAR: " << calendar->get( UCAL_WEEK_OF_YEAR, success ) << endl; + * cout << "WEEK_OF_MONTH: " << calendar->get( UCAL_WEEK_OF_MONTH, success ) << endl; + * cout << "DATE: " << calendar->get( UCAL_DATE, success ) << endl; + * cout << "DAY_OF_MONTH: " << calendar->get( UCAL_DAY_OF_MONTH, success ) << endl; + * cout << "DAY_OF_YEAR: " << calendar->get( UCAL_DAY_OF_YEAR, success ) << endl; + * cout << "DAY_OF_WEEK: " << calendar->get( UCAL_DAY_OF_WEEK, success ) << endl; + * cout << "DAY_OF_WEEK_IN_MONTH: " << calendar->get(
UCAL_DAY_OF_WEEK_IN_MONTH, success ) << endl; + * cout << "AM_PM: " << calendar->get( UCAL_AM_PM, success ) << endl; + * cout << "HOUR: " << calendar->get( UCAL_HOUR, success ) << endl; + * cout << "HOUR_OF_DAY: " << calendar->get( UCAL_HOUR_OF_DAY, success ) << endl; + * cout << "MINUTE: " << calendar->get( UCAL_MINUTE, success ) << endl; + * cout << "SECOND: " << calendar->get( UCAL_SECOND, success ) << endl; + * cout << "MILLISECOND: " << calendar->get( UCAL_MILLISECOND, success ) << endl; + * cout << "ZONE_OFFSET: " << (calendar->get( UCAL_ZONE_OFFSET, success )/(60*60*1000)) << endl; + * cout << "DST_OFFSET: " << (calendar->get( UCAL_DST_OFFSET, success )/(60*60*1000)) << endl; + * + * cout << "Current Time, with hour reset to 3" << endl; + * calendar->clear(UCAL_HOUR_OF_DAY); // so doesn't override + * calendar->set(UCAL_HOUR, 3); + * cout << "ERA: " << calendar->get( UCAL_ERA, success ) << endl; + * cout << "YEAR: " << calendar->get( UCAL_YEAR, success ) << endl; + * cout << "MONTH: " << calendar->get( UCAL_MONTH, success ) << endl; + * cout << "WEEK_OF_YEAR: " << calendar->get( UCAL_WEEK_OF_YEAR, success ) << endl; + * cout << "WEEK_OF_MONTH: " << calendar->get( UCAL_WEEK_OF_MONTH, success ) << endl; + * cout << "DATE: " << calendar->get( UCAL_DATE, success ) << endl; + * cout << "DAY_OF_MONTH: " << calendar->get( UCAL_DAY_OF_MONTH, success ) << endl; + * cout << "DAY_OF_YEAR: " << calendar->get( UCAL_DAY_OF_YEAR, success ) << endl; + * cout << "DAY_OF_WEEK: " << calendar->get( UCAL_DAY_OF_WEEK, success ) << endl; + * cout << "DAY_OF_WEEK_IN_MONTH: " << calendar->get( UCAL_DAY_OF_WEEK_IN_MONTH, success ) << endl; + * cout << "AM_PM: " << calendar->get( UCAL_AM_PM, success ) << endl; + * cout << "HOUR: " << calendar->get( UCAL_HOUR, success ) << endl; + * cout << "HOUR_OF_DAY: " << calendar->get( UCAL_HOUR_OF_DAY, success ) << endl; + * cout << "MINUTE: " << calendar->get( UCAL_MINUTE, success ) << endl; + * cout << "SECOND: " << calendar->get( UCAL_SECOND, 
success ) << endl; + * cout << "MILLISECOND: " << calendar->get( UCAL_MILLISECOND, success ) << endl; + * cout << "ZONE_OFFSET: " << (calendar->get( UCAL_ZONE_OFFSET, success )/(60*60*1000)) << endl; // in hours + * cout << "DST_OFFSET: " << (calendar->get( UCAL_DST_OFFSET, success )/(60*60*1000)) << endl; // in hours + * + * if (U_FAILURE(success)) { + * cout << "An error occurred. success=" << u_errorName(success) << endl; + * } + * + * delete ids; + * delete calendar; // also deletes pdt + * \endcode + * </pre> + * @stable ICU 2.0 + */ +class U_I18N_API GregorianCalendar: public Calendar { +public: + + /** + * Useful constants for GregorianCalendar and TimeZone. + * @stable ICU 2.0 + */ + enum EEras { + BC, + AD + }; + + /** + * Constructs a default GregorianCalendar using the current time in the default time + * zone with the default locale. + * + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + GregorianCalendar(UErrorCode& success); + + /** + * Constructs a GregorianCalendar based on the current time in the given time zone + * with the default locale. Clients are no longer responsible for deleting the given + * time zone object after it's adopted. + * + * @param zoneToAdopt The given timezone. + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + GregorianCalendar(TimeZone* zoneToAdopt, UErrorCode& success); + + /** + * Constructs a GregorianCalendar based on the current time in the given time zone + * with the default locale. + * + * @param zone The given timezone. + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. 
+ * @stable ICU 2.0 + */ + GregorianCalendar(const TimeZone& zone, UErrorCode& success); + + /** + * Constructs a GregorianCalendar based on the current time in the default time zone + * with the given locale. + * + * @param aLocale The given locale. + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + GregorianCalendar(const Locale& aLocale, UErrorCode& success); + + /** + * Constructs a GregorianCalendar based on the current time in the given time zone + * with the given locale. Clients are no longer responsible for deleting the given + * time zone object after it's adopted. + * + * @param zoneToAdopt The given timezone. + * @param aLocale The given locale. + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + GregorianCalendar(TimeZone* zoneToAdopt, const Locale& aLocale, UErrorCode& success); + + /** + * Constructs a GregorianCalendar based on the current time in the given time zone + * with the given locale. + * + * @param zone The given timezone. + * @param aLocale The given locale. + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + GregorianCalendar(const TimeZone& zone, const Locale& aLocale, UErrorCode& success); + + /** + * Constructs a GregorianCalendar with the given AD date set in the default time + * zone with the default locale. + * + * @param year The value used to set the YEAR time field in the calendar. + * @param month The value used to set the MONTH time field in the calendar. Month + * value is 0-based. e.g., 0 for January. + * @param date The value used to set the DATE time field in the calendar. + * @param success Indicates the status of GregorianCalendar object construction. 
+ * Returns U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + GregorianCalendar(int32_t year, int32_t month, int32_t date, UErrorCode& success); + + /** + * Constructs a GregorianCalendar with the given AD date and time set for the + * default time zone with the default locale. + * + * @param year The value used to set the YEAR time field in the calendar. + * @param month The value used to set the MONTH time field in the calendar. Month + * value is 0-based. e.g., 0 for January. + * @param date The value used to set the DATE time field in the calendar. + * @param hour The value used to set the HOUR_OF_DAY time field in the calendar. + * @param minute The value used to set the MINUTE time field in the calendar. + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. + * @stable ICU 2.0 + */ + GregorianCalendar(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, UErrorCode& success); + + /** + * Constructs a GregorianCalendar with the given AD date and time set for the + * default time zone with the default locale. + * + * @param year The value used to set the YEAR time field in the calendar. + * @param month The value used to set the MONTH time field in the calendar. Month + * value is 0-based. e.g., 0 for January. + * @param date The value used to set the DATE time field in the calendar. + * @param hour The value used to set the HOUR_OF_DAY time field in the calendar. + * @param minute The value used to set the MINUTE time field in the calendar. + * @param second The value used to set the SECOND time field in the calendar. + * @param success Indicates the status of GregorianCalendar object construction. + * Returns U_ZERO_ERROR if constructed successfully. 
+ * @stable ICU 2.0 + */ + GregorianCalendar(int32_t year, int32_t month, int32_t date, int32_t hour, int32_t minute, int32_t second, UErrorCode& success); + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~GregorianCalendar(); + + /** + * Copy constructor + * @param source the object to be copied. + * @stable ICU 2.0 + */ + GregorianCalendar(const GregorianCalendar& source); + + /** + * Default assignment operator + * @param right the object to be copied. + * @stable ICU 2.0 + */ + GregorianCalendar& operator=(const GregorianCalendar& right); + + /** + * Create and return a polymorphic copy of this calendar. + * @return return a polymorphic copy of this calendar. + * @stable ICU 2.0 + */ + virtual Calendar* clone(void) const; + + /** + * Sets the GregorianCalendar change date. This is the point when the switch from + * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October + * 15, 1582. Previous to this time and date will be Julian dates. + * + * @param date The given Gregorian cutover date. + * @param success Output param set to success/failure code on exit. + * @stable ICU 2.0 + */ + void setGregorianChange(UDate date, UErrorCode& success); + + /** + * Gets the Gregorian Calendar change date. This is the point when the switch from + * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October + * 15, 1582. Previous to this time and date will be Julian dates. + * + * @return The Gregorian cutover time for this calendar. + * @stable ICU 2.0 + */ + UDate getGregorianChange(void) const; + + /** + * Return true if the given year is a leap year. Determination of whether a year is + * a leap year is actually very complicated. We do something crude and mostly + * correct here, but for a real determination you need a lot of contextual + * information. 
For example, in Sweden, the change from Julian to Gregorian happened + * in a complex way resulting in missed leap years and double leap years between + * 1700 and 1753. Another example is that after the start of the Julian calendar in + * 45 B.C., the leap years did not regularize until 8 A.D. This method ignores these + * quirks, and pays attention only to the Julian onset date and the Gregorian + * cutover (which can be changed). + * + * @param year The given year. + * @return True if the given year is a leap year; false otherwise. + * @stable ICU 2.0 + */ + UBool isLeapYear(int32_t year) const; + + /** + * Returns TRUE if the given Calendar object is equivalent to this + * one. Calendar override. + * + * @param other the Calendar to be compared with this Calendar + * @stable ICU 2.4 + */ + virtual UBool isEquivalentTo(const Calendar& other) const; + + /** + * (Overrides Calendar) Rolls up or down by the given amount in the specified field. + * For more information, see the documentation for Calendar::roll(). + * + * @param field The time field. + * @param amount Indicates amount to roll. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @deprecated ICU 2.6. Use roll(UCalendarDateFields field, int32_t amount, UErrorCode& status) instead. + */ + virtual void roll(EDateFields field, int32_t amount, UErrorCode& status); + + /** + * (Overrides Calendar) Rolls up or down by the given amount in the specified field. + * For more information, see the documentation for Calendar::roll(). + * + * @param field The time field. + * @param amount Indicates amount to roll. + * @param status Output param set to success/failure code on exit. If any value + * previously set in the time field is invalid, this will be set to + * an error status. + * @stable ICU 2.6. 
+ */ + virtual void roll(UCalendarDateFields field, int32_t amount, UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Return the minimum value that this field could have, given the current date. + * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum(). + * @param field the time field. + * @return the minimum value that this field could have, given the current date. + * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field) instead. + */ + int32_t getActualMinimum(EDateFields field) const; + + /** + * Return the minimum value that this field could have, given the current date. + * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum(). + * @param field the time field. + * @param status + * @return the minimum value that this field could have, given the current date. + * @deprecated ICU 2.6. Use getActualMinimum(UCalendarDateFields field) instead. (Added to ICU 3.0 for signature consistency) + */ + int32_t getActualMinimum(EDateFields field, UErrorCode& status) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Return the minimum value that this field could have, given the current date. + * For the Gregorian calendar, this is the same as getMinimum() and getGreatestMinimum(). + * @param field the time field. + * @param status error result. + * @return the minimum value that this field could have, given the current date. + * @stable ICU 3.0 + */ + int32_t getActualMinimum(UCalendarDateFields field, UErrorCode &status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Return the maximum value that this field could have, given the current date. + * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual + * maximum would be 28; for "Feb 3, 1996" it is 29. Similarly for a Hebrew calendar, + * for some years the actual maximum for MONTH is 12, and for others 13. + * @param field the time field. 
+ * @return the maximum value that this field could have, given the current date. + * @deprecated ICU 2.6. Use getActualMaximum(UCalendarDateFields field) instead. + */ + int32_t getActualMaximum(EDateFields field) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Return the maximum value that this field could have, given the current date. + * For example, with the date "Feb 3, 1997" and the DAY_OF_MONTH field, the actual + * maximum would be 28; for "Feb 3, 1996" it is 29. Similarly for a Hebrew calendar, + * for some years the actual maximum for MONTH is 12, and for others 13. + * @param field the time field. + * @param status returns any errors that may result from this function call. + * @return the maximum value that this field could have, given the current date. + * @stable ICU 2.6 + */ + virtual int32_t getActualMaximum(UCalendarDateFields field, UErrorCode& status) const; + + /** + * (Overrides Calendar) Return true if the current date for this Calendar is in + * Daylight Savings Time. Recognizes DST_OFFSET, if it is set. + * + * @param status Fill-in parameter which receives the status of this operation. + * @return True if the current date for this Calendar is in Daylight Savings Time, + * false, otherwise. + * @stable ICU 2.0 + */ + virtual UBool inDaylightTime(UErrorCode& status) const; + +public: + + /** + * Override Calendar Returns a unique class ID POLYMORPHICALLY. Pure virtual + * override. This method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() methods call + * this method. + * + * @return The class ID for this object. All objects of a given class have the + * same class ID. Objects of other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Return the class ID for this class. This is useful only for comparing to a return + * value from getDynamicClassID(). 
For example: + * + * Base* polymorphic_pointer = createPolymorphicObject(); + * if (polymorphic_pointer->getDynamicClassID() == + * Derived::getStaticClassID()) ... + * + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns the calendar type name string for this Calendar object. + * The returned string is the legacy ICU calendar attribute value, + * for example, "gregorian" or "japanese". + * + * For more details see the Calendar::getType() documentation. + * + * @return legacy calendar type name string + * @stable ICU 49 + */ + virtual const char * getType() const; + + private: + GregorianCalendar(); // default constructor not implemented + + protected: + /** + * Return the ERA. We need a special method for this because the + * default ERA is AD, but a zero (unset) ERA is BC. + * @return the ERA. + * @internal + */ + virtual int32_t internalGetEra() const; + + /** + * Return the Julian day number of day before the first day of the + * given month in the given extended year. Subclasses should override + * this method to implement their calendar system. + * @param eyear the extended year + * @param month the zero-based month, or 0 if useMonth is false + * @param useMonth if false, compute the day before the first day of + * the given year, otherwise, compute the day before the first day of + * the given month + * @return the Julian day number of the day before the first + * day of the given month and year + * @internal + */ + virtual int32_t handleComputeMonthStart(int32_t eyear, int32_t month, + UBool useMonth) const; + + /** + * Subclasses may override this. This method calls + * handleGetMonthLength() to obtain the calendar-specific month + * length. + * @param bestField which field to use to calculate the date + * @return julian day specified by calendar fields. 
+ * @internal + */ + virtual int32_t handleComputeJulianDay(UCalendarDateFields bestField) ; + + /** + * Return the number of days in the given month of the given extended + * year of this calendar system. Subclasses should override this + * method if they can provide a more correct or more efficient + * implementation than the default implementation in Calendar. + * @internal + */ + virtual int32_t handleGetMonthLength(int32_t extendedYear, int32_t month) const; + + /** + * Return the number of days in the given extended year of this + * calendar system. Subclasses should override this method if they can + * provide a more correct or more efficient implementation than the + * default implementation in Calendar. + * @stable ICU 2.0 + */ + virtual int32_t handleGetYearLength(int32_t eyear) const; + + /** + * return the length of the given month. + * @param month the given month. + * @return the length of the given month. + * @internal + */ + virtual int32_t monthLength(int32_t month) const; + + /** + * return the length of the month according to the given year. + * @param month the given month. + * @param year the given year. + * @return the length of the month + * @internal + */ + virtual int32_t monthLength(int32_t month, int32_t year) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * return the length of the given year. + * @param year the given year. + * @return the length of the given year. + * @internal + */ + int32_t yearLength(int32_t year) const; + + /** + * return the length of the year field. + * @return the length of the year field + * @internal + */ + int32_t yearLength(void) const; + + /** + * After adjustments such as add(MONTH), add(YEAR), we don't want the + * month to jump around. E.g., we don't want Jan 31 + 1 month to go to Mar + * 3, we want it to go to Feb 28. Adjustments which might run into this + * problem call this method to retain the proper month. 
+ * @internal + */ + void pinDayOfMonth(void); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Return the day number with respect to the epoch. January 1, 1970 (Gregorian) + * is day zero. + * @param status Fill-in parameter which receives the status of this operation. + * @return the day number with respect to the epoch. + * @internal + */ + virtual UDate getEpochDay(UErrorCode& status); + + /** + * Subclass API for defining limits of different types. + * Subclasses must implement this method to return limits for the + * following fields: + * + * <pre>UCAL_ERA + * UCAL_YEAR + * UCAL_MONTH + * UCAL_WEEK_OF_YEAR + * UCAL_WEEK_OF_MONTH + * UCAL_DATE (DAY_OF_MONTH on Java) + * UCAL_DAY_OF_YEAR + * UCAL_DAY_OF_WEEK_IN_MONTH + * UCAL_YEAR_WOY + * UCAL_EXTENDED_YEAR</pre> + * + * @param field one of the above field numbers + * @param limitType one of <code>MINIMUM</code>, <code>GREATEST_MINIMUM</code>, + * <code>LEAST_MAXIMUM</code>, or <code>MAXIMUM</code> + * @internal + */ + virtual int32_t handleGetLimit(UCalendarDateFields field, ELimitType limitType) const; + + /** + * Return the extended year defined by the current fields. This will + * use the UCAL_EXTENDED_YEAR field or the UCAL_YEAR and supra-year fields (such + * as UCAL_ERA) specific to the calendar system, depending on which set of + * fields is newer. + * @return the extended year + * @internal + */ + virtual int32_t handleGetExtendedYear(); + + /** + * Subclasses may override this to convert from week fields + * (YEAR_WOY and WEEK_OF_YEAR) to an extended year in the case + * where YEAR, EXTENDED_YEAR are not set. + * The Gregorian implementation assumes a yearWoy in gregorian format, according to the current era. + * @return the extended year, UCAL_EXTENDED_YEAR + * @internal + */ + virtual int32_t handleGetExtendedYearFromWeekFields(int32_t yearWoy, int32_t woy); + + + /** + * Subclasses may override this method to compute several fields + * specific to each calendar system. 
These are: + * + * <ul><li>ERA + * <li>YEAR + * <li>MONTH + * <li>DAY_OF_MONTH + * <li>DAY_OF_YEAR + * <li>EXTENDED_YEAR</ul> + * + * <p>The GregorianCalendar implementation implements + * a calendar with the specified Julian/Gregorian cutover date. + * @internal + */ + virtual void handleComputeFields(int32_t julianDay, UErrorCode &status); + + private: + /** + * Compute the julian day number of the given year. + * @param isGregorian if true, using Gregorian calendar, otherwise using Julian calendar + * @param year the given year. + * @param isLeap true if the year is a leap year. + * @return + */ + static double computeJulianDayOfYear(UBool isGregorian, int32_t year, + UBool& isLeap); + + /** + * Validates the values of the set time fields. True if they're all valid. + * @return True if the set time fields are all valid. + */ + UBool validateFields(void) const; + + /** + * Validates the value of the given time field. True if it's valid. + */ + UBool boundsCheck(int32_t value, UCalendarDateFields field) const; + + /** + * Return the pseudo-time-stamp for two fields, given their + * individual pseudo-time-stamps. If either of the fields + * is unset, then the aggregate is unset. Otherwise, the + * aggregate is the later of the two stamps. + * @param stamp_a One given field. + * @param stamp_b Another given field. + * @return the pseudo-time-stamp for two fields + */ + int32_t aggregateStamp(int32_t stamp_a, int32_t stamp_b); + + /** + * The point at which the Gregorian calendar rules are used, measured in + * milliseconds from the standard epoch. Default is October 15, 1582 + * (Gregorian) 00:00:00 UTC, that is, October 4, 1582 (Julian) is followed + * by October 15, 1582 (Gregorian). This corresponds to Julian day number + * 2299161. This is measured from the standard epoch, not in Julian Days. 
+ */ + UDate fGregorianCutover; + + /** + * Julian day number of the Gregorian cutover + */ + int32_t fCutoverJulianDay; + + /** + * Midnight, local time (using this Calendar's TimeZone) at or before the + * gregorianCutover. This is a pure date value with no time of day or + * timezone component. + */ + UDate fNormalizedGregorianCutover;// = gregorianCutover; + + /** + * The year of the gregorianCutover, with 0 representing + * 1 BC, -1 representing 2 BC, etc. + */ + int32_t fGregorianCutoverYear;// = 1582; + + /** + * The Julian day number of the gregorianCutover + * (2299161 for the default cutover). + */ + int32_t fGregorianCutoverJulianDay;// = 2299161; + + /** + * Converts time as milliseconds to Julian date. The Julian date used here is not a + * true Julian date, since it is measured from midnight, not noon. + * + * @param millis The given milliseconds. + * @return The Julian date number. + */ + static double millisToJulianDay(UDate millis); + + /** + * Converts Julian date to time as milliseconds. The Julian date used here is not a + * true Julian date, since it is measured from midnight, not noon. + * + * @param julian The given Julian date number. + * @return Time as milliseconds. + */ + static UDate julianDayToMillis(double julian); + + /** + * Used by handleComputeJulianDay() and handleComputeMonthStart(). + * Temporary field indicating whether the calendar is currently Gregorian as opposed to Julian. + */ + UBool fIsGregorian; + + /** + * Used by handleComputeJulianDay() and handleComputeMonthStart(). + * Temporary field indicating that the sense of the gregorian cutover should be inverted + * to handle certain calculations on and around the cutover date. 
+ */ + UBool fInvertGregorian; + + + public: // internal implementation + + /** + * @return TRUE if this calendar has the notion of a default century + * @internal + */ + virtual UBool haveDefaultCentury() const; + + /** + * @return the start of the default century + * @internal + */ + virtual UDate defaultCenturyStart() const; + + /** + * @return the beginning year of the default century + * @internal + */ + virtual int32_t defaultCenturyStartYear() const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _GREGOCAL +//eof + diff --git a/intl/icu/source/i18n/unicode/measfmt.h b/intl/icu/source/i18n/unicode/measfmt.h new file mode 100644 index 000000000..37b331493 --- /dev/null +++ b/intl/icu/source/i18n/unicode/measfmt.h @@ -0,0 +1,401 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2004-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: April 20, 2004 +* Since: ICU 3.0 +********************************************************************** +*/ +#ifndef MEASUREFORMAT_H +#define MEASUREFORMAT_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/udat.h" + +/** + * \file + * \brief C++ API: Formatter for measure objects. + */ + +/** + * Constants for various widths. + * There are 4 widths: Wide, Short, Narrow, Numeric. + * For example, for English, when formatting "3 hours" + * Wide is "3 hours"; short is "3 hrs"; narrow is "3h"; + * formatting "3 hours 17 minutes" as numeric gives "3:17" + * @stable ICU 53 + */ +enum UMeasureFormatWidth { + + // Wide, short, and narrow must be first and in this order. + /** + * Spell out measure units. 
+ * @stable ICU 53 + */ + UMEASFMT_WIDTH_WIDE, + + /** + * Abbreviate measure units. + * @stable ICU 53 + */ + UMEASFMT_WIDTH_SHORT, + + /** + * Use symbols for measure units when possible. + * @stable ICU 53 + */ + UMEASFMT_WIDTH_NARROW, + + /** + * Completely omit measure units when possible. For example, format + * '5 hours, 37 minutes' as '5:37' + * @stable ICU 53 + */ + UMEASFMT_WIDTH_NUMERIC, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UMeasureFormatWidth value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UMEASFMT_WIDTH_COUNT = 4 +#endif // U_HIDE_DEPRECATED_API +}; +/** @stable ICU 53 */ +typedef enum UMeasureFormatWidth UMeasureFormatWidth; + +U_NAMESPACE_BEGIN + +class Measure; +class MeasureUnit; +class NumberFormat; +class PluralRules; +class MeasureFormatCacheData; +class SharedNumberFormat; +class SharedPluralRules; +class QuantityFormatter; +class SimpleFormatter; +class ListFormatter; +class DateFormat; + +/** + * + * A formatter for measure objects. + * + * @see Format + * @author Alan Liu + * @stable ICU 3.0 + */ +class U_I18N_API MeasureFormat : public Format { + public: + using Format::parseObject; + using Format::format; + + /** + * Constructor. + * @stable ICU 53 + */ + MeasureFormat( + const Locale &locale, UMeasureFormatWidth width, UErrorCode &status); + + /** + * Constructor. + * @stable ICU 53 + */ + MeasureFormat( + const Locale &locale, + UMeasureFormatWidth width, + NumberFormat *nfToAdopt, + UErrorCode &status); + + /** + * Copy constructor. + * @stable ICU 3.0 + */ + MeasureFormat(const MeasureFormat &other); + + /** + * Assignment operator. + * @stable ICU 3.0 + */ + MeasureFormat &operator=(const MeasureFormat &rhs); + + /** + * Destructor. + * @stable ICU 3.0 + */ + virtual ~MeasureFormat(); + + /** + * Return true if given Format objects are semantically equal. 
+ * @stable ICU 53 + */ + virtual UBool operator==(const Format &other) const; + + /** + * Clones this object polymorphically. + * @stable ICU 53 + */ + virtual Format *clone() const; + + /** + * Formats object to produce a string. + * @stable ICU 53 + */ + virtual UnicodeString &format( + const Formattable &obj, + UnicodeString &appendTo, + FieldPosition &pos, + UErrorCode &status) const; + + /** + * Parse a string to produce an object. This implementation sets + * status to U_UNSUPPORTED_ERROR. + * + * @draft ICU 53 + */ + virtual void parseObject( + const UnicodeString &source, + Formattable &reslt, + ParsePosition &pos) const; + + /** + * Formats measure objects to produce a string. An example of such a + * formatted string is 3 meters, 3.5 centimeters. Measure objects appear + * in the formatted string in the same order they appear in the "measures" + * array. The NumberFormat of this object is used only to format the amount + * of the very last measure. The other amounts are formatted with zero + * decimal places while rounding toward zero. + * @param measures array of measure objects. + * @param measureCount the number of measure objects. + * @param appendTo formatted string appended here. + * @param pos the field position. + * @param status the error. + * @return appendTo reference + * + * @stable ICU 53 + */ + UnicodeString &formatMeasures( + const Measure *measures, + int32_t measureCount, + UnicodeString &appendTo, + FieldPosition &pos, + UErrorCode &status) const; + + /** + * Formats a single measure per unit. An example of such a + * formatted string is 3.5 meters per second. + * @param measure The measure object. In above example, 3.5 meters. + * @param perUnit The per unit. In above example, it is + * *MeasureUnit::createSecond(status). + * @param appendTo formatted string appended here. + * @param pos the field position. + * @param status the error. 
+ * @return appendTo reference + * + * @stable ICU 55 + */ + UnicodeString &formatMeasurePerUnit( + const Measure &measure, + const MeasureUnit &perUnit, + UnicodeString &appendTo, + FieldPosition &pos, + UErrorCode &status) const; + +#ifndef U_HIDE_DRAFT_API + /** + * Gets the display name of the specified {@link MeasureUnit} corresponding to the current + * locale and format width. + * @param unit The unit for which to get a display name. + * @param status the error. + * @return The display name in the locale and width specified in + * {@link MeasureFormat#getInstance}, or null if there is no display name available + * for the specified unit. + * + * @draft ICU 58 + */ + UnicodeString getUnitDisplayName(const MeasureUnit& unit, UErrorCode &status) const; +#endif /* U_HIDE_DRAFT_API */ + + + /** + * Return a formatter for CurrencyAmount objects in the given + * locale. + * @param locale desired locale + * @param ec input-output error code + * @return a formatter object, or NULL upon error + * @stable ICU 3.0 + */ + static MeasureFormat* U_EXPORT2 createCurrencyFormat(const Locale& locale, + UErrorCode& ec); + + /** + * Return a formatter for CurrencyAmount objects in the default + * locale. + * @param ec input-output error code + * @return a formatter object, or NULL upon error + * @stable ICU 3.0 + */ + static MeasureFormat* U_EXPORT2 createCurrencyFormat(UErrorCode& ec); + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 53 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. 
This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 53 + */ + virtual UClassID getDynamicClassID(void) const; + + protected: + /** + * Default constructor. + * @stable ICU 3.0 + */ + MeasureFormat(); + +#ifndef U_HIDE_INTERNAL_API + + /** + * ICU use only. + * Initialize or change MeasureFormat class from subclass. + * @internal. + */ + void initMeasureFormat( + const Locale &locale, + UMeasureFormatWidth width, + NumberFormat *nfToAdopt, + UErrorCode &status); + /** + * ICU use only. + * Allows subclass to change locale. Note that this method also changes + * the NumberFormat object. Returns TRUE if locale changed; FALSE if no + * change was made. + * @internal. + */ + UBool setMeasureFormatLocale(const Locale &locale, UErrorCode &status); + + /** + * ICU use only. + * Let subclass change NumberFormat. + * @internal. + */ + void adoptNumberFormat(NumberFormat *nfToAdopt, UErrorCode &status); + + /** + * ICU use only. + * @internal. + */ + const NumberFormat &getNumberFormat() const; + + /** + * ICU use only. + * @internal. + */ + const PluralRules &getPluralRules() const; + + /** + * ICU use only. + * @internal. + */ + Locale getLocale(UErrorCode &status) const; + + /** + * ICU use only. + * @internal. + */ + const char *getLocaleID(UErrorCode &status) const; + +#endif /* U_HIDE_INTERNAL_API */ + + private: + const MeasureFormatCacheData *cache; + const SharedNumberFormat *numberFormat; + const SharedPluralRules *pluralRules; + UMeasureFormatWidth width; + + // Declared outside of MeasureFormatSharedData because ListFormatter + // objects are relatively cheap to copy; therefore, they don't need to be + // shared across instances. 
+ ListFormatter *listFormatter; + + const SimpleFormatter *getFormatterOrNull( + const MeasureUnit &unit, UMeasureFormatWidth width, int32_t index) const; + + const SimpleFormatter *getFormatter( + const MeasureUnit &unit, UMeasureFormatWidth width, int32_t index, + UErrorCode &errorCode) const; + + const SimpleFormatter *getPluralFormatter( + const MeasureUnit &unit, UMeasureFormatWidth width, int32_t index, + UErrorCode &errorCode) const; + + const SimpleFormatter *getPerFormatter( + UMeasureFormatWidth width, + UErrorCode &status) const; + + int32_t withPerUnitAndAppend( + const UnicodeString &formatted, + const MeasureUnit &perUnit, + UnicodeString &appendTo, + UErrorCode &status) const; + + UnicodeString &formatMeasure( + const Measure &measure, + const NumberFormat &nf, + UnicodeString &appendTo, + FieldPosition &pos, + UErrorCode &status) const; + + UnicodeString &formatMeasuresSlowTrack( + const Measure *measures, + int32_t measureCount, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + UnicodeString &formatNumeric( + const Formattable *hms, // always length 3: [0] is hour; [1] is + // minute; [2] is second. + int32_t bitMap, // 1=hour set, 2=minute set, 4=second set + UnicodeString &appendTo, + UErrorCode &status) const; + + UnicodeString &formatNumeric( + UDate date, + const DateFormat &dateFmt, + UDateFormatField smallestField, + const Formattable &smallestAmount, + UnicodeString &appendTo, + UErrorCode &status) const; +}; + +U_NAMESPACE_END + +#endif // #if !UCONFIG_NO_FORMATTING +#endif // #ifndef MEASUREFORMAT_H diff --git a/intl/icu/source/i18n/unicode/measunit.h b/intl/icu/source/i18n/unicode/measunit.h new file mode 100644 index 000000000..afef83ed0 --- /dev/null +++ b/intl/icu/source/i18n/unicode/measunit.h @@ -0,0 +1,1370 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2004-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: April 26, 2004 +* Since: ICU 3.0 +********************************************************************** +*/ +#ifndef __MEASUREUNIT_H__ +#define __MEASUREUNIT_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: A unit for measuring a quantity. + */ + +U_NAMESPACE_BEGIN + +class StringEnumeration; + +/** + * A unit such as length, mass, volume, currency, etc. A unit is + * coupled with a numeric amount to produce a Measure. + * + * @author Alan Liu + * @stable ICU 3.0 + */ +class U_I18N_API MeasureUnit: public UObject { + public: + + /** + * Default constructor. + * @stable ICU 3.0 + */ + MeasureUnit() : fTypeId(0), fSubTypeId(0) { + fCurrency[0] = 0; + } + + /** + * Copy constructor. + * @stable ICU 3.0 + */ + MeasureUnit(const MeasureUnit &other); + + /** + * Assignment operator. + * @stable ICU 3.0 + */ + MeasureUnit &operator=(const MeasureUnit &other); + + /** + * Returns a polymorphic clone of this object. The result will + * have the same class as returned by getDynamicClassID(). + * @stable ICU 3.0 + */ + virtual UObject* clone() const; + + /** + * Destructor + * @stable ICU 3.0 + */ + virtual ~MeasureUnit(); + + /** + * Equality operator. Return true if this object is equal + * to the given object. + * @stable ICU 3.0 + */ + virtual UBool operator==(const UObject& other) const; + + /** + * Inequality operator. Return true if this object is not equal + * to the given object. + * @stable ICU 53 + */ + UBool operator!=(const UObject& other) const { + return !(*this == other); + } + + /** + * Get the type. 
+ * @stable ICU 53 + */ + const char *getType() const; + + /** + * Get the sub type. + * @stable ICU 53 + */ + const char *getSubtype() const; + + /** + * getAvailable gets all of the available units. + * If there are too many units to fit into destCapacity then the + * error code is set to U_BUFFER_OVERFLOW_ERROR. + * + * @param destArray destination buffer. + * @param destCapacity number of MeasureUnit instances available at dest. + * @param errorCode ICU error code. + * @return number of available units. + * @stable ICU 53 + */ + static int32_t getAvailable( + MeasureUnit *destArray, + int32_t destCapacity, + UErrorCode &errorCode); + + /** + * getAvailable gets all of the available units for a specific type. + * If there are too many units to fit into destCapacity then the + * error code is set to U_BUFFER_OVERFLOW_ERROR. + * + * @param type the type + * @param destArray destination buffer. + * @param destCapacity number of MeasureUnit instances available at dest. + * @param errorCode ICU error code. + * @return number of available units for type. + * @stable ICU 53 + */ + static int32_t getAvailable( + const char *type, + MeasureUnit *destArray, + int32_t destCapacity, + UErrorCode &errorCode); + + /** + * getAvailableTypes gets all of the available types. Caller owns the + * returned StringEnumeration and must delete it when finished using it. + * + * @param errorCode ICU error code. + * @return the types. + * @stable ICU 53 + */ + static StringEnumeration* getAvailableTypes(UErrorCode &errorCode); + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. 
+ * @stable ICU 53 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 53 + */ + virtual UClassID getDynamicClassID(void) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * ICU use only. + * Returns associated array index for this measure unit. Only valid for + * non-currency measure units. + * @internal + */ + int32_t getIndex() const; + + /** + * ICU use only. + * Returns maximum value from getIndex plus 1. + * @internal + */ + static int32_t getIndexCount(); + + /** + * ICU use only. + * @return the unit.getIndex() of the unit which has this unit.getType() and unit.getSubtype(), + * or a negative value if there is no such unit + * @internal + */ + static int32_t internalGetIndexForTypeAndSubtype(const char *type, const char *subtype); + + /** + * ICU use only. + * @internal + */ + static MeasureUnit *resolveUnitPerUnit( + const MeasureUnit &unit, const MeasureUnit &perUnit); +#endif /* U_HIDE_INTERNAL_API */ + +// All code between the "Start generated createXXX methods" comment and +// the "End generated createXXX methods" comment is auto generated code +// and must not be edited manually. For instructions on how to correctly +// update this code, refer to: +// http://site.icu-project.org/design/formatting/measureformat/updating-measure-unit +// +// Start generated createXXX methods + + /** + * Returns unit of acceleration: g-force. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 53 + */ + static MeasureUnit *createGForce(UErrorCode &status); + + /** + * Returns unit of acceleration: meter-per-second-squared. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMeterPerSecondSquared(UErrorCode &status); + + /** + * Returns unit of angle: arc-minute. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createArcMinute(UErrorCode &status); + + /** + * Returns unit of angle: arc-second. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createArcSecond(UErrorCode &status); + + /** + * Returns unit of angle: degree. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createDegree(UErrorCode &status); + + /** + * Returns unit of angle: radian. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createRadian(UErrorCode &status); + + /** + * Returns unit of angle: revolution. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 56 + */ + static MeasureUnit *createRevolutionAngle(UErrorCode &status); + + /** + * Returns unit of area: acre. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createAcre(UErrorCode &status); + + /** + * Returns unit of area: hectare. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createHectare(UErrorCode &status); + + /** + * Returns unit of area: square-centimeter. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 54 + */ + static MeasureUnit *createSquareCentimeter(UErrorCode &status); + + /** + * Returns unit of area: square-foot. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createSquareFoot(UErrorCode &status); + + /** + * Returns unit of area: square-inch. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createSquareInch(UErrorCode &status); + + /** + * Returns unit of area: square-kilometer. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createSquareKilometer(UErrorCode &status); + + /** + * Returns unit of area: square-meter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createSquareMeter(UErrorCode &status); + + /** + * Returns unit of area: square-mile. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createSquareMile(UErrorCode &status); + + /** + * Returns unit of area: square-yard. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createSquareYard(UErrorCode &status); + + /** + * Returns unit of concentr: karat. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createKarat(UErrorCode &status); + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of concentr: milligram-per-deciliter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 57 + */ + static MeasureUnit *createMilligramPerDeciliter(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of concentr: millimole-per-liter. 
+ * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 57 + */ + static MeasureUnit *createMillimolePerLiter(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of concentr: part-per-million. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 57 + */ + static MeasureUnit *createPartPerMillion(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + + /** + * Returns unit of consumption: liter-per-100kilometers. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 56 + */ + static MeasureUnit *createLiterPer100Kilometers(UErrorCode &status); + + /** + * Returns unit of consumption: liter-per-kilometer. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createLiterPerKilometer(UErrorCode &status); + + /** + * Returns unit of consumption: mile-per-gallon. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMilePerGallon(UErrorCode &status); + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of consumption: mile-per-gallon-imperial. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 57 + */ + static MeasureUnit *createMilePerGallonImperial(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of coordinate: east. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 58 + */ + static MeasureUnit *createEast(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of coordinate: north. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @draft ICU 58 + */ + static MeasureUnit *createNorth(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of coordinate: south. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 58 + */ + static MeasureUnit *createSouth(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of coordinate: west. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 58 + */ + static MeasureUnit *createWest(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + + /** + * Returns unit of digital: bit. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createBit(UErrorCode &status); + + /** + * Returns unit of digital: byte. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createByte(UErrorCode &status); + + /** + * Returns unit of digital: gigabit. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createGigabit(UErrorCode &status); + + /** + * Returns unit of digital: gigabyte. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createGigabyte(UErrorCode &status); + + /** + * Returns unit of digital: kilobit. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createKilobit(UErrorCode &status); + + /** + * Returns unit of digital: kilobyte. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createKilobyte(UErrorCode &status); + + /** + * Returns unit of digital: megabit. + * Caller owns returned value and must free it. 
+ * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMegabit(UErrorCode &status); + + /** + * Returns unit of digital: megabyte. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMegabyte(UErrorCode &status); + + /** + * Returns unit of digital: terabit. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createTerabit(UErrorCode &status); + + /** + * Returns unit of digital: terabyte. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createTerabyte(UErrorCode &status); + + /** + * Returns unit of duration: century. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 56 + */ + static MeasureUnit *createCentury(UErrorCode &status); + + /** + * Returns unit of duration: day. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createDay(UErrorCode &status); + + /** + * Returns unit of duration: hour. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createHour(UErrorCode &status); + + /** + * Returns unit of duration: microsecond. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMicrosecond(UErrorCode &status); + + /** + * Returns unit of duration: millisecond. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createMillisecond(UErrorCode &status); + + /** + * Returns unit of duration: minute. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 53 + */ + static MeasureUnit *createMinute(UErrorCode &status); + + /** + * Returns unit of duration: month. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createMonth(UErrorCode &status); + + /** + * Returns unit of duration: nanosecond. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createNanosecond(UErrorCode &status); + + /** + * Returns unit of duration: second. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createSecond(UErrorCode &status); + + /** + * Returns unit of duration: week. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createWeek(UErrorCode &status); + + /** + * Returns unit of duration: year. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createYear(UErrorCode &status); + + /** + * Returns unit of electric: ampere. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createAmpere(UErrorCode &status); + + /** + * Returns unit of electric: milliampere. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMilliampere(UErrorCode &status); + + /** + * Returns unit of electric: ohm. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createOhm(UErrorCode &status); + + /** + * Returns unit of electric: volt. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 54 + */ + static MeasureUnit *createVolt(UErrorCode &status); + + /** + * Returns unit of energy: calorie. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCalorie(UErrorCode &status); + + /** + * Returns unit of energy: foodcalorie. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createFoodcalorie(UErrorCode &status); + + /** + * Returns unit of energy: joule. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createJoule(UErrorCode &status); + + /** + * Returns unit of energy: kilocalorie. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createKilocalorie(UErrorCode &status); + + /** + * Returns unit of energy: kilojoule. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createKilojoule(UErrorCode &status); + + /** + * Returns unit of energy: kilowatt-hour. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createKilowattHour(UErrorCode &status); + + /** + * Returns unit of frequency: gigahertz. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createGigahertz(UErrorCode &status); + + /** + * Returns unit of frequency: hertz. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createHertz(UErrorCode &status); + + /** + * Returns unit of frequency: kilohertz. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 54 + */ + static MeasureUnit *createKilohertz(UErrorCode &status); + + /** + * Returns unit of frequency: megahertz. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMegahertz(UErrorCode &status); + + /** + * Returns unit of length: astronomical-unit. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createAstronomicalUnit(UErrorCode &status); + + /** + * Returns unit of length: centimeter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createCentimeter(UErrorCode &status); + + /** + * Returns unit of length: decimeter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createDecimeter(UErrorCode &status); + + /** + * Returns unit of length: fathom. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createFathom(UErrorCode &status); + + /** + * Returns unit of length: foot. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createFoot(UErrorCode &status); + + /** + * Returns unit of length: furlong. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createFurlong(UErrorCode &status); + + /** + * Returns unit of length: inch. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createInch(UErrorCode &status); + + /** + * Returns unit of length: kilometer. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 53 + */ + static MeasureUnit *createKilometer(UErrorCode &status); + + /** + * Returns unit of length: light-year. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createLightYear(UErrorCode &status); + + /** + * Returns unit of length: meter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createMeter(UErrorCode &status); + + /** + * Returns unit of length: micrometer. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMicrometer(UErrorCode &status); + + /** + * Returns unit of length: mile. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createMile(UErrorCode &status); + + /** + * Returns unit of length: mile-scandinavian. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 56 + */ + static MeasureUnit *createMileScandinavian(UErrorCode &status); + + /** + * Returns unit of length: millimeter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createMillimeter(UErrorCode &status); + + /** + * Returns unit of length: nanometer. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createNanometer(UErrorCode &status); + + /** + * Returns unit of length: nautical-mile. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createNauticalMile(UErrorCode &status); + + /** + * Returns unit of length: parsec. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 54 + */ + static MeasureUnit *createParsec(UErrorCode &status); + + /** + * Returns unit of length: picometer. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createPicometer(UErrorCode &status); + + /** + * Returns unit of length: yard. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createYard(UErrorCode &status); + + /** + * Returns unit of light: lux. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createLux(UErrorCode &status); + + /** + * Returns unit of mass: carat. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCarat(UErrorCode &status); + + /** + * Returns unit of mass: gram. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createGram(UErrorCode &status); + + /** + * Returns unit of mass: kilogram. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createKilogram(UErrorCode &status); + + /** + * Returns unit of mass: metric-ton. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMetricTon(UErrorCode &status); + + /** + * Returns unit of mass: microgram. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMicrogram(UErrorCode &status); + + /** + * Returns unit of mass: milligram. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMilligram(UErrorCode &status); + + /** + * Returns unit of mass: ounce. 
+ * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createOunce(UErrorCode &status); + + /** + * Returns unit of mass: ounce-troy. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createOunceTroy(UErrorCode &status); + + /** + * Returns unit of mass: pound. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createPound(UErrorCode &status); + + /** + * Returns unit of mass: stone. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createStone(UErrorCode &status); + + /** + * Returns unit of mass: ton. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createTon(UErrorCode &status); + + /** + * Returns unit of power: gigawatt. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createGigawatt(UErrorCode &status); + + /** + * Returns unit of power: horsepower. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createHorsepower(UErrorCode &status); + + /** + * Returns unit of power: kilowatt. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createKilowatt(UErrorCode &status); + + /** + * Returns unit of power: megawatt. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMegawatt(UErrorCode &status); + + /** + * Returns unit of power: milliwatt. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 54 + */ + static MeasureUnit *createMilliwatt(UErrorCode &status); + + /** + * Returns unit of power: watt. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createWatt(UErrorCode &status); + + /** + * Returns unit of pressure: hectopascal. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createHectopascal(UErrorCode &status); + + /** + * Returns unit of pressure: inch-hg. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createInchHg(UErrorCode &status); + + /** + * Returns unit of pressure: millibar. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createMillibar(UErrorCode &status); + + /** + * Returns unit of pressure: millimeter-of-mercury. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMillimeterOfMercury(UErrorCode &status); + + /** + * Returns unit of pressure: pound-per-square-inch. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createPoundPerSquareInch(UErrorCode &status); + + /** + * Returns unit of speed: kilometer-per-hour. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createKilometerPerHour(UErrorCode &status); + + /** + * Returns unit of speed: knot. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 56 + */ + static MeasureUnit *createKnot(UErrorCode &status); + + /** + * Returns unit of speed: meter-per-second. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 53 + */ + static MeasureUnit *createMeterPerSecond(UErrorCode &status); + + /** + * Returns unit of speed: mile-per-hour. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createMilePerHour(UErrorCode &status); + + /** + * Returns unit of temperature: celsius. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createCelsius(UErrorCode &status); + + /** + * Returns unit of temperature: fahrenheit. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createFahrenheit(UErrorCode &status); + + /** + * Returns unit of temperature: generic. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 56 + */ + static MeasureUnit *createGenericTemperature(UErrorCode &status); + + /** + * Returns unit of temperature: kelvin. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createKelvin(UErrorCode &status); + + /** + * Returns unit of volume: acre-foot. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createAcreFoot(UErrorCode &status); + + /** + * Returns unit of volume: bushel. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createBushel(UErrorCode &status); + + /** + * Returns unit of volume: centiliter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCentiliter(UErrorCode &status); + + /** + * Returns unit of volume: cubic-centimeter. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 54 + */ + static MeasureUnit *createCubicCentimeter(UErrorCode &status); + + /** + * Returns unit of volume: cubic-foot. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCubicFoot(UErrorCode &status); + + /** + * Returns unit of volume: cubic-inch. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCubicInch(UErrorCode &status); + + /** + * Returns unit of volume: cubic-kilometer. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createCubicKilometer(UErrorCode &status); + + /** + * Returns unit of volume: cubic-meter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCubicMeter(UErrorCode &status); + + /** + * Returns unit of volume: cubic-mile. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createCubicMile(UErrorCode &status); + + /** + * Returns unit of volume: cubic-yard. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCubicYard(UErrorCode &status); + + /** + * Returns unit of volume: cup. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createCup(UErrorCode &status); + + /** + * Returns unit of volume: cup-metric. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 56 + */ + static MeasureUnit *createCupMetric(UErrorCode &status); + + /** + * Returns unit of volume: deciliter. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 54 + */ + static MeasureUnit *createDeciliter(UErrorCode &status); + + /** + * Returns unit of volume: fluid-ounce. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createFluidOunce(UErrorCode &status); + + /** + * Returns unit of volume: gallon. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createGallon(UErrorCode &status); + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of volume: gallon-imperial. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 57 + */ + static MeasureUnit *createGallonImperial(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + + /** + * Returns unit of volume: hectoliter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createHectoliter(UErrorCode &status); + + /** + * Returns unit of volume: liter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 53 + */ + static MeasureUnit *createLiter(UErrorCode &status); + + /** + * Returns unit of volume: megaliter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMegaliter(UErrorCode &status); + + /** + * Returns unit of volume: milliliter. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createMilliliter(UErrorCode &status); + + /** + * Returns unit of volume: pint. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createPint(UErrorCode &status); + + /** + * Returns unit of volume: pint-metric. + * Caller owns returned value and must free it. + * @param status ICU error code. 
+ * @stable ICU 56 + */ + static MeasureUnit *createPintMetric(UErrorCode &status); + + /** + * Returns unit of volume: quart. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createQuart(UErrorCode &status); + + /** + * Returns unit of volume: tablespoon. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createTablespoon(UErrorCode &status); + + /** + * Returns unit of volume: teaspoon. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @stable ICU 54 + */ + static MeasureUnit *createTeaspoon(UErrorCode &status); + + +// End generated createXXX methods + + protected: + +#ifndef U_HIDE_INTERNAL_API + /** + * For ICU use only. + * @internal + */ + void initTime(const char *timeId); + + /** + * For ICU use only. + * @internal + */ + void initCurrency(const char *isoCurrency); + +#endif /* U_HIDE_INTERNAL_API */ + +private: + int32_t fTypeId; + int32_t fSubTypeId; + char fCurrency[4]; + + MeasureUnit(int32_t typeId, int32_t subTypeId) : fTypeId(typeId), fSubTypeId(subTypeId) { + fCurrency[0] = 0; + } + void setTo(int32_t typeId, int32_t subTypeId); + int32_t getOffset() const; + static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING +#endif // __MEASUREUNIT_H__ diff --git a/intl/icu/source/i18n/unicode/measure.h b/intl/icu/source/i18n/unicode/measure.h new file mode 100644 index 000000000..2bd78d29f --- /dev/null +++ b/intl/icu/source/i18n/unicode/measure.h @@ -0,0 +1,161 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2004-2015, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* Author: Alan Liu +* Created: April 26, 2004 +* Since: ICU 3.0 +********************************************************************** +*/ +#ifndef __MEASURE_H__ +#define __MEASURE_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Measure object (a numeric amount paired with a MeasureUnit). + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/fmtable.h" + +U_NAMESPACE_BEGIN + +class MeasureUnit; + +/** + * An amount of a specified unit, consisting of a number and a Unit. + * For example, a length measure consists of a number and a length + * unit, such as feet or meters. + * + * <p>Measure objects are formatted by MeasureFormat. + * + * <p>Measure objects are immutable. + * + * @author Alan Liu + * @stable ICU 3.0 + */ +class U_I18N_API Measure: public UObject { + public: + /** + * Construct an object with the given numeric amount and the given + * unit. After this call, the caller must not delete the given + * unit object. + * @param number a numeric object; number.isNumeric() must be TRUE + * @param adoptedUnit the unit object, which must not be NULL + * @param ec input-output error code. If the amount or the unit + * is invalid, then this will be set to a failing value. + * @stable ICU 3.0 + */ + Measure(const Formattable& number, MeasureUnit* adoptedUnit, + UErrorCode& ec); + + /** + * Copy constructor + * @stable ICU 3.0 + */ + Measure(const Measure& other); + + /** + * Assignment operator + * @stable ICU 3.0 + */ + Measure& operator=(const Measure& other); + + /** + * Return a polymorphic clone of this object. The result will + * have the same class as returned by getDynamicClassID(). + * @stable ICU 3.0 + */ + virtual UObject* clone() const; + + /** + * Destructor + * @stable ICU 3.0 + */ + virtual ~Measure(); + + /** + * Equality operator. Return true if this object is equal + * to the given object. 
+ * @stable ICU 3.0 + */ + UBool operator==(const UObject& other) const; + + /** + * Return a reference to the numeric value of this object. The + * numeric value may be of any numeric type supported by + * Formattable. + * @stable ICU 3.0 + */ + inline const Formattable& getNumber() const; + + /** + * Return a reference to the unit of this object. + * The inline implementation dereferences the owned unit pointer directly. + * @stable ICU 3.0 + */ + inline const MeasureUnit& getUnit() const; + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 53 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 53 + */ + virtual UClassID getDynamicClassID(void) const; + + protected: + /** + * Default constructor. + * @stable ICU 3.0 + */ + Measure(); + + private: + /** + * The numeric value of this object, e.g. 2.54 or 100. + */ + Formattable number; + + /** + * The unit of this object, e.g., "millimeter" or "JPY". This is + * owned by this object. 
+ */ + MeasureUnit* unit; +}; + +inline const Formattable& Measure::getNumber() const { + return number; +} + +inline const MeasureUnit& Measure::getUnit() const { + return *unit; +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING +#endif // __MEASURE_H__ diff --git a/intl/icu/source/i18n/unicode/msgfmt.h b/intl/icu/source/i18n/unicode/msgfmt.h new file mode 100644 index 000000000..1a9973872 --- /dev/null +++ b/intl/icu/source/i18n/unicode/msgfmt.h @@ -0,0 +1,1095 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +* Copyright (C) 2007-2013, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************** +* +* File MSGFMT.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 03/20/97 helena Finished first cut of implementation. +* 07/22/98 stephen Removed operator!= (defined in Format) +* 08/19/2002 srl Removing Javaisms +*******************************************************************************/ + +#ifndef MSGFMT_H +#define MSGFMT_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Formats messages in a language-neutral way. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/locid.h" +#include "unicode/messagepattern.h" +#include "unicode/parseerr.h" +#include "unicode/plurfmt.h" +#include "unicode/plurrule.h" + +U_CDECL_BEGIN +// Forward declaration. +struct UHashtable; +typedef struct UHashtable UHashtable; /**< @internal */ +U_CDECL_END + +U_NAMESPACE_BEGIN + +class AppendableWrapper; +class DateFormat; +class NumberFormat; + +/** + * <p>MessageFormat prepares strings for display to users, + * with optional arguments (variables/placeholders). + * The arguments can occur in any order, which is necessary for translation + * into languages with different grammars. 
+ * + * <p>A MessageFormat is constructed from a <em>pattern</em> string + * with arguments in {curly braces} which will be replaced by formatted values. + * + * <p><code>MessageFormat</code> differs from the other <code>Format</code> + * classes in that you create a <code>MessageFormat</code> object with one + * of its constructors (not with a <code>createInstance</code> style factory + * method). Factory methods aren't necessary because <code>MessageFormat</code> + * itself doesn't implement locale-specific behavior. Any locale-specific + * behavior is defined by the pattern that you provide and the + * subformats used for inserted arguments. + * + * <p>Arguments can be named (using identifiers) or numbered (using small ASCII-digit integers). + * Some of the API methods work only with argument numbers and throw an exception + * if the pattern has named arguments (see {@link #usesNamedArguments()}). + * + * <p>An argument might not specify any format type. In this case, + * a Number value is formatted with a default (for the locale) NumberFormat, + * a Date value is formatted with a default (for the locale) DateFormat, + * and for any other value its toString() value is used. + * + * <p>An argument might specify a "simple" type for which the specified + * Format object is created, cached and used. + * + * <p>An argument might have a "complex" type with nested MessageFormat sub-patterns. + * During formatting, one of these sub-messages is selected according to the argument value + * and recursively formatted. + * + * <p>After construction, a custom Format object can be set for + * a top-level argument, overriding the default formatting and parsing behavior + * for that argument. + * However, custom formatting can be achieved more simply by writing + * a typeless argument in the pattern string + * and supplying it with a preformatted string value. + * + * <p>When formatting, MessageFormat takes a collection of argument values + * and writes an output string. 
+ * The argument values may be passed as an array + * (when the pattern contains only numbered arguments) + * or as an array of names and an array of arguments (which works for both named + * and numbered arguments). + * + * <p>Each argument is matched with one of the input values by array index or argument name + * and formatted according to its pattern specification + * (or using a custom Format object if one was set). + * A numbered pattern argument is matched with an argument name that contains that number + * as an ASCII-decimal-digit string (without leading zero). + * + * <h4><a name="patterns">Patterns and Their Interpretation</a></h4> + * + * <code>MessageFormat</code> uses patterns of the following form: + * <pre> + * message = messageText (argument messageText)* + * argument = noneArg | simpleArg | complexArg + * complexArg = choiceArg | pluralArg | selectArg | selectordinalArg + * + * noneArg = '{' argNameOrNumber '}' + * simpleArg = '{' argNameOrNumber ',' argType [',' argStyle] '}' + * choiceArg = '{' argNameOrNumber ',' "choice" ',' choiceStyle '}' + * pluralArg = '{' argNameOrNumber ',' "plural" ',' pluralStyle '}' + * selectArg = '{' argNameOrNumber ',' "select" ',' selectStyle '}' + * selectordinalArg = '{' argNameOrNumber ',' "selectordinal" ',' pluralStyle '}' + * + * choiceStyle: see {@link ChoiceFormat} + * pluralStyle: see {@link PluralFormat} + * selectStyle: see {@link SelectFormat} + * + * argNameOrNumber = argName | argNumber + * argName = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+ + * argNumber = '0' | ('1'..'9' ('0'..'9')*) + * + * argType = "number" | "date" | "time" | "spellout" | "ordinal" | "duration" + * argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText + * </pre> + * + * <ul> + * <li>messageText can contain quoted literal strings including syntax characters. 
+ * A quoted literal string begins with an ASCII apostrophe and a syntax character + * (usually a {curly brace}) and continues until the next single apostrophe. + * A double ASCII apostrophe inside or outside of a quoted string represents + * one literal apostrophe. + * <li>Quotable syntax characters are the {curly braces} in all messageText parts, + * plus the '#' sign in a messageText immediately inside a pluralStyle, + * and the '|' symbol in a messageText immediately inside a choiceStyle. + * <li>See also {@link #UMessagePatternApostropheMode} + * <li>In argStyleText, every single ASCII apostrophe begins and ends quoted literal text, + * and unquoted {curly braces} must occur in matched pairs. + * </ul> + * + * <p>Recommendation: Use the real apostrophe (single quote) character + * \htmlonly’\endhtmlonly (U+2019) for + * human-readable text, and use the ASCII apostrophe ' (U+0027) + * only in program syntax, like quoting in MessageFormat. + * See the annotations for U+0027 Apostrophe in The Unicode Standard. + * + * <p>The <code>choice</code> argument type is deprecated. + * Use <code>plural</code> arguments for proper plural selection, + * and <code>select</code> arguments for simple selection among a fixed set of choices. + * + * <p>The <code>argType</code> and <code>argStyle</code> values are used to create + * a <code>Format</code> instance for the format element. The following + * table shows how the values map to Format instances. Combinations not + * shown in the table are illegal. Any <code>argStyleText</code> must + * be a valid pattern string for the Format subclass used. 
+ * + * <p><table border=1> + * <tr> + * <th>argType + * <th>argStyle + * <th>resulting Format object + * <tr> + * <td colspan=2><i>(none)</i> + * <td><code>null</code> + * <tr> + * <td rowspan=5><code>number</code> + * <td><i>(none)</i> + * <td><code>NumberFormat.createInstance(getLocale(), status)</code> + * <tr> + * <td><code>integer</code> + * <td><code>NumberFormat.createInstance(getLocale(), kNumberStyle, status)</code> + * <tr> + * <td><code>currency</code> + * <td><code>NumberFormat.createCurrencyInstance(getLocale(), status)</code> + * <tr> + * <td><code>percent</code> + * <td><code>NumberFormat.createPercentInstance(getLocale(), status)</code> + * <tr> + * <td><i>argStyleText</i> + * <td><code>new DecimalFormat(argStyleText, new DecimalFormatSymbols(getLocale(), status), status)</code> + * <tr> + * <td rowspan=6><code>date</code> + * <td><i>(none)</i> + * <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code> + * <tr> + * <td><code>short</code> + * <td><code>DateFormat.createDateInstance(kShort, getLocale(), status)</code> + * <tr> + * <td><code>medium</code> + * <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code> + * <tr> + * <td><code>long</code> + * <td><code>DateFormat.createDateInstance(kLong, getLocale(), status)</code> + * <tr> + * <td><code>full</code> + * <td><code>DateFormat.createDateInstance(kFull, getLocale(), status)</code> + * <tr> + * <td><i>argStyleText</i> + * <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code> + * <tr> + * <td rowspan=6><code>time</code> + * <td><i>(none)</i> + * <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code> + * <tr> + * <td><code>short</code> + * <td><code>DateFormat.createTimeInstance(kShort, getLocale(), status)</code> + * <tr> + * <td><code>medium</code> + * <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code> + * <tr> + * <td><code>long</code> + * <td><code>DateFormat.createTimeInstance(kLong, 
getLocale(), status)</code> + * <tr> + * <td><code>full</code> + * <td><code>DateFormat.createTimeInstance(kFull, getLocale(), status)</code> + * <tr> + * <td><i>argStyleText</i> + * <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code> + * <tr> + * <td><code>spellout</code> + * <td><i>argStyleText (optional)</i> + * <td><code>new RuleBasedNumberFormat(URBNF_SPELLOUT, getLocale(), status) + * <br/> .setDefaultRuleset(argStyleText, status);</code> + * <tr> + * <td><code>ordinal</code> + * <td><i>argStyleText (optional)</i> + * <td><code>new RuleBasedNumberFormat(URBNF_ORDINAL, getLocale(), status) + * <br/> .setDefaultRuleset(argStyleText, status);</code> + * <tr> + * <td><code>duration</code> + * <td><i>argStyleText (optional)</i> + * <td><code>new RuleBasedNumberFormat(URBNF_DURATION, getLocale(), status) + * <br/> .setDefaultRuleset(argStyleText, status);</code> + * </table> + * <p> + * + * <h4>Usage Information</h4> + * + * <p>Here are some examples of usage: + * Example 1: + * + * <pre> + * \code + * UErrorCode success = U_ZERO_ERROR; + * GregorianCalendar cal(success); + * Formattable arguments[] = { + * 7L, + * Formattable( (Date) cal.getTime(success), Formattable::kIsDate), + * "a disturbance in the Force" + * }; + * + * UnicodeString result; + * MessageFormat::format( + * "At {1,time} on {1,date}, there was {2} on planet {0,number}.", + * arguments, 3, result, success ); + * + * cout << "result: " << result << endl; + * //<output>: At 4:34:20 PM on 23-Mar-98, there was a disturbance + * // in the Force on planet 7. + * \endcode + * </pre> + * + * Typically, the message format will come from resources, and the + * arguments will be dynamically set at runtime. 
+ * + * <p>Example 2: + * + * <pre> + * \code + * success = U_ZERO_ERROR; + * Formattable testArgs[] = {3L, "MyDisk"}; + * + * MessageFormat form( + * "The disk \"{1}\" contains {0} file(s).", success ); + * + * UnicodeString string; + * FieldPosition fpos = 0; + * cout << "format: " << form.format(testArgs, 2, string, fpos, success ) << endl; + * + * // output, with different testArgs: + * // output: The disk "MyDisk" contains 0 file(s). + * // output: The disk "MyDisk" contains 1 file(s). + * // output: The disk "MyDisk" contains 1,273 file(s). + * \endcode + * </pre> + * + * + * <p>For messages that include plural forms, you can use a plural argument: + * <pre> + * \code + * success = U_ZERO_ERROR; + * MessageFormat msgFmt( + * "{num_files, plural, " + * "=0{There are no files on disk \"{disk_name}\".}" + * "=1{There is one file on disk \"{disk_name}\".}" + * "other{There are # files on disk \"{disk_name}\".}}", + * Locale("en"), + * success); + * FieldPosition fpos = 0; + * Formattable testArgs[] = {0L, "MyDisk"}; + * UnicodeString testArgsNames[] = {"num_files", "disk_name"}; + * UnicodeString result; + * cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success); + * testArgs[0] = 3L; + * cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success); + * \endcode + * <em>output</em>: + * There are no files on disk "MyDisk". + * There are 3 files on disk "MyDisk". + * </pre> + * See {@link PluralFormat} and {@link PluralRules} for details. + * + * <h4><a name="synchronization">Synchronization</a></h4> + * + * <p>MessageFormats are not synchronized. + * It is recommended to create separate format instances for each thread. + * If multiple threads access a format concurrently, it must be synchronized + * externally. + * + * @stable ICU 2.0 + */ +class U_I18N_API MessageFormat : public Format { +public: +#ifndef U_HIDE_OBSOLETE_API + /** + * Enum type for kMaxFormat. + * @obsolete ICU 3.0. 
The 10-argument limit was removed as of ICU 2.6, + * rendering this enum type obsolete. + */ + enum EFormatNumber { + /** + * The maximum number of arguments. + * @obsolete ICU 3.0. The 10-argument limit was removed as of ICU 2.6, + * rendering this constant obsolete. + */ + kMaxFormat = 10 + }; +#endif /* U_HIDE_OBSOLETE_API */ + + /** + * Constructs a new MessageFormat using the given pattern and the + * default locale. + * + * @param pattern Pattern used to construct object. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @stable ICU 2.0 + */ + MessageFormat(const UnicodeString& pattern, + UErrorCode &status); + + /** + * Constructs a new MessageFormat using the given pattern and locale. + * @param pattern Pattern used to construct object. + * @param newLocale The locale to use for formatting dates and numbers. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @stable ICU 2.0 + */ + MessageFormat(const UnicodeString& pattern, + const Locale& newLocale, + UErrorCode& status); + /** + * Constructs a new MessageFormat using the given pattern and locale. + * @param pattern Pattern used to construct object. + * @param newLocale The locale to use for formatting dates and numbers. + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @stable ICU 2.0 + */ + MessageFormat(const UnicodeString& pattern, + const Locale& newLocale, + UParseError& parseError, + UErrorCode& status); + /** + * Constructs a new MessageFormat from an existing one. + * @stable ICU 2.0 + */ + MessageFormat(const MessageFormat&); + + /** + * Assignment operator. + * @stable ICU 2.0 + */ + const MessageFormat& operator=(const MessageFormat&); + + /** + * Destructor. 
+ * @stable ICU 2.0 + */ + virtual ~MessageFormat(); + + /** + * Clones this Format object polymorphically. The caller owns the + * result and should delete it when done. + * @stable ICU 2.0 + */ + virtual Format* clone(void) const; + + /** + * Returns true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * @param other the object to be compared with. + * @return true if the given Format objects are semantically equal. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Format& other) const; + + /** + * Sets the locale to be used for creating argument Format objects. + * @param theLocale the new locale value to be set. + * @stable ICU 2.0 + */ + virtual void setLocale(const Locale& theLocale); + + /** + * Gets the locale used for creating argument Format objects. + * format information. + * @return the locale of the object. + * @stable ICU 2.0 + */ + virtual const Locale& getLocale(void) const; + + /** + * Applies the given pattern string to this message format. + * + * @param pattern The pattern to be applied. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @stable ICU 2.0 + */ + virtual void applyPattern(const UnicodeString& pattern, + UErrorCode& status); + /** + * Applies the given pattern string to this message format. + * + * @param pattern The pattern to be applied. + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @stable ICU 2.0 + */ + virtual void applyPattern(const UnicodeString& pattern, + UParseError& parseError, + UErrorCode& status); + + /** + * Sets the UMessagePatternApostropheMode and the pattern used by this message format. + * Parses the pattern and caches Format objects for simple argument types. 
+ * Patterns and their interpretation are specified in the + * <a href="#patterns">class description</a>. + * <p> + * This method is best used only once on a given object to avoid confusion about the mode, + * and after constructing the object with an empty pattern string to minimize overhead. + * + * @param pattern The pattern to be applied. + * @param aposMode The new apostrophe mode. + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * Can be NULL. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @stable ICU 4.8 + */ + virtual void applyPattern(const UnicodeString& pattern, + UMessagePatternApostropheMode aposMode, + UParseError* parseError, + UErrorCode& status); + + /** + * @return this instance's UMessagePatternApostropheMode. + * @stable ICU 4.8 + */ + UMessagePatternApostropheMode getApostropheMode() const { + return msgPattern.getApostropheMode(); + } + + /** + * Returns a pattern that can be used to recreate this object. + * + * @param appendTo Output parameter to receive the pattern. + * Result is appended to existing contents. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + virtual UnicodeString& toPattern(UnicodeString& appendTo) const; + + /** + * Sets subformats. + * See the class description about format numbering. + * The caller should not delete the Format objects after this call. + * <EM>The array formatsToAdopt is not itself adopted.</EM> Its + * ownership is retained by the caller. If the call fails because + * memory cannot be allocated, then the formats will be deleted + * by this method, and this object will remain unchanged. + * + * <p>If this format uses named arguments, the new formats are discarded + * and this format remains unchanged. + * + * @stable ICU 2.0 + * @param formatsToAdopt the format to be adopted. + * @param count the size of the array. 
+ */ + virtual void adoptFormats(Format** formatsToAdopt, int32_t count); + + /** + * Sets subformats. + * See the class description about format numbering. + * Each item in the array is cloned into the internal array. + * If the call fails because memory cannot be allocated, then this + * object will remain unchanged. + * + * <p>If this format uses named arguments, the new formats are discarded + * and this format remains unchanged. + * + * @stable ICU 2.0 + * @param newFormats the new format to be set. + * @param cnt the size of the array. + */ + virtual void setFormats(const Format** newFormats, int32_t cnt); + + + /** + * Sets one subformat. + * See the class description about format numbering. + * The caller should not delete the Format object after this call. + * If the number is over the number of formats already set, + * the item will be deleted and ignored. + * + * <p>If this format uses named arguments, the new format is discarded + * and this format remains unchanged. + * + * @stable ICU 2.0 + * @param formatNumber index of the subformat. + * @param formatToAdopt the format to be adopted. + */ + virtual void adoptFormat(int32_t formatNumber, Format* formatToAdopt); + + /** + * Sets one subformat. + * See the class description about format numbering. + * If the number is over the number of formats already set, + * the item will be ignored. + * @param formatNumber index of the subformat. + * @param format the format to be set. + * @stable ICU 2.0 + */ + virtual void setFormat(int32_t formatNumber, const Format& format); + + /** + * Gets format names. This function returns formatNames in StringEnumerations + * which can be used with getFormat() and setFormat() to export formattable + * array from current MessageFormat to another. It is the caller's responsibility + * to delete the returned formatNames. + * @param status output param set to success/failure code. 
+ * @stable ICU 4.0 + */ + virtual StringEnumeration* getFormatNames(UErrorCode& status); + + /** + * Gets subformat pointer for given format name. + * This function supports both named and numbered + * arguments. If numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * The returned Format object should not be deleted by the caller, + * nor should the pointer of other object. The pointer and its + * contents remain valid only until the next call to any method + * of this class is made with this object. + * @param formatName the name or number specifying a format + * @param status output param set to success/failure code. + * @stable ICU 4.0 + */ + virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status); + + /** + * Sets one subformat for given format name. + * See the class description about format name. + * This function supports both named and numbered + * arguments-- if numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * If there is no matched formatName or wrong type, + * the item will be ignored. + * @param formatName Name of the subformat. + * @param format the format to be set. + * @param status output param set to success/failure code. + * @stable ICU 4.0 + */ + virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status); + + /** + * Sets one subformat for given format name. + * See the class description about format name. + * This function supports both named and numbered + * arguments-- if numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * If there is no matched formatName or wrong type, + * the item will be ignored. + * The caller should not delete the Format object after this call. + * @param formatName Name of the subformat. + * @param formatToAdopt Format to be adopted. + * @param status output param set to success/failure code. 
+ * @stable ICU 4.0 + */ + virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status); + + /** + * Gets an array of subformats of this object. The returned array + * should not be deleted by the caller, nor should the pointers + * within the array. The array and its contents remain valid only + * until the next call to this format. See the class description + * about format numbering. + * + * @param count output parameter to receive the size of the array + * @return an array of count Format* objects, or NULL if out of + * memory. Any or all of the array elements may be NULL. + * @stable ICU 2.0 + */ + virtual const Format** getFormats(int32_t& count) const; + + + using Format::format; + + /** + * Formats the given array of arguments into a user-readable string. + * Does not take ownership of the Formattable* array or its contents. + * + * <p>If this format uses named arguments, appendTo is unchanged and + * status is set to U_ILLEGAL_ARGUMENT_ERROR. + * + * @param source An array of objects to be formatted. + * @param count The number of elements of 'source'. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param ignore Not used; inherited from base class API. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + UnicodeString& format(const Formattable* source, + int32_t count, + UnicodeString& appendTo, + FieldPosition& ignore, + UErrorCode& status) const; + + /** + * Formats the given array of arguments into a user-readable string + * using the given pattern. + * + * <p>If this format uses named arguments, appendTo is unchanged and + * status is set to U_ILLEGAL_ARGUMENT_ERROR. + * + * @param pattern The pattern. + * @param arguments An array of objects to be formatted. + * @param count The number of elements of 'arguments'. 
+ * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + static UnicodeString& format(const UnicodeString& pattern, + const Formattable* arguments, + int32_t count, + UnicodeString& appendTo, + UErrorCode& status); + + /** + * Formats the given array of arguments into a user-readable + * string. The array must be stored within a single Formattable + * object of type kArray. If the Formattable object type is not of + * type kArray, then returns a failing UErrorCode. + * + * <p>If this format uses named arguments, appendTo is unchanged and + * status is set to U_ILLEGAL_ARGUMENT_ERROR. + * + * @param obj A Formattable of type kArray containing + * arguments to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Formats the given array of arguments into a user-defined argument name + * array. This function supports both named and numbered + * arguments-- if numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * + * @param argumentNames argument name array + * @param arguments An array of objects to be formatted. + * @param count The number of elements of 'argumentNames' and + * arguments. The number of argumentNames and arguments + * must be the same. + * @param appendTo Output parameter to receive result. 
+ * Result is appended to existing contents. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.0 + */ + UnicodeString& format(const UnicodeString* argumentNames, + const Formattable* arguments, + int32_t count, + UnicodeString& appendTo, + UErrorCode& status) const; + /** + * Parses the given string into an array of output arguments. + * + * @param source String to be parsed. + * @param pos On input, starting position for parse. On output, + * final position after parse. Unchanged if parse + * fails. + * @param count Output parameter to receive the number of arguments + * parsed. + * @return an array of parsed arguments. The caller owns both + * the array and its contents. + * @stable ICU 2.0 + */ + virtual Formattable* parse(const UnicodeString& source, + ParsePosition& pos, + int32_t& count) const; + + /** + * Parses the given string into an array of output arguments. + * + * <p>If this format uses named arguments, status is set to + * U_ARGUMENT_TYPE_MISMATCH. + * + * @param source String to be parsed. + * @param count Output param to receive size of returned array. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @return an array of parsed arguments. The caller owns both + * the array and its contents. Returns NULL if status is not U_ZERO_ERROR. + * + * @stable ICU 2.0 + */ + virtual Formattable* parse(const UnicodeString& source, + int32_t& count, + UErrorCode& status) const; + + /** + * Parses the given string into an array of output arguments + * stored within a single Formattable of type kArray. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param pos On input, starting position for parse. On output, + * final position after parse. 
Unchanged if parse + * fails. + * @stable ICU 2.0 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& pos) const; + + /** + * Convert an 'apostrophe-friendly' pattern into a standard + * pattern. Standard patterns treat all apostrophes as + * quotes, which is problematic in some languages, e.g. + * French, where apostrophe is commonly used. This utility + * assumes that only an unpaired apostrophe immediately before + * a brace is a true quote. Other unpaired apostrophes are paired, + * and the resulting standard pattern string is returned. + * + * <p><b>Note</b> it is not guaranteed that the returned pattern + * is indeed a valid pattern. The only effect is to convert + * between patterns having different quoting semantics. + * + * @param pattern the 'apostrophe-friendly' pattern to convert + * @param status Input/output error code. If the pattern + * cannot be parsed, the failure code is set. + * @return the standard equivalent of the original pattern + * @stable ICU 3.4 + */ + static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern, + UErrorCode& status); + + + /** + * Returns true if this MessageFormat uses named arguments, + * and false otherwise. See class description. + * + * @return true if named arguments are used. + * @stable ICU 4.0 + */ + UBool usesNamedArguments() const; + + +#ifndef U_HIDE_INTERNAL_API + /** + * This API is for ICU internal use only. + * Please do not use it. + * + * Returns argument types count in the parsed pattern. + * Used to distinguish pattern "{0} d" and "d". + * + * @return The number of formattable types in the pattern + * @internal + */ + int32_t getArgTypeCount() const; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. + * This method is to implement a simple version of RTTI, since not all + * C++ compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. 
+ * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +#ifndef U_HIDE_INTERNAL_API + /** + * Compares two Format objects. This is used for constructing the hash + * tables. + * + * @param left pointer to a Format object. Must not be NULL. + * @param right pointer to a Format object. Must not be NULL. + * + * @return whether the two objects are the same + * @internal + */ + static UBool equalFormats(const void* left, const void* right); +#endif /* U_HIDE_INTERNAL_API */ + +private: + + Locale fLocale; + MessagePattern msgPattern; + Format** formatAliases; // see getFormats + int32_t formatAliasesCapacity; + + MessageFormat(); // default constructor not implemented + + /** + * This provider helps defer instantiation of a PluralRules object + * until we actually need to select a keyword. + * For example, if the number matches an explicit-value selector like "=1" + * we do not need any PluralRules. + */ + class U_I18N_API PluralSelectorProvider : public PluralFormat::PluralSelector { + public: + PluralSelectorProvider(const MessageFormat &mf, UPluralType type); + virtual ~PluralSelectorProvider(); + virtual UnicodeString select(void *ctx, double number, UErrorCode& ec) const; + + void reset(); + private: + const MessageFormat &msgFormat; + PluralRules* rules; + UPluralType type; + }; + + /** + * A MessageFormat formats an array of arguments. 
Each argument + * has an expected type, based on the pattern. For example, if + * the pattern contains the subformat "{3,number,integer}", then + * we expect argument 3 to have type Formattable::kLong. This + * array needs to grow dynamically if the MessageFormat is + * modified. + */ + Formattable::Type* argTypes; + int32_t argTypeCount; + int32_t argTypeCapacity; + + /** + * TRUE if there are different argTypes for the same argument. + * This only matters when the MessageFormat is used in the plain C (umsg_xxx) API + * where the pattern argTypes determine how the va_arg list is read. + */ + UBool hasArgTypeConflicts; + + // Variable-size array management + UBool allocateArgTypes(int32_t capacity, UErrorCode& status); + + /** + * Default Format objects used when no format is specified and a + * numeric or date argument is formatted. These are volatile + * cache objects maintained only for performance. They do not + * participate in operator=(), copy constructor(), nor + * operator==(). + */ + NumberFormat* defaultNumberFormat; + DateFormat* defaultDateFormat; + + UHashtable* cachedFormatters; + UHashtable* customFormatArgStarts; + + PluralSelectorProvider pluralProvider; + PluralSelectorProvider ordinalProvider; + + /** + * Method to retrieve default formats (or NULL on failure). + * These are semantically const, but may modify *this. + */ + const NumberFormat* getDefaultNumberFormat(UErrorCode&) const; + const DateFormat* getDefaultDateFormat(UErrorCode&) const; + + /** + * Finds the word s, in the keyword list and returns the located index. + * @param s the keyword to be searched for. + * @param list the list of keywords to be searched with. + * @return the index of the list which matches the keyword s. + */ + static int32_t findKeyword( const UnicodeString& s, + const UChar * const *list); + + /** + * Thin wrapper around the format(... AppendableWrapper ...) variant. 
+ * Wraps the destination UnicodeString into an AppendableWrapper and + * supplies default values for some other parameters. + */ + UnicodeString& format(const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, + UnicodeString& appendTo, + FieldPosition* pos, + UErrorCode& status) const; + + /** + * Formats the arguments and writes the result into the + * AppendableWrapper, updates the field position. + * + * @param msgStart Index to msgPattern part to start formatting from. + * @param plNumber NULL except when formatting a plural argument sub-message + * where a '#' is replaced by the format string for this number. + * @param arguments The formattable objects array. (Must not be NULL.) + * @param argumentNames NULL if numbered values are used. Otherwise the same + * length as "arguments", and each entry is the name of the + * corresponding argument in "arguments". + * @param cnt The length of arguments (and of argumentNames if that is not NULL). + * @param appendTo Output parameter to receive the result. + * The result string is appended to existing contents. + * @param pos Field position status. + * @param success The error code status. 
+ */ + void format(int32_t msgStart, + const void *plNumber, + const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, + AppendableWrapper& appendTo, + FieldPosition* pos, + UErrorCode& success) const; + + UnicodeString getArgName(int32_t partIndex); + + void setArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status); + + void setCustomArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status); + + int32_t nextTopLevelArgStart(int32_t partIndex) const; + + UBool argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber); + + void cacheExplicitFormats(UErrorCode& status); + + Format* createAppropriateFormat(UnicodeString& type, + UnicodeString& style, + Formattable::Type& formattableType, + UParseError& parseError, + UErrorCode& ec); + + const Formattable* getArgFromListByName(const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, UnicodeString& name) const; + + Formattable* parse(int32_t msgStart, + const UnicodeString& source, + ParsePosition& pos, + int32_t& count, + UErrorCode& ec) const; + + FieldPosition* updateMetaData(AppendableWrapper& dest, int32_t prevLength, + FieldPosition* fp, const Formattable* argId) const; + + /** + * Finds the "other" sub-message. + * @param partIndex the index of the first PluralFormat argument style part. + * @return the "other" sub-message start part index. + */ + int32_t findOtherSubMessage(int32_t partIndex) const; + + /** + * Returns the ARG_START index of the first occurrence of the plural number in a sub-message. + * Returns -1 if it is a REPLACE_NUMBER. + * Returns 0 if there is neither. 
+ */ + int32_t findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const; + + Format* getCachedFormatter(int32_t argumentNumber) const; + + UnicodeString getLiteralStringUntilNextArgument(int32_t from) const; + + void copyObjects(const MessageFormat& that, UErrorCode& ec); + + void formatComplexSubMessage(int32_t msgStart, + const void *plNumber, + const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, + AppendableWrapper& appendTo, + UErrorCode& success) const; + + /** + * Convenience method that ought to be in NumberFormat + */ + NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const; + + /** + * Returns array of argument types in the parsed pattern + * for use in C API. Only for the use of umsg_vformat(). Not + * for public consumption. + * @param listCount Output parameter to receive the size of array + * @return The array of formattable types in the pattern + */ + const Formattable::Type* getArgTypeList(int32_t& listCount) const { + listCount = argTypeCount; + return argTypes; + } + + /** + * Resets the internal MessagePattern, and other associated caches. + */ + void resetPattern(); + + /** + * A DummyFormatter that we use solely to store a NULL value. UHash does + * not support storing NULL values. 
+ */ + class U_I18N_API DummyFormat : public Format { + public: + virtual UBool operator==(const Format&) const; + virtual Format* clone() const; + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + UErrorCode& status) const; + virtual UnicodeString& format(const Formattable&, + UnicodeString& appendTo, + FieldPosition&, + UErrorCode& status) const; + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + virtual void parseObject(const UnicodeString&, + Formattable&, + ParsePosition&) const; + }; + + friend class MessageFormatAdapter; // getFormatTypeList() access +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _MSGFMT +//eof diff --git a/intl/icu/source/i18n/unicode/numfmt.h b/intl/icu/source/i18n/unicode/numfmt.h new file mode 100644 index 000000000..d46e1fd68 --- /dev/null +++ b/intl/icu/source/i18n/unicode/numfmt.h @@ -0,0 +1,1180 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2016, International Business Machines Corporation and others. +* All Rights Reserved. +******************************************************************************** +* +* File NUMFMT.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 03/18/97 clhuang Updated per C++ implementation. +* 04/17/97 aliu Changed DigitCount to int per code review. +* 07/20/98 stephen JDK 1.2 sync up. Added scientific support. 
+* Changed naming conventions to match C++ guidelines +* Deprecated Java style constants (eg, INTEGER_FIELD) +******************************************************************************** +*/ + +#ifndef NUMFMT_H +#define NUMFMT_H + + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Abstract base class for all number formats. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/unistr.h" +#include "unicode/format.h" +#include "unicode/unum.h" // UNumberFormatStyle +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/curramt.h" +#include "unicode/udisplaycontext.h" + +class NumberFormatTest; + +U_NAMESPACE_BEGIN + +class SharedNumberFormat; + +#if !UCONFIG_NO_SERVICE +class NumberFormatFactory; +class StringEnumeration; +#endif + +/** + * + * Abstract base class for all number formats. Provides interface for + * formatting and parsing a number. Also provides methods for + * determining which locales have number formats, and what their names + * are. + * \headerfile unicode/numfmt.h "unicode/numfmt.h" + * <P> + * NumberFormat helps you to format and parse numbers for any locale. + * Your code can be completely independent of the locale conventions + * for decimal points, thousands-separators, or even the particular + * decimal digits used, or whether the number format is even decimal. + * <P> + * To format a number for the current Locale, use one of the static + * factory methods: + * \code + * #include <iostream> + * #include "unicode/numfmt.h" + * #include "unicode/unistr.h" + * #include "unicode/ustream.h" + * using namespace std; + * + * int main() { + * double myNumber = 7.0; + * UnicodeString myString; + * UErrorCode success = U_ZERO_ERROR; + * NumberFormat* nf = NumberFormat::createInstance(success); + * nf->format(myNumber, myString); + * cout << " Example 1: " << myString << endl; + * } + * \endcode + * Note that there are additional factory methods within subclasses of + * NumberFormat. 
+ * <P> + * If you are formatting multiple numbers, it is more efficient to get + * the format and use it multiple times so that the system doesn't + * have to fetch the information about the local language and country + * conventions multiple times. + * \code + * UnicodeString myString; + * UErrorCode success = U_ZERO_ERROR; + * NumberFormat *nf = NumberFormat::createInstance( success ); + * for (int32_t number: {123, 3333, -1234567}) { + * nf->format(number, myString); + * myString += "; "; + * } + * cout << " Example 2: " << myString << endl; + * \endcode + * To format a number for a different Locale, specify it in the + * call to \c createInstance(). + * \code + * nf = NumberFormat::createInstance(Locale::getFrench(), success); + * \endcode + * You can use a \c NumberFormat to parse also. + * \code + * UErrorCode success = U_ZERO_ERROR; + * Formattable result(-999); // initialized with error code + * nf->parse(myString, result, success); + * \endcode + * Use \c createInstance() to get the normal number format for a \c Locale. + * There are other static factory methods available. Use \c createCurrencyInstance() + * to get the currency number format for that country. Use \c createPercentInstance() + * to get a format for displaying percentages. With this format, a + * fraction from 0.53 is displayed as 53%. + * <P> + * The type of number formatting can be specified by passing a 'style' parameter to \c createInstance(). 
+ * For example, use\n + * \c createInstance(locale, UNUM_DECIMAL, errorCode) to get the normal number format,\n + * \c createInstance(locale, UNUM_PERCENT, errorCode) to get a format for displaying percentage,\n + * \c createInstance(locale, UNUM_SCIENTIFIC, errorCode) to get a format for displaying scientific number,\n + * \c createInstance(locale, UNUM_CURRENCY, errorCode) to get the currency number format, + * in which the currency is represented by its symbol, for example, "$3.00".\n + * \c createInstance(locale, UNUM_CURRENCY_ISO, errorCode) to get the currency number format, + * in which the currency is represented by its ISO code, for example "USD3.00".\n + * \c createInstance(locale, UNUM_CURRENCY_PLURAL, errorCode) to get the currency number format, + * in which the currency is represented by its full name in plural format, + * for example, "3.00 US dollars" or "1.00 US dollar". + * <P> + * You can also control the display of numbers with such methods as + * \c getMinimumFractionDigits(). If you want even more control over the + * format or parsing, or want to give your users more control, you can + * try dynamic_casting the \c NumberFormat you get from the factory methods to a + * \c DecimalFormat. This will work for the vast majority of + * countries; just remember to test for NULL in case you + * encounter an unusual one. + * <P> + * You can also use forms of the parse and format methods with + * \c ParsePosition and \c FieldPosition to allow you to: + * <ul type=round> + * <li>(a) progressively parse through pieces of a string. + * <li>(b) align the decimal point and other areas. + * </ul> + * For example, you can align numbers in two ways. + * <P> + * If you are using a monospaced font with spacing for alignment, you + * can pass the \c FieldPosition in your format call, with field = + * \c UNUM_INTEGER_FIELD. On output, \c getEndIndex will be set to the offset + * between the last character of the integer and the decimal. 
Add + * (desiredSpaceCount - getEndIndex) spaces at the front of the + * string. + * <P> + * If you are using proportional fonts, instead of padding with + * spaces, measure the width of the string in pixels from the start to + * getEndIndex. Then move the pen by (desiredPixelWidth - + * widthToAlignmentPoint) before drawing the text. It also works + * where there is no decimal, but possibly additional characters at + * the end, e.g. with parentheses in negative numbers: "(12)" for -12. + * <p> + * <em>User subclasses are not supported.</em> While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. + * + * @stable ICU 2.0 + */ +class U_I18N_API NumberFormat : public Format { +public: + /** + * Alignment Field constants used to construct a FieldPosition object. + * Signifies that the position of the integer part or fraction part of + * a formatted number should be returned. + * + * Note: as of ICU 4.4, the values in this enum have been extended to + * support identification of all number format fields, not just those + * pertaining to alignment. + * + * These constants are provided for backwards compatibility only. + * Please use the C style constants defined in the header file unum.h. 
+ * + * @see FieldPosition + * @stable ICU 2.0 + */ + enum EAlignmentFields { + /** @stable ICU 2.0 */ + kIntegerField = UNUM_INTEGER_FIELD, + /** @stable ICU 2.0 */ + kFractionField = UNUM_FRACTION_FIELD, + /** @stable ICU 2.0 */ + kDecimalSeparatorField = UNUM_DECIMAL_SEPARATOR_FIELD, + /** @stable ICU 2.0 */ + kExponentSymbolField = UNUM_EXPONENT_SYMBOL_FIELD, + /** @stable ICU 2.0 */ + kExponentSignField = UNUM_EXPONENT_SIGN_FIELD, + /** @stable ICU 2.0 */ + kExponentField = UNUM_EXPONENT_FIELD, + /** @stable ICU 2.0 */ + kGroupingSeparatorField = UNUM_GROUPING_SEPARATOR_FIELD, + /** @stable ICU 2.0 */ + kCurrencyField = UNUM_CURRENCY_FIELD, + /** @stable ICU 2.0 */ + kPercentField = UNUM_PERCENT_FIELD, + /** @stable ICU 2.0 */ + kPermillField = UNUM_PERMILL_FIELD, + /** @stable ICU 2.0 */ + kSignField = UNUM_SIGN_FIELD, + + /** + * These constants are provided for backwards compatibility only. + * Please use the constants defined in the header file unum.h. + */ + /** @stable ICU 2.0 */ + INTEGER_FIELD = UNUM_INTEGER_FIELD, + /** @stable ICU 2.0 */ + FRACTION_FIELD = UNUM_FRACTION_FIELD + }; + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~NumberFormat(); + + /** + * Return true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * @return true if the given Format objects are semantically equal. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Format& other) const; + + + using Format::format; + + /** + * Format an object to produce a string. This method handles + * Formattable objects with numeric types. If the Formattable + * object type is not a numeric type, then it returns a failing + * UErrorCode. + * + * @param obj The object to format. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. 
+ * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Format an object to produce a string. This method handles + * Formattable objects with numeric types. If the Formattable + * object type is not a numeric type, then it returns a failing + * UErrorCode. + * + * @param obj The object to format. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. Can be + * NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Parse a string to produce an object. This method handles + * parsing of numeric strings into Formattable objects with numeric + * types. + * <P> + * Before calling, set parse_pos.index to the offset you want to + * start parsing at in the source. After calling, parse_pos.index + * indicates the position after the successfully parsed text. If + * an error occurs, parse_pos.index is unchanged. + * <P> + * When parsing, leading whitespace is discarded (with successful + * parse), while trailing whitespace is left as is. + * <P> + * See Format::parseObject() for more. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. Upon return + * this param is set to the position after the + * last character successfully parsed. 
If the + * source is not parsed successfully, this param + * will remain unchanged. + * @stable ICU 2.0 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& parse_pos) const; + + /** + * Format a double number. These methods call the NumberFormat + * pure virtual format() methods with the default FieldPosition. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + UnicodeString& format( double number, + UnicodeString& appendTo) const; + + /** + * Format a long number. These methods call the NumberFormat + * pure virtual format() methods with the default FieldPosition. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + UnicodeString& format( int32_t number, + UnicodeString& appendTo) const; + + /** + * Format an int64 number. These methods call the NumberFormat + * pure virtual format() methods with the default FieldPosition. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.8 + */ + UnicodeString& format( int64_t number, + UnicodeString& appendTo) const; + + /** + * Format a double number. Concrete subclasses must implement + * these pure virtual methods. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. 
+ * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos) const = 0; + /** + * Format a double number. By default, the parent function simply + * calls the base class and does not return an error status. + * Therefore, the status may be ignored in some subclasses. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status error status + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + /** + * Format a double number. Subclasses must implement + * this method. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + /** + * Format a long number. Concrete subclasses must implement + * these pure virtual methods. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. 
+ * @stable ICU 2.0 + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos) const = 0; + + /** + * Format a long number. Concrete subclasses may override + * this function to provide status return. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status the output status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode &status) const; + + /** + * Format an int32 number. Subclasses must implement + * this method. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + /** + * Format an int64 number. (Not abstract to retain compatibility + * with earlier releases, however subclasses should override this + * method as it just delegates to format(int32_t number...); + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. 
+ * @stable ICU 2.8 + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Format an int64 number. (Not abstract to retain compatibility + * with earlier releases, however subclasses should override this + * method as it just delegates to format(int32_t number...); + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + /** + * Format an int64 number. Subclasses must implement + * this method. + * + * @param number The value to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. Subclasses must implement + * this method. The syntax of the unformatted number is a "numeric string" + * as defined in the Decimal Arithmetic Specification, available at + * http://speleotrove.com/decimal + * + * @param number The unformatted number, as a string, to be formatted. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. 
+ * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * Can be NULL. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format(StringPiece number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; +public: + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. Subclasses of NumberFormat that want + * to specifically handle big decimal numbers must override this method. + * class DecimalFormat does so. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + /** + * Format a decimal number. + * The number is a DigitList wrapper onto a floating point decimal number. + * The default implementation in NumberFormat converts the decimal number + * to a double and formats that. Subclasses of NumberFormat that want + * to specifically handle big decimal numbers must override this method. + * class DecimalFormat does so. + * + * @param number The number, a DigitList format Decimal Floating Point. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. 
+ * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @internal + */ + virtual UnicodeString& format(const DigitList &number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + +public: + + /** + * Produce a long if possible (e.g. within range [LONG_MIN, + * LONG_MAX], and with no decimals), otherwise a double. If + * IntegerOnly is set, will stop at a decimal point (or equivalent; + * e.g. for rational numbers "1 2/3", will stop after the 1). + * <P> + * If no object can be parsed, the index of parsePosition is unchanged + * and the contents of result are undefined (this method is void). + * <P> + * This is a pure virtual which concrete subclasses must implement. + * + * @param text The text to be parsed. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parsePosition The position to start parsing at on input. + * On output, moved to after the last successfully + * parsed character. On parse failure, does not change. + * @stable ICU 2.0 + */ + virtual void parse(const UnicodeString& text, + Formattable& result, + ParsePosition& parsePosition) const = 0; + + /** + * Parse a string as a numeric value, and store a Formattable + * numeric object in result. This method parses integers only if + * IntegerOnly is set. + * + * @param text The text to be parsed. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param status Output parameter set to a failure error code + * when a failure occurs. + * @see NumberFormat::isParseIntegerOnly + * @stable ICU 2.0 + */ + virtual void parse(const UnicodeString& text, + Formattable& result, + UErrorCode& status) const; + + /** + * Parses text from the given string as a currency amount.
Unlike + * the parse() method, this method will attempt to parse a generic + * currency name, searching for a match of this object's locale's + * currency display names, or for a 3-letter ISO currency code. + * This method will fail if this format is not a currency format, + * that is, if it does not contain the currency pattern symbol + * (U+00A4) in its prefix or suffix. + * + * @param text the string to parse + * @param pos input-output position; on input, the position within text + * to match; must have 0 <= pos.getIndex() < text.length(); + * on output, the position after the last matched character. + * If the parse fails, the position is unchanged upon output. + * @return if parse succeeds, a pointer to a newly-created CurrencyAmount + * object (owned by the caller) containing information about + * the parsed currency; if parse fails, this is NULL. + * @stable ICU 49 + */ + virtual CurrencyAmount* parseCurrency(const UnicodeString& text, + ParsePosition& pos) const; + + /** + * Return true if this format will parse numbers as integers + * only. For example in the English locale, with ParseIntegerOnly + * true, the string "1234." would be parsed as the integer value + * 1234 and parsing would stop at the "." character. Of course, + * the exact format accepted by the parse operation is locale + * dependent and determined by sub-classes of NumberFormat. + * @return true if this format will parse numbers as integers + * only. + * @stable ICU 2.0 + */ + UBool isParseIntegerOnly(void) const; + + /** + * Sets whether or not numbers should be parsed as integers only. + * @param value if TRUE, this format will parse numbers as integers + * only. + * @see isParseIntegerOnly + * @stable ICU 2.0 + */ + virtual void setParseIntegerOnly(UBool value); + + /** + * Sets whether lenient parsing should be enabled (it is off by default). + * + * @param enable \c TRUE if lenient parsing should be used, + * \c FALSE otherwise.
+ * @stable ICU 4.8 + */ + virtual void setLenient(UBool enable); + + /** + * Returns whether lenient parsing is enabled (it is off by default). + * + * @return \c TRUE if lenient parsing is enabled, + * \c FALSE otherwise. + * @see #setLenient + * @stable ICU 4.8 + */ + virtual UBool isLenient(void) const; + + /** + * Create a default style NumberFormat for the current default locale. + * The default formatting style is locale dependent. + * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createInstance(UErrorCode&); + + /** + * Create a default style NumberFormat for the specified locale. + * The default formatting style is locale dependent. + * @param inLocale the given locale. + * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createInstance(const Locale& inLocale, + UErrorCode&); + + /** + * Create a specific style NumberFormat for the specified locale. + * @param desiredLocale the given locale. + * @param style the given style. + * @param errorCode Output param filled with success/failure status. + * @return A new NumberFormat instance. + * @stable ICU 4.8 + */ + static NumberFormat* U_EXPORT2 createInstance(const Locale& desiredLocale, + UNumberFormatStyle style, + UErrorCode& errorCode); + +#ifndef U_HIDE_INTERNAL_API + + /** + * ICU use only. + * Creates NumberFormat instance without using the cache. + * @internal + */ + static NumberFormat* internalCreateInstance( + const Locale& desiredLocale, + UNumberFormatStyle style, + UErrorCode& errorCode); + + /** + * ICU use only. + * Returns handle to the shared, cached NumberFormat instance for given + * locale. On success, caller must call removeRef() on returned value + * once it is done with the shared instance. + * @internal + */ + static const SharedNumberFormat* U_EXPORT2 createSharedInstance( + const Locale& inLocale, UNumberFormatStyle style, UErrorCode& status); + +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Returns a currency format for the current default locale. 
+ * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createCurrencyInstance(UErrorCode&); + + /** + * Returns a currency format for the specified locale. + * @param inLocale the given locale. + * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createCurrencyInstance(const Locale& inLocale, + UErrorCode&); + + /** + * Returns a percentage format for the current default locale. + * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createPercentInstance(UErrorCode&); + + /** + * Returns a percentage format for the specified locale. + * @param inLocale the given locale. + * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createPercentInstance(const Locale& inLocale, + UErrorCode&); + + /** + * Returns a scientific format for the current default locale. + * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createScientificInstance(UErrorCode&); + + /** + * Returns a scientific format for the specified locale. + * @param inLocale the given locale. + * @stable ICU 2.0 + */ + static NumberFormat* U_EXPORT2 createScientificInstance(const Locale& inLocale, + UErrorCode&); + + /** + * Get the set of Locales for which NumberFormats are installed. + * @param count Output param to receive the size of the locales + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + +#if !UCONFIG_NO_SERVICE + /** + * Register a new NumberFormatFactory. The factory will be adopted. + * Because ICU may choose to cache NumberFormat objects internally, + * this must be called at application startup, prior to any calls to + * NumberFormat::createInstance to avoid undefined behavior. 
+ * @param toAdopt the NumberFormatFactory instance to be adopted + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this factory + * @stable ICU 2.6 + */ + static URegistryKey U_EXPORT2 registerFactory(NumberFormatFactory* toAdopt, UErrorCode& status); + + /** + * Unregister a previously-registered NumberFormatFactory using the key returned from the + * register call. Key becomes invalid after a successful call and should not be used again. + * The NumberFormatFactory corresponding to the key will be deleted. + * Because ICU may choose to cache NumberFormat objects internally, + * this should be called during application shutdown, after all calls to + * NumberFormat::createInstance to avoid undefined behavior. + * @param key the registry key returned by a previous call to registerFactory + * @param status the in/out status code, no special meanings are assigned + * @return TRUE if the factory for the key was successfully unregistered + * @stable ICU 2.6 + */ + static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); + + /** + * Return a StringEnumeration over the locales available at the time of the call, + * including registered locales. + * @return a StringEnumeration over the locales available at the time of the call + * @stable ICU 2.6 + */ + static StringEnumeration* U_EXPORT2 getAvailableLocales(void); +#endif /* UCONFIG_NO_SERVICE */ + + /** + * Returns true if grouping is used in this format. For example, + * in the English locale, with grouping on, the number 1234567 + * might be formatted as "1,234,567". The grouping separator as + * well as the size of each group is locale dependent and is + * determined by sub-classes of NumberFormat. + * @see setGroupingUsed + * @stable ICU 2.0 + */ + UBool isGroupingUsed(void) const; + + /** + * Set whether or not grouping will be used in this format. + * @param newValue if TRUE, grouping will be used in this format.
+ * @see isGroupingUsed + * @stable ICU 2.0 + */ + virtual void setGroupingUsed(UBool newValue); + + /** + * Returns the maximum number of digits allowed in the integer portion of a + * number. + * @return the maximum number of digits allowed in the integer portion of a + * number. + * @see setMaximumIntegerDigits + * @stable ICU 2.0 + */ + int32_t getMaximumIntegerDigits(void) const; + + /** + * Sets the maximum number of digits allowed in the integer portion of a + * number. maximumIntegerDigits must be >= minimumIntegerDigits. If the + * new value for maximumIntegerDigits is less than the current value + * of minimumIntegerDigits, then minimumIntegerDigits will also be set to + * the new value. + * + * @param newValue the new value for the maximum number of digits + * allowed in the integer portion of a number. + * @see getMaximumIntegerDigits + * @stable ICU 2.0 + */ + virtual void setMaximumIntegerDigits(int32_t newValue); + + /** + * Returns the minimum number of digits allowed in the integer portion of a + * number. + * @return the minimum number of digits allowed in the integer portion of a + * number. + * @see setMinimumIntegerDigits + * @stable ICU 2.0 + */ + int32_t getMinimumIntegerDigits(void) const; + + /** + * Sets the minimum number of digits allowed in the integer portion of a + * number. minimumIntegerDigits must be <= maximumIntegerDigits. If the + * new value for minimumIntegerDigits exceeds the current value + * of maximumIntegerDigits, then maximumIntegerDigits will also be set to + * the new value. + * @param newValue the new value to be set. + * @see getMinimumIntegerDigits + * @stable ICU 2.0 + */ + virtual void setMinimumIntegerDigits(int32_t newValue); + + /** + * Returns the maximum number of digits allowed in the fraction portion of a + * number. + * @return the maximum number of digits allowed in the fraction portion of a + * number.
+ * @see setMaximumFractionDigits + * @stable ICU 2.0 + */ + int32_t getMaximumFractionDigits(void) const; + + /** + * Sets the maximum number of digits allowed in the fraction portion of a + * number. maximumFractionDigits must be >= minimumFractionDigits. If the + * new value for maximumFractionDigits is less than the current value + * of minimumFractionDigits, then minimumFractionDigits will also be set to + * the new value. + * @param newValue the new value to be set. + * @see getMaximumFractionDigits + * @stable ICU 2.0 + */ + virtual void setMaximumFractionDigits(int32_t newValue); + + /** + * Returns the minimum number of digits allowed in the fraction portion of a + * number. + * @return the minimum number of digits allowed in the fraction portion of a + * number. + * @see setMinimumFractionDigits + * @stable ICU 2.0 + */ + int32_t getMinimumFractionDigits(void) const; + + /** + * Sets the minimum number of digits allowed in the fraction portion of a + * number. minimumFractionDigits must be <= maximumFractionDigits. If the + * new value for minimumFractionDigits exceeds the current value + * of maximumFractionDigits, then maximumFractionDigits will also be set to + * the new value. + * @param newValue the new value to be set. + * @see getMinimumFractionDigits + * @stable ICU 2.0 + */ + virtual void setMinimumFractionDigits(int32_t newValue); + + /** + * Sets the currency used to display currency + * amounts. This takes effect immediately, if this format is a + * currency format. If this format is not a currency format, then + * the currency is used if and when this object becomes a + * currency format. + * @param theCurrency a 3-letter ISO code indicating new currency + * to use. It need not be null-terminated. May be the empty + * string or NULL to indicate no currency.
+ * @param ec input-output error code + * @stable ICU 3.0 + */ + virtual void setCurrency(const UChar* theCurrency, UErrorCode& ec); + + /** + * Gets the currency used to display currency + * amounts. This may be an empty string for some subclasses. + * @return a 3-letter null-terminated ISO code indicating + * the currency in use, or a pointer to the empty string. + * @stable ICU 2.6 + */ + const UChar* getCurrency() const; + + /** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. + * @param value The UDisplayContext value to set. + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @stable ICU 53 + */ + virtual void setContext(UDisplayContext value, UErrorCode& status); + + /** + * Get the formatter's UDisplayContext value for the specified UDisplayContextType, + * such as UDISPCTX_TYPE_CAPITALIZATION. + * @param type The UDisplayContextType whose value to return + * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @return The UDisplayContextValue for the specified type. + * @stable ICU 53 + */ + virtual UDisplayContext getContext(UDisplayContextType type, UErrorCode& status) const; + +public: + + /** + * Return the class ID for this class. This is useful for + * comparing to a return value from getDynamicClassID(). Note that, + * because NumberFormat is an abstract base class, no fully constructed object + * will have the class ID returned by NumberFormat::getStaticClassID(). + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. 
+ * This method is to implement a simple version of RTTI, since not all + * C++ compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. + * <P> + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + +protected: + + /** + * Default constructor for subclass use only. + * @stable ICU 2.0 + */ + NumberFormat(); + + /** + * Copy constructor. + * @stable ICU 2.0 + */ + NumberFormat(const NumberFormat&); + + /** + * Assignment operator. + * @stable ICU 2.0 + */ + NumberFormat& operator=(const NumberFormat&); + + /** + * Returns the currency in effect for this formatter. Subclasses + * should override this method as needed. Unlike getCurrency(), + * this method should never produce "". + * @param result output parameter for null-terminated result, which must + * have a capacity of at least 4 + * @param ec ICU error code + * @internal + */ + virtual void getEffectiveCurrency(UChar* result, UErrorCode& ec) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * Creates the specified number format style of the desired locale. + * If mustBeDecimalFormat is TRUE, then the returned pointer is + * either a DecimalFormat or it is NULL. + * @internal + */ + static NumberFormat* makeInstance(const Locale& desiredLocale, + UNumberFormatStyle style, + UBool mustBeDecimalFormat, + UErrorCode& errorCode); +#endif /* U_HIDE_INTERNAL_API */ + +private: + + static UBool isStyleSupported(UNumberFormatStyle style); + + /** + * Creates the specified decimal format style of the desired locale. + * @param desiredLocale the given locale. + * @param style the given style. + * @param errorCode Output param filled with success/failure status. + * @return A new NumberFormat instance.
+ */ + static NumberFormat* makeInstance(const Locale& desiredLocale, + UNumberFormatStyle style, + UErrorCode& errorCode); + + UBool fGroupingUsed; + int32_t fMaxIntegerDigits; + int32_t fMinIntegerDigits; + int32_t fMaxFractionDigits; + int32_t fMinFractionDigits; + + protected: + /** \internal */ + static const int32_t gDefaultMaxIntegerDigits; + /** \internal */ + static const int32_t gDefaultMinIntegerDigits; + + private: + UBool fParseIntegerOnly; + UBool fLenient; // TRUE => lenient parse is enabled + + // ISO currency code + UChar fCurrency[4]; + + UDisplayContext fCapitalizationContext; + + friend class ICUNumberFormatFactory; // access to makeInstance + friend class ICUNumberFormatService; + friend class ::NumberFormatTest; // access to isStyleSupported() +}; + +#if !UCONFIG_NO_SERVICE +/** + * A NumberFormatFactory is used to register new number formats. The factory + * should be able to create any of the predefined formats for each locale it + * supports. When registered, the locales it supports extend or override the + * locale already supported by ICU. + * + * @stable ICU 2.6 + */ +class U_I18N_API NumberFormatFactory : public UObject { +public: + + /** + * Destructor + * @stable ICU 3.0 + */ + virtual ~NumberFormatFactory(); + + /** + * Return true if this factory will be visible. Default is true. + * If not visible, the locales supported by this factory will not + * be listed by getAvailableLocales. + * @stable ICU 2.6 + */ + virtual UBool visible(void) const = 0; + + /** + * Return the locale names directly supported by this factory. The number of names + * is returned in count; + * @stable ICU 2.6 + */ + virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) const = 0; + + /** + * Return a number format of the appropriate type. If the locale + * is not supported, return null. If the locale is supported, but + * the type is not provided by this service, return null. 
Otherwise + * return an appropriate instance of NumberFormat. + * @stable ICU 2.6 + */ + virtual NumberFormat* createFormat(const Locale& loc, UNumberFormatStyle formatType) = 0; +}; + +/** + * A NumberFormatFactory that supports a single locale. It can be visible or invisible. + * @stable ICU 2.6 + */ +class U_I18N_API SimpleNumberFormatFactory : public NumberFormatFactory { +protected: + /** + * True if the locale supported by this factory is visible. + * @stable ICU 2.6 + */ + const UBool _visible; + + /** + * The locale supported by this factory, as a UnicodeString. + * @stable ICU 2.6 + */ + UnicodeString _id; + +public: + /** + * @stable ICU 2.6 + */ + SimpleNumberFormatFactory(const Locale& locale, UBool visible = TRUE); + + /** + * @stable ICU 3.0 + */ + virtual ~SimpleNumberFormatFactory(); + + /** + * @stable ICU 2.6 + */ + virtual UBool visible(void) const; + + /** + * @stable ICU 2.6 + */ + virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) const; +}; +#endif /* #if !UCONFIG_NO_SERVICE */ + +// ------------------------------------- + +inline UBool +NumberFormat::isParseIntegerOnly() const +{ + return fParseIntegerOnly; +} + +inline UBool +NumberFormat::isLenient() const +{ + return fLenient; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _NUMFMT +//eof diff --git a/intl/icu/source/i18n/unicode/numsys.h b/intl/icu/source/i18n/unicode/numsys.h new file mode 100644 index 000000000..4cc083f69 --- /dev/null +++ b/intl/icu/source/i18n/unicode/numsys.h @@ -0,0 +1,210 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2014, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +* +* +* File NUMSYS.H +* +* Modification History:* +* Date Name Description +* +******************************************************************************** +*/ + +#ifndef NUMSYS +#define NUMSYS + +#include "unicode/utypes.h" + +/** + * \def NUMSYS_NAME_CAPACITY + * Size of a numbering system name. + * @internal + */ +#define NUMSYS_NAME_CAPACITY 8 + + +/** + * \file + * \brief C++ API: NumberingSystem object + */ + +#if !UCONFIG_NO_FORMATTING + + +#include "unicode/format.h" +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +/** + * Defines numbering systems. A numbering system describes the scheme by which + * numbers are to be presented to the end user. In its simplest form, a numbering + * system describes the set of digit characters that are to be used to display + * numbers, such as Western digits, Thai digits, Arabic-Indic digits, etc., in a + * positional numbering system with a specified radix (typically 10). + * More complicated numbering systems are algorithmic in nature, and require use + * of an RBNF formatter ( rule based number formatter ), in order to calculate + * the characters to be displayed for a given number. Examples of algorithmic + * numbering systems include Roman numerals, Chinese numerals, and Hebrew numerals. + * Formatting rules for many commonly used numbering systems are included in + * the ICU package, based on the numbering system rules defined in CLDR. + * Alternate numbering systems can be specified to a locale by using the + * numbers locale keyword. + */ + +class U_I18N_API NumberingSystem : public UObject { +public: + + /** + * Default Constructor. + * + * @stable ICU 4.2 + */ + NumberingSystem(); + + /** + * Copy constructor. + * @stable ICU 4.2 + */ + NumberingSystem(const NumberingSystem& other); + + /** + * Destructor.
+ * @stable ICU 4.2 + */ + virtual ~NumberingSystem(); + + /** + * Create the default numbering system associated with the specified locale. + * @param inLocale The given locale. + * @param status ICU status + * @stable ICU 4.2 + */ + static NumberingSystem* U_EXPORT2 createInstance(const Locale & inLocale, UErrorCode& status); + + /** + * Create the default numbering system associated with the default locale. + * @stable ICU 4.2 + */ + static NumberingSystem* U_EXPORT2 createInstance(UErrorCode& status); + + /** + * Create a numbering system using the specified radix, type, and description. + * @param radix The radix (base) for this numbering system. + * @param isAlgorithmic TRUE if the numbering system is algorithmic rather than numeric. + * @param description The string representing the set of digits used in a numeric system, or the name of the RBNF + * ruleset to be used in an algorithmic system. + * @param status ICU status + * @stable ICU 4.2 + */ + static NumberingSystem* U_EXPORT2 createInstance(int32_t radix, UBool isAlgorithmic, const UnicodeString& description, UErrorCode& status ); + + /** + * Return a StringEnumeration over all the names of numbering systems known to ICU. + * @stable ICU 4.2 + */ + + static StringEnumeration * U_EXPORT2 getAvailableNames(UErrorCode& status); + + /** + * Create a numbering system from one of the predefined numbering systems specified + * by CLDR and known to ICU, such as "latn", "arabext", or "hanidec"; the full list + * is returned by unumsys_openAvailableNames. Note that some of the names listed at + * http://unicode.org/repos/cldr/tags/latest/common/bcp47/number.xml - e.g. + * default, native, traditional, finance - do not identify specific numbering systems, + * but rather key values that may only be used as part of a locale, which in turn + * defines how they are mapped to a specific numbering system such as "latn" or "hant". + * @param name The name of the numbering system.
+ * @param status ICU status + * @stable ICU 4.2 + */ + static NumberingSystem* U_EXPORT2 createInstanceByName(const char* name, UErrorCode& status); + + + /** + * Returns the radix of this numbering system. Simple positional numbering systems + * typically have radix 10, but might have a radix of e.g. 16 for hexadecimal. The + * radix is less well-defined for non-positional algorithmic systems. + * @stable ICU 4.2 + */ + int32_t getRadix() const; + + /** + * Returns the name of this numbering system if it was created using one of the predefined names + * known to ICU. Otherwise, returns NULL. + * The predefined names are identical to the numbering system names as defined by + * the BCP47 definition in Unicode CLDR. + * See also, http://www.unicode.org/repos/cldr/tags/latest/common/bcp47/number.xml + * @stable ICU 4.6 + */ + const char * getName() const; + + /** + * Returns the description string of this numbering system. For simple + * positional systems this is the ordered string of digits (with length matching + * the radix), e.g. "\u3007\u4E00\u4E8C\u4E09\u56DB\u4E94\u516D\u4E03\u516B\u4E5D" + * for "hanidec"; it would be "0123456789ABCDEF" for hexadecimal. For + * algorithmic systems this is the name of the RBNF ruleset used for formatting, + * e.g. "zh/SpelloutRules/%spellout-cardinal" for "hans" or "%greek-upper" for + * "grek". + * @stable ICU 4.2 + */ + virtual UnicodeString getDescription() const; + + + + /** + * Returns TRUE if this numbering system is algorithmic. + * + * @return TRUE if the numbering system is algorithmic. + * Otherwise, return FALSE. + * @stable ICU 4.2 + */ + UBool isAlgorithmic() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 4.2 + * + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * + * @stable ICU 4.2 + */ + virtual UClassID getDynamicClassID() const; + + +private: + UnicodeString desc; + int32_t radix; + UBool algorithmic; + char name[NUMSYS_NAME_CAPACITY+1]; + + void setRadix(int32_t radix); + + void setAlgorithmic(UBool algorithmic); + + void setDesc(UnicodeString desc); + + void setName(const char* name); + + static UBool isValidDigitString(const UnicodeString &str); + + UBool hasContiguousDecimalDigits() const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // NUMSYS +//eof diff --git a/intl/icu/source/i18n/unicode/plurfmt.h b/intl/icu/source/i18n/unicode/plurfmt.h new file mode 100644 index 000000000..369a8b7cc --- /dev/null +++ b/intl/icu/source/i18n/unicode/plurfmt.h @@ -0,0 +1,617 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2014, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* + +* File PLURFMT.H +******************************************************************************** +*/ + +#ifndef PLURFMT +#define PLURFMT + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: PluralFormat object + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messagepattern.h" +#include "unicode/numfmt.h" +#include "unicode/plurrule.h" + +U_NAMESPACE_BEGIN + +class Hashtable; +class NFRule; + +/** + * <p> + * <code>PluralFormat</code> supports the creation of internationalized + * messages with plural inflection. It is based on <i>plural + * selection</i>, i.e. the caller specifies messages for each + * plural case that can appear in the user's language and the + * <code>PluralFormat</code> selects the appropriate message based on + * the number.
+ * </p> + * <h4>The Problem of Plural Forms in Internationalized Messages</h4> + * <p> + * Different languages have different ways to inflect + * plurals. Creating internationalized messages that include plural + * forms is only feasible when the framework is able to handle plural + * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code> + * doesn't handle this well, because it attaches a number interval to + * each message and selects the message whose interval contains a + * given number. This can only handle a finite number of + * intervals. But in some languages, like Polish, one plural case + * applies to infinitely many intervals (e.g., the plural case applies to + * numbers ending with 2, 3, or 4 except those ending with 12, 13, or + * 14). Thus <code>ChoiceFormat</code> is not adequate. + * </p><p> + * <code>PluralFormat</code> deals with this by breaking the problem + * into two parts: + * <ul> + * <li>It uses <code>PluralRules</code> that can define more complex + * conditions for a plural case than just a single interval. These plural + * rules define both what plural cases exist in a language, and to + * which numbers these cases apply. + * <li>It provides predefined plural rules for many languages. Thus, the programmer + * need not worry about the plural cases of a language and + * does not have to define the plural cases; they can simply + * use the predefined keywords. The whole plural formatting of messages can + * be done using localized patterns from resource bundles. For predefined plural + * rules, see the CLDR <i>Language Plural Rules</i> page at + * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html + * </ul> + * </p> + * <h4>Usage of <code>PluralFormat</code></h4> + * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code> + * with a <code>plural</code> argument type, + * rather than using a stand-alone <code>PluralFormat</code>. 
+ * </p><p> + * This discussion assumes that you use <code>PluralFormat</code> with + * a predefined set of plural rules. You can create one using one of + * the constructors that take a <code>locale</code> object. To + * specify the message pattern, you can either pass it to the + * constructor or set it explicitly using the + * <code>applyPattern()</code> method. The <code>format()</code> + * method takes a number object and selects the message of the + * matching plural case. This message will be returned. + * </p> + * <h5>Patterns and Their Interpretation</h5> + * <p> + * The pattern text defines the message output for each plural case of the + * specified locale. Syntax: + * <pre> + * pluralStyle = [offsetValue] (selector '{' message '}')+ + * offsetValue = "offset:" number + * selector = explicitValue | keyword + * explicitValue = '=' number // adjacent, no white space in between + * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+ + * message: see {@link MessageFormat} + * </pre> + * Pattern_White_Space between syntax elements is ignored, except + * between the {curly braces} and their sub-message, + * and between the '=' and the number of an explicitValue. + * + * </p><p> + * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and + * 'other'. You always have to define a message text for the default plural case + * <code>other</code> which is contained in every rule set. + * If you do not specify a message text for a particular plural case, the + * message text of the plural case <code>other</code> gets assigned to this + * plural case. + * </p><p> + * When formatting, the input number is first matched against the explicitValue clauses. + * If there is no exact-number match, then a keyword is selected by calling + * the <code>PluralRules</code> with the input number <em>minus the offset</em>. + * (The offset defaults to 0 if it is omitted from the pattern string.)
+ * If there is no clause with that keyword, then the "other" clauses is returned. + * </p><p> + * An unquoted pound sign (<code>#</code>) in the selected sub-message + * itself (i.e., outside of arguments nested in the sub-message) + * is replaced by the input number minus the offset. + * The number-minus-offset value is formatted using a + * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you + * need special number formatting, you have to use a <code>MessageFormat</code> + * and explicitly specify a <code>NumberFormat</code> argument. + * <strong>Note:</strong> That argument is formatting without subtracting the offset! + * If you need a custom format and have a non-zero offset, then you need to pass the + * number-minus-offset value as a separate parameter. + * </p> + * For a usage example, see the {@link MessageFormat} class documentation. + * + * <h4>Defining Custom Plural Rules</h4> + * <p>If you need to use <code>PluralFormat</code> with custom rules, you can + * create a <code>PluralRules</code> object and pass it to + * <code>PluralFormat</code>'s constructor. If you also specify a locale in this + * constructor, this locale will be used to format the number in the message + * texts. + * </p><p> + * For more information about <code>PluralRules</code>, see + * {@link PluralRules}. + * </p> + * + * ported from Java + * @stable ICU 4.0 + */ + +class U_I18N_API PluralFormat : public Format { +public: + + /** + * Creates a new cardinal-number <code>PluralFormat</code> for the default locale. + * This locale will be used to get the set of plural rules and for standard + * number formatting. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + PluralFormat(UErrorCode& status); + + /** + * Creates a new cardinal-number <code>PluralFormat</code> for a given locale.
+ * @param locale the <code>PluralFormat</code> will be configured with + * rules for this locale. This locale will also be used for + * standard number formatting. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + PluralFormat(const Locale& locale, UErrorCode& status); + + /** + * Creates a new <code>PluralFormat</code> for a given set of rules. + * The standard number formatting will be done using the default locale. + * @param rules defines the behavior of the <code>PluralFormat</code> + * object. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + PluralFormat(const PluralRules& rules, UErrorCode& status); + + /** + * Creates a new <code>PluralFormat</code> for a given set of rules. + * The standard number formatting will be done using the given locale. + * @param locale the default number formatting will be done using this + * locale. + * @param rules defines the behavior of the <code>PluralFormat</code> + * object. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + * <p> + * <h4>Sample code</h4> + * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample1 + * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample + * <p> + */ + PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status); + + /** + * Creates a new <code>PluralFormat</code> for the plural type. + * The standard number formatting will be done using the given locale. + * @param locale the default number formatting will be done using this + * locale. + * @param type The plural type (e.g., cardinal or ordinal).
+ * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 50 + */ + PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status); + + /** + * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string. + * The default locale will be used to get the set of plural rules and for + * standard number formatting. + * @param pattern the pattern for this <code>PluralFormat</code>. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + PluralFormat(const UnicodeString& pattern, UErrorCode& status); + + /** + * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string and + * locale. + * The locale will be used to get the set of plural rules and for + * standard number formatting. + * @param locale the <code>PluralFormat</code> will be configured with + * rules for this locale. This locale will also be used for + * standard number formatting. + * @param pattern the pattern for this <code>PluralFormat</code>. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status); + + /** + * Creates a new <code>PluralFormat</code> for a given set of rules and a + * pattern (this overload takes no locale argument). + * @param rules defines the behavior of the <code>PluralFormat</code> + * object. + * @param pattern the pattern for this <code>PluralFormat</code>. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call.
+ * @stable ICU 4.0 + */ + PluralFormat(const PluralRules& rules, + const UnicodeString& pattern, + UErrorCode& status); + + /** + * Creates a new <code>PluralFormat</code> for a given set of rules, a + * pattern and a locale. + * @param locale the <code>PluralFormat</code> will be configured with + * rules for this locale. This locale will also be used for + * standard number formatting. + * @param rules defines the behavior of the <code>PluralFormat</code> + * object. + * @param pattern the pattern for this <code>PluralFormat</code>. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + PluralFormat(const Locale& locale, + const PluralRules& rules, + const UnicodeString& pattern, + UErrorCode& status); + + /** + * Creates a new <code>PluralFormat</code> for a plural type, a + * pattern and a locale. + * @param locale the <code>PluralFormat</code> will be configured with + * rules for this locale. This locale will also be used for + * standard number formatting. + * @param type The plural type (e.g., cardinal or ordinal). + * @param pattern the pattern for this <code>PluralFormat</code>. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 50 + */ + PluralFormat(const Locale& locale, + UPluralType type, + const UnicodeString& pattern, + UErrorCode& status); + + /** + * Copy constructor. + * @stable ICU 4.0 + */ + PluralFormat(const PluralFormat& other); + + /** + * Destructor. + * @stable ICU 4.0 + */ + virtual ~PluralFormat(); + + /** + * Sets the pattern used by this plural format. + * The method parses the pattern and creates a map of format strings + * for the plural rules.
+ * Patterns and their interpretation are specified in the class description. + * + * @param pattern the pattern for this plural format + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + void applyPattern(const UnicodeString& pattern, UErrorCode& status); + + + using Format::format; + + /** + * Formats a plural message for a given number. + * + * @param number a number for which the plural message should be formatted + * for. If no pattern has been applied to this + * <code>PluralFormat</code> object yet, the formatted number + * will be returned. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return the string containing the formatted plural message. + * @stable ICU 4.0 + */ + UnicodeString format(int32_t number, UErrorCode& status) const; + + /** + * Formats a plural message for a given number. + * + * @param number a number for which the plural message should be formatted + * for. If no pattern has been applied to this + * PluralFormat object yet, the formatted number + * will be returned. + * @param status output param set to success or failure code on exit, which + * must not indicate a failure before the function call. + * @return the string containing the formatted plural message. + * @stable ICU 4.0 + */ + UnicodeString format(double number, UErrorCode& status) const; + + /** + * Formats a plural message for a given number. + * + * @param number a number for which the plural message should be formatted + * for. If no pattern has been applied to this + * <code>PluralFormat</code> object yet, the formatted number + * will be returned. + * @param appendTo output parameter to receive result. + * result is appended to existing contents. + * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return the string containing the formatted plural message. + * @stable ICU 4.0 + */ + UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Formats a plural message for a given number. + * + * @param number a number for which the plural message should be formatted + * for. If no pattern has been applied to this + * PluralFormat object yet, the formatted number + * will be returned. + * @param appendTo output parameter to receive result. + * result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return the string containing the formatted plural message. + * @stable ICU 4.0 + */ + UnicodeString& format(double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Sets the locale used by this <code>PluralFormat</code> object. + * Note: Calling this method resets this <code>PluralFormat</code> object, + * i.e., a pattern that was applied previously will be removed, + * and the NumberFormat is set to the default number format for + * the locale. The resulting format behaves the same as one + * constructed from {@link #PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status)} + * with UPLURAL_TYPE_CARDINAL. + * @param locale the <code>locale</code> to use to configure the formatter. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call.
+ * @deprecated ICU 50 This method clears the pattern and might create + * a different kind of PluralRules instance; + * use one of the constructors to create a new instance instead. + */ + void setLocale(const Locale& locale, UErrorCode& status); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Sets the number format used by this formatter. You only need to + * call this if you want a different number format than the default + * formatter for the locale. + * @param format the number format to use. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.0 + */ + void setNumberFormat(const NumberFormat* format, UErrorCode& status); + + /** + * Assignment operator + * + * @param other the PluralFormat object to copy from. + * @stable ICU 4.0 + */ + PluralFormat& operator=(const PluralFormat& other); + + /** + * Return true if another object is semantically equal to this one. + * + * @param other the Format object to be compared with. + * @return true if other is semantically equal to this. + * @stable ICU 4.0 + */ + virtual UBool operator==(const Format& other) const; + + /** + * Return true if another object is semantically unequal to this one. + * + * @param other the Format object to be compared with. + * @return true if other is semantically unequal to this. + * @stable ICU 4.0 + */ + virtual UBool operator!=(const Format& other) const; + + /** + * Clones this Format object polymorphically. The caller owns the + * result and should delete it when done. + * @stable ICU 4.0 + */ + virtual Format* clone(void) const; + + /** + * Formats a plural message for a number taken from a Formattable object. + * + * @param obj The object containing a number for which the + * plural message should be formatted. + * The object must be of a numeric type. + * @param appendTo output parameter to receive result. + * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.0 + */ + UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Returns the pattern from applyPattern() or constructor(). + * + * @param appendTo output parameter to receive result. + * Result is appended to existing contents. + * @return the UnicodeString with inserted pattern. + * @stable ICU 4.0 + */ + UnicodeString& toPattern(UnicodeString& appendTo); + + /** + * This method is not yet supported by <code>PluralFormat</code>. + * <P> + * Before calling, set parse_pos.index to the offset you want to start + * parsing at in the source. After calling, parse_pos.index is the end of + * the text you parsed. If error occurs, index is unchanged. + * <P> + * When parsing, leading whitespace is discarded (with a successful parse), + * while trailing whitespace is left as is. + * <P> + * See Format::parseObject() for more. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. Upon return + * this param is set to the position after the + * last character successfully parsed. If the + * source is not parsed successfully, this param + * will remain unchanged. + * @stable ICU 4.0 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& parse_pos) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 4.0 + * + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * + * @stable ICU 4.0 + */ + virtual UClassID getDynamicClassID() const; + +#if (defined(__xlC__) && (__xlC__ < 0x0C00)) || (U_PLATFORM == U_PF_OS390) || (U_PLATFORM ==U_PF_OS400) +// Work around a compiler bug on xlC 11.1 on AIX 7.1 that would +// prevent PluralSelectorAdapter from implementing private PluralSelector. +// xlC error message: +// 1540-0300 (S) The "private" member "class icu_49::PluralFormat::PluralSelector" cannot be accessed. +public: +#else +private: +#endif + /** + * @internal + */ + class U_I18N_API PluralSelector : public UMemory { + public: + virtual ~PluralSelector(); + /** + * Given a number, returns the appropriate PluralFormat keyword. + * + * @param context worker object for the selector. + * @param number The number to be plural-formatted. + * @param ec Error code. + * @return The selected PluralFormat keyword. + * @internal + */ + virtual UnicodeString select(void *context, double number, UErrorCode& ec) const = 0; + }; + + /** + * @internal + */ + class U_I18N_API PluralSelectorAdapter : public PluralSelector { + public: + PluralSelectorAdapter() : pluralRules(NULL) { + } + + virtual ~PluralSelectorAdapter(); + + virtual UnicodeString select(void *context, double number, UErrorCode& /*ec*/) const; /**< @internal */ + + void reset(); + + PluralRules* pluralRules; + }; + +#if defined(__xlC__) +// End of xlC bug workaround, keep remaining definitions private. NOTE(review): this guard tests only __xlC__, unlike the opening #if above (xlC version, OS390, OS400) - confirm the asymmetry is intended. +private: +#endif + Locale locale; + MessagePattern msgPattern; + NumberFormat* numberFormat; + double offset; + PluralSelectorAdapter pluralRulesWrapper; + + PluralFormat(); // default constructor not implemented + void init(const PluralRules* rules, UPluralType type, UErrorCode& status); + /** + * Copies dynamically allocated values (pointer fields). + * Others are copied using their copy constructors and assignment operators.
+ */ + void copyObjects(const PluralFormat& other); + + UnicodeString& format(const Formattable& numberObject, double number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; /**< @internal */ + + /** + * Finds the PluralFormat sub-message for the given number, or the "other" sub-message. + * @param pattern A MessagePattern. + * @param partIndex the index of the first PluralFormat argument style part. + * @param selector the PluralSelector for mapping the number (minus offset) to a keyword. + * @param context worker object for the selector. + * @param number a number to be matched to one of the PluralFormat argument's explicit values, + * or mapped via the PluralSelector. + * @param ec ICU error code. + * @return the sub-message start part index. + */ + static int32_t findSubMessage( + const MessagePattern& pattern, int32_t partIndex, + const PluralSelector& selector, void *context, double number, UErrorCode& ec); /**< @internal */ + + void parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, + Formattable& result, FieldPosition& pos) const; + + friend class MessageFormat; + friend class NFRule; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // PLURFMT +//eof diff --git a/intl/icu/source/i18n/unicode/plurrule.h b/intl/icu/source/i18n/unicode/plurrule.h new file mode 100644 index 000000000..146e6bea8 --- /dev/null +++ b/intl/icu/source/i18n/unicode/plurrule.h @@ -0,0 +1,512 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2008-2015, International Business Machines Corporation and +* others. All Rights Reserved.
+******************************************************************************* +* +* +* File PLURRULE.H +* +* Modification History:* +* Date Name Description +* +******************************************************************************** +*/ + +#ifndef PLURRULE +#define PLURRULE + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: PluralRules object + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/upluralrules.h" + +/** + * Value returned by PluralRules::getUniqueKeywordValue() when there is no + * unique value to return. + * @stable ICU 4.8 + */ +#define UPLRULES_NO_UNIQUE_VALUE ((double)-0.00123456777) + +U_NAMESPACE_BEGIN + +class Hashtable; +class FixedDecimal; +class VisibleDigitsWithExponent; +class RuleChain; +class PluralRuleParser; +class PluralKeywordEnumeration; +class AndConstraint; +class SharedPluralRules; + +/** + * Defines rules for mapping non-negative numeric values onto a small set of + * keywords. Rules are constructed from a text description, consisting + * of a series of keywords and conditions. The {@link #select} method + * examines each condition in order and returns the keyword for the + * first condition that matches the number. If none match, + * default rule(other) is returned. + * + * For more information, details, and tips for writing rules, see the + * LDML spec, C.11 Language Plural Rules: + * http://www.unicode.org/draft/reports/tr35/tr35.html#Language_Plural_Rules + * + * Examples:<pre> + * "one: n is 1; few: n in 2..4"</pre> + * This defines two rules, for 'one' and 'few'. The condition for + * 'one' is "n is 1" which means that the number must be equal to + * 1 for this condition to pass. The condition for 'few' is + * "n in 2..4" which means that the number must be between 2 and + * 4 inclusive for this condition to pass. All other numbers + * are assigned the keyword "other" by the default rule. 
+ * </p><pre> + * "zero: n is 0; one: n is 1; zero: n mod 100 in 1..19"</pre> + * This illustrates that the same keyword can be defined multiple times. + * Each rule is examined in order, and the first keyword whose condition + * passes is the one returned. Also note that a modulus is applied + * to n in the last rule. Thus its condition holds for 119, 219, 319... + * </p><pre> + * "one: n is 1; few: n mod 10 in 2..4 and n mod 100 not in 12..14"</pre> + * This illustrates conjunction and negation. The condition for 'few' + * has two parts, both of which must be met: "n mod 10 in 2..4" and + * "n mod 100 not in 12..14". The first part applies a modulus to n + * before the test as in the previous example. The second part applies + * a different modulus and also uses negation, thus it matches all + * numbers _not_ in 12, 13, 14, 112, 113, 114, 212, 213, 214... + * </p> + * <p> + * Syntax:<pre> + * \code + * rules = rule (';' rule)* + * rule = keyword ':' condition + * keyword = <identifier> + * condition = and_condition ('or' and_condition)* + * and_condition = relation ('and' relation)* + * relation = is_relation | in_relation | within_relation | 'n' <EOL> + * is_relation = expr 'is' ('not')? value + * in_relation = expr ('not')? 'in' range_list + * within_relation = expr ('not')? 'within' range + * expr = ('n' | 'i' | 'f' | 'v' | 'j') ('mod' value)? + * range_list = (range | value) (',' range_list)* + * value = digit+ ('.' digit+)? + * digit = 0|1|2|3|4|5|6|7|8|9 + * range = value'..'value + * \endcode + * </pre></p> + * <p> + * <p> + * The i, f, and v values are defined as follows: + * </p> + * <ul> + * <li>i to be the integer digits.</li> + * <li>f to be the visible fractional digits, as an integer.</li> + * <li>v to be the number of visible fraction digits.</li> + * <li>j is defined to only match integers.
That is j is 3 fails if v != 0 (e.g., for 3.1 or 3.0).</li> + * </ul> + * <p> + * Examples are in the following table: + * </p> + * <table border='1' style="border-collapse:collapse"> + * <tbody> + * <tr> + * <th>n</th> + * <th>i</th> + * <th>f</th> + * <th>v</th> + * </tr> + * <tr> + * <td>1.0</td> + * <td>1</td> + * <td align="right">0</td> + * <td>1</td> + * </tr> + * <tr> + * <td>1.00</td> + * <td>1</td> + * <td align="right">0</td> + * <td>2</td> + * </tr> + * <tr> + * <td>1.3</td> + * <td>1</td> + * <td align="right">3</td> + * <td>1</td> + * </tr> + * <tr> + * <td>1.03</td> + * <td>1</td> + * <td align="right">3</td> + * <td>2</td> + * </tr> + * <tr> + * <td>1.23</td> + * <td>1</td> + * <td align="right">23</td> + * <td>2</td> + * </tr> + * </tbody> + * </table> + * <p> + * The difference between 'in' and 'within' is that 'in' only includes integers in the specified range, while 'within' + * includes all values. Using 'within' with a range_list consisting entirely of values is the same as using 'in' (it's + * not an error). + * </p> + + * An "identifier" is a sequence of characters that do not have the + * Unicode Pattern_Syntax or Pattern_White_Space properties. + * <p> + * The difference between 'in' and 'within' is that 'in' only includes + * integers in the specified range, while 'within' includes all values. + * Using 'within' with a range_list consisting entirely of values is the + * same as using 'in' (it's not an error). + *</p> + * <p> + * Keywords + * could be defined by users or from ICU locale data. There are 6 + * predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and + * 'other'. Callers need to check the value of keyword returned by + * {@link #select} method. + * </p> + * + * Examples:<pre> + * UnicodeString keyword = pl->select(number); + * if (keyword == UnicodeString("one")) { + * ... + * } + * else if ( ...
) + * </pre> + * <strong>Note:</strong><br> + * <p> + * ICU defines plural rules for many locales based on CLDR <i>Language Plural Rules</i>. + * For these predefined rules, see CLDR page at + * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html + * </p> + */ +class U_I18N_API PluralRules : public UObject { +public: + + /** + * Constructor. + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * + * @stable ICU 4.0 + */ + PluralRules(UErrorCode& status); + + /** + * Copy constructor. + * @stable ICU 4.0 + */ + PluralRules(const PluralRules& other); + + /** + * Destructor. + * @stable ICU 4.0 + */ + virtual ~PluralRules(); + + /** + * Clone + * @stable ICU 4.0 + */ + PluralRules* clone() const; + + /** + * Assignment operator. + * @stable ICU 4.0 + */ + PluralRules& operator=(const PluralRules&); + + /** + * Creates a PluralRules from a description if it is parsable, otherwise + * returns NULL. + * + * @param description rule description + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return new PluralRules pointer. NULL if there is an error. + * @stable ICU 4.0 + */ + static PluralRules* U_EXPORT2 createRules(const UnicodeString& description, + UErrorCode& status); + + /** + * The default rules that accept any number. + * + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return new PluralRules pointer. NULL if there is an error. + * @stable ICU 4.0 + */ + static PluralRules* U_EXPORT2 createDefaultRules(UErrorCode& status); + + /** + * Provides access to the predefined cardinal-number <code>PluralRules</code> for a given + * locale. + * Same as forLocale(locale, UPLURAL_TYPE_CARDINAL, status).
+ * + * @param locale The locale for which a <code>PluralRules</code> object is + * returned. + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return The predefined <code>PluralRules</code> object pointer for + * this locale. If there are no predefined rules for this locale, + * the rules for the closest parent in the locale hierarchy + * that has one will be returned. The final fallback always + * returns the default 'other' rules. + * @stable ICU 4.0 + */ + static PluralRules* U_EXPORT2 forLocale(const Locale& locale, UErrorCode& status); + + /** + * Provides access to the predefined <code>PluralRules</code> for a given + * locale and the plural type. + * + * @param locale The locale for which a <code>PluralRules</code> object is + * returned. + * @param type The plural type (e.g., cardinal or ordinal). + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return The predefined <code>PluralRules</code> object pointer for + * this locale. If there are no predefined rules for this locale, + * the rules for the closest parent in the locale hierarchy + * that has one will be returned. The final fallback always + * returns the default 'other' rules. + * @stable ICU 50 + */ + static PluralRules* U_EXPORT2 forLocale(const Locale& locale, UPluralType type, UErrorCode& status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Return a StringEnumeration over the locales for which there is plural data. + * @return a StringEnumeration over the locales available. + * @internal + */ + static StringEnumeration* U_EXPORT2 getAvailableLocales(UErrorCode &status); + + /** + * Returns whether or not there are overrides. + * @param locale the locale to check. + * @return whether or not there are overrides for the locale. + * @internal + */ + static UBool hasOverride(const Locale &locale); + + /** + * For ICU use only.
+ * Creates a SharedPluralRules object. + * @internal + */ + static PluralRules* U_EXPORT2 internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status); + + /** + * For ICU use only. + * Returns a handle to the shared, cached PluralRules instance. + * Caller must call removeRef() on the returned value once it is done with + * the shared instance. + * @internal + */ + static const SharedPluralRules* U_EXPORT2 createSharedInstance( + const Locale& locale, UPluralType type, UErrorCode& status); + + +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Given a number, returns the keyword of the first rule that applies to + * the number. This function can be used with isKeyword* functions to + * determine the keyword for default plural rules. + * + * @param number The number for which the rule has to be determined. + * @return The keyword of the selected rule. + * @stable ICU 4.0 + */ + UnicodeString select(int32_t number) const; + + /** + * Given a number, returns the keyword of the first rule that applies to + * the number. This function can be used with isKeyword* functions to + * determine the keyword for default plural rules. + * + * @param number The number for which the rule has to be determined. + * @return The keyword of the selected rule. + * @stable ICU 4.0 + */ + UnicodeString select(double number) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * @internal + */ + UnicodeString select(const FixedDecimal &number) const; + /** + * @internal + */ + UnicodeString select(const VisibleDigitsWithExponent &number) const; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Returns a list of all rule keywords used in this <code>PluralRules</code> + * object. The rule 'other' is always present by default. + * + * @param status Output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return StringEnumeration with the keywords. + * The caller must delete the object.
+ * @stable ICU 4.0 + */ + StringEnumeration* getKeywords(UErrorCode& status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Deprecated Function, does not return useful results. + * + * Originally intended to return a unique value for this keyword if it exists, + * else the constant UPLRULES_NO_UNIQUE_VALUE. + * + * @param keyword The keyword. + * @return Stub deprecated function returns UPLRULES_NO_UNIQUE_VALUE always. + * @deprecated ICU 55 + */ + double getUniqueKeywordValue(const UnicodeString& keyword); + + /** + * Deprecated Function, does not produce useful results. + * + * Originally intended to return all the values for which select() would return the keyword. + * If the keyword is unknown, returns no values, but this is not an error. If + * the number of values is unlimited, returns no values and -1 as the + * count. + * + * The number of returned values is typically small. + * + * @param keyword The keyword. + * @param dest Array into which to put the returned values. May + * be NULL if destCapacity is 0. + * @param destCapacity The capacity of the array, must be at least 0. + * @param status The error code. Deprecated function, always sets U_UNSUPPORTED_ERROR. + * @return The count of values available, or -1. This count + * can be larger than destCapacity, but no more than + * destCapacity values will be written. + * @deprecated ICU 55 + */ + int32_t getAllKeywordValues(const UnicodeString &keyword, + double *dest, int32_t destCapacity, + UErrorCode& status); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Returns sample values for which select() would return the keyword. If + * the keyword is unknown, returns no values, but this is not an error. + * + * The number of returned values is typically small. + * + * @param keyword The keyword. + * @param dest Array into which to put the returned values. May + * be NULL if destCapacity is 0. + * @param destCapacity The capacity of the array, must be at least 0. + * @param status The error code.
+ * @return The count of values written. + * If more than destCapacity samples are available, then + * only destCapacity are written, and destCapacity is returned as the count, + * rather than setting a U_BUFFER_OVERFLOW_ERROR. + * (The actual number of keyword values could be unlimited.) + * @stable ICU 4.8 + */ + int32_t getSamples(const UnicodeString &keyword, + double *dest, int32_t destCapacity, + UErrorCode& status); + + /** + * Returns TRUE if the given keyword is defined in this + * <code>PluralRules</code> object. + * + * @param keyword the input keyword. + * @return TRUE if the input keyword is defined. + * Otherwise, return FALSE. + * @stable ICU 4.0 + */ + UBool isKeyword(const UnicodeString& keyword) const; + + + /** + * Returns the keyword for the default plural form. + * + * @return the keyword for the default plural form. + * @stable ICU 4.0 + */ + UnicodeString getKeywordOther() const; + +#ifndef U_HIDE_INTERNAL_API + /** + * + * @internal + */ + UnicodeString getRules() const; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Compares the equality of two PluralRules objects. + * + * @param other The other PluralRules object to be compared with. + * @return True if the given PluralRules is the same as this + * PluralRules; false otherwise. + * @stable ICU 4.0 + */ + virtual UBool operator==(const PluralRules& other) const; + + /** + * Compares the inequality of two PluralRules objects. + * + * @param other The PluralRules object to be compared with. + * @return True if the given PluralRules is not the same as this + * PluralRules; false otherwise. + * @stable ICU 4.0 + */ + UBool operator!=(const PluralRules& other) const {return !operator==(other);} + + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 4.0 + * + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * + * @stable ICU 4.0 + */ + virtual UClassID getDynamicClassID() const; + + +private: + RuleChain *mRules; + + PluralRules(); // default constructor not implemented + void parseDescription(const UnicodeString& ruleData, UErrorCode &status); + int32_t getNumberValue(const UnicodeString& token) const; + UnicodeString getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& status); + RuleChain *rulesForKeyword(const UnicodeString &keyword) const; + + friend class PluralRuleParser; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _PLURRULE +//eof diff --git a/intl/icu/source/i18n/unicode/rbnf.h b/intl/icu/source/i18n/unicode/rbnf.h new file mode 100644 index 000000000..d41ffbe38 --- /dev/null +++ b/intl/icu/source/i18n/unicode/rbnf.h @@ -0,0 +1,1078 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1997-2015, International Business Machines Corporation and others. +* All Rights Reserved. +******************************************************************************* +*/ + +#ifndef RBNF_H +#define RBNF_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Rule Based Number Format + */ + +/** + * \def U_HAVE_RBNF + * This will be 0 if RBNF support is not included in ICU + * and 1 if it is. + * + * @stable ICU 2.4 + */ +#if UCONFIG_NO_FORMATTING +#define U_HAVE_RBNF 0 +#else +#define U_HAVE_RBNF 1 + +#include "unicode/dcfmtsym.h" +#include "unicode/fmtable.h" +#include "unicode/locid.h" +#include "unicode/numfmt.h" +#include "unicode/unistr.h" +#include "unicode/strenum.h" +#include "unicode/brkiter.h" +#include "unicode/upluralrules.h" + +U_NAMESPACE_BEGIN + +class NFRule; +class NFRuleSet; +class LocalizationInfo; +class PluralFormat; +class RuleBasedCollator; + +/** + * Tags for the predefined rulesets.
+ * + * @stable ICU 2.2 + */ +enum URBNFRuleSetTag { + URBNF_SPELLOUT, + URBNF_ORDINAL, + URBNF_DURATION, + URBNF_NUMBERING_SYSTEM, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal URBNFRuleSetTag value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + URBNF_COUNT +#endif // U_HIDE_DEPRECATED_API +}; + +/** + * The RuleBasedNumberFormat class formats numbers according to a set of rules. This number formatter is + * typically used for spelling out numeric values in words (e.g., 25,376 as + * "twenty-five thousand three hundred seventy-six" or "vingt-cinq mille trois + * cents soixante-seize" or + * "fünfundzwanzigtausenddreihundertsechsundsiebzig"), but can also be used for + * other complicated formatting tasks, such as formatting a number of seconds as hours, + * minutes and seconds (e.g., 3,730 as "1:02:10"). + * + * <p>The resources contain three predefined formatters for each locale: spellout, which + * spells out a value in words (123 is "one hundred twenty-three"); ordinal, which + * appends an ordinal suffix to the end of a numeral (123 is "123rd"); and + * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is + * "2:03"). The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s + * by supplying programmer-defined rule sets.</p> + * + * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description + * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource + * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em> + * Each rule has a string of output text and a value or range of values it is applicable to.
+ * In a typical spellout rule set, the first twenty rules are the words for the numbers from + * 0 to 19:</p> + * + * <pre>zero; one; two; three; four; five; six; seven; eight; nine; + * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre> + * + * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and + * we only have to supply the words for the multiples of 10:</p> + * + * <pre> 20: twenty[->>]; + * 30: thirty[->>]; + * 40: forty[->>]; + * 50: fifty[->>]; + * 60: sixty[->>]; + * 70: seventy[->>]; + * 80: eighty[->>]; + * 90: ninety[->>];</pre> + * + * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the + * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable + * to all numbers from its own base value to one less than the next rule's base value. The + * ">>" token is called a <em>substitution</em> and tells the formatter to + * isolate the number's ones digit, format it using this same set of rules, and place the + * result at the position of the ">>" token. Text in brackets is omitted if + * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24 + * is "twenty-four," not "twenty four").</p> + * + * <p>For even larger numbers, we can actually look up several parts of the number in the + * list:</p> + * + * <pre>100: << hundred[ >>];</pre> + * + * <p>The "<<" represents a new kind of substitution. The << isolates + * the hundreds digit (and any digits to its left), formats it using this same rule set, and + * places the result where the "<<" was. Notice also that the meaning of + * >> has changed: it now refers to both the tens and the ones digits. The meaning of + * both substitutions depends on the rule's base value.
The base value determines the rule's <em>divisor,</em> + * which is the highest power of 10 that is less than or equal to the base value (the user + * can change this). To fill in the substitutions, the formatter divides the number being + * formatted by the divisor. The integral quotient is used to fill in the << + * substitution, and the remainder is used to fill in the >> substitution. The meaning + * of the brackets changes similarly: text in brackets is omitted if the value being + * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so + * if a substitution is filled in with text that includes another substitution, that + * substitution is also filled in.</p> + * + * <p>This rule covers values up to 999, at which point we add another rule:</p> + * + * <pre>1000: << thousand[ >>];</pre> + * + * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's + * base value is a higher power of 10, changing the rule's divisor. This rule can actually be + * used all the way up to 999,999. This allows us to finish out the rules as follows:</p> + * + * <pre> 1,000,000: << million[ >>]; + * 1,000,000,000: << billion[ >>]; + * 1,000,000,000,000: << trillion[ >>]; + * 1,000,000,000,000,000: OUT OF RANGE!;</pre> + * + * <p>Commas, periods, and spaces can be used in the base values to improve legibility and + * are ignored by the rule parser. The last rule in the list is customarily treated as an + * "overflow rule," applying to everything from its base value on up, and often (as + * in this example) being used to print out an error message or default representation. 
+ * Notice also that the size of the major groupings in large numbers is controlled by the + * spacing of the rules: because in English we group numbers by thousand, the higher rules + * are separated from each other by a factor of 1,000.</p> + * + * <p>To see how these rules actually work in practice, consider the following example: + * Formatting 25,340 with this rule set would work like this:</p> + * + * <table border="0" width="100%"> + * <tr> + * <td><strong><< thousand >></strong></td> + * <td>[the rule whose base value is 1,000 is applicable to 25,340]</td> + * </tr> + * <tr> + * <td><strong>twenty->></strong> thousand >></td> + * <td>[25,340 over 1,000 is 25. The rule for 20 applies.]</td> + * </tr> + * <tr> + * <td>twenty-<strong>five</strong> thousand >></td> + * <td>[25 mod 10 is 5. The rule for 5 is "five."]</td> + * </tr> + * <tr> + * <td>twenty-five thousand <strong><< hundred >></strong></td> + * <td>[25,340 mod 1,000 is 340. The rule for 100 applies.]</td> + * </tr> + * <tr> + * <td>twenty-five thousand <strong>three</strong> hundred >></td> + * <td>[340 over 100 is 3. The rule for 3 is "three."]</td> + * </tr> + * <tr> + * <td>twenty-five thousand three hundred <strong>forty</strong></td> + * <td>[340 mod 100 is 40. The rule for 40 applies. Since 40 divides + * evenly by 10, the hyphen and substitution in the brackets are omitted.]</td> + * </tr> + * </table> + * + * <p>The above syntax suffices only to format positive integers. To format negative numbers, + * we add a special rule:</p> + * + * <pre>-x: minus >>;</pre> + * + * <p>This is called a <em>negative-number rule,</em> and is identified by "-x" + * where the base value would be. This rule is used to format all negative numbers.
The + * >> token here means "find the number's absolute value, format it with these + * rules, and put the result here."</p> + * + * <p>We also add a special rule called a <em>fraction rule</em> for numbers with fractional + * parts:</p> + * + * <pre>x.x: << point >>;</pre> + * + * <p>This rule is used for all positive non-integers (negative non-integers pass through the + * negative-number rule first and then through this rule). Here, the << token refers to + * the number's integral part, and the >> to the number's fractional part. The + * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be + * formatted as "one hundred twenty-three point four five six").</p> + * + * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p> + * + * <p>There is actually much more flexibility built into the rule language than the + * description above shows. A formatter may own multiple rule sets, which can be selected by + * the caller, and which can use each other to fill in their substitutions. Substitutions can + * also be filled in with digits, using a DecimalFormat object. There is syntax that can be + * used to alter a rule's divisor in various ways. And there is provision for much more + * flexible fraction handling. A complete description of the rule syntax follows:</p> + * + * <hr> + * + * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule + * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule + * set name must begin with a % sign. Rule sets with names that begin with a single % sign + * are <em>public:</em> the caller can specify that they be used to format and parse numbers. + * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use + * of other rule sets.
If a formatter only has one rule set, the name may be omitted.</p> + * + * <p>The user can also specify a special "rule set" named <tt>%%lenient-parse</tt>. + * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt> + * description which is used to define equivalences for lenient parsing. For more information + * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing, + * see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning + * in collation rules, such as '&', have no particular meaning when appearing outside + * of the <tt>lenient-parse</tt> rule set.</p> + * + * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em> + * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em> + * These parameters are controlled by the description syntax, which consists of a <em>rule + * descriptor,</em> a colon, and a <em>rule body.</em></p> + * + * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the + * name of a token):</p> + * + * <table border="0" width="100%"> + * <tr> + * <td><em>bv</em>:</td> + * <td><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal + * number expressed using ASCII digits. <em>bv</em> may contain spaces, period, and commas, + * which are ignored. The rule's divisor is the highest power of 10 less than or equal to + * the base value.</td> + * </tr> + * <tr> + * <td><em>bv</em>/<em>rad</em>:</td> + * <td><em>bv</em> specifies the rule's base value. The rule's divisor is the + * highest power of <em>rad</em> less than or equal to the base value.</td> + * </tr> + * <tr> + * <td><em>bv</em>>:</td> + * <td><em>bv</em> specifies the rule's base value. 
To calculate the divisor, + * let the radix be 10, and the exponent be the highest exponent of the radix that yields a + * result less than or equal to the base value. Every > character after the base value + * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix + * raised to the power of the exponent; otherwise, the divisor is 1.</td> + * </tr> + * <tr> + * <td><em>bv</em>/<em>rad</em>>:</td> + * <td><em>bv</em> specifies the rule's base value. To calculate the divisor, + * let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that + * yields a result less than or equal to the base value. Every > character after the radix + * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix + * raised to the power of the exponent; otherwise, the divisor is 1.</td> + * </tr> + * <tr> + * <td>-x:</td> + * <td>The rule is a negative-number rule.</td> + * </tr> + * <tr> + * <td>x.x:</td> + * <td>The rule is an <em>improper fraction rule</em>. If the full stop in + * the middle of the rule name is replaced with the decimal point + * that is used in the language or DecimalFormatSymbols, then that rule will + * have precedence when formatting and parsing this rule. For example, some + * languages use the comma, and can thus be written as x,x instead. For example, + * you can use "x.x: << point >>;x,x: << comma >>;" to + * handle the decimal point that matches the language's natural spelling of + * the punctuation of either the full stop or comma.</td> + * </tr> + * <tr> + * <td>0.x:</td> + * <td>The rule is a <em>proper fraction rule</em>. If the full stop in + * the middle of the rule name is replaced with the decimal point + * that is used in the language or DecimalFormatSymbols, then that rule will + * have precedence when formatting and parsing this rule. For example, some + * languages use the comma, and can thus be written as 0,x instead. 
For example, + * you can use "0.x: point >>;0,x: comma >>;" to + * handle the decimal point that matches the language's natural spelling of + * the punctuation of either the full stop or comma.</td> + * </tr> + * <tr> + * <td>x.0:</td> + * <td>The rule is a <em>master rule</em>. If the full stop in + * the middle of the rule name is replaced with the decimal point + * that is used in the language or DecimalFormatSymbols, then that rule will + * have precedence when formatting and parsing this rule. For example, some + * languages use the comma, and can thus be written as x,0 instead. For example, + * you can use "x.0: << point;x,0: << comma;" to + * handle the decimal point that matches the language's natural spelling of + * the punctuation of either the full stop or comma.</td> + * </tr> + * <tr> + * <td>Inf:</td> + * <td>The rule for infinity.</td> + * </tr> + * <tr> + * <td>NaN:</td> + * <td>The rule for an IEEE 754 NaN (not a number).</td> + * </tr> + * <tr> + * <tr> + * <td><em>nothing</em></td> + * <td>If the rule's rule descriptor is left out, the base value is one plus the + * preceding rule's base value (or zero if this is the first rule in the list) in a normal + * rule set. In a fraction rule set, the base value is the same as the preceding rule's + * base value.</td> + * </tr> + * </table> + * + * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending + * on whether it is used to format a number's integral part (or the whole number) or a + * number's fractional part. Using a rule set to format a rule's fractional part makes it a + * fraction rule set.</p> + * + * <p>Which rule is used to format a number is defined according to one of the following + * algorithms: If the rule set is a regular rule set, do the following: + * + * <ul> + * <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>), + * use the master rule. 
(If the number being formatted was passed in as a <tt>long</tt>, + * the master rule is ignored.)</li> + * <li>If the number is negative, use the negative-number rule.</li> + * <li>If the number has a fractional part and is greater than 1, use the improper fraction + * rule.</li> + * <li>If the number has a fractional part and is between 0 and 1, use the proper fraction + * rule.</li> + * <li>Binary-search the rule list for the rule with the highest base value less than or equal + * to the number. If that rule has two substitutions, its base value is not an even multiple + * of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the + * rule that precedes it in the rule list. Otherwise, use the rule itself.</li> + * </ul> + * + * <p>If the rule set is a fraction rule set, do the following: + * + * <ul> + * <li>Ignore negative-number and fraction rules.</li> + * <li>For each rule in the list, multiply the number being formatted (which will always be + * between 0 and 1) by the rule's base value. Keep track of the distance between the result + * and the nearest integer.</li> + * <li>Use the rule that produced the result closest to zero in the above calculation. In the + * event of a tie or a direct hit, use the first matching rule encountered. (The idea here is + * to try each rule's base value as a possible denominator of a fraction. Whichever + * denominator produces the fraction closest in value to the number being formatted wins.) If + * the rule following the matching rule has the same base value, use it if the numerator of + * the fraction is anything other than 1; if the numerator is 1, use the original matching + * rule. (This is to allow singular and plural forms of the rule text without a lot of extra + * hassle.)</li> + * </ul> + * + * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule + * may include zero, one, or two <em>substitution tokens,</em> and a range of text in + * brackets.
The brackets denote optional text (and may also include one or both + * substitutions). The exact meanings of the substitution tokens, and under what conditions + * optional text is omitted, depend on the syntax of the substitution token and the context. + * The rest of the text in a rule body is literal text that is output when the rule matches + * the number being formatted.</p> + * + * <p>A substitution token begins and ends with a <em>token character.</em> The token + * character and the context together specify a mathematical operation to be performed on the + * number being formatted. An optional <em>substitution descriptor </em>specifies how the + * value resulting from that operation is used to fill in the substitution. The position of + * the substitution token in the rule body specifies the location of the resultant text in + * the original rule text.</p> + * + * <p>The meanings of the substitution token characters are as follows:</p> + * + * <table border="0" width="100%"> + * <tr> + * <td>>></td> + * <td>in normal rule</td> + * <td>Divide the number by the rule's divisor and format the remainder</td> + * </tr> + * <tr> + * <td></td> + * <td>in negative-number rule</td> + * <td>Find the absolute value of the number and format the result</td> + * </tr> + * <tr> + * <td></td> + * <td>in fraction or master rule</td> + * <td>Isolate the number's fractional part and format it.</td> + * </tr> + * <tr> + * <td></td> + * <td>in rule in fraction rule set</td> + * <td>Not allowed.</td> + * </tr> + * <tr> + * <td>>>></td> + * <td>in normal rule</td> + * <td>Divide the number by the rule's divisor and format the remainder, + * but bypass the normal rule-selection process and just use the + * rule that precedes this one in this rule list.</td> + * </tr> + * <tr> + * <td></td> + * <td>in all other rules</td> + * <td>Not allowed.</td> + * </tr> + * <tr> + * <td><<</td> + * <td>in normal rule</td> + * <td>Divide the number by the rule's divisor and format the 
quotient</td> + * </tr> + * <tr> + * <td></td> + * <td>in negative-number rule</td> + * <td>Not allowed.</td> + * </tr> + * <tr> + * <td></td> + * <td>in fraction or master rule</td> + * <td>Isolate the number's integral part and format it.</td> + * </tr> + * <tr> + * <td></td> + * <td>in rule in fraction rule set</td> + * <td>Multiply the number by the rule's base value and format the result.</td> + * </tr> + * <tr> + * <td>==</td> + * <td>in all rule sets</td> + * <td>Format the number unchanged</td> + * </tr> + * <tr> + * <td>[]</td> + * <td>in normal rule</td> + * <td>Omit the optional text if the number is an even multiple of the rule's divisor</td> + * </tr> + * <tr> + * <td></td> + * <td>in negative-number rule</td> + * <td>Not allowed.</td> + * </tr> + * <tr> + * <td></td> + * <td>in improper-fraction rule</td> + * <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an + * x.x rule and a 0.x rule)</td> + * </tr> + * <tr> + * <td></td> + * <td>in master rule</td> + * <td>Omit the optional text if the number is an integer (same as specifying both an x.x + * rule and an x.0 rule)</td> + * </tr> + * <tr> + * <td></td> + * <td>in proper-fraction rule</td> + * <td>Not allowed.</td> + * </tr> + * <tr> + * <td></td> + * <td>in rule in fraction rule set</td> + * <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td> + * </tr> + * <tr> + * <td width="37">$(cardinal,<i>plural syntax</i>)$</td> + * <td width="23"></td> + * <td width="165" valign="top">in all rule sets</td> + * <td>This provides the ability to choose a word based on the number divided by the radix to the power of the + * exponent of the base value for the specified locale, which is normally equivalent to the << value. + * This uses the cardinal plural rules from PluralFormat. 
All strings used in the plural format are treated + * as the same base value for parsing.</td> + * </tr> + * <tr> + * <td width="37">$(ordinal,<i>plural syntax</i>)$</td> + * <td width="23"></td> + * <td width="165" valign="top">in all rule sets</td> + * <td>This provides the ability to choose a word based on the number divided by the radix to the power of the + * exponent of the base value for the specified locale, which is normally equivalent to the << value. + * This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated + * as the same base value for parsing.</td> + * </tr> + * </table> + * + * <p>The substitution descriptor (i.e., the text between the token characters) may take one + * of three forms:</p> + * + * <table border="0" width="100%"> + * <tr> + * <td>a rule set name</td> + * <td>Perform the mathematical operation on the number, and format the result using the + * named rule set.</td> + * </tr> + * <tr> + * <td>a DecimalFormat pattern</td> + * <td>Perform the mathematical operation on the number, and format the result using a + * DecimalFormat with the specified pattern. 
The pattern must begin with 0 or #.</td> + * </tr> + * <tr> + * <td>nothing</td> + * <td>Perform the mathematical operation on the number, and format the result using the rule + * set containing the current rule, except: + * <ul> + * <li>You can't have an empty substitution descriptor with a == substitution.</li> + * <li>If you omit the substitution descriptor in a >> substitution in a fraction rule, + * format the result one digit at a time using the rule set containing the current rule.</li> + * <li>If you omit the substitution descriptor in a << substitution in a rule in a + * fraction rule set, format the result using the default rule set for this formatter.</li> + * </ul> + * </td> + * </tr> + * </table> + * + * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule + * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe, + * the apostrophe is ignored, but all text after it becomes significant (this is how you can + * have a rule's rule text begin with whitespace). There is no escape function: the semicolon + * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set + * names. The characters beginning a substitution token are always treated as the beginning + * of a substitution token.</p> + * + * <p>See the resource data and the demo program for annotated examples of real rule sets + * using these features.</p> + * + * <p><em>User subclasses are not supported.</em> While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. + * + * <p><b>Localizations</b></p> + * <p>Constructors are available that allow the specification of localizations for the + * public rule sets (and also allow more control over what public rule sets are available). + * Localization data is represented as a textual description. The description represents + * an array of arrays of string. 
The first element is an array of the public rule set names; + * each of these must be one of the public rule set names that appear in the rules. Only + * names in this array will be treated as public rule set names by the API. Each subsequent + * element is an array of localizations of these names. The first element of one of these + * subarrays is the locale name, and the remaining elements are localizations of the + * public rule set names, in the same order as they were listed in the first array.</p> + * <p>In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used + * to separate elements of an array. Whitespace is ignored, unless quoted.</p> + * <p>For example:<pre> + * < < %foo, %bar, %baz >, + * < en, Foo, Bar, Baz >, + * < fr, 'le Foo', 'le Bar', 'le Baz' >, + * < zh, \\u7532, \\u4e59, \\u4e19 > > + * </pre></p> + * @author Richard Gillam + * @see NumberFormat + * @see DecimalFormat + * @see PluralFormat + * @see PluralRules + * @stable ICU 2.0 + */ +class U_I18N_API RuleBasedNumberFormat : public NumberFormat { +public: + + //----------------------------------------------------------------------- + // constructors + //----------------------------------------------------------------------- + + /** + * Creates a RuleBasedNumberFormat that behaves according to the description + * passed in. The formatter uses the default locale. + * @param rules A description of the formatter's desired behavior. + * See the class documentation for a complete explanation of the description + * syntax. + * @param perror The parse error if an error was encountered. + * @param status The status indicating whether the constructor succeeded. + * @stable ICU 3.2 + */ + RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status); + + /** + * Creates a RuleBasedNumberFormat that behaves according to the description + * passed in. The formatter uses the default locale.
+ * <p> + * The localizations data provides information about the public + * rule sets and their localized display names for different + * locales. The first element in the list is an array of the names + * of the public rule sets. The first element in this array is + * the initial default ruleset. The remaining elements in the + * list are arrays of localizations of the names of the public + * rule sets. Each of these is one longer than the initial array, + * with the first String being the ULocale ID, and the remaining + * Strings being the localizations of the rule set names, in the + * same order as the initial array. Arrays are NULL-terminated. + * @param rules A description of the formatter's desired behavior. + * See the class documentation for a complete explanation of the description + * syntax. + * @param localizations a list of localizations for the rule set + * names in the description. These will be copied by the constructor. + * @param perror The parse error if an error was encountered. + * @param status The status indicating whether the constructor succeeded. + * @stable ICU 3.2 + */ + RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations, + UParseError& perror, UErrorCode& status); + + /** + * Creates a RuleBasedNumberFormat that behaves according to the rules + * passed in. The formatter uses the specified locale to determine the + * characters to use when formatting numerals, and to define equivalences + * for lenient parsing. + * @param rules The formatter rules. + * See the class documentation for a complete explanation of the rule + * syntax. + * @param locale A locale that governs which characters are used for + * formatting values in numerals and which characters are equivalent in + * lenient parsing. + * @param perror The parse error if an error was encountered. + * @param status The status indicating whether the constructor succeeded.
+ * @stable ICU 2.0 + */ + RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale, + UParseError& perror, UErrorCode& status); + + /** + * Creates a RuleBasedNumberFormat that behaves according to the description + * passed in. The formatter uses the specified locale. + * <p> + * The localizations data provides information about the public + * rule sets and their localized display names for different + * locales. The first element in the list is an array of the names + * of the public rule sets. The first element in this array is + * the initial default ruleset. The remaining elements in the + * list are arrays of localizations of the names of the public + * rule sets. Each of these is one longer than the initial array, + * with the first String being the ULocale ID, and the remaining + * Strings being the localizations of the rule set names, in the + * same order as the initial array. Arrays are NULL-terminated. + * @param rules A description of the formatter's desired behavior. + * See the class documentation for a complete explanation of the description + * syntax. + * @param localizations a list of localizations for the rule set + * names in the description. These will be copied by the constructor. + * @param locale A locale that governs which characters are used for + * formatting values in numerals and which characters are equivalent in + * lenient parsing. + * @param perror The parse error if an error was encountered. + * @param status The status indicating whether the constructor succeeded. + * @stable ICU 3.2 + */ + RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations, + const Locale& locale, UParseError& perror, UErrorCode& status); + + /** + * Creates a RuleBasedNumberFormat from a predefined ruleset. The selector + * code chooses among four possible predefined formats: spellout, ordinal, + * duration, and numbering system. + * @param tag A selector code specifying which kind of formatter to create for that + * locale.
There are four legal values: URBNF_SPELLOUT, which creates a formatter that + * spells out a value in words in the desired language, URBNF_ORDINAL, which attaches + * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"), + * URBNF_DURATION, which formats a duration in seconds as hours, minutes, and seconds always rounding down, + * and URBNF_NUMBERING_SYSTEM, which is used to invoke rules for alternate numbering + * systems such as the Hebrew numbering system, or for Roman Numerals, etc. + * @param locale The locale for the formatter. + * @param status The status indicating whether the constructor succeeded. + * @stable ICU 2.0 + */ + RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& locale, UErrorCode& status); + + //----------------------------------------------------------------------- + // boilerplate + //----------------------------------------------------------------------- + + /** + * Copy constructor + * @param rhs the object to be copied from. + * @stable ICU 2.6 + */ + RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs); + + /** + * Assignment operator + * @param rhs the object to be copied from. + * @stable ICU 2.6 + */ + RuleBasedNumberFormat& operator=(const RuleBasedNumberFormat& rhs); + + /** + * Release memory allocated for a RuleBasedNumberFormat when you are finished with it. + * @stable ICU 2.6 + */ + virtual ~RuleBasedNumberFormat(); + + /** + * Clone this object polymorphically. The caller is responsible + * for deleting the result when done. + * @return A copy of the object. + * @stable ICU 2.6 + */ + virtual Format* clone(void) const; + + /** + * Return true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * @param other the object to be compared with. + * @return true if the given Format objects are semantically equal. 
+ * @stable ICU 2.6 + */ + virtual UBool operator==(const Format& other) const; + +//----------------------------------------------------------------------- +// public API functions +//----------------------------------------------------------------------- + + /** + * return the rules that were provided to the RuleBasedNumberFormat. + * @return the result String that was passed in + * @stable ICU 2.0 + */ + virtual UnicodeString getRules() const; + + /** + * Return the number of public rule set names. + * @return the number of public rule set names. + * @stable ICU 2.0 + */ + virtual int32_t getNumberOfRuleSetNames() const; + + /** + * Return the name of the index'th public ruleSet. If index is not valid, + * the function returns null. + * @param index the index of the ruleset + * @return the name of the index'th public ruleSet. + * @stable ICU 2.0 + */ + virtual UnicodeString getRuleSetName(int32_t index) const; + + /** + * Return the number of locales for which we have localized rule set display names. + * @return the number of locales for which we have localized rule set display names. + * @stable ICU 3.2 + */ + virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const; + + /** + * Return the index'th display name locale. + * @param index the index of the locale + * @param status set to a failure code when this function fails + * @return the locale + * @see #getNumberOfRuleSetDisplayNameLocales + * @stable ICU 3.2 + */ + virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const; + + /** + * Return the rule set display names for the provided locale. These are in the same order + * as those returned by getRuleSetName. The locale is matched against the locales for + * which there is display name data, using normal fallback rules. If no locale matches, + * the default display names are returned. (These are the internal rule set names minus + * the leading '%'.) 
+ * @param index the index of the rule set + * @param locale the locale (returned by getRuleSetDisplayNameLocales) for which the localized + * display name is desired + * @return the display name for the given index, which might be bogus if there is an error + * @see #getRuleSetName + * @stable ICU 3.2 + */ + virtual UnicodeString getRuleSetDisplayName(int32_t index, + const Locale& locale = Locale::getDefault()); + + /** + * Return the rule set display name for the provided rule set and locale. + * The locale is matched against the locales for which there is display name data, using + * normal fallback rules. If no locale matches, the default display name is returned. + * @return the display name for the rule set + * @stable ICU 3.2 + * @see #getRuleSetDisplayName + */ + virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName, + const Locale& locale = Locale::getDefault()); + + + using NumberFormat::format; + + /** + * Formats the specified 32-bit number using the default ruleset. + * @param number The number to format. + * @param toAppendTo the string that will hold the (appended) result + * @param pos the fieldposition + * @return A textual representation of the number. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(int32_t number, + UnicodeString& toAppendTo, + FieldPosition& pos) const; + + /** + * Formats the specified 64-bit number using the default ruleset. + * @param number The number to format. + * @param toAppendTo the string that will hold the (appended) result + * @param pos the fieldposition + * @return A textual representation of the number. + * @stable ICU 2.1 + */ + virtual UnicodeString& format(int64_t number, + UnicodeString& toAppendTo, + FieldPosition& pos) const; + /** + * Formats the specified number using the default ruleset. + * @param number The number to format. 
+ * @param toAppendTo the string that will hold the (appended) result + * @param pos the fieldposition + * @return A textual representation of the number. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(double number, + UnicodeString& toAppendTo, + FieldPosition& pos) const; + + /** + * Formats the specified number using the named ruleset. + * @param number The number to format. + * @param ruleSetName The name of the rule set to format the number with. + * This must be the name of a valid public rule set for this formatter. + * @param toAppendTo the string that will hold the (appended) result + * @param pos the fieldposition + * @param status the status + * @return A textual representation of the number. + * @stable ICU 2.0 + */ + virtual UnicodeString& format(int32_t number, + const UnicodeString& ruleSetName, + UnicodeString& toAppendTo, + FieldPosition& pos, + UErrorCode& status) const; + /** + * Formats the specified 64-bit number using the named ruleset. + * @param number The number to format. + * @param ruleSetName The name of the rule set to format the number with. + * This must be the name of a valid public rule set for this formatter. + * @param toAppendTo the string that will hold the (appended) result + * @param pos the fieldposition + * @param status the status + * @return A textual representation of the number. + * @stable ICU 2.1 + */ + virtual UnicodeString& format(int64_t number, + const UnicodeString& ruleSetName, + UnicodeString& toAppendTo, + FieldPosition& pos, + UErrorCode& status) const; + /** + * Formats the specified number using the named ruleset. + * @param number The number to format. + * @param ruleSetName The name of the rule set to format the number with. + * This must be the name of a valid public rule set for this formatter. + * @param toAppendTo the string that will hold the (appended) result + * @param pos the fieldposition + * @param status the status + * @return A textual representation of the number. 
+ * @stable ICU 2.0 + */ + virtual UnicodeString& format(double number, + const UnicodeString& ruleSetName, + UnicodeString& toAppendTo, + FieldPosition& pos, + UErrorCode& status) const; + + using NumberFormat::parse; + + /** + * Parses the specified string, beginning at the specified position, according + * to this formatter's rules. This will match the string against all of the + * formatter's public rule sets and return the value corresponding to the longest + * parseable substring. This function's behavior is affected by the lenient + * parse mode. + * @param text The string to parse + * @param result the result of the parse, either a double or a long. + * @param parsePosition On entry, contains the position of the first character + * in "text" to examine. On exit, has been updated to contain the position + * of the first character in "text" that wasn't consumed by the parse. + * @see #setLenient + * @stable ICU 2.0 + */ + virtual void parse(const UnicodeString& text, + Formattable& result, + ParsePosition& parsePosition) const; + +#if !UCONFIG_NO_COLLATION + + /** + * Turns lenient parse mode on and off. + * + * When in lenient parse mode, the formatter uses a Collator for parsing the text. + * Only primary differences are treated as significant. This means that case + * differences, accent differences, alternate spellings of the same letter + * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in + * matching the text. In many cases, numerals will be accepted in place of words + * or phrases as well. + * + * For example, all of the following will correctly parse as 255 in English in + * lenient-parse mode: + * <br>"two hundred fifty-five" + * <br>"two hundred fifty five" + * <br>"TWO HUNDRED FIFTY-FIVE" + * <br>"twohundredfiftyfive" + * <br>"2 hundred fifty-5" + * + * The Collator used is determined by the locale that was + * passed to this object on construction.
The description passed to this object + * on construction may supply additional collation rules that are appended to the + * end of the default collator for the locale, enabling additional equivalences + * (such as adding more ignorable characters or permitting spelled-out version of + * symbols; see the demo program for examples). + * + * It's important to emphasize that even strict parsing is relatively lenient: it + * will accept some text that it won't produce as output. In English, for example, + * it will correctly parse "two hundred zero" and "fifteen hundred". + * + * @param enabled If true, turns lenient-parse mode on; if false, turns it off. + * @see RuleBasedCollator + * @stable ICU 2.0 + */ + virtual void setLenient(UBool enabled); + + /** + * Returns true if lenient-parse mode is turned on. Lenient parsing is off + * by default. + * @return true if lenient-parse mode is turned on. + * @see #setLenient + * @stable ICU 2.0 + */ + virtual inline UBool isLenient(void) const; + +#endif + + /** + * Override the default rule set to use. If ruleSetName is null, reset + * to the initial default rule set. If the rule set is not a public rule set name, + * U_ILLEGAL_ARGUMENT_ERROR is returned in status. + * @param ruleSetName the name of the rule set, or null to reset the initial default. + * @param status set to failure code when a problem occurs. + * @stable ICU 2.6 + */ + virtual void setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status); + + /** + * Return the name of the current default rule set. If the current rule set is + * not public, returns a bogus (and empty) UnicodeString. + * @return the name of the current default rule set + * @stable ICU 3.0 + */ + virtual UnicodeString getDefaultRuleSetName() const; + + /** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see + * NumberFormat. + * @param value The UDisplayContext value to set. 
+ * @param status Input/output status. If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @stable ICU 53 + */ + virtual void setContext(UDisplayContext value, UErrorCode& status); + +public: + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.8 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Sets the decimal format symbols, which is generally not changed + * by the programmer or user. The formatter takes ownership of + * symbolsToAdopt; the client must not delete it. + * + * @param symbolsToAdopt DecimalFormatSymbols to be adopted. + * @stable ICU 49 + */ + virtual void adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt); + + /** + * Sets the decimal format symbols, which is generally not changed + * by the programmer or user. A clone of the symbols is created and + * the symbols is _not_ adopted; the client is still responsible for + * deleting it. + * + * @param symbols DecimalFormatSymbols. 
+ * @stable ICU 49 + */ + virtual void setDecimalFormatSymbols(const DecimalFormatSymbols& symbols); + +private: + RuleBasedNumberFormat(); // default constructor not implemented + + // this will ref the localizations if they are not NULL + // caller must deref to get adoption + RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations, + const Locale& locale, UParseError& perror, UErrorCode& status); + + void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status); + void initCapitalizationContextInfo(const Locale& thelocale); + void dispose(); + void stripWhitespace(UnicodeString& src); + void initDefaultRuleSet(); + void format(double number, NFRuleSet& ruleSet); + NFRuleSet* findRuleSet(const UnicodeString& name, UErrorCode& status) const; + + /* friend access */ + friend class NFSubstitution; + friend class NFRule; + friend class NFRuleSet; + friend class FractionalPartSubstitution; + + inline NFRuleSet * getDefaultRuleSet() const; + const RuleBasedCollator * getCollator() const; + DecimalFormatSymbols * initializeDecimalFormatSymbols(UErrorCode &status); + const DecimalFormatSymbols * getDecimalFormatSymbols() const; + NFRule * initializeDefaultInfinityRule(UErrorCode &status); + const NFRule * getDefaultInfinityRule() const; + NFRule * initializeDefaultNaNRule(UErrorCode &status); + const NFRule * getDefaultNaNRule() const; + PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const; + UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const; + +private: + NFRuleSet **ruleSets; + UnicodeString* ruleSetDescriptions; + int32_t numRuleSets; + NFRuleSet *defaultRuleSet; + Locale locale; + RuleBasedCollator* collator; + DecimalFormatSymbols* decimalFormatSymbols; + NFRule *defaultInfinityRule; + NFRule *defaultNaNRule; + UBool lenient; + UnicodeString* lenientParseRules; + 
LocalizationInfo* localizations; + UnicodeString originalDescription; + UBool capitalizationInfoSet; + UBool capitalizationForUIListMenu; + UBool capitalizationForStandAlone; + BreakIterator* capitalizationBrkIter; +}; + +// --------------- + +#if !UCONFIG_NO_COLLATION + +inline UBool +RuleBasedNumberFormat::isLenient(void) const { + return lenient; +} + +#endif + +inline NFRuleSet* +RuleBasedNumberFormat::getDefaultRuleSet() const { + return defaultRuleSet; +} + +U_NAMESPACE_END + +/* U_HAVE_RBNF */ +#endif + +/* RBNF_H */ +#endif diff --git a/intl/icu/source/i18n/unicode/rbtz.h b/intl/icu/source/i18n/unicode/rbtz.h new file mode 100644 index 000000000..4df5850a8 --- /dev/null +++ b/intl/icu/source/i18n/unicode/rbtz.h @@ -0,0 +1,364 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2013, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +#ifndef RBTZ_H +#define RBTZ_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Rule based customizable time zone + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/basictz.h" +#include "unicode/unistr.h" + +U_NAMESPACE_BEGIN + +// forward declaration +class UVector; +struct Transition; + +/** + * a BasicTimeZone subclass implemented in terms of InitialTimeZoneRule and TimeZoneRule instances + * @see BasicTimeZone + * @see InitialTimeZoneRule + * @see TimeZoneRule + */ +class U_I18N_API RuleBasedTimeZone : public BasicTimeZone { +public: + /** + * Constructs a <code>RuleBasedTimeZone</code> object with the ID and the + * <code>InitialTimeZoneRule</code>. The input <code>InitialTimeZoneRule</code> + * is adopted by this <code>RuleBasedTimeZone</code>, thus the caller must not + * delete it. 
+ * @param id The time zone ID. + * @param initialRule The initial time zone rule. + * @stable ICU 3.8 + */ + RuleBasedTimeZone(const UnicodeString& id, InitialTimeZoneRule* initialRule); + + /** + * Copy constructor. + * @param source The RuleBasedTimeZone object to be copied. + * @stable ICU 3.8 + */ + RuleBasedTimeZone(const RuleBasedTimeZone& source); + + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~RuleBasedTimeZone(); + + /** + * Assignment operator. + * @param right The object to be copied. + * @stable ICU 3.8 + */ + RuleBasedTimeZone& operator=(const RuleBasedTimeZone& right); + + /** + * Return true if the given <code>TimeZone</code> objects are + * semantically equal. Objects of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZone</code> objects are + *semantically equal. + * @stable ICU 3.8 + */ + virtual UBool operator==(const TimeZone& that) const; + + /** + * Return true if the given <code>TimeZone</code> objects are + * semantically unequal. Objects of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZone</code> objects are + * semantically unequal. + * @stable ICU 3.8 + */ + virtual UBool operator!=(const TimeZone& that) const; + + /** + * Adds the <code>TimeZoneRule</code> which represents time transitions. + * The <code>TimeZoneRule</code> must have start times, that is, the result + * of isTransitionRule() must be true. Otherwise, U_ILLEGAL_ARGUMENT_ERROR + * is set to the error code. + * The input <code>TimeZoneRule</code> is adopted by this + * <code>RuleBasedTimeZone</code> on successful completion of this method, + * thus, the caller must not delete it when no error is returned. + * After all rules are added, the caller must call complete() method to + * make this <code>RuleBasedTimeZone</code> ready to handle common time + * zone functions. 
+ * @param rule The <code>TimeZoneRule</code>. + * @param status Output param to be filled in with a success or an error. + * @stable ICU 3.8 + */ + void addTransitionRule(TimeZoneRule* rule, UErrorCode& status); + + /** + * Makes the <code>RuleBasedTimeZone</code> ready to handle actual timezone + * calculation APIs. This method collects time zone rules specified + * by the caller via the constructor and addTransitionRule() and + * builds internal structure for making the object ready to support + * time zone APIs such as getOffset(), getNextTransition() and others. + * @param status Output param to be filled in with a success or an error. + * @stable ICU 3.8 + */ + void complete(UErrorCode& status); + + /** + * Clones TimeZone objects polymorphically. Clients are responsible for deleting + * the TimeZone object cloned. + * + * @return A new copy of this TimeZone object. + * @stable ICU 3.8 + */ + virtual TimeZone* clone(void) const; + + /** + * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time in this time zone, taking daylight savings time into + * account) as of a particular reference date. The reference date is used to determine + * whether daylight savings time is in effect and needs to be figured into the offset + * that is returned (in other words, what is the adjusted GMT offset in this time zone + * at this particular date and time?). For the time zones produced by createTimeZone(), + * the reference date is specified according to the Gregorian calendar, and the date + * and time fields are local standard time. + * + * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload, + * which returns both the raw and the DST offset for a given time. This method + * is retained only for backward compatibility.
+ * + * @param era The reference date's era + * @param year The reference date's year + * @param month The reference date's month (0-based; 0 is January) + * @param day The reference date's day-in-month (1-based) + * @param dayOfWeek The reference date's day-of-week (1-based; 1 is Sunday) + * @param millis The reference date's milliseconds in day, local standard time + * @param status Output param to be filled in with a success or an error. + * @return The offset in milliseconds to add to GMT to get local time. + * @stable ICU 3.8 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const; + + /** + * Gets the time zone offset, for current date, modified in case of + * daylight savings. This is the offset to add *to* UTC to get local time. + * + * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload, + * which returns both the raw and the DST offset for a given time. This method + * is retained only for backward compatibility. + * + * @param era The reference date's era + * @param year The reference date's year + * @param month The reference date's month (0-based; 0 is January) + * @param day The reference date's day-in-month (1-based) + * @param dayOfWeek The reference date's day-of-week (1-based; 1 is Sunday) + * @param millis The reference date's milliseconds in day, local standard time + * @param monthLength The length of the given month in days. + * @param status Output param to be filled in with a success or an error. + * @return The offset in milliseconds to add to GMT to get local time. + * @stable ICU 3.8 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, + int32_t monthLength, UErrorCode& status) const; + + /** + * Returns the time zone raw and GMT offset for the given moment + * in time. Upon return, local-millis = GMT-millis + rawOffset + + * dstOffset.
All computations are performed in the proleptic + * Gregorian calendar. The default implementation in the TimeZone + * class delegates to the 8-argument getOffset(). + * + * @param date moment in time for which to return offsets, in + * units of milliseconds from January 1, 1970 0:00 GMT, either GMT + * time or local wall time, depending on `local'. + * @param local if true, `date' is local wall time; otherwise it + * is in GMT time. + * @param rawOffset output parameter to receive the raw offset, that + * is, the offset not including DST adjustments + * @param dstOffset output parameter to receive the DST offset, + * that is, the offset to be added to `rawOffset' to obtain the + * total offset between local and GMT time. If DST is not in + * effect, this value is zero; otherwise it is a positive value, + * typically one hour. + * @param ec input-output error code + * @stable ICU 3.8 + */ + virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& ec) const; + + /** + * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @param offsetMillis The new raw GMT offset for this time zone. + * @stable ICU 3.8 + */ + virtual void setRawOffset(int32_t offsetMillis); + + /** + * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @return The TimeZone's raw GMT offset. + * @stable ICU 3.8 + */ + virtual int32_t getRawOffset(void) const; + + /** + * Queries if this time zone uses daylight savings time. + * @return true if this time zone uses daylight savings time, + * false, otherwise. + * @stable ICU 3.8 + */ + virtual UBool useDaylightTime(void) const; + + /** + * Queries if the given date is in daylight savings time in + * this time zone. 
+ * This method is wasteful since it creates a new GregorianCalendar and + * deletes it each time it is called. This is a deprecated method + * and provided only for Java compatibility. + * + * @param date the given UDate. + * @param status Output param filled in with success/error code. + * @return true if the given date is in daylight savings time, + * false, otherwise. + * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead. + */ + virtual UBool inDaylightTime(UDate date, UErrorCode& status) const; + + /** + * Returns true if this zone has the same rule and offset as another zone. + * That is, if this zone differs only in ID, if at all. + * @param other the <code>TimeZone</code> object to be compared with + * @return true if the given zone is the same as this one, + * with the possible exception of the ID + * @stable ICU 3.8 + */ + virtual UBool hasSameRules(const TimeZone& other) const; + + /** + * Gets the first time zone transition after the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first transition after the base time. + * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + + /** + * Gets the most recent time zone transition before the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent transition before the base time. + * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + + /** + * Returns the number of <code>TimeZoneRule</code>s which represents time transitions, + * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except + * <code>InitialTimeZoneRule</code>. 
The return value range is 0 or any positive value. + * @param status Receives error status code. + * @return The number of <code>TimeZoneRule</code>s representing time transitions. + * @stable ICU 3.8 + */ + virtual int32_t countTransitionRules(UErrorCode& status) const; + + /** + * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code> + * which represent time transitions for this time zone. On successful return, + * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and + * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code> + * instances up to the size specified by trscount. The results are referencing the + * rule instance held by this time zone instance. Therefore, after this time zone + * is destructed, they are no longer available. + * @param initial Receives the initial timezone rule + * @param trsrules Receives the timezone transition rules + * @param trscount On input, specify the size of the array 'trsrules' receiving + * the timezone transition rules. On output, actual number of + * rules filled in the array will be set. + * @param status Receives error status code. + * @stable ICU 3.8 + */ + virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const; + + /** + * Get time zone offsets from local wall time. 
+ * @internal + */ + virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; + +private: + void deleteRules(void); + void deleteTransitions(void); + UVector* copyRules(UVector* source); + TimeZoneRule* findRuleInFinal(UDate date, UBool local, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const; + UBool findNext(UDate base, UBool inclusive, UDate& time, TimeZoneRule*& from, TimeZoneRule*& to) const; + UBool findPrev(UDate base, UBool inclusive, UDate& time, TimeZoneRule*& from, TimeZoneRule*& to) const; + int32_t getLocalDelta(int32_t rawBefore, int32_t dstBefore, int32_t rawAfter, int32_t dstAfter, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const; + UDate getTransitionTime(Transition* transition, UBool local, + int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt) const; + void getOffsetInternal(UDate date, UBool local, int32_t NonExistingTimeOpt, int32_t DuplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& ec) const; + void completeConst(UErrorCode &status) const; + + InitialTimeZoneRule *fInitialRule; + UVector *fHistoricRules; + UVector *fFinalRules; + UVector *fHistoricTransitions; + UBool fUpToDate; + +public: + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. 
+ * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID(void) const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // RBTZ_H + +//eof diff --git a/intl/icu/source/i18n/unicode/regex.h b/intl/icu/source/i18n/unicode/regex.h new file mode 100644 index 000000000..d23a3ab4d --- /dev/null +++ b/intl/icu/source/i18n/unicode/regex.h @@ -0,0 +1,1885 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: regex.h +* encoding: US-ASCII +* indentation:4 +* +* created on: 2002oct22 +* created by: Andy Heninger +* +* ICU Regular Expressions, API for C++ +*/ + +#ifndef REGEX_H +#define REGEX_H + +//#define REGEX_DEBUG + +/** + * \file + * \brief C++ API: Regular Expressions + * + * <h2>Regular Expression API</h2> + * + * <p>The ICU API for processing regular expressions consists of two classes, + * <code>RegexPattern</code> and <code>RegexMatcher</code>. + * <code>RegexPattern</code> objects represent a pre-processed, or compiled + * regular expression. They are created from a regular expression pattern string, + * and can be used to create <code>RegexMatcher</code> objects for the pattern.</p> + * + * <p>Class <code>RegexMatcher</code> bundles together a regular expression + * pattern and a target string to which the search pattern will be applied. + * <code>RegexMatcher</code> includes API for doing plain find or search + * operations, for search and replace operations, and for obtaining detailed + * information about bounds of a match. 
</p> + * + * <p>Note that by constructing <code>RegexMatcher</code> objects directly from regular + * expression pattern strings application code can be simplified and the explicit + * need for <code>RegexPattern</code> objects can usually be eliminated. + * </p> + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_REGULAR_EXPRESSIONS + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/utext.h" +#include "unicode/parseerr.h" + +#include "unicode/uregex.h" + +// Forward Declarations + +struct UHashtable; + +U_NAMESPACE_BEGIN + +struct Regex8BitSet; +class RegexCImpl; +class RegexMatcher; +class RegexPattern; +struct REStackFrame; +class RuleBasedBreakIterator; +class UnicodeSet; +class UVector; +class UVector32; +class UVector64; + + +/** + * Class <code>RegexPattern</code> represents a compiled regular expression. It includes + * factory methods for creating a RegexPattern object from the source (string) form + * of a regular expression, methods for creating RegexMatchers that allow the pattern + * to be applied to input text, and a few convenience methods for simple common + * uses of regular expressions. + * + * <p>Class RegexPattern is not intended to be subclassed.</p> + * + * @stable ICU 2.4 + */ +class U_I18N_API RegexPattern U_FINAL : public UObject { +public: + + /** + * default constructor. Create a RegexPattern object that refers to no actual + * pattern. Not normally needed; RegexPattern objects are usually + * created using the factory method <code>compile()</code>. + * + * @stable ICU 2.4 + */ + RegexPattern(); + + /** + * Copy Constructor. Create a new RegexPattern object that is equivalent + * to the source object. + * @param source the pattern object to be copied. + * @stable ICU 2.4 + */ + RegexPattern(const RegexPattern &source); + + /** + * Destructor. Note that a RegexPattern object must persist so long as any + * RegexMatcher objects that were created from the RegexPattern are active. 
+ * @stable ICU 2.4 + */ + virtual ~RegexPattern(); + + /** + * Comparison operator. Two RegexPattern objects are considered equal if they + * were constructed from identical source patterns using the same match flag + * settings. + * @param that a RegexPattern object to compare with "this". + * @return TRUE if the objects are equivalent. + * @stable ICU 2.4 + */ + UBool operator==(const RegexPattern& that) const; + + /** + * Comparison operator. Two RegexPattern objects are considered equal if they + * were constructed from identical source patterns using the same match flag + * settings. + * @param that a RegexPattern object to compare with "this". + * @return TRUE if the objects are different. + * @stable ICU 2.4 + */ + inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);} + + /** + * Assignment operator. After assignment, this RegexPattern will behave identically + * to the source object. + * @stable ICU 2.4 + */ + RegexPattern &operator =(const RegexPattern &source); + + /** + * Create an exact copy of this RegexPattern object. Since RegexPattern is not + * intended to be subclassed, <code>clone()</code> and the copy construction are + * equivalent operations. + * @return the copy of this RegexPattern + * @stable ICU 2.4 + */ + virtual RegexPattern *clone() const; + + + /** + * Compiles the regular expression in string form into a RegexPattern + * object. These compile methods, rather than the constructors, are the usual + * way that RegexPattern objects are created. + * + * <p>Note that RegexPattern objects must not be deleted while RegexMatcher + * objects created from the pattern are active. 
RegexMatchers keep a pointer + * back to their pattern, so premature deletion of the pattern is a + * catastrophic error.</p> + * + * <p>All pattern match mode flags are set to their default values.</p> + * + * <p>Note that it is often more convenient to construct a RegexMatcher directly + * from a pattern string rather than separately compiling the pattern and + * then creating a RegexMatcher object from the pattern.</p> + * + * @param regex The regular expression to be compiled. + * @param pe Receives the position (line and column nubers) of any error + * within the regular expression.) + * @param status A reference to a UErrorCode to receive any errors. + * @return A regexPattern object for the compiled pattern. + * + * @stable ICU 2.4 + */ + static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, + UParseError &pe, + UErrorCode &status); + + /** + * Compiles the regular expression in string form into a RegexPattern + * object. These compile methods, rather than the constructors, are the usual + * way that RegexPattern objects are created. + * + * <p>Note that RegexPattern objects must not be deleted while RegexMatcher + * objects created from the pattern are active. RegexMatchers keep a pointer + * back to their pattern, so premature deletion of the pattern is a + * catastrophic error.</p> + * + * <p>All pattern match mode flags are set to their default values.</p> + * + * <p>Note that it is often more convenient to construct a RegexMatcher directly + * from a pattern string rather than separately compiling the pattern and + * then creating a RegexMatcher object from the pattern.</p> + * + * @param regex The regular expression to be compiled. Note, the text referred + * to by this UText must not be deleted during the lifetime of the + * RegexPattern object or any RegexMatcher object created from it. + * @param pe Receives the position (line and column nubers) of any error + * within the regular expression.) 
+ * @param status A reference to a UErrorCode to receive any errors. + * @return A regexPattern object for the compiled pattern. + * + * @stable ICU 4.6 + */ + static RegexPattern * U_EXPORT2 compile( UText *regex, + UParseError &pe, + UErrorCode &status); + + /** + * Compiles the regular expression in string form into a RegexPattern + * object using the specified match mode flags. These compile methods, + * rather than the constructors, are the usual way that RegexPattern objects + * are created. + * + * <p>Note that RegexPattern objects must not be deleted while RegexMatcher + * objects created from the pattern are active. RegexMatchers keep a pointer + * back to their pattern, so premature deletion of the pattern is a + * catastrophic error.</p> + * + * <p>Note that it is often more convenient to construct a RegexMatcher directly + * from a pattern string instead of than separately compiling the pattern and + * then creating a RegexMatcher object from the pattern.</p> + * + * @param regex The regular expression to be compiled. + * @param flags The match mode flags to be used. + * @param pe Receives the position (line and column numbers) of any error + * within the regular expression.) + * @param status A reference to a UErrorCode to receive any errors. + * @return A regexPattern object for the compiled pattern. + * + * @stable ICU 2.4 + */ + static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, + uint32_t flags, + UParseError &pe, + UErrorCode &status); + + /** + * Compiles the regular expression in string form into a RegexPattern + * object using the specified match mode flags. These compile methods, + * rather than the constructors, are the usual way that RegexPattern objects + * are created. + * + * <p>Note that RegexPattern objects must not be deleted while RegexMatcher + * objects created from the pattern are active. 
RegexMatchers keep a pointer + * back to their pattern, so premature deletion of the pattern is a + * catastrophic error.</p> + * + * <p>Note that it is often more convenient to construct a RegexMatcher directly + * from a pattern string instead of than separately compiling the pattern and + * then creating a RegexMatcher object from the pattern.</p> + * + * @param regex The regular expression to be compiled. Note, the text referred + * to by this UText must not be deleted during the lifetime of the + * RegexPattern object or any RegexMatcher object created from it. + * @param flags The match mode flags to be used. + * @param pe Receives the position (line and column numbers) of any error + * within the regular expression.) + * @param status A reference to a UErrorCode to receive any errors. + * @return A regexPattern object for the compiled pattern. + * + * @stable ICU 4.6 + */ + static RegexPattern * U_EXPORT2 compile( UText *regex, + uint32_t flags, + UParseError &pe, + UErrorCode &status); + + /** + * Compiles the regular expression in string form into a RegexPattern + * object using the specified match mode flags. These compile methods, + * rather than the constructors, are the usual way that RegexPattern objects + * are created. + * + * <p>Note that RegexPattern objects must not be deleted while RegexMatcher + * objects created from the pattern are active. RegexMatchers keep a pointer + * back to their pattern, so premature deletion of the pattern is a + * catastrophic error.</p> + * + * <p>Note that it is often more convenient to construct a RegexMatcher directly + * from a pattern string instead of than separately compiling the pattern and + * then creating a RegexMatcher object from the pattern.</p> + * + * @param regex The regular expression to be compiled. + * @param flags The match mode flags to be used. + * @param status A reference to a UErrorCode to receive any errors. + * @return A regexPattern object for the compiled pattern. 
+ * + * @stable ICU 2.6 + */ + static RegexPattern * U_EXPORT2 compile( const UnicodeString ®ex, + uint32_t flags, + UErrorCode &status); + + /** + * Compiles the regular expression in string form into a RegexPattern + * object using the specified match mode flags. These compile methods, + * rather than the constructors, are the usual way that RegexPattern objects + * are created. + * + * <p>Note that RegexPattern objects must not be deleted while RegexMatcher + * objects created from the pattern are active. RegexMatchers keep a pointer + * back to their pattern, so premature deletion of the pattern is a + * catastrophic error.</p> + * + * <p>Note that it is often more convenient to construct a RegexMatcher directly + * from a pattern string instead of than separately compiling the pattern and + * then creating a RegexMatcher object from the pattern.</p> + * + * @param regex The regular expression to be compiled. Note, the text referred + * to by this UText must not be deleted during the lifetime of the + * RegexPattern object or any RegexMatcher object created from it. + * @param flags The match mode flags to be used. + * @param status A reference to a UErrorCode to receive any errors. + * @return A regexPattern object for the compiled pattern. + * + * @stable ICU 4.6 + */ + static RegexPattern * U_EXPORT2 compile( UText *regex, + uint32_t flags, + UErrorCode &status); + + /** + * Get the match mode flags that were used when compiling this pattern. + * @return the match mode flags + * @stable ICU 2.4 + */ + virtual uint32_t flags() const; + + /** + * Creates a RegexMatcher that will match the given input against this pattern. The + * RegexMatcher can then be used to perform match, find or replace operations + * on the input. Note that a RegexPattern object must not be deleted while + * RegexMatchers created from it still exist and might possibly be used again. 
+ * <p> + * The matcher will retain a reference to the supplied input string, and all regexp + * pattern matching operations happen directly on this original string. It is + * critical that the string not be altered or deleted before use by the regular + * expression operations is complete. + * + * @param input The input string to which the regular expression will be applied. + * @param status A reference to a UErrorCode to receive any errors. + * @return A RegexMatcher object for this pattern and input. + * + * @stable ICU 2.4 + */ + virtual RegexMatcher *matcher(const UnicodeString &input, + UErrorCode &status) const; + +private: + /** + * Cause a compilation error if an application accidentally attempts to + * create a matcher with a (UChar *) string as input rather than + * a UnicodeString. Avoids a dangling reference to a temporary string. + * <p> + * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * using one of the aliasing constructors, such as + * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code> + * or in a UText, using + * <code>utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);</code> + * + */ + RegexMatcher *matcher(const UChar *input, + UErrorCode &status) const; +public: + + + /** + * Creates a RegexMatcher that will match against this pattern. The + * RegexMatcher can be used to perform match, find or replace operations. + * Note that a RegexPattern object must not be deleted while + * RegexMatchers created from it still exist and might possibly be used again. + * + * @param status A reference to a UErrorCode to receive any errors. + * @return A RegexMatcher object for this pattern and input. + * + * @stable ICU 2.6 + */ + virtual RegexMatcher *matcher(UErrorCode &status) const; + + + /** + * Test whether a string matches a regular expression. This convenience function + * both compiles the regular expression and applies it in a single operation. 
+ * Note that if the same pattern needs to be applied repeatedly, this method will be + * less efficient than creating and reusing a RegexMatcher object. + * + * @param regex The regular expression + * @param input The string data to be matched + * @param pe Receives the position of any syntax errors within the regular expression + * @param status A reference to a UErrorCode to receive any errors. + * @return True if the regular expression exactly matches the full input string. + * + * @stable ICU 2.4 + */ + static UBool U_EXPORT2 matches(const UnicodeString &regex, + const UnicodeString &input, + UParseError &pe, + UErrorCode &status); + + /** + * Test whether a string matches a regular expression. This convenience function + * both compiles the regular expression and applies it in a single operation. + * Note that if the same pattern needs to be applied repeatedly, this method will be + * less efficient than creating and reusing a RegexMatcher object. + * + * @param regex The regular expression + * @param input The string data to be matched + * @param pe Receives the position of any syntax errors within the regular expression + * @param status A reference to a UErrorCode to receive any errors. + * @return True if the regular expression exactly matches the full input string. + * + * @stable ICU 4.6 + */ + static UBool U_EXPORT2 matches(UText *regex, + UText *input, + UParseError &pe, + UErrorCode &status); + + /** + * Returns the regular expression from which this pattern was compiled. This method will work + * even if the pattern was compiled from a UText. + * + * Note: If the pattern was originally compiled from a UText, and that UText was modified, + * the returned string may no longer reflect the RegexPattern object. + * @stable ICU 2.4 + */ + virtual UnicodeString pattern() const; + + + /** + * Returns the regular expression from which this pattern was compiled. This method will work + * even if the pattern was compiled from a UnicodeString. 
+ * + * Note: This is the original input, not a clone. If the pattern was originally compiled from a + * UText, and that UText was modified, the returned UText may no longer reflect the RegexPattern + * object. + * + * @stable ICU 4.6 + */ + virtual UText *patternText(UErrorCode &status) const; + + + /** + * Get the group number corresponding to a named capture group. + * The returned number can be used with any function that accesses + * capture groups by number. + * + * The function returns an error status if the specified name does not + * appear in the pattern. + * + * @param groupName The capture group name. + * @param status A UErrorCode to receive any errors. + * + * @stable ICU 55 + */ + virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const; + + + /** + * Get the group number corresponding to a named capture group. + * The returned number can be used with any function that accesses + * capture groups by number. + * + * The function returns an error status if the specified name does not + * appear in the pattern. + * + * @param groupName The capture group name, + * platform invariant characters only. + * @param nameLength The length of the name, or -1 if the name is + * nul-terminated. + * @param status A UErrorCode to receive any errors. + * + * @stable ICU 55 + */ + virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const; + + + /** + * Split a string into fields. Somewhat like split() from Perl or Java. + * Pattern matches identify delimiters that separate the input + * into fields. The input data between the delimiters becomes the + * fields themselves. + * + * If the delimiter pattern includes capture groups, the captured text will + * also appear in the destination array of output strings, interspersed + * with the fields. This is similar to Perl, but differs from Java, + * which ignores the presence of capture groups in the pattern. 
+ * + * Trailing empty fields will always be returned, assuming sufficient + * destination capacity. This differs from the default behavior for Java + * and Perl where trailing empty fields are not returned. + * + * The number of strings produced by the split operation is returned. + * This count includes the strings from capture groups in the delimiter pattern. + * This behavior differs from Java, which ignores capture groups. + * + * For the best performance on split() operations, + * <code>RegexMatcher::split</code> is preferable to this function + * + * @param input The string to be split into fields. The field delimiters + * match the pattern (in the "this" object) + * @param dest An array of UnicodeStrings to receive the results of the split. + * This is an array of actual UnicodeString objects, not an + * array of pointers to strings. Local (stack based) arrays can + * work well here. + * @param destCapacity The number of elements in the destination array. + * If the number of fields found is less than destCapacity, the + * extra strings in the destination array are not altered. + * If the number of destination strings is less than the number + * of fields, the trailing part of the input string, including any + * field delimiters, is placed in the last destination string. + * @param status A reference to a UErrorCode to receive any errors. + * @return The number of fields into which the input string was split. + * @stable ICU 2.4 + */ + virtual int32_t split(const UnicodeString &input, + UnicodeString dest[], + int32_t destCapacity, + UErrorCode &status) const; + + + /** + * Split a string into fields. Somewhat like split() from Perl or Java. + * Pattern matches identify delimiters that separate the input + * into fields. The input data between the delimiters becomes the + * fields themselves. 
+ * + * If the delimiter pattern includes capture groups, the captured text will + * also appear in the destination array of output strings, interspersed + * with the fields. This is similar to Perl, but differs from Java, + * which ignores the presence of capture groups in the pattern. + * + * Trailing empty fields will always be returned, assuming sufficient + * destination capacity. This differs from the default behavior for Java + * and Perl where trailing empty fields are not returned. + * + * The number of strings produced by the split operation is returned. + * This count includes the strings from capture groups in the delimiter pattern. + * This behavior differs from Java, which ignores capture groups. + * + * For the best performance on split() operations, + * <code>RegexMatcher::split</code> is preferable to this function + * + * @param input The string to be split into fields. The field delimiters + * match the pattern (in the "this" object) + * @param dest An array of mutable UText structs to receive the results of the split. + * If a field is NULL, a new UText is allocated to contain the results for + * that field. This new UText is not guaranteed to be mutable. + * @param destCapacity The number of elements in the destination array. + * If the number of fields found is less than destCapacity, the + * extra strings in the destination array are not altered. + * If the number of destination strings is less than the number + * of fields, the trailing part of the input string, including any + * field delimiters, is placed in the last destination string. + * @param status A reference to a UErrorCode to receive any errors. + * @return The number of destination strings used. + * + * @stable ICU 4.6 + */ + virtual int32_t split(UText *input, + UText *dest[], + int32_t destCapacity, + UErrorCode &status) const; + + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. 
+ * + * @stable ICU 2.4 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.4 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + // + // Implementation Data + // + UText *fPattern; // The original pattern string. + UnicodeString *fPatternString; // The original pattern UnicodeString if relevant + uint32_t fFlags; // The flags used when compiling the pattern. + // + UVector64 *fCompiledPat; // The compiled pattern p-code. + UnicodeString fLiteralText; // Any literal string data from the pattern, + // after un-escaping, for use during the match. + + UVector *fSets; // Any UnicodeSets referenced from the pattern. + Regex8BitSet *fSets8; // (and fast sets for latin-1 range.) + + + UErrorCode fDeferredStatus; // status if some prior error has left this + // RegexPattern in an unusable state. + + int32_t fMinMatchLen; // Minimum Match Length. All matches will have length + // >= this value. For some patterns, this calculated + // value may be less than the true shortest + // possible match. + + int32_t fFrameSize; // Size of a state stack frame in the + // execution engine. + + int32_t fDataSize; // The size of the data needed by the pattern that + // does not go on the state stack, but has just + // a single copy per matcher. + + UVector32 *fGroupMap; // Map from capture group number to position of + // the group's variables in the matcher stack frame. + + UnicodeSet **fStaticSets; // Ptr to static (shared) sets for predefined + // regex character classes, e.g. Word. + + Regex8BitSet *fStaticSets8; // Ptr to the static (shared) latin-1 only + // sets for predefined regex classes. + + int32_t fStartType; // Info on how a match must start. 
+ int32_t fInitialStringIdx; // + int32_t fInitialStringLen; + UnicodeSet *fInitialChars; + UChar32 fInitialChar; + Regex8BitSet *fInitialChars8; + UBool fNeedsAltInput; + + UHashtable *fNamedCaptureMap; // Map from capture group names to numbers. + + friend class RegexCompile; + friend class RegexMatcher; + friend class RegexCImpl; + + // + // Implementation Methods + // + void init(); // Common initialization, for use by constructors. + void zap(); // Common cleanup + + void dumpOp(int32_t index) const; + + public: +#ifndef U_HIDE_INTERNAL_API + /** + * Dump a compiled pattern. Internal debug function. + * @internal + */ + void dumpPattern() const; +#endif /* U_HIDE_INTERNAL_API */ +}; + + + +/** + * class RegexMatcher bundles together a regular expression pattern and + * input text to which the expression can be applied. It includes methods + * for testing for matches, and for find and replace operations. + * + * <p>Class RegexMatcher is not intended to be subclassed.</p> + * + * @stable ICU 2.4 + */ +class U_I18N_API RegexMatcher U_FINAL : public UObject { +public: + + /** + * Construct a RegexMatcher for a regular expression. + * This is a convenience method that avoids the need to explicitly create + * a RegexPattern object. Note that if several RegexMatchers need to be + * created for the same expression, it will be more efficient to + * separately create and cache a RegexPattern object, and use + * its matcher() method to create the RegexMatcher objects. + * + * @param regexp The Regular Expression to be compiled. + * @param flags Regular expression options, such as case insensitive matching. + * @see UREGEX_CASE_INSENSITIVE + * @param status Any errors are reported by setting this UErrorCode variable. + * @stable ICU 2.6 + */ + RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status); + + /** + * Construct a RegexMatcher for a regular expression. 
+ * This is a convenience method that avoids the need to explicitly create + * a RegexPattern object. Note that if several RegexMatchers need to be + * created for the same expression, it will be more efficient to + * separately create and cache a RegexPattern object, and use + * its matcher() method to create the RegexMatcher objects. + * + * @param regexp The regular expression to be compiled. + * @param flags Regular expression options, such as case insensitive matching. + * @see UREGEX_CASE_INSENSITIVE + * @param status Any errors are reported by setting this UErrorCode variable. + * + * @stable ICU 4.6 + */ + RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status); + + /** + * Construct a RegexMatcher for a regular expression. + * This is a convenience method that avoids the need to explicitly create + * a RegexPattern object. Note that if several RegexMatchers need to be + * created for the same expression, it will be more efficient to + * separately create and cache a RegexPattern object, and use + * its matcher() method to create the RegexMatcher objects. + * <p> + * The matcher will retain a reference to the supplied input string, and all regexp + * pattern matching operations happen directly on the original string. It is + * critical that the string not be altered or deleted before use by the regular + * expression operations is complete. + * + * @param regexp The Regular Expression to be compiled. + * @param input The string to match. The matcher retains a reference to the + * caller's string; no copy is made. + * @param flags Regular expression options, such as case insensitive matching. + * @see UREGEX_CASE_INSENSITIVE + * @param status Any errors are reported by setting this UErrorCode variable. + * @stable ICU 2.6 + */ + RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, + uint32_t flags, UErrorCode &status); + + /** + * Construct a RegexMatcher for a regular expression.
+ * This is a convenience method that avoids the need to explicitly create + * a RegexPattern object. Note that if several RegexMatchers need to be + * created for the same expression, it will be more efficient to + * separately create and cache a RegexPattern object, and use + * its matcher() method to create the RegexMatcher objects. + * <p> + * The matcher will make a shallow clone of the supplied input text, and all regexp + * pattern matching operations happen on this clone. While read-only operations on + * the supplied text are permitted, it is critical that the underlying string not be + * altered or deleted before use by the regular expression operations is complete. + * + * @param regexp The Regular Expression to be compiled. + * @param input The string to match. The matcher retains a shallow clone of the text. + * @param flags Regular expression options, such as case insensitive matching. + * @see UREGEX_CASE_INSENSITIVE + * @param status Any errors are reported by setting this UErrorCode variable. + * + * @stable ICU 4.6 + */ + RegexMatcher(UText *regexp, UText *input, + uint32_t flags, UErrorCode &status); + +private: + /** + * Cause a compilation error if an application accidentally attempts to + * create a matcher with a (UChar *) string as input rather than + * a UnicodeString. Avoids a dangling reference to a temporary string. + * <p> + * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * using one of the aliasing constructors, such as + * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code> + * or in a UText, using + * <code>utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);</code> + * + */ + RegexMatcher(const UnicodeString &regexp, const UChar *input, + uint32_t flags, UErrorCode &status); +public: + + + /** + * Destructor.
+ * + * @stable ICU 2.4 + */ + virtual ~RegexMatcher(); + + + /** + * Attempts to match the entire input region against the pattern. + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if there is a match + * @stable ICU 2.4 + */ + virtual UBool matches(UErrorCode &status); + + + /** + * Resets the matcher, then attempts to match the input beginning + * at the specified startIndex, and extending to the end of the input. + * The input region is reset to include the entire input string. + * A successful match must extend to the end of the input. + * @param startIndex The input string (native) index at which to begin matching. + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if there is a match + * @stable ICU 2.8 + */ + virtual UBool matches(int64_t startIndex, UErrorCode &status); + + + /** + * Attempts to match the input string, starting from the beginning of the region, + * against the pattern. Like the matches() method, this function + * always starts at the beginning of the input region; + * unlike that function, it does not require that the entire region be matched. + * + * <p>If the match succeeds then more information can be obtained via the <code>start()</code>, + * <code>end()</code>, and <code>group()</code> functions.</p> + * + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if there is a match at the start of the input string. + * @stable ICU 2.4 + */ + virtual UBool lookingAt(UErrorCode &status); + + + /** + * Attempts to match the input string, starting from the specified index, against the pattern. + * The match may be of any length, and is not required to extend to the end + * of the input string. Contrast with match(). 
+ * + * <p>If the match succeeds then more information can be obtained via the <code>start()</code>, + * <code>end()</code>, and <code>group()</code> functions.</p> + * + * @param startIndex The input string (native) index at which to begin matching. + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if there is a match. + * @stable ICU 2.8 + */ + virtual UBool lookingAt(int64_t startIndex, UErrorCode &status); + + + /** + * Find the next pattern match in the input string. + * The find begins searching the input at the location following the end of + * the previous match, or at the start of the string if there is no previous match. + * If a match is found, <code>start(), end()</code> and <code>group()</code> + * will provide more information regarding the match. + * <p>Note that if the input string is changed by the application, + * use find(startPos, status) instead of find(), because the saved starting + * position may not be valid with the altered input string.</p> + * @return TRUE if a match is found. + * @stable ICU 2.4 + */ + virtual UBool find(); + + + /** + * Find the next pattern match in the input string. + * The find begins searching the input at the location following the end of + * the previous match, or at the start of the string if there is no previous match. + * If a match is found, <code>start(), end()</code> and <code>group()</code> + * will provide more information regarding the match. + * <p>Note that if the input string is changed by the application, + * use find(startPos, status) instead of find(), because the saved starting + * position may not be valid with the altered input string.</p> + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if a match is found. 
+ * @stable ICU 55 + */ + virtual UBool find(UErrorCode &status); + + /** + * Resets this RegexMatcher and then attempts to find the next substring of the + * input string that matches the pattern, starting at the specified index. + * + * @param start The (native) index in the input string to begin the search. + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if a match is found. + * @stable ICU 2.4 + */ + virtual UBool find(int64_t start, UErrorCode &status); + + + /** + * Returns a string containing the text matched by the previous match. + * If the pattern can match an empty string, an empty string may be returned. + * @param status A reference to a UErrorCode to receive any errors. + * Possible errors are U_REGEX_INVALID_STATE if no match + * has been attempted or the last match failed. + * @return a string containing the matched input text. + * @stable ICU 2.4 + */ + virtual UnicodeString group(UErrorCode &status) const; + + + /** + * Returns a string containing the text captured by the given group + * during the previous match operation. Group(0) is the entire match. + * + * A zero length string is returned both for capture groups that did not + * participate in the match and for actual zero length matches. + * To distinguish between these two cases use the function start(), + * which returns -1 for non-participating groups. + * + * @param groupNum the capture group number + * @param status A reference to a UErrorCode to receive any errors. + * Possible errors are U_REGEX_INVALID_STATE if no match + * has been attempted or the last match failed and + * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. + * @return the captured text + * @stable ICU 2.4 + */ + virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const; + + /** + * Returns the number of capturing groups in this matcher's pattern. 
+ * @return the number of capture groups + * @stable ICU 2.4 + */ + virtual int32_t groupCount() const; + + + /** + * Returns a shallow clone of the entire live input string with the UText current native index + * set to the beginning of the requested group. + * + * @param dest The UText into which the input should be cloned, or NULL to create a new UText + * @param group_len A reference to receive the length of the desired capture group + * @param status A reference to a UErrorCode to receive any errors. + * Possible errors are U_REGEX_INVALID_STATE if no match + * has been attempted or the last match failed and + * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. + * @return dest if non-NULL, a shallow copy of the input text otherwise + * + * @stable ICU 4.6 + */ + virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const; + + /** + * Returns a shallow clone of the entire live input string with the UText current native index + * set to the beginning of the requested group. + * + * A group length of zero is returned both for capture groups that did not + * participate in the match and for actual zero length matches. + * To distinguish between these two cases use the function start(), + * which returns -1 for non-participating groups. + * + * @param groupNum The capture group number. + * @param dest The UText into which the input should be cloned, or NULL to create a new UText. + * @param group_len A reference to receive the length of the desired capture group + * @param status A reference to a UErrorCode to receive any errors. + * Possible errors are U_REGEX_INVALID_STATE if no match + * has been attempted or the last match failed and + * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. 
+ * @return dest if non-NULL, a shallow copy of the input text otherwise + * + * @stable ICU 4.6 + */ + virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const; + + /** + * Returns the index in the input string of the start of the text matched + * during the previous match operation. + * @param status a reference to a UErrorCode to receive any errors. + * @return The (native) position in the input string of the start of the last match. + * @stable ICU 2.4 + */ + virtual int32_t start(UErrorCode &status) const; + + /** + * Returns the index in the input string of the start of the text matched + * during the previous match operation. + * @param status a reference to a UErrorCode to receive any errors. + * @return The (native) position in the input string of the start of the last match. + * @stable ICU 4.6 + */ + virtual int64_t start64(UErrorCode &status) const; + + + /** + * Returns the index in the input string of the start of the text matched by the + * specified capture group during the previous match operation. Return -1 if + * the capture group exists in the pattern, but was not part of the last match. + * + * @param group the capture group number + * @param status A reference to a UErrorCode to receive any errors. Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed, and + * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number + * @return the (native) start position of substring matched by the specified group. + * @stable ICU 2.4 + */ + virtual int32_t start(int32_t group, UErrorCode &status) const; + + /** + * Returns the index in the input string of the start of the text matched by the + * specified capture group during the previous match operation. Return -1 if + * the capture group exists in the pattern, but was not part of the last match. + * + * @param group the capture group number. + * @param status A reference to a UErrorCode to receive any errors. 
Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed, and + * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. + * @return the (native) start position of substring matched by the specified group. + * @stable ICU 4.6 + */ + virtual int64_t start64(int32_t group, UErrorCode &status) const; + + /** + * Returns the index in the input string of the first character following the + * text matched during the previous match operation. + * + * @param status A reference to a UErrorCode to receive any errors. Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed. + * @return the index of the last character matched, plus one. + * The index value returned is a native index, corresponding to + * code units for the underlying encoding type, for example, + * a byte index for UTF-8. + * @stable ICU 2.4 + */ + virtual int32_t end(UErrorCode &status) const; + + /** + * Returns the index in the input string of the first character following the + * text matched during the previous match operation. + * + * @param status A reference to a UErrorCode to receive any errors. Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed. + * @return the index of the last character matched, plus one. + * The index value returned is a native index, corresponding to + * code units for the underlying encoding type, for example, + * a byte index for UTF-8. + * @stable ICU 4.6 + */ + virtual int64_t end64(UErrorCode &status) const; + + + /** + * Returns the index in the input string of the character following the + * text matched by the specified capture group during the previous match operation. + * + * @param group the capture group number + * @param status A reference to a UErrorCode to receive any errors. 
Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed and + * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number + * @return the index of the first character following the text + * captured by the specified group during the previous match operation. + * Return -1 if the capture group exists in the pattern but was not part of the match. + * The index value returned is a native index, corresponding to + * code units for the underlying encoding type, for example, + * a byte index for UTF8. + * @stable ICU 2.4 + */ + virtual int32_t end(int32_t group, UErrorCode &status) const; + + /** + * Returns the index in the input string of the character following the + * text matched by the specified capture group during the previous match operation. + * + * @param group the capture group number + * @param status A reference to a UErrorCode to receive any errors. Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed and + * U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number + * @return the index of the first character following the text + * captured by the specified group during the previous match operation. + * Return -1 if the capture group exists in the pattern but was not part of the match. + * The index value returned is a native index, corresponding to + * code units for the underlying encoding type, for example, + * a byte index for UTF8. + * @stable ICU 4.6 + */ + virtual int64_t end64(int32_t group, UErrorCode &status) const; + + /** + * Resets this matcher. The effect is to remove any memory of previous matches, + * and to cause subsequent find() operations to begin at the beginning of + * the input string. + * + * @return this RegexMatcher. + * @stable ICU 2.4 + */ + virtual RegexMatcher &reset(); + + + /** + * Resets this matcher, and set the current input position. 
+ * The effect is to remove any memory of previous matches, + * and to cause subsequent find() operations to begin at + * the specified (native) position in the input string. + * <p> + * The matcher's region is reset to its default, which is the entire + * input string. + * <p> + * An alternative to this function is to set a match region + * beginning at the desired index. + * + * @return this RegexMatcher. + * @stable ICU 2.8 + */ + virtual RegexMatcher &reset(int64_t index, UErrorCode &status); + + + /** + * Resets this matcher with a new input string. This allows instances of RegexMatcher + * to be reused, which is more efficient than creating a new RegexMatcher for + * each input string to be processed. + * @param input The new string on which subsequent pattern matches will operate. + * The matcher retains a reference to the callers string, and operates + * directly on that. Ownership of the string remains with the caller. + * Because no copy of the string is made, it is essential that the + * caller not delete the string until after regexp operations on it + * are done. + * Note that while a reset on the matcher with an input string that is then + * modified across/during matcher operations may be supported currently for UnicodeString, + * this was not originally intended behavior, and support for this is not guaranteed + * in upcoming versions of ICU. + * @return this RegexMatcher. + * @stable ICU 2.4 + */ + virtual RegexMatcher &reset(const UnicodeString &input); + + + /** + * Resets this matcher with a new input string. This allows instances of RegexMatcher + * to be reused, which is more efficient than creating a new RegexMatcher for + * each input string to be processed. + * @param input The new string on which subsequent pattern matches will operate. + * The matcher makes a shallow clone of the given text; ownership of the + * original string remains with the caller. 
Because no deep copy of the + * text is made, it is essential that the caller not modify the string + * until after regexp operations on it are done. + * @return this RegexMatcher. + * + * @stable ICU 4.6 + */ + virtual RegexMatcher &reset(UText *input); + + + /** + * Set the subject text string upon which the regular expression is looking for matches + * without changing any other aspect of the matching state. + * The new and previous text strings must have the same content. + * + * This function is intended for use in environments where ICU is operating on + * strings that may move around in memory. It provides a mechanism for notifying + * ICU that the string has been relocated, and providing a new UText to access the + * string in its new position. + * + * Note that the regular expression implementation never copies the underlying text + * of a string being matched, but always operates directly on the original text + * provided by the user. Refreshing simply drops the references to the old text + * and replaces them with references to the new. + * + * Caution: this function is normally used only by very specialized, + * system-level code. One example use case is with garbage collection that moves + * the text in memory. + * + * @param input The new (moved) text string. + * @param status Receives errors detected by this function. + * + * @stable ICU 4.8 + */ + virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status); + +private: + /** + * Cause a compilation error if an application accidentally attempts to + * reset a matcher with a (UChar *) string as input rather than + * a UnicodeString. Avoids a dangling reference to a temporary string. 
+ * <p> + * To efficiently work with UChar *strings, wrap the data in a UnicodeString + * using one of the aliasing constructors, such as + * <code>UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);</code> + * or in a UText, using + * <code>utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);</code> + * + */ + RegexMatcher &reset(const UChar *input); +public: + + /** + * Returns the input string being matched. Ownership of the string belongs to + * the matcher; it should not be altered or deleted. This method will work even if the input + * was originally supplied as a UText. + * @return the input string + * @stable ICU 2.4 + */ + virtual const UnicodeString &input() const; + + /** + * Returns the input string being matched. This is the live input text; it should not be + * altered or deleted. This method will work even if the input was originally supplied as + * a UnicodeString. + * @return the input text + * + * @stable ICU 4.6 + */ + virtual UText *inputText() const; + + /** + * Returns the input string being matched, either by copying it into the provided + * UText parameter or by returning a shallow clone of the live input. Note that copying + * the entire input may cause significant performance and memory issues. + * @param dest The UText into which the input should be copied, or NULL to create a new UText + * @param status error code + * @return dest if non-NULL, a shallow copy of the input text otherwise + * + * @stable ICU 4.6 + */ + virtual UText *getInput(UText *dest, UErrorCode &status) const; + + + /** Sets the limits of this matcher's region. + * The region is the part of the input string that will be searched to find a match. + * Invoking this method resets the matcher, and then sets the region to start + * at the index specified by the start parameter and end at the index specified + * by the end parameter. 
+ * + * Depending on the transparency and anchoring being used (see useTransparentBounds + * and useAnchoringBounds), certain constructs such as anchors may behave differently + * at or around the boundaries of the region. + * + * The function will fail if start is greater than limit, or if either index + * is less than zero or greater than the length of the string being matched. + * + * @param start The (native) index to begin searches at. + * @param limit The index to end searches at (exclusive). + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.0 + */ + virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status); + + /** + * Identical to region(start, limit, status) but also allows a start position without + * resetting the region state. + * @param regionStart The region start + * @param regionLimit the limit of the region + * @param startIndex The (native) index within the region bounds at which to begin searches. + * @param status A reference to a UErrorCode to receive any errors. + * If startIndex is not within the specified region bounds, + * U_INDEX_OUTOFBOUNDS_ERROR is returned. + * @stable ICU 4.6 + */ + virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status); + + /** + * Reports the start index of this matcher's region. The searches this matcher + * conducts are limited to finding matches within regionStart (inclusive) and + * regionEnd (exclusive). + * + * @return The starting (native) index of this matcher's region.
+ * @stable ICU 4.6 + */ + virtual int64_t regionStart64() const; + + + /** + * Reports the end (limit) index (exclusive) of this matcher's region. The searches + * this matcher conducts are limited to finding matches within regionStart + * (inclusive) and regionEnd (exclusive). + * + * @return The ending point (native) of this matcher's region. + * @stable ICU 4.0 + */ + virtual int32_t regionEnd() const; + + /** + * Reports the end (limit) index (exclusive) of this matcher's region. The searches + * this matcher conducts are limited to finding matches within regionStart + * (inclusive) and regionEnd (exclusive). + * + * @return The ending point (native) of this matcher's region. + * @stable ICU 4.6 + */ + virtual int64_t regionEnd64() const; + + /** + * Queries the transparency of region bounds for this matcher. + * See useTransparentBounds for a description of transparent and opaque bounds. + * By default, a matcher uses opaque region boundaries. + * + * @return TRUE if this matcher is using transparent bounds, FALSE if it is not. + * @stable ICU 4.0 + */ + virtual UBool hasTransparentBounds() const; + + /** + * Sets the transparency of region bounds for this matcher. + * Invoking this function with an argument of true will set this matcher to use transparent bounds. + * If the boolean argument is false, then opaque bounds will be used. + * + * Using transparent bounds, the boundaries of this matcher's region are transparent + * to lookahead, lookbehind, and boundary matching constructs. Those constructs can + * see text beyond the boundaries of the region while checking for a match. + * + * With opaque bounds, no text outside of the matcher's region is visible to lookahead, + * lookbehind, and boundary matching constructs. + * + * By default, a matcher uses opaque bounds.
+ * + * @param b TRUE for transparent bounds; FALSE for opaque bounds + * @return This Matcher + * @stable ICU 4.0 + **/ + virtual RegexMatcher &useTransparentBounds(UBool b); + + + /** + * Return true if this matcher is using anchoring bounds. + * By default, matchers use anchoring region bounds. + * + * @return TRUE if this matcher is using anchoring bounds. + * @stable ICU 4.0 + */ + virtual UBool hasAnchoringBounds() const; + + + /** + * Set whether this matcher is using Anchoring Bounds for its region. + * With anchoring bounds, pattern anchors such as ^ and $ will match at the start + * and end of the region. Without Anchoring Bounds, anchors will only match at + * the positions they would in the complete text. + * + * Anchoring Bounds are the default for regions. + * + * @param b TRUE to enable anchoring bounds; FALSE to disable them. + * @return This Matcher + * @stable ICU 4.0 + */ + virtual RegexMatcher &useAnchoringBounds(UBool b); + + + /** + * Return TRUE if the most recent matching operation attempted to access + * additional input beyond the available input text. + * In this case, additional input text could change the results of the match. + * + * hitEnd() is defined for both successful and unsuccessful matches. + * In either case hitEnd() will return TRUE if the end of the text was + * reached at any point during the matching process. + * + * @return TRUE if the most recent match hit the end of input + * @stable ICU 4.0 + */ + virtual UBool hitEnd() const; + + /** + * Return TRUE if the most recent match succeeded and additional input could cause + * it to fail. If this method returns false and a match was found, then more input + * might change the match but the match won't be lost. If a match was not found, + * then requireEnd has no meaning. + * + * @return TRUE if more input could cause the most recent match to no longer match.
+ * @stable ICU 4.0 + */ + virtual UBool requireEnd() const; + + + /** + * Returns the pattern that is interpreted by this matcher. + * @return the RegexPattern for this RegexMatcher + * @stable ICU 2.4 + */ + virtual const RegexPattern &pattern() const; + + + /** + * Replaces every substring of the input that matches the pattern + * with the given replacement string. This is a convenience function that + * provides a complete find-and-replace-all operation. + * + * This method first resets this matcher. It then scans the input string + * looking for matches of the pattern. Input that is not part of any + * match is left unchanged; each match is replaced in the result by the + * replacement string. The replacement string may contain references to + * capture groups. + * + * @param replacement a string containing the replacement text. + * @param status a reference to a UErrorCode to receive any errors. + * @return a string containing the results of the find and replace. + * @stable ICU 2.4 + */ + virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status); + + + /** + * Replaces every substring of the input that matches the pattern + * with the given replacement string. This is a convenience function that + * provides a complete find-and-replace-all operation. + * + * This method first resets this matcher. It then scans the input string + * looking for matches of the pattern. Input that is not part of any + * match is left unchanged; each match is replaced in the result by the + * replacement string. The replacement string may contain references to + * capture groups. + * + * @param replacement a string containing the replacement text. + * @param dest a mutable UText in which the results are placed. + * If NULL, a new UText will be created (which may not be mutable). + * @param status a reference to a UErrorCode to receive any errors. + * @return a string containing the results of the find and replace. 
+ * If a pre-allocated UText was provided, it will always be used and returned. + * + * @stable ICU 4.6 + */ + virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status); + + + /** + * Replaces the first substring of the input that matches + * the pattern with the replacement string. This is a convenience + * function that provides a complete find-and-replace operation. + * + * <p>This function first resets this RegexMatcher. It then scans the input string + * looking for a match of the pattern. Input that is not part + * of the match is appended directly to the result string; the match is replaced + * in the result by the replacement string. The replacement string may contain + * references to captured groups.</p> + * + * <p>The state of the matcher (the position at which a subsequent find() + * would begin) after completing a replaceFirst() is not specified. The + * RegexMatcher should be reset before doing additional find() operations.</p> + * + * @param replacement a string containing the replacement text. + * @param status a reference to a UErrorCode to receive any errors. + * @return a string containing the results of the find and replace. + * @stable ICU 2.4 + */ + virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status); + + + /** + * Replaces the first substring of the input that matches + * the pattern with the replacement string. This is a convenience + * function that provides a complete find-and-replace operation. + * + * <p>This function first resets this RegexMatcher. It then scans the input string + * looking for a match of the pattern. Input that is not part + * of the match is appended directly to the result string; the match is replaced + * in the result by the replacement string. 
The replacement string may contain + * references to captured groups.</p> + * + * <p>The state of the matcher (the position at which a subsequent find() + * would begin) after completing a replaceFirst() is not specified. The + * RegexMatcher should be reset before doing additional find() operations.</p> + * + * @param replacement a string containing the replacement text. + * @param dest a mutable UText in which the results are placed. + * If NULL, a new UText will be created (which may not be mutable). + * @param status a reference to a UErrorCode to receive any errors. + * @return a string containing the results of the find and replace. + * If a pre-allocated UText was provided, it will always be used and returned. + * + * @stable ICU 4.6 + */ + virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status); + + + /** + * Implements a replace operation intended to be used as part of an + * incremental find-and-replace. + * + * <p>The input string, starting from the end of the previous replacement and ending at + * the start of the current match, is appended to the destination string. Then the + * replacement string is appended to the output string, + * including handling any substitutions of captured text.</p> + * + * <p>For simple, prepackaged, non-incremental find-and-replace + * operations, see replaceFirst() or replaceAll().</p> + * + * @param dest A UnicodeString to which the results of the find-and-replace are appended. + * @param replacement A UnicodeString that provides the text to be substituted for + * the input text that matched the regexp pattern. The replacement + * text may contain references to captured text from the + * input. + * @param status A reference to a UErrorCode to receive any errors. Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR + * if the replacement text specifies a capture group that + * does not exist in the pattern. 
+ * + * @return this RegexMatcher + * @stable ICU 2.4 + * + */ + virtual RegexMatcher &appendReplacement(UnicodeString &dest, + const UnicodeString &replacement, UErrorCode &status); + + + /** + * Implements a replace operation intended to be used as part of an + * incremental find-and-replace. + * + * <p>The input string, starting from the end of the previous replacement and ending at + * the start of the current match, is appended to the destination string. Then the + * replacement string is appended to the output string, + * including handling any substitutions of captured text.</p> + * + * <p>For simple, prepackaged, non-incremental find-and-replace + * operations, see replaceFirst() or replaceAll().</p> + * + * @param dest A mutable UText to which the results of the find-and-replace are appended. + * Must not be NULL. + * @param replacement A UText that provides the text to be substituted for + * the input text that matched the regexp pattern. The replacement + * text may contain references to captured text from the input. + * @param status A reference to a UErrorCode to receive any errors. Possible + * errors are U_REGEX_INVALID_STATE if no match has been + * attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR + * if the replacement text specifies a capture group that + * does not exist in the pattern. + * + * @return this RegexMatcher + * + * @stable ICU 4.6 + */ + virtual RegexMatcher &appendReplacement(UText *dest, + UText *replacement, UErrorCode &status); + + + /** + * As the final step in a find-and-replace operation, append the remainder + * of the input string, starting at the position following the last appendReplacement(), + * to the destination string. <code>appendTail()</code> is intended to be invoked after one + * or more invocations of the <code>RegexMatcher::appendReplacement()</code>. + * + * @param dest A UnicodeString to which the results of the find-and-replace are appended. + * @return the destination string. 
+ * @stable ICU 2.4 + */ + virtual UnicodeString &appendTail(UnicodeString &dest); + + + /** + * As the final step in a find-and-replace operation, append the remainder + * of the input string, starting at the position following the last appendReplacement(), + * to the destination string. <code>appendTail()</code> is intended to be invoked after one + * or more invocations of the <code>RegexMatcher::appendReplacement()</code>. + * + * @param dest A mutable UText to which the results of the find-and-replace are appended. + * Must not be NULL. + * @param status A reference to a UErrorCode to receive any errors. + * @return the destination string. + * + * @stable ICU 4.6 + */ + virtual UText *appendTail(UText *dest, UErrorCode &status); + + + /** + * Split a string into fields. Somewhat like split() from Perl. + * The pattern matches identify delimiters that separate the input + * into fields. The input data between the matches becomes the + * fields themselves. + * + * @param input The string to be split into fields. The field delimiters + * match the pattern (in the "this" object). This matcher + * will be reset to this input string. + * @param dest An array of UnicodeStrings to receive the results of the split. + * This is an array of actual UnicodeString objects, not an + * array of pointers to strings. Local (stack based) arrays can + * work well here. + * @param destCapacity The number of elements in the destination array. + * If the number of fields found is less than destCapacity, the + * extra strings in the destination array are not altered. + * If the number of destination strings is less than the number + * of fields, the trailing part of the input string, including any + * field delimiters, is placed in the last destination string. + * @param status A reference to a UErrorCode to receive any errors. + * @return The number of fields into which the input string was split.
+ * @stable ICU 2.6 + */ + virtual int32_t split(const UnicodeString &input, + UnicodeString dest[], + int32_t destCapacity, + UErrorCode &status); + + + /** + * Split a string into fields. Somewhat like split() from Perl. + * The pattern matches identify delimiters that separate the input + * into fields. The input data between the matches becomes the + * fields themselves. + * + * @param input The string to be split into fields. The field delimiters + * match the pattern (in the "this" object). This matcher + * will be reset to this input string. + * @param dest An array of mutable UText structs to receive the results of the split. + * If a field is NULL, a new UText is allocated to contain the results for + * that field. This new UText is not guaranteed to be mutable. + * @param destCapacity The number of elements in the destination array. + * If the number of fields found is less than destCapacity, the + * extra strings in the destination array are not altered. + * If the number of destination strings is less than the number + * of fields, the trailing part of the input string, including any + * field delimiters, is placed in the last destination string. + * @param status A reference to a UErrorCode to receive any errors. + * @return The number of fields into which the input string was split. + * + * @stable ICU 4.6 + */ + virtual int32_t split(UText *input, + UText *dest[], + int32_t destCapacity, + UErrorCode &status); + + /** + * Set a processing time limit for match operations with this Matcher. + * + * Some patterns, when matching certain strings, can run in exponential time. + * For practical purposes, the match operation may appear to be in an + * infinite loop. + * When a limit is set a match operation will fail with an error if the + * limit is exceeded. + * <p> + * The units of the limit are steps of the match engine. 
+ * Correspondence with actual processor time will depend on the speed + * of the processor and the details of the specific pattern, but will + * typically be on the order of milliseconds. + * <p> + * By default, the matching time is not limited. + * <p> + * + * @param limit The limit value, or 0 for no limit. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.0 + */ + virtual void setTimeLimit(int32_t limit, UErrorCode &status); + + /** + * Get the time limit, if any, for match operations made with this Matcher. + * + * @return the maximum allowed time for a match, in units of processing steps. + * @stable ICU 4.0 + */ + virtual int32_t getTimeLimit() const; + + /** + * Set the amount of heap storage available for use by the match backtracking stack. + * The matcher is also reset, discarding any results from previous matches. + * <p> + * ICU uses a backtracking regular expression engine, with the backtrack stack + * maintained on the heap. This function sets the limit to the amount of memory + * that can be used for this purpose. A backtracking stack overflow will + * result in an error from the match operation that caused it. + * <p> + * A limit is desirable because a malicious or poorly designed pattern can use + * excessive memory, potentially crashing the process. A limit is enabled + * by default. + * <p> + * @param limit The maximum size, in bytes, of the matching backtrack stack. + * A value of zero means no limit. + * The limit must be greater or equal to zero. + * + * @param status A reference to a UErrorCode to receive any errors. + * + * @stable ICU 4.0 + */ + virtual void setStackLimit(int32_t limit, UErrorCode &status); + + /** + * Get the size of the heap storage available for use by the back tracking stack. + * + * @return the maximum backtracking stack size, in bytes, or zero if the + * stack size is unlimited. 
+ * @stable ICU 4.0 + */ + virtual int32_t getStackLimit() const; + + + /** + * Set a callback function for use with this Matcher. + * During matching operations the function will be called periodically, + * giving the application the opportunity to terminate a long-running + * match. + * + * @param callback A pointer to the user-supplied callback function. + * @param context User context pointer. The value supplied at the + * time the callback function is set will be saved + * and passed to the callback each time that it is called. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.0 + */ + virtual void setMatchCallback(URegexMatchCallback *callback, + const void *context, + UErrorCode &status); + + + /** + * Get the callback function for this URegularExpression. + * + * @param callback Out parameter, receives a pointer to the user-supplied + * callback function. + * @param context Out parameter, receives the user context pointer that + * was set when uregex_setMatchCallback() was called. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.0 + */ + virtual void getMatchCallback(URegexMatchCallback *&callback, + const void *&context, + UErrorCode &status); + + + /** + * Set a progress callback function for use with find operations on this Matcher. + * During find operations, the callback will be invoked after each return from a + * match attempt, giving the application the opportunity to terminate a long-running + * find operation. + * + * @param callback A pointer to the user-supplied callback function. + * @param context User context pointer. The value supplied at the + * time the callback function is set will be saved + * and passed to the callback each time that it is called. + * @param status A reference to a UErrorCode to receive any errors. 
+ * @stable ICU 4.6 + */ + virtual void setFindProgressCallback(URegexFindProgressCallback *callback, + const void *context, + UErrorCode &status); + + + /** + * Get the find progress callback function for this Matcher. + * + * @param callback Out parameter, receives a pointer to the user-supplied + * callback function. + * @param context Out parameter, receives the user context pointer that + * was set when setFindProgressCallback() was called. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.6 + */ + virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, + const void *&context, + UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * setTrace Debug function, enable/disable tracing of the matching engine. + * For internal ICU development use only. DO NOT USE!!!! + * @internal + */ + void setTrace(UBool state); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +private: + // Constructors and other object boilerplate are private. + // Instances of RegexMatcher can not be assigned, copied, cloned, etc. + RegexMatcher(); // default constructor not implemented + RegexMatcher(const RegexPattern *pat); + RegexMatcher(const RegexMatcher &other); + RegexMatcher &operator =(const RegexMatcher &rhs); + void init(UErrorCode &status); // Common initialization + void init2(UText *t, UErrorCode &e); // Common initialization, part 2. + + friend class RegexPattern; + friend class RegexCImpl; +public: +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + void resetPreserveRegion(); // Reset matcher state, but preserve any region.
+#endif /* U_HIDE_INTERNAL_API */ +private: + + // + // MatchAt This is the internal interface to the match engine itself. + // Match status comes back in matcher member variables. + // + void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status); + inline void backTrack(int64_t &inputIdx, int32_t &patIdx); + UBool isWordBoundary(int64_t pos); // perform Perl-like \b test + UBool isUWordBoundary(int64_t pos); // perform RBBI based \b test + REStackFrame *resetStack(); + inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status); + void IncrementTime(UErrorCode &status); + + // Call user find callback function, if set. Return TRUE if operation should be interrupted. + inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status); + + int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const; + + UBool findUsingChunk(UErrorCode &status); + void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status); + UBool isChunkWordBoundary(int32_t pos); + + const RegexPattern *fPattern; + RegexPattern *fPatternOwned; // Non-NULL if this matcher owns the pattern, and + // should delete it when through. + + const UnicodeString *fInput; // The string being matched. Only used for input() + UText *fInputText; // The text being matched. Is never NULL. + UText *fAltInputText; // A shallow copy of the text being matched. + // Only created if the pattern contains backreferences. + int64_t fInputLength; // Full length of the input text. + int32_t fFrameSize; // The size of a frame in the backtrack stack. + + int64_t fRegionStart; // Start of the input region, default = 0. + int64_t fRegionLimit; // End of input region, default to input.length. + + int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $). + int64_t fAnchorLimit; // See useAnchoringBounds + + int64_t fLookStart; // Region bounds for look-ahead/behind and + int64_t fLookLimit; // and other boundary tests. 
See + // useTransparentBounds + + int64_t fActiveStart; // Currently active bounds for matching. + int64_t fActiveLimit; // Usually is the same as region, but + // is changed to fLookStart/Limit when + // entering look around regions. + + UBool fTransparentBounds; // True if using transparent bounds. + UBool fAnchoringBounds; // True if using anchoring bounds. + + UBool fMatch; // True if the last attempted match was successful. + int64_t fMatchStart; // Position of the start of the most recent match + int64_t fMatchEnd; // First position after the end of the most recent match + // Zero if no previous match, even when a region + // is active. + int64_t fLastMatchEnd; // First position after the end of the previous match, + // or -1 if there was no previous match. + int64_t fAppendPosition; // First position after the end of the previous + // appendReplacement(). As described by the + // JavaDoc for Java Matcher, where it is called + // "append position" + UBool fHitEnd; // True if the last match touched the end of input. + UBool fRequireEnd; // True if the last match required end-of-input + // (matched $ or Z) + + UVector64 *fStack; + REStackFrame *fFrame; // After finding a match, the last active stack frame, + // which will contain the capture group results. + // NOT valid while match engine is running. + + int64_t *fData; // Data area for use by the compiled pattern. + int64_t fSmallData[8]; // Use this for data if it's enough. + + int32_t fTimeLimit; // Max time (in arbitrary steps) to let the + // match engine run. Zero for unlimited. + + int32_t fTime; // Match time, accumulates while matching. + int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves. + // Kept separately from fTime to keep as much + // code as possible out of the inline + // StateSave function. + + int32_t fStackLimit; // Maximum memory size to use for the backtrack + // stack, in bytes. Zero for unlimited. 
+ + URegexMatchCallback *fCallbackFn; // Pointer to match progress callback funct. + // NULL if there is no callback. + const void *fCallbackContext; // User Context ptr for callback function. + + URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to match progress callback funct. + // NULL if there is no callback. + const void *fFindProgressCallbackContext; // User Context ptr for callback function. + + + UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility. + + UBool fTraceDebug; // Set true for debug tracing of match engine. + + UErrorCode fDeferredStatus; // Save error state that cannot be immediately + // reported, or that permanently disables this matcher. + + RuleBasedBreakIterator *fWordBreakItr; +}; + +U_NAMESPACE_END +#endif // UCONFIG_NO_REGULAR_EXPRESSIONS +#endif diff --git a/intl/icu/source/i18n/unicode/region.h b/intl/icu/source/i18n/unicode/region.h new file mode 100644 index 000000000..6bb6c746b --- /dev/null +++ b/intl/icu/source/i18n/unicode/region.h @@ -0,0 +1,224 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2014-2016, International Business Machines Corporation and others. + * All Rights Reserved. 
+ ******************************************************************************* + */ + +#ifndef REGION_H +#define REGION_H + +/** + * \file + * \brief C++ API: Region classes (territory containment) + */ + +#include "unicode/utypes.h" +#include "unicode/uregion.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/strenum.h" + +U_NAMESPACE_BEGIN + +/** + * <code>Region</code> is the class representing a Unicode Region Code, also known as a + * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of + * "regions" as "countries" when defining the characteristics of a locale. There are different + * types of region codes that are important to distinguish. + * <p> + * Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or + * selected economic and other grouping" as defined in + * UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm). + * These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO + * added for Outlying Oceania. Not all UNM.49 codes are defined in LDML, but most of them are. + * Macroregions are represented in ICU by one of three region types: WORLD ( region code 001 ), + * CONTINENTS ( regions contained directly by WORLD ), and SUBCONTINENTS ( things contained directly + * by a continent ). + * <p> + * TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also + * include areas that are not separate countries, such as the code "AQ" for Antarctica or the code + * "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate + * codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows + * for the use of 3-digit codes in the future.
+ * <p> + * UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown, + * or that the value supplied as a region was invalid. + * <p> + * DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage, + * usually due to a country splitting into multiple territories or changing its name. + * <p> + * GROUPING - A widely understood grouping of territories that has a well defined membership such + * that a region code has been assigned for it. Some of these are UNM.49 codes that don't fall into + * the world/continent/sub-continent hierarchy, while others are just well known groupings that have + * their own region code. Region "EU" (European Union) is one such region code that is a grouping. + * Groupings will never be returned by the getContainingRegion() API, since a different type of region + * ( WORLD, CONTINENT, or SUBCONTINENT ) will always be the containing region instead. + * + * The Region class is not intended for public subclassing. + * + * @author John Emmons + * @stable ICU 51 + */ + +class U_I18N_API Region : public UObject { +public: + /** + * Destructor. + * @stable ICU 51 + */ + virtual ~Region(); + + /** + * Returns true if the two regions are equal. + * @stable ICU 51 + */ + UBool operator==(const Region &that) const; + + /** + * Returns true if the two regions are NOT equal; that is, if operator ==() returns false. + * @stable ICU 51 + */ + UBool operator!=(const Region &that) const; + + /** + * Returns a pointer to a Region using the given region code. The region code can be either 2-letter ISO code, + * 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the LDML specification. + * The identifier will be canonicalized internally using the supplemental metadata as defined in the CLDR.
+ * If the region code is NULL or not recognized, the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR ). + * @stable ICU 51 + */ + static const Region* U_EXPORT2 getInstance(const char *region_code, UErrorCode &status); + + /** + * Returns a pointer to a Region using the given numeric region code. If the numeric region code is not recognized, + * the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR ). + * @stable ICU 51 + */ + static const Region* U_EXPORT2 getInstance (int32_t code, UErrorCode &status); + + /** + * Returns an enumeration over the IDs of all known regions that match the given type. + * @stable ICU 55 + */ + static StringEnumeration* U_EXPORT2 getAvailable(URegionType type, UErrorCode &status); + + /** + * Returns a pointer to the region that contains this region. Returns NULL if this region is code "001" (World) + * or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) returns the + * region "039" (Southern Europe). + * @stable ICU 51 + */ + const Region* getContainingRegion() const; + + /** + * Return a pointer to the region that geographically contains this region and matches the given type, + * moving multiple steps up the containment chain if necessary. Returns NULL if no containing region can be found + * that matches the given type. Note: The URegionTypes = "URGN_GROUPING", "URGN_DEPRECATED", or "URGN_UNKNOWN" + * are not appropriate for use in this API. NULL will be returned in this case. For example, calling this method + * with region "IT" (Italy) for type "URGN_CONTINENT" returns the region "150" ( Europe ). + * @stable ICU 51 + */ + const Region* getContainingRegion(URegionType type) const; + + /** + * Return an enumeration over the IDs of all the regions that are immediate children of this region in the + * region hierarchy. These returned regions could be either macro regions, territories, or a mixture of the two, + * depending on the containment data as defined in CLDR.
This API may return NULL if this region doesn't have + * any sub-regions. For example, calling this method with region "150" (Europe) returns an enumeration containing + * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) - "154" (Northern Europe) + * and "155" (Western Europe). + * @stable ICU 55 + */ + StringEnumeration* getContainedRegions(UErrorCode &status) const; + + /** + * Returns an enumeration over the IDs of all the regions that are children of this region anywhere in the region + * hierarchy and match the given type. This API may return an empty enumeration if this region doesn't have any + * sub-regions that match the given type. For example, calling this method with region "150" (Europe) and type + * "URGN_TERRITORY" returns a set containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. ) + * @stable ICU 55 + */ + StringEnumeration* getContainedRegions( URegionType type, UErrorCode &status ) const; + + /** + * Returns true if this region contains the supplied other region anywhere in the region hierarchy. + * @stable ICU 51 + */ + UBool contains(const Region &other) const; + + /** + * For deprecated regions, return an enumeration over the IDs of the regions that are the preferred replacement + * regions for this region. Returns NULL for a non-deprecated region. For example, calling this method with region + * "SU" (Soviet Union) would return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc... + * @stable ICU 55 + */ + StringEnumeration* getPreferredValues(UErrorCode &status) const; + + /** + * Return this region's canonical region code. + * @stable ICU 51 + */ + const char* getRegionCode() const; + + /** + * Return this region's numeric code. + * Returns a negative value if the given region does not have a numeric code assigned to it. + * @stable ICU 51 + */ + int32_t getNumericCode() const; + + /** + * Returns the region type of this region.
+ * @stable ICU 51 + */ + URegionType getType() const; + +#ifndef U_HIDE_INTERNAL_API + /** + * Cleans up statically allocated memory. + * @internal + */ + static void cleanupRegionData(); +#endif /* U_HIDE_INTERNAL_API */ + +private: + char id[4]; + UnicodeString idStr; + int32_t code; + URegionType type; + Region *containingRegion; + UVector *containedRegions; + UVector *preferredValues; + + /** + * Default Constructor. Internal - use factory methods only. + */ + Region(); + + + /* + * Initializes the region data from the ICU resource bundles. The region data + * contains the basic relationships such as which regions are known, what the numeric + * codes are, any known aliases, and the territory containment data. + * + * If the region data has already been loaded, then this method simply returns without doing + * anything meaningful. + */ + + static void U_CALLCONV loadRegionData(UErrorCode &status); + +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ +#endif // REGION_H + +//eof diff --git a/intl/icu/source/i18n/unicode/reldatefmt.h b/intl/icu/source/i18n/unicode/reldatefmt.h new file mode 100644 index 000000000..e91d20667 --- /dev/null +++ b/intl/icu/source/i18n/unicode/reldatefmt.h @@ -0,0 +1,523 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************** +* Copyright (C) 2014-2016, International Business Machines Corporation and +* others. +* All Rights Reserved.
+***************************************************************************** +* +* File RELDATEFMT.H +***************************************************************************** +*/ + +#ifndef __RELDATEFMT_H +#define __RELDATEFMT_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/udisplaycontext.h" +#include "unicode/ureldatefmt.h" +#include "unicode/locid.h" + +/** + * \file + * \brief C++ API: Formats relative dates such as "1 day ago" or "tomorrow" + */ + +#if !UCONFIG_NO_FORMATTING + +/** + * Represents the unit for formatting a relative date, e.g. "in 5 days" + * or "in 3 months" + * @stable ICU 53 + */ +typedef enum UDateRelativeUnit { + + /** + * Seconds + * @stable ICU 53 + */ + UDAT_RELATIVE_SECONDS, + + /** + * Minutes + * @stable ICU 53 + */ + UDAT_RELATIVE_MINUTES, + + /** + * Hours + * @stable ICU 53 + */ + UDAT_RELATIVE_HOURS, + + /** + * Days + * @stable ICU 53 + */ + UDAT_RELATIVE_DAYS, + + /** + * Weeks + * @stable ICU 53 + */ + UDAT_RELATIVE_WEEKS, + + /** + * Months + * @stable ICU 53 + */ + UDAT_RELATIVE_MONTHS, + + /** + * Years + * @stable ICU 53 + */ + UDAT_RELATIVE_YEARS, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UDateRelativeUnit value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDAT_RELATIVE_UNIT_COUNT +#endif // U_HIDE_DEPRECATED_API +} UDateRelativeUnit; + +/** + * Represents an absolute unit: a day of the week, or day, week, month, year, or now. + * @stable ICU 53 + */ +typedef enum UDateAbsoluteUnit { + + // Days of week have to remain together and in order from Sunday to + // Saturday.
+ /** + * Sunday + * @stable ICU 53 + */ + UDAT_ABSOLUTE_SUNDAY, + + /** + * Monday + * @stable ICU 53 + */ + UDAT_ABSOLUTE_MONDAY, + + /** + * Tuesday + * @stable ICU 53 + */ + UDAT_ABSOLUTE_TUESDAY, + + /** + * Wednesday + * @stable ICU 53 + */ + UDAT_ABSOLUTE_WEDNESDAY, + + /** + * Thursday + * @stable ICU 53 + */ + UDAT_ABSOLUTE_THURSDAY, + + /** + * Friday + * @stable ICU 53 + */ + UDAT_ABSOLUTE_FRIDAY, + + /** + * Saturday + * @stable ICU 53 + */ + UDAT_ABSOLUTE_SATURDAY, + + /** + * Day + * @stable ICU 53 + */ + UDAT_ABSOLUTE_DAY, + + /** + * Week + * @stable ICU 53 + */ + UDAT_ABSOLUTE_WEEK, + + /** + * Month + * @stable ICU 53 + */ + UDAT_ABSOLUTE_MONTH, + + /** + * Year + * @stable ICU 53 + */ + UDAT_ABSOLUTE_YEAR, + + /** + * Now + * @stable ICU 53 + */ + UDAT_ABSOLUTE_NOW, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UDateAbsoluteUnit value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDAT_ABSOLUTE_UNIT_COUNT +#endif // U_HIDE_DEPRECATED_API +} UDateAbsoluteUnit; + +/** + * Represents a direction for an absolute unit, e.g. "Next Tuesday" + * or "Last Tuesday" + * @stable ICU 53 + */ +typedef enum UDateDirection { + + /** + * Two before. Not fully supported in every locale. + * @stable ICU 53 + */ + UDAT_DIRECTION_LAST_2, + + /** + * Last + * @stable ICU 53 + */ + UDAT_DIRECTION_LAST, + + /** + * This + * @stable ICU 53 + */ + UDAT_DIRECTION_THIS, + + /** + * Next + * @stable ICU 53 + */ + UDAT_DIRECTION_NEXT, + + /** + * Two after. Not fully supported in every locale. + * @stable ICU 53 + */ + UDAT_DIRECTION_NEXT_2, + + /** + * Plain, which means the absence of a qualifier. + * @stable ICU 53 + */ + UDAT_DIRECTION_PLAIN, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UDateDirection value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */ + UDAT_DIRECTION_COUNT +#endif // U_HIDE_DEPRECATED_API +} UDateDirection; + +#if !UCONFIG_NO_BREAK_ITERATION + +U_NAMESPACE_BEGIN + +class RelativeDateTimeCacheData; +class SharedNumberFormat; +class SharedPluralRules; +class SharedBreakIterator; +class NumberFormat; +class UnicodeString; + +/** + * Formats simple relative dates. There are two types of relative dates that + * it handles: + * <ul> + * <li>relative dates with a quantity e.g "in 5 days"</li> + * <li>relative dates without a quantity e.g "next Tuesday"</li> + * </ul> + * <p> + * This API is very basic and is intended to be a building block for more + * fancy APIs. The caller tells it exactly what to display in a locale + * independent way. While this class automatically provides the correct plural + * forms, the grammatical form is otherwise as neutral as possible. It is the + * caller's responsibility to handle cut-off logic such as deciding between + * displaying "in 7 days" or "in 1 week." This API supports relative dates + * involving one single unit. This API does not support relative dates + * involving compound units, + * e.g "in 5 days and 4 hours" nor does it support parsing. + * <p> + * This class is mostly thread safe and immutable with the following caveats: + * 1. The assignment operator violates Immutability. It must not be used + * concurrently with other operations. + * 2. Caller must not hold onto adopted pointers. + * <p> + * This class is not intended for public subclassing.
+ * <p> + * Here are some examples of use: + * <blockquote> + * <pre> + * UErrorCode status = U_ZERO_ERROR; + * UnicodeString appendTo; + * RelativeDateTimeFormatter fmt(status); + * // Appends "in 1 day" + * fmt.format( + * 1, UDAT_DIRECTION_NEXT, UDAT_RELATIVE_DAYS, appendTo, status); + * // Appends "in 3 days" + * fmt.format( + * 3, UDAT_DIRECTION_NEXT, UDAT_RELATIVE_DAYS, appendTo, status); + * // Appends "3.2 years ago" + * fmt.format( + * 3.2, UDAT_DIRECTION_LAST, UDAT_RELATIVE_YEARS, appendTo, status); + * // Appends "last Sunday" + * fmt.format(UDAT_DIRECTION_LAST, UDAT_ABSOLUTE_SUNDAY, appendTo, status); + * // Appends "this Sunday" + * fmt.format(UDAT_DIRECTION_THIS, UDAT_ABSOLUTE_SUNDAY, appendTo, status); + * // Appends "next Sunday" + * fmt.format(UDAT_DIRECTION_NEXT, UDAT_ABSOLUTE_SUNDAY, appendTo, status); + * // Appends "Sunday" + * fmt.format(UDAT_DIRECTION_PLAIN, UDAT_ABSOLUTE_SUNDAY, appendTo, status); + * + * // Appends "yesterday" + * fmt.format(UDAT_DIRECTION_LAST, UDAT_ABSOLUTE_DAY, appendTo, status); + * // Appends "today" + * fmt.format(UDAT_DIRECTION_THIS, UDAT_ABSOLUTE_DAY, appendTo, status); + * // Appends "tomorrow" + * fmt.format(UDAT_DIRECTION_NEXT, UDAT_ABSOLUTE_DAY, appendTo, status); + * // Appends "now" + * fmt.format(UDAT_DIRECTION_PLAIN, UDAT_ABSOLUTE_NOW, appendTo, status); + * + * </pre> + * </blockquote> + * <p> + * In the future, we may add more forms, such as abbreviated/short forms + * (3 secs ago), and relative day periods ("yesterday afternoon"), etc. + * + * The RelativeDateTimeFormatter class is not intended for public subclassing. + * + * @stable ICU 53 + */ +class U_I18N_API RelativeDateTimeFormatter : public UObject { +public: + + /** + * Create RelativeDateTimeFormatter with default locale. + * @stable ICU 53 + */ + RelativeDateTimeFormatter(UErrorCode& status); + + /** + * Create RelativeDateTimeFormatter with given locale. 
+ * @stable ICU 53 + */ + RelativeDateTimeFormatter(const Locale& locale, UErrorCode& status); + + /** + * Create RelativeDateTimeFormatter with given locale and NumberFormat. + * + * @param locale the locale + * @param nfToAdopt Constructed object takes ownership of this pointer. + * It is an error for caller to delete this pointer or change its + * contents after calling this constructor. + * @param status Any error is returned here. + * @stable ICU 53 + */ + RelativeDateTimeFormatter( + const Locale& locale, NumberFormat *nfToAdopt, UErrorCode& status); + + /** + * Create RelativeDateTimeFormatter with given locale, NumberFormat, + * and capitalization context. + * + * @param locale the locale + * @param nfToAdopt Constructed object takes ownership of this pointer. + * It is an error for caller to delete this pointer or change its + * contents after calling this constructor. Caller may pass NULL for + * this argument if they want default number format behavior. + * @param style the format style. The UDAT_RELATIVE bit field has no effect. + * @param capitalizationContext A value from UDisplayContext that pertains to + * capitalization. + * @param status Any error is returned here. + * @stable ICU 54 + */ + RelativeDateTimeFormatter( + const Locale& locale, + NumberFormat *nfToAdopt, + UDateRelativeDateTimeFormatterStyle style, + UDisplayContext capitalizationContext, + UErrorCode& status); + + /** + * Copy constructor. + * @stable ICU 53 + */ + RelativeDateTimeFormatter(const RelativeDateTimeFormatter& other); + + /** + * Assignment operator. + * @stable ICU 53 + */ + RelativeDateTimeFormatter& operator=( + const RelativeDateTimeFormatter& other); + + /** + * Destructor. + * @stable ICU 53 + */ + virtual ~RelativeDateTimeFormatter(); + + /** + * Formats a relative date with a quantity such as "in 5 days" or + * "3 months ago" + * @param quantity The numerical amount e.g 5. This value is formatted + * according to this object's NumberFormat object. 
+ * @param direction NEXT means a future relative date; LAST means a past + * relative date. If direction is anything else, this method sets + * status to U_ILLEGAL_ARGUMENT_ERROR. + * @param unit the unit e.g day? month? year? + * @param appendTo The string to which the formatted result will be + * appended + * @param status ICU error code returned here. + * @return appendTo + * @stable ICU 53 + */ + UnicodeString& format( + double quantity, + UDateDirection direction, + UDateRelativeUnit unit, + UnicodeString& appendTo, + UErrorCode& status) const; + + /** + * Formats a relative date without a quantity. + * @param direction NEXT, LAST, THIS, etc. + * @param unit e.g SATURDAY, DAY, MONTH + * @param appendTo The string to which the formatted result will be + * appended. If the value of direction is documented as not being fully + * supported in all locales then this method leaves appendTo unchanged if + * no format string is available. + * @param status ICU error code returned here. + * @return appendTo + * @stable ICU 53 + */ + UnicodeString& format( + UDateDirection direction, + UDateAbsoluteUnit unit, + UnicodeString& appendTo, + UErrorCode& status) const; + +#ifndef U_HIDE_DRAFT_API + /** + * Format a combination of URelativeDateTimeUnit and numeric offset + * using a numeric style, e.g. "1 week ago", "in 1 week", + * "5 weeks ago", "in 5 weeks". + * + * @param offset The signed offset for the specified unit. This + * will be formatted according to this object's + * NumberFormat object. + * @param unit The unit to use when formatting the relative + * date, e.g. UDAT_REL_UNIT_WEEK, + * UDAT_REL_UNIT_FRIDAY. + * @param appendTo The string to which the formatted result will be + * appended. + * @param status ICU error code returned here. 
+ * @return appendTo + * @draft ICU 57 + */ + UnicodeString& formatNumeric( + double offset, + URelativeDateTimeUnit unit, + UnicodeString& appendTo, + UErrorCode& status) const; + + /** + * Format a combination of URelativeDateTimeUnit and numeric offset + * using a text style if possible, e.g. "last week", "this week", + * "next week", "yesterday", "tomorrow". Falls back to numeric + * style if no appropriate text term is available for the specified + * offset in the object's locale. + * + * @param offset The signed offset for the specified unit. + * @param unit The unit to use when formatting the relative + * date, e.g. UDAT_REL_UNIT_WEEK, + * UDAT_REL_UNIT_FRIDAY. + * @param appendTo The string to which the formatted result will be + * appended. + * @param status ICU error code returned here. + * @return appendTo + * @draft ICU 57 + */ + UnicodeString& format( + double offset, + URelativeDateTimeUnit unit, + UnicodeString& appendTo, + UErrorCode& status) const; +#endif /* U_HIDE_DRAFT_API */ + + /** + * Combines a relative date string and a time string in this object's + * locale. This is done with the same date-time separator used for the + * default calendar in this locale. + * + * @param relativeDateString the relative date, e.g 'yesterday' + * @param timeString the time e.g '3:45' + * @param appendTo concatenated date and time appended here + * @param status ICU error code returned here. + * @return appendTo + * @stable ICU 53 + */ + UnicodeString& combineDateAndTime( + const UnicodeString& relativeDateString, + const UnicodeString& timeString, + UnicodeString& appendTo, + UErrorCode& status) const; + + /** + * Returns the NumberFormat this object is using. + * + * @stable ICU 53 + */ + const NumberFormat& getNumberFormat() const; + + /** + * Returns the capitalization context. + * + * @stable ICU 54 + */ + UDisplayContext getCapitalizationContext() const; + + /** + * Returns the format style. 
+ * + * @stable ICU 54 + */ + UDateRelativeDateTimeFormatterStyle getFormatStyle() const; + +private: + const RelativeDateTimeCacheData* fCache; + const SharedNumberFormat *fNumberFormat; + const SharedPluralRules *fPluralRules; + UDateRelativeDateTimeFormatterStyle fStyle; + UDisplayContext fContext; + const SharedBreakIterator *fOptBreakIterator; + Locale fLocale; + void init( + NumberFormat *nfToAdopt, + BreakIterator *brkIter, + UErrorCode &status); + void adjustForContext(UnicodeString &) const; +}; + +U_NAMESPACE_END + +#endif /* !UCONFIG_NO_BREAK_ITERATION */ +#endif /* !UCONFIG_NO_FORMATTING */ +#endif /* __RELDATEFMT_H */ diff --git a/intl/icu/source/i18n/unicode/scientificnumberformatter.h b/intl/icu/source/i18n/unicode/scientificnumberformatter.h new file mode 100644 index 000000000..0035a6e69 --- /dev/null +++ b/intl/icu/source/i18n/unicode/scientificnumberformatter.h @@ -0,0 +1,222 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2014-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ +#ifndef SCINUMBERFORMATTER_H +#define SCINUMBERFORMATTER_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + + +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: Formats in scientific notation. + */ + +U_NAMESPACE_BEGIN + +class FieldPositionIterator; +class DecimalFormatStaticSets; +class DecimalFormatSymbols; +class DecimalFormat; +class Formattable; + +/** + * A formatter that formats numbers in user-friendly scientific notation. 
+ * + * Sample code: + * <pre> + * UErrorCode status = U_ZERO_ERROR; + * LocalPointer<ScientificNumberFormatter> fmt( + * ScientificNumberFormatter::createMarkupInstance( + * "en", "<sup>", "</sup>", status)); + * if (U_FAILURE(status)) { + * return; + * } + * UnicodeString appendTo; + * // appendTo = "1.23456x10<sup>-78</sup>" + * fmt->format(1.23456e-78, appendTo, status); + * </pre> + * + * @stable ICU 55 + */ +class U_I18N_API ScientificNumberFormatter : public UObject { +public: + + /** + * Creates a ScientificNumberFormatter instance that uses + * superscript characters for exponents. + * @param fmtToAdopt The DecimalFormat which must be configured for + * scientific notation. + * @param status error returned here. + * @return The new ScientificNumberFormatter instance. + * + * @stable ICU 55 + */ + static ScientificNumberFormatter *createSuperscriptInstance( + DecimalFormat *fmtToAdopt, UErrorCode &status); + + /** + * Creates a ScientificNumberFormatter instance that uses + * superscript characters for exponents for this locale. + * @param locale The locale + * @param status error returned here. + * @return The ScientificNumberFormatter instance. + * + * @stable ICU 55 + */ + static ScientificNumberFormatter *createSuperscriptInstance( + const Locale &locale, UErrorCode &status); + + + /** + * Creates a ScientificNumberFormatter instance that uses + * markup for exponents. + * @param fmtToAdopt The DecimalFormat which must be configured for + * scientific notation. + * @param beginMarkup the markup to start superscript. + * @param endMarkup the markup to end superscript. + * @param status error returned here. + * @return The new ScientificNumberFormatter instance. 
+ * + * @stable ICU 55 + */ + static ScientificNumberFormatter *createMarkupInstance( + DecimalFormat *fmtToAdopt, + const UnicodeString &beginMarkup, + const UnicodeString &endMarkup, + UErrorCode &status); + + /** + * Creates a ScientificNumberFormatter instance that uses + * markup for exponents for this locale. + * @param locale The locale + * @param beginMarkup the markup to start superscript. + * @param endMarkup the markup to end superscript. + * @param status error returned here. + * @return The ScientificNumberFormatter instance. + * + * @stable ICU 55 + */ + static ScientificNumberFormatter *createMarkupInstance( + const Locale &locale, + const UnicodeString &beginMarkup, + const UnicodeString &endMarkup, + UErrorCode &status); + + + /** + * Returns a copy of this object. Caller must free returned copy. + * @stable ICU 55 + */ + ScientificNumberFormatter *clone() const { + return new ScientificNumberFormatter(*this); + } + + /** + * Destructor. + * @stable ICU 55 + */ + virtual ~ScientificNumberFormatter(); + + /** + * Formats a number into user friendly scientific notation. + * + * @param number the number to format. + * @param appendTo formatted string appended here. + * @param status any error returned here. 
+ * @return appendTo + * + * @stable ICU 55 + */ + UnicodeString &format( + const Formattable &number, + UnicodeString &appendTo, + UErrorCode &status) const; + private: + class U_I18N_API Style : public UObject { + public: + virtual Style *clone() const = 0; + protected: + virtual UnicodeString &format( + const UnicodeString &original, + FieldPositionIterator &fpi, + const UnicodeString &preExponent, + const DecimalFormatStaticSets &decimalFormatSets, + UnicodeString &appendTo, + UErrorCode &status) const = 0; + private: + friend class ScientificNumberFormatter; + }; + + class U_I18N_API SuperscriptStyle : public Style { + public: + virtual Style *clone() const; + protected: + virtual UnicodeString &format( + const UnicodeString &original, + FieldPositionIterator &fpi, + const UnicodeString &preExponent, + const DecimalFormatStaticSets &decimalFormatSets, + UnicodeString &appendTo, + UErrorCode &status) const; + }; + + class U_I18N_API MarkupStyle : public Style { + public: + MarkupStyle( + const UnicodeString &beginMarkup, + const UnicodeString &endMarkup) + : Style(), + fBeginMarkup(beginMarkup), + fEndMarkup(endMarkup) { } + virtual Style *clone() const; + protected: + virtual UnicodeString &format( + const UnicodeString &original, + FieldPositionIterator &fpi, + const UnicodeString &preExponent, + const DecimalFormatStaticSets &decimalFormatSets, + UnicodeString &appendTo, + UErrorCode &status) const; + private: + UnicodeString fBeginMarkup; + UnicodeString fEndMarkup; + }; + + ScientificNumberFormatter( + DecimalFormat *fmtToAdopt, + Style *styleToAdopt, + UErrorCode &status); + + ScientificNumberFormatter(const ScientificNumberFormatter &other); + ScientificNumberFormatter &operator=(const ScientificNumberFormatter &); + + static void getPreExponent( + const DecimalFormatSymbols &dfs, UnicodeString &preExponent); + + static ScientificNumberFormatter *createInstance( + DecimalFormat *fmtToAdopt, + Style *styleToAdopt, + UErrorCode &status); + + UnicodeString 
fPreExponent; + DecimalFormat *fDecimalFormat; + Style *fStyle; + const DecimalFormatStaticSets *fStaticSets; + +}; + +U_NAMESPACE_END + + +#endif /* !UCONFIG_NO_FORMATTING */ +#endif diff --git a/intl/icu/source/i18n/unicode/search.h b/intl/icu/source/i18n/unicode/search.h new file mode 100644 index 000000000..0acfcced3 --- /dev/null +++ b/intl/icu/source/i18n/unicode/search.h @@ -0,0 +1,577 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2011 IBM and others. All rights reserved. +********************************************************************** +* Date Name Description +* 03/22/2000 helena Creation. +********************************************************************** +*/ + +#ifndef SEARCH_H +#define SEARCH_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: SearchIterator object. + */ + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#include "unicode/brkiter.h" +#include "unicode/usearch.h" + +/** +* @stable ICU 2.0 +*/ +struct USearch; +/** +* @stable ICU 2.0 +*/ +typedef struct USearch USearch; + +U_NAMESPACE_BEGIN + +/** + * + * <tt>SearchIterator</tt> is an abstract base class that provides + * methods to search for a pattern within a text string. Instances of + * <tt>SearchIterator</tt> maintain a current position and scans over the + * target text, returning the indices the pattern is matched and the length + * of each match. + * <p> + * <tt>SearchIterator</tt> defines a protocol for text searching. + * Subclasses provide concrete implementations of various search algorithms. + * For example, <tt>StringSearch</tt> implements language-sensitive pattern + * matching based on the comparison rules defined in a + * <tt>RuleBasedCollator</tt> object. 
+ * <p> + * Other options for searching include using a BreakIterator to restrict + * the points at which matches are detected. + * <p> + * <tt>SearchIterator</tt> provides an API that is similar to that of + * other text iteration classes such as <tt>BreakIterator</tt>. Using + * this class, it is easy to scan through text looking for all occurrences of + * a given pattern. The following example uses a <tt>StringSearch</tt> + * object to find all instances of "fox" in the target string. Any other + * subclass of <tt>SearchIterator</tt> can be used in an identical + * manner. + * <pre><code> + * UnicodeString target("The quick brown fox jumped over the lazy fox"); + * UnicodeString pattern("fox"); + * + * SearchIterator *iter = new StringSearch(pattern, target); + * UErrorCode error = U_ZERO_ERROR; + * for (int pos = iter->first(error); pos != USEARCH_DONE; + * pos = iter->next(error)) { + * printf("Found match at %d pos, length is %d\n", pos, + * iter->getMatchedLength()); + * } + * </code></pre> + * + * @see StringSearch + * @see RuleBasedCollator + */ +class U_I18N_API SearchIterator : public UObject { + +public: + + // public constructors and destructors ------------------------------- + + /** + * Copy constructor that creates a SearchIterator instance with the same + * behavior, and iterating over the same text. + * @param other the SearchIterator instance to be copied. + * @stable ICU 2.0 + */ + SearchIterator(const SearchIterator &other); + + /** + * Destructor. Cleans up the search iterator data struct. + * @stable ICU 2.0 + */ + virtual ~SearchIterator(); + + // public get and set methods ---------------------------------------- + + /** + * Sets the index to point to the given position, and clears any state + * that's affected. + * <p> + * This method takes the argument index and sets the position in the text + * string accordingly without checking if the index is pointing to a + * valid starting point to begin searching. 
+ * @param position within the text to be set. If position is less + * than or greater than the text range for searching, + * an U_INDEX_OUTOFBOUNDS_ERROR will be returned + * @param status for errors if it occurs + * @stable ICU 2.0 + */ + virtual void setOffset(int32_t position, UErrorCode &status) = 0; + + /** + * Return the current index in the text being searched. + * If the iteration has gone past the end of the text + * (or past the beginning for a backwards search), USEARCH_DONE + * is returned. + * @return current index in the text being searched. + * @stable ICU 2.0 + */ + virtual int32_t getOffset(void) const = 0; + + /** + * Sets the text searching attributes located in the enum + * USearchAttribute with values from the enum USearchAttributeValue. + * USEARCH_DEFAULT can be used for all attributes for resetting. + * @param attribute text attribute (enum USearchAttribute) to be set + * @param value text attribute value + * @param status for errors if it occurs + * @stable ICU 2.0 + */ + void setAttribute(USearchAttribute attribute, + USearchAttributeValue value, + UErrorCode &status); + + /** + * Gets the text searching attributes + * @param attribute text attribute (enum USearchAttribute) to be retrieved + * @return text attribute value + * @stable ICU 2.0 + */ + USearchAttributeValue getAttribute(USearchAttribute attribute) const; + + /** + * Returns the index to the match in the text string that was searched. + * This call returns a valid result only after a successful call to + * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. + * Just after construction, or after a searching method returns + * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>. + * <p> + * Use getMatchedLength to get the matched string length. + * @return index of a substring within the text string that is being + * searched. 
+ * @see #first + * @see #next + * @see #previous + * @see #last + * @stable ICU 2.0 + */ + int32_t getMatchedStart(void) const; + + /** + * Returns the length of text in the string which matches the search + * pattern. This call returns a valid result only after a successful call + * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. + * Just after construction, or after a searching method returns + * <tt>USEARCH_DONE</tt>, this method will return 0. + * @return The length of the match in the target text, or 0 if there + * is no match currently. + * @see #first + * @see #next + * @see #previous + * @see #last + * @stable ICU 2.0 + */ + int32_t getMatchedLength(void) const; + + /** + * Returns the text that was matched by the most recent call to + * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>. + * If the iterator is not pointing at a valid match (e.g. just after + * construction or after <tt>USEARCH_DONE</tt> has been returned), + * returns an empty string. + * @param result stores the matched string or an empty string if a match + * is not found. + * @see #first + * @see #next + * @see #previous + * @see #last + * @stable ICU 2.0 + */ + void getMatchedText(UnicodeString &result) const; + + /** + * Set the BreakIterator that will be used to restrict the points + * at which matches are detected. The user is responsible for deleting + * the breakiterator. + * @param breakiter A BreakIterator that will be used to restrict the + * points at which matches are detected. If a match is + * found, but the match's start or end index is not a + * boundary as determined by the <tt>BreakIterator</tt>, + * the match will be rejected and another will be searched + * for. If this parameter is <tt>NULL</tt>, no break + * detection is attempted. 
+ * @param status for errors if it occurs + * @see BreakIterator + * @stable ICU 2.0 + */ + void setBreakIterator(BreakIterator *breakiter, UErrorCode &status); + + /** + * Returns the BreakIterator that is used to restrict the points at + * which matches are detected. This will be the same object that was + * passed to the constructor or to <tt>setBreakIterator</tt>. + * Note that <tt>NULL</tt> is a legal value; it means that break + * detection should not be attempted. + * @return BreakIterator used to restrict matchings. + * @see #setBreakIterator + * @stable ICU 2.0 + */ + const BreakIterator * getBreakIterator(void) const; + + /** + * Set the string text to be searched. Text iteration will hence begin at + * the start of the text string. This method is useful if you want to + * re-use an iterator to search for the same pattern within a different + * body of text. The user is responsible for deleting the text. + * @param text string to be searched. + * @param status for errors. If the text length is 0, + * an U_ILLEGAL_ARGUMENT_ERROR is returned. + * @stable ICU 2.0 + */ + virtual void setText(const UnicodeString &text, UErrorCode &status); + + /** + * Set the string text to be searched. Text iteration will hence begin at + * the start of the text string. This method is useful if you want to + * re-use an iterator to search for the same pattern within a different + * body of text. + * <p> + * Note: No parsing of the text within the <tt>CharacterIterator</tt> + * will be done during searching for this version. The block of text + * in <tt>CharacterIterator</tt> will be used as it is. + * The user is responsible for deleting the text. + * @param text string iterator to be searched. + * @param status for errors if any. If the text length is 0 then an + * U_ILLEGAL_ARGUMENT_ERROR is returned. + * @stable ICU 2.0 + */ + virtual void setText(CharacterIterator &text, UErrorCode &status); + + /** + * Return the string text to be searched. 
+ * @return text string to be searched. + * @stable ICU 2.0 + */ + const UnicodeString & getText(void) const; + + // operator overloading ---------------------------------------------- + + /** + * Equality operator. + * @param that SearchIterator instance to be compared. + * @return TRUE if both SearchIterators are of the same class, have the + * same behavior, iterate over the same text and have the same + * attributes. FALSE otherwise. + * @stable ICU 2.0 + */ + virtual UBool operator==(const SearchIterator &that) const; + + /** + * Not-equal operator. + * @param that SearchIterator instance to be compared. + * @return FALSE if operator== returns TRUE, and vice versa. + * @stable ICU 2.0 + */ + UBool operator!=(const SearchIterator &that) const; + + // public methods ---------------------------------------------------- + + /** + * Returns a copy of SearchIterator with the same behavior, and + * iterating over the same text, as this one. Note that all data will be + * replicated, except for the text string to be searched. + * @return cloned object + * @stable ICU 2.0 + */ + virtual SearchIterator* safeClone(void) const = 0; + + /** + * Returns the first index at which the string text matches the search + * pattern. The iterator is adjusted so that its current index (as + * returned by <tt>getOffset</tt>) is the match position if one + * was found. + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and + * the iterator will be adjusted to the index USEARCH_DONE + * @param status for errors if it occurs + * @return The character index of the first match, or + * <tt>USEARCH_DONE</tt> if there are no matches. + * @see #getOffset + * @stable ICU 2.0 + */ + int32_t first(UErrorCode &status); + + /** + * Returns the first index equal or greater than <tt>position</tt> at which the + * string text matches the search pattern. The iterator is adjusted so + * that its current index (as returned by <tt>getOffset</tt>) is the + * match position if one was found. 
+ * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and the + * iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. + * @param position where search is to start from. If position is less + * than or greater than the text range for searching, + * an U_INDEX_OUTOFBOUNDS_ERROR will be returned + * @param status for errors if it occurs + * @return The character index of the first match following + * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no + * matches. + * @see #getOffset + * @stable ICU 2.0 + */ + int32_t following(int32_t position, UErrorCode &status); + + /** + * Returns the last index in the target text at which it matches the + * search pattern. The iterator is adjusted so that its current index + * (as returned by <tt>getOffset</tt>) is the match position if one was + * found. + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and + * the iterator will be adjusted to the index USEARCH_DONE. + * @param status for errors if it occurs + * @return The index of the last match, or <tt>USEARCH_DONE</tt> if + * there are no matches. + * @see #getOffset + * @stable ICU 2.0 + */ + int32_t last(UErrorCode &status); + + /** + * Returns the first index less than <tt>position</tt> at which the string + * text matches the search pattern. The iterator is adjusted so that its + * current index (as returned by <tt>getOffset</tt>) is the match + * position if one was found. If a match is not found, + * <tt>USEARCH_DONE</tt> will be returned and the iterator will be + * adjusted to the index USEARCH_DONE + * <p> + * When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the + * result match is always less than <tt>position</tt>. + * When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across + * <tt>position</tt>. + * + * @param position where search is to start from. 
If position is less + * than or greater than the text range for searching, + * an U_INDEX_OUTOFBOUNDS_ERROR will be returned + * @param status for errors if it occurs + * @return The character index of the first match preceding + * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are + * no matches. + * @see #getOffset + * @stable ICU 2.0 + */ + int32_t preceding(int32_t position, UErrorCode &status); + + /** + * Returns the index of the next point at which the text matches the + * search pattern, starting from the current position + * The iterator is adjusted so that its current index (as returned by + * <tt>getOffset</tt>) is the match position if one was found. + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and + * the iterator will be adjusted to a position after the end of the text + * string. + * @param status for errors if it occurs + * @return The index of the next match after the current position, + * or <tt>USEARCH_DONE</tt> if there are no more matches. + * @see #getOffset + * @stable ICU 2.0 + */ + int32_t next(UErrorCode &status); + + /** + * Returns the index of the previous point at which the string text + * matches the search pattern, starting at the current position. + * The iterator is adjusted so that its current index (as returned by + * <tt>getOffset</tt>) is the match position if one was found. + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and + * the iterator will be adjusted to the index USEARCH_DONE + * @param status for errors if it occurs + * @return The index of the previous match before the current position, + * or <tt>USEARCH_DONE</tt> if there are no more matches. + * @see #getOffset + * @stable ICU 2.0 + */ + int32_t previous(UErrorCode &status); + + /** + * Resets the iteration. + * Search will begin at the start of the text string if a forward + * iteration is initiated before a backwards iteration. 
Otherwise if a + * backwards iteration is initiated before a forwards iteration, the + * search will begin at the end of the text string. + * @stable ICU 2.0 + */ + virtual void reset(); + +protected: + // protected data members --------------------------------------------- + + /** + * C search data struct + * @stable ICU 2.0 + */ + USearch *m_search_; + + /** + * Break iterator. + * Currently the C++ breakiterator does not have getRules etc to reproduce + * another in C. Hence we keep the original around and do the verification + * at the end of the match. The user is responsible for deleting this + * break iterator. + * @stable ICU 2.0 + */ + BreakIterator *m_breakiterator_; + + /** + * Unicode string version of the search text + * @stable ICU 2.0 + */ + UnicodeString m_text_; + + // protected constructors and destructors ----------------------------- + + /** + * Default constructor. + * Initializes data to the default values. + * @stable ICU 2.0 + */ + SearchIterator(); + + /** + * Constructor for use by subclasses. + * @param text The target text to be searched. + * @param breakiter A {@link BreakIterator} that is used to restrict the + * points at which matches are detected. If + * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a + * match, but the match's start or end index is not a + * boundary as determined by the <tt>BreakIterator</tt>, + * the match is rejected and <tt>handleNext</tt> or + * <tt>handlePrev</tt> is called again. If this parameter + * is <tt>NULL</tt>, no break detection is attempted. + * @see #handleNext + * @see #handlePrev + * @stable ICU 2.0 + */ + SearchIterator(const UnicodeString &text, + BreakIterator *breakiter = NULL); + + /** + * Constructor for use by subclasses. + * <p> + * Note: No parsing of the text within the <tt>CharacterIterator</tt> + * will be done during searching for this version. The block of text + * in <tt>CharacterIterator</tt> will be used as it is. + * @param text The target text to be searched. 
+ * @param breakiter A {@link BreakIterator} that is used to restrict the + * points at which matches are detected. If + * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a + * match, but the match's start or end index is not a + * boundary as determined by the <tt>BreakIterator</tt>, + * the match is rejected and <tt>handleNext</tt> or + * <tt>handlePrev</tt> is called again. If this parameter + * is <tt>NULL</tt>, no break detection is attempted. + * @see #handleNext + * @see #handlePrev + * @stable ICU 2.0 + */ + SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL); + + // protected methods -------------------------------------------------- + + /** + * Assignment operator. Sets this iterator to have the same behavior, + * and iterate over the same text, as the one passed in. + * @param that instance to be copied. + * @stable ICU 2.0 + */ + SearchIterator & operator=(const SearchIterator &that); + + /** + * Abstract method which subclasses override to provide the mechanism + * for finding the next match in the target text. This allows different + * subclasses to provide different search algorithms. + * <p> + * If a match is found, the implementation should return the index at + * which the match starts and should call + * <tt>setMatchLength</tt> with the number of characters + * in the target text that make up the match. If no match is found, the + * method should return USEARCH_DONE. + * <p> + * @param position The index in the target text at which the search + * should start. + * @param status for error codes if it occurs. + * @return index at which the match starts, else if match is not found + * USEARCH_DONE is returned + * @see #setMatchLength + * @stable ICU 2.0 + */ + virtual int32_t handleNext(int32_t position, UErrorCode &status) + = 0; + + /** + * Abstract method which subclasses override to provide the mechanism for + * finding the previous match in the target text. 
This allows different + * subclasses to provide different search algorithms. + * <p> + * If a match is found, the implementation should return the index at + * which the match starts and should call + * <tt>setMatchLength</tt> with the number of characters + * in the target text that make up the match. If no match is found, the + * method should return USEARCH_DONE. + * <p> + * @param position The index in the target text at which the search + * should start. + * @param status for error codes if it occurs. + * @return index at which the match starts, else if match is not found + * USEARCH_DONE is returned + * @see #setMatchLength + * @stable ICU 2.0 + */ + virtual int32_t handlePrev(int32_t position, UErrorCode &status) + = 0; + + /** + * Sets the length of the currently matched string in the text string to + * be searched. + * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt> + * methods should call this when they find a match in the target text. + * @param length length of the matched text. + * @see #handleNext + * @see #handlePrev + * @stable ICU 2.0 + */ + virtual void setMatchLength(int32_t length); + + /** + * Sets the offset of the currently matched string in the text string to + * be searched. + * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt> + * methods should call this when they find a match in the target text. + * @param position start offset of the matched text. 
+ * @see #handleNext + * @see #handlePrev + * @stable ICU 2.0 + */ + virtual void setMatchStart(int32_t position); + + /** + * sets match not found + * @stable ICU 2.0 + */ + void setMatchNotFound(); +}; + +inline UBool SearchIterator::operator!=(const SearchIterator &that) const +{ + return !operator==(that); +} +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif + diff --git a/intl/icu/source/i18n/unicode/selfmt.h b/intl/icu/source/i18n/unicode/selfmt.h new file mode 100755 index 000000000..37a8f2b82 --- /dev/null +++ b/intl/icu/source/i18n/unicode/selfmt.h @@ -0,0 +1,369 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2011, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. + ******************************************************************** + * + * File SELFMT.H + * + * Modification History: + * + * Date Name Description + * 11/11/09 kirtig Finished first cut of implementation. + ********************************************************************/ + +#ifndef SELFMT +#define SELFMT + +#include "unicode/messagepattern.h" +#include "unicode/numfmt.h" +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: SelectFormat object + */ + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + +class MessageFormat; + +/** + * <p><code>SelectFormat</code> supports the creation of internationalized + * messages by selecting phrases based on keywords. The pattern specifies + * how to map keywords to phrases and provides a default phrase. The + * object provided to the format method is a string that's matched + * against the keywords. 
If there is a match, the corresponding phrase + * is selected; otherwise, the default phrase is used.</p> + * + * <h4>Using <code>SelectFormat</code> for Gender Agreement</h4> + * + * <p>Note: Typically, select formatting is done via <code>MessageFormat</code> + * with a <code>select</code> argument type, + * rather than using a stand-alone <code>SelectFormat</code>.</p> + * + * <p>The main use case for the select format is gender based inflection. + * When names or nouns are inserted into sentences, their gender can affect pronouns, + * verb forms, articles, and adjectives. Special care needs to be + * taken for the case where the gender cannot be determined. + * The impact varies between languages:</p> + * \htmlonly + * <ul> + * <li>English has three genders, and unknown gender is handled as a special + * case. Names use the gender of the named person (if known), nouns referring + * to people use natural gender, and inanimate objects are usually neutral. + * The gender only affects pronouns: "he", "she", "it", "they". + * + * <li>German differs from English in that the gender of nouns is rather + * arbitrary, even for nouns referring to people ("Mädchen", girl, is neutral). + * The gender affects pronouns ("er", "sie", "es"), articles ("der", "die", + * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes Mädchen"). + * + * <li>French has only two genders; as in German the gender of nouns + * is rather arbitrary - for sun and moon, the genders + * are the opposite of those in German. The gender affects + * pronouns ("il", "elle"), articles ("le", "la"), + * adjective forms ("bon", "bonne"), and sometimes + * verb forms ("allé", "allée"). + * + * <li>Polish distinguishes five genders (or noun classes), + * human masculine, animate non-human masculine, inanimate masculine, + * feminine, and neuter. + * </ul> + * \endhtmlonly + * <p>Some other languages have noun classes that are not related to gender, + * but similar in grammatical use. 
+ * Some African languages have around 20 noun classes.</p> + * + * <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence, + * we usually need to distinguish only between female, male and other/unknown.</p> + * + * <p>To enable localizers to create sentence patterns that take their + * language's gender dependencies into consideration, software has to provide + * information about the gender associated with a noun or name to + * <code>MessageFormat</code>. + * Two main cases can be distinguished:</p> + * + * <ul> + * <li>For people, natural gender information should be maintained for each person. + * Keywords like "male", "female", "mixed" (for groups of people) + * and "unknown" could be used. + * + * <li>For nouns, grammatical gender information should be maintained for + * each noun and per language, e.g., in resource bundles. + * The keywords "masculine", "feminine", and "neuter" are commonly used, + * but some languages may require other keywords. + * </ul> + * + * <p>The resulting keyword is provided to <code>MessageFormat</code> as a + * parameter separate from the name or noun it's associated with. For example, + * to generate a message such as "Jean went to Paris", three separate arguments + * would be provided: The name of the person as argument 0, the gender of + * the person as argument 1, and the name of the city as argument 2. + * The sentence pattern for English, where the gender of the person has + * no impact on this simple sentence, would not refer to argument 1 at all:</p> + * + * <pre>{0} went to {2}.</pre> + * + * <p><b>Note:</b> The entire sentence should be included (and partially repeated) + * inside each phrase. Otherwise translators would have to be trained on how to + * move bits of the sentence in and out of the select argument of a message. 
+ * (The examples below do not follow this recommendation!)</p> + * + * <p>The sentence pattern for French, where the gender of the person affects + * the form of the participle, uses a select format based on argument 1:</p> + * + * \htmlonly<pre>{0} est {1, select, female {allée} other {allé}} à {2}.</pre>\endhtmlonly + * + * <p>Patterns can be nested, so that it's possible to handle interactions of + * number and gender where necessary. For example, if the above sentence should + * allow for the names of several people to be inserted, the following sentence + * pattern can be used (with argument 0 the list of people's names, + * argument 1 the number of people, argument 2 their combined gender, and + * argument 3 the city name):</p> + * + * \htmlonly + * <pre>{0} {1, plural, + * one {est {2, select, female {allée} other {allé}}} + * other {sont {2, select, female {allées} other {allés}}} + * }à {3}.</pre> + * \endhtmlonly + * + * <h4>Patterns and Their Interpretation</h4> + * + * <p>The <code>SelectFormat</code> pattern string defines the phrase output + * for each user-defined keyword. + * The pattern is a sequence of (keyword, message) pairs. + * A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p> + * + * <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p> + * + * <p>You always have to define a phrase for the default keyword + * <code>other</code>; this phrase is returned when the keyword + * provided to + * the <code>format</code> method matches no other keyword. + * If a pattern does not provide a phrase for <code>other</code>, the method + * it's provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>. + * <br> + * Pattern_White_Space between keywords and messages is ignored. 
+ * Pattern_White_Space within a message is preserved and output.</p> + * + * <p><pre>Example: + * \htmlonly + * + * UErrorCode status = U_ZERO_ERROR; + * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est {1, select, female {allée} other {allé}} à Paris."), Locale("fr"), status); + * if (U_FAILURE(status)) { + * return; + * } + * FieldPosition ignore(FieldPosition::DONT_CARE); + * UnicodeString result; + * + * char* str1= "Kirti,female"; + * Formattable args1[] = {"Kirti","female"}; + * msgFmt->format(args1, 2, result, ignore, status); + * cout << "Input is " << str1 << " and result is: " << result << endl; + * delete msgFmt; + * + * \endhtmlonly + * </pre> + * </p> + * + * Produces the output:<br> + * \htmlonly + * <code>Kirti est allée à Paris.</code> + * \endhtmlonly + * + * @stable ICU 4.4 + */ + +class U_I18N_API SelectFormat : public Format { +public: + + /** + * Creates a new <code>SelectFormat</code> for a given pattern string. + * @param pattern the pattern for this <code>SelectFormat</code>. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @stable ICU 4.4 + */ + SelectFormat(const UnicodeString& pattern, UErrorCode& status); + + /** + * copy constructor. + * @stable ICU 4.4 + */ + SelectFormat(const SelectFormat& other); + + /** + * Destructor. + * @stable ICU 4.4 + */ + virtual ~SelectFormat(); + + /** + * Sets the pattern used by this select format. + * for the keyword rules. + * Patterns and their interpretation are specified in the class description. + * + * @param pattern the pattern for this select format + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. 
+ * @stable ICU 4.4 + */ + void applyPattern(const UnicodeString& pattern, UErrorCode& status); + + + using Format::format; + + /** + * Selects the phrase for the given keyword + * + * @param keyword The keyword that is used to select an alternative. + * @param appendTo output parameter to receive result. + * result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + UnicodeString& format(const UnicodeString& keyword, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Assignment operator + * + * @param other the SelectFormat object to copy from. + * @stable ICU 4.4 + */ + SelectFormat& operator=(const SelectFormat& other); + + /** + * Return true if another object is semantically equal to this one. + * + * @param other the SelectFormat object to be compared with. + * @return true if other is semantically equal to this. + * @stable ICU 4.4 + */ + virtual UBool operator==(const Format& other) const; + + /** + * Return true if another object is semantically unequal to this one. + * + * @param other the SelectFormat object to be compared with. + * @return true if other is semantically unequal to this. + * @stable ICU 4.4 + */ + virtual UBool operator!=(const Format& other) const; + + /** + * Clones this Format object polymorphically. The caller owns the + * result and should delete it when done. + * @stable ICU 4.4 + */ + virtual Format* clone(void) const; + + /** + * Format an object to produce a string. + * This method handles keyword strings. + * If the Formattable object is not a <code>UnicodeString</code>, + * then it returns a failing UErrorCode. + * + * @param obj A keyword string that is used to select an alternative. 
+ * @param appendTo output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Returns the pattern from applyPattern() or constructor. + * + * @param appendTo output parameter to receive result. + * Result is appended to existing contents. + * @return the UnicodeString with inserted pattern. + * @stable ICU 4.4 + */ + UnicodeString& toPattern(UnicodeString& appendTo); + + /** + * This method is not yet supported by <code>SelectFormat</code>. + * <P> + * Before calling, set parse_pos.index to the offset you want to start + * parsing at in the source. After calling, parse_pos.index is the end of + * the text you parsed. If error occurs, index is unchanged. + * <P> + * When parsing, leading whitespace is discarded (with a successful parse), + * while trailing whitespace is left as is. + * <P> + * See Format::parseObject() for more. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. Upon return + * this param is set to the position after the + * last character successfully parsed. If the + * source is not parsed successfully, this param + * will remain unchanged. + * @stable ICU 4.4 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& parse_pos) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. 
+ * @stable ICU 4.4 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * @stable ICU 4.4 + */ + virtual UClassID getDynamicClassID() const; + +private: + friend class MessageFormat; + + SelectFormat(); // default constructor not implemented. + + /** + * Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message. + * @param pattern A MessagePattern. + * @param partIndex the index of the first SelectFormat argument style part. + * @param keyword a keyword to be matched to one of the SelectFormat argument's keywords. + * @param ec Error code. + * @return the sub-message start part index. + */ + static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex, + const UnicodeString& keyword, UErrorCode& ec); + + MessagePattern msgPattern; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _SELFMT +//eof diff --git a/intl/icu/source/i18n/unicode/simpletz.h b/intl/icu/source/i18n/unicode/simpletz.h new file mode 100644 index 000000000..d71fe3692 --- /dev/null +++ b/intl/icu/source/i18n/unicode/simpletz.h @@ -0,0 +1,930 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************** + * Copyright (C) 1997-2013, International Business Machines * + * Corporation and others. All Rights Reserved. * + ******************************************************************************** + * + * File SIMPLETZ.H + * + * Modification History: + * + * Date Name Description + * 04/21/97 aliu Overhauled header. 
+ * 08/10/98 stephen JDK 1.2 sync + * Added setStartRule() / setEndRule() overloads + * Added hasSameRules() + * 09/02/98 stephen Added getOffset(monthLen) + * Changed getOffset() to take UErrorCode + * 07/09/99 stephen Removed millisPerHour (unused, for HP compiler) + * 12/02/99 aliu Added TimeMode and constructor and setStart/EndRule + * methods that take TimeMode. Added to docs. + ******************************************************************************** + */ + +#ifndef SIMPLETZ_H +#define SIMPLETZ_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: SimpleTimeZone is a concrete subclass of TimeZone. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/basictz.h" + +U_NAMESPACE_BEGIN + +// forward declaration +class InitialTimeZoneRule; +class TimeZoneTransition; +class AnnualTimeZoneRule; + +/** + * <code>SimpleTimeZone</code> is a concrete subclass of <code>TimeZone</code> + * that represents a time zone for use with a Gregorian calendar. This + * class does not handle historical changes. + * <P> + * When specifying daylight-savings-time begin and end dates, use a negative value for + * <code>dayOfWeekInMonth</code> to indicate that <code>SimpleTimeZone</code> should + * count from the end of the month backwards. For example, if Daylight Savings + * Time starts or ends at the last Sunday a month, use <code>dayOfWeekInMonth = -1</code> + * along with <code>dayOfWeek = UCAL_SUNDAY</code> to specify the rule. + * + * @see Calendar + * @see GregorianCalendar + * @see TimeZone + * @author D. Goldsmith, Mark Davis, Chen-Lieh Huang, Alan Liu + */ +class U_I18N_API SimpleTimeZone: public BasicTimeZone { +public: + + /** + * TimeMode is used, together with a millisecond offset after + * midnight, to specify a rule transition time. Most rules + * transition at a local wall time, that is, according to the + * current time in effect, either standard, or DST. 
However, some + * rules transition at local standard time, and some at a specific + * UTC time. Although it might seem that all times could be + * converted to wall time, thus eliminating the need for this + * parameter, this is not the case. + * @stable ICU 2.0 + */ + enum TimeMode { + WALL_TIME = 0, + STANDARD_TIME, + UTC_TIME + }; + + /** + * Copy constructor + * @param source the object to be copied. + * @stable ICU 2.0 + */ + SimpleTimeZone(const SimpleTimeZone& source); + + /** + * Default assignment operator + * @param right the object to be copied. + * @stable ICU 2.0 + */ + SimpleTimeZone& operator=(const SimpleTimeZone& right); + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~SimpleTimeZone(); + + /** + * Returns true if the two TimeZone objects are equal; that is, they have + * the same ID, raw GMT offset, and DST rules. + * + * @param that The SimpleTimeZone object to be compared with. + * @return True if the given time zone is equal to this time zone; false + * otherwise. + * @stable ICU 2.0 + */ + virtual UBool operator==(const TimeZone& that) const; + + /** + * Constructs a SimpleTimeZone with the given raw GMT offset and time zone ID, + * and which doesn't observe daylight savings time. Normally you should use + * TimeZone::createInstance() to create a TimeZone instead of creating a + * SimpleTimeZone directly with this constructor. + * + * @param rawOffsetGMT The given base time zone offset to GMT. + * @param ID The timezone ID which is obtained from + * TimeZone.getAvailableIDs. + * @stable ICU 2.0 + */ + SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID); + + /** + * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID, + * and times to start and end daylight savings time. To create a TimeZone that + * doesn't observe daylight savings time, don't use this constructor; use + * SimpleTimeZone(rawOffset, ID) instead. 
Normally, you should use + * TimeZone.createInstance() to create a TimeZone instead of creating a + * SimpleTimeZone directly with this constructor. + * <P> + * Various types of daylight-savings time rules can be specified by using different + * values for startDay and startDayOfWeek and endDay and endDayOfWeek. For a + * complete explanation of how these parameters work, see the documentation for + * setStartRule(). + * + * @param rawOffsetGMT The new SimpleTimeZone's raw GMT offset + * @param ID The new SimpleTimeZone's time zone ID. + * @param savingsStartMonth The daylight savings starting month. Month is + * 0-based. eg, 0 for January. + * @param savingsStartDayOfWeekInMonth The daylight savings starting + * day-of-week-in-month. See setStartRule() for a + * complete explanation. + * @param savingsStartDayOfWeek The daylight savings starting day-of-week. + * See setStartRule() for a complete explanation. + * @param savingsStartTime The daylight savings starting time, expressed as the + * number of milliseconds after midnight. + * @param savingsEndMonth The daylight savings ending month. Month is + * 0-based. eg, 0 for January. + * @param savingsEndDayOfWeekInMonth The daylight savings ending day-of-week-in-month. + * See setStartRule() for a complete explanation. + * @param savingsEndDayOfWeek The daylight savings ending day-of-week. + * See setStartRule() for a complete explanation. + * @param savingsEndTime The daylight savings ending time, expressed as the + * number of milliseconds after midnight. + * @param status An UErrorCode to receive the status. 
+ * @stable ICU 2.0 + */ + SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID, + int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth, + int8_t savingsStartDayOfWeek, int32_t savingsStartTime, + int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth, + int8_t savingsEndDayOfWeek, int32_t savingsEndTime, + UErrorCode& status); + /** + * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID, + * and times to start and end daylight savings time. To create a TimeZone that + * doesn't observe daylight savings time, don't use this constructor; use + * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use + * TimeZone.createInstance() to create a TimeZone instead of creating a + * SimpleTimeZone directly with this constructor. + * <P> + * Various types of daylight-savings time rules can be specified by using different + * values for startDay and startDayOfWeek and endDay and endDayOfWeek. For a + * complete explanation of how these parameters work, see the documentation for + * setStartRule(). + * + * @param rawOffsetGMT The new SimpleTimeZone's raw GMT offset + * @param ID The new SimpleTimeZone's time zone ID. + * @param savingsStartMonth The daylight savings starting month. Month is + * 0-based. eg, 0 for January. + * @param savingsStartDayOfWeekInMonth The daylight savings starting + * day-of-week-in-month. See setStartRule() for a + * complete explanation. + * @param savingsStartDayOfWeek The daylight savings starting day-of-week. + * See setStartRule() for a complete explanation. + * @param savingsStartTime The daylight savings starting time, expressed as the + * number of milliseconds after midnight. + * @param savingsEndMonth The daylight savings ending month. Month is + * 0-based. eg, 0 for January. + * @param savingsEndDayOfWeekInMonth The daylight savings ending day-of-week-in-month. + * See setStartRule() for a complete explanation. + * @param savingsEndDayOfWeek The daylight savings ending day-of-week. 
+ * See setStartRule() for a complete explanation. + * @param savingsEndTime The daylight savings ending time, expressed as the + * number of milliseconds after midnight. + * @param savingsDST The number of milliseconds added to standard time + * to get DST time. Default is one hour. + * @param status An UErrorCode to receive the status. + * @stable ICU 2.0 + */ + SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID, + int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth, + int8_t savingsStartDayOfWeek, int32_t savingsStartTime, + int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth, + int8_t savingsEndDayOfWeek, int32_t savingsEndTime, + int32_t savingsDST, UErrorCode& status); + + /** + * Construct a SimpleTimeZone with the given raw GMT offset, time zone ID, + * and times to start and end daylight savings time. To create a TimeZone that + * doesn't observe daylight savings time, don't use this constructor; use + * SimpleTimeZone(rawOffset, ID) instead. Normally, you should use + * TimeZone.createInstance() to create a TimeZone instead of creating a + * SimpleTimeZone directly with this constructor. + * <P> + * Various types of daylight-savings time rules can be specified by using different + * values for startDay and startDayOfWeek and endDay and endDayOfWeek. For a + * complete explanation of how these parameters work, see the documentation for + * setStartRule(). + * + * @param rawOffsetGMT The new SimpleTimeZone's raw GMT offset + * @param ID The new SimpleTimeZone's time zone ID. + * @param savingsStartMonth The daylight savings starting month. Month is + * 0-based. eg, 0 for January. + * @param savingsStartDayOfWeekInMonth The daylight savings starting + * day-of-week-in-month. See setStartRule() for a + * complete explanation. + * @param savingsStartDayOfWeek The daylight savings starting day-of-week. + * See setStartRule() for a complete explanation. 
+ * @param savingsStartTime The daylight savings starting time, expressed as the + * number of milliseconds after midnight. + * @param savingsStartTimeMode Whether the start time is local wall time, local + * standard time, or UTC time. Default is local wall time. + * @param savingsEndMonth The daylight savings ending month. Month is + * 0-based. eg, 0 for January. + * @param savingsEndDayOfWeekInMonth The daylight savings ending day-of-week-in-month. + * See setStartRule() for a complete explanation. + * @param savingsEndDayOfWeek The daylight savings ending day-of-week. + * See setStartRule() for a complete explanation. + * @param savingsEndTime The daylight savings ending time, expressed as the + * number of milliseconds after midnight. + * @param savingsEndTimeMode Whether the end time is local wall time, local + * standard time, or UTC time. Default is local wall time. + * @param savingsDST The number of milliseconds added to standard time + * to get DST time. Default is one hour. + * @param status An UErrorCode to receive the status. + * @stable ICU 2.0 + */ + SimpleTimeZone(int32_t rawOffsetGMT, const UnicodeString& ID, + int8_t savingsStartMonth, int8_t savingsStartDayOfWeekInMonth, + int8_t savingsStartDayOfWeek, int32_t savingsStartTime, + TimeMode savingsStartTimeMode, + int8_t savingsEndMonth, int8_t savingsEndDayOfWeekInMonth, + int8_t savingsEndDayOfWeek, int32_t savingsEndTime, TimeMode savingsEndTimeMode, + int32_t savingsDST, UErrorCode& status); + + /** + * Sets the daylight savings starting year, that is, the year this time zone began + * observing its specified daylight savings time rules. The time zone is considered + * not to observe daylight savings time prior to that year; SimpleTimeZone doesn't + * support historical daylight-savings-time rules. + * @param year the daylight savings starting year. + * @stable ICU 2.0 + */ + void setStartYear(int32_t year); + + /** + * Sets the daylight savings starting rule. 
For example, in the U.S., Daylight Savings + * Time starts at the second Sunday in March, at 2 AM in standard time. + * Therefore, you can set the start rule by calling: + * setStartRule(UCAL_MARCH, 2, UCAL_SUNDAY, 2*60*60*1000); + * The dayOfWeekInMonth and dayOfWeek parameters together specify how to calculate + * the exact starting date. Their exact meanings depend on their respective signs, + * allowing various types of rules to be constructed, as follows: + * <ul> + * <li>If both dayOfWeekInMonth and dayOfWeek are positive, they specify the + * day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday + * of the month).</li> + * <li>If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify + * the day of week in the month counting backward from the end of the month. + * (e.g., (-1, MONDAY) is the last Monday in the month)</li> + * <li>If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth + * specifies the day of the month, regardless of what day of the week it is. + * (e.g., (10, 0) is the tenth day of the month)</li> + * <li>If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth + * specifies the day of the month counting backward from the end of the + * month, regardless of what day of the week it is (e.g., (-2, 0) is the + * next-to-last day of the month).</li> + * <li>If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the + * first specified day of the week on or after the specified day of the month. + * (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month + * [or the 15th itself if the 15th is a Sunday].)</li> + * <li>If dayOfWeek and dayOfWeekInMonth are both negative, they specify the + * last specified day of the week on or before the specified day of the month. + * (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month + * [or the 20th itself if the 20th is a Tuesday].)</li> + * </ul> + * @param month the daylight savings starting month. 
Month is 0-based. + * eg, 0 for January. + * @param dayOfWeekInMonth the daylight savings starting + * day-of-week-in-month. Please see the member description for an example. + * @param dayOfWeek the daylight savings starting day-of-week. Please see + * the member description for an example. + * @param time the daylight savings starting time. Please see the member + * description for an example. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setStartRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek, + int32_t time, UErrorCode& status); + /** + * Sets the daylight savings starting rule. For example, in the U.S., Daylight Savings + * Time starts at the second Sunday in March, at 2 AM in standard time. + * Therefore, you can set the start rule by calling: + * setStartRule(UCAL_MARCH, 2, UCAL_SUNDAY, 2*60*60*1000); + * The dayOfWeekInMonth and dayOfWeek parameters together specify how to calculate + * the exact starting date. Their exact meaning depend on their respective signs, + * allowing various types of rules to be constructed, as follows: + * <ul> + * <li>If both dayOfWeekInMonth and dayOfWeek are positive, they specify the + * day of week in the month (e.g., (2, WEDNESDAY) is the second Wednesday + * of the month).</li> + * <li>If dayOfWeek is positive and dayOfWeekInMonth is negative, they specify + * the day of week in the month counting backward from the end of the month. + * (e.g., (-1, MONDAY) is the last Monday in the month)</li> + * <li>If dayOfWeek is zero and dayOfWeekInMonth is positive, dayOfWeekInMonth + * specifies the day of the month, regardless of what day of the week it is. 
+ * (e.g., (10, 0) is the tenth day of the month)</li> + * <li>If dayOfWeek is zero and dayOfWeekInMonth is negative, dayOfWeekInMonth + * specifies the day of the month counting backward from the end of the + * month, regardless of what day of the week it is (e.g., (-2, 0) is the + * next-to-last day of the month).</li> + * <li>If dayOfWeek is negative and dayOfWeekInMonth is positive, they specify the + * first specified day of the week on or after the specified day of the month. + * (e.g., (15, -SUNDAY) is the first Sunday after the 15th of the month + * [or the 15th itself if the 15th is a Sunday].)</li> + * <li>If dayOfWeek and dayOfWeekInMonth are both negative, they specify the + * last specified day of the week on or before the specified day of the month. + * (e.g., (-20, -TUESDAY) is the last Tuesday before the 20th of the month + * [or the 20th itself if the 20th is a Tuesday].)</li> + * </ul> + * @param month the daylight savings starting month. Month is 0-based. + * eg, 0 for January. + * @param dayOfWeekInMonth the daylight savings starting + * day-of-week-in-month. Please see the member description for an example. + * @param dayOfWeek the daylight savings starting day-of-week. Please see + * the member description for an example. + * @param time the daylight savings starting time. Please see the member + * description for an example. + * @param mode whether the time is local wall time, local standard time, + * or UTC time. Default is local wall time. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setStartRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek, + int32_t time, TimeMode mode, UErrorCode& status); + + /** + * Sets the DST start rule to a fixed date within a month. + * + * @param month The month in which this rule occurs (0-based). + * @param dayOfMonth The date in that month (1-based). 
+ * @param time The time of that day (number of millis after midnight) + * when DST takes effect in local wall time, which is + * standard time in this case. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setStartRule(int32_t month, int32_t dayOfMonth, int32_t time, + UErrorCode& status); + /** + * Sets the DST start rule to a fixed date within a month. + * + * @param month The month in which this rule occurs (0-based). + * @param dayOfMonth The date in that month (1-based). + * @param time The time of that day (number of millis after midnight) + * when DST takes effect in local wall time, which is + * standard time in this case. + * @param mode whether the time is local wall time, local standard time, + * or UTC time. Default is local wall time. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setStartRule(int32_t month, int32_t dayOfMonth, int32_t time, + TimeMode mode, UErrorCode& status); + + /** + * Sets the DST start rule to a weekday before or after a given date within + * a month, e.g., the first Monday on or after the 8th. + * + * @param month The month in which this rule occurs (0-based). + * @param dayOfMonth A date within that month (1-based). + * @param dayOfWeek The day of the week on which this rule occurs. + * @param time The time of that day (number of millis after midnight) + * when DST takes effect in local wall time, which is + * standard time in this case. + * @param after If true, this rule selects the first dayOfWeek on + * or after dayOfMonth. If false, this rule selects + * the last dayOfWeek on or before dayOfMonth. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setStartRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + int32_t time, UBool after, UErrorCode& status); + /** + * Sets the DST start rule to a weekday before or after a given date within + * a month, e.g., the first Monday on or after the 8th. + * + * @param month The month in which this rule occurs (0-based). 
+ * @param dayOfMonth A date within that month (1-based). + * @param dayOfWeek The day of the week on which this rule occurs. + * @param time The time of that day (number of millis after midnight) + * when DST takes effect in local wall time, which is + * standard time in this case. + * @param mode whether the time is local wall time, local standard time, + * or UTC time. Default is local wall time. + * @param after If true, this rule selects the first dayOfWeek on + * or after dayOfMonth. If false, this rule selects + * the last dayOfWeek on or before dayOfMonth. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setStartRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + int32_t time, TimeMode mode, UBool after, UErrorCode& status); + + /** + * Sets the daylight savings ending rule. For example, if Daylight + * Savings Time ends at the last (-1) Sunday in October, at 2 AM in standard time. + * Therefore, you can set the end rule by calling: + * <pre> + * setEndRule(UCAL_OCTOBER, -1, UCAL_SUNDAY, 2*60*60*1000); + * </pre> + * Various other types of rules can be specified by manipulating the dayOfWeek + * and dayOfWeekInMonth parameters. For complete details, see the documentation + * for setStartRule(). + * + * @param month the daylight savings ending month. Month is 0-based. + * eg, 0 for January. + * @param dayOfWeekInMonth the daylight savings ending + * day-of-week-in-month. See setStartRule() for a complete explanation. + * @param dayOfWeek the daylight savings ending day-of-week. See setStartRule() + * for a complete explanation. + * @param time the daylight savings ending time. Please see the member + * description for an example. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setEndRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek, + int32_t time, UErrorCode& status); + + /** + * Sets the daylight savings ending rule. 
For example, if Daylight + * Savings Time ends at the last (-1) Sunday in October, at 2 AM in standard time. + * Therefore, you can set the end rule by calling: + * <pre> + * setEndRule(UCAL_OCTOBER, -1, UCAL_SUNDAY, 2*60*60*1000); + * </pre> + * Various other types of rules can be specified by manipulating the dayOfWeek + * and dayOfWeekInMonth parameters. For complete details, see the documentation + * for setStartRule(). + * + * @param month the daylight savings ending month. Month is 0-based. + * eg, 0 for January. + * @param dayOfWeekInMonth the daylight savings ending + * day-of-week-in-month. See setStartRule() for a complete explanation. + * @param dayOfWeek the daylight savings ending day-of-week. See setStartRule() + * for a complete explanation. + * @param time the daylight savings ending time. Please see the member + * description for an example. + * @param mode whether the time is local wall time, local standard time, + * or UTC time. Default is local wall time. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setEndRule(int32_t month, int32_t dayOfWeekInMonth, int32_t dayOfWeek, + int32_t time, TimeMode mode, UErrorCode& status); + + /** + * Sets the DST end rule to a fixed date within a month. + * + * @param month The month in which this rule occurs (0-based). + * @param dayOfMonth The date in that month (1-based). + * @param time The time of that day (number of millis after midnight) + * when DST ends in local wall time, which is daylight + * time in this case. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setEndRule(int32_t month, int32_t dayOfMonth, int32_t time, UErrorCode& status); + + /** + * Sets the DST end rule to a fixed date within a month. + * + * @param month The month in which this rule occurs (0-based). + * @param dayOfMonth The date in that month (1-based). 
+ * @param time The time of that day (number of millis after midnight) + * when DST ends in local wall time, which is daylight + * time in this case. + * @param mode whether the time is local wall time, local standard time, + * or UTC time. Default is local wall time. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setEndRule(int32_t month, int32_t dayOfMonth, int32_t time, + TimeMode mode, UErrorCode& status); + + /** + * Sets the DST end rule to a weekday before or after a given date within + * a month, e.g., the first Monday on or after the 8th. + * + * @param month The month in which this rule occurs (0-based). + * @param dayOfMonth A date within that month (1-based). + * @param dayOfWeek The day of the week on which this rule occurs. + * @param time The time of that day (number of millis after midnight) + * when DST ends in local wall time, which is daylight + * time in this case. + * @param after If true, this rule selects the first dayOfWeek on + * or after dayOfMonth. If false, this rule selects + * the last dayOfWeek on or before dayOfMonth. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + int32_t time, UBool after, UErrorCode& status); + + /** + * Sets the DST end rule to a weekday before or after a given date within + * a month, e.g., the first Monday on or after the 8th. + * + * @param month The month in which this rule occurs (0-based). + * @param dayOfMonth A date within that month (1-based). + * @param dayOfWeek The day of the week on which this rule occurs. + * @param time The time of that day (number of millis after midnight) + * when DST ends in local wall time, which is daylight + * time in this case. + * @param mode whether the time is local wall time, local standard time, + * or UTC time. Default is local wall time. + * @param after If true, this rule selects the first dayOfWeek on + * or after dayOfMonth. 
If false, this rule selects + * the last dayOfWeek on or before dayOfMonth. + * @param status An UErrorCode + * @stable ICU 2.0 + */ + void setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + int32_t time, TimeMode mode, UBool after, UErrorCode& status); + + /** + * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time in this time zone, taking daylight savings time into + * account) as of a particular reference date. The reference date is used to determine + * whether daylight savings time is in effect and needs to be figured into the offset + * that is returned (in other words, what is the adjusted GMT offset in this time zone + * at this particular date and time?). For the time zones produced by createTimeZone(), + * the reference date is specified according to the Gregorian calendar, and the date + * and time fields are in GMT, NOT local time. + * + * @param era The reference date's era + * @param year The reference date's year + * @param month The reference date's month (0-based; 0 is January) + * @param day The reference date's day-in-month (1-based) + * @param dayOfWeek The reference date's day-of-week (1-based; 1 is Sunday) + * @param millis The reference date's milliseconds in day, UTC (NOT local time). + * @param status An UErrorCode to receive the status. + * @return The offset in milliseconds to add to GMT to get local time. + * @stable ICU 2.0 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const; + + /** + * Gets the time zone offset, for current date, modified in case of + * daylight savings. This is the offset to add *to* UTC to get local time. + * @param era the era of the given date. + * @param year the year in the given date. + * @param month the month in the given date. + * Month is 0-based. e.g., 0 for January. + * @param day the day-in-month of the given date. 
+ * @param dayOfWeek the day-of-week of the given date. + * @param milliseconds the millis in day in <em>standard</em> local time. + * @param monthLength the length of the given month in days. + * @param status An UErrorCode to receive the status. + * @return the offset to add *to* GMT to get local time. + * @stable ICU 2.0 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t milliseconds, + int32_t monthLength, UErrorCode& status) const; + /** + * Gets the time zone offset, for current date, modified in case of + * daylight savings. This is the offset to add *to* UTC to get local time. + * @param era the era of the given date. + * @param year the year in the given date. + * @param month the month in the given date. + * Month is 0-based. e.g., 0 for January. + * @param day the day-in-month of the given date. + * @param dayOfWeek the day-of-week of the given date. + * @param milliseconds the millis in day in <em>standard</em> local time. + * @param monthLength the length of the given month in days. + * @param prevMonthLength length of the previous month in days. + * @param status An UErrorCode to receive the status. + * @return the offset to add *to* GMT to get local time. + * @stable ICU 2.0 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t milliseconds, + int32_t monthLength, int32_t prevMonthLength, + UErrorCode& status) const; + + /** + * Redeclared TimeZone method. This implementation simply calls + * the base class method, which otherwise would be hidden. + * @stable ICU 2.8 + */ + virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& ec) const; + + /** + * Get time zone offsets from local wall time. 
+ * @internal + */ + virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; + + /** + * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @return The TimeZone's raw GMT offset. + * @stable ICU 2.0 + */ + virtual int32_t getRawOffset(void) const; + + /** + * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @param offsetMillis The new raw GMT offset for this time zone. + * @stable ICU 2.0 + */ + virtual void setRawOffset(int32_t offsetMillis); + + /** + * Sets the amount of time in ms that the clock is advanced during DST. + * @param millisSavedDuringDST the number of milliseconds the time is + * advanced with respect to standard time when the daylight savings rules + * are in effect. A positive number, typically one hour (3600000). + * @param status An UErrorCode to receive the status. + * @stable ICU 2.0 + */ + void setDSTSavings(int32_t millisSavedDuringDST, UErrorCode& status); + + /** + * Returns the amount of time in ms that the clock is advanced during DST. + * @return the number of milliseconds the time is + * advanced with respect to standard time when the daylight savings rules + * are in effect. A positive number, typically one hour (3600000). + * @stable ICU 2.0 + */ + virtual int32_t getDSTSavings(void) const; + + /** + * Queries if this TimeZone uses Daylight Savings Time. + * + * @return True if this TimeZone uses Daylight Savings Time; false otherwise. + * @stable ICU 2.0 + */ + virtual UBool useDaylightTime(void) const; + + /** + * Returns true if the given date is within the period when daylight savings time + * is in effect; false otherwise. 
If the TimeZone doesn't observe daylight savings + * time, this function always returns false. + * This method is wasteful since it creates a new GregorianCalendar and + * deletes it each time it is called. This is a deprecated method + * and provided only for Java compatibility. + * + * @param date The date to test. + * @param status An UErrorCode to receive the status. + * @return true if the given date is in Daylight Savings Time; + * false otherwise. + * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead. + */ + virtual UBool inDaylightTime(UDate date, UErrorCode& status) const; + + /** + * Return true if this zone has the same rules and offset as another zone. + * @param other the TimeZone object to be compared with + * @return true if the given zone has the same rules and offset as this one + * @stable ICU 2.0 + */ + UBool hasSameRules(const TimeZone& other) const; + + /** + * Clones TimeZone objects polymorphically. Clients are responsible for deleting + * the TimeZone object cloned. + * + * @return A new copy of this TimeZone object. + * @stable ICU 2.0 + */ + virtual TimeZone* clone(void) const; + + /** + * Gets the first time zone transition after the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first transition after the base time. + * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + + /** + * Gets the most recent time zone transition before the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent transition before the base time. + * @return TRUE if the transition is found. 
+ * @stable ICU 3.8 + */ + virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + + /** + * Returns the number of <code>TimeZoneRule</code>s which represent time transitions, + * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except + * <code>InitialTimeZoneRule</code>. The return value range is 0 or any positive value. + * @param status Receives error status code. + * @return The number of <code>TimeZoneRule</code>s representing time transitions. + * @stable ICU 3.8 + */ + virtual int32_t countTransitionRules(UErrorCode& status) const; + + /** + * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code> + * which represent time transitions for this time zone. On successful return, + * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and + * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code> + * instances up to the size specified by trscount. The results are referencing the + * rule instance held by this time zone instance. Therefore, after this time zone + * is destructed, they are no longer available. + * @param initial Receives the initial timezone rule + * @param trsrules Receives the timezone transition rules + * @param trscount On input, specify the size of the array 'trsrules' receiving + * the timezone transition rules. On output, actual number of + * rules filled in the array will be set. + * @param status Receives error status code. + * @stable ICU 3.8 + */ + virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const; + + +public: + + /** + * Overrides TimeZone. Returns a unique class ID POLYMORPHICALLY. Pure virtual + * override. This method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() methods call + * this method. 
+ * + * @return The class ID for this object. All objects of a given class have the + * same class ID. Objects of other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Return the class ID for this class. This is useful only for comparing to a return + * value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +private: + /** + * Constants specifying values of startMode and endMode. + */ + enum EMode + { + DOM_MODE = 1, + DOW_IN_MONTH_MODE, + DOW_GE_DOM_MODE, + DOW_LE_DOM_MODE + }; + + SimpleTimeZone(); // default constructor not implemented + + /** + * Internal construction method. + * @param rawOffsetGMT The new SimpleTimeZone's raw GMT offset + * @param startMonth the month DST starts + * @param startDay the day DST starts + * @param startDayOfWeek the DOW DST starts + * @param startTime the time DST starts + * @param startTimeMode Whether the start time is local wall time, local + * standard time, or UTC time. Default is local wall time. + * @param endMonth the month DST ends + * @param endDay the day DST ends + * @param endDayOfWeek the DOW DST ends + * @param endTime the time DST ends + * @param endTimeMode Whether the end time is local wall time, local + * standard time, or UTC time. Default is local wall time. + * @param dstSavings The number of milliseconds added to standard time + * to get DST time. Default is one hour. + * @param status An UErrorCode to receive the status. 
+ */ + void construct(int32_t rawOffsetGMT, + int8_t startMonth, int8_t startDay, int8_t startDayOfWeek, + int32_t startTime, TimeMode startTimeMode, + int8_t endMonth, int8_t endDay, int8_t endDayOfWeek, + int32_t endTime, TimeMode endTimeMode, + int32_t dstSavings, UErrorCode& status); + + /** + * Compare a given date in the year to a rule. Return 1, 0, or -1, depending + * on whether the date is after, equal to, or before the rule date. The + * millis are compared directly against the ruleMillis, so any + * standard-daylight adjustments must be handled by the caller. + * + * @return 1 if the date is after the rule date, -1 if the date is before + * the rule date, or 0 if the date is equal to the rule date. + */ + static int32_t compareToRule(int8_t month, int8_t monthLen, int8_t prevMonthLen, + int8_t dayOfMonth, + int8_t dayOfWeek, int32_t millis, int32_t millisDelta, + EMode ruleMode, int8_t ruleMonth, int8_t ruleDayOfWeek, + int8_t ruleDay, int32_t ruleMillis); + + /** + * Given a set of encoded rules in startDay and startDayOfWeek, decode + * them and set the startMode appropriately. Do the same for endDay and + * endDayOfWeek. + * <P> + * Upon entry, the day of week variables may be zero or + * negative, in order to indicate special modes. The day of month + * variables may also be negative. + * <P> + * Upon exit, the mode variables will be + * set, and the day of week and day of month variables will be positive. + * <P> + * This method also recognizes a startDay or endDay of zero as indicating + * no DST. 
+ */ + void decodeRules(UErrorCode& status); + void decodeStartRule(UErrorCode& status); + void decodeEndRule(UErrorCode& status); + + int8_t startMonth, startDay, startDayOfWeek; // the month, day, DOW, and time DST starts + int32_t startTime; + TimeMode startTimeMode, endTimeMode; // Mode for startTime, endTime; see TimeMode + int8_t endMonth, endDay, endDayOfWeek; // the month, day, DOW, and time DST ends + int32_t endTime; + int32_t startYear; // the year these DST rules took effect + int32_t rawOffset; // the TimeZone's raw GMT offset + UBool useDaylight; // flag indicating whether this TimeZone uses DST + static const int8_t STATICMONTHLENGTH[12]; // lengths of the months + EMode startMode, endMode; // flags indicating what kind of rules the DST rules are + + /** + * A positive value indicating the amount of time saved during DST in ms. + * Typically one hour; sometimes 30 minutes. + */ + int32_t dstSavings; + + /* Private for BasicTimeZone implementation */ + void checkTransitionRules(UErrorCode& status) const; + void initTransitionRules(UErrorCode& status); + void clearTransitionRules(void); + void deleteTransitionRules(void); + UBool transitionRulesInitialized; + InitialTimeZoneRule* initialRule; + TimeZoneTransition* firstTransition; + AnnualTimeZoneRule* stdRule; + AnnualTimeZoneRule* dstRule; +}; + +inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfWeekInMonth, + int32_t dayOfWeek, + int32_t time, UErrorCode& status) { + setStartRule(month, dayOfWeekInMonth, dayOfWeek, time, WALL_TIME, status); +} + +inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfMonth, + int32_t time, + UErrorCode& status) { + setStartRule(month, dayOfMonth, time, WALL_TIME, status); +} + +inline void SimpleTimeZone::setStartRule(int32_t month, int32_t dayOfMonth, + int32_t dayOfWeek, + int32_t time, UBool after, UErrorCode& status) { + setStartRule(month, dayOfMonth, dayOfWeek, time, WALL_TIME, after, status); +} + +inline void 
SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfWeekInMonth, + int32_t dayOfWeek, + int32_t time, UErrorCode& status) { + setEndRule(month, dayOfWeekInMonth, dayOfWeek, time, WALL_TIME, status); +} + +inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfMonth, + int32_t time, UErrorCode& status) { + setEndRule(month, dayOfMonth, time, WALL_TIME, status); +} + +inline void SimpleTimeZone::setEndRule(int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + int32_t time, UBool after, UErrorCode& status) { + setEndRule(month, dayOfMonth, dayOfWeek, time, WALL_TIME, after, status); +} + +inline void +SimpleTimeZone::getOffset(UDate date, UBool local, int32_t& rawOffsetRef, + int32_t& dstOffsetRef, UErrorCode& ec) const { + TimeZone::getOffset(date, local, rawOffsetRef, dstOffsetRef, ec); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _SIMPLETZ diff --git a/intl/icu/source/i18n/unicode/smpdtfmt.h b/intl/icu/source/i18n/unicode/smpdtfmt.h new file mode 100644 index 000000000..e6cf28d22 --- /dev/null +++ b/intl/icu/source/i18n/unicode/smpdtfmt.h @@ -0,0 +1,1620 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +* Copyright (C) 1997-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File SMPDTFMT.H +* +* Modification History: +* +* Date Name Description +* 02/19/97 aliu Converted from java. +* 07/09/97 helena Make ParsePosition into a class. +* 07/21/98 stephen Added GMT_PLUS, GMT_MINUS +* Changed setTwoDigitStartDate to set2DigitYearStart +* Changed getTwoDigitStartDate to get2DigitYearStart +* Removed subParseLong +* Removed getZoneIndex (added in DateFormatSymbols) +* 06/14/99 stephen Removed fgTimeZoneDataSuffix +* 10/14/99 aliu Updated class doc to describe 2-digit year parsing +* {j28 4182066}. 
+******************************************************************************* +*/ + +#ifndef SMPDTFMT_H +#define SMPDTFMT_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Format and parse dates in a language-independent manner. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/datefmt.h" +#include "unicode/udisplaycontext.h" +#include "unicode/tzfmt.h" /* for UTimeZoneFormatTimeType */ +#include "unicode/brkiter.h" + +U_NAMESPACE_BEGIN + +class DateFormatSymbols; +class DateFormat; +class MessageFormat; +class FieldPositionHandler; +class TimeZoneFormat; +class SharedNumberFormat; +class SimpleDateFormatMutableNFs; + +/** + * + * SimpleDateFormat is a concrete class for formatting and parsing dates in a + * language-independent manner. It allows for formatting (millis -> text), + * parsing (text -> millis), and normalization. Formats/Parses a date or time, + * which is the standard milliseconds since 00:00 GMT, Jan 1, 1970. + * <P> + * Clients are encouraged to create a date-time formatter using DateFormat::getInstance(), + * getTimeInstance(), getDateInstance(), or getDateTimeInstance() rather than + * explicitly constructing an instance of SimpleDateFormat. This way, the client + * is guaranteed to get an appropriate formatting pattern for whatever locale the + * program is running in. However, if the client needs something more unusual than + * the default patterns in the locales, he can construct a SimpleDateFormat directly + * and give it an appropriate pattern (or use one of the factory methods on DateFormat + * and modify the pattern after the fact with toPattern() and applyPattern()). + * + * <p><strong>Date and Time Patterns:</strong></p> + * + * <p>Date and time formats are specified by <em>date and time pattern</em> strings. + * Within date and time pattern strings, all unquoted ASCII letters [A-Za-z] are reserved + * as pattern letters representing calendar fields. 
<code>SimpleDateFormat</code> supports + * the date and time formatting algorithm and pattern letters defined by + * <a href="http://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table">UTS#35 + * Unicode Locale Data Markup Language (LDML)</a> and further documented for ICU in the + * <a href="https://sites.google.com/site/icuprojectuserguide/formatparse/datetime?pli=1#TOC-Date-Field-Symbol-Table">ICU + * User Guide</a>. The following pattern letters are currently available (note that the actual + * values depend on CLDR and may change from the examples shown here):</p> + * + * <table border="1"> + * <tr> + * <th>Field</th> + * <th style="text-align: center">Sym.</th> + * <th style="text-align: center">No.</th> + * <th>Example</th> + * <th>Description</th> + * </tr> + * <tr> + * <th rowspan="3">era</th> + * <td style="text-align: center" rowspan="3">G</td> + * <td style="text-align: center">1..3</td> + * <td>AD</td> + * <td rowspan="3">Era - Replaced with the Era string for the current date. One to three letters for the + * abbreviated form, four letters for the long (wide) form, five for the narrow form.</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>Anno Domini</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>A</td> + * </tr> + * <tr> + * <th rowspan="6">year</th> + * <td style="text-align: center">y</td> + * <td style="text-align: center">1..n</td> + * <td>1996</td> + * <td>Year. Normally the length specifies the padding, but for two letters it also specifies the maximum + * length. 
Example:<div align="center"> + * <center> + * <table border="1" cellpadding="2" cellspacing="0"> + * <tr> + * <th>Year</th> + * <th style="text-align: right">y</th> + * <th style="text-align: right">yy</th> + * <th style="text-align: right">yyy</th> + * <th style="text-align: right">yyyy</th> + * <th style="text-align: right">yyyyy</th> + * </tr> + * <tr> + * <td>AD 1</td> + * <td style="text-align: right">1</td> + * <td style="text-align: right">01</td> + * <td style="text-align: right">001</td> + * <td style="text-align: right">0001</td> + * <td style="text-align: right">00001</td> + * </tr> + * <tr> + * <td>AD 12</td> + * <td style="text-align: right">12</td> + * <td style="text-align: right">12</td> + * <td style="text-align: right">012</td> + * <td style="text-align: right">0012</td> + * <td style="text-align: right">00012</td> + * </tr> + * <tr> + * <td>AD 123</td> + * <td style="text-align: right">123</td> + * <td style="text-align: right">23</td> + * <td style="text-align: right">123</td> + * <td style="text-align: right">0123</td> + * <td style="text-align: right">00123</td> + * </tr> + * <tr> + * <td>AD 1234</td> + * <td style="text-align: right">1234</td> + * <td style="text-align: right">34</td> + * <td style="text-align: right">1234</td> + * <td style="text-align: right">1234</td> + * <td style="text-align: right">01234</td> + * </tr> + * <tr> + * <td>AD 12345</td> + * <td style="text-align: right">12345</td> + * <td style="text-align: right">45</td> + * <td style="text-align: right">12345</td> + * <td style="text-align: right">12345</td> + * <td style="text-align: right">12345</td> + * </tr> + * </table> + * </center></div> + * </td> + * </tr> + * <tr> + * <td style="text-align: center">Y</td> + * <td style="text-align: center">1..n</td> + * <td>1997</td> + * <td>Year (in "Week of Year" based calendars). Normally the length specifies the padding, + * but for two letters it also specifies the maximum length. 
This year designation is used in ISO + * year-week calendar as defined by ISO 8601, but can be used in non-Gregorian based calendar systems + * where week date processing is desired. May not always be the same value as calendar year.</td> + * </tr> + * <tr> + * <td style="text-align: center">u</td> + * <td style="text-align: center">1..n</td> + * <td>4601</td> + * <td>Extended year. This is a single number designating the year of this calendar system, encompassing + * all supra-year fields. For example, for the Julian calendar system, year numbers are positive, with an + * era of BCE or CE. An extended year value for the Julian calendar system assigns positive values to CE + * years and negative values to BCE years, with 1 BCE being year 0.</td> + * </tr> + * <tr> + * <td style="text-align: center" rowspan="3">U</td> + * <td style="text-align: center">1..3</td> + * <td>甲子</td> + * <td rowspan="3">Cyclic year name. Calendars such as the Chinese lunar calendar (and related calendars) + * and the Hindu calendars use 60-year cycles of year names. Use one through three letters for the abbreviated + * name, four for the full (wide) name, or five for the narrow name (currently the data only provides abbreviated names, + * which will be used for all requested name widths). 
If the calendar does not provide cyclic year name data, + * or if the year value to be formatted is out of the range of years for which cyclic name data is provided, + * then numeric formatting is used (behaves like 'y').</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>(currently also 甲子)</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>(currently also 甲子)</td> + * </tr> + * <tr> + * <th rowspan="6">quarter</th> + * <td rowspan="3" style="text-align: center">Q</td> + * <td style="text-align: center">1..2</td> + * <td>02</td> + * <td rowspan="3">Quarter - Use one or two for the numerical quarter, three for the abbreviation, or four for the + * full (wide) name (five for the narrow name is not yet supported).</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>Q2</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>2nd quarter</td> + * </tr> + * <tr> + * <td rowspan="3" style="text-align: center">q</td> + * <td style="text-align: center">1..2</td> + * <td>02</td> + * <td rowspan="3"><b>Stand-Alone</b> Quarter - Use one or two for the numerical quarter, three for the abbreviation, + * or four for the full name (five for the narrow name is not yet supported).</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>Q2</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>2nd quarter</td> + * </tr> + * <tr> + * <th rowspan="8">month</th> + * <td rowspan="4" style="text-align: center">M</td> + * <td style="text-align: center">1..2</td> + * <td>09</td> + * <td rowspan="4">Month - Use one or two for the numerical month, three for the abbreviation, four for + * the full (wide) name, or five for the narrow name. With two ("MM"), the month number is zero-padded + * if necessary (e.g. 
"08")</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>Sep</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>September</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>S</td> + * </tr> + * <tr> + * <td rowspan="4" style="text-align: center">L</td> + * <td style="text-align: center">1..2</td> + * <td>09</td> + * <td rowspan="4"><b>Stand-Alone</b> Month - Use one or two for the numerical month, three for the abbreviation, + * four for the full (wide) name, or 5 for the narrow name. With two ("LL"), the month number is zero-padded if + * necessary (e.g. "08")</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>Sep</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>September</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>S</td> + * </tr> + * <tr> + * <th rowspan="2">week</th> + * <td style="text-align: center">w</td> + * <td style="text-align: center">1..2</td> + * <td>27</td> + * <td>Week of Year. Use "w" to show the minimum number of digits, or "ww" to always show two digits + * (zero-padding if necessary, e.g. "08").</td> + * </tr> + * <tr> + * <td style="text-align: center">W</td> + * <td style="text-align: center">1</td> + * <td>3</td> + * <td>Week of Month</td> + * </tr> + * <tr> + * <th rowspan="4">day</th> + * <td style="text-align: center">d</td> + * <td style="text-align: center">1..2</td> + * <td>1</td> + * <td>Date - Day of the month. Use "d" to show the minimum number of digits, or "dd" to always show + * two digits (zero-padding if necessary, e.g. "08").</td> + * </tr> + * <tr> + * <td style="text-align: center">D</td> + * <td style="text-align: center">1..3</td> + * <td>345</td> + * <td>Day of year</td> + * </tr> + * <tr> + * <td style="text-align: center">F</td> + * <td style="text-align: center">1</td> + * <td>2</td> + * <td>Day of Week in Month. 
The example is for the 2nd Wed in July</td> + * </tr> + * <tr> + * <td style="text-align: center">g</td> + * <td style="text-align: center">1..n</td> + * <td>2451334</td> + * <td>Modified Julian day. This is different from the conventional Julian day number in two regards. + * First, it demarcates days at local zone midnight, rather than noon GMT. Second, it is a local number; + * that is, it depends on the local time zone. It can be thought of as a single number that encompasses + * all the date-related fields.</td> + * </tr> + * <tr> + * <th rowspan="14">week<br> + * day</th> + * <td rowspan="4" style="text-align: center">E</td> + * <td style="text-align: center">1..3</td> + * <td>Tue</td> + * <td rowspan="4">Day of week - Use one through three letters for the short day, four for the full (wide) name, + * five for the narrow name, or six for the short name.</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>Tuesday</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>T</td> + * </tr> + * <tr> + * <td style="text-align: center">6</td> + * <td>Tu</td> + * </tr> + * <tr> + * <td rowspan="5" style="text-align: center">e</td> + * <td style="text-align: center">1..2</td> + * <td>2</td> + * <td rowspan="5">Local day of week. Same as E except adds a numeric value that will depend on the local + * starting day of the week, using one or two letters. 
For this example, Monday is the first day of the week.</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>Tue</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>Tuesday</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>T</td> + * </tr> + * <tr> + * <td style="text-align: center">6</td> + * <td>Tu</td> + * </tr> + * <tr> + * <td rowspan="5" style="text-align: center">c</td> + * <td style="text-align: center">1</td> + * <td>2</td> + * <td rowspan="5"><b>Stand-Alone</b> local day of week - Use one letter for the local numeric value (same + * as 'e'), three for the short day, four for the full (wide) name, five for the narrow name, or six for + * the short name.</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>Tue</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>Tuesday</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>T</td> + * </tr> + * <tr> + * <td style="text-align: center">6</td> + * <td>Tu</td> + * </tr> + * <tr> + * <th>period</th> + * <td style="text-align: center">a</td> + * <td style="text-align: center">1</td> + * <td>AM</td> + * <td>AM or PM</td> + * </tr> + * <tr> + * <th rowspan="4">hour</th> + * <td style="text-align: center">h</td> + * <td style="text-align: center">1..2</td> + * <td>11</td> + * <td>Hour [1-12]. When used in skeleton data or in a skeleton passed in an API for flexible data pattern + * generation, it should match the 12-hour-cycle format preferred by the locale (h or K); it should not match + * a 24-hour-cycle format (H or k). Use hh for zero padding.</td> + * </tr> + * <tr> + * <td style="text-align: center">H</td> + * <td style="text-align: center">1..2</td> + * <td>13</td> + * <td>Hour [0-23]. 
When used in skeleton data or in a skeleton passed in an API for flexible data pattern + * generation, it should match the 24-hour-cycle format preferred by the locale (H or k); it should not match a + * 12-hour-cycle format (h or K). Use HH for zero padding.</td> + * </tr> + * <tr> + * <td style="text-align: center">K</td> + * <td style="text-align: center">1..2</td> + * <td>0</td> + * <td>Hour [0-11]. When used in a skeleton, only matches K or h, see above. Use KK for zero padding.</td> + * </tr> + * <tr> + * <td style="text-align: center">k</td> + * <td style="text-align: center">1..2</td> + * <td>24</td> + * <td>Hour [1-24]. When used in a skeleton, only matches k or H, see above. Use kk for zero padding.</td> + * </tr> + * <tr> + * <th>minute</th> + * <td style="text-align: center">m</td> + * <td style="text-align: center">1..2</td> + * <td>59</td> + * <td>Minute. Use "m" to show the minimum number of digits, or "mm" to always show two digits + * (zero-padding if necessary, e.g. "08").</td> + * </tr> + * <tr> + * <th rowspan="3">second</th> + * <td style="text-align: center">s</td> + * <td style="text-align: center">1..2</td> + * <td>12</td> + * <td>Second. Use "s" to show the minimum number of digits, or "ss" to always show two digits + * (zero-padding if necessary, e.g. "08").</td> + * </tr> + * <tr> + * <td style="text-align: center">S</td> + * <td style="text-align: center">1..n</td> + * <td>3450</td> + * <td>Fractional Second - truncates (like other time fields) to the count of letters when formatting. + * Appends zeros if more than 3 letters specified. Truncates at three significant digits when parsing. + * (example shows display using pattern SSSS for seconds value 12.34567)</td> + * </tr> + * <tr> + * <td style="text-align: center">A</td> + * <td style="text-align: center">1..n</td> + * <td>69540000</td> + * <td>Milliseconds in day. This field behaves <i>exactly</i> like a composite of all time-related fields, + * not including the zone fields. 
As such, it also reflects discontinuities of those fields on DST transition + * days. On a day of DST onset, it will jump forward. On a day of DST cessation, it will jump backward. This + * reflects the fact that it must be combined with the offset field to obtain a unique local time value.</td> + * </tr> + * <tr> + * <th rowspan="23">zone</th> + * <td rowspan="2" style="text-align: center">z</td> + * <td style="text-align: center">1..3</td> + * <td>PDT</td> + * <td>The <i>short specific non-location format</i>. + * Where that is unavailable, falls back to the <i>short localized GMT format</i> ("O").</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>Pacific Daylight Time</td> + * <td>The <i>long specific non-location format</i>. + * Where that is unavailable, falls back to the <i>long localized GMT format</i> ("OOOO").</td> + * </tr> + * <tr> + * <td rowspan="3" style="text-align: center">Z</td> + * <td style="text-align: center">1..3</td> + * <td>-0800</td> + * <td>The <i>ISO8601 basic format</i> with hours, minutes and optional seconds fields. + * The format is equivalent to RFC 822 zone format (when optional seconds field is absent). + * This is equivalent to the "xxxx" specifier.</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>GMT-8:00</td> + * <td>The <i>long localized GMT format</i>. + * This is equivalent to the "OOOO" specifier.</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>-08:00<br> + * -07:52:58</td> + * <td>The <i>ISO8601 extended format</i> with hours, minutes and optional seconds fields. + * The ISO8601 UTC indicator "Z" is used when local time offset is 0. 
+ * This is equivalent to the "XXXXX" specifier.</td> + * </tr> + * <tr> + * <td rowspan="2" style="text-align: center">O</td> + * <td style="text-align: center">1</td> + * <td>GMT-8</td> + * <td>The <i>short localized GMT format</i>.</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>GMT-08:00</td> + * <td>The <i>long localized GMT format</i>.</td> + * </tr> + * <tr> + * <td rowspan="2" style="text-align: center">v</td> + * <td style="text-align: center">1</td> + * <td>PT</td> + * <td>The <i>short generic non-location format</i>. + * Where that is unavailable, falls back to the <i>generic location format</i> ("VVVV"), + * then the <i>short localized GMT format</i> as the final fallback.</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>Pacific Time</td> + * <td>The <i>long generic non-location format</i>. + * Where that is unavailable, falls back to <i>generic location format</i> ("VVVV").</td> + * </tr> + * <tr> + * <td rowspan="4" style="text-align: center">V</td> + * <td style="text-align: center">1</td> + * <td>uslax</td> + * <td>The short time zone ID. + * Where that is unavailable, the special short time zone ID <i>unk</i> (Unknown Zone) is used.<br> + * <i><b>Note</b>: This specifier was originally used for a variant of the short specific non-location format, + * but it was deprecated in the later version of the LDML specification. In CLDR 23/ICU 51, the definition of + * the specifier was changed to designate a short time zone ID.</i></td> + * </tr> + * <tr> + * <td style="text-align: center">2</td> + * <td>America/Los_Angeles</td> + * <td>The long time zone ID.</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>Los Angeles</td> + * <td>The exemplar city (location) for the time zone. + * Where that is unavailable, the localized exemplar city name for the special zone <i>Etc/Unknown</i> is used + * as the fallback (for example, "Unknown City"). 
</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>Los Angeles Time</td> + * <td>The <i>generic location format</i>. + * Where that is unavailable, falls back to the <i>long localized GMT format</i> ("OOOO"; + * Note: Fallback is only necessary with a GMT-style Time Zone ID, like Etc/GMT-830.)<br> + * This is especially useful when presenting possible timezone choices for user selection, + * since the naming is more uniform than the "v" format.</td> + * </tr> + * <tr> + * <td rowspan="5" style="text-align: center">X</td> + * <td style="text-align: center">1</td> + * <td>-08<br> + * +0530<br> + * Z</td> + * <td>The <i>ISO8601 basic format</i> with hours field and optional minutes field. + * The ISO8601 UTC indicator "Z" is used when local time offset is 0.</td> + * </tr> + * <tr> + * <td style="text-align: center">2</td> + * <td>-0800<br> + * Z</td> + * <td>The <i>ISO8601 basic format</i> with hours and minutes fields. + * The ISO8601 UTC indicator "Z" is used when local time offset is 0.</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>-08:00<br> + * Z</td> + * <td>The <i>ISO8601 extended format</i> with hours and minutes fields. + * The ISO8601 UTC indicator "Z" is used when local time offset is 0.</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>-0800<br> + * -075258<br> + * Z</td> + * <td>The <i>ISO8601 basic format</i> with hours, minutes and optional seconds fields. + * (Note: The seconds field is not supported by the ISO8601 specification.) + * The ISO8601 UTC indicator "Z" is used when local time offset is 0.</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>-08:00<br> + * -07:52:58<br> + * Z</td> + * <td>The <i>ISO8601 extended format</i> with hours, minutes and optional seconds fields. + * (Note: The seconds field is not supported by the ISO8601 specification.) 
+ * The ISO8601 UTC indicator "Z" is used when local time offset is 0.</td> + * </tr> + * <tr> + * <td rowspan="5" style="text-align: center">x</td> + * <td style="text-align: center">1</td> + * <td>-08<br> + * +0530</td> + * <td>The <i>ISO8601 basic format</i> with hours field and optional minutes field.</td> + * </tr> + * <tr> + * <td style="text-align: center">2</td> + * <td>-0800</td> + * <td>The <i>ISO8601 basic format</i> with hours and minutes fields.</td> + * </tr> + * <tr> + * <td style="text-align: center">3</td> + * <td>-08:00</td> + * <td>The <i>ISO8601 extended format</i> with hours and minutes fields.</td> + * </tr> + * <tr> + * <td style="text-align: center">4</td> + * <td>-0800<br> + * -075258</td> + * <td>The <i>ISO8601 basic format</i> with hours, minutes and optional seconds fields. + * (Note: The seconds field is not supported by the ISO8601 specification.)</td> + * </tr> + * <tr> + * <td style="text-align: center">5</td> + * <td>-08:00<br> + * -07:52:58</td> + * <td>The <i>ISO8601 extended format</i> with hours, minutes and optional seconds fields. + * (Note: The seconds field is not supported by the ISO8601 specification.)</td> + * </tr> + * </table> + * + * <P> + * Any characters in the pattern that are not in the ranges of ['a'..'z'] and + * ['A'..'Z'] will be treated as quoted text. For instance, characters + * like ':', '.', ' ', '#' and '@' will appear in the resulting time text + * even if they are not enclosed within single quotes. + * <P> + * A pattern containing any invalid pattern letter will result in a failing + * UErrorCode result during formatting or parsing. 
+ * <P> + * Examples using the US locale: + * <pre> + * \code + * Format Pattern Result + * -------------- ------- + * "yyyy.MM.dd G 'at' HH:mm:ss vvvv" ->> 1996.07.10 AD at 15:08:56 Pacific Time + * "EEE, MMM d, ''yy" ->> Wed, July 10, '96 + * "h:mm a" ->> 12:08 PM + * "hh 'o''clock' a, zzzz" ->> 12 o'clock PM, Pacific Daylight Time + * "K:mm a, vvv" ->> 0:00 PM, PT + * "yyyyy.MMMMM.dd GGG hh:mm aaa" ->> 1996.July.10 AD 12:08 PM + * \endcode + * </pre> + * Code Sample: + * <pre> + * \code + * UErrorCode success = U_ZERO_ERROR; + * SimpleTimeZone* pdt = new SimpleTimeZone(-8 * 60 * 60 * 1000, "PST"); + * pdt->setStartRule( Calendar::APRIL, 1, Calendar::SUNDAY, 2*60*60*1000); + * pdt->setEndRule( Calendar::OCTOBER, -1, Calendar::SUNDAY, 2*60*60*1000); + * + * // Format the current time. + * SimpleDateFormat* formatter + * = new SimpleDateFormat ("yyyy.MM.dd G 'at' hh:mm:ss a zzz", success ); + * GregorianCalendar cal(success); + * UDate currentTime_1 = cal.getTime(success); + * FieldPosition fp(FieldPosition::DONT_CARE); + * UnicodeString dateString; + * formatter->format( currentTime_1, dateString, fp ); + * cout << "result: " << dateString << endl; + * + * // Parse the previous string back into a Date. + * ParsePosition pp(0); + * UDate currentTime_2 = formatter->parse(dateString, pp ); + * \endcode + * </pre> + * In the above example, the time value "currentTime_2" obtained from parsing + * will be equal to currentTime_1. However, they may not be equal if the am/pm + * marker 'a' is left out from the format pattern while the "hour in am/pm" + * pattern symbol is used. This information loss can happen when formatting the + * time in PM. + * + * <p> + * When parsing a date string using the abbreviated year pattern ("y" or "yy"), + * SimpleDateFormat must interpret the abbreviated year + * relative to some century. It does this by adjusting dates to be + * within 80 years before and 20 years after the time the SimpleDateFormat + * instance is created. 
For example, using a pattern of "MM/dd/yy" and a + * SimpleDateFormat instance created on Jan 1, 1997, the string + * "01/11/12" would be interpreted as Jan 11, 2012 while the string "05/04/64" + * would be interpreted as May 4, 1964. + * During parsing, only strings consisting of exactly two digits, as defined by + * <code>Unicode::isDigit()</code>, will be parsed into the default century. + * Any other numeric string, such as a one digit string, a three or more digit + * string, or a two digit string that isn't all digits (for example, "-1"), is + * interpreted literally. So "01/02/3" or "01/02/003" are parsed (for the + * Gregorian calendar), using the same pattern, as Jan 2, 3 AD. Likewise (but + * only in lenient parse mode, the default) "01/02/-3" is parsed as Jan 2, 4 BC. + * + * <p> + * If the year pattern has more than two 'y' characters, the year is + * interpreted literally, regardless of the number of digits. So using the + * pattern "MM/dd/yyyy", "01/11/12" parses to Jan 11, 12 A.D. + * + * <p> + * When numeric fields abut one another directly, with no intervening delimiter + * characters, they constitute a run of abutting numeric fields. Such runs are + * parsed specially. For example, the format "HHmmss" parses the input text + * "123456" to 12:34:56, parses the input text "12345" to 1:23:45, and fails to + * parse "1234". In other words, the leftmost field of the run is flexible, + * while the others keep a fixed width. If the parse fails anywhere in the run, + * then the leftmost field is shortened by one character, and the entire run is + * parsed again. This is repeated until either the parse succeeds or the + * leftmost field is one character in length. If the parse still fails at that + * point, the parse of the run fails. + * + * <P> + * For time zones that have no names, SimpleDateFormat uses strings GMT+hours:minutes or + * GMT-hours:minutes. 
+ * <P> + * The calendar defines what is the first day of the week, the first week of the + * year, whether hours are zero based or not (0 vs 12 or 24), and the timezone. + * There is one common number format to handle all the numbers; the digit count + * is handled programmatically according to the pattern. + * + * <p><em>User subclasses are not supported.</em> While clients may write + * subclasses, such code will not necessarily work and will not be + * guaranteed to work stably from release to release. + */ +class U_I18N_API SimpleDateFormat: public DateFormat { +public: + /** + * Construct a SimpleDateFormat using the default pattern for the default + * locale. + * <P> + * [Note:] Not all locales support SimpleDateFormat; for full generality, + * use the factory methods in the DateFormat class. + * @param status Output param set to success/failure code. + * @stable ICU 2.0 + */ + SimpleDateFormat(UErrorCode& status); + + /** + * Construct a SimpleDateFormat using the given pattern and the default locale. + * The locale is used to obtain the symbols used in formatting (e.g., the + * names of the months), but not to provide the pattern. + * <P> + * [Note:] Not all locales support SimpleDateFormat; for full generality, + * use the factory methods in the DateFormat class. + * @param pattern the pattern for the format. + * @param status Output param set to success/failure code. + * @stable ICU 2.0 + */ + SimpleDateFormat(const UnicodeString& pattern, + UErrorCode& status); + + /** + * Construct a SimpleDateFormat using the given pattern, numbering system override, and the default locale. + * The locale is used to obtain the symbols used in formatting (e.g., the + * names of the months), but not to provide the pattern. + * <P> + * A numbering system override is a string containing either the name of a known numbering system, + * or a set of field and numbering system pairs that specify which fields are to be formatted with + * the alternate numbering system. 
For example, to specify that all numeric fields in the specified + * date or time pattern are to be rendered using Thai digits, simply specify the numbering system override + * as "thai". To specify that just the year portion of the date be formatted using Hebrew numbering, + * use the override string "y=hebrew". Numbering system overrides can be combined using a semi-colon + * character in the override string, such as "d=decimal;M=arabic;y=hebrew", etc. + * + * <P> + * [Note:] Not all locales support SimpleDateFormat; for full generality, + * use the factory methods in the DateFormat class. + * @param pattern the pattern for the format. + * @param override the override string. + * @param status Output param set to success/failure code. + * @stable ICU 4.2 + */ + SimpleDateFormat(const UnicodeString& pattern, + const UnicodeString& override, + UErrorCode& status); + + /** + * Construct a SimpleDateFormat using the given pattern and locale. + * The locale is used to obtain the symbols used in formatting (e.g., the + * names of the months), but not to provide the pattern. + * <P> + * [Note:] Not all locales support SimpleDateFormat; for full generality, + * use the factory methods in the DateFormat class. + * @param pattern the pattern for the format. + * @param locale the given locale. + * @param status Output param set to success/failure code. + * @stable ICU 2.0 + */ + SimpleDateFormat(const UnicodeString& pattern, + const Locale& locale, + UErrorCode& status); + + /** + * Construct a SimpleDateFormat using the given pattern, numbering system override, and locale. + * The locale is used to obtain the symbols used in formatting (e.g., the + * names of the months), but not to provide the pattern. + * <P> + * A numbering system override is a string containing either the name of a known numbering system, + * or a set of field and numbering system pairs that specify which fields are to be formatted with + * the alternate numbering system. 
For example, to specify that all numeric fields in the specified + * date or time pattern are to be rendered using Thai digits, simply specify the numbering system override + * as "thai". To specify that just the year portion of the date be formatted using Hebrew numbering, + * use the override string "y=hebrew". Numbering system overrides can be combined using a semi-colon + * character in the override string, such as "d=decimal;M=arabic;y=hebrew", etc. + * <P> + * [Note:] Not all locales support SimpleDateFormat; for full generality, + * use the factory methods in the DateFormat class. + * @param pattern the pattern for the format. + * @param override the numbering system override. + * @param locale the given locale. + * @param status Output param set to success/failure code. + * @stable ICU 4.2 + */ + SimpleDateFormat(const UnicodeString& pattern, + const UnicodeString& override, + const Locale& locale, + UErrorCode& status); + + /** + * Construct a SimpleDateFormat using the given pattern and locale-specific + * symbol data. The formatter takes ownership of the DateFormatSymbols object; + * the caller is no longer responsible for deleting it. + * @param pattern the given pattern for the format. + * @param formatDataToAdopt the symbols to be adopted. + * @param status Output param set to success/failure code. + * @stable ICU 2.0 + */ + SimpleDateFormat(const UnicodeString& pattern, + DateFormatSymbols* formatDataToAdopt, + UErrorCode& status); + + /** + * Construct a SimpleDateFormat using the given pattern and locale-specific + * symbol data. The DateFormatSymbols object is NOT adopted; the caller + * remains responsible for deleting it. + * @param pattern the given pattern for the format. + * @param formatData the formatting symbols to be used. + * @param status Output param set to success/failure code. 
+ * @stable ICU 2.0 + */ + SimpleDateFormat(const UnicodeString& pattern, + const DateFormatSymbols& formatData, + UErrorCode& status); + + /** + * Copy constructor. + * @stable ICU 2.0 + */ + SimpleDateFormat(const SimpleDateFormat&); + + /** + * Assignment operator. + * @stable ICU 2.0 + */ + SimpleDateFormat& operator=(const SimpleDateFormat&); + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~SimpleDateFormat(); + + /** + * Clone this Format object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 2.0 + */ + virtual Format* clone(void) const; + + /** + * Return true if the given Format objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param other the object to be compared with. + * @return true if the given Format objects are semantically equal. + * @stable ICU 2.0 + */ + virtual UBool operator==(const Format& other) const; + + + using DateFormat::format; + + /** + * Format a date or time, which is the standard millis since 24:00 GMT, Jan + * 1, 1970. Overrides DateFormat pure virtual method. + * <P> + * Example: using the US locale: "yyyy.MM.dd e 'at' HH:mm:ss zzz" ->> + * 1996.07.10 AD at 15:08:56 PDT + * + * @param cal Calendar set to the date and time to be formatted + * into a date/time string. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param pos The formatting position. On input: an alignment field, + * if desired. On output: the offsets of the alignment field. + * @return Reference to 'appendTo' parameter. + * @stable ICU 2.1 + */ + virtual UnicodeString& format( Calendar& cal, + UnicodeString& appendTo, + FieldPosition& pos) const; + + /** + * Format a date or time, which is the standard millis since 24:00 GMT, Jan + * 1, 1970. Overrides DateFormat pure virtual method. 
+ * <P> + * Example: using the US locale: "yyyy.MM.dd e 'at' HH:mm:ss zzz" ->> + * 1996.07.10 AD at 15:08:56 PDT + * + * @param cal Calendar set to the date and time to be formatted + * into a date/time string. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param posIter On return, can be used to iterate over positions + * of fields generated by this format call. Field values + * are defined in UDateFormatField. + * @param status Input/output param set to success/failure code. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.4 + */ + virtual UnicodeString& format( Calendar& cal, + UnicodeString& appendTo, + FieldPositionIterator* posIter, + UErrorCode& status) const; + + using DateFormat::parse; + + /** + * Parse a date/time string beginning at the given parse position. For + * example, a time text "07/10/96 4:5 PM, PDT" will be parsed into a Date + * that is equivalent to Date(837039928046). + * <P> + * By default, parsing is lenient: If the input is not in the form used by + * this object's format method but can still be parsed as a date, then the + * parse succeeds. Clients may insist on strict adherence to the format by + * calling setLenient(false). + * @see DateFormat::setLenient(boolean) + * + * @param text The date/time string to be parsed + * @param cal A Calendar set on input to the date and time to be used for + * missing values in the date/time string being parsed, and set + * on output to the parsed date/time. When the calendar type is + * different from the internal calendar held by this SimpleDateFormat + * instance, the internal calendar will be cloned to a work + * calendar set to the same milliseconds and time zone as the + * cal parameter, field values will be parsed based on the work + * calendar, then the result (milliseconds and time zone) will + * be set in this calendar. 
+ * @param pos On input, the position at which to start parsing; on + * output, the position at which parsing terminated, or the + * start position if the parse failed. + * @stable ICU 2.1 + */ + virtual void parse( const UnicodeString& text, + Calendar& cal, + ParsePosition& pos) const; + + + /** + * Set the start UDate used to interpret two-digit year strings. + * When dates are parsed having 2-digit year strings, they are placed within + * an assumed range of 100 years starting on the two digit start date. For + * example, the string "24-Jan-17" may be in the year 1817, 1917, 2017, or + * some other year. SimpleDateFormat chooses a year so that the resultant + * date is on or after the two digit start date and within 100 years of the + * two digit start date. + * <P> + * By default, the two digit start date is set to 80 years before the current + * time at which a SimpleDateFormat object is created. + * @param d start UDate used to interpret two-digit year strings. + * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with + * an error value if there was a parse error. + * @stable ICU 2.0 + */ + virtual void set2DigitYearStart(UDate d, UErrorCode& status); + + /** + * Get the start UDate used to interpret two-digit year strings. + * When dates are parsed having 2-digit year strings, they are placed within + * an assumed range of 100 years starting on the two digit start date. For + * example, the string "24-Jan-17" may be in the year 1817, 1917, 2017, or + * some other year. SimpleDateFormat chooses a year so that the resultant + * date is on or after the two digit start date and within 100 years of the + * two digit start date. + * <P> + * By default, the two digit start date is set to 80 years before the current + * time at which a SimpleDateFormat object is created. + * @param status Filled in with U_ZERO_ERROR if the parse was successful, and with + * an error value if there was a parse error. 
+ * @stable ICU 2.0 + */ + UDate get2DigitYearStart(UErrorCode& status) const; + + /** + * Return a pattern string describing this date format. + * @param result Output param to receive the pattern. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + virtual UnicodeString& toPattern(UnicodeString& result) const; + + /** + * Return a localized pattern string describing this date format. + * In most cases, this will return the same thing as toPattern(), + * but a locale can specify characters to use in pattern descriptions + * in place of the ones described in this class's class documentation. + * (Presumably, letters that would be more mnemonic in that locale's + * language.) This function would produce a pattern using those + * letters. + * <p> + * <b>Note:</b> This implementation depends on DateFormatSymbols::getLocalPatternChars() + * to get localized format pattern characters. ICU does not include + * localized pattern character data, therefore, unless user sets localized + * pattern characters manually, this method returns the same result as + * toPattern(). + * + * @param result Receives the localized pattern. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + virtual UnicodeString& toLocalizedPattern(UnicodeString& result, + UErrorCode& status) const; + + /** + * Apply the given unlocalized pattern string to this date format. + * (i.e., after this call, this formatter will format dates according to + * the new pattern) + * + * @param pattern The pattern to be applied. + * @stable ICU 2.0 + */ + virtual void applyPattern(const UnicodeString& pattern); + + /** + * Apply the given localized pattern string to this date format. + * (see toLocalizedPattern() for more information on localized patterns.) + * + * @param pattern The localized pattern to be applied. 
+ * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @stable ICU 2.0 + */ + virtual void applyLocalizedPattern(const UnicodeString& pattern, + UErrorCode& status); + + /** + * Gets the date/time formatting symbols (this is an object carrying + * the various strings and other symbols used in formatting: e.g., month + * names and abbreviations, time zone names, AM/PM strings, etc.) + * @return a copy of the date-time formatting data associated + * with this date-time formatter. + * @stable ICU 2.0 + */ + virtual const DateFormatSymbols* getDateFormatSymbols(void) const; + + /** + * Set the date/time formatting symbols. The caller no longer owns the + * DateFormatSymbols object and should not delete it after making this call. + * @param newFormatSymbols the given date-time formatting symbols to adopt. + * @stable ICU 2.0 + */ + virtual void adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols); + + /** + * Set the date/time formatting data. + * @param newFormatSymbols the given date-time formatting symbols to copy. + * @stable ICU 2.0 + */ + virtual void setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols); + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. 
All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Set the calendar to be used by this date format. Initially, the default + * calendar for the specified or default locale is used. The caller should + * not delete the Calendar object after it is adopted by this call. + * Adopting a new calendar will change to the default symbols. + * + * @param calendarToAdopt Calendar object to be adopted. + * @stable ICU 2.0 + */ + virtual void adoptCalendar(Calendar* calendarToAdopt); + + /* Cannot use #ifndef U_HIDE_INTERNAL_API for the following methods since they are virtual */ + /** + * Sets the TimeZoneFormat to be used by this date/time formatter. + * The caller should not delete the TimeZoneFormat object after + * it is adopted by this call. + * @param timeZoneFormatToAdopt The TimeZoneFormat object to be adopted. + * @internal ICU 49 technology preview + */ + virtual void adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt); + + /** + * Sets the TimeZoneFormat to be used by this date/time formatter. + * @param newTimeZoneFormat The TimeZoneFormat object to copy. + * @internal ICU 49 technology preview + */ + virtual void setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat); + + /** + * Gets the time zone format object associated with this date/time formatter. + * @return the time zone format associated with this date/time formatter. + * @internal ICU 49 technology preview + */ + virtual const TimeZoneFormat* getTimeZoneFormat(void) const; + + /** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see + * DateFormat. + * @param value The UDisplayContext value to set. + * @param status Input/output status. 
If at entry this indicates a failure + * status, the function will do nothing; otherwise this will be + * updated with any new status from the function. + * @stable ICU 53 + */ + virtual void setContext(UDisplayContext value, UErrorCode& status); + + /** + * Overrides the base class method. + * This method clears per field NumberFormat instances + * previously set by {@see adoptNumberFormat(const UnicodeString&, NumberFormat*, UErrorCode)} + * @param formatToAdopt the NumberFormat used + * @stable ICU 54 + */ + void adoptNumberFormat(NumberFormat *formatToAdopt); + + /** + * Allow the user to set the NumberFormat for several fields + * It can be a single field like: "y"(year) or "M"(month) + * It can be several fields combined together: "yM"(year and month) + * Note: + * 1 symbol field is enough for multiple symbol field (so "y" will override "yy", "yyy") + * If the field is not numeric, then override has no effect (like "MMM" will use abbreviation, not numerical field) + * Per field NumberFormat can also be cleared in {@see DateFormat::setNumberFormat(const NumberFormat& newNumberFormat)} + * + * @param fields the fields to override(like y) + * @param formatToAdopt the NumberFormat used + * @param status Receives a status code, which will be U_ZERO_ERROR + * if the operation succeeds. + * @stable ICU 54 + */ + void adoptNumberFormat(const UnicodeString& fields, NumberFormat *formatToAdopt, UErrorCode &status); + + /** + * Get the numbering system to be used for a particular field. + * @param field The UDateFormatField to get + * @stable ICU 54 + */ + const NumberFormat * getNumberFormatForField(UChar field) const; + +#ifndef U_HIDE_INTERNAL_API + /** + * This is for ICU internal use only. Please do not use. + * Check whether the 'field' is smaller than all the fields covered in + * pattern, return TRUE if it is. The sequence of calendar field, + * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,... 
+ * @param field the calendar field need to check against + * @return TRUE if the 'field' is smaller than all the fields + * covered in pattern. FALSE otherwise. + * @internal ICU 4.0 + */ + UBool isFieldUnitIgnored(UCalendarDateFields field) const; + + + /** + * This is for ICU internal use only. Please do not use. + * Check whether the 'field' is smaller than all the fields covered in + * pattern, return TRUE if it is. The sequence of calendar field, + * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,... + * @param pattern the pattern to check against + * @param field the calendar field need to check against + * @return TRUE if the 'field' is smaller than all the fields + * covered in pattern. FALSE otherwise. + * @internal ICU 4.0 + */ + static UBool isFieldUnitIgnored(const UnicodeString& pattern, + UCalendarDateFields field); + + /** + * This is for ICU internal use only. Please do not use. + * Get the locale of this simple date formatter. + * It is used in DateIntervalFormat. + * + * @return locale in this simple date formatter + * @internal ICU 4.0 + */ + const Locale& getSmpFmtLocale(void) const; +#endif /* U_HIDE_INTERNAL_API */ + +private: + friend class DateFormat; + + void initializeDefaultCentury(void); + + void initializeBooleanAttributes(void); + + SimpleDateFormat(); // default constructor not implemented + + /** + * Used by the DateFormat factory methods to construct a SimpleDateFormat. + * @param timeStyle the time style. + * @param dateStyle the date style. + * @param locale the given locale. + * @param status Output param set to success/failure code on + * exit. + */ + SimpleDateFormat(EStyle timeStyle, EStyle dateStyle, const Locale& locale, UErrorCode& status); + + /** + * Construct a SimpleDateFormat for the given locale. If no resource data + * is available, create an object of last resort, using hard-coded strings. + * This is an internal method, called by DateFormat. It should never fail. 
+ * @param locale the given locale. + * @param status Output param set to success/failure code on + * exit. + */ + SimpleDateFormat(const Locale& locale, UErrorCode& status); // Use default pattern + + /** + * Hook called by format(... FieldPosition& ...) and format(...FieldPositionIterator&...) + */ + UnicodeString& _format(Calendar& cal, UnicodeString& appendTo, FieldPositionHandler& handler, UErrorCode& status) const; + + /** + * Called by format() to format a single field. + * + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param ch The format character we encountered in the pattern. + * @param count Number of characters in the current pattern symbol (e.g., + * "yyyy" in the pattern would result in a call to this function + * with ch equal to 'y' and count equal to 4) + * @param capitalizationContext Capitalization context for this date format. + * @param fieldNum Zero-based numbering of current field within the overall format. + * @param handler Records information about field positions. + * @param cal Calendar to use + * @param status Receives a status code, which will be U_ZERO_ERROR if the operation + * succeeds. + */ + void subFormat(UnicodeString &appendTo, + UChar ch, + int32_t count, + UDisplayContext capitalizationContext, + int32_t fieldNum, + FieldPositionHandler& handler, + Calendar& cal, + SimpleDateFormatMutableNFs &mutableNFs, + UErrorCode& status) const; // in case of illegal argument + + /** + * Used by subFormat() to format a numeric value. + * Appends to toAppendTo a string representation of "value" + * having a number of digits between "minDigits" and + * "maxDigits". Uses the DateFormat's NumberFormat. + * + * @param currentNumberFormat + * @param appendTo Output parameter to receive result. + * Formatted number is appended to existing contents. + * @param value Value to format. 
+ * @param minDigits Minimum number of digits the result should have + * @param maxDigits Maximum number of digits the result should have + */ + void zeroPaddingNumber(NumberFormat *currentNumberFormat, + UnicodeString &appendTo, + int32_t value, + int32_t minDigits, + int32_t maxDigits) const; + + /** + * Return true if the given format character, occuring count + * times, represents a numeric field. + */ + static UBool isNumeric(UChar formatChar, int32_t count); + + /** + * Returns TRUE if the patternOffset is at the start of a numeric field. + */ + static UBool isAtNumericField(const UnicodeString &pattern, int32_t patternOffset); + + /** + * Returns TRUE if the patternOffset is right after a non-numeric field. + */ + static UBool isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset); + + /** + * initializes fCalendar from parameters. Returns fCalendar as a convenience. + * @param adoptZone Zone to be adopted, or NULL for TimeZone::createDefault(). + * @param locale Locale of the calendar + * @param status Error code + * @return the newly constructed fCalendar + */ + Calendar *initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status); + + /** + * Called by several of the constructors to load pattern data and formatting symbols + * out of a resource bundle and initialize the locale based on it. + * @param timeStyle The time style, as passed to DateFormat::createDateInstance(). + * @param dateStyle The date style, as passed to DateFormat::createTimeInstance(). + * @param locale The locale to load the patterns from. + * @param status Filled in with an error code if loading the data from the + * resources fails. + */ + void construct(EStyle timeStyle, EStyle dateStyle, const Locale& locale, UErrorCode& status); + + /** + * Called by construct() and the various constructors to set up the SimpleDateFormat's + * Calendar and NumberFormat objects. + * @param locale The locale for which we want a Calendar and a NumberFormat. 
+ * @param status Filled in with an error code if creating either subobject fails. + */ + void initialize(const Locale& locale, UErrorCode& status); + + /** + * Private code-size reduction function used by subParse. + * @param text the time text being parsed. + * @param start where to start parsing. + * @param field the date field being parsed. + * @param stringArray the string array to parsed. + * @param stringArrayCount the size of the array. + * @param monthPattern pointer to leap month pattern, or NULL if none. + * @param cal a Calendar set to the date and time to be formatted + * into a date/time string. + * @return the new start position if matching succeeded; a negative number + * indicating matching failure, otherwise. + */ + int32_t matchString(const UnicodeString& text, int32_t start, UCalendarDateFields field, + const UnicodeString* stringArray, int32_t stringArrayCount, + const UnicodeString* monthPattern, Calendar& cal) const; + + /** + * Private code-size reduction function used by subParse. + * @param text the time text being parsed. + * @param start where to start parsing. + * @param field the date field being parsed. + * @param stringArray the string array to parsed. + * @param stringArrayCount the size of the array. + * @param cal a Calendar set to the date and time to be formatted + * into a date/time string. + * @return the new start position if matching succeeded; a negative number + * indicating matching failure, otherwise. + */ + int32_t matchQuarterString(const UnicodeString& text, int32_t start, UCalendarDateFields field, + const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const; + + /** + * Used by subParse() to match localized day period strings. + */ + int32_t matchDayPeriodStrings(const UnicodeString& text, int32_t start, + const UnicodeString* stringArray, int32_t stringArrayCount, + int32_t &dayPeriod) const; + + /** + * Private function used by subParse to match literal pattern text. 
+ * + * @param pattern the pattern string + * @param patternOffset the starting offset into the pattern text. On + * output will be set to the offset of the first non-literal character in the pattern + * @param text the text being parsed + * @param textOffset the starting offset into the text. On output + * will be set to the offset of the character after the match + * @param whitespaceLenient <code>TRUE</code> if whitespace parse is lenient, <code>FALSE</code> otherwise. + * @param partialMatchLenient <code>TRUE</code> if partial match parse is lenient, <code>FALSE</code> otherwise. + * @param oldLeniency <code>TRUE</code> if old leniency control is lenient, <code>FALSE</code> otherwise. + * + * @return <code>TRUE</code> if the literal text could be matched, <code>FALSE</code> otherwise. + */ + static UBool matchLiterals(const UnicodeString &pattern, int32_t &patternOffset, + const UnicodeString &text, int32_t &textOffset, + UBool whitespaceLenient, UBool partialMatchLenient, UBool oldLeniency); + + /** + * Private member function that converts the parsed date strings into + * timeFields. Returns -start (for ParsePosition) if failed. + * @param text the time text to be parsed. + * @param start where to start parsing. + * @param ch the pattern character for the date field text to be parsed. + * @param count the count of a pattern character. + * @param obeyCount if true then the count is strictly obeyed. + * @param allowNegative + * @param ambiguousYear If true then the two-digit year == the default start year. + * @param saveHebrewMonth Used to hang onto month until year is known. + * @param cal a Calendar set to the date and time to be formatted + * into a date/time string. + * @param patLoc + * @param numericLeapMonthFormatter If non-null, used to parse numeric leap months. + * @param tzTimeType the type of parsed time zone - standard, daylight or unknown (output). + * This parameter can be NULL if caller does not need the information. 
+ * @return the new start position if matching succeeded; a negative number + * indicating matching failure, otherwise. + */ + int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, + UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, + int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType, SimpleDateFormatMutableNFs &mutableNFs, + int32_t *dayPeriod=NULL) const; + + void parseInt(const UnicodeString& text, + Formattable& number, + ParsePosition& pos, + UBool allowNegative, + NumberFormat *fmt) const; + + void parseInt(const UnicodeString& text, + Formattable& number, + int32_t maxDigits, + ParsePosition& pos, + UBool allowNegative, + NumberFormat *fmt) const; + + int32_t checkIntSuffix(const UnicodeString& text, int32_t start, + int32_t patLoc, UBool isNegative) const; + + /** + * Translate a pattern, mapping each character in the from string to the + * corresponding character in the to string. Return an error if the original + * pattern contains an unmapped character, or if a quote is unmatched. + * Quoted (single quotes only) material is not translated. + * @param originalPattern the original pattern. + * @param translatedPattern Output param to receive the translated pattern. + * @param from the characters to be translated from. + * @param to the characters to be translated to. + * @param status Receives a status code, which will be U_ZERO_ERROR + * if the operation succeeds. + */ + static void translatePattern(const UnicodeString& originalPattern, + UnicodeString& translatedPattern, + const UnicodeString& from, + const UnicodeString& to, + UErrorCode& status); + + /** + * Sets the starting date of the 100-year window that dates with 2-digit years + * are considered to fall within. + * @param startDate the start date + * @param status Receives a status code, which will be U_ZERO_ERROR + * if the operation succeeds. 
+ */ + void parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status); + + /** + * Return the length matched by the given affix, or -1 if none. + * Runs of white space in the affix, match runs of white space in + * the input. + * @param affix pattern string, taken as a literal + * @param input input text + * @param pos offset into input at which to begin matching + * @return length of input that matches, or -1 if match failure + */ + int32_t compareSimpleAffix(const UnicodeString& affix, + const UnicodeString& input, + int32_t pos) const; + + /** + * Skip over a run of zero or more Pattern_White_Space characters at + * pos in text. + */ + int32_t skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const; + + /** + * Skip over a run of zero or more isUWhiteSpace() characters at pos + * in text. + */ + int32_t skipUWhiteSpace(const UnicodeString& text, int32_t pos) const; + + /** + * Initialize NumberFormat instances used for numbering system overrides. + */ + void initNumberFormatters(const Locale &locale,UErrorCode &status); + + /** + * Parse the given override string and set up structures for number formats + */ + void processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status); + + /** + * Used to map pattern characters to Calendar field identifiers. + */ + static const UCalendarDateFields fgPatternIndexToCalendarField[]; + + /** + * Map index into pattern character string to DateFormat field number + */ + static const UDateFormatField fgPatternIndexToDateFormatField[]; + + /** + * Lazy TimeZoneFormat instantiation, semantically const + */ + TimeZoneFormat *tzFormat() const; + + const NumberFormat* getNumberFormatByIndex(UDateFormatField index) const; + + /** + * Used to map Calendar field to field level. + * The larger the level, the smaller the field unit. + * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10, + * UCAL_MONTH level is 20. 
+ */ + static const int32_t fgCalendarFieldToLevel[]; + + /** + * Map calendar field letter into calendar field level. + */ + static int32_t getLevelFromChar(UChar ch); + + /** + * Tell if a character can be used to define a field in a format string. + */ + static UBool isSyntaxChar(UChar ch); + + /** + * The formatting pattern for this formatter. + */ + UnicodeString fPattern; + + /** + * The numbering system override for dates. + */ + UnicodeString fDateOverride; + + /** + * The numbering system override for times. + */ + UnicodeString fTimeOverride; + + + /** + * The original locale used (for reloading symbols) + */ + Locale fLocale; + + /** + * A pointer to an object containing the strings to use in formatting (e.g., + * month and day names, AM and PM strings, time zone names, etc.) + */ + DateFormatSymbols* fSymbols; // Owned + + /** + * The time zone formatter + */ + TimeZoneFormat* fTimeZoneFormat; + + /** + * If dates have ambiguous years, we map them into the century starting + * at defaultCenturyStart, which may be any date. If defaultCenturyStart is + * set to SYSTEM_DEFAULT_CENTURY, which it is by default, then the system + * values are used. The instance values defaultCenturyStart and + * defaultCenturyStartYear are only used if explicitly set by the user + * through the API method parseAmbiguousDatesAsAfter(). + */ + UDate fDefaultCenturyStart; + + UBool fHasMinute; + UBool fHasSecond; + + /** + * Sets fHasMinute and fHasSecond. + */ + void parsePattern(); + + /** + * See documentation for fDefaultCenturyStart. + */ + /*transient*/ int32_t fDefaultCenturyStartYear; + + struct NSOverride : public UMemory { + const SharedNumberFormat *snf; + int32_t hash; + NSOverride *next; + void free(); + NSOverride() : snf(NULL), hash(0), next(NULL) { + } + ~NSOverride(); + }; + + /** + * The number format in use for each date field. NULL means fall back + * to fNumberFormat in DateFormat. 
+ */ + const SharedNumberFormat **fSharedNumberFormatters; + + UBool fHaveDefaultCentury; + + BreakIterator* fCapitalizationBrkIter; +}; + +inline UDate +SimpleDateFormat::get2DigitYearStart(UErrorCode& /*status*/) const +{ + return fDefaultCenturyStart; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _SMPDTFMT +//eof diff --git a/intl/icu/source/i18n/unicode/sortkey.h b/intl/icu/source/i18n/unicode/sortkey.h new file mode 100644 index 000000000..814e29c4b --- /dev/null +++ b/intl/icu/source/i18n/unicode/sortkey.h @@ -0,0 +1,340 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ***************************************************************************** + * Copyright (C) 1996-2014, International Business Machines Corporation and others. + * All Rights Reserved. + ***************************************************************************** + * + * File sortkey.h + * + * Created by: Helena Shih + * + * Modification History: + * + * Date Name Description + * + * 6/20/97 helena Java class name change. + * 8/18/97 helena Added internal API documentation. + * 6/26/98 erm Changed to use byte arrays and memcmp. + ***************************************************************************** + */ + +#ifndef SORTKEY_H +#define SORTKEY_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Keys for comparing strings multiple times. + */ + +#if !UCONFIG_NO_COLLATION + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/coll.h" + +U_NAMESPACE_BEGIN + +/* forward declaration */ +class RuleBasedCollator; +class CollationKeyByteSink; + +/** + * + * Collation keys are generated by the Collator class. Use the CollationKey objects + * instead of Collator to compare strings multiple times. A CollationKey + * preprocesses the comparison information from the Collator object to + * make the comparison faster. 
If you are not going to compare strings + * multiple times, then using the Collator object is generally faster, + * since it only processes as much of the string as needed to make a + * comparison. + * <p> For example (with strength == tertiary) + * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator + * only needs to process a couple of characters, while a comparison + * with CollationKeys will process all of the characters. On the other hand, + * if you are doing a sort of a number of fields, it is much faster to use + * CollationKeys, since you will be comparing strings multiple times. + * <p>A typical use of CollationKeys is in databases, where you store a CollationKey + * in a hidden field, and use it for sorting or indexing. + * + * <p>Example of use: + * <pre> + * \code + * UErrorCode success = U_ZERO_ERROR; + * Collator* myCollator = Collator::createInstance(success); + * CollationKey* keys = new CollationKey [3]; + * myCollator->getCollationKey("Tom", keys[0], success ); + * myCollator->getCollationKey("Dick", keys[1], success ); + * myCollator->getCollationKey("Harry", keys[2], success ); + * + * // Inside body of sort routine, compare keys this way: + * CollationKey tmp; + * if(keys[0].compareTo( keys[1] ) > 0 ) { + * tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp; + * } + * //... + * \endcode + * </pre> + * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort + * long lists of words by retrieving collation keys with Collator::getCollationKey(). + * You can then cache the collation keys and compare them using CollationKey::compareTo(). + * <p> + * <strong>Note:</strong> <code>Collator</code>s with different Locale, + * CollationStrength and DecompositionMode settings will return different + * CollationKeys for the same set of strings. 
Locales have specific + * collation rules, and the way in which secondary and tertiary differences + * are taken into account, for example, will result in different CollationKeys + * for same strings. + * <p> + + * @see Collator + * @see RuleBasedCollator + * @version 1.3 12/18/96 + * @author Helena Shih + * @stable ICU 2.0 + */ +class U_I18N_API CollationKey : public UObject { +public: + /** + * This creates an empty collation key based on the null string. An empty + * collation key contains no sorting information. When comparing two empty + * collation keys, the result is Collator::EQUAL. Comparing empty collation key + * with non-empty collation key is always Collator::LESS. + * @stable ICU 2.0 + */ + CollationKey(); + + + /** + * Creates a collation key based on the collation key values. + * @param values the collation key values + * @param count number of collation key values, including trailing nulls. + * @stable ICU 2.0 + */ + CollationKey(const uint8_t* values, + int32_t count); + + /** + * Copy constructor. + * @param other the object to be copied. + * @stable ICU 2.0 + */ + CollationKey(const CollationKey& other); + + /** + * Sort key destructor. + * @stable ICU 2.0 + */ + virtual ~CollationKey(); + + /** + * Assignment operator + * @param other the object to be copied. + * @stable ICU 2.0 + */ + const CollationKey& operator=(const CollationKey& other); + + /** + * Compare if two collation keys are the same. + * @param source the collation key to compare to. + * @return Returns true if two collation keys are equal, false otherwise. + * @stable ICU 2.0 + */ + UBool operator==(const CollationKey& source) const; + + /** + * Compare if two collation keys are not the same. + * @param source the collation key to compare to. + * @return Returns TRUE if two collation keys are different, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool operator!=(const CollationKey& source) const; + + + /** + * Test to see if the key is in an invalid state. 
The key will be in an + * invalid state if it couldn't allocate memory for some operation. + * @return Returns TRUE if the key is in an invalid state, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool isBogus(void) const; + + /** + * Returns a pointer to the collation key values. The storage is owned + * by the collation key and the pointer will become invalid if the key + * is deleted. + * @param count the output parameter of number of collation key values, + * including any trailing nulls. + * @return a pointer to the collation key values. + * @stable ICU 2.0 + */ + const uint8_t* getByteArray(int32_t& count) const; + +#ifdef U_USE_COLLATION_KEY_DEPRECATES + /** + * Extracts the collation key values into a new array. The caller owns + * this storage and should free it. + * @param count the output parameter of number of collation key values, + * including any trailing nulls. + * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release. + */ + uint8_t* toByteArray(int32_t& count) const; +#endif + +#ifndef U_HIDE_DEPRECATED_API + /** + * Convenience method which does a string(bit-wise) comparison of the + * two collation keys. + * @param target target collation key to be compared with + * @return Returns Collator::LESS if sourceKey < targetKey, + * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL + * otherwise. + * @deprecated ICU 2.6 use the overload with error code + */ + Collator::EComparisonResult compareTo(const CollationKey& target) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Convenience method which does a string(bit-wise) comparison of the + * two collation keys. + * @param target target collation key to be compared with + * @param status error code + * @return Returns UCOL_LESS if sourceKey < targetKey, + * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL + * otherwise. 
+ * @stable ICU 2.6 + */ + UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const; + + /** + * Creates an integer that is unique to the collation key. NOTE: this + * is not the same as String.hashCode. + * <p>Example of use: + * <pre> + * . UErrorCode status = U_ZERO_ERROR; + * . Collator *myCollation = Collator::createInstance(Locale::US, status); + * . if (U_FAILURE(status)) return; + * . CollationKey key1, key2; + * . UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR; + * . myCollation->getCollationKey("abc", key1, status1); + * . if (U_FAILURE(status1)) { delete myCollation; return; } + * . myCollation->getCollationKey("ABC", key2, status2); + * . if (U_FAILURE(status2)) { delete myCollation; return; } + * . // key1.hashCode() != key2.hashCode() + * </pre> + * @return the hash value based on the string's collation order. + * @see UnicodeString#hashCode + * @stable ICU 2.0 + */ + int32_t hashCode(void) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + /** + * Replaces the current bytes buffer with a new one of newCapacity + * and copies length bytes from the old buffer to the new one. + * @return the new buffer, or NULL if the allocation failed + */ + uint8_t *reallocate(int32_t newCapacity, int32_t length); + /** + * Set a new length for a new sort key in the existing fBytes. + */ + void setLength(int32_t newLength); + + uint8_t *getBytes() { + return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; + } + const uint8_t *getBytes() const { + return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes; + } + int32_t getCapacity() const { + return (fFlagAndLength >= 0) ? 
(int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity; + } + int32_t getLength() const { return fFlagAndLength & 0x7fffffff; } + + /** + * Set the CollationKey to a "bogus" or invalid state + * @return this CollationKey + */ + CollationKey& setToBogus(void); + /** + * Resets this CollationKey to an empty state + * @return this CollationKey + */ + CollationKey& reset(void); + + /** + * Allow private access to RuleBasedCollator + */ + friend class RuleBasedCollator; + friend class CollationKeyByteSink; + + // Class fields. sizeof(CollationKey) is intended to be 48 bytes + // on a machine with 64-bit pointers. + // We use a union to maximize the size of the internal buffer, + // similar to UnicodeString but not as tight and complex. + + // (implicit) *vtable; + /** + * Sort key length and flag. + * Bit 31 is set if the buffer is heap-allocated. + * Bits 30..0 contain the sort key length. + */ + int32_t fFlagAndLength; + /** + * Unique hash value of this CollationKey. + * Special value 2 if the key is bogus. + */ + mutable int32_t fHashCode; + /** + * fUnion provides 32 bytes for the internal buffer or for + * pointer+capacity. 
+ */ + union StackBufferOrFields { + /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */ + uint8_t fStackBuffer[32]; + struct { + uint8_t *fBytes; + int32_t fCapacity; + } fFields; + } fUnion; +}; + +inline UBool +CollationKey::operator!=(const CollationKey& other) const +{ + return !(*this == other); +} + +inline UBool +CollationKey::isBogus() const +{ + return fHashCode == 2; // kBogusHashCode +} + +inline const uint8_t* +CollationKey::getByteArray(int32_t &count) const +{ + count = getLength(); + return getBytes(); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/stsearch.h b/intl/icu/source/i18n/unicode/stsearch.h new file mode 100644 index 000000000..ec50151e2 --- /dev/null +++ b/intl/icu/source/i18n/unicode/stsearch.h @@ -0,0 +1,506 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2014 IBM and others. All rights reserved. +********************************************************************** +* Date Name Description +* 03/22/2000 helena Creation. +********************************************************************** +*/ + +#ifndef STSEARCH_H +#define STSEARCH_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Service for searching text based on RuleBasedCollator. + */ + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/tblcoll.h" +#include "unicode/coleitr.h" +#include "unicode/search.h" + +U_NAMESPACE_BEGIN + +/** + * + * <tt>StringSearch</tt> is a <tt>SearchIterator</tt> that provides + * language-sensitive text searching based on the comparison rules defined + * in a {@link RuleBasedCollator} object. + * StringSearch ensures that language eccentricity can be + * handled, e.g. 
for the German collator, characters ß and SS will be matched + * if case is chosen to be ignored. + * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> + * "ICU Collation Design Document"</a> for more information. + * <p> + * There are 2 match options for selection:<br> + * Let S' be the sub-string of a text string S between the offsets start and + * end [start, end]. + * <br> + * A pattern string P matches a text string S at the offsets [start, end] + * if + * <pre> + * option 1. Some canonical equivalent of P matches some canonical equivalent + * of S' + * option 2. P matches S' and if P starts or ends with a combining mark, + * there exists no non-ignorable combining mark before or after S' + * in S respectively. + * </pre> + * Option 2. will be the default. + * <p> + * This search has APIs similar to those of other text iteration mechanisms + * such as the break iterators in <tt>BreakIterator</tt>. Using these + * APIs, it is easy to scan through text looking for all occurrences of + * a given pattern. This search iterator allows changing of direction by + * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. + * Though a direction change can occur without calling <tt>reset</tt> first, + * this operation comes with some speed penalty. + * Match results in the forward direction will match the result matches in + * the backwards direction in the reverse order. + * <p> + * <tt>SearchIterator</tt> provides APIs to specify the starting position + * within the text string to be searched, e.g. <tt>setOffset</tt>, + * <tt>preceding</tt> and <tt>following</tt>. Since the + * starting position will be set as it is specified, please take note that + * there are some danger points at which the search may render incorrect + * results: + * <ul> + * <li> The midst of a substring that requires normalization. 
+ * <li> If the following match is to be found, the position should not be the + * second character which requires to be swapped with the preceding + * character. Vice versa, if the preceding match is to be found, + * position to search from should not be the first character which + * requires to be swapped with the next character. E.g. certain Thai and + * Lao characters require swapping. + * <li> If a following pattern match is to be found, any position within a + * contracting sequence except the first will fail. Vice versa if a + * preceding pattern match is to be found, an invalid starting point + * would be any character within a contracting sequence except the last. + * </ul> + * <p> + * A <tt>BreakIterator</tt> can be used if only matches at logical breaks are desired. + * Using a <tt>BreakIterator</tt> will only give you results that exactly match the + * boundaries given by the breakiterator. For instance the pattern "e" will + * not be found in the string "\u00e9" if a character break iterator is used. + * <p> + * Options are provided to handle overlapping matches. + * E.g. In English, overlapping matches produce the results 0 and 2 + * for the pattern "abab" in the text "ababab", whereas mutually + * exclusive matches only produce the result of 0. + * <p> + * Though collator attributes will be taken into consideration while + * performing matches, there are no APIs here for setting and getting the + * attributes. These attributes can be set by getting the collator + * from <tt>getCollator</tt> and using the APIs in <tt>coll.h</tt>. + * Lastly to update <tt>StringSearch</tt> to the new collator attributes, + * <tt>reset</tt> has to be called. + * <p> + * Restriction: <br> + * Currently there are no composite characters that consist of a + * character with combining class > 0 before a character with combining + * class == 0. However, if such a character exists in the future, + * <tt>StringSearch</tt> does not guarantee the results for option 1. 
+ * <p> + * Consult the <tt>SearchIterator</tt> documentation for information on + * and examples of how to use instances of this class to implement text + * searching. + * <pre><code> + * UnicodeString target("The quick brown fox jumps over the lazy dog."); + * UnicodeString pattern("fox"); + * + * UErrorCode error = U_ZERO_ERROR; + * StringSearch iter(pattern, target, Locale::getUS(), NULL, error); + * for (int pos = iter.first(error); + * pos != USEARCH_DONE; + * pos = iter.next(error)) + * { + * printf("Found match at %d pos, length is %d\n", pos, + * iter.getMatchLength()); + * } + * </code></pre> + * <p> + * Note, <tt>StringSearch</tt> is not to be subclassed. + * </p> + * @see SearchIterator + * @see RuleBasedCollator + * @since ICU 2.0 + */ + +class U_I18N_API StringSearch U_FINAL : public SearchIterator +{ +public: + + // public constructors and destructors -------------------------------- + + /** + * Creating a <tt>StringSearch</tt> instance using the argument locale + * language rule set. A collator will be created in the process, which + * will be owned by this instance and will be deleted during + * destruction + * @param pattern The text for which this object will search. + * @param text The text in which to search for the pattern. + * @param locale A locale which defines the language-sensitive + * comparison rules used to determine whether text in the + * pattern and target matches. + * @param breakiter A <tt>BreakIterator</tt> object used to constrain + * the matches that are found. Matches whose start and end + * indices in the target text are not boundaries as + * determined by the <tt>BreakIterator</tt> are + * ignored. If this behavior is not desired, + * <tt>NULL</tt> can be passed in instead. + * @param status for errors if any. If pattern or text is NULL, or if + * either the length of pattern or text is 0 then an + * U_ILLEGAL_ARGUMENT_ERROR is returned. 
+ * @stable ICU 2.0 + */ + StringSearch(const UnicodeString &pattern, const UnicodeString &text, + const Locale &locale, + BreakIterator *breakiter, + UErrorCode &status); + + /** + * Creating a <tt>StringSearch</tt> instance using the argument collator + * language rule set. Note, user retains the ownership of this collator, + * it does not get destroyed during this instance's destruction. + * @param pattern The text for which this object will search. + * @param text The text in which to search for the pattern. + * @param coll A <tt>RuleBasedCollator</tt> object which defines + * the language-sensitive comparison rules used to + * determine whether text in the pattern and target + * matches. User is responsible for the clearing of this + * object. + * @param breakiter A <tt>BreakIterator</tt> object used to constrain + * the matches that are found. Matches whose start and end + * indices in the target text are not boundaries as + * determined by the <tt>BreakIterator</tt> are + * ignored. If this behavior is not desired, + * <tt>NULL</tt> can be passed in instead. + * @param status for errors if any. If either the length of pattern or + * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned. + * @stable ICU 2.0 + */ + StringSearch(const UnicodeString &pattern, + const UnicodeString &text, + RuleBasedCollator *coll, + BreakIterator *breakiter, + UErrorCode &status); + + /** + * Creating a <tt>StringSearch</tt> instance using the argument locale + * language rule set. A collator will be created in the process, which + * will be owned by this instance and will be deleted during + * destruction + * <p> + * Note: No parsing of the text within the <tt>CharacterIterator</tt> + * will be done during searching for this version. The block of text + * in <tt>CharacterIterator</tt> will be used as it is. + * @param pattern The text for which this object will search. + * @param text The text iterator in which to search for the pattern. 
+ * @param locale A locale which defines the language-sensitive + * comparison rules used to determine whether text in the + * pattern and target matches. User is responsible for + * the clearing of this object. + * @param breakiter A <tt>BreakIterator</tt> object used to constrain + * the matches that are found. Matches whose start and end + * indices in the target text are not boundaries as + * determined by the <tt>BreakIterator</tt> are + * ignored. If this behavior is not desired, + * <tt>NULL</tt> can be passed in instead. + * @param status for errors if any. If either the length of pattern or + * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned. + * @stable ICU 2.0 + */ + StringSearch(const UnicodeString &pattern, CharacterIterator &text, + const Locale &locale, + BreakIterator *breakiter, + UErrorCode &status); + + /** + * Creating a <tt>StringSearch</tt> instance using the argument collator + * language rule set. Note, user retains the ownership of this collator, + * it does not get destroyed during this instance's destruction. + * <p> + * Note: No parsing of the text within the <tt>CharacterIterator</tt> + * will be done during searching for this version. The block of text + * in <tt>CharacterIterator</tt> will be used as it is. + * @param pattern The text for which this object will search. + * @param text The text in which to search for the pattern. + * @param coll A <tt>RuleBasedCollator</tt> object which defines + * the language-sensitive comparison rules used to + * determine whether text in the pattern and target + * matches. User is responsible for the clearing of this + * object. + * @param breakiter A <tt>BreakIterator</tt> object used to constrain + * the matches that are found. Matches whose start and end + * indices in the target text are not boundaries as + * determined by the <tt>BreakIterator</tt> are + * ignored. If this behavior is not desired, + * <tt>NULL</tt> can be passed in instead. + * @param status for errors if any. 
If either the length of pattern or + * text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned. + * @stable ICU 2.0 + */ + StringSearch(const UnicodeString &pattern, CharacterIterator &text, + RuleBasedCollator *coll, + BreakIterator *breakiter, + UErrorCode &status); + + /** + * Copy constructor that creates a StringSearch instance with the same + * behavior, and iterating over the same text. + * @param that StringSearch instance to be copied. + * @stable ICU 2.0 + */ + StringSearch(const StringSearch &that); + + /** + * Destructor. Cleans up the search iterator data struct. + * If a collator is created in the constructor, it will be destroyed here. + * @stable ICU 2.0 + */ + virtual ~StringSearch(void); + + /** + * Clone this object. + * Clones can be used concurrently in multiple threads. + * If an error occurs, then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + StringSearch *clone() const; + + // operator overloading --------------------------------------------- + + /** + * Assignment operator. Sets this iterator to have the same behavior, + * and iterate over the same text, as the one passed in. + * @param that instance to be copied. + * @stable ICU 2.0 + */ + StringSearch & operator=(const StringSearch &that); + + /** + * Equality operator. + * @param that instance to be compared. + * @return TRUE if both instances have the same attributes, + * breakiterators, collators and iterate over the same text + * while looking for the same pattern. + * @stable ICU 2.0 + */ + virtual UBool operator==(const SearchIterator &that) const; + + // public get and set methods ---------------------------------------- + + /** + * Sets the index to point to the given position, and clears any state + * that's affected. 
+ * <p> + * This method takes the argument index and sets the position in the text + * string accordingly without checking if the index is pointing to a + * valid starting point to begin searching. + * @param position within the text to be set. If position is less + * than or greater than the text range for searching, + * an U_INDEX_OUTOFBOUNDS_ERROR will be returned + * @param status for errors if it occurs + * @stable ICU 2.0 + */ + virtual void setOffset(int32_t position, UErrorCode &status); + + /** + * Return the current index in the text being searched. + * If the iteration has gone past the end of the text + * (or past the beginning for a backwards search), USEARCH_DONE + * is returned. + * @return current index in the text being searched. + * @stable ICU 2.0 + */ + virtual int32_t getOffset(void) const; + + /** + * Set the target text to be searched. + * Text iteration will hence begin at the start of the text string. + * This method is + * useful if you want to re-use an iterator to search for the same + * pattern within a different body of text. + * @param text text string to be searched + * @param status for errors if any. If the text length is 0 then an + * U_ILLEGAL_ARGUMENT_ERROR is returned. + * @stable ICU 2.0 + */ + virtual void setText(const UnicodeString &text, UErrorCode &status); + + /** + * Set the target text to be searched. + * Text iteration will hence begin at the start of the text string. + * This method is + * useful if you want to re-use an iterator to search for the same + * pattern within a different body of text. + * Note: No parsing of the text within the <tt>CharacterIterator</tt> + * will be done during searching for this version. The block of text + * in <tt>CharacterIterator</tt> will be used as it is. + * @param text text string to be searched + * @param status for errors if any. If the text length is 0 then an + * U_ILLEGAL_ARGUMENT_ERROR is returned. 
+ * @stable ICU 2.0 + */ + virtual void setText(CharacterIterator &text, UErrorCode &status); + + /** + * Gets the collator used for the language rules. + * <p> + * Caller may modify but <b>must not</b> delete the <tt>RuleBasedCollator</tt>! + * Modifications to this collator will affect the original collator passed in to + * the <tt>StringSearch</tt> constructor or to setCollator, if any. + * @return collator used for string search + * @stable ICU 2.0 + */ + RuleBasedCollator * getCollator() const; + + /** + * Sets the collator used for the language rules. User retains the + * ownership of this collator, thus the responsibility of deletion lies + * with the user. The iterator's position will not be changed by this method. + * @param coll collator + * @param status for errors if any + * @stable ICU 2.0 + */ + void setCollator(RuleBasedCollator *coll, UErrorCode &status); + + /** + * Sets the pattern used for matching. + * The iterator's position will not be changed by this method. + * @param pattern search pattern to be found + * @param status for errors if any. If the pattern length is 0 then an + * U_ILLEGAL_ARGUMENT_ERROR is returned. + * @stable ICU 2.0 + */ + void setPattern(const UnicodeString &pattern, UErrorCode &status); + + /** + * Gets the search pattern. + * @return pattern used for matching + * @stable ICU 2.0 + */ + const UnicodeString & getPattern() const; + + // public methods ---------------------------------------------------- + + /** + * Reset the iteration. + * Search will begin at the start of the text string if a forward + * iteration is initiated before a backwards iteration. Otherwise if + * a backwards iteration is initiated before a forwards iteration, the + * search will begin at the end of the text string. + * @stable ICU 2.0 + */ + virtual void reset(); + + /** + * Returns a copy of StringSearch with the same behavior, and + * iterating over the same text, as this one. 
Note that all data will be + * replicated, except for the user-specified collator and the + * breakiterator. + * @return cloned object + * @stable ICU 2.0 + */ + virtual SearchIterator * safeClone(void) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +protected: + + // protected method ------------------------------------------------- + + /** + * Search forward for matching text, starting at a given location. + * Clients should not call this method directly; instead they should + * call {@link SearchIterator#next }. + * <p> + * If a match is found, this method returns the index at which the match + * starts and calls {@link SearchIterator#setMatchLength } with the number + * of characters in the target text that make up the match. If no match + * is found, the method returns <tt>USEARCH_DONE</tt>. + * <p> + * The <tt>StringSearch</tt> is adjusted so that its current index + * (as returned by {@link #getOffset }) is the match position if one was + * found. + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and + * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE. + * @param position The index in the target text at which the search + * starts + * @param status for errors if any occurs + * @return The index at which the matched text in the target starts, or + * USEARCH_DONE if no match was found. + * @stable ICU 2.0 + */ + virtual int32_t handleNext(int32_t position, UErrorCode &status); + + /** + * Search backward for matching text, starting at a given location. + * Clients should not call this method directly; instead they should call + * <tt>SearchIterator.previous()</tt>, which this method overrides. 
+ * <p> + * If a match is found, this method returns the index at which the match + * starts and calls {@link SearchIterator#setMatchLength } with the number + * of characters in the target text that make up the match. If no match + * is found, the method returns <tt>USEARCH_DONE</tt>. + * <p> + * The <tt>StringSearch</tt> is adjusted so that its current index + * (as returned by {@link #getOffset }) is the match position if one was + * found. + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and + * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE. + * @param position The index in the target text at which the search + * starts. + * @param status for errors if any occurs + * @return The index at which the matched text in the target starts, or + * USEARCH_DONE if no match was found. + * @stable ICU 2.0 + */ + virtual int32_t handlePrev(int32_t position, UErrorCode &status); + +private : + StringSearch(); // default constructor not implemented + + // private data members ---------------------------------------------- + + /** + * Pattern text + * @stable ICU 2.0 + */ + UnicodeString m_pattern_; + /** + * String search struct data + * @stable ICU 2.0 + */ + UStringSearch *m_strsrch_; + +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif + diff --git a/intl/icu/source/i18n/unicode/tblcoll.h b/intl/icu/source/i18n/unicode/tblcoll.h new file mode 100644 index 000000000..c48ea38c1 --- /dev/null +++ b/intl/icu/source/i18n/unicode/tblcoll.h @@ -0,0 +1,877 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1996-2016, International Business Machines Corporation and +* others. All Rights Reserved. 
+****************************************************************************** +*/ + +/** + * \file + * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class. + */ + +/** +* File tblcoll.h +* +* Created by: Helena Shih +* +* Modification History: +* +* Date Name Description +* 2/5/97 aliu Added streamIn and streamOut methods. Added +* constructor which reads RuleBasedCollator object from +* a binary file. Added writeToFile method which streams +* RuleBasedCollator out to a binary file. The streamIn +* and streamOut methods use istream and ostream objects +* in binary mode. +* 2/12/97 aliu Modified to use TableCollationData sub-object to +* hold invariant data. +* 2/13/97 aliu Moved several methods into this class from Collation. +* Added a private RuleBasedCollator(Locale&) constructor, +* to be used by Collator::createDefault(). General +* clean up. +* 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy +* constructor and getDynamicClassID. +* 3/5/97 aliu Modified constructFromFile() to add parameter +* specifying whether or not binary loading is to be +* attempted. This is required for dynamic rule loading. +* 05/07/97 helena Added memory allocation error detection. +* 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to +* use MergeCollation::getPattern. +* 6/20/97 helena Java class name change. +* 8/18/97 helena Added internal API documentation. +* 09/03/97 helena Added createCollationKeyValues(). +* 02/10/98 damiba Added compare with "length" parameter +* 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java +* 04/23/99 stephen Removed EDecompositionMode, merged with +* Normalizer::EMode +* 06/14/99 stephen Removed kResourceBundleSuffix +* 11/02/99 helena Collator performance enhancements. Eliminates the +* UnicodeString construction and special case for NO_OP. +* 11/23/99 srl More performance enhancements. Updates to NormalizerIterator +* internal state management. 
+* 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator +* to implementation file. +* 01/29/01 synwee Modified into a C++ wrapper which calls C API +* (ucol.h) +* 2012-2014 markus Rewritten in C++ again. +*/ + +#ifndef TBLCOLL_H +#define TBLCOLL_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/coll.h" +#include "unicode/locid.h" +#include "unicode/uiter.h" +#include "unicode/ucol.h" + +U_NAMESPACE_BEGIN + +struct CollationCacheEntry; +struct CollationData; +struct CollationSettings; +struct CollationTailoring; +/** +* @stable ICU 2.0 +*/ +class StringSearch; +/** +* @stable ICU 2.0 +*/ +class CollationElementIterator; +class CollationKey; +class SortKeyByteSink; +class UnicodeSet; +class UnicodeString; +class UVector64; + +/** + * The RuleBasedCollator class provides the implementation of + * Collator, using data-driven tables. The user can create a customized + * table-based collation. + * <p> + * For more information about the collation service see + * <a href="http://userguide.icu-project.org/collation">the User Guide</a>. + * <p> + * Collation service provides correct sorting orders for most locales supported in ICU. + * If specific data for a locale is not available, the orders eventually falls back + * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. + * <p> + * Sort ordering may be customized by providing your own set of rules. For more on + * this subject see the <a href="http://userguide.icu-project.org/collation/customization"> + * Collation Customization</a> section of the User Guide. + * <p> + * Note, RuleBasedCollator is not to be subclassed. + * @see Collator + */ +class U_I18N_API RuleBasedCollator : public Collator { +public: + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. 
+ * @param rules the collation rules to build the collation table from. + * @param status reporting a success or an error. + * @stable ICU 2.0 + */ + RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); + + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. + * @param rules the collation rules to build the collation table from. + * @param collationStrength strength for comparison + * @param status reporting a success or an error. + * @stable ICU 2.0 + */ + RuleBasedCollator(const UnicodeString& rules, + ECollationStrength collationStrength, + UErrorCode& status); + + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. + * @param rules the collation rules to build the collation table from. + * @param decompositionMode the normalisation mode + * @param status reporting a success or an error. + * @stable ICU 2.0 + */ + RuleBasedCollator(const UnicodeString& rules, + UColAttributeValue decompositionMode, + UErrorCode& status); + + /** + * RuleBasedCollator constructor. This takes the table rules and builds a + * collation table out of them. Please see RuleBasedCollator class + * description for more details on the collation rule syntax. + * @param rules the collation rules to build the collation table from. + * @param collationStrength strength for comparison + * @param decompositionMode the normalisation mode + * @param status reporting a success or an error. 
+ * @stable ICU 2.0 + */ + RuleBasedCollator(const UnicodeString& rules, + ECollationStrength collationStrength, + UColAttributeValue decompositionMode, + UErrorCode& status); + +#ifndef U_HIDE_INTERNAL_API + /** + * TODO: document & propose as public API + * @internal + */ + RuleBasedCollator(const UnicodeString &rules, + UParseError &parseError, UnicodeString &reason, + UErrorCode &errorCode); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Copy constructor. + * @param other the RuleBasedCollator object to be copied + * @stable ICU 2.0 + */ + RuleBasedCollator(const RuleBasedCollator& other); + + + /** Opens a collator from a collator binary image created using + * cloneBinary. Binary image used in instantiation of the + * collator remains owned by the user and should stay around for + * the lifetime of the collator. The API also takes a base collator + * which must be the root collator. + * @param bin binary image owned by the user and required through the + * lifetime of the collator + * @param length size of the image. If negative, the API will try to + * figure out the length of the image + * @param base Base collator, for lookup of untailored characters. + * Must be the root collator, must not be NULL. + * The base is required to be present through the lifetime of the collator. + * @param status for catching errors + * @return newly created collator + * @see cloneBinary + * @stable ICU 3.4 + */ + RuleBasedCollator(const uint8_t *bin, int32_t length, + const RuleBasedCollator *base, + UErrorCode &status); + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~RuleBasedCollator(); + + /** + * Assignment operator. + * @param other other RuleBasedCollator object to copy from. + * @stable ICU 2.0 + */ + RuleBasedCollator& operator=(const RuleBasedCollator& other); + + /** + * Returns true if argument is the same as this object. + * @param other Collator object to be compared. + * @return true if arguments is the same as this object. 
+ * @stable ICU 2.0 + */ + virtual UBool operator==(const Collator& other) const; + + /** + * Makes a copy of this object. + * @return a copy of this object, owned by the caller + * @stable ICU 2.0 + */ + virtual Collator* clone(void) const; + + /** + * Creates a collation element iterator for the source string. The caller of + * this method is responsible for the memory management of the return + * pointer. + * @param source the string over which the CollationElementIterator will + * iterate. + * @return the collation element iterator of the source string using this as + * the based Collator. + * @stable ICU 2.2 + */ + virtual CollationElementIterator* createCollationElementIterator( + const UnicodeString& source) const; + + /** + * Creates a collation element iterator for the source. The caller of this + * method is responsible for the memory management of the returned pointer. + * @param source the CharacterIterator which produces the characters over + * which the CollationElementIterator will iterate. + * @return the collation element iterator of the source using this as the + * based Collator. + * @stable ICU 2.2 + */ + virtual CollationElementIterator* createCollationElementIterator( + const CharacterIterator& source) const; + + // Make deprecated versions of Collator::compare() visible. + using Collator::compare; + + /** + * The comparison function compares the character data stored in two + * different strings. Returns information about whether a string is less + * than, greater than or equal to another string. + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param status possible error code + * @return Returns an enum value. 
UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + **/ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + UErrorCode &status) const; + + /** + * Does the same thing as compare but limits the comparison to a specified + * length + * @param source the source string to be compared with. + * @param target the string that is to be compared with the source string. + * @param length the length the comparison is limited to + * @param status possible error code + * @return Returns an enum value. UCOL_GREATER if source (up to the specified + * length) is greater than target; UCOL_EQUAL if source (up to specified + * length) is equal to target; UCOL_LESS if source (up to the specified + * length) is less than target. + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length, + UErrorCode &status) const; + + /** + * The comparison function compares the character data stored in two + * different string arrays. Returns information about whether a string array + * is less than, greater than or equal to another string array. + * @param source the source string array to be compared with. + * @param sourceLength the length of the source string array. If this value + * is equal to -1, the string array is null-terminated. + * @param target the string that is to be compared with the source string. + * @param targetLength the length of the target string array. If this value + * is equal to -1, the string array is null-terminated. + * @param status possible error code + * @return Returns an enum value. 
UCOL_GREATER if source is greater + * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less + * than target + * @stable ICU 2.6 + */ + virtual UCollationResult compare(const UChar* source, int32_t sourceLength, + const UChar* target, int32_t targetLength, + UErrorCode &status) const; + + /** + * Compares two strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. + * This version takes UCharIterator input. + * @param sIter the first ("source") string iterator + * @param tIter the second ("target") string iterator + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 4.2 + */ + virtual UCollationResult compare(UCharIterator &sIter, + UCharIterator &tIter, + UErrorCode &status) const; + + /** + * Compares two UTF-8 strings using the Collator. + * Returns whether the first one compares less than/equal to/greater than + * the second one. + * This version takes UTF-8 input. + * Note that a StringPiece can be implicitly constructed + * from a std::string or a NUL-terminated const char * string. + * @param source the first UTF-8 string + * @param target the second UTF-8 string + * @param status ICU status + * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER + * @stable ICU 51 + */ + virtual UCollationResult compareUTF8(const StringPiece &source, + const StringPiece &target, + UErrorCode &status) const; + + /** + * Transforms the string into a series of characters + * that can be compared with CollationKey.compare(). + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source the source string. + * @param key the transformed key of the source string. + * @param status the error code status. + * @return the transformed key. 
+ * @see CollationKey + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const UnicodeString& source, + CollationKey& key, + UErrorCode& status) const; + + /** + * Transforms a specified region of the string into a series of characters + * that can be compared with CollationKey.compare. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source the source string. + * @param sourceLength the length of the source string. + * @param key the transformed key of the source string. + * @param status the error code status. + * @return the transformed key. + * @see CollationKey + * @stable ICU 2.0 + */ + virtual CollationKey& getCollationKey(const UChar *source, + int32_t sourceLength, + CollationKey& key, + UErrorCode& status) const; + + /** + * Generates the hash code for the rule-based collation object. + * @return the hash code. + * @stable ICU 2.0 + */ + virtual int32_t hashCode() const; + + /** + * Gets the locale of the Collator + * @param type can be either requested, valid or actual locale. For more + * information see the definition of ULocDataLocaleType in + * uloc.h + * @param status the error code status. + * @return locale where the collation data lives. If the collator + * was instantiated from rules, locale is empty. + * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback + */ + virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + + /** + * Gets the tailoring rules for this collator. + * @return the collation tailoring from which this collator was created + * @stable ICU 2.0 + */ + const UnicodeString& getRules() const; + + /** + * Gets the version information for a Collator. 
+ * @param info the version # information, the result will be filled in + * @stable ICU 2.0 + */ + virtual void getVersion(UVersionInfo info) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Returns the maximum length of any expansion sequences that end with the + * specified comparison order. + * + * This is specific to the kind of collation element values and sequences + * returned by the CollationElementIterator. + * Call CollationElementIterator::getMaxExpansion() instead. + * + * @param order a collation order returned by CollationElementIterator::previous + * or CollationElementIterator::next. + * @return maximum size of the expansion sequences ending with the collation + * element, or 1 if the collation element does not occur at the end of + * any expansion sequence + * @see CollationElementIterator#getMaxExpansion + * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead. + */ + int32_t getMaxExpansion(int32_t order) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * @return The class ID for this object. All objects of a given class have + * the same class ID. Objects of other classes have different class + * IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Returns the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * Base* polymorphic_pointer = createPolymorphicObject(); + * if (polymorphic_pointer->getDynamicClassID() == + * Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. 
+ * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Do not use this method: The caller and the ICU library might use different heaps. + * Use cloneBinary() instead which writes to caller-provided memory. + * + * Returns a binary format of this collator. + * @param length Returns the length of the data, in bytes + * @param status the error code status. + * @return memory, owned by the caller, of size 'length' bytes. + * @deprecated ICU 52. Use cloneBinary() instead. + */ + uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** Creates a binary image of a collator. This binary image can be stored and + * later used to instantiate a collator using ucol_openBinary. + * This API supports preflighting. + * @param buffer a fill-in buffer to receive the binary image + * @param capacity capacity of the destination buffer + * @param status for catching errors + * @return size of the image + * @see ucol_openBinary + * @stable ICU 3.4 + */ + int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const; + + /** + * Returns current rules. Delta defines whether full rules are returned or + * just the tailoring. + * + * getRules(void) should normally be used instead. + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales + * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
+ * @param buffer UnicodeString to store the result rules + * @stable ICU 2.2 + * @see UCOL_FULL_RULES + */ + void getRules(UColRuleOption delta, UnicodeString &buffer) const; + + /** + * Universal attribute setter + * @param attr attribute type + * @param value attribute value + * @param status to indicate whether the operation went on smoothly or there were errors + * @stable ICU 2.2 + */ + virtual void setAttribute(UColAttribute attr, UColAttributeValue value, + UErrorCode &status); + + /** + * Universal attribute getter. + * @param attr attribute type + * @param status to indicate whether the operation went on smoothly or there were errors + * @return attribute value + * @stable ICU 2.2 + */ + virtual UColAttributeValue getAttribute(UColAttribute attr, + UErrorCode &status) const; + + /** + * Sets the variable top to the top of the specified reordering group. + * The variable top determines the highest-sorting character + * which is affected by UCOL_ALTERNATE_HANDLING. + * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. + * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, + * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; + * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @see getMaxVariable + * @stable ICU 53 + */ + virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode); + + /** + * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. + * @return the maximum variable reordering group. 
+ * @see setMaxVariable + * @stable ICU 53 + */ + virtual UColReorderCode getMaxVariable() const; + + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param len length of variable top string. If -1 it is considered to be zero terminated. + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> + * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); + + /** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set + * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> + * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by setMaxVariable() + * @return variable top primary weight + * @deprecated ICU 53 Call setMaxVariable() instead. 
+ */ + virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status); + + /** + * Sets the variable top to the specified primary weight. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See setMaxVariable(). + * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop + * @param status error code + * @deprecated ICU 53 Call setMaxVariable() instead. + */ + virtual void setVariableTop(uint32_t varTop, UErrorCode &status); + + /** + * Gets the variable top value of a Collator. + * @param status error code (not changed by function). If error code is set, the return value is undefined. + * @return the variable top primary weight + * @see getMaxVariable + * @stable ICU 2.0 + */ + virtual uint32_t getVariableTop(UErrorCode &status) const; + + /** + * Get a UnicodeSet that contains all the characters and sequences tailored in + * this collator. + * @param status error code of the operation + * @return a pointer to a UnicodeSet object containing all the + * code points and sequences that may sort differently than + * in the root collator. The object must be disposed of by using delete + * @stable ICU 2.4 + */ + virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; + + /** + * Get the sort key as an array of bytes from a UnicodeString. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param result buffer to store result in. If NULL, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. If it is not large enough, the + * buffer will be filled to capacity. 
+ * @return Number of bytes needed for storing the sort key + * @stable ICU 2.0 + */ + virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, + int32_t resultLength) const; + + /** + * Get the sort key as an array of bytes from a UChar buffer. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * @param source string to be processed. + * @param sourceLength length of string to be processed. If -1, the string + * is 0 terminated and length will be decided by the function. + * @param result buffer to store result in. If NULL, number of bytes needed + * will be returned. + * @param resultLength length of the result buffer. If it is not large enough, the + * buffer will be filled to capacity. + * @return Number of bytes needed for storing the sort key + * @stable ICU 2.2 + */ + virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, + uint8_t *result, int32_t resultLength) const; + + /** + * Retrieves the reordering codes for this collator. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function + * will only return the length of the result without writing any codes (pre-flighting). + * @param status A reference to an error code value, which must not indicate + * a failure before the function call. + * @return The length of the script ordering array. + * @see ucol_setReorderCodes + * @see Collator#getEquivalentReorderCodes + * @see Collator#setReorderCodes + * @stable ICU 4.8 + */ + virtual int32_t getReorderCodes(int32_t *dest, + int32_t destCapacity, + UErrorCode& status) const; + + /** + * Sets the ordering of scripts for this collator. + * @param reorderCodes An array of script codes in the new order. This can be NULL if the + * length is also set to 0. An empty array will clear any reordering codes on the collator. 
+ * @param reorderCodesLength The length of reorderCodes. + * @param status error code + * @see ucol_setReorderCodes + * @see Collator#getReorderCodes + * @see Collator#getEquivalentReorderCodes + * @stable ICU 4.8 + */ + virtual void setReorderCodes(const int32_t* reorderCodes, + int32_t reorderCodesLength, + UErrorCode& status) ; + + /** + * Implements ucol_strcollUTF8(). + * @internal + */ + virtual UCollationResult internalCompareUTF8( + const char *left, int32_t leftLength, + const char *right, int32_t rightLength, + UErrorCode &errorCode) const; + + /** Get the short definition string for a collator. This internal API harvests the collator's + * locale and the attribute set and produces a string that can be used for opening + * a collator with the same attributes using the ucol_openFromShortString API. + * This string will be normalized. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme + * This function supports preflighting. + * + * This is internal, and intended to be used with delegate converters. + * + * @param locale a locale that will appear as a collators locale in the resulting + * short string definition. If NULL, the locale will be harvested + * from the collator. + * @param buffer space to hold the resulting string + * @param capacity capacity of the buffer + * @param status for returning errors. All the preflighting errors are featured + * @return length of the resulting string + * @see ucol_openFromShortString + * @see ucol_normalizeShortDefinitionString + * @see ucol_getShortDefinitionString + * @internal + */ + virtual int32_t internalGetShortDefinitionString(const char *locale, + char *buffer, + int32_t capacity, + UErrorCode &status) const; + + /** + * Implements ucol_nextSortKeyPart(). 
+ * @internal + */ + virtual int32_t internalNextSortKeyPart( + UCharIterator *iter, uint32_t state[2], + uint8_t *dest, int32_t count, UErrorCode &errorCode) const; + + // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API + /** + * Only for use in ucol_openRules(). + * @internal + */ + RuleBasedCollator(); + +#ifndef U_HIDE_INTERNAL_API + /** + * Implements ucol_getLocaleByType(). + * Needed because the lifetime of the locale ID string must match that of the collator. + * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper. + * @internal + */ + const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const; + + /** + * Implements ucol_getContractionsAndExpansions(). + * Gets this collator's sets of contraction strings and/or + * characters and strings that map to multiple collation elements (expansions). + * If addPrefixes is TRUE, then contractions that are expressed as + * prefix/pre-context rules are included. + * @param contractions if not NULL, the set to hold the contractions + * @param expansions if not NULL, the set to hold the expansions + * @param addPrefixes include prefix contextual mappings + * @param errorCode in/out ICU error code + * @internal + */ + void internalGetContractionsAndExpansions( + UnicodeSet *contractions, UnicodeSet *expansions, + UBool addPrefixes, UErrorCode &errorCode) const; + + /** + * Adds the contractions that start with character c to the set. + * Ignores prefixes. Used by AlphabeticIndex. + * @internal + */ + void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const; + + /** + * Implements from-rule constructors, and ucol_openRules(). 
+ * @internal + */ + void internalBuildTailoring( + const UnicodeString &rules, + int32_t strength, + UColAttributeValue decompositionMode, + UParseError *outParseError, UnicodeString *outReason, + UErrorCode &errorCode); + + /** + * Converts a C API UCollator pointer to a RuleBasedCollator pointer. + * Applies dynamic_cast to the result of fromUCollator(uc), so the result is NULL + * when that object is not a RuleBasedCollator. + * @param uc the C API collator handle + * @return the RuleBasedCollator pointer, or NULL if the cast fails + * @internal + */ + static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) { + return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc)); + } + /** + * Const overload: converts a const C API UCollator pointer to a + * const RuleBasedCollator pointer via dynamic_cast on fromUCollator(uc). + * @param uc the C API collator handle + * @return the const RuleBasedCollator pointer, or NULL if the cast fails + * @internal + */ + static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) { + return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc)); + } + + /** + * Appends the CEs for the string to the vector. + * @internal for tests & tools + */ + void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const; +#endif // U_HIDE_INTERNAL_API + +protected: + /** + * Used internally by registration to define the requested and valid locales. + * @param requestedLocale the requested locale + * @param validLocale the valid locale + * @param actualLocale the actual locale + * @internal + */ + virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); + +private: + friend class CollationElementIterator; + friend class Collator; + + RuleBasedCollator(const CollationCacheEntry *entry); + + /** + * Enumeration of attributes that are relevant for short definition strings + * (e.g., ucol_getShortDefinitionString()). + * Effectively extends UColAttribute. + */ + enum Attributes { + ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT, + ATTR_LIMIT + }; + + void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode); + + // Both lengths must be <0 or else both must be >=0. 
+ UCollationResult doCompare(const UChar *left, int32_t leftLength, + const UChar *right, int32_t rightLength, + UErrorCode &errorCode) const; + UCollationResult doCompare(const uint8_t *left, int32_t leftLength, + const uint8_t *right, int32_t rightLength, + UErrorCode &errorCode) const; + + void writeSortKey(const UChar *s, int32_t length, + SortKeyByteSink &sink, UErrorCode &errorCode) const; + + void writeIdenticalLevel(const UChar *s, const UChar *limit, + SortKeyByteSink &sink, UErrorCode &errorCode) const; + + const CollationSettings &getDefaultSettings() const; + + /** + * Clears the bit numbered by attribute in the explicitlySetAttributes bit mask. + * @param attribute bit index into explicitlySetAttributes + */ + void setAttributeDefault(int32_t attribute) { + explicitlySetAttributes &= ~((uint32_t)1 << attribute); + } + /** + * Sets the bit numbered by attribute in the explicitlySetAttributes bit mask. + * @param attribute bit index into explicitlySetAttributes + */ + void setAttributeExplicitly(int32_t attribute) { + explicitlySetAttributes |= (uint32_t)1 << attribute; + } + /** + * Tests the bit numbered by attribute in the explicitlySetAttributes bit mask. + * @param attribute bit index into explicitlySetAttributes + * @return non-zero (as UBool) if that bit is set, otherwise zero + */ + UBool attributeHasBeenSetExplicitly(int32_t attribute) const { + // assert(0 <= attribute < ATTR_LIMIT); + return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0); + } + + /** + * Tests whether a character is "unsafe" for use as a collation starting point. 
+ * + * @param c code point or code unit + * @return TRUE if c is unsafe + * @see CollationElementIterator#setOffset(int) + */ + UBool isUnsafe(UChar32 c) const; + + static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode); + UBool initMaxExpansions(UErrorCode &errorCode) const; + + void setFastLatinOptions(CollationSettings &ownedSettings) const; + + const CollationData *data; + const CollationSettings *settings; // reference-counted + const CollationTailoring *tailoring; // alias of cacheEntry->tailoring + const CollationCacheEntry *cacheEntry; // reference-counted + Locale validLocale; + uint32_t explicitlySetAttributes; + + UBool actualLocaleIsSameAsValid; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_COLLATION +#endif // TBLCOLL_H diff --git a/intl/icu/source/i18n/unicode/timezone.h b/intl/icu/source/i18n/unicode/timezone.h new file mode 100644 index 000000000..4254cc7c9 --- /dev/null +++ b/intl/icu/source/i18n/unicode/timezone.h @@ -0,0 +1,968 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/************************************************************************* +* Copyright (c) 1997-2016, International Business Machines Corporation +* and others. All Rights Reserved. +************************************************************************** +* +* File TIMEZONE.H +* +* Modification History: +* +* Date Name Description +* 04/21/97 aliu Overhauled header. +* 07/09/97 helena Changed createInstance to createDefault. +* 08/06/97 aliu Removed dependency on internal header for Hashtable. +* 08/10/98 stephen Changed getDisplayName() API conventions to match +* 08/19/98 stephen Changed createTimeZone() to never return 0 +* 09/02/98 stephen Sync to JDK 1.2 8/31 +* - Added getOffset(... monthlen ...) +* - Added hasSameRules() +* 09/15/98 stephen Added getStaticClassID +* 12/03/99 aliu Moved data out of static table into icudata.dll. 
+* Hashtable replaced by new static data structures. +* 12/14/99 aliu Made GMT public. +* 08/15/01 grhoten Made GMT private and added the getGMT() function +************************************************************************** +*/ + +#ifndef TIMEZONE_H +#define TIMEZONE_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: TimeZone object + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/ures.h" +#include "unicode/ucal.h" + +U_NAMESPACE_BEGIN + +class StringEnumeration; + +/** + * + * <code>TimeZone</code> represents a time zone offset, and also figures out daylight + * savings. + * + * <p> + * Typically, you get a <code>TimeZone</code> using <code>createDefault</code> + * which creates a <code>TimeZone</code> based on the time zone where the program + * is running. For example, for a program running in Japan, <code>createDefault</code> + * creates a <code>TimeZone</code> object based on Japanese Standard Time. + * + * <p> + * You can also get a <code>TimeZone</code> using <code>createTimeZone</code> along + * with a time zone ID. For instance, the time zone ID for the US Pacific + * Time zone is "America/Los_Angeles". So, you can get a Pacific Time <code>TimeZone</code> object + * with: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * TimeZone *tz = TimeZone::createTimeZone("America/Los_Angeles"); + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * You can use the <code>createEnumeration</code> method to iterate through + * all the supported time zone IDs, or the <code>getCanonicalID</code> method to check + * if a time zone ID is supported or not. You can then choose a + * supported ID to get a <code>TimeZone</code>. 
+ * If the time zone you want is not represented by one of the + * supported IDs, then you can create a custom time zone ID with + * the following syntax: + * + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * GMT[+|-]hh[[:]mm] + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * For example, you might specify GMT+14:00 as a custom + * time zone ID. The <code>TimeZone</code> that is returned + * when you specify a custom time zone ID uses the specified + * offset from GMT(=UTC) and does not observe daylight saving + * time. For example, you might specify GMT+14:00 as a custom + * time zone ID to create a TimeZone representing 14 hours ahead + * of GMT (with no daylight saving time). In addition, + * <code>getCanonicalID</code> can also be used to + * normalize a custom time zone ID. + * + * TimeZone is an abstract class representing a time zone. A TimeZone is needed for + * Calendar to produce local time for a particular time zone. A TimeZone comprises + * three basic pieces of information: + * <ul> + * <li>A time zone offset; that is, the number of milliseconds to add or subtract + * from a time expressed in terms of GMT to convert it to the same time in that + * time zone (without taking daylight savings time into account).</li> + * <li>Logic necessary to take daylight savings time into account if daylight savings + * time is observed in that time zone (e.g., the days and hours on which daylight + * savings time begins and ends).</li> + * <li>An ID. This is a text string that uniquely identifies the time zone.</li> + * </ul> + * + * (Only the ID is actually implemented in TimeZone; subclasses of TimeZone may handle + * daylight savings time and GMT offset in different ways. Currently we have the following + * TimeZone subclasses: RuleBasedTimeZone, SimpleTimeZone, and VTimeZone.) 
+ * <P> + * The TimeZone class contains a static list containing a TimeZone object for every + * combination of GMT offset and daylight-savings time rules currently in use in the + * world, each with a unique ID. Each ID consists of a region (usually a continent or + * ocean) and a city in that region, separated by a slash, (for example, US Pacific + * Time is "America/Los_Angeles.") Because older versions of this class used + * three- or four-letter abbreviations instead, there is also a table that maps the older + * abbreviations to the newer ones (for example, "PST" maps to "America/Los_Angeles"). + * Anywhere the API requires an ID, you can use either form. + * <P> + * To create a new TimeZone, you call the factory function TimeZone::createTimeZone() + * and pass it a time zone ID. You can use the createEnumeration() function to + * obtain a list of all the time zone IDs recognized by createTimeZone(). + * <P> + * You can also use TimeZone::createDefault() to create a TimeZone. This function uses + * platform-specific APIs to produce a TimeZone for the time zone corresponding to + * the client's computer's physical location. For example, if you're in Japan (assuming + * your machine is set up correctly), TimeZone::createDefault() will return a TimeZone + * for Japanese Standard Time ("Asia/Tokyo"). + */ +class U_I18N_API TimeZone : public UObject { +public: + /** + * @stable ICU 2.0 + */ + virtual ~TimeZone(); + + /** + * Returns the "unknown" time zone. + * It behaves like the GMT/UTC time zone but has the + * <code>UCAL_UNKNOWN_ZONE_ID</code> = "Etc/Unknown". + * createTimeZone() returns a mutable clone of this time zone if the input ID is not recognized. + * + * @return the "unknown" time zone. + * @see UCAL_UNKNOWN_ZONE_ID + * @see createTimeZone + * @see getGMT + * @stable ICU 49 + */ + static const TimeZone& U_EXPORT2 getUnknown(); + + /** + * The GMT (=UTC) time zone has a raw offset of zero and does not use daylight + * savings time. 
This is a commonly used time zone. + * + * <p>Note: For backward compatibility reasons, the ID used by the time + * zone returned by this method is "GMT", although the ICU's canonical + * ID for the GMT time zone is "Etc/GMT". + * + * @return the GMT/UTC time zone. + * @see getUnknown + * @stable ICU 2.0 + */ + static const TimeZone* U_EXPORT2 getGMT(void); + + /** + * Creates a <code>TimeZone</code> for the given ID. + * @param ID the ID for a <code>TimeZone</code>, such as "America/Los_Angeles", + * or a custom ID such as "GMT-8:00". + * @return the specified <code>TimeZone</code>, or a mutable clone of getUnknown() + * if the given ID cannot be understood or if the given ID is "Etc/Unknown". + * The return result is guaranteed to be non-NULL. + * If you require that the specific zone asked for be returned, + * compare the result with getUnknown() or check the ID of the return result. + * @stable ICU 2.0 + */ + static TimeZone* U_EXPORT2 createTimeZone(const UnicodeString& ID); + + /** + * Returns an enumeration over system time zone IDs with the given + * filter conditions. + * @param zoneType The system time zone type. + * @param region The ISO 3166 two-letter country code or UN M.49 + * three-digit area code. When NULL, no filtering + * done by region. + * @param rawOffset An offset from GMT in milliseconds, ignoring + * the effect of daylight savings time, if any. + * When NULL, no filtering done by zone offset. + * @param ec Output param to be filled in with a success or + * an error. + * @return an enumeration object, owned by the caller. + * @stable ICU 4.8 + */ + static StringEnumeration* U_EXPORT2 createTimeZoneIDEnumeration( + USystemTimeZoneType zoneType, + const char* region, + const int32_t* rawOffset, + UErrorCode& ec); + + /** + * Returns an enumeration over all recognized time zone IDs. (i.e., + * all strings that createTimeZone() accepts) + * + * @return an enumeration object, owned by the caller. 
+ * @stable ICU 2.4 + */ + static StringEnumeration* U_EXPORT2 createEnumeration(); + + /** + * Returns an enumeration over time zone IDs with a given raw + * offset from GMT. There may be several time zones with the + * same GMT offset that differ in the way they handle daylight + * savings time. For example, the state of Arizona doesn't + * observe daylight savings time. If you ask for the time zone + * IDs corresponding to GMT-7:00, you'll get back an enumeration + * over two time zone IDs: "America/Denver," which corresponds to + * Mountain Standard Time in the winter and Mountain Daylight Time + * in the summer, and "America/Phoenix", which corresponds to + * Mountain Standard Time year-round, even in the summer. + * + * @param rawOffset an offset from GMT in milliseconds, ignoring + * the effect of daylight savings time, if any + * @return an enumeration object, owned by the caller + * @stable ICU 2.4 + */ + static StringEnumeration* U_EXPORT2 createEnumeration(int32_t rawOffset); + + /** + * Returns an enumeration over time zone IDs associated with the + * given country. Some zones are affiliated with no country + * (e.g., "UTC"); these may also be retrieved, as a group. + * + * @param country The ISO 3166 two-letter country code, or NULL to + * retrieve zones not affiliated with any country. + * @return an enumeration object, owned by the caller + * @stable ICU 2.4 + */ + static StringEnumeration* U_EXPORT2 createEnumeration(const char* country); + + /** + * Returns the number of IDs in the equivalency group that + * includes the given ID. An equivalency group contains zones + * that have the same GMT offset and rules. + * + * <p>The returned count includes the given ID; it is always >= 1. + * The given ID must be a system time zone. If it is not, returns + * zero. 
+ * @param id a system time zone ID + * @return the number of zones in the equivalency group containing + * 'id', or zero if 'id' is not a valid system ID + * @see #getEquivalentID + * @stable ICU 2.0 + */ + static int32_t U_EXPORT2 countEquivalentIDs(const UnicodeString& id); + + /** + * Returns an ID in the equivalency group that + * includes the given ID. An equivalency group contains zones + * that have the same GMT offset and rules. + * + * <p>The given index must be in the range 0..n-1, where n is the + * value returned by <code>countEquivalentIDs(id)</code>. For + * some value of 'index', the returned value will be equal to the + * given id. If the given id is not a valid system time zone, or + * if 'index' is out of range, then returns an empty string. + * @param id a system time zone ID + * @param index a value from 0 to n-1, where n is the value + * returned by <code>countEquivalentIDs(id)</code> + * @return the ID of the index-th zone in the equivalency group + * containing 'id', or an empty string if 'id' is not a valid + * system ID or 'index' is out of range + * @see #countEquivalentIDs + * @stable ICU 2.0 + */ + static const UnicodeString U_EXPORT2 getEquivalentID(const UnicodeString& id, + int32_t index); + + /** + * Creates an instance of TimeZone detected from the current host + * system configuration. Note that ICU4C does not change the default + * time zone unless TimeZone::adoptDefault(TimeZone*) or + * TimeZone::setDefault(const TimeZone&) is explicitly called by a + * user. This method does not update the current ICU's default, + * and may return a different TimeZone from the one returned by + * TimeZone::createDefault(). + * + * @return A new instance of TimeZone detected from the current host system + * configuration. + * @stable ICU 55 + */ + static TimeZone* U_EXPORT2 detectHostTimeZone(); + + /** + * Creates a new copy of the default TimeZone for this host. 
Unless the default time + * zone has already been set using adoptDefault() or setDefault(), the default is + * determined by querying the system using methods in TPlatformUtilities. If the + * system routines fail, or if they specify a TimeZone or TimeZone offset which is not + * recognized, the TimeZone indicated by the ID kLastResortID is instantiated + * and made the default. + * + * @return A default TimeZone. Clients are responsible for deleting the time zone + * object returned. + * @stable ICU 2.0 + */ + static TimeZone* U_EXPORT2 createDefault(void); + +#define ICU_TZ_HAS_RECREATE_DEFAULT + static void U_EXPORT2 recreateDefault(); + + /** + * Sets the default time zone (i.e., what's returned by createDefault()) to be the + * specified time zone. If NULL is specified for the time zone, the default time + * zone is set to the default host time zone. This call adopts the TimeZone object + * passed in; the client is no longer responsible for deleting it. + * + * <p>This function is not thread safe. It is an error for multiple threads + * to concurrently attempt to set the default time zone, or for any thread + * to attempt to reference the default zone while another thread is setting it. + * + * @param zone A pointer to the new TimeZone object to use as the default. + * @stable ICU 2.0 + */ + static void U_EXPORT2 adoptDefault(TimeZone* zone); + +#ifndef U_HIDE_SYSTEM_API + /** + * Same as adoptDefault(), except that the TimeZone object passed in is NOT adopted; + * the caller remains responsible for deleting it. + * + * <p>See the thread safety note under adoptDefault(). + * + * @param zone The given timezone. + * @system + * @stable ICU 2.0 + */ + static void U_EXPORT2 setDefault(const TimeZone& zone); +#endif /* U_HIDE_SYSTEM_API */ + + /** + * Returns the timezone data version currently used by ICU. + * @param status Output param to be filled in with a success or an error. 
+ * @return the version string, such as "2007f" + * @stable ICU 3.8 + */ + static const char* U_EXPORT2 getTZDataVersion(UErrorCode& status); + + /** + * Returns the canonical system timezone ID or the normalized + * custom time zone ID for the given time zone ID. + * @param id The input time zone ID to be canonicalized. + * @param canonicalID Receives the canonical system time zone ID + * or the custom time zone ID in normalized format. + * @param status Receives the status. When the given time zone ID + * is neither a known system time zone ID nor a + * valid custom time zone ID, U_ILLEGAL_ARGUMENT_ERROR + * is set. + * @return A reference to the result. + * @stable ICU 4.0 + */ + static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id, + UnicodeString& canonicalID, UErrorCode& status); + + /** + * Returns the canonical system time zone ID or the normalized + * custom time zone ID for the given time zone ID. + * @param id The input time zone ID to be canonicalized. + * @param canonicalID Receives the canonical system time zone ID + * or the custom time zone ID in normalized format. + * @param isSystemID Receives if the given ID is a known system + * time zone ID. + * @param status Receives the status. When the given time zone ID + * is neither a known system time zone ID nor a + * valid custom time zone ID, U_ILLEGAL_ARGUMENT_ERROR + * is set. + * @return A reference to the result. + * @stable ICU 4.0 + */ + static UnicodeString& U_EXPORT2 getCanonicalID(const UnicodeString& id, + UnicodeString& canonicalID, UBool& isSystemID, UErrorCode& status); + + /** + * Converts a system time zone ID to an equivalent Windows time zone ID. For example, + * Windows time zone ID "Pacific Standard Time" is returned for input "America/Los_Angeles". + * + * <p>There are system time zones that cannot be mapped to Windows zones. 
When the input + * system time zone ID is unknown or unmappable to a Windows time zone, then the result will be + * empty, but the operation itself remains successful (no error status set on return). + * + * <p>This implementation utilizes <a href="http://unicode.org/cldr/charts/supplemental/zone_tzid.html"> + * Zone-Tzid mapping data</a>. The mapping data is updated time to time. To get the latest changes, + * please read the ICU user guide section <a href="http://userguide.icu-project.org/datetime/timezone#TOC-Updating-the-Time-Zone-Data"> + * Updating the Time Zone Data</a>. + * + * @param id A system time zone ID. + * @param winid Receives a Windows time zone ID. When the input system time zone ID is unknown + * or unmappable to a Windows time zone ID, then an empty string is set on return. + * @param status Receives the status. + * @return A reference to the result (<code>winid</code>). + * @see getIDForWindowsID + * + * @stable ICU 52 + */ + static UnicodeString& U_EXPORT2 getWindowsID(const UnicodeString& id, + UnicodeString& winid, UErrorCode& status); + + /** + * Converts a Windows time zone ID to an equivalent system time zone ID + * for a region. For example, system time zone ID "America/Los_Angeles" is returned + * for input Windows ID "Pacific Standard Time" and region "US" (or <code>null</code>), + * "America/Vancouver" is returned for the same Windows ID "Pacific Standard Time" and + * region "CA". + * + * <p>Not all Windows time zones can be mapped to system time zones. When the input + * Windows time zone ID is unknown or unmappable to a system time zone, then the result + * will be empty, but the operation itself remains successful (no error status set on return). + * + * <p>This implementation utilizes <a href="http://unicode.org/cldr/charts/supplemental/zone_tzid.html"> + * Zone-Tzid mapping data</a>. The mapping data is updated time to time. 
To get the latest changes, + * please read the ICU user guide section <a href="http://userguide.icu-project.org/datetime/timezone#TOC-Updating-the-Time-Zone-Data"> + * Updating the Time Zone Data</a>. + * + * @param winid A Windows time zone ID. + * @param region A null-terminated region code, or <code>NULL</code> if no regional preference. + * @param id Receives a system time zone ID. When the input Windows time zone ID is unknown + * or unmappable to a system time zone ID, then an empty string is set on return. + * @param status Receives the status. + * @return A reference to the result (<code>id</code>). + * @see getWindowsID + * + * @stable ICU 52 + */ + static UnicodeString& U_EXPORT2 getIDForWindowsID(const UnicodeString& winid, const char* region, + UnicodeString& id, UErrorCode& status); + + /** + * Returns true if the two TimeZones are equal. (The TimeZone version only compares + * IDs, but subclasses are expected to also compare the fields they add.) + * + * @param that The TimeZone object to be compared with. + * @return True if the given TimeZone is equal to this TimeZone; false + * otherwise. + * @stable ICU 2.0 + */ + virtual UBool operator==(const TimeZone& that) const; + + /** + * Returns true if the two TimeZones are NOT equal; that is, if operator==() returns + * false. + * + * @param that The TimeZone object to be compared with. + * @return True if the given TimeZone is not equal to this TimeZone; false + * otherwise. + * @stable ICU 2.0 + */ + UBool operator!=(const TimeZone& that) const {return !operator==(that);} + + /** + * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time in this time zone, taking daylight savings time into + * account) as of a particular reference date. 
The reference date is used to determine + * whether daylight savings time is in effect and needs to be figured into the offset + * that is returned (in other words, what is the adjusted GMT offset in this time zone + * at this particular date and time?). For the time zones produced by createTimeZone(), + * the reference date is specified according to the Gregorian calendar, and the date + * and time fields are local standard time. + * + * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload, + * which returns both the raw and the DST offset for a given time. This method + * is retained only for backward compatibility. + * + * @param era The reference date's era + * @param year The reference date's year + * @param month The reference date's month (0-based; 0 is January) + * @param day The reference date's day-in-month (1-based) + * @param dayOfWeek The reference date's day-of-week (1-based; 1 is Sunday) + * @param millis The reference date's milliseconds in day, local standard time + * @param status Output param to be filled in with a success or an error. + * @return The offset in milliseconds to add to GMT to get local time. + * @stable ICU 2.0 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const = 0; + + /** + * Gets the time zone offset, for current date, modified in case of + * daylight savings. This is the offset to add *to* UTC to get local time. + * + * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload, + * which returns both the raw and the DST offset for a given time. This method + * is retained only for backward compatibility. + * + * @param era the era of the given date. + * @param year the year in the given date. + * @param month the month in the given date. + * Month is 0-based. e.g., 0 for January. + * @param day the day-in-month of the given date. + * @param dayOfWeek the day-of-week of the given date. 
+ * @param milliseconds the millis in day in <em>standard</em> local time. + * @param monthLength the length of the given month in days. + * @param status Output param to be filled in with a success or an error. + * @return the offset to add *to* GMT to get local time. + * @stable ICU 2.0 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t milliseconds, + int32_t monthLength, UErrorCode& status) const = 0; + + /** + * Returns the time zone raw and DST offsets for the given moment + * in time. Upon return, local-millis = GMT-millis + rawOffset + + * dstOffset. All computations are performed in the proleptic + * Gregorian calendar. The default implementation in the TimeZone + * class delegates to the 8-argument getOffset(). + * + * @param date moment in time for which to return offsets, in + * units of milliseconds from January 1, 1970 0:00 GMT, either GMT + * time or local wall time, depending on `local'. + * @param local if true, `date' is local wall time; otherwise it + * is in GMT time. + * @param rawOffset output parameter to receive the raw offset, that + * is, the offset not including DST adjustments + * @param dstOffset output parameter to receive the DST offset, + * that is, the offset to be added to `rawOffset' to obtain the + * total offset between local and GMT time. If DST is not in + * effect, this value is zero; otherwise it is a positive value, + * typically one hour. + * @param ec input-output error code + * + * @stable ICU 2.8 + */ + virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& ec) const; + + /** + * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @param offsetMillis The new raw GMT offset for this time zone. 
+ * @stable ICU 2.0 + */ + virtual void setRawOffset(int32_t offsetMillis) = 0; + + /** + * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @return The TimeZone's raw GMT offset. + * @stable ICU 2.0 + */ + virtual int32_t getRawOffset(void) const = 0; + + /** + * Fills in "ID" with the TimeZone's ID. + * + * @param ID Receives this TimeZone's ID. + * @return A reference to 'ID' + * @stable ICU 2.0 + */ + UnicodeString& getID(UnicodeString& ID) const; + + /** + * Sets the TimeZone's ID to the specified value. This doesn't affect any other + * fields (for example, if you say + * \htmlonly<blockquote>\endhtmlonly<pre> + * . TimeZone* foo = TimeZone::createTimeZone("America/New_York"); + * . foo->setID("America/Los_Angeles"); + * </pre>\htmlonly</blockquote>\endhtmlonly + * the time zone's GMT offset and daylight-savings rules don't change to those for + * Los Angeles. They're still those for New York. Only the ID has changed.) + * + * @param ID The new time zone ID. 
+ * @stable ICU 2.0 + */ + void setID(const UnicodeString& ID); + + /** + * Enum for use with getDisplayName + * @stable ICU 2.4 + */ + enum EDisplayType { + /** + * Selector for short display name + * @stable ICU 2.4 + */ + SHORT = 1, + /** + * Selector for long display name + * @stable ICU 2.4 + */ + LONG, + /** + * Selector for short generic display name + * @stable ICU 4.4 + */ + SHORT_GENERIC, + /** + * Selector for long generic display name + * @stable ICU 4.4 + */ + LONG_GENERIC, + /** + * Selector for short display name derived + * from time zone offset + * @stable ICU 4.4 + */ + SHORT_GMT, + /** + * Selector for long display name derived + * from time zone offset + * @stable ICU 4.4 + */ + LONG_GMT, + /** + * Selector for short display name derived + * from the time zone's fallback name + * @stable ICU 4.4 + */ + SHORT_COMMONLY_USED, + /** + * Selector for long display name derived + * from the time zone's fallback name + * @stable ICU 4.4 + */ + GENERIC_LOCATION + }; + + /** + * Returns a name of this time zone suitable for presentation to the user + * in the default locale. + * This method returns the long name, not including daylight savings. + * If the display name is not available for the locale, + * then this method returns a string in the localized GMT offset format + * such as <code>GMT[+-]HH:mm</code>. + * @param result the human-readable name of this time zone in the default locale. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName(UnicodeString& result) const; + + /** + * Returns a name of this time zone suitable for presentation to the user + * in the specified locale. + * This method returns the long name, not including daylight savings. + * If the display name is not available for the locale, + * then this method returns a string in the localized GMT offset format + * such as <code>GMT[+-]HH:mm</code>. + * @param locale the locale in which to supply the display name. 
+ * @param result the human-readable name of this time zone in the given locale + * or in the default locale if the given locale is not recognized. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName(const Locale& locale, UnicodeString& result) const; + + /** + * Returns a name of this time zone suitable for presentation to the user + * in the default locale. + * If the display name is not available for the locale, + * then this method returns a string in the localized GMT offset format + * such as <code>GMT[+-]HH:mm</code>. + * @param daylight if true, return the daylight savings name. + * @param style the type of display name to return (see EDisplayType). + * @param result the human-readable name of this time zone in the default locale. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName(UBool daylight, EDisplayType style, UnicodeString& result) const; + + /** + * Returns a name of this time zone suitable for presentation to the user + * in the specified locale. + * If the display name is not available for the locale, + * then this method returns a string in the localized GMT offset format + * such as <code>GMT[+-]HH:mm</code>. + * @param daylight if true, return the daylight savings name. + * @param style the type of display name to return (see EDisplayType). + * @param locale the locale in which to supply the display name. + * @param result the human-readable name of this time zone in the given locale + * or in the default locale if the given locale is not recognized. + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName(UBool daylight, EDisplayType style, const Locale& locale, UnicodeString& result) const; + + /** + * Queries if this time zone uses daylight savings time. + * @return true if this time zone uses daylight savings time, + * false, otherwise. + * <p><strong>Note:</strong>The default implementation of + * ICU TimeZone uses the tz database, which supports historic + * rule changes, for system time zones. 
With the implementation, + * there are time zones that used daylight savings time in the + * past, but no longer use it currently. For example, Asia/Tokyo has + * not used daylight savings time since 1951. Most clients would + * expect this method to return <code>FALSE</code> in such a case. + * The default implementation of this method returns <code>TRUE</code> + * when the time zone uses daylight savings time in the current + * (Gregorian) calendar year. + * <p>In Java 7, <code>observesDaylightTime()</code> was added in + * addition to <code>useDaylightTime()</code>. In Java, <code>useDaylightTime()</code> + * only checks if daylight saving time is observed by the last known + * rule. This specification might not be what most users would expect + * if daylight saving time is currently observed, but not scheduled + * in the future. In this case, Java's <code>useDaylightTime()</code> returns + * <code>false</code>. To resolve the issue, Java 7 added <code>observesDaylightTime()</code>, + * which takes the current rule into account. The method <code>observesDaylightTime()</code> + * was added in ICU4J for supporting API signature compatibility with JDK. + * In general, ICU4C also provides JDK compatible methods, but the current + * implementation <code>useDaylightTime()</code> serves the purpose + * (takes the current rule into account), <code>observesDaylightTime()</code> + * is not added in ICU4C. In addition to <code>useDaylightTime()</code>, ICU4C + * <code>BasicTimeZone</code> class (Note that <code>TimeZone::createTimeZone(const UnicodeString &ID)</code> + * always returns a <code>BasicTimeZone</code>) provides a series of methods allowing + * historic and future time zone rule iteration, so you can check if daylight saving + * time is observed or not within a given period. + * + * @stable ICU 2.0 + */ + virtual UBool useDaylightTime(void) const = 0; + + /** + * Queries if the given date is in daylight savings time in + * this time zone. 
+ * This method is wasteful since it creates a new GregorianCalendar and + * deletes it each time it is called. This is a deprecated method + * and provided only for Java compatibility. + * + * @param date the given UDate. + * @param status Output param filled in with success/error code. + * @return true if the given date is in daylight savings time, + * false, otherwise. + * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead. + */ + virtual UBool inDaylightTime(UDate date, UErrorCode& status) const = 0; + + /** + * Returns true if this zone has the same rule and offset as another zone. + * That is, if this zone differs only in ID, if at all. + * @param other the <code>TimeZone</code> object to be compared with + * @return true if the given zone is the same as this one, + * with the possible exception of the ID + * @stable ICU 2.0 + */ + virtual UBool hasSameRules(const TimeZone& other) const; + + /** + * Clones TimeZone objects polymorphically. Clients are responsible for deleting + * the TimeZone object cloned. + * + * @return A new copy of this TimeZone object. + * @stable ICU 2.0 + */ + virtual TimeZone* clone(void) const = 0; + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. This method is to + * implement a simple version of RTTI, since not all C++ compilers support genuine + * RTTI. Polymorphic operator==() and clone() methods call this method. + * <P> + * Concrete subclasses of TimeZone must use the UOBJECT_DEFINE_RTTI_IMPLEMENTATION + * macro from uobject.h in their implementation to provide correct RTTI information. + * @return The class ID for this object. All objects of a given class have the + * same class ID. Objects of other classes have different class IDs. 
+ * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Returns the amount of time to be added to local standard time + * to get local wall clock time. + * <p> + * The default implementation always returns 3600000 milliseconds + * (i.e., one hour) if this time zone observes Daylight Saving + * Time. Otherwise, 0 (zero) is returned. + * <p> + * If an underlying TimeZone implementation subclass supports + * historical Daylight Saving Time changes, this method returns + * the known latest daylight saving value. + * + * @return the amount of saving time in milliseconds + * @stable ICU 3.6 + */ + virtual int32_t getDSTSavings() const; + + /** + * Gets the region code associated with the given + * system time zone ID. The region code is either ISO 3166 + * 2-letter country code or UN M.49 3-digit area code. + * When the time zone is not associated with a specific location, + * for example - "Etc/UTC", "EST5EDT", then this method returns + * "001" (UN M.49 area code for World). + * + * @param id The system time zone ID. + * @param region Output buffer for receiving the region code. + * @param capacity The size of the output buffer. + * @param status Receives the status. When the given time zone ID + * is not a known system time zone ID, + * U_ILLEGAL_ARGUMENT_ERROR is set. + * @return The length of the output region code. + * @stable ICU 4.8 + */ + static int32_t U_EXPORT2 getRegion(const UnicodeString& id, + char *region, int32_t capacity, UErrorCode& status); + +protected: + + /** + * Default constructor. ID is initialized to the empty string. + * @stable ICU 2.0 + */ + TimeZone(); + + /** + * Construct a TimeZone with a given ID. + * @param id a system time zone ID + * @stable ICU 2.0 + */ + TimeZone(const UnicodeString &id); + + /** + * Copy constructor. + * @param source the object to be copied. + * @stable ICU 2.0 + */ + TimeZone(const TimeZone& source); + + /** + * Default assignment operator. 
+ * @param right the object to be copied. + * @stable ICU 2.0 + */ + TimeZone& operator=(const TimeZone& right); + +#ifndef U_HIDE_INTERNAL_API + /** + * Utility function. For internally loading rule data. + * @param top Top resource bundle for tz data + * @param ruleid ID of rule to load + * @param oldbundle Old bundle to reuse or NULL + * @param status Status parameter + * @return either a new bundle or *oldbundle + * @internal + */ + static UResourceBundle* loadRule(const UResourceBundle* top, const UnicodeString& ruleid, UResourceBundle* oldbundle, UErrorCode&status); +#endif /* U_HIDE_INTERNAL_API */ + +private: + friend class ZoneMeta; + + + static TimeZone* createCustomTimeZone(const UnicodeString&); // Creates a time zone based on the string. + + /** + * Finds the given ID in the Olson tzdata. If the given ID is found in the tzdata, + * returns the pointer to the ID resource. This method is exposed through ZoneMeta class + * for ICU internal implementation and useful for building hashtable using a time zone + * ID as a key. + * @param id zone id string + * @return the pointer of the ID resource, or NULL. + */ + static const UChar* findID(const UnicodeString& id); + + /** + * Resolve a link in Olson tzdata. When the given id is known and it's not a link, + * the id itself is returned. When the given id is known and it is a link, then + * dereferenced zone id is returned. When the given id is unknown, then it returns + * NULL. + * @param id zone id string + * @return the dereferenced zone or NULL + */ + static const UChar* dereferOlsonLink(const UnicodeString& id); + + /** + * Returns the region code associated with the given zone, + * or NULL if the zone is not known. + * @param id zone id string + * @return the region associated with the given zone + */ + static const UChar* getRegion(const UnicodeString& id); + + public: +#ifndef U_HIDE_INTERNAL_API + /** + * Returns the region code associated with the given zone, + * or NULL if the zone is not known. 
+ * @param id zone id string + * @param status Status parameter + * @return the region associated with the given zone + * @internal + */ + static const UChar* getRegion(const UnicodeString& id, UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + + private: + /** + * Parses the given custom time zone identifier + * @param id A string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or + * GMT[+-]hh. + * @param sign Receives parsed sign, 1 for positive, -1 for negative. + * @param hour Receives parsed hour field + * @param minute Receives parsed minute field + * @param second Receives parsed second field + * @return Returns TRUE when the given custom id is valid. + */ + static UBool parseCustomID(const UnicodeString& id, int32_t& sign, int32_t& hour, + int32_t& minute, int32_t& second); + + /** + * Parse a custom time zone identifier and return the normalized + * custom time zone identifier for the given custom id string. + * @param id a string of the form GMT[+-]hh:mm, GMT[+-]hhmm, or + * GMT[+-]hh. + * @param normalized Receives the normalized custom ID + * @param status Receives the status. When the input ID string is invalid, + * U_ILLEGAL_ARGUMENT_ERROR is set. + * @return The normalized custom id string. + */ + static UnicodeString& getCustomID(const UnicodeString& id, UnicodeString& normalized, + UErrorCode& status); + + /** + * Returns the normalized custom time zone ID for the given offset fields. + * @param hour offset hours + * @param min offset minutes + * @param sec offset seconds + * @param negative sign of the offset, TRUE for negative offset. 
+ * @param id Receives the format result (normalized custom ID) + * @return The reference to id + */ + static UnicodeString& formatCustomID(int32_t hour, int32_t min, int32_t sec, + UBool negative, UnicodeString& id); + + UnicodeString fID; // this time zone's ID + + friend class TZEnumeration; +}; + + +// ------------------------------------- + +inline UnicodeString& +TimeZone::getID(UnicodeString& ID) const +{ + ID = fID; + return ID; +} + +// ------------------------------------- + +inline void +TimeZone::setID(const UnicodeString& ID) +{ + fID = ID; +} +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif //_TIMEZONE +//eof diff --git a/intl/icu/source/i18n/unicode/tmunit.h b/intl/icu/source/i18n/unicode/tmunit.h new file mode 100644 index 000000000..a19a1f3c1 --- /dev/null +++ b/intl/icu/source/i18n/unicode/tmunit.h @@ -0,0 +1,137 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2009-2016, International Business Machines Corporation, * + * Google, and others. All Rights Reserved. * + ******************************************************************************* + */ + +#ifndef __TMUNIT_H__ +#define __TMUNIT_H__ + + +/** + * \file + * \brief C++ API: time unit object + */ + + +#include "unicode/measunit.h" + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + +/** + * Measurement unit for time units. + * @see TimeUnitAmount + * @see TimeUnit + * @stable ICU 4.2 + */ +class U_I18N_API TimeUnit: public MeasureUnit { +public: + /** + * Constants for all the time units that are supported. + * @stable ICU 4.2 + */ + enum UTimeUnitFields { + UTIMEUNIT_YEAR, + UTIMEUNIT_MONTH, + UTIMEUNIT_DAY, + UTIMEUNIT_WEEK, + UTIMEUNIT_HOUR, + UTIMEUNIT_MINUTE, + UTIMEUNIT_SECOND, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UTimeUnitFields value. 
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UTIMEUNIT_FIELD_COUNT +#endif // U_HIDE_DEPRECATED_API + }; + + /** + * Create Instance. + * @param timeUnitField time unit field based on which the instance + * is created. + * @param status input-output error code. + * If the timeUnitField is invalid, + * then this will be set to U_ILLEGAL_ARGUMENT_ERROR. + * @return a TimeUnit instance + * @stable ICU 4.2 + */ + static TimeUnit* U_EXPORT2 createInstance(UTimeUnitFields timeUnitField, + UErrorCode& status); + + + /** + * Override clone. + * @stable ICU 4.2 + */ + virtual UObject* clone() const; + + /** + * Copy constructor. + * @stable ICU 4.2 + */ + TimeUnit(const TimeUnit& other); + + /** + * Assignment operator. + * @stable ICU 4.2 + */ + TimeUnit& operator=(const TimeUnit& other); + + /** + * Returns a unique class ID for this object POLYMORPHICALLY. + * This method implements a simple form of RTTI used by ICU. + * @return The class ID for this object. All objects of a given + * class have the same class ID. Objects of other classes have + * different class IDs. + * @stable ICU 4.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * Returns the class ID for this class. This is used to compare to + * the return value of getDynamicClassID(). + * @return The class ID for all objects of this class. + * @stable ICU 4.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + + /** + * Get time unit field. + * @return time unit field. + * @stable ICU 4.2 + */ + UTimeUnitFields getTimeUnitField() const; + + /** + * Destructor. 
+ * @stable ICU 4.2 + */ + virtual ~TimeUnit(); + +private: + UTimeUnitFields fTimeUnitField; + + /** + * Constructor + * @internal (private) + */ + TimeUnit(UTimeUnitFields timeUnitField); + +}; + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // __TMUNIT_H__ +//eof +// diff --git a/intl/icu/source/i18n/unicode/tmutamt.h b/intl/icu/source/i18n/unicode/tmutamt.h new file mode 100644 index 000000000..ff132f8a4 --- /dev/null +++ b/intl/icu/source/i18n/unicode/tmutamt.h @@ -0,0 +1,170 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2009-2010, Google, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +#ifndef __TMUTAMT_H__ +#define __TMUTAMT_H__ + + +/** + * \file + * \brief C++ API: time unit amount object. + */ + +#include "unicode/measure.h" +#include "unicode/tmunit.h" + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + + +/** + * Express a duration as a time unit and number. Patterned after Currency. + * @see TimeUnitAmount + * @see TimeUnitFormat + * @stable ICU 4.2 + */ +class U_I18N_API TimeUnitAmount: public Measure { +public: + /** + * Construct TimeUnitAmount object with the given number and the + * given time unit. + * @param number a numeric object; number.isNumeric() must be TRUE + * @param timeUnitField the time unit field of a time unit + * @param status the input-output error code. + * If the number is not numeric or the timeUnitField + * is not valid, + * then this will be set to a failing value: + * U_ILLEGAL_ARGUMENT_ERROR. 
+ * @stable ICU 4.2 + */ + TimeUnitAmount(const Formattable& number, + TimeUnit::UTimeUnitFields timeUnitField, + UErrorCode& status); + + /** + * Construct TimeUnitAmount object with the given numeric amount and the + * given time unit. + * @param amount a numeric amount. + * @param timeUnitField the time unit field on which a time unit amount + * object will be created. + * @param status the input-output error code. + * If the timeUnitField is not valid, + * then this will be set to a failing value: + * U_ILLEGAL_ARGUMENT_ERROR. + * @stable ICU 4.2 + */ + TimeUnitAmount(double amount, TimeUnit::UTimeUnitFields timeUnitField, + UErrorCode& status); + + + /** + * Copy constructor + * @stable ICU 4.2 + */ + TimeUnitAmount(const TimeUnitAmount& other); + + + /** + * Assignment operator + * @stable ICU 4.2 + */ + TimeUnitAmount& operator=(const TimeUnitAmount& other); + + + /** + * Clone. + * @return a polymorphic clone of this object. The result will have the same class as returned by getDynamicClassID(). + * @stable ICU 4.2 + */ + virtual UObject* clone() const; + + + /** + * Destructor + * @stable ICU 4.2 + */ + virtual ~TimeUnitAmount(); + + + /** + * Equality operator. + * @param other the object to compare to. + * @return true if this object is equal to the given object. + * @stable ICU 4.2 + */ + virtual UBool operator==(const UObject& other) const; + + + /** + * Inequality operator. + * @param other the object to compare to. + * @return true if this object is not equal to the given object. + * @stable ICU 4.2 + */ + UBool operator!=(const UObject& other) const; + + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. 
+ * @stable ICU 4.2 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 4.2 + */ + virtual UClassID getDynamicClassID(void) const; + + + /** + * Get the time unit. + * @return time unit object. + * @stable ICU 4.2 + */ + const TimeUnit& getTimeUnit() const; + + /** + * Get the time unit field value. + * @return time unit field value. + * @stable ICU 4.2 + */ + TimeUnit::UTimeUnitFields getTimeUnitField() const; +}; + + + +inline UBool +TimeUnitAmount::operator!=(const UObject& other) const { + return !operator==(other); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // __TMUTAMT_H__ +//eof +// diff --git a/intl/icu/source/i18n/unicode/tmutfmt.h b/intl/icu/source/i18n/unicode/tmutfmt.h new file mode 100644 index 000000000..b90d4a096 --- /dev/null +++ b/intl/icu/source/i18n/unicode/tmutfmt.h @@ -0,0 +1,248 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2008-2014, Google, International Business Machines Corporation + * and others. All Rights Reserved. 
+ ******************************************************************************* + */ + +#ifndef __TMUTFMT_H__ +#define __TMUTFMT_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Format and parse duration in single time unit + */ + + +#if !UCONFIG_NO_FORMATTING +#ifndef U_HIDE_DEPRECATED_API + +#include "unicode/unistr.h" +#include "unicode/tmunit.h" +#include "unicode/tmutamt.h" +#include "unicode/measfmt.h" +#include "unicode/numfmt.h" +#include "unicode/plurrule.h" + + +/** + * Constants for various styles. + * There are 2 styles: full name and abbreviated name. + * For example, for English, the full name for hour duration is "3 hours", + * and the abbreviated name is "3 hrs". + * @deprecated ICU 53 Use MeasureFormat and UMeasureFormatWidth instead. + */ +enum UTimeUnitFormatStyle { + /** @deprecated ICU 53 */ + UTMUTFMT_FULL_STYLE, + /** @deprecated ICU 53 */ + UTMUTFMT_ABBREVIATED_STYLE, + /** @deprecated ICU 53 */ + UTMUTFMT_FORMAT_STYLE_COUNT +}; +typedef enum UTimeUnitFormatStyle UTimeUnitFormatStyle; /**< @deprecated ICU 53 */ + + +U_NAMESPACE_BEGIN + +class Hashtable; +class UVector; + +struct TimeUnitFormatReadSink; + +/** + * Format or parse a TimeUnitAmount, using plural rules for the units where available. 
+ * + * <P> + * Code Sample: + * <pre> + * // create time unit amount instance - a combination of Number and time unit + * UErrorCode status = U_ZERO_ERROR; + * TimeUnitAmount* source = new TimeUnitAmount(2, TimeUnit::UTIMEUNIT_YEAR, status); + * // create time unit format instance + * TimeUnitFormat* format = new TimeUnitFormat(Locale("en"), status); + * // format a time unit amount + * UnicodeString formatted; + * Formattable formattable; + * if (U_SUCCESS(status)) { + * formattable.adoptObject(source); + * formatted = ((Format*)format)->format(formattable, formatted, status); + * Formattable result; + * ((Format*)format)->parseObject(formatted, result, status); + * if (U_SUCCESS(status)) { + * assert (result == formattable); + * } + * } + * </pre> + * + * <P> + * @see TimeUnitAmount + * @see TimeUnitFormat + * @deprecated ICU 53 Use the MeasureFormat class instead. + */ +class U_I18N_API TimeUnitFormat: public MeasureFormat { +public: + + /** + * Create TimeUnitFormat with default locale, and full name style. + * Use setLocale and/or setNumberFormat to modify. + * @deprecated ICU 53 + */ + TimeUnitFormat(UErrorCode& status); + + /** + * Create TimeUnitFormat given locale, and full name style. + * @deprecated ICU 53 + */ + TimeUnitFormat(const Locale& locale, UErrorCode& status); + + /** + * Create TimeUnitFormat given locale and style. + * @deprecated ICU 53 + */ + TimeUnitFormat(const Locale& locale, UTimeUnitFormatStyle style, UErrorCode& status); + + /** + * Copy constructor. + * @deprecated ICU 53 + */ + TimeUnitFormat(const TimeUnitFormat&); + + /** + * Destructor + * @deprecated ICU 53 + */ + virtual ~TimeUnitFormat(); + + /** + * Clone this Format object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object.
+ * @deprecated ICU 53 + */ + virtual Format* clone(void) const; + + /** + * Assignment operator + * @deprecated ICU 53 + */ + TimeUnitFormat& operator=(const TimeUnitFormat& other); + + /** + * Return true if the given Format objects are not semantically equal. + * Objects of different subclasses are considered unequal. + * @param other the object to be compared with. + * @return true if the given Format objects are not semantically equal. + * @deprecated ICU 53 + */ + UBool operator!=(const Format& other) const; + + /** + * Set the locale used for formatting or parsing. + * @param locale the locale to be set + * @param status output param set to success/failure code on exit + * @deprecated ICU 53 + */ + void setLocale(const Locale& locale, UErrorCode& status); + + + /** + * Set the number format used for formatting or parsing. + * @param format the number formatter to be set + * @param status output param set to success/failure code on exit + * @deprecated ICU 53 + */ + void setNumberFormat(const NumberFormat& format, UErrorCode& status); + + /** + * Parse a TimeUnitAmount. + * @see Format#parseObject(const UnicodeString&, Formattable&, ParsePosition&) const; + * @deprecated ICU 53 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& pos) const; + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @deprecated ICU 53 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI.
Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @deprecated ICU 53 + */ + virtual UClassID getDynamicClassID(void) const; + +private: + Hashtable* fTimeUnitToCountToPatterns[TimeUnit::UTIMEUNIT_FIELD_COUNT]; + UTimeUnitFormatStyle fStyle; + + void create(UTimeUnitFormatStyle style, UErrorCode& status); + + // it might actually be simpler to make them Decimal Formats later. + // initialize all private data members + void setup(UErrorCode& status); + + // initialize data member without fill in data for fTimeUnitToCountToPattern + void initDataMembers(UErrorCode& status); + + // initialize fTimeUnitToCountToPatterns from current locale's resource. + void readFromCurrentLocale(UTimeUnitFormatStyle style, const char* key, const UVector& pluralCounts, + UErrorCode& status); + + // check completeness of fTimeUnitToCountToPatterns against all time units, + // and all plural rules, fill in fallback as necessary. + void checkConsistency(UTimeUnitFormatStyle style, const char* key, UErrorCode& status); + + // fill in fTimeUnitToCountToPatterns from locale fall-back chain + void searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName, + TimeUnit::UTimeUnitFields field, const UnicodeString&, + const char*, Hashtable*, UErrorCode&); + + // initialize hash table + Hashtable* initHash(UErrorCode& status); + + // delete hash table + void deleteHash(Hashtable* htable); + + // copy hash table + void copyHash(const Hashtable* source, Hashtable* target, UErrorCode& status); + // get time unit name, such as "year", from time unit field enum, such as + // UTIMEUNIT_YEAR. 
+ static const char* getTimeUnitName(TimeUnit::UTimeUnitFields field, UErrorCode& status); + + friend struct TimeUnitFormatReadSink; +}; + +inline UBool +TimeUnitFormat::operator!=(const Format& other) const { + return !operator==(other); +} + +U_NAMESPACE_END + +#endif /* U_HIDE_DEPRECATED_API */ +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // __TMUTFMT_H__ +//eof diff --git a/intl/icu/source/i18n/unicode/translit.h b/intl/icu/source/i18n/unicode/translit.h new file mode 100644 index 000000000..b06879100 --- /dev/null +++ b/intl/icu/source/i18n/unicode/translit.h @@ -0,0 +1,1344 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/17/99 aliu Creation. +********************************************************************** +*/ +#ifndef TRANSLIT_H +#define TRANSLIT_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Transforms text from one format to another. + */ + +#if !UCONFIG_NO_TRANSLITERATION + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/parseerr.h" +#include "unicode/utrans.h" // UTransPosition, UTransDirection +#include "unicode/strenum.h" + +U_NAMESPACE_BEGIN + +class UnicodeFilter; +class UnicodeSet; +class CompoundTransliterator; +class TransliteratorParser; +class NormalizationTransliterator; +class TransliteratorIDParser; + +/** + * + * <code>Transliterator</code> is an abstract class that + * transliterates text from one format to another. The most common + * kind of transliterator is a script, or alphabet, transliterator.
+ * For example, a Russian to Latin transliterator changes Russian text + * written in Cyrillic characters to phonetically equivalent Latin + * characters. It does not <em>translate</em> Russian to English! + * Transliteration, unlike translation, operates on characters, without + * reference to the meanings of words and sentences. + * + * <p>Although script conversion is its most common use, a + * transliterator can actually perform a more general class of tasks. + * In fact, <code>Transliterator</code> defines a very general API + * which specifies only that a segment of the input text is replaced + * by new text. The particulars of this conversion are determined + * entirely by subclasses of <code>Transliterator</code>. + * + * <p><b>Transliterators are stateless</b> + * + * <p><code>Transliterator</code> objects are <em>stateless</em>; they + * retain no information between calls to + * <code>transliterate()</code>. (However, this does <em>not</em> + * mean that threads may share transliterators without synchronizing + * them. Transliterators are not immutable, so they must be + * synchronized when shared between threads.) This might seem to + * limit the complexity of the transliteration operation. In + * practice, subclasses perform complex transliterations by delaying + * the replacement of text until it is known that no other + * replacements are possible. In other words, although the + * <code>Transliterator</code> objects are stateless, the source text + * itself embodies all the needed information, and delayed operation + * allows arbitrary complexity. + * + * <p><b>Batch transliteration</b> + * + * <p>The simplest way to perform transliteration is all at once, on a + * string of existing text. This is referred to as <em>batch</em> + * transliteration. 
For example, given a string <code>input</code> + * and a transliterator <code>t</code>, the call + * + * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input); + * </code>\htmlonly</blockquote>\endhtmlonly + * + * will transliterate it and return the result. Other methods allow + * the client to specify a substring to be transliterated and to use + * {@link Replaceable } objects instead of strings, in order to + * preserve out-of-band information (such as text styles). + * + * <p><b>Keyboard transliteration</b> + * + * <p>Somewhat more involved is <em>keyboard</em>, or incremental + * transliteration. This is the transliteration of text that is + * arriving from some source (typically the user's keyboard) one + * character at a time, or in some other piecemeal fashion. + * + * <p>In keyboard transliteration, a <code>Replaceable</code> buffer + * stores the text. As text is inserted, as much as possible is + * transliterated on the fly. This means a GUI that displays the + * contents of the buffer may show text being modified as each new + * character arrives. + * + * <p>Consider the simple <code>RuleBasedTransliterator</code>: + * + * \htmlonly<blockquote>\endhtmlonly<code> + * th>{theta}<br> + * t>{tau} + * </code>\htmlonly</blockquote>\endhtmlonly + * + * When the user types 't', nothing will happen, since the + * transliterator is waiting to see if the next character is 'h'. To + * remedy this, we introduce the notion of a cursor, marked by a '|' + * in the output string: + * + * \htmlonly<blockquote>\endhtmlonly<code> + * t>|{tau}<br> + * {tau}h>{theta} + * </code>\htmlonly</blockquote>\endhtmlonly + * + * Now when the user types 't', tau appears, and if the next character + * is 'h', the tau changes to a theta. This is accomplished by + * maintaining a cursor position (independent of the insertion point, + * and invisible in the GUI) across calls to + * <code>transliterate()</code>. 
Typically, the cursor will + * be coincident with the insertion point, but in a case like the one + * above, it will precede the insertion point. + * + * <p>Keyboard transliteration methods maintain a set of three indices + * that are updated with each call to + * <code>transliterate()</code>, including the cursor, start, + * and limit. Since these indices are changed by the method, they are + * passed in an <code>int[]</code> array. The <code>START</code> index + * marks the beginning of the substring that the transliterator will + * look at. It is advanced as text becomes committed (but it is not + * the committed index; that's the <code>CURSOR</code>). The + * <code>CURSOR</code> index, described above, marks the point at + * which the transliterator last stopped, either because it reached + * the end, or because it required more characters to disambiguate + * between possible inputs. The <code>CURSOR</code> can also be + * explicitly set by rules in a <code>RuleBasedTransliterator</code>. + * Any characters before the <code>CURSOR</code> index are frozen; + * future keyboard transliteration calls within this input sequence + * will not change them. New text is inserted at the + * <code>LIMIT</code> index, which marks the end of the substring that + * the transliterator looks at. + * + * <p>Because keyboard transliteration assumes that more characters + * are to arrive, it is conservative in its operation. It only + * transliterates when it can do so unambiguously. Otherwise it waits + * for more characters to arrive. When the client code knows that no + * more characters are forthcoming, perhaps because the user has + * performed some input termination operation, then it should call + * <code>finishTransliteration()</code> to complete any + * pending transliterations. + * + * <p><b>Inverses</b> + * + * <p>Pairs of transliterators may be inverses of one another. 
For + * example, if transliterator <b>A</b> transliterates characters by + * incrementing their Unicode value (so "abc" -> "def"), and + * transliterator <b>B</b> decrements character values, then <b>A</b> + * is an inverse of <b>B</b> and vice versa. If we compose <b>A</b> + * with <b>B</b> in a compound transliterator, the result is the + * identity transliterator, that is, a transliterator that does not + * change its input text. + * + * The <code>Transliterator</code> method <code>getInverse()</code> + * returns a transliterator's inverse, if one exists, or + * <code>null</code> otherwise. However, the result of + * <code>getInverse()</code> usually will <em>not</em> be a true + * mathematical inverse. This is because true inverse transliterators + * are difficult to formulate. For example, consider two + * transliterators: <b>AB</b>, which transliterates the character 'A' + * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'. It might + * seem that these are exact inverses, since + * + * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br> + * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly + * + * where 'x' represents transliteration. However, + * + * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br> + * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly + * + * so <b>AB</b> composed with <b>BA</b> is not the + * identity. Nonetheless, <b>BA</b> may be usefully considered to be + * <b>AB</b>'s inverse, and it is on this basis that + * <b>AB</b><code>.getInverse()</code> could legitimately return + * <b>BA</b>. + * + * <p><b>IDs and display names</b> + * + * <p>A transliterator is designated by a short identifier string or + * <em>ID</em>. IDs follow the format <em>source-destination</em>, + * where <em>source</em> describes the entity being replaced, and + * <em>destination</em> describes the entity replacing + * <em>source</em>.
The entities may be the names of scripts, + * particular sequences of characters, or whatever else it is that the + * transliterator converts to or from. For example, a transliterator + * from Russian to Latin might be named "Russian-Latin". A + * transliterator from keyboard escape sequences to Latin-1 characters + * might be named "KeyboardEscape-Latin1". By convention, system + * entity names are in English, with the initial letters of words + * capitalized; user entity names may follow any format so long as + * they do not contain dashes. + * + * <p>In addition to programmatic IDs, transliterator objects have + * display names for presentation in user interfaces, returned by + * {@link #getDisplayName }. + * + * <p><b>Factory methods and registration</b> + * + * <p>In general, client code should use the factory method + * {@link #createInstance } to obtain an instance of a + * transliterator given its ID. Valid IDs may be enumerated using + * <code>getAvailableIDs()</code>. Since transliterators are mutable, + * multiple calls to {@link #createInstance } with the same ID will + * return distinct objects. + * + * <p>In addition to the system transliterators registered at startup, + * user transliterators may be registered by calling + * <code>registerInstance()</code> at run time. A registered instance + * acts as a template; future calls to {@link #createInstance } with the ID + * of the registered object return clones of that object. Thus any + * object passed to <tt>registerInstance()</tt> must implement + * <tt>clone()</tt> properly. To register a transliterator subclass + * without instantiating it (until it is needed), users may call + * {@link #registerFactory }. In this case, the objects are + * instantiated by invoking the zero-argument public constructor of + * the class. + * + * <p><b>Subclassing</b> + * + * Subclasses must implement the abstract method + * <code>handleTransliterate()</code>.
<p>Subclasses should override + * the <code>transliterate()</code> method taking a + * <code>Replaceable</code> and the <code>transliterate()</code> + * method taking a <code>String</code> and <code>StringBuffer</code> + * if the performance of these methods can be improved over the + * performance obtained by the default implementations in this class. + * + * @author Alan Liu + * @stable ICU 2.0 + */ +class U_I18N_API Transliterator : public UObject { + +private: + + /** + * Programmatic name, e.g., "Latin-Arabic". + */ + UnicodeString ID; + + /** + * This transliterator's filter. Any character for which + * <tt>filter.contains()</tt> returns <tt>false</tt> will not be + * altered by this transliterator. If <tt>filter</tt> is + * <tt>null</tt> then no filtering is applied. + */ + UnicodeFilter* filter; + + int32_t maximumContextLength; + + public: + + /** + * A context integer or pointer for a factory function, passed by + * value. + * @stable ICU 2.4 + */ + union Token { + /** + * This token, interpreted as a 32-bit integer. + * @stable ICU 2.4 + */ + int32_t integer; + /** + * This token, interpreted as a native pointer. + * @stable ICU 2.4 + */ + void* pointer; + }; + +#ifndef U_HIDE_INTERNAL_API + /** + * Return a token containing an integer. + * @return a token containing an integer. + * @internal + */ + inline static Token integerToken(int32_t); + + /** + * Return a token containing a pointer. + * @return a token containing a pointer. + * @internal + */ + inline static Token pointerToken(void*); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * A function that creates and returns a Transliterator. When + * invoked, it will be passed the ID string that is being + * instantiated, together with the context pointer that was passed + * in when the factory function was first registered. 
Many + * factory functions will ignore both parameters, however, + * functions that are registered to more than one ID may use the + * ID or the context parameter to parameterize the transliterator + * they create. + * @param ID the string identifier for this transliterator + * @param context a context pointer that will be stored and + * later passed to the factory function when an ID matching + * the registration ID is being instantiated with this factory. + * @stable ICU 2.4 + */ + typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context); + +protected: + + /** + * Default constructor. + * @param ID the string identifier for this transliterator + * @param adoptedFilter the filter. Any character for which + * <tt>filter.contains()</tt> returns <tt>false</tt> will not be + * altered by this transliterator. If <tt>filter</tt> is + * <tt>null</tt> then no filtering is applied. + * @stable ICU 2.4 + */ + Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter); + + /** + * Copy constructor. + * @stable ICU 2.4 + */ + Transliterator(const Transliterator&); + + /** + * Assignment operator. + * @stable ICU 2.4 + */ + Transliterator& operator=(const Transliterator&); + + /** + * Create a transliterator from a basic ID. This is an ID + * containing only the forward direction source, target, and + * variant. + * @param id a basic ID of the form S-T or S-T/V. + * @param canon canonical ID to assign to the object, or + * NULL to leave the ID unchanged + * @return a newly created Transliterator or null if the ID is + * invalid. + * @stable ICU 2.4 + */ + static Transliterator* createBasicInstance(const UnicodeString& id, + const UnicodeString* canon); + + friend class TransliteratorParser; // for parseID() + friend class TransliteratorIDParser; // for createBasicInstance() + friend class TransliteratorAlias; // for setID() + +public: + + /** + * Destructor. 
+ * @stable ICU 2.0 + */ + virtual ~Transliterator(); + + /** + * Implements Cloneable. + * All subclasses are encouraged to implement this method if it is + * possible and reasonable to do so. Subclasses that are to be + * registered with the system using <tt>registerInstance()</tt> + * are required to implement this method. If a subclass does not + * implement clone() properly and is registered with the system + * using registerInstance(), then the default clone() implementation + * will return null, and calls to createInstance() will fail. + * + * @return a copy of the object. + * @see #registerInstance + * @stable ICU 2.0 + */ + virtual Transliterator* clone() const; + + /** + * Transliterates a segment of a string, with optional filtering. + * + * @param text the string to be transliterated + * @param start the beginning index, inclusive; <code>0 <= start + * <= limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit + * <= text.length()</code>. + * @return The new limit index. The text previously occupying <code>[start, + * limit)</code> has been transliterated, possibly to a string of a different + * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where + * <em>new-limit</em> is the return value. If the input offsets are out of bounds, + * the returned value is -1 and the input string remains unchanged. + * @stable ICU 2.0 + */ + virtual int32_t transliterate(Replaceable& text, + int32_t start, int32_t limit) const; + + /** + * Transliterates an entire string in place. Convenience method. + * @param text the string to be transliterated + * @stable ICU 2.0 + */ + virtual void transliterate(Replaceable& text) const; + + /** + * Transliterates the portion of the text buffer that can be + * transliterated unambiguously after new text has been inserted, + * typically as a result of a keyboard event.
The new text in + * <code>insertion</code> will be inserted into <code>text</code> + * at <code>index.limit</code>, advancing + * <code>index.limit</code> by <code>insertion.length()</code>. + * Then the transliterator will try to transliterate characters of + * <code>text</code> between <code>index.cursor</code> and + * <code>index.limit</code>. Characters before + * <code>index.cursor</code> will not be changed. + * + * <p>Upon return, values in <code>index</code> will be updated. + * <code>index.start</code> will be advanced to the first + * character that future calls to this method will read. + * <code>index.cursor</code> and <code>index.limit</code> will + * be adjusted to delimit the range of text that future calls to + * this method may change. + * + * <p>Typical usage of this method begins with an initial call + * with <code>index.start</code> and <code>index.limit</code> + * set to indicate the portion of <code>text</code> to be + * transliterated, and <code>index.cursor == index.start</code>. + * Thereafter, <code>index</code> can be used without + * modification in future calls, provided that all changes to + * <code>text</code> are made via this method. + * + * <p>This method assumes that future calls may be made that will + * insert new text into the buffer. As a result, it only performs + * unambiguous transliterations. After the last call to this + * method, there may be untransliterated text that is waiting for + * more input to resolve an ambiguity. In order to perform these + * pending transliterations, clients should call {@link + * #finishTransliteration } after the last call to this + * method has been made. + * + * @param text the buffer holding transliterated and untransliterated text + * @param index an array of three integers. + * + * <ul><li><code>index.start</code>: the beginning index, + * inclusive; <code>0 <= index.start <= index.limit</code>. 
+ * + * <li><code>index.limit</code>: the ending index, exclusive; + * <code>index.start <= index.limit <= text.length()</code>. + * <code>insertion</code> is inserted at + * <code>index.limit</code>. + * + * <li><code>index.cursor</code>: the next character to be + * considered for transliteration; <code>index.start <= + * index.cursor <= index.limit</code>. Characters before + * <code>index.cursor</code> will not be changed by future calls + * to this method.</ul> + * + * @param insertion text to be inserted and possibly + * transliterated into the translation buffer at + * <code>index.limit</code>. If <code>null</code> then no text + * is inserted. + * @param status Output param to be filled in with a success or an error. + * @see #handleTransliterate + * @exception IllegalArgumentException if <code>index</code> + * is invalid + * @see UTransPosition + * @stable ICU 2.0 + */ + virtual void transliterate(Replaceable& text, UTransPosition& index, + const UnicodeString& insertion, + UErrorCode& status) const; + + /** + * Transliterates the portion of the text buffer that can be + * transliterated unambiguously after a new character has been + * inserted, typically as a result of a keyboard event. This is a + * convenience method. + * @param text the buffer holding transliterated and + * untransliterated text + * @param index an array of three integers. + * @param insertion text to be inserted and possibly + * transliterated into the translation buffer at + * <code>index.limit</code>. + * @param status Output param to be filled in with a success or an error. + * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const + * @stable ICU 2.0 + */ + virtual void transliterate(Replaceable& text, UTransPosition& index, + UChar32 insertion, + UErrorCode& status) const; + + /** + * Transliterates the portion of the text buffer that can be + * transliterated unambiguously.
This is a convenience method; see + * {@link + * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const } + * for details. + * @param text the buffer holding transliterated and + * untransliterated text + * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }. + * @param status Output param to be filled in with a success or an error. + * @see #transliterate(Replaceable, int[], String) + * @stable ICU 2.0 + */ + virtual void transliterate(Replaceable& text, UTransPosition& index, + UErrorCode& status) const; + + /** + * Finishes any pending transliterations that were waiting for + * more characters. Clients should call this method as the last + * call after a sequence of one or more calls to + * <code>transliterate()</code>. + * @param text the buffer holding transliterated and + * untransliterated text. + * @param index the array of indices previously passed to {@link + * #transliterate } + * @stable ICU 2.0 + */ + virtual void finishTransliteration(Replaceable& text, + UTransPosition& index) const; + +private: + + /** + * This internal method does incremental transliteration. If the + * 'insertion' is non-null then we append it to 'text' before + * proceeding. This method calls through to the pure virtual + * framework method handleTransliterate() to do the actual + * work. + * @param text the buffer holding transliterated and + * untransliterated text + * @param index an array of three integers. See {@link + * #transliterate(Replaceable, int[], String)}. + * @param insertion text to be inserted and possibly + * transliterated into the translation buffer at + * <code>index.limit</code>. + * @param status Output param to be filled in with a success or an error.
+ */ + void _transliterate(Replaceable& text, + UTransPosition& index, + const UnicodeString* insertion, + UErrorCode &status) const; + +protected: + + /** + * Abstract method that concrete subclasses define to implement + * their transliteration algorithm. This method handles both + * incremental and non-incremental transliteration. Let + * <code>originalStart</code> refer to the value of + * <code>pos.start</code> upon entry. + * + * <ul> + * <li>If <code>incremental</code> is false, then this method + * should transliterate all characters between + * <code>pos.start</code> and <code>pos.limit</code>. Upon return + * <code>pos.start</code> must == <code> pos.limit</code>.</li> + * + * <li>If <code>incremental</code> is true, then this method + * should transliterate all characters between + * <code>pos.start</code> and <code>pos.limit</code> that can be + * unambiguously transliterated, regardless of future insertions + * of text at <code>pos.limit</code>. Upon return, + * <code>pos.start</code> should be in the range + * [<code>originalStart</code>, <code>pos.limit</code>). + * <code>pos.start</code> should be positioned such that + * characters [<code>originalStart</code>, <code> + * pos.start</code>) will not be changed in the future by this + * transliterator and characters [<code>pos.start</code>, + * <code>pos.limit</code>) are unchanged.</li> + * </ul> + * + * <p>Implementations of this method should also obey the + * following invariants:</p> + * + * <ul> + * <li> <code>pos.limit</code> and <code>pos.contextLimit</code> + * should be updated to reflect changes in length of the text + * between <code>pos.start</code> and <code>pos.limit</code>. 
The + * difference <code> pos.contextLimit - pos.limit</code> should + * not change.</li> + * + * <li><code>pos.contextStart</code> should not change.</li> + * + * <li>Upon return, neither <code>pos.start</code> nor + * <code>pos.limit</code> should be less than + * <code>originalStart</code>.</li> + * + * <li>Text before <code>originalStart</code> and text after + * <code>pos.limit</code> should not change.</li> + * + * <li>Text before <code>pos.contextStart</code> and text after + * <code> pos.contextLimit</code> should be ignored.</li> + * </ul> + * + * <p>Subclasses may safely assume that all characters in + * [<code>pos.start</code>, <code>pos.limit</code>) are filtered. + * In other words, the filter has already been applied by the time + * this method is called. See + * <code>filteredTransliterate()</code>. + * + * <p>This method is <b>not</b> for public consumption. Calling + * this method directly will transliterate + * [<code>pos.start</code>, <code>pos.limit</code>) without + * applying the filter. End user code should call <code> + * transliterate()</code> instead of this method. Subclass code + * and wrapping transliterators should call + * <code>filteredTransliterate()</code> instead of this method.<p> + * + * @param text the buffer holding transliterated and + * untransliterated text + * + * @param pos the indices indicating the start, limit, context + * start, and context limit of the text. + * + * @param incremental if true, assume more text may be inserted at + * <code>pos.limit</code> and act accordingly. Otherwise, + * transliterate all text between <code>pos.start</code> and + * <code>pos.limit</code> and move <code>pos.start</code> up to + * <code>pos.limit</code>. + * + * @see #transliterate + * @stable ICU 2.4 + */ + virtual void handleTransliterate(Replaceable& text, + UTransPosition& pos, + UBool incremental) const = 0; + +public: + /** + * Transliterate a substring of text, as specified by index, taking filters + * into account. 
This method is for subclasses that need to delegate to + * another transliterator, such as CompoundTransliterator. + * @param text the text to be transliterated + * @param index the position indices + * @param incremental if TRUE, then assume more characters may be inserted + * at index.limit, and postpone processing to accommodate future incoming + * characters + * @stable ICU 2.4 + */ + virtual void filteredTransliterate(Replaceable& text, + UTransPosition& index, + UBool incremental) const; + +private: + + /** + * Top-level transliteration method, handling filtering, incremental and + * non-incremental transliteration, and rollback. All transliteration + * public API methods eventually call this method with a rollback argument + * of TRUE. Other entities may call this method but rollback should be + * FALSE. + * + * <p>If this transliterator has a filter, break up the input text into runs + * of unfiltered characters. Pass each run to + * subclass.handleTransliterate(). + * + * <p>In incremental mode, if rollback is TRUE, perform a special + * incremental procedure in which several passes are made over the input + * text, adding one character at a time, and committing successful + * transliterations as they occur. Unsuccessful transliterations are rolled + * back and retried with additional characters to give correct results. + * + * @param text the text to be transliterated + * @param index the position indices + * @param incremental if TRUE, then assume more characters may be inserted + * at index.limit, and postpone processing to accommodate future incoming + * characters + * @param rollback if TRUE and if incremental is TRUE, then perform special + * incremental processing, as described above, and undo partial + * transliterations where necessary. If incremental is FALSE then this + * parameter is ignored.
+ */ + virtual void filteredTransliterate(Replaceable& text, + UTransPosition& index, + UBool incremental, + UBool rollback) const; + +public: + + /** + * Returns the length of the longest context required by this transliterator. + * This is <em>preceding</em> context. The default implementation supplied + * by <code>Transliterator</code> returns zero; subclasses + * that use preceding context should override this method to return the + * correct value. For example, if a transliterator translates "ddd" (where + * d is any digit) to "555" when preceded by "(ddd)", then the preceding + * context length is 5, the length of "(ddd)". + * + * @return The maximum number of preceding context characters this + * transliterator needs to examine + * @stable ICU 2.0 + */ + int32_t getMaximumContextLength(void) const; + +protected: + + /** + * Method for subclasses to use to set the maximum context length. + * @param maxContextLength the new value to be set. + * @see #getMaximumContextLength + * @stable ICU 2.4 + */ + void setMaximumContextLength(int32_t maxContextLength); + +public: + + /** + * Returns a programmatic identifier for this transliterator. + * If this identifier is passed to <code>createInstance()</code>, it + * will return this object, if it has been registered. + * @return a programmatic identifier for this transliterator. + * @see #registerInstance + * @see #registerFactory + * @see #getAvailableIDs + * @stable ICU 2.0 + */ + virtual const UnicodeString& getID(void) const; + + /** + * Returns a name for this transliterator that is appropriate for + * display to the user in the default locale. See {@link + * #getDisplayName } for details. + * @param ID the string identifier for this transliterator + * @param result Output param to receive the display name + * @return A reference to 'result'. 
+ * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, + UnicodeString& result); + + /** + * Returns a name for this transliterator that is appropriate for + * display to the user in the given locale. This name is taken + * from the locale resource data in the standard manner of the + * <code>java.text</code> package. + * + * <p>If no localized names exist in the system resource bundles, + * a name is synthesized using a localized + * <code>MessageFormat</code> pattern from the resource data. The + * arguments to this pattern are an integer followed by one or two + * strings. The integer is the number of strings, either 1 or 2. + * The strings are formed by splitting the ID for this + * transliterator at the first '-'. If there is no '-', then the + * entire ID forms the only string. + * @param ID the string identifier for this transliterator + * @param inLocale the Locale in which the display name should be + * localized. + * @param result Output param to receive the display name + * @return A reference to 'result'. + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID, + const Locale& inLocale, + UnicodeString& result); + + /** + * Returns the filter used by this transliterator, or <tt>NULL</tt> + * if this transliterator uses no filter. + * @return the filter used by this transliterator, or <tt>NULL</tt> + * if this transliterator uses no filter. + * @stable ICU 2.0 + */ + const UnicodeFilter* getFilter(void) const; + + /** + * Returns the filter used by this transliterator, or <tt>NULL</tt> if this + * transliterator uses no filter. The caller must eventually delete the + * result. After this call, this transliterator's filter is set to + * <tt>NULL</tt>. + * @return the filter used by this transliterator, or <tt>NULL</tt> if this + * transliterator uses no filter. 
+ * @stable ICU 2.4 + */ + UnicodeFilter* orphanFilter(void); + + /** + * Changes the filter used by this transliterator. If the filter + * is set to <tt>null</tt> then no filtering will occur. + * + * <p>Callers must take care if a transliterator is in use by + * multiple threads. The filter should not be changed by one + * thread while another thread may be transliterating. + * @param adoptedFilter the new filter to be adopted. + * @stable ICU 2.0 + */ + void adoptFilter(UnicodeFilter* adoptedFilter); + + /** + * Returns this transliterator's inverse. See the class + * documentation for details. This implementation simply inverts + * the two entities in the ID and attempts to retrieve the + * resulting transliterator. That is, if <code>getID()</code> + * returns "A-B", then this method will return the result of + * <code>createInstance("B-A")</code>, or <code>null</code> if that + * call fails. + * + * <p>Subclasses with knowledge of their inverse may wish to + * override this method. + * + * @param status Output param to be filled in with a success or an error. + * @return a transliterator that is an inverse, not necessarily + * exact, of this transliterator, or <code>null</code> if no such + * transliterator is registered. + * @see #registerInstance + * @stable ICU 2.0 + */ + Transliterator* createInverse(UErrorCode& status) const; + + /** + * Returns a <code>Transliterator</code> object given its ID. + * The ID must be either a system transliterator ID or an ID registered + * using <code>registerInstance()</code>. + * + * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> + * @param dir either FORWARD or REVERSE. + * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param to be filled in with a success or an error.
+ * @return A <code>Transliterator</code> object with the given ID + * @see #registerInstance + * @see #getAvailableIDs + * @see #getID + * @stable ICU 2.0 + */ + static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, + UTransDirection dir, + UParseError& parseError, + UErrorCode& status); + + /** + * Returns a <code>Transliterator</code> object given its ID. + * The ID must be either a system transliterator ID or an ID registered + * using <code>registerInstance()</code>. + * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> + * @param dir either FORWARD or REVERSE. + * @param status Output param to be filled in with a success or an error. + * @return A <code>Transliterator</code> object with the given ID + * @stable ICU 2.0 + */ + static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID, + UTransDirection dir, + UErrorCode& status); + + /** + * Returns a <code>Transliterator</code> object constructed from + * the given rule string. This will be a RuleBasedTransliterator, + * if the rule string contains only rules, or a + * CompoundTransliterator, if it contains ID blocks, or a + * NullTransliterator, if it contains ID blocks which parse as + * empty for the given direction. + * @param ID the id for the transliterator. + * @param rules rules, separated by ';' + * @param dir either FORWARD or REVERSE. + * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param set to success/failure code. + * @stable ICU 2.0 + */ + static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID, + const UnicodeString& rules, + UTransDirection dir, + UParseError& parseError, + UErrorCode& status); + + /** + * Create a rule string that can be passed to createFromRules() + * to recreate this transliterator. + * @param result the string to receive the rules. Previous + * contents will be deleted.
+ * @param escapeUnprintable if TRUE then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @stable ICU 2.0 + */ + virtual UnicodeString& toRules(UnicodeString& result, + UBool escapeUnprintable) const; + + /** + * Return the number of elements that make up this transliterator. + * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" + * were created, the return value of this method would be 3. + * + * <p>If this transliterator is not composed of other + * transliterators, then this method returns 1. + * @return the number of transliterators that compose this + * transliterator, or 1 if this transliterator is not composed of + * multiple transliterators + * @stable ICU 3.0 + */ + int32_t countElements() const; + + /** + * Return an element that makes up this transliterator. For + * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek" + * were created, the return value of this method would be one + * of the three transliterator objects that make up that + * transliterator: [NFD, Jamo-Latin, Latin-Greek]. + * + * <p>If this transliterator is not composed of other + * transliterators, then this method will return a reference to + * this transliterator when given the index 0. + * @param index a value from 0..countElements()-1 indicating the + * transliterator to return + * @param ec input-output error code + * @return one of the transliterators that makes up this + * transliterator, if this transliterator is made up of multiple + * transliterators, otherwise a reference to this object if given + * an index of 0 + * @stable ICU 3.0 + */ + const Transliterator& getElement(int32_t index, UErrorCode& ec) const; + + /** + * Returns the set of all characters that may be modified in the + * input text by this Transliterator. 
This incorporates this + * object's current filter; if the filter is changed, the return + * value of this function will change. The default implementation + * returns an empty set. Some subclasses may override {@link + * #handleGetSourceSet } to return a more precise result. The + * return result is approximate in any case and is intended for + * use by tests, tools, or utilities. + * @param result receives result set; previous contents lost + * @return a reference to result + * @see #getTargetSet + * @see #handleGetSourceSet + * @stable ICU 2.4 + */ + UnicodeSet& getSourceSet(UnicodeSet& result) const; + + /** + * Framework method that returns the set of all characters that + * may be modified in the input text by this Transliterator, + * ignoring the effect of this object's filter. The base class + * implementation returns the empty set. Subclasses that wish to + * implement this should override this method. + * @return the set of characters that this transliterator may + * modify. The set may be modified, so subclasses should return a + * newly-created object. + * @param result receives result set; previous contents lost + * @see #getSourceSet + * @see #getTargetSet + * @stable ICU 2.4 + */ + virtual void handleGetSourceSet(UnicodeSet& result) const; + + /** + * Returns the set of all characters that may be generated as + * replacement text by this transliterator. The default + * implementation returns the empty set. Some subclasses may + * override this method to return a more precise result. The + * return result is approximate in any case and is intended for + * use by tests, tools, or utilities requiring such + * meta-information. + * @param result receives result set; previous contents lost + * @return a reference to result + * @see #getTargetSet + * @stable ICU 2.4 + */ + virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; + +public: + + /** + * Registers a factory function that creates transliterators of + * a given ID. 
+ * + * Because ICU may choose to cache Transliterators internally, this must + * be called at application startup, prior to any calls to + * Transliterator::createXXX to avoid undefined behavior. + * + * @param id the ID being registered + * @param factory a function pointer that will be copied and + * called later when the given ID is passed to createInstance() + * @param context a context pointer that will be stored and + * later passed to the factory function when an ID matching + * the registration ID is being instantiated with this factory. + * @stable ICU 2.0 + */ + static void U_EXPORT2 registerFactory(const UnicodeString& id, + Factory factory, + Token context); + + /** + * Registers an instance <tt>obj</tt> of a subclass of + * <code>Transliterator</code> with the system. When + * <tt>createInstance()</tt> is called with an ID string that is + * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is + * returned. + * + * After this call the Transliterator class owns the adoptedObj + * and will delete it. + * + * Because ICU may choose to cache Transliterators internally, this must + * be called at application startup, prior to any calls to + * Transliterator::createXXX to avoid undefined behavior. + * + * @param adoptedObj an instance of subclass of + * <code>Transliterator</code> that defines <tt>clone()</tt> + * @see #createInstance + * @see #registerFactory + * @see #unregister + * @stable ICU 2.0 + */ + static void U_EXPORT2 registerInstance(Transliterator* adoptedObj); + + /** + * Registers an ID string as an alias of another ID string. + * That is, after calling this function, <tt>createInstance(aliasID)</tt> + * will return the same thing as <tt>createInstance(realID)</tt>. + * This is generally used to create shorter, more mnemonic aliases + * for long compound IDs. + * + * @param aliasID The new ID being registered. + * @param realID The ID that the new ID is to be an alias for. 
+ * This can be a compound ID and can include filters and should + * refer to transliterators that have already been registered with + * the framework, although this isn't checked. + * @stable ICU 3.6 + */ + static void U_EXPORT2 registerAlias(const UnicodeString& aliasID, + const UnicodeString& realID); + +protected: + +#ifndef U_HIDE_INTERNAL_API + /** + * @param id the ID being registered + * @param factory a function pointer that will be copied and + * called later when the given ID is passed to createInstance() + * @param context a context pointer that will be stored and + * later passed to the factory function when an ID matching + * the registration ID is being instantiated with this factory. + * @internal + */ + static void _registerFactory(const UnicodeString& id, + Factory factory, + Token context); + + /** + * @internal + */ + static void _registerInstance(Transliterator* adoptedObj); + + /** + * @internal + */ + static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID); + + /** + * Register two targets as being inverses of one another. For + * example, calling registerSpecialInverse("NFC", "NFD", true) causes + * Transliterator to form the following inverse relationships: + * + * <pre>NFC => NFD + * Any-NFC => Any-NFD + * NFD => NFC + * Any-NFD => Any-NFC</pre> + * + * (Without the special inverse registration, the inverse of NFC + * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but + * that the presence or absence of "Any-" is preserved. + * + * <p>The relationship is symmetrical; registering (a, b) is + * equivalent to registering (b, a). + * + * <p>The relevant IDs must still be registered separately as + * factories or classes. + * + * <p>Only the targets are specified. Special inverses always + * have the form Any-Target1 <=> Any-Target2. 
The target should + * have canonical casing (the casing desired to be produced when + * an inverse is formed) and should contain no whitespace or other + * extraneous characters. + * + * @param target the target against which to register the inverse + * @param inverseTarget the inverse of target, that is + * Any-target.getInverse() => Any-inverseTarget + * @param bidirectional if true, register the reverse relation + * as well, that is, Any-inverseTarget.getInverse() => Any-target + * @internal + */ + static void _registerSpecialInverse(const UnicodeString& target, + const UnicodeString& inverseTarget, + UBool bidirectional); +#endif /* U_HIDE_INTERNAL_API */ + +public: + + /** + * Unregisters a transliterator or class. This may be either + * a system transliterator or a user transliterator or class. + * Any attempt to construct an unregistered transliterator based + * on its ID will fail. + * + * Because ICU may choose to cache Transliterators internally, this should + * be called during application shutdown, after all calls to + * Transliterator::createXXX to avoid undefined behavior. + * + * @param ID the ID of the transliterator or class + * @return the <code>Object</code> that was registered with + * <code>ID</code>, or <code>null</code> if none was + * @see #registerInstance + * @see #registerFactory + * @stable ICU 2.0 + */ + static void U_EXPORT2 unregister(const UnicodeString& ID); + +public: + + /** + * Return a StringEnumeration over the IDs available at the time of the + * call, including user-registered IDs. + * @param ec input-output error code + * @return a newly-created StringEnumeration over the transliterators + * available at the time of the call. The caller should delete this object + * when done using it. + * @stable ICU 3.0 + */ + static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec); + + /** + * Return the number of registered source specifiers. + * @return the number of registered source specifiers. 
+ * @stable ICU 2.0 + */ + static int32_t U_EXPORT2 countAvailableSources(void); + + /** + * Return a registered source specifier. + * @param index which specifier to return, from 0 to n-1, where + * n = countAvailableSources() + * @param result fill-in parameter to receive the source specifier. + * If index is out of range, result will be empty. + * @return reference to result + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index, + UnicodeString& result); + + /** + * Return the number of registered target specifiers for a given + * source specifier. + * @param source the given source specifier. + * @return the number of registered target specifiers for a given + * source specifier. + * @stable ICU 2.0 + */ + static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source); + + /** + * Return a registered target specifier for a given source. + * @param index which specifier to return, from 0 to n-1, where + * n = countAvailableTargets(source) + * @param source the source specifier + * @param result fill-in parameter to receive the target specifier. + * If source is invalid or if index is out of range, result will + * be empty. + * @return reference to result + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index, + const UnicodeString& source, + UnicodeString& result); + + /** + * Return the number of registered variant specifiers for a given + * source-target pair. + * @param source the source specifiers. + * @param target the target specifiers. + * @stable ICU 2.0 + */ + static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source, + const UnicodeString& target); + + /** + * Return a registered variant specifier for a given source-target + * pair.
+ * @param index which specifier to return, from 0 to n-1, where + * n = countAvailableVariants(source, target) + * @param source the source specifier + * @param target the target specifier + * @param result fill-in parameter to receive the variant + * specifier. If source is invalid or if target is invalid or if + * index is out of range, result will be empty. + * @return reference to result + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index, + const UnicodeString& source, + const UnicodeString& target, + UnicodeString& result); + +protected: + +#ifndef U_HIDE_INTERNAL_API + /** + * Non-mutexed internal method + * @internal + */ + static int32_t _countAvailableSources(void); + + /** + * Non-mutexed internal method + * @internal + */ + static UnicodeString& _getAvailableSource(int32_t index, + UnicodeString& result); + + /** + * Non-mutexed internal method + * @internal + */ + static int32_t _countAvailableTargets(const UnicodeString& source); + + /** + * Non-mutexed internal method + * @internal + */ + static UnicodeString& _getAvailableTarget(int32_t index, + const UnicodeString& source, + UnicodeString& result); + + /** + * Non-mutexed internal method + * @internal + */ + static int32_t _countAvailableVariants(const UnicodeString& source, + const UnicodeString& target); + + /** + * Non-mutexed internal method + * @internal + */ + static UnicodeString& _getAvailableVariant(int32_t index, + const UnicodeString& source, + const UnicodeString& target, + UnicodeString& result); +#endif /* U_HIDE_INTERNAL_API */ + +protected: + + /** + * Set the ID of this transliterator. Subclasses shouldn't do + * this, unless the underlying script behavior has changed. + * @param id the new id to be set. + * @stable ICU 2.4 + */ + void setID(const UnicodeString& id); + +public: + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID().
+ * Note that Transliterator is an abstract base class, and therefore + * no fully constructed object will have a dynamic + * UClassID that equals the UClassID returned from + * Transliterator::getStaticClassID(). + * @return The class ID for class Transliterator. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID <b>polymorphically</b>. This method + * is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. + * + * <p>Concrete subclasses of Transliterator must use the + * UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from + * uobject.h to provide the RTTI functions. + * + * @return The class ID for this object. All objects of a given + * class have the same class ID. Objects of other classes have + * different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + +private: + static UBool initializeRegistry(UErrorCode &status); + +public: +#ifndef U_HIDE_OBSOLETE_API + /** + * Return the number of IDs currently registered with the system. + * To retrieve the actual IDs, call getAvailableID(i) with + * i from 0 to countAvailableIDs() - 1. + * @return the number of IDs currently registered with the system. + * @obsolete ICU 3.4 use getAvailableIDs() instead + */ + static int32_t U_EXPORT2 countAvailableIDs(void); + + /** + * Return the index-th available ID. index must be between 0 + * and countAvailableIDs() - 1, inclusive. If index is out of + * range, the result of getAvailableID(0) is returned. + * @param index the given ID index. + * @return the index-th available ID. index must be between 0 + * and countAvailableIDs() - 1, inclusive. If index is out of + * range, the result of getAvailableID(0) is returned.
+ * @obsolete ICU 3.4 use getAvailableIDs() instead; this function + * is not thread safe, since it returns a reference to storage that + * may become invalid if another thread calls unregister + */ + static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index); +#endif /* U_HIDE_OBSOLETE_API */ +}; + +inline int32_t Transliterator::getMaximumContextLength(void) const { + return maximumContextLength; +} + +inline void Transliterator::setID(const UnicodeString& id) { + ID = id; + // NUL-terminate the ID string, which is a non-aliased copy. + ID.append((UChar)0); + ID.truncate(ID.length()-1); +} + +#ifndef U_HIDE_INTERNAL_API +inline Transliterator::Token Transliterator::integerToken(int32_t i) { + Token t; + t.integer = i; + return t; +} + +inline Transliterator::Token Transliterator::pointerToken(void* p) { + Token t; + t.pointer = p; + return t; +} +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/tzfmt.h b/intl/icu/source/i18n/unicode/tzfmt.h new file mode 100644 index 000000000..faf4b814f --- /dev/null +++ b/intl/icu/source/i18n/unicode/tzfmt.h @@ -0,0 +1,1097 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2015, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ +#ifndef __TZFMT_H +#define __TZFMT_H + +/** + * \file + * \brief C++ API: TimeZoneFormat + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/timezone.h" +#include "unicode/tznames.h" + +U_CDECL_BEGIN +/** + * Constants for time zone display format style used by format/parse APIs + * in TimeZoneFormat. 
+ * @stable ICU 50 + */ +typedef enum UTimeZoneFormatStyle { + /** + * Generic location format, such as "United States Time (New York)", "Italy Time" + * @stable ICU 50 + */ + UTZFMT_STYLE_GENERIC_LOCATION, + /** + * Generic long non-location format, such as "Eastern Time". + * @stable ICU 50 + */ + UTZFMT_STYLE_GENERIC_LONG, + /** + * Generic short non-location format, such as "ET". + * @stable ICU 50 + */ + UTZFMT_STYLE_GENERIC_SHORT, + /** + * Specific long format, such as "Eastern Standard Time". + * @stable ICU 50 + */ + UTZFMT_STYLE_SPECIFIC_LONG, + /** + * Specific short format, such as "EST", "PDT". + * @stable ICU 50 + */ + UTZFMT_STYLE_SPECIFIC_SHORT, + /** + * Localized GMT offset format, such as "GMT-05:00", "UTC+0100" + * @stable ICU 50 + */ + UTZFMT_STYLE_LOCALIZED_GMT, + /** + * Short localized GMT offset format, such as "GMT-5", "UTC+1:30" + * This style is equivalent to the LDML date format pattern "O". + * @stable ICU 51 + */ + UTZFMT_STYLE_LOCALIZED_GMT_SHORT, + /** + * Short ISO 8601 local time difference (basic format) or the UTC indicator. + * For example, "-05", "+0530", and "Z"(UTC). + * This style is equivalent to the LDML date format pattern "X". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_BASIC_SHORT, + /** + * Short ISO 8601 locale time difference (basic format). + * For example, "-05" and "+0530". + * This style is equivalent to the LDML date format pattern "x". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, + /** + * Fixed width ISO 8601 local time difference (basic format) or the UTC indicator. + * For example, "-0500", "+0530", and "Z"(UTC). + * This style is equivalent to the LDML date format pattern "XX". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_BASIC_FIXED, + /** + * Fixed width ISO 8601 local time difference (basic format). + * For example, "-0500" and "+0530". + * This style is equivalent to the LDML date format pattern "xx". 
+ * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, + /** + * ISO 8601 local time difference (basic format) with optional seconds field, or the UTC indicator. + * For example, "-0500", "+052538", and "Z"(UTC). + * This style is equivalent to the LDML date format pattern "XXXX". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_BASIC_FULL, + /** + * ISO 8601 local time difference (basic format) with optional seconds field. + * For example, "-0500" and "+052538". + * This style is equivalent to the LDML date format pattern "xxxx". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, + /** + * Fixed width ISO 8601 local time difference (extended format) or the UTC indicator. + * For example, "-05:00", "+05:30", and "Z"(UTC). + * This style is equivalent to the LDML date format pattern "XXX". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_EXTENDED_FIXED, + /** + * Fixed width ISO 8601 local time difference (extended format). + * For example, "-05:00" and "+05:30". + * This style is equivalent to the LDML date format pattern "xxx" and "ZZZZZ". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, + /** + * ISO 8601 local time difference (extended format) with optional seconds field, or the UTC indicator. + * For example, "-05:00", "+05:25:38", and "Z"(UTC). + * This style is equivalent to the LDML date format pattern "XXXXX". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_EXTENDED_FULL, + /** + * ISO 8601 local time difference (extended format) with optional seconds field. + * For example, "-05:00" and "+05:25:38". + * This style is equivalent to the LDML date format pattern "xxxxx". + * @stable ICU 51 + */ + UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, + /** + * Time Zone ID, such as "America/Los_Angeles". + * @stable ICU 51 + */ + UTZFMT_STYLE_ZONE_ID, + /** + * Short Time Zone ID (BCP 47 Unicode location extension, time zone type value), such as "uslax". 
+ * @stable ICU 51 + */ + UTZFMT_STYLE_ZONE_ID_SHORT, + /** + * Exemplar location, such as "Los Angeles" and "Paris". + * @stable ICU 51 + */ + UTZFMT_STYLE_EXEMPLAR_LOCATION +} UTimeZoneFormatStyle; + +/** + * Constants for GMT offset pattern types. + * @stable ICU 50 + */ +typedef enum UTimeZoneFormatGMTOffsetPatternType { + /** + * Positive offset with hours and minutes fields + * @stable ICU 50 + */ + UTZFMT_PAT_POSITIVE_HM, + /** + * Positive offset with hours, minutes and seconds fields + * @stable ICU 50 + */ + UTZFMT_PAT_POSITIVE_HMS, + /** + * Negative offset with hours and minutes fields + * @stable ICU 50 + */ + UTZFMT_PAT_NEGATIVE_HM, + /** + * Negative offset with hours, minutes and seconds fields + * @stable ICU 50 + */ + UTZFMT_PAT_NEGATIVE_HMS, + /** + * Positive offset with hours field + * @stable ICU 51 + */ + UTZFMT_PAT_POSITIVE_H, + /** + * Negative offset with hours field + * @stable ICU 51 + */ + UTZFMT_PAT_NEGATIVE_H, + + /* The following cannot be #ifndef U_HIDE_INTERNAL_API, needed for other .h declarations */ + /** + * Number of UTimeZoneFormatGMTOffsetPatternType types. + * @internal + */ + UTZFMT_PAT_COUNT = 6 +} UTimeZoneFormatGMTOffsetPatternType; + +/** + * Constants for time types used by TimeZoneFormat APIs for + * receiving time type (standard time, daylight time or unknown). + * @stable ICU 50 + */ +typedef enum UTimeZoneFormatTimeType { + /** + * Unknown + * @stable ICU 50 + */ + UTZFMT_TIME_TYPE_UNKNOWN, + /** + * Standard time + * @stable ICU 50 + */ + UTZFMT_TIME_TYPE_STANDARD, + /** + * Daylight saving time + * @stable ICU 50 + */ + UTZFMT_TIME_TYPE_DAYLIGHT +} UTimeZoneFormatTimeType; + +/** + * Constants for parse option flags, used for specifying optional parse behavior. + * @stable ICU 50 + */ +typedef enum UTimeZoneFormatParseOption { + /** + * No option. 
+ * @stable ICU 50 + */ + UTZFMT_PARSE_OPTION_NONE = 0x00, + /** + * When a time zone display name is not found within a set of display names + * used for the specified style, look for the name from display names used + * by other styles. + * @stable ICU 50 + */ + UTZFMT_PARSE_OPTION_ALL_STYLES = 0x01, + /** + * When parsing a time zone display name in UTZFMT_STYLE_SPECIFIC_SHORT, + * look for the IANA tz database compatible zone abbreviations in addition + * to the localized names coming from the {@link TimeZoneNames} currently + * used by the {@link TimeZoneFormat}. + * @stable ICU 54 + */ + UTZFMT_PARSE_OPTION_TZ_DATABASE_ABBREVIATIONS = 0x02 +} UTimeZoneFormatParseOption; + +U_CDECL_END + +U_NAMESPACE_BEGIN + +class TimeZoneGenericNames; +class TZDBTimeZoneNames; +class UVector; + +/** + * <code>TimeZoneFormat</code> supports time zone display name formatting and parsing. + * An instance of TimeZoneFormat works as a subformatter of {@link SimpleDateFormat}, + * but you can also directly get a new instance of <code>TimeZoneFormat</code> and + * formatting/parsing time zone display names. + * <p> + * ICU implements the time zone display names defined by <a href="http://www.unicode.org/reports/tr35/">UTS#35 + * Unicode Locale Data Markup Language (LDML)</a>. {@link TimeZoneNames} represents the + * time zone display name data model and this class implements the algorithm for actual + * formatting and parsing. + * + * @see SimpleDateFormat + * @see TimeZoneNames + * @stable ICU 50 + */ +class U_I18N_API TimeZoneFormat : public Format { +public: + /** + * Copy constructor. + * @stable ICU 50 + */ + TimeZoneFormat(const TimeZoneFormat& other); + + /** + * Destructor. + * @stable ICU 50 + */ + virtual ~TimeZoneFormat(); + + /** + * Assignment operator. + * @stable ICU 50 + */ + TimeZoneFormat& operator=(const TimeZoneFormat& other); + + /** + * Return true if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. 
+ * @param other The object to be compared with. + * @return Return TRUE if the given Format objects are semantically equal. + * Objects of different subclasses are considered unequal. + * @stable ICU 50 + */ + virtual UBool operator==(const Format& other) const; + + /** + * Clone this object polymorphically. The caller is responsible + * for deleting the result when done. + * @return A copy of the object + * @stable ICU 50 + */ + virtual Format* clone() const; + + /** + * Creates an instance of <code>TimeZoneFormat</code> for the given locale. + * @param locale The locale. + * @param status Receives the status. + * @return An instance of <code>TimeZoneFormat</code> for the given locale, + * owned by the caller. + * @stable ICU 50 + */ + static TimeZoneFormat* U_EXPORT2 createInstance(const Locale& locale, UErrorCode& status); + + /** + * Returns the time zone display name data used by this instance. + * @return The time zone display name data. + * @stable ICU 50 + */ + const TimeZoneNames* getTimeZoneNames() const; + + /** + * Sets the time zone display name data to this format instance. + * The caller should not delete the TimeZoneNames object after it is adopted + * by this call. + * @param tznames TimeZoneNames object to be adopted. + * @stable ICU 50 + */ + void adoptTimeZoneNames(TimeZoneNames *tznames); + + /** + * Sets the time zone display name data to this format instance. + * @param tznames TimeZoneNames object to be set. + * @stable ICU 50 + */ + void setTimeZoneNames(const TimeZoneNames &tznames); + + /** + * Returns the localized GMT format pattern. + * @param pattern Receives the localized GMT format pattern. + * @return A reference to the result pattern. + * @see #setGMTPattern + * @stable ICU 50 + */ + UnicodeString& getGMTPattern(UnicodeString& pattern) const; + + /** + * Sets the localized GMT format pattern. The pattern must contain + * a single argument {0}, for example "GMT {0}". 
+ * @param pattern The localized GMT format pattern to be used by this object. + * @param status Receives the status. + * @see #getGMTPattern + * @stable ICU 50 + */ + void setGMTPattern(const UnicodeString& pattern, UErrorCode& status); + + /** + * Returns the offset pattern used for localized GMT format. + * @param type The offset pattern type enum. + * @param pattern Receives the offset pattern. + * @return A reference to the result pattern. + * @see #setGMTOffsetPattern + * @stable ICU 50 + */ + UnicodeString& getGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type, UnicodeString& pattern) const; + + /** + * Sets the offset pattern for the given offset type. + * @param type The offset pattern type enum. + * @param pattern The offset pattern used for localized GMT format for the type. + * @param status Receives the status. + * @see #getGMTOffsetPattern + * @stable ICU 50 + */ + void setGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type, const UnicodeString& pattern, UErrorCode& status); + + /** + * Returns the decimal digit characters used for localized GMT format. + * The return string contains exactly 10 code points (may include Unicode + * supplementary character) representing digit 0 to digit 9 in the ascending + * order. + * @param digits Receives the decimal digits used for localized GMT format. + * @return A reference to the result. + * @see #setGMTOffsetDigits + * @stable ICU 50 + */ + UnicodeString& getGMTOffsetDigits(UnicodeString& digits) const; + + /** + * Sets the decimal digit characters used for localized GMT format. + * The input <code>digits</code> must contain exactly 10 code points + * (Unicode supplementary characters are also allowed) representing + * digit 0 to digit 9 in the ascending order. When the input <code>digits</code> + * does not satisfy the condition, <code>U_ILLEGAL_ARGUMENT_ERROR</code> + * will be set to the return status. + * @param digits The decimal digits used for localized GMT format. + * @param status Receives the status. 
+ * @see #getGMTOffsetDigits + * @stable ICU 50 + */ + void setGMTOffsetDigits(const UnicodeString& digits, UErrorCode& status); + + /** + * Returns the localized GMT format string for GMT(UTC) itself (GMT offset is 0). + * @param gmtZeroFormat Receives the localized GMT format string for GMT(UTC) itself. + * @return A reference to the result GMT string. + * @see #setGMTZeroFormat + * @stable ICU 50 + */ + UnicodeString& getGMTZeroFormat(UnicodeString& gmtZeroFormat) const; + + /** + * Sets the localized GMT format string for GMT(UTC) itself (GMT offset is 0). + * @param gmtZeroFormat The localized GMT format string for GMT(UTC). + * @param status Receives the status. + * @see #getGMTZeroFormat + * @stable ICU 50 + */ + void setGMTZeroFormat(const UnicodeString& gmtZeroFormat, UErrorCode& status); + + /** + * Returns the bitwise flags of UTimeZoneFormatParseOption representing the default parse + * options used by this object. + * @return the default parse options. + * @see UTimeZoneFormatParseOption + * @stable ICU 50 + */ + uint32_t getDefaultParseOptions(void) const; + + /** + * Sets the default parse options. + * <p><b>Note</b>: By default, an instance of <code>TimeZoneFormat</code> + * created by {@link #createInstance} has no parse options set (UTZFMT_PARSE_OPTION_NONE). + * To specify multiple options, use bitwise flags of UTimeZoneFormatParseOption. + * @param flags The default parse options, as bitwise flags of UTimeZoneFormatParseOption. + * @see #UTimeZoneFormatParseOption + * @stable ICU 50 + */ + void setDefaultParseOptions(uint32_t flags); + + /** + * Returns the ISO 8601 basic time zone string for the given offset. + * For example, "-08", "-0830" and "Z" + * + * @param offset the offset from GMT(UTC) in milliseconds. + * @param useUtcIndicator true if ISO 8601 UTC indicator "Z" is used when the offset is 0. + * @param isShort true if shortest form is used. + * @param ignoreSeconds true if non-zero offset seconds is appended. + * @param result Receives the ISO format string. + * @param status Receives the status + * @return the ISO 8601 basic format. 
+ * @see #formatOffsetISO8601Extended + * @see #parseOffsetISO8601 + * @stable ICU 51 + */ + UnicodeString& formatOffsetISO8601Basic(int32_t offset, UBool useUtcIndicator, UBool isShort, UBool ignoreSeconds, + UnicodeString& result, UErrorCode& status) const; + + /** + * Returns the ISO 8601 extended time zone string for the given offset. + * For example, "-08:00", "-08:30" and "Z" + * + * @param offset the offset from GMT(UTC) in milliseconds. + * @param useUtcIndicator true if ISO 8601 UTC indicator "Z" is used when the offset is 0. + * @param isShort true if shortest form is used. + * @param ignoreSeconds true if non-zero offset seconds is appended. + * @param result Receives the ISO format string. + * @param status Receives the status + * @return the ISO 8601 extended format. + * @see #formatOffsetISO8601Basic + * @see #parseOffsetISO8601 + * @stable ICU 51 + */ + UnicodeString& formatOffsetISO8601Extended(int32_t offset, UBool useUtcIndicator, UBool isShort, UBool ignoreSeconds, + UnicodeString& result, UErrorCode& status) const; + + /** + * Returns the localized GMT(UTC) offset format for the given offset. + * The localized GMT offset is defined by: + * <ul> + * <li>GMT format pattern (e.g. "GMT {0}" - see {@link #getGMTPattern}) + * <li>Offset time pattern (e.g. "+HH:mm" - see {@link #getGMTOffsetPattern}) + * <li>Offset digits (e.g. "0123456789" - see {@link #getGMTOffsetDigits}) + * <li>GMT zero format (e.g. "GMT" - see {@link #getGMTZeroFormat}) + * </ul> + * This format always uses 2 digit hours and minutes. When the given offset has non-zero + * seconds, 2 digit seconds field will be appended. For example, + * GMT+05:00 and GMT+05:28:06. + * @param offset the offset from GMT(UTC) in milliseconds. + * @param result Receives the localized GMT format string. + * @param status Receives the status + * @return A reference to the result. 
+ * @see #parseOffsetLocalizedGMT + * @stable ICU 50 + */ + UnicodeString& formatOffsetLocalizedGMT(int32_t offset, UnicodeString& result, UErrorCode& status) const; + + /** + * Returns the short localized GMT(UTC) offset format for the given offset. + * The short localized GMT offset is defined by; + * <ul> + * <li>GMT format pattern (e.g. "GMT {0}" - see {@link #getGMTPattern}) + * <li>Offset time pattern (e.g. "+HH:mm" - see {@link #getGMTOffsetPattern}) + * <li>Offset digits (e.g. "0123456789" - see {@link #getGMTOffsetDigits}) + * <li>GMT zero format (e.g. "GMT" - see {@link #getGMTZeroFormat}) + * </ul> + * This format uses the shortest representation of offset. The hours field does not + * have leading zero and lower fields with zero will be truncated. For example, + * GMT+5 and GMT+530. + * @param offset the offset from GMT(UTC) in milliseconds. + * @param status Receives the status + * @param result Receives the short localized GMT format string. + * @return A reference to the result. + * @see #parseOffsetShortLocalizedGMT + * @stable ICU 51 + */ + UnicodeString& formatOffsetShortLocalizedGMT(int32_t offset, UnicodeString& result, UErrorCode& status) const; + + using Format::format; + + /** + * Returns the display name of the time zone at the given date for the style. + * @param style The style (e.g. <code>UTZFMT_STYLE_GENERIC_LONG</code>, <code>UTZFMT_STYLE_LOCALIZED_GMT</code>...) + * @param tz The time zone. + * @param date The date. + * @param name Receives the display name. + * @param timeType the output argument for receiving the time type (standard/daylight/unknown) + * used for the display name, or NULL if the information is not necessary. 
+ * @return A reference to the result + * @see #UTimeZoneFormatStyle + * @see #UTimeZoneFormatTimeType + * @stable ICU 50 + */ + virtual UnicodeString& format(UTimeZoneFormatStyle style, const TimeZone& tz, UDate date, + UnicodeString& name, UTimeZoneFormatTimeType* timeType = NULL) const; + + /** + * Returns offset from GMT(UTC) in milliseconds for the given ISO 8601 + * style time zone string. When the given string is not an ISO 8601 time zone + * string, this method sets the current position as the error index + * to <code>ParsePosition pos</code> and returns 0. + * @param text The text contains ISO8601 style time zone string (e.g. "-08:00", "Z") + * at the position. + * @param pos The ParsePosition object. + * @return The offset from GMT(UTC) in milliseconds for the given ISO 8601 style + * time zone string. + * @see #formatOffsetISO8601Basic + * @see #formatOffsetISO8601Extended + * @stable ICU 50 + */ + int32_t parseOffsetISO8601(const UnicodeString& text, ParsePosition& pos) const; + + /** + * Returns offset from GMT(UTC) in milliseconds for the given localized GMT + * offset format string. When the given string cannot be parsed, this method + * sets the current position as the error index to <code>ParsePosition pos</code> + * and returns 0. + * @param text The text contains a localized GMT offset string at the position. + * @param pos The ParsePosition object. + * @return The offset from GMT(UTC) in milliseconds for the given localized GMT + * offset format string. + * @see #formatOffsetLocalizedGMT + * @stable ICU 50 + */ + int32_t parseOffsetLocalizedGMT(const UnicodeString& text, ParsePosition& pos) const; + + /** + * Returns offset from GMT(UTC) in milliseconds for the given short localized GMT + * offset format string. When the given string cannot be parsed, this method + * sets the current position as the error index to <code>ParsePosition pos</code> + * and returns 0. 
+ * @param text The text contains a short localized GMT offset string at the position. + * @param pos The ParsePosition object. + * @return The offset from GMT(UTC) in milliseconds for the given short localized GMT + * offset format string. + * @see #formatOffsetShortLocalizedGMT + * @stable ICU 51 + */ + int32_t parseOffsetShortLocalizedGMT(const UnicodeString& text, ParsePosition& pos) const; + + /** + * Returns a <code>TimeZone</code> by parsing the time zone string according to + * the given parse position, the specified format style and parse options. + * + * @param style The format style + * @param text The text contains a time zone string at the position. + * @param pos The position. + * @param parseOptions The parse options represented by bitwise flags of UTimeZoneFormatParseOption. + * @param timeType The output argument for receiving the time type (standard/daylight/unknown), + * or NULL if the information is not necessary. + * @return A <code>TimeZone</code>, or null if the input could not be parsed. + * @see UTimeZoneFormatStyle + * @see UTimeZoneFormatParseOption + * @see UTimeZoneFormatTimeType + * @stable ICU 50 + */ + virtual TimeZone* parse(UTimeZoneFormatStyle style, const UnicodeString& text, ParsePosition& pos, + int32_t parseOptions, UTimeZoneFormatTimeType* timeType = NULL) const; + + /** + * Returns a <code>TimeZone</code> by parsing the time zone string according to + * the given parse position, the specified format style and the default parse options. + * + * @param style The format style + * @param text The text contains a time zone string at the position. + * @param pos The position. + * @param timeType The output argument for receiving the time type (standard/daylight/unknown), + * or NULL if the information is not necessary. + * @return A <code>TimeZone</code>, or null if the input could not be parsed. 
+ * @see UTimeZoneFormatStyle + * @see UTimeZoneFormatParseOption + * @see UTimeZoneFormatTimeType + * @stable ICU 50 + */ + TimeZone* parse(UTimeZoneFormatStyle style, const UnicodeString& text, ParsePosition& pos, + UTimeZoneFormatTimeType* timeType = NULL) const; + + /* ---------------------------------------------- + * Format APIs + * ---------------------------------------------- */ + + /** + * Format an object to produce a time zone display string using localized GMT offset format. + * This method handles Formattable objects with a <code>TimeZone</code>. If the Formattable + * object type is not a <code>TimeZone</code>, then it returns a failing UErrorCode. + * @param obj The object to format. Must be a <code>TimeZone</code>. + * @param appendTo Output parameter to receive result. Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @stable ICU 50 + */ + virtual UnicodeString& format(const Formattable& obj, UnicodeString& appendTo, + FieldPosition& pos, UErrorCode& status) const; + + /** + * Parse a string to produce an object. This method handles parsing of + * time zone display strings into Formattable objects with <code>TimeZone</code>. + * This method is declared <code>void</code>: the parse result is delivered through + * the <code>result</code> parameter rather than a return value. + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. Upon return this param is set to the position after the + * last character successfully parsed. If the source is not parsed successfully, this param + * will remain unchanged. 
+ * @stable ICU 50 + */ + virtual void parseObject(const UnicodeString& source, Formattable& result, ParsePosition& parse_pos) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * @stable ICU 50 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * @stable ICU 50 + */ + virtual UClassID getDynamicClassID() const; + +protected: + /** + * Constructs a TimeZoneFormat object for the specified locale. + * @param locale the locale + * @param status receives the status. + * @stable ICU 50 + */ + TimeZoneFormat(const Locale& locale, UErrorCode& status); + +private: + /* Locale of this object */ + Locale fLocale; + + /* Stores the region (could be implicit default) */ + char fTargetRegion[ULOC_COUNTRY_CAPACITY]; + + /* TimeZoneNames object used by this formatter */ + TimeZoneNames* fTimeZoneNames; + + /* TimeZoneGenericNames object used by this formatter - lazily instantiated */ + TimeZoneGenericNames* fTimeZoneGenericNames; + + /* Localized GMT format pattern - e.g. "GMT{0}" */ + UnicodeString fGMTPattern; + + /* Array of offset patterns used by Localized GMT format - e.g. "+HH:mm" */ + UnicodeString fGMTOffsetPatterns[UTZFMT_PAT_COUNT]; + + /* Localized decimal digits used by Localized GMT format */ + UChar32 fGMTOffsetDigits[10]; + + /* Localized GMT zero format - e.g. 
"GMT" */ + UnicodeString fGMTZeroFormat; + + /* Bit flags representing parse options */ + uint32_t fDefParseOptionFlags; + + /* Constant parts of GMT format pattern, populated from localized GMT format pattern */ + UnicodeString fGMTPatternPrefix; /* Substring before {0} */ + UnicodeString fGMTPatternSuffix; /* Substring after {0} */ + + /* Compiled offset patterns generated from fGMTOffsetPatterns[] */ + UVector* fGMTOffsetPatternItems[UTZFMT_PAT_COUNT]; + + /* Updated by checkAbuttingHoursAndMinutes(), which checks for GMT offset patterns that have no separator between the hours and minutes fields */ + UBool fAbuttingOffsetHoursAndMinutes; + + /* TZDBTimeZoneNames object used for parsing */ + TZDBTimeZoneNames* fTZDBTimeZoneNames; + + /** + * Returns the time zone's specific format string. + * @param tz the time zone + * @param stdType the name type used for standard time + * @param dstType the name type used for daylight time + * @param date the date + * @param name receives the time zone's specific format name string + * @param timeType output argument for the time type. NOTE(review): the previous wording, + * "when null, actual time type is set", looks inverted; presumably the actual + * time type is stored only when this pointer is non-null - confirm against + * the implementation. + * @return a reference to name. + */ + UnicodeString& formatSpecific(const TimeZone& tz, UTimeZoneNameType stdType, UTimeZoneNameType dstType, + UDate date, UnicodeString& name, UTimeZoneFormatTimeType *timeType) const; + + /** + * Returns the time zone's generic format string. + * @param tz the time zone + * @param genType the generic name type + * @param date the date + * @param name receives the time zone's generic format name string + * @return a reference to name. + */ + UnicodeString& formatGeneric(const TimeZone& tz, int32_t genType, UDate date, UnicodeString& name) const; + + /** + * Lazily create a TimeZoneGenericNames instance + * @param status receives the status + * @return the cached TimeZoneGenericNames. + */ + const TimeZoneGenericNames* getTimeZoneGenericNames(UErrorCode& status) const; + + /** + * Lazily create a TZDBTimeZoneNames instance + * @param status receives the status + * @return the cached TZDBTimeZoneNames. 
+ */ + const TZDBTimeZoneNames* getTZDBTimeZoneNames(UErrorCode& status) const; + + /** + * Private method returning the time zone's exemplar location string. + * This method will never return empty. + * @param tz the time zone + * @param name receives the time zone's exemplar location name + * @return a reference to name. + */ + UnicodeString& formatExemplarLocation(const TimeZone& tz, UnicodeString& name) const; + + /** + * Private enum specifying a combination of offset fields + */ + enum OffsetFields { + FIELDS_H, + FIELDS_HM, + FIELDS_HMS + }; + + /** + * Parses the localized GMT pattern string and initializes the + * localized GMT pattern fields. + * @param gmtPattern the localized GMT pattern string such as "GMT {0}" + * @param status U_ILLEGAL_ARGUMENT_ERROR is set when the specified pattern does not + * contain an argument "{0}". + */ + void initGMTPattern(const UnicodeString& gmtPattern, UErrorCode& status); + + /** + * Parse the GMT offset pattern into runtime optimized format. + * @param pattern the offset pattern string + * @param required the required set of fields, such as FIELDS_HM + * @param status U_ILLEGAL_ARGUMENT_ERROR is set when the specified pattern does not contain + * pattern letters for the required fields. + * @return A list of GMTOffsetField objects, or NULL on error. + */ + static UVector* parseOffsetPattern(const UnicodeString& pattern, OffsetFields required, UErrorCode& status); + + /** + * Appends seconds field to the offset pattern with hour/minute + * Note: This code will be obsoleted once we add hour-minute-second pattern data in CLDR. 
+ * @param offsetHM the offset pattern including hours and minutes fields + * @param result the output offset pattern including hour, minute and seconds fields + * @param status receives the status + * @return a reference to result + */ + static UnicodeString& expandOffsetPattern(const UnicodeString& offsetHM, UnicodeString& result, UErrorCode& status); + + /** + * Truncates the minutes field from the offset pattern with hour/minute. + * Note: This code will be obsoleted once we add hour pattern data in CLDR. + * @param offsetHM the offset pattern including hours and minutes fields + * @param result the output offset pattern including only hours field + * @param status receives the status + * @return a reference to result + */ + static UnicodeString& truncateOffsetPattern(const UnicodeString& offsetHM, UnicodeString& result, UErrorCode& status); + + /** + * Break input string into UChar32[]. Each array element represents + * a code point. This method is used for parsing localized digit + * characters and supports characters in Unicode supplementary planes. + * @param str the string + * @param codeArray receives the result + * @param capacity the capacity of codeArray + * @return TRUE when the specified code array is fully filled with code points + * (no under/overflow). + */ + static UBool toCodePoints(const UnicodeString& str, UChar32* codeArray, int32_t capacity); + + /** + * Private method supporting all of the ISO 8601 formats. + * @param offset the offset from GMT(UTC) in milliseconds. + * @param isBasic presumably true for the basic format and false for the extended format + * (this parameter was undocumented; confirm against the implementation). + * @param useUtcIndicator true if ISO 8601 UTC indicator "Z" is used when the offset is 0. + * @param isShort true if shortest form is used. + * @param ignoreSeconds true if non-zero offset seconds is appended. + * @param result Receives the result + * @param status Receives the status + * @return the ISO 8601 format. 
+ */ + UnicodeString& formatOffsetISO8601(int32_t offset, UBool isBasic, UBool useUtcIndicator, + UBool isShort, UBool ignoreSeconds, UnicodeString& result, UErrorCode& status) const; + + /** + * Private method used for localized GMT formatting. + * @param offset the zone's UTC offset + * @param isShort true if the short localized GMT format is desired. + * @param result receives the localized GMT format string + * @param status receives the status + */ + UnicodeString& formatOffsetLocalizedGMT(int32_t offset, UBool isShort, UnicodeString& result, UErrorCode& status) const; + + /** + * Returns offset from GMT(UTC) in milliseconds for the given ISO 8601 style + * (extended format) time zone string. When the given string is not an ISO 8601 time + * zone string, this method sets the current position as the error index + * to <code>ParsePosition pos</code> and returns 0. + * @param text the text contains ISO 8601 style time zone string (e.g. "-08:00", "Z") + * at the position. + * @param pos the position, non-negative error index will be set on failure. + * @param extendedOnly TRUE if parsing the text as ISO 8601 extended offset format (e.g. "-08:00"), + * or FALSE to evaluate the text as basic format. + * @param hasDigitOffset receiving if the parsed zone string contains offset digits. + * @return the offset from GMT(UTC) in milliseconds for the given ISO 8601 style + * time zone string. + */ + int32_t parseOffsetISO8601(const UnicodeString& text, ParsePosition& pos, UBool extendedOnly, + UBool* hasDigitOffset = NULL) const; + + /** + * Appends localized digits to the buffer. + * This code assumes that the input number is 0 - 59 + * @param buf the target buffer + * @param n the integer number + * @param minDigits the minimum digits width + */ + void appendOffsetDigits(UnicodeString& buf, int32_t n, uint8_t minDigits) const; + + /** + * Returns offset from GMT(UTC) in milliseconds for the given localized GMT + * offset format string. 
When the given string cannot be parsed, this method + * sets the current position as the error index to <code>ParsePosition pos</code> + * and returns 0. + * @param text the text contains a localized GMT offset string at the position. + * @param pos the position, non-negative error index will be set on failure. + * @param isShort true if this parser to try the short format first + * @param hasDigitOffset receiving if the parsed zone string contains offset digits. + * @return the offset from GMT(UTC) in milliseconds for the given localized GMT + * offset format string. + */ + int32_t parseOffsetLocalizedGMT(const UnicodeString& text, ParsePosition& pos, + UBool isShort, UBool* hasDigitOffset) const; + + /** + * Parse localized GMT format generated by the pattern used by this formatter, except + * GMT Zero format. + * @param text the input text + * @param start the start index + * @param isShort true if the short localized format is parsed. + * @param parsedLen receives the parsed length + * @return the parsed offset in milliseconds + */ + int32_t parseOffsetLocalizedGMTPattern(const UnicodeString& text, int32_t start, + UBool isShort, int32_t& parsedLen) const; + + /** + * Parses localized GMT offset fields into offset. + * @param text the input text + * @param start the start index + * @param isShort true if this is a short format - currently not used + * @param parsedLen the parsed length, or 0 on failure. + * @return the parsed offset in milliseconds. + */ + int32_t parseOffsetFields(const UnicodeString& text, int32_t start, UBool isShort, int32_t& parsedLen) const; + + /** + * Parse localized GMT offset fields with the given pattern. 
+ * @param text the input text + * @param start the start index + * @param patternItems the pattern (already itemized) + * @param forceSingleHourDigit true if hours field is parsed as a single digit + * @param hour receives the hour offset field + * @param min receives the minute offset field + * @param sec receives the second offset field + * @return the parsed length + */ + int32_t parseOffsetFieldsWithPattern(const UnicodeString& text, int32_t start, + UVector* patternItems, UBool forceSingleHourDigit, int32_t& hour, int32_t& min, int32_t& sec) const; + + /** + * Parses abutting localized GMT offset fields (such as 0800) into offset. + * @param text the input text + * @param start the start index + * @param parsedLen the parsed length, or 0 on failure + * @return the parsed offset in milliseconds. + */ + int32_t parseAbuttingOffsetFields(const UnicodeString& text, int32_t start, int32_t& parsedLen) const; + + /** + * Parses the input text using the default format patterns (e.g. "UTC{0}"). + * @param text the input text + * @param start the start index + * @param parsedLen the parsed length, or 0 on failure + * @return the parsed offset in milliseconds. + */ + int32_t parseOffsetDefaultLocalizedGMT(const UnicodeString& text, int start, int32_t& parsedLen) const; + + /** + * Parses the input GMT offset fields with the default offset pattern. + * @param text the input text + * @param start the start index + * @param separator the separator character, e.g. ':' + * @param parsedLen the parsed length, or 0 on failure. + * @return the parsed offset in milliseconds. + */ + int32_t parseDefaultOffsetFields(const UnicodeString& text, int32_t start, UChar separator, + int32_t& parsedLen) const; + + /** + * Reads an offset field value. 
This method will stop parsing when + * 1) number of digits reaches <code>maxDigits</code> + * 2) just before already parsed number exceeds <code>maxVal</code> + * + * @param text the text + * @param start the start offset + * @param minDigits the minimum number of required digits + * @param maxDigits the maximum number of digits + * @param minVal the minimum value + * @param maxVal the maximum value + * @param parsedLen the actual parsed length. + * @return the integer value parsed + */ + int32_t parseOffsetFieldWithLocalizedDigits(const UnicodeString& text, int32_t start, + uint8_t minDigits, uint8_t maxDigits, uint16_t minVal, uint16_t maxVal, int32_t& parsedLen) const; + + /** + * Reads a single decimal digit, either localized digits used by this object + * or any Unicode numeric character. + * @param text the text + * @param start the start index + * @param len the actual length read from the text + * the start index is not a decimal number. + * @return the integer value of the parsed digit, or -1 on failure. + */ + int32_t parseSingleLocalizedDigit(const UnicodeString& text, int32_t start, int32_t& len) const; + + /** + * Formats offset using ASCII digits. The input offset range must be + * within +/-24 hours (exclusive). + * @param offset The offset + * @param sep The field separator character or 0 if not required + * @param minFields The minimum fields + * @param maxFields The maximum fields + * @return The offset string + */ + static UnicodeString& formatOffsetWithAsciiDigits(int32_t offset, UChar sep, + OffsetFields minFields, OffsetFields maxFields, UnicodeString& result); + + /** + * Parses offset represented by contiguous ASCII digits. + * <p> + * Note: This method expects the input position is already at the start of + * ASCII digits and does not parse sign (+/-). 
+ * @param text The text contains a sequence of ASCII digits + * @param pos The parse position + * @param minFields The minimum Fields to be parsed + * @param maxFields The maximum Fields to be parsed + * @param fixedHourWidth true if hours field must be width of 2 + * @return Parsed offset, 0 or positive number. + */ + static int32_t parseAbuttingAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, + OffsetFields minFields, OffsetFields maxFields, UBool fixedHourWidth); + + /** + * Parses offset represented by ASCII digits and separators. + * <p> + * Note: This method expects the input position is already at the start of + * ASCII digits and does not parse sign (+/-). + * @param text The text + * @param pos The parse position + * @param sep The separator character + * @param minFields The minimum Fields to be parsed + * @param maxFields The maximum Fields to be parsed + * @return Parsed offset, 0 or positive number. + */ + static int32_t parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, UChar sep, + OffsetFields minFields, OffsetFields maxFields); + + /** + * Unquotes the message format style pattern. + * @param pattern the pattern + * @param result receive the unquoted pattern. + * @return A reference to result. + */ + static UnicodeString& unquote(const UnicodeString& pattern, UnicodeString& result); + + /** + * Initialize localized GMT format offset hour/min/sec patterns. + * This method parses patterns into optimized run-time format. + * @param status receives the status. + */ + void initGMTOffsetPatterns(UErrorCode& status); + + /** + * Check if there are any GMT format offset patterns without + * any separators between hours field and minutes field and update + * fAbuttingOffsetHoursAndMinutes field. This method must be called + * after all patterns are parsed into pattern items. 
+ */ + void checkAbuttingHoursAndMinutes(); + + /** + * Creates an instance of TimeZone for the given offset + * @param offset the offset + * @return A TimeZone with the given offset + */ + TimeZone* createTimeZoneForOffset(int32_t offset) const; + + /** + * Returns the time type for the given name type + * @param nameType the name type + * @return the time type (unknown/standard/daylight) + */ + static UTimeZoneFormatTimeType getTimeType(UTimeZoneNameType nameType); + + /** + * Returns the time zone ID of a match at the specified index within + * the MatchInfoCollection. + * @param matches the collection of matches + * @param idx the index within matches + * @param tzID receives the resolved time zone ID + * @return a reference to tzID. + */ + UnicodeString& getTimeZoneID(const TimeZoneNames::MatchInfoCollection* matches, int32_t idx, UnicodeString& tzID) const; + + + /** + * Parse a zone ID. + * @param text the text contains a time zone ID string at the position. + * @param pos the position + * @param tzID receives the zone ID + * @return a reference to tzID + */ + UnicodeString& parseZoneID(const UnicodeString& text, ParsePosition& pos, UnicodeString& tzID) const; + + /** + * Parse a short zone ID. + * @param text the text contains a short time zone ID string at the position. + * @param pos the position + * @param tzID receives the short zone ID + * @return a reference to tzID + */ + UnicodeString& parseShortZoneID(const UnicodeString& text, ParsePosition& pos, UnicodeString& tzID) const; + + /** + * Parse an exemplar location string. + * @param text the text contains an exemplar location string at the position. + * @param pos the position.
+ * @param tzID receives the time zone ID + * @return a reference to tzID + */ + UnicodeString& parseExemplarLocation(const UnicodeString& text, ParsePosition& pos, UnicodeString& tzID) const; +}; + +U_NAMESPACE_END + +#endif /* !UCONFIG_NO_FORMATTING */ +#endif diff --git a/intl/icu/source/i18n/unicode/tznames.h b/intl/icu/source/i18n/unicode/tznames.h new file mode 100644 index 000000000..8861a7d02 --- /dev/null +++ b/intl/icu/source/i18n/unicode/tznames.h @@ -0,0 +1,416 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ +#ifndef __TZNAMES_H +#define __TZNAMES_H + +/** + * \file + * \brief C++ API: TimeZoneNames + */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uloc.h" +#include "unicode/unistr.h" + +U_CDECL_BEGIN + +/** + * Constants for time zone display name types. + * @stable ICU 50 + */ +typedef enum UTimeZoneNameType { + /** + * Unknown display name type. + * @stable ICU 50 + */ + UTZNM_UNKNOWN = 0x00, + /** + * Long display name, such as "Eastern Time". + * @stable ICU 50 + */ + UTZNM_LONG_GENERIC = 0x01, + /** + * Long display name for standard time, such as "Eastern Standard Time". + * @stable ICU 50 + */ + UTZNM_LONG_STANDARD = 0x02, + /** + * Long display name for daylight saving time, such as "Eastern Daylight Time". + * @stable ICU 50 + */ + UTZNM_LONG_DAYLIGHT = 0x04, + /** + * Short display name, such as "ET". + * @stable ICU 50 + */ + UTZNM_SHORT_GENERIC = 0x08, + /** + * Short display name for standard time, such as "EST". + * @stable ICU 50 + */ + UTZNM_SHORT_STANDARD = 0x10, + /** + * Short display name for daylight saving time, such as "EDT". 
+ * @stable ICU 50 + */ + UTZNM_SHORT_DAYLIGHT = 0x20, + /** + * Exemplar location name, such as "Los Angeles". + * @stable ICU 51 + */ + UTZNM_EXEMPLAR_LOCATION = 0x40 +} UTimeZoneNameType; + +U_CDECL_END + +U_NAMESPACE_BEGIN + +class UVector; +struct MatchInfo; + +/** + * <code>TimeZoneNames</code> is an abstract class representing the time zone display name data model defined + * by <a href="http://www.unicode.org/reports/tr35/">UTS#35 Unicode Locale Data Markup Language (LDML)</a>. + * The model defines meta zone, which is used for storing a set of display names. A meta zone can be shared + * by multiple time zones. Also a time zone may have multiple meta zone historic mappings. + * <p> + * For example, people in the United States refer the zone used by the east part of North America as "Eastern Time". + * The tz database contains multiple time zones "America/New_York", "America/Detroit", "America/Montreal" and some + * others that belong to "Eastern Time". However, assigning different display names to these time zones does not make + * much sense for most of people. + * <p> + * In <a href="http://cldr.unicode.org/">CLDR</a> (which uses LDML for representing locale data), the display name + * "Eastern Time" is stored as long generic display name of a meta zone identified by the ID "America_Eastern". + * Then, there is another table maintaining the historic mapping to meta zones for each time zone. The time zones in + * the above example ("America/New_York", "America/Detroit"...) are mapped to the meta zone "America_Eastern". + * <p> + * Sometimes, a time zone is mapped to a different time zone in the past. For example, "America/Indiana/Knox" + * had been moving "Eastern Time" and "Central Time" back and forth. Therefore, it is necessary that time zone + * to meta zones mapping data are stored by date range. + * + * <p><b>Note:</b> + * The methods in this class assume that time zone IDs are already canonicalized. 
For example, you may not get proper + * result returned by a method with time zone ID "America/Indiana/Indianapolis", because it's not a canonical time zone + * ID (the canonical time zone ID for the time zone is "America/Indianapolis"). See + * {@link TimeZone#getCanonicalID(const UnicodeString& id, UnicodeString& canonicalID, UErrorCode& status)} about ICU + * canonical time zone IDs. + * + * <p> + * In CLDR, most of time zone display names except location names are provided through meta zones. But a time zone may + * have a specific name that is not shared with other time zones. + * + * For example, time zone "Europe/London" has English long name for standard time "Greenwich Mean Time", which is also + * shared with other time zones. However, the long name for daylight saving time is "British Summer Time", which is only + * used for "Europe/London". + * + * <p> + * {@link #getTimeZoneDisplayName} is designed for accessing a name only used by a single time zone. + * But it does not necessarily mean that a subclass implementation uses the same model as CLDR. A subclass implementation + * may provide time zone names only through {@link #getTimeZoneDisplayName}, or only through {@link #getMetaZoneDisplayName}, + * or both. + * + * <p> + * The default <code>TimeZoneNames</code> implementation returned by {@link #createInstance} + * uses the locale data imported from CLDR. In CLDR, set of meta zone IDs and mappings between zone IDs and meta zone + * IDs are shared by all locales. Therefore, the behavior of {@link #getAvailableMetaZoneIDs}, + * {@link #getMetaZoneID}, and {@link #getReferenceZoneID} won't be changed no matter + * what locale is used for getting an instance of <code>TimeZoneNames</code>. + * + * @stable ICU 50 + */ +class U_I18N_API TimeZoneNames : public UObject { +public: + /** + * Destructor. + * @stable ICU 50 + */ + virtual ~TimeZoneNames(); + + /** + * Return true if the given TimeZoneNames objects are semantically equal.
+ * @param other the object to be compared with. + * @return Return TRUE if the given TimeZoneNames objects are semantically equal. + * @stable ICU 50 + */ + virtual UBool operator==(const TimeZoneNames& other) const = 0; + + /** + * Return true if the given TimeZoneNames objects are not semantically + * equal. + * @param other the object to be compared with. + * @return Return TRUE if the given TimeZoneNames objects are not semantically equal. + * @stable ICU 50 + */ + UBool operator!=(const TimeZoneNames& other) const { return !operator==(other); } + + /** + * Clone this object polymorphically. The caller is responsible + * for deleting the result when done. + * @return A copy of the object + * @stable ICU 50 + */ + virtual TimeZoneNames* clone() const = 0; + + /** + * Returns an instance of <code>TimeZoneNames</code> for the specified locale. + * + * @param locale The locale. + * @param status Receives the status. + * @return An instance of <code>TimeZoneNames</code> + * @stable ICU 50 + */ + static TimeZoneNames* U_EXPORT2 createInstance(const Locale& locale, UErrorCode& status); + + /** + * Returns an instance of <code>TimeZoneNames</code> containing only short specific + * zone names (SHORT_STANDARD and SHORT_DAYLIGHT), + * compatible with the IANA tz database's zone abbreviations (not localized). + * <br> + * Note: The input locale is used for resolving ambiguous names (e.g. "IST" is parsed + * as Israel Standard Time for Israel, while it is parsed as India Standard Time for + * all other regions). The zone names returned by this instance are not localized. + * @stable ICU 54 + */ + static TimeZoneNames* U_EXPORT2 createTZDBInstance(const Locale& locale, UErrorCode& status); + + /** + * Returns an enumeration of all available meta zone IDs. + * @param status Receives the status. + * @return an enumeration object, owned by the caller.
+ * @stable ICU 50 + */ + virtual StringEnumeration* getAvailableMetaZoneIDs(UErrorCode& status) const = 0; + + /** + * Returns an enumeration of all available meta zone IDs used by the given time zone. + * @param tzID The canonical time zone ID. + * @param status Receives the status. + * @return an enumeration object, owned by the caller. + * @stable ICU 50 + */ + virtual StringEnumeration* getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCode& status) const = 0; + + /** + * Returns the meta zone ID for the given canonical time zone ID at the given date. + * @param tzID The canonical time zone ID. + * @param date The date. + * @param mzID Receives the meta zone ID for the given time zone ID at the given date. If the time zone does not have a + * corresponding meta zone at the given date or the implementation does not support meta zones, "bogus" state + * is set. + * @return A reference to the result. + * @stable ICU 50 + */ + virtual UnicodeString& getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeString& mzID) const = 0; + + /** + * Returns the reference zone ID for the given meta zone ID for the region. + * + * Note: Each meta zone must have a reference zone associated with a special region "001" (world). + * Some meta zones may have region specific reference zone IDs other than the special region + * "001". When a meta zone does not have any region specific reference zone IDs, this method + * returns the reference zone ID for the special region "001" (world). + * + * @param mzID The meta zone ID. + * @param region The region. + * @param tzID Receives the reference zone ID ("golden zone" in the LDML specification) for the given meta zone ID for the + * region. If the meta zone is unknown or the implementation does not support meta zones, "bogus" state + * is set. + * @return A reference to the result.
+ * @stable ICU 50 + */ + virtual UnicodeString& getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const = 0; + + /** + * Returns the display name of the meta zone. + * @param mzID The meta zone ID. + * @param type The display name type. See {@link #UTimeZoneNameType}. + * @param name Receives the display name of the meta zone. When this object does not have a localized display name for the given + * meta zone with the specified type or the implementation does not provide any display names associated + * with meta zones, "bogus" state is set. + * @return A reference to the result. + * @stable ICU 50 + */ + virtual UnicodeString& getMetaZoneDisplayName(const UnicodeString& mzID, UTimeZoneNameType type, UnicodeString& name) const = 0; + + /** + * Returns the display name of the time zone. Unlike {@link #getDisplayName}, + * this method does not get a name from a meta zone used by the time zone. + * @param tzID The canonical time zone ID. + * @param type The display name type. See {@link #UTimeZoneNameType}. + * @param name Receives the display name for the time zone. When this object does not have a localized display name for the given + * time zone with the specified type, "bogus" state is set. + * @return A reference to the result. + * @stable ICU 50 + */ + virtual UnicodeString& getTimeZoneDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UnicodeString& name) const = 0; + + /** + * Returns the exemplar location name for the given time zone. When this object does not have a localized location + * name, the default implementation may still return a programmatically generated name with the logic described + * below. + * <ol> + * <li>Check if the ID contains "/". If not, return null. + * <li>Check if the ID does not start with "Etc/" or "SystemV/". If it does, return null. + * <li>Extract a substring after the last occurrence of "/". + * <li>Replace "_" with " ".
+ * </ol> + * For example, "New York" is returned for the time zone ID "America/New_York" when this object does not have the + * localized location name. + * + * @param tzID The canonical time zone ID + * @param name Receives the exemplar location name for the given time zone, or "bogus" state is set when a localized + * location name is not available and the fallback logic described above cannot extract location from the ID. + * @return A reference to the result. + * @stable ICU 50 + */ + virtual UnicodeString& getExemplarLocationName(const UnicodeString& tzID, UnicodeString& name) const; + + /** + * Returns the display name of the time zone at the given date. + * <p> + * <b>Note:</b> This method calls the subclass's {@link #getTimeZoneDisplayName} first. When the + * result is bogus, this method calls {@link #getMetaZoneID} to get the meta zone ID mapped from the + * time zone, then calls {@link #getMetaZoneDisplayName}. + * + * @param tzID The canonical time zone ID. + * @param type The display name type. See {@link #UTimeZoneNameType}. + * @param date The date. + * @param name Receives the display name for the time zone at the given date. When this object does not have a localized display + * name for the time zone with the specified type and date, "bogus" state is set. + * @return A reference to the result. + * @stable ICU 50 + */ + virtual UnicodeString& getDisplayName(const UnicodeString& tzID, UTimeZoneNameType type, UDate date, UnicodeString& name) const; + + /** + * @internal For specific users only until proposed publicly. + * @deprecated This API is ICU internal only. + */ + virtual void loadAllDisplayNames(UErrorCode& status); + + /** + * @internal For specific users only until proposed publicly. + * @deprecated This API is ICU internal only. 
+ */ + virtual void getDisplayNames(const UnicodeString& tzID, const UTimeZoneNameType types[], int32_t numTypes, UDate date, UnicodeString dest[], UErrorCode& status) const; + + /** + * <code>MatchInfoCollection</code> represents a collection of time zone name matches used by + * {@link TimeZoneNames#find}. + * @internal + */ + class U_I18N_API MatchInfoCollection : public UMemory { + public: + /** + * Constructor. + * @internal + */ + MatchInfoCollection(); + /** + * Destructor. + * @internal + */ + virtual ~MatchInfoCollection(); + +#ifndef U_HIDE_INTERNAL_API + /** + * Adds a zone match. + * @param nameType The name type. + * @param matchLength The match length. + * @param tzID The time zone ID. + * @param status Receives the status + * @internal + */ + void addZone(UTimeZoneNameType nameType, int32_t matchLength, + const UnicodeString& tzID, UErrorCode& status); + + /** + * Adds a meta zone match. + * @param nameType The name type. + * @param matchLength The match length. + * @param mzID The metazone ID. + * @param status Receives the status + * @internal + */ + void addMetaZone(UTimeZoneNameType nameType, int32_t matchLength, + const UnicodeString& mzID, UErrorCode& status); + + /** + * Returns the number of entries available in this object. + * @return The number of entries. + * @internal + */ + int32_t size() const; + + /** + * Returns the time zone name type of a match at the specified index. + * @param idx The index + * @return The time zone name type. If the specified idx is out of range, + * it returns UTZNM_UNKNOWN. + * @see UTimeZoneNameType + * @internal + */ + UTimeZoneNameType getNameTypeAt(int32_t idx) const; + + /** + * Returns the match length of a match at the specified index. + * @param idx The index + * @return The match length. If the specified idx is out of range, + * it returns 0. + * @internal + */ + int32_t getMatchLengthAt(int32_t idx) const; + + /** + * Gets the zone ID of a match at the specified index.
+ * @param idx The index + * @param tzID Receives the zone ID. + * @return TRUE if the zone ID was set to tzID. + * @internal + */ + UBool getTimeZoneIDAt(int32_t idx, UnicodeString& tzID) const; + + /** + * Gets the metazone ID of a match at the specified index. + * @param idx The index + * @param mzID Receives the metazone ID + * @return TRUE if the meta zone ID was set to mzID. + * @internal + */ + UBool getMetaZoneIDAt(int32_t idx, UnicodeString& mzID) const; +#endif /* U_HIDE_INTERNAL_API */ + + private: + UVector* fMatches; // vector of MatchEntry + + UVector* matches(UErrorCode& status); + }; + + /** + * Finds time zone name prefix matches for the input text at the + * given offset and returns a collection of the matches. + * @param text The text. + * @param start The starting offset within the text. + * @param types The set of name types represented by bitwise flags of UTimeZoneNameType enums, + * or UTZNM_UNKNOWN for all name types. + * @param status Receives the status. + * @return A collection of matches (owned by the caller), or NULL if no matches are found. + * @see UTimeZoneNameType + * @see MatchInfoCollection + * @internal + */ + virtual MatchInfoCollection* find(const UnicodeString& text, int32_t start, uint32_t types, UErrorCode& status) const = 0; +}; + +U_NAMESPACE_END + +#endif +#endif diff --git a/intl/icu/source/i18n/unicode/tzrule.h b/intl/icu/source/i18n/unicode/tzrule.h new file mode 100644 index 000000000..51b3ae17d --- /dev/null +++ b/intl/icu/source/i18n/unicode/tzrule.h @@ -0,0 +1,830 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. 
* +******************************************************************************* +*/ +#ifndef TZRULE_H +#define TZRULE_H + +/** + * \file + * \brief C++ API: Time zone rule classes + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/dtrule.h" + +U_NAMESPACE_BEGIN + +/** + * <code>TimeZoneRule</code> is a class representing a rule for time zone. + * <code>TimeZoneRule</code> has a set of time zone attributes, such as zone name, + * raw offset (UTC offset for standard time) and daylight saving time offset. + * + * @stable ICU 3.8 + */ +class U_I18N_API TimeZoneRule : public UObject { +public: + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~TimeZoneRule(); + + /** + * Clone this TimeZoneRule object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 3.8 + */ + virtual TimeZoneRule* clone(void) const = 0; + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZoneRule</code> objects are semantically equal. + * @stable ICU 3.8 + */ + virtual UBool operator==(const TimeZoneRule& that) const; + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZoneRule</code> objects are semantically unequal. + * @stable ICU 3.8 + */ + virtual UBool operator!=(const TimeZoneRule& that) const; + + /** + * Fills in "name" with the name of this time zone. + * @param name Receives the name of this time zone. 
+ * @return A reference to "name" + * @stable ICU 3.8 + */ + UnicodeString& getName(UnicodeString& name) const; + + /** + * Gets the standard time offset. + * @return The standard time offset from UTC in milliseconds. + * @stable ICU 3.8 + */ + int32_t getRawOffset(void) const; + + /** + * Gets the amount of daylight saving delta time from the standard time. + * @return The amount of daylight saving offset used by this rule + * in milliseconds. + * @stable ICU 3.8 + */ + int32_t getDSTSavings(void) const; + + /** + * Returns if this rule represents the same rule and offsets as another. + * When two <code>TimeZoneRule</code> objects differ only its names, this method + * returns true. + * @param other The <code>TimeZoneRule</code> object to be compared with. + * @return true if the other <code>TimeZoneRule</code> is the same as this one. + * @stable ICU 3.8 + */ + virtual UBool isEquivalentTo(const TimeZoneRule& other) const; + + /** + * Gets the very first time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the very first time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const = 0; + + /** + * Gets the final time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the final time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. 
+ * @stable ICU 3.8 + */ + virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const = 0; + + /** + * Gets the first time when this rule takes effect after the specified time. + * @param base The first start time after this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives The first time when this rule takes effect after + * the specified base time. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const = 0; + + /** + * Gets the most recent time when this rule takes effect before the specified time. + * @param base The most recent time before this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives The most recent time when this rule takes effect before + * the specified base time. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const = 0; + +protected: + + /** + * Constructs a <code>TimeZoneRule</code> with the name, the GMT offset of its + * standard time and the amount of daylight saving offset adjustment. + * @param name The time zone name. 
+ * @param rawOffset The UTC offset of its standard time in milliseconds. + * @param dstSavings The amount of daylight saving offset adjustment in milliseconds. + * If this is a rule for standard time, the value of this argument is 0. + * @stable ICU 3.8 + */ + TimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings); + + /** + * Copy constructor. + * @param source The TimeZoneRule object to be copied. + * @stable ICU 3.8 + */ + TimeZoneRule(const TimeZoneRule& source); + + /** + * Assignment operator. + * @param right The object to be copied. + * @stable ICU 3.8 + */ + TimeZoneRule& operator=(const TimeZoneRule& right); + +private: + UnicodeString fName; // time name + int32_t fRawOffset; // UTC offset of the standard time in milliseconds + int32_t fDSTSavings; // DST saving amount in milliseconds +}; + +/** + * <code>InitialTimeZoneRule</code> represents a time zone rule + * representing a time zone effective from the beginning and + * has no actual start times. + * @stable ICU 3.8 + */ +class U_I18N_API InitialTimeZoneRule : public TimeZoneRule { +public: + /** + * Constructs an <code>InitialTimeZoneRule</code> with the name, the GMT offset of its + * standard time and the amount of daylight saving offset adjustment. + * @param name The time zone name. + * @param rawOffset The UTC offset of its standard time in milliseconds. + * @param dstSavings The amount of daylight saving offset adjustment in milliseconds. + * If this is a rule for standard time, the value of this argument is 0. + * @stable ICU 3.8 + */ + InitialTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings); + + /** + * Copy constructor. + * @param source The InitialTimeZoneRule object to be copied. + * @stable ICU 3.8 + */ + InitialTimeZoneRule(const InitialTimeZoneRule& source); + + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~InitialTimeZoneRule(); + + /** + * Clone this InitialTimeZoneRule object polymorphically.
The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 3.8 + */ + virtual InitialTimeZoneRule* clone(void) const; + + /** + * Assignment operator. + * @param right The object to be copied. + * @stable ICU 3.8 + */ + InitialTimeZoneRule& operator=(const InitialTimeZoneRule& right); + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZoneRule</code> objects are semantically equal. + * @stable ICU 3.8 + */ + virtual UBool operator==(const TimeZoneRule& that) const; + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZoneRule</code> objects are semantically unequal. + * @stable ICU 3.8 + */ + virtual UBool operator!=(const TimeZoneRule& that) const; + + /** + * Gets the time when this rule takes effect in the given year. + * @param year The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the start time in the year. + * @return true if this rule takes effect in the year and the result is set to + * "result". + * @stable ICU 3.8 + */ + UBool getStartInYear(int32_t year, int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Returns if this rule represents the same rule and offsets as another. + * When two <code>TimeZoneRule</code> objects differ only its names, this method + * returns true. + * @param that The <code>TimeZoneRule</code> object to be compared with. 
+ * @return true if the other <code>TimeZoneRule</code> is equivalent to this one. + * @stable ICU 3.8 + */ + virtual UBool isEquivalentTo(const TimeZoneRule& that) const; + + /** + * Gets the very first time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the very first time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Gets the final time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the final time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Gets the first time when this rule takes effect after the specified time. + * @param base The first start time after this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives The first time when this rule takes effect after + * the specified base time. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. 
+ * @stable ICU 3.8 + */ + virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const; + + /** + * Gets the most recent time when this rule takes effect before the specified time. + * @param base The most recent time before this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent time when this rule takes effect before + * the specified base time. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const; + +public: + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs.
+ * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID(void) const; +}; + +/** + * <code>AnnualTimeZoneRule</code> is a class used for representing a time zone + * rule which takes effect annually. The calendar system used for the rule is + * based on the Gregorian calendar. + * + * @stable ICU 3.8 + */ +class U_I18N_API AnnualTimeZoneRule : public TimeZoneRule { +public: + /** + * The constant representing the maximum year used for designating + * that a rule is permanent. + */ + static const int32_t MAX_YEAR; + + /** + * Constructs an <code>AnnualTimeZoneRule</code> with the name, the GMT offset of its + * standard time, the amount of daylight saving offset adjustment, the annual start + * time rule and the start/until years. The input DateTimeRule is copied by this + * constructor, so the caller remains responsible for deleting the object. + * @param name The time zone name. + * @param rawOffset The GMT offset of its standard time in milliseconds. + * @param dstSavings The amount of daylight saving offset adjustment in + * milliseconds. If this is a rule for standard time, + * the value of this argument is 0. + * @param dateTimeRule The start date/time rule repeated annually. + * @param startYear The first year when this rule takes effect. + * @param endYear The last year when this rule takes effect. If this + * rule is effective forever in the future, specify MAX_YEAR. + * @stable ICU 3.8 + */ + AnnualTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings, + const DateTimeRule& dateTimeRule, int32_t startYear, int32_t endYear); + + /** + * Constructs an <code>AnnualTimeZoneRule</code> with the name, the GMT offset of its + * standard time, the amount of daylight saving offset adjustment, the annual start + * time rule and the start/until years. The input DateTimeRule object is adopted + * by this object, therefore, the caller must not delete the object. + * @param name The time zone name.
+ * @param rawOffset The GMT offset of its standard time in milliseconds. + * @param dstSavings The amount of daylight saving offset adjustment in + * milliseconds. If this is a rule for standard time, + * the value of this argument is 0. + * @param dateTimeRule The start date/time rule repeated annually; adopted by this object. + * @param startYear The first year when this rule takes effect. + * @param endYear The last year when this rule takes effect. If this + * rule is effective forever in the future, specify MAX_YEAR. + * @stable ICU 3.8 + */ + AnnualTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings, + DateTimeRule* dateTimeRule, int32_t startYear, int32_t endYear); + + /** + * Copy constructor. + * @param source The AnnualTimeZoneRule object to be copied. + * @stable ICU 3.8 + */ + AnnualTimeZoneRule(const AnnualTimeZoneRule& source); + + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~AnnualTimeZoneRule(); + + /** + * Clone this AnnualTimeZoneRule object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 3.8 + */ + virtual AnnualTimeZoneRule* clone(void) const; + + /** + * Assignment operator. + * @param right The object to be copied. + * @stable ICU 3.8 + */ + AnnualTimeZoneRule& operator=(const AnnualTimeZoneRule& right); + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZoneRule</code> objects are semantically equal. + * @stable ICU 3.8 + */ + virtual UBool operator==(const TimeZoneRule& that) const; + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with.
+ * @return true if the given <code>TimeZoneRule</code> objects are semantically unequal. + * @stable ICU 3.8 + */ + virtual UBool operator!=(const TimeZoneRule& that) const; + + /** + * Gets the start date/time rule used by this rule. + * @return The <code>DateTimeRule</code> which represents the start date/time + * rule used by this time zone rule. + * @stable ICU 3.8 + */ + const DateTimeRule* getRule(void) const; + + /** + * Gets the first year when this rule takes effect. + * @return The start year of this rule. The year is in Gregorian calendar + * with 0 == 1 BCE, -1 == 2 BCE, etc. + * @stable ICU 3.8 + */ + int32_t getStartYear(void) const; + + /** + * Gets the end year when this rule takes effect. + * @return The end year of this rule (inclusive). The year is in Gregorian calendar + * with 0 == 1 BCE, -1 == 2 BCE, etc. + * @stable ICU 3.8 + */ + int32_t getEndYear(void) const; + + /** + * Gets the time when this rule takes effect in the given year. + * @param year The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the start time in the year. + * @return true if this rule takes effect in the year and the result is set to + * "result". + * @stable ICU 3.8 + */ + UBool getStartInYear(int32_t year, int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Returns if this rule represents the same rule and offsets as another. + * When two <code>TimeZoneRule</code> objects differ only in their names, this method + * returns true. + * @param that The <code>TimeZoneRule</code> object to be compared with. + * @return true if the other <code>TimeZoneRule</code> is equivalent to this one.
+ * @stable ICU 3.8 + */ + virtual UBool isEquivalentTo(const TimeZoneRule& that) const; + + /** + * Gets the very first time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the very first time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Gets the final time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the final time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Gets the first time when this rule takes effect after the specified time. + * @param base The first start time after this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first time when this rule takes effect after + * the specified base time. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged.
+ * @stable ICU 3.8 + */ + virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const; + + /** + * Gets the most recent time when this rule takes effect before the specified time. + * @param base The most recent time before this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent time when this rule takes effect before + * the specified base time. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const; + + +private: + DateTimeRule* fDateTimeRule; + int32_t fStartYear; + int32_t fEndYear; + +public: + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs.
+ * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID(void) const; +}; + +/** + * <code>TimeArrayTimeZoneRule</code> represents a time zone rule whose start times are + * defined by an array of milliseconds since the standard base time. + * + * @stable ICU 3.8 + */ +class U_I18N_API TimeArrayTimeZoneRule : public TimeZoneRule { +public: + /** + * Constructs a <code>TimeArrayTimeZoneRule</code> with the name, the GMT offset of its + * standard time, the amount of daylight saving offset adjustment and + * the array of times when this rule takes effect. + * @param name The time zone name. + * @param rawOffset The UTC offset of its standard time in milliseconds. + * @param dstSavings The amount of daylight saving offset adjustment in + * milliseconds. If this is a rule for standard time, + * the value of this argument is 0. + * @param startTimes The array of start times in milliseconds since the base time + * (January 1, 1970, 00:00:00). + * @param numStartTimes The number of elements in the parameter "startTimes". + * @param timeRuleType The time type of the start times, which is one of + * <code>DateTimeRule::WALL_TIME</code>, <code>STANDARD_TIME</code> + * and <code>UTC_TIME</code>. + * @stable ICU 3.8 + */ + TimeArrayTimeZoneRule(const UnicodeString& name, int32_t rawOffset, int32_t dstSavings, + const UDate* startTimes, int32_t numStartTimes, DateTimeRule::TimeRuleType timeRuleType); + + /** + * Copy constructor. + * @param source The TimeArrayTimeZoneRule object to be copied. + * @stable ICU 3.8 + */ + TimeArrayTimeZoneRule(const TimeArrayTimeZoneRule& source); + + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~TimeArrayTimeZoneRule(); + + /** + * Clone this TimeArrayTimeZoneRule object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 3.8 + */ + virtual TimeArrayTimeZoneRule* clone(void) const; + + /** + * Assignment operator. + * @param right The object to be copied.
+ * @stable ICU 3.8 + */ + TimeArrayTimeZoneRule& operator=(const TimeArrayTimeZoneRule& right); + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZoneRule</code> objects are semantically equal. + * @stable ICU 3.8 + */ + virtual UBool operator==(const TimeZoneRule& that) const; + + /** + * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZoneRule</code> objects are semantically unequal. + * @stable ICU 3.8 + */ + virtual UBool operator!=(const TimeZoneRule& that) const; + + /** + * Gets the time type of the start times used by this rule. The return value + * is either <code>DateTimeRule::WALL_TIME</code> or <code>STANDARD_TIME</code> + * or <code>UTC_TIME</code>. + * + * @return The time type of the start times used by this rule. + * @stable ICU 3.8 + */ + DateTimeRule::TimeRuleType getTimeType(void) const; + + /** + * Gets a start time at the index stored in this rule. + * @param index The index of start times. + * @param result Receives the start time at the index. + * @return true if the index is within the valid range + * and the result is set. When false, the output + * parameter "result" is unchanged. + * @stable ICU 3.8 + */ + UBool getStartTimeAt(int32_t index, UDate& result) const; + + /** + * Returns the number of start times stored in this rule. + * @return The number of start times. + * @stable ICU 3.8 + */ + int32_t countStartTimes(void) const; + + /** + * Returns if this rule represents the same rule and offsets as another. + * When two <code>TimeZoneRule</code> objects differ only in their names, this method + * returns true.
+ * @param that The <code>TimeZoneRule</code> object to be compared with. + * @return true if the other <code>TimeZoneRule</code> is equivalent to this one. + * @stable ICU 3.8 + */ + virtual UBool isEquivalentTo(const TimeZoneRule& that) const; + + /** + * Gets the very first time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the very first time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getFirstStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Gets the final time when this rule takes effect. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param result Receives the final time when this rule takes effect. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getFinalStart(int32_t prevRawOffset, int32_t prevDSTSavings, UDate& result) const; + + /** + * Gets the first time when this rule takes effect after the specified time. + * @param base The first start time after this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first time when this rule takes effect after + * the specified base time. + * @return true if the start time is available.
When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getNextStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const; + + /** + * Gets the most recent time when this rule takes effect before the specified time. + * @param base The most recent time before this base time will be returned. + * @param prevRawOffset The standard time offset from UTC before this rule + * takes effect in milliseconds. + * @param prevDSTSavings The amount of daylight saving offset from the + * standard time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent time when this rule takes effect before + * the specified base time. + * @return true if the start time is available. When false is returned, output parameter + * "result" is unchanged. + * @stable ICU 3.8 + */ + virtual UBool getPreviousStart(UDate base, int32_t prevRawOffset, int32_t prevDSTSavings, + UBool inclusive, UDate& result) const; + + +private: + enum { TIMEARRAY_STACK_BUFFER_SIZE = 32 }; + UBool initStartTimes(const UDate source[], int32_t size, UErrorCode& ec); + UDate getUTC(UDate time, int32_t raw, int32_t dst) const; + + DateTimeRule::TimeRuleType fTimeRuleType; + int32_t fNumStartTimes; + UDate* fStartTimes; + UDate fLocalStartTimes[TIMEARRAY_STACK_BUFFER_SIZE]; + +public: + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID(void) const; +}; + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // TZRULE_H + +//eof diff --git a/intl/icu/source/i18n/unicode/tztrans.h b/intl/icu/source/i18n/unicode/tztrans.h new file mode 100644 index 000000000..53c34fef9 --- /dev/null +++ b/intl/icu/source/i18n/unicode/tztrans.h @@ -0,0 +1,197 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +#ifndef TZTRANS_H +#define TZTRANS_H + +/** + * \file + * \brief C++ API: Time zone transition + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +// Forward declaration +class TimeZoneRule; + +/** + * <code>TimeZoneTransition</code> is a class representing a time zone transition. + * An instance has a time of transition and rules for both before and after the transition. + * @stable ICU 3.8 + */ +class U_I18N_API TimeZoneTransition : public UObject { +public: + /** + * Constructs a <code>TimeZoneTransition</code> with the time and the rules before/after + * the transition. + * + * @param time The time of transition in milliseconds since the base time. + * @param from The time zone rule used before the transition. + * @param to The time zone rule used after the transition. 
+ * @stable ICU 3.8 + */ + TimeZoneTransition(UDate time, const TimeZoneRule& from, const TimeZoneRule& to); + + /** + * Constructs an empty <code>TimeZoneTransition</code>. + * @stable ICU 3.8 + */ + TimeZoneTransition(); + + /** + * Copy constructor. + * @param source The TimeZoneTransition object to be copied. + * @stable ICU 3.8 + */ + TimeZoneTransition(const TimeZoneTransition& source); + + /** + * Destructor. + * @stable ICU 3.8 + */ + ~TimeZoneTransition(); + + /** + * Clone this TimeZoneTransition object polymorphically. The caller owns the result and + * should delete it when done. + * @return A copy of the object. + * @stable ICU 3.8 + */ + TimeZoneTransition* clone(void) const; + + /** + * Assignment operator. + * @param right The object to be copied. + * @stable ICU 3.8 + */ + TimeZoneTransition& operator=(const TimeZoneTransition& right); + + /** + * Return true if the given TimeZoneTransition objects are semantically equal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given TimeZoneTransition objects are semantically equal. + * @stable ICU 3.8 + */ + UBool operator==(const TimeZoneTransition& that) const; + + /** + * Return true if the given TimeZoneTransition objects are semantically unequal. Objects + * of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given TimeZoneTransition objects are semantically unequal. + * @stable ICU 3.8 + */ + UBool operator!=(const TimeZoneTransition& that) const; + + /** + * Returns the time of transition in milliseconds. + * @return The time of the transition in milliseconds since the 1970 Jan 1 epoch time. + * @stable ICU 3.8 + */ + UDate getTime(void) const; + + /** + * Sets the time of transition in milliseconds. + * @param time The time of the transition in milliseconds since the 1970 Jan 1 epoch time.
+ * @stable ICU 3.8 + */ + void setTime(UDate time); + + /** + * Returns the rule used before the transition. + * @return The time zone rule used before the transition. + * @stable ICU 3.8 + */ + const TimeZoneRule* getFrom(void) const; + + /** + * Sets the rule used before the transition. The caller remains + * responsible for deleting the <code>TimeZoneRule</code> object. + * @param from The time zone rule used before the transition. + * @stable ICU 3.8 + */ + void setFrom(const TimeZoneRule& from); + + /** + * Adopts the rule used before the transition. The caller must + * not delete the <code>TimeZoneRule</code> object passed in. + * @param from The time zone rule used before the transition. + * @stable ICU 3.8 + */ + void adoptFrom(TimeZoneRule* from); + + /** + * Sets the rule used after the transition. The caller remains + * responsible for deleting the <code>TimeZoneRule</code> object. + * @param to The time zone rule used after the transition. + * @stable ICU 3.8 + */ + void setTo(const TimeZoneRule& to); + + /** + * Adopts the rule used after the transition. The caller must + * not delete the <code>TimeZoneRule</code> object passed in. + * @param to The time zone rule used after the transition. + * @stable ICU 3.8 + */ + void adoptTo(TimeZoneRule* to); + + /** + * Returns the rule used after the transition. + * @return The time zone rule used after the transition. + * @stable ICU 3.8 + */ + const TimeZoneRule* getTo(void) const; + +private: + UDate fTime; + TimeZoneRule* fFrom; + TimeZoneRule* fTo; + +public: + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class.
+ * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID(void) const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // TZTRANS_H + +//eof diff --git a/intl/icu/source/i18n/unicode/ucal.h b/intl/icu/source/i18n/unicode/ucal.h new file mode 100644 index 000000000..29c084574 --- /dev/null +++ b/intl/icu/source/i18n/unicode/ucal.h @@ -0,0 +1,1564 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 1996-2015, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ + +#ifndef UCAL_H +#define UCAL_H + +#include "unicode/utypes.h" +#include "unicode/uenum.h" +#include "unicode/uloc.h" +#include "unicode/localpointer.h" + +#if !UCONFIG_NO_FORMATTING + +/** + * \file + * \brief C API: Calendar + * + * <h2>Calendar C API</h2> + * + * UCalendar C API is used for converting between a <code>UDate</code> object + * and a set of integer fields such as <code>UCAL_YEAR</code>, <code>UCAL_MONTH</code>, + * <code>UCAL_DAY</code>, <code>UCAL_HOUR</code>, and so on. + * (A <code>UDate</code> object represents a specific instant in + * time with millisecond precision. See UDate + * for information about the <code>UDate</code> .) 
+ * + * <p> + * Types of <code>UCalendar</code> interpret a <code>UDate</code> + * according to the rules of a specific calendar system. The C API + * provides the enum UCalendarType with UCAL_TRADITIONAL and + * UCAL_GREGORIAN. + * <p> + * Like other locale-sensitive C APIs, the calendar API provides a + * function, <code>ucal_open()</code>, which returns a pointer to + * <code>UCalendar</code> whose time fields have been initialized + * with the current date and time. We need to specify the type of + * calendar to be opened and the timezoneId. + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * UCalendar *caldef; + * UChar *tzId; + * UErrorCode status = U_ZERO_ERROR; + * tzId=(UChar*)malloc(sizeof(UChar) * (strlen("PST") +1) ); + * u_uastrcpy(tzId, "PST"); + * caldef=ucal_open(tzId, u_strlen(tzId), NULL, UCAL_TRADITIONAL, &status); + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * <p> + * A <code>UCalendar</code> object can produce all the time field values + * needed to implement the date-time formatting for a particular language + * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional). + * + * <p> + * When computing a <code>UDate</code> from time fields, two special circumstances + * may arise: there may be insufficient information to compute the + * <code>UDate</code> (such as only year and month but no day in the month), + * or there may be inconsistent information (such as "Tuesday, July 15, 1996" + * -- July 15, 1996 is actually a Monday). + * + * <p> + * <strong>Insufficient information.</strong> The calendar will use default + * information to specify the missing fields. This may vary by calendar; for + * the Gregorian calendar, the default for a field is the same as that of the + * start of the epoch: i.e., UCAL_YEAR = 1970, UCAL_MONTH = JANUARY, UCAL_DATE = 1, etc. + * + * <p> + * <strong>Inconsistent information.</strong> If fields conflict, the calendar + * will give preference to fields set more recently.
For example, when + * determining the day, the calendar will look for one of the following + * combinations of fields. The most recent combination, as determined by the + * most recently set single field, will be used. + * + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * UCAL_MONTH + UCAL_DAY_OF_MONTH + * UCAL_MONTH + UCAL_WEEK_OF_MONTH + UCAL_DAY_OF_WEEK + * UCAL_MONTH + UCAL_DAY_OF_WEEK_IN_MONTH + UCAL_DAY_OF_WEEK + * UCAL_DAY_OF_YEAR + * UCAL_DAY_OF_WEEK + UCAL_WEEK_OF_YEAR + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * For the time of day: + * + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * UCAL_HOUR_OF_DAY + * UCAL_AM_PM + UCAL_HOUR + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * <p> + * <strong>Note:</strong> for some non-Gregorian calendars, different + * fields may be necessary for complete disambiguation. For example, a full + * specification of the historical Arabic astronomical calendar requires year, + * month, day-of-month <em>and</em> day-of-week in some cases. + * + * <p> + * <strong>Note:</strong> There are certain possible ambiguities in + * interpretation of certain singular times, which are resolved in the + * following ways: + * <ol> + * <li> 24:00:00 "belongs" to the following day. That is, + * 23:59 on Dec 31, 1969 < 24:00 on Jan 1, 1970 < 24:01:00 on Jan 1, 1970 + * + * <li> Although historically not precise, midnight also belongs to "am", + * and noon belongs to "pm", so on the same day, + * 12:00 am (midnight) < 12:01 am, and 12:00 pm (noon) < 12:01 pm + * </ol> + * + * <p> + * The date or time format strings are not part of the definition of a + * calendar, as those must be modifiable or overridable by the user at + * runtime. Use {@link icu::DateFormat} + * to format dates. + * + * <p> + * <code>Calendar</code> provides an API for field "rolling", where fields + * can be incremented or decremented, but wrap around.
For example, rolling the + * month up in the date <code>December 12, <b>1996</b></code> results in + * <code>January 12, <b>1996</b></code>. + * + * <p> + * <code>Calendar</code> also provides a date arithmetic function for + * adding the specified (signed) amount of time to a particular time field. + * For example, subtracting 5 days from the date <code>September 12, 1996</code> + * results in <code>September 7, 1996</code>. + * + * @stable ICU 2.0 + */ + +/** + * The time zone ID reserved for an unknown time zone. + * @stable ICU 4.8 + */ +#define UCAL_UNKNOWN_ZONE_ID "Etc/Unknown" + +/** A calendar. + * For usage in C programs. Declared as an untyped <code>void*</code> handle. + * @stable ICU 2.0 + */ +typedef void* UCalendar; + +/** Possible types of UCalendars. + * @stable ICU 2.0 + */ +enum UCalendarType { + /** + * Despite the name, UCAL_TRADITIONAL designates the locale's default calendar, + * which may be the Gregorian calendar or some other calendar. + * @stable ICU 2.0 + */ + UCAL_TRADITIONAL, + /** + * A better name for UCAL_TRADITIONAL; it has the same value. + * @stable ICU 4.2 + */ + UCAL_DEFAULT = UCAL_TRADITIONAL, + /** + * Unambiguously designates the Gregorian calendar for the locale. + * @stable ICU 2.0 + */ + UCAL_GREGORIAN +}; + +/** Lets the enum be referred to as plain <code>UCalendarType</code>. @stable ICU 2.0 */ +typedef enum UCalendarType UCalendarType; + +/** Possible fields in a UCalendar. + * @stable ICU 2.0 + */ +enum UCalendarDateFields { + /** + * Field number indicating the era, e.g., AD or BC in the Gregorian (Julian) calendar. + * This is a calendar-specific value. + * @stable ICU 2.6 + */ + UCAL_ERA, + + /** + * Field number indicating the year. This is a calendar-specific value. + * @stable ICU 2.6 + */ + UCAL_YEAR, + + /** + * Field number indicating the month. This is a calendar-specific value. + * The first month of the year is + * <code>JANUARY</code>; the last depends on the number of months in a year.
+ * @see #UCAL_JANUARY + * @see #UCAL_FEBRUARY + * @see #UCAL_MARCH + * @see #UCAL_APRIL + * @see #UCAL_MAY + * @see #UCAL_JUNE + * @see #UCAL_JULY + * @see #UCAL_AUGUST + * @see #UCAL_SEPTEMBER + * @see #UCAL_OCTOBER + * @see #UCAL_NOVEMBER + * @see #UCAL_DECEMBER + * @see #UCAL_UNDECIMBER + * @stable ICU 2.6 + */ + UCAL_MONTH, + + /** + * Field number indicating the + * week number within the current year. The first week of the year, as + * defined by <code>UCAL_FIRST_DAY_OF_WEEK</code> and <code>UCAL_MINIMAL_DAYS_IN_FIRST_WEEK</code> + * attributes, has value 1. Subclasses define + * the value of <code>UCAL_WEEK_OF_YEAR</code> for days before the first week of + * the year. + * @see ucal_getAttribute + * @see ucal_setAttribute + * @stable ICU 2.6 + */ + UCAL_WEEK_OF_YEAR, + + /** + * Field number indicating the + * week number within the current month. The first week of the month, as + * defined by <code>UCAL_FIRST_DAY_OF_WEEK</code> and <code>UCAL_MINIMAL_DAYS_IN_FIRST_WEEK</code> + * attributes, has value 1. Subclasses define + * the value of <code>WEEK_OF_MONTH</code> for days before the first week of + * the month. + * @see ucal_getAttribute + * @see ucal_setAttribute + * @see #UCAL_FIRST_DAY_OF_WEEK + * @see #UCAL_MINIMAL_DAYS_IN_FIRST_WEEK + * @stable ICU 2.6 + */ + UCAL_WEEK_OF_MONTH, + + /** + * Field number indicating the + * day of the month. This is a synonym for <code>DAY_OF_MONTH</code>. + * The first day of the month has value 1. + * @see #UCAL_DAY_OF_MONTH + * @stable ICU 2.6 + */ + UCAL_DATE, + + /** + * Field number indicating the day + * number within the current year. The first day of the year has value 1. + * @stable ICU 2.6 + */ + UCAL_DAY_OF_YEAR, + + /** + * Field number indicating the day + * of the week. This field takes values <code>SUNDAY</code>, + * <code>MONDAY</code>, <code>TUESDAY</code>, <code>WEDNESDAY</code>, + * <code>THURSDAY</code>, <code>FRIDAY</code>, and <code>SATURDAY</code>. 
+ * @see #UCAL_SUNDAY + * @see #UCAL_MONDAY + * @see #UCAL_TUESDAY + * @see #UCAL_WEDNESDAY + * @see #UCAL_THURSDAY + * @see #UCAL_FRIDAY + * @see #UCAL_SATURDAY + * @stable ICU 2.6 + */ + UCAL_DAY_OF_WEEK, + + /** + * Field number indicating the + * ordinal number of the day of the week within the current month. Together + * with the <code>DAY_OF_WEEK</code> field, this uniquely specifies a day + * within a month. Unlike <code>WEEK_OF_MONTH</code> and + * <code>WEEK_OF_YEAR</code>, this field's value does <em>not</em> depend on + * <code>getFirstDayOfWeek()</code> or + * <code>getMinimalDaysInFirstWeek()</code>. <code>DAY_OF_MONTH 1</code> + * through <code>7</code> always correspond to <code>DAY_OF_WEEK_IN_MONTH + * 1</code>; <code>8</code> through <code>15</code> correspond to + * <code>DAY_OF_WEEK_IN_MONTH 2</code>, and so on. + * <code>DAY_OF_WEEK_IN_MONTH 0</code> indicates the week before + * <code>DAY_OF_WEEK_IN_MONTH 1</code>. Negative values count back from the + * end of the month, so the last Sunday of a month is specified as + * <code>DAY_OF_WEEK = SUNDAY, DAY_OF_WEEK_IN_MONTH = -1</code>. Because + * negative values count backward they will usually be aligned differently + * within the month than positive values. For example, if a month has 31 + * days, <code>DAY_OF_WEEK_IN_MONTH -1</code> will overlap + * <code>DAY_OF_WEEK_IN_MONTH 5</code> and the end of <code>4</code>. + * @see #UCAL_DAY_OF_WEEK + * @see #UCAL_WEEK_OF_MONTH + * @stable ICU 2.6 + */ + UCAL_DAY_OF_WEEK_IN_MONTH, + + /** + * Field number indicating + * whether the <code>HOUR</code> is before or after noon. + * E.g., at 10:04:15.250 PM the <code>AM_PM</code> is <code>PM</code>. + * @see #UCAL_AM + * @see #UCAL_PM + * @see #UCAL_HOUR + * @stable ICU 2.6 + */ + UCAL_AM_PM, + + /** + * Field number indicating the + * hour of the morning or afternoon. <code>HOUR</code> is used for the 12-hour + * clock. + * E.g., at 10:04:15.250 PM the <code>HOUR</code> is 10. 
+ * @see #UCAL_AM_PM + * @see #UCAL_HOUR_OF_DAY + * @stable ICU 2.6 + */ + UCAL_HOUR, + + /** + * Field number indicating the + * hour of the day. <code>HOUR_OF_DAY</code> is used for the 24-hour clock. + * E.g., at 10:04:15.250 PM the <code>HOUR_OF_DAY</code> is 22. + * @see #UCAL_HOUR + * @stable ICU 2.6 + */ + UCAL_HOUR_OF_DAY, + + /** + * Field number indicating the + * minute within the hour. + * E.g., at 10:04:15.250 PM the <code>UCAL_MINUTE</code> is 4. + * @stable ICU 2.6 + */ + UCAL_MINUTE, + + /** + * Field number indicating the + * second within the minute. + * E.g., at 10:04:15.250 PM the <code>UCAL_SECOND</code> is 15. + * @stable ICU 2.6 + */ + UCAL_SECOND, + + /** + * Field number indicating the + * millisecond within the second. + * E.g., at 10:04:15.250 PM the <code>UCAL_MILLISECOND</code> is 250. + * @stable ICU 2.6 + */ + UCAL_MILLISECOND, + + /** + * Field number indicating the + * raw offset from GMT in milliseconds. + * @stable ICU 2.6 + */ + UCAL_ZONE_OFFSET, + + /** + * Field number indicating the + * daylight savings offset in milliseconds. + * @stable ICU 2.6 + */ + UCAL_DST_OFFSET, + + /** + * Field number + * indicating the extended year corresponding to the + * <code>UCAL_WEEK_OF_YEAR</code> field. This may be one greater or less + * than the value of <code>UCAL_EXTENDED_YEAR</code>. + * @stable ICU 2.6 + */ + UCAL_YEAR_WOY, + + /** + * Field number + * indicating the localized day of week. This will be a value from 1 + * to 7 inclusive, with 1 being the localized first day of the week. + * @stable ICU 2.6 + */ + UCAL_DOW_LOCAL, + + /** + * Year of this calendar system, encompassing all supra-year fields. For example, + * in Gregorian/Julian calendars, positive Extended Year values indicate years AD, + * 1 BC = 0 extended, 2 BC = -1 extended, and so on. + * @stable ICU 2.8 + */ + UCAL_EXTENDED_YEAR, + + /** + * Field number + * indicating the modified Julian day number. 
This is different from + * the conventional Julian day number in two regards. First, it + * demarcates days at local zone midnight, rather than noon GMT. + * Second, it is a local number; that is, it depends on the local time + * zone. It can be thought of as a single number that encompasses all + * the date-related fields. + * @stable ICU 2.8 + */ + UCAL_JULIAN_DAY, + + /** + * Ranges from 0 to 23:59:59.999 (regardless of DST). This field behaves <em>exactly</em> + * like a composite of all time-related fields, not including the zone fields. As such, + * it also reflects discontinuities of those fields on DST transition days. On a day + * of DST onset, it will jump forward. On a day of DST cessation, it will jump + * backward. This reflects the fact that it must be combined with the DST_OFFSET field + * to obtain a unique local time value. + * @stable ICU 2.8 + */ + UCAL_MILLISECONDS_IN_DAY, + + /** + * Whether or not the current month is a leap month (0 or 1). See the Chinese calendar for + * an example of this. + */ + UCAL_IS_LEAP_MONTH, + + // Do not conditionalize with #ifndef U_HIDE_DEPRECATED_API, + // it is needed for layout of Calendar, DateFormat, and other objects + /** + * One more than the highest normal UCalendarDateFields value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCAL_FIELD_COUNT, + + /** + * Field number indicating the + * day of the month. This is a synonym for <code>UCAL_DATE</code>. + * The first day of the month has value 1. + * @see #UCAL_DATE + * Synonym for UCAL_DATE + * @stable ICU 2.8 + **/ + UCAL_DAY_OF_MONTH=UCAL_DATE +}; + +/** @stable ICU 2.0 */ +typedef enum UCalendarDateFields UCalendarDateFields; + /** + * Useful constant for days of week. Note: Calendar day-of-week is 1-based. Clients + * who create locale resources for the field of first-day-of-week should be aware of + * this. For instance, in US locale, first-day-of-week is set to 1, i.e., UCAL_SUNDAY. 
+ */ +/** Possible days of the week in a UCalendar + * @stable ICU 2.0 + */ +enum UCalendarDaysOfWeek { + /** Sunday */ + UCAL_SUNDAY = 1, + /** Monday */ + UCAL_MONDAY, + /** Tuesday */ + UCAL_TUESDAY, + /** Wednesday */ + UCAL_WEDNESDAY, + /** Thursday */ + UCAL_THURSDAY, + /** Friday */ + UCAL_FRIDAY, + /** Saturday */ + UCAL_SATURDAY +}; + +/** @stable ICU 2.0 */ +typedef enum UCalendarDaysOfWeek UCalendarDaysOfWeek; + +/** Possible months in a UCalendar. Note: Calendar month is 0-based. + * @stable ICU 2.0 + */ +enum UCalendarMonths { + /** January */ + UCAL_JANUARY, + /** February */ + UCAL_FEBRUARY, + /** March */ + UCAL_MARCH, + /** April */ + UCAL_APRIL, + /** May */ + UCAL_MAY, + /** June */ + UCAL_JUNE, + /** July */ + UCAL_JULY, + /** August */ + UCAL_AUGUST, + /** September */ + UCAL_SEPTEMBER, + /** October */ + UCAL_OCTOBER, + /** November */ + UCAL_NOVEMBER, + /** December */ + UCAL_DECEMBER, + /** Value of the <code>UCAL_MONTH</code> field indicating the + * thirteenth month of the year. Although the Gregorian calendar + * does not use this value, lunar calendars do. + */ + UCAL_UNDECIMBER +}; + +/** @stable ICU 2.0 */ +typedef enum UCalendarMonths UCalendarMonths; + +/** Possible AM/PM values in a UCalendar + * @stable ICU 2.0 + */ +enum UCalendarAMPMs { + /** AM */ + UCAL_AM, + /** PM */ + UCAL_PM +}; + +/** @stable ICU 2.0 */ +typedef enum UCalendarAMPMs UCalendarAMPMs; + +/** + * System time zone type constants used by filtering zones + * in ucal_openTimeZoneIDEnumeration. + * @see ucal_openTimeZoneIDEnumeration + * @stable ICU 4.8 + */ +enum USystemTimeZoneType { + /** + * Any system zones. + * @stable ICU 4.8 + */ + UCAL_ZONE_TYPE_ANY, + /** + * Canonical system zones. + * @stable ICU 4.8 + */ + UCAL_ZONE_TYPE_CANONICAL, + /** + * Canonical system zones associated with actual locations. 
+ * @stable ICU 4.8 + */ + UCAL_ZONE_TYPE_CANONICAL_LOCATION +}; + +/** @stable ICU 4.8 */ +typedef enum USystemTimeZoneType USystemTimeZoneType; + +/** + * Create an enumeration over system time zone IDs with the given + * filter conditions. + * @param zoneType The system time zone type. + * @param region The ISO 3166 two-letter country code or UN M.49 + * three-digit area code. When NULL, no filtering + * done by region. + * @param rawOffset An offset from GMT in milliseconds, ignoring the + * effect of daylight savings time, if any. When NULL, + * no filtering done by zone offset. + * @param ec A pointer to an UErrorCode to receive any errors + * @return an enumeration object that the caller must dispose of + * using uenum_close(), or NULL upon failure. In case of failure, + * *ec will indicate the error. + * @stable ICU 4.8 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucal_openTimeZoneIDEnumeration(USystemTimeZoneType zoneType, const char* region, + const int32_t* rawOffset, UErrorCode* ec); + +/** + * Create an enumeration over all time zones. + * + * @param ec input/output error code + * + * @return an enumeration object that the caller must dispose of using + * uenum_close(), or NULL upon failure. In case of failure *ec will + * indicate the error. + * + * @stable ICU 2.6 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucal_openTimeZones(UErrorCode* ec); + +/** + * Create an enumeration over all time zones associated with the given + * country. Some zones are affiliated with no country (e.g., "UTC"); + * these may also be retrieved, as a group. + * + * @param country the ISO 3166 two-letter country code, or NULL to + * retrieve zones not affiliated with any country + * + * @param ec input/output error code + * + * @return an enumeration object that the caller must dispose of using + * uenum_close(), or NULL upon failure. In case of failure *ec will + * indicate the error.
+ * + * @stable ICU 2.6 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucal_openCountryTimeZones(const char* country, UErrorCode* ec); + +/** + * Return the default time zone. The default is determined initially + * by querying the host operating system. It may be changed with + * ucal_setDefaultTimeZone() or with the C++ TimeZone API. + * + * @param result A buffer to receive the result, or NULL + * + * @param resultCapacity The capacity of the result buffer + * + * @param ec input/output error code + * + * @return The result string length, not including the terminating + * null + * + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getDefaultTimeZone(UChar* result, int32_t resultCapacity, UErrorCode* ec); + +/** + * Set the default time zone. + * + * @param zoneID null-terminated time zone ID + * + * @param ec input/output error code + * + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +ucal_setDefaultTimeZone(const UChar* zoneID, UErrorCode* ec); + +/** + * Return the amount of time in milliseconds that the clock is + * advanced during daylight savings time for the given time zone, or + * zero if the time zone does not observe daylight savings time. + * + * @param zoneID null-terminated time zone ID + * + * @param ec input/output error code + * + * @return the number of milliseconds the time is advanced with + * respect to standard time when the daylight savings rules are in + * effect. This is always a non-negative number, most commonly either + * 3,600,000 (one hour) or zero. + * + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getDSTSavings(const UChar* zoneID, UErrorCode* ec); + +/** + * Get the current date and time. + * The value returned is represented as milliseconds from the epoch. + * @return The current date and time. + * @stable ICU 2.0 + */ +U_STABLE UDate U_EXPORT2 +ucal_getNow(void); + +/** + * Open a UCalendar. + * A UCalendar may be used to convert a millisecond value to a year, + * month, and day. 
+ * <p> + * Note: When unknown TimeZone ID is specified or if the TimeZone ID specified is "Etc/Unknown", + * the UCalendar returned by the function is initialized with GMT zone with TimeZone ID + * <code>UCAL_UNKNOWN_ZONE_ID</code> ("Etc/Unknown") without any errors/warnings. If you want + * to check if a TimeZone ID is valid prior to this function, use <code>ucal_getCanonicalTimeZoneID</code>. + * + * @param zoneID The desired TimeZone ID. If 0, use the default time zone. + * @param len The length of zoneID, or -1 if null-terminated. + * @param locale The desired locale + * @param type The type of UCalendar to open. This can be UCAL_GREGORIAN to open the Gregorian + * calendar for the locale, or UCAL_DEFAULT to open the default calendar for the locale (the + * default calendar may also be Gregorian). To open a specific non-Gregorian calendar for the + * locale, use uloc_setKeywordValue to set the value of the calendar keyword for the locale + * and then pass the locale to ucal_open with UCAL_DEFAULT as the type. + * @param status A pointer to an UErrorCode to receive any errors + * @return A pointer to a UCalendar, or 0 if an error occurred. + * @see #UCAL_UNKNOWN_ZONE_ID + * @stable ICU 2.0 + */ +U_STABLE UCalendar* U_EXPORT2 +ucal_open(const UChar* zoneID, + int32_t len, + const char* locale, + UCalendarType type, + UErrorCode* status); + +/** + * Close a UCalendar. + * Once closed, a UCalendar may no longer be used. + * @param cal The UCalendar to close. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_close(UCalendar *cal); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCalendarPointer + * "Smart pointer" class, closes a UCalendar via ucal_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCalendarPointer, UCalendar, ucal_close); + +U_NAMESPACE_END + +#endif + +/** + * Open a copy of a UCalendar. 
+ * This function performs a deep copy. + * @param cal The calendar to copy + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UCalendar identical to cal. + * @stable ICU 4.0 + */ +U_STABLE UCalendar* U_EXPORT2 +ucal_clone(const UCalendar* cal, + UErrorCode* status); + +/** + * Set the TimeZone used by a UCalendar. + * A UCalendar uses a timezone for converting from Greenwich time to local time. + * @param cal The UCalendar to set. + * @param zoneID The desired TimeZone ID. If 0, use the default time zone. + * @param len The length of zoneID, or -1 if null-terminated. + * @param status A pointer to an UErrorCode to receive any errors. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_setTimeZone(UCalendar* cal, + const UChar* zoneID, + int32_t len, + UErrorCode* status); + +/** + * Get the ID of the UCalendar's time zone. + * + * @param cal The UCalendar to query. + * @param result Receives the UCalendar's time zone ID. + * @param resultLength The maximum size of result. + * @param status Receives the status. + * @return The total buffer size needed; if greater than resultLength, the output was truncated. + * @stable ICU 51 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getTimeZoneID(const UCalendar *cal, + UChar *result, + int32_t resultLength, + UErrorCode *status); + +/** + * Possible formats for a UCalendar's display name + * @stable ICU 2.0 + */ +enum UCalendarDisplayNameType { + /** Standard display name */ + UCAL_STANDARD, + /** Short standard display name */ + UCAL_SHORT_STANDARD, + /** Daylight savings display name */ + UCAL_DST, + /** Short daylight savings display name */ + UCAL_SHORT_DST +}; + +/** @stable ICU 2.0 */ +typedef enum UCalendarDisplayNameType UCalendarDisplayNameType; + +/** + * Get the display name for a UCalendar's TimeZone. + * A display name is suitable for presentation to a user. + * @param cal The UCalendar to query. 
+ * @param type The desired display name format; one of UCAL_STANDARD, UCAL_SHORT_STANDARD, + * UCAL_DST, UCAL_SHORT_DST + * @param locale The desired locale for the display name. + * @param result A pointer to a buffer to receive the display name. + * @param resultLength The maximum size of result. + * @param status A pointer to an UErrorCode to receive any errors + * @return The total buffer size needed; if greater than resultLength, the output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getTimeZoneDisplayName(const UCalendar* cal, + UCalendarDisplayNameType type, + const char* locale, + UChar* result, + int32_t resultLength, + UErrorCode* status); + +/** + * Determine if a UCalendar is currently in daylight savings time. + * Daylight savings time is not used in all parts of the world. + * @param cal The UCalendar to query. + * @param status A pointer to an UErrorCode to receive any errors + * @return TRUE if cal is currently in daylight savings time, FALSE otherwise + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucal_inDaylightTime(const UCalendar* cal, + UErrorCode* status ); + +/** + * Sets the GregorianCalendar change date. This is the point when the switch from + * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October + * 15, 1582. Previous to this time and date will be Julian dates. + * + * This function works only for Gregorian calendars. If the UCalendar is not + * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR + * error code is set. + * + * @param cal The calendar object. + * @param date The given Gregorian cutover date. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.)
+ * + * @see GregorianCalendar::setGregorianChange + * @see ucal_getGregorianChange + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ucal_setGregorianChange(UCalendar *cal, UDate date, UErrorCode *pErrorCode); + +/** + * Gets the Gregorian Calendar change date. This is the point when the switch from + * Julian dates to Gregorian dates occurred. Default is 00:00:00 local time, October + * 15, 1582. Previous to this time and date will be Julian dates. + * + * This function works only for Gregorian calendars. If the UCalendar is not + * an instance of a Gregorian calendar, then a U_UNSUPPORTED_ERROR + * error code is set. + * + * @param cal The calendar object. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The Gregorian cutover time for this calendar. + * + * @see GregorianCalendar::getGregorianChange + * @see ucal_setGregorianChange + * @stable ICU 3.6 + */ +U_STABLE UDate U_EXPORT2 +ucal_getGregorianChange(const UCalendar *cal, UErrorCode *pErrorCode); + +/** + * Types of UCalendar attributes + * @stable ICU 2.0 + */ +enum UCalendarAttribute { + /** + * Lenient parsing + * @stable ICU 2.0 + */ + UCAL_LENIENT, + /** + * First day of week + * @stable ICU 2.0 + */ + UCAL_FIRST_DAY_OF_WEEK, + /** + * Minimum number of days in first week + * @stable ICU 2.0 + */ + UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, + /** + * The behavior for handling wall time repeating multiple times + * at negative time zone offset transitions + * @stable ICU 49 + */ + UCAL_REPEATED_WALL_TIME, + /** + * The behavior for handling skipped wall time at positive time + * zone offset transitions. 
+ * @stable ICU 49 + */ + UCAL_SKIPPED_WALL_TIME +}; + +/** @stable ICU 2.0 */ +typedef enum UCalendarAttribute UCalendarAttribute; + +/** + * Options for handling ambiguous wall time at time zone + * offset transitions. + * @stable ICU 49 + */ +enum UCalendarWallTimeOption { + /** + * An ambiguous wall time to be interpreted as the latest. + * This option is valid for UCAL_REPEATED_WALL_TIME and + * UCAL_SKIPPED_WALL_TIME. + * @stable ICU 49 + */ + UCAL_WALLTIME_LAST, + /** + * An ambiguous wall time to be interpreted as the earliest. + * This option is valid for UCAL_REPEATED_WALL_TIME and + * UCAL_SKIPPED_WALL_TIME. + * @stable ICU 49 + */ + UCAL_WALLTIME_FIRST, + /** + * An ambiguous wall time to be interpreted as the next valid + * wall time. This option is valid for UCAL_SKIPPED_WALL_TIME. + * @stable ICU 49 + */ + UCAL_WALLTIME_NEXT_VALID +}; +/** @stable ICU 49 */ +typedef enum UCalendarWallTimeOption UCalendarWallTimeOption; + +/** + * Get a numeric attribute associated with a UCalendar. + * Numeric attributes include the first day of the week, or the minimal numbers + * of days in the first week of the month. + * @param cal The UCalendar to query. + * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK, + * UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, UCAL_REPEATED_WALL_TIME or UCAL_SKIPPED_WALL_TIME + * @return The value of attr. + * @see ucal_setAttribute + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getAttribute(const UCalendar* cal, + UCalendarAttribute attr); + +/** + * Set a numeric attribute associated with a UCalendar. + * Numeric attributes include the first day of the week, or the minimal numbers + * of days in the first week of the month. + * @param cal The UCalendar to set. + * @param attr The desired attribute; one of UCAL_LENIENT, UCAL_FIRST_DAY_OF_WEEK, + * UCAL_MINIMAL_DAYS_IN_FIRST_WEEK, UCAL_REPEATED_WALL_TIME or UCAL_SKIPPED_WALL_TIME + * @param newValue The new value of attr. 
+ * @see ucal_getAttribute + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_setAttribute(UCalendar* cal, + UCalendarAttribute attr, + int32_t newValue); + +/** + * Get a locale for which calendars are available. + * A UCalendar in a locale returned by this function will contain the correct + * day and month names for the locale. + * @param localeIndex The index of the desired locale. + * @return A locale for which calendars are available, or 0 if none. + * @see ucal_countAvailable + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +ucal_getAvailable(int32_t localeIndex); + +/** + * Determine how many locales have calendars available. + * This function is most useful as determining the loop ending condition for + * calls to \ref ucal_getAvailable. + * @return The number of locales for which calendars are available. + * @see ucal_getAvailable + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucal_countAvailable(void); + +/** + * Get a UCalendar's current time in millis. + * The time is represented as milliseconds from the epoch. + * @param cal The UCalendar to query. + * @param status A pointer to an UErrorCode to receive any errors + * @return The calendar's current time in millis. + * @see ucal_setMillis + * @see ucal_setDate + * @see ucal_setDateTime + * @stable ICU 2.0 + */ +U_STABLE UDate U_EXPORT2 +ucal_getMillis(const UCalendar* cal, + UErrorCode* status); + +/** + * Set a UCalendar's current time in millis. + * The time is represented as milliseconds from the epoch. + * @param cal The UCalendar to set. + * @param dateTime The desired date and time. + * @param status A pointer to an UErrorCode to receive any errors + * @see ucal_getMillis + * @see ucal_setDate + * @see ucal_setDateTime + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_setMillis(UCalendar* cal, + UDate dateTime, + UErrorCode* status ); + +/** + * Set a UCalendar's current date. + * The date is represented as a series of 32-bit integers. 
+ * @param cal The UCalendar to set. + * @param year The desired year. + * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY, + * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER + * @param date The desired day of the month. + * @param status A pointer to an UErrorCode to receive any errors + * @see ucal_getMillis + * @see ucal_setMillis + * @see ucal_setDateTime + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_setDate(UCalendar* cal, + int32_t year, + int32_t month, + int32_t date, + UErrorCode* status); + +/** + * Set a UCalendar's current date. + * The date is represented as a series of 32-bit integers. + * @param cal The UCalendar to set. + * @param year The desired year. + * @param month The desired month; one of UCAL_JANUARY, UCAL_FEBRUARY, UCAL_MARCH, UCAL_APRIL, UCAL_MAY, + * UCAL_JUNE, UCAL_JULY, UCAL_AUGUST, UCAL_SEPTEMBER, UCAL_OCTOBER, UCAL_NOVEMBER, UCAL_DECEMBER, UCAL_UNDECIMBER + * @param date The desired day of the month. + * @param hour The desired hour of day. + * @param minute The desired minute. + * @param second The desired second. + * @param status A pointer to an UErrorCode to receive any errors + * @see ucal_getMillis + * @see ucal_setMillis + * @see ucal_setDate + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_setDateTime(UCalendar* cal, + int32_t year, + int32_t month, + int32_t date, + int32_t hour, + int32_t minute, + int32_t second, + UErrorCode* status); + +/** + * Returns TRUE if two UCalendars are equivalent. Equivalent + * UCalendars will behave identically, but they may be set to + * different times. + * @param cal1 The first of the UCalendars to compare. + * @param cal2 The second of the UCalendars to compare. + * @return TRUE if cal1 and cal2 are equivalent, FALSE otherwise.
+ * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucal_equivalentTo(const UCalendar* cal1, + const UCalendar* cal2); + +/** + * Add a specified signed amount to a particular field in a UCalendar. + * This can modify more significant fields in the calendar. + * Adding a positive value always means moving forward in time, so for the Gregorian calendar, + * starting with 100 BC and adding +1 to year results in 99 BC (even though this actually reduces + * the numeric value of the field itself). + * @param cal The UCalendar to which to add. + * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @param amount The signed amount to add to field. If the amount causes the value + * to exceed the maximum or minimum values for that field, other fields are modified + * to preserve the magnitude of the change. + * @param status A pointer to an UErrorCode to receive any errors + * @see ucal_roll + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_add(UCalendar* cal, + UCalendarDateFields field, + int32_t amount, + UErrorCode* status); + +/** + * Add a specified signed amount to a particular field in a UCalendar. + * This will not modify more significant fields in the calendar. + * Rolling by a positive value always means moving forward in time (unless the limit of the + * field is reached, in which case it may pin or wrap), so for Gregorian calendar, + * starting with 100 BC and rolling the year by +1 results in 99 BC. + * When eras have a definite beginning and end (as in the Chinese calendar, or as in most eras in the + * Japanese calendar) then rolling the year past either limit of the era will cause the year to wrap around.
+ * When eras only have a limit at one end, then attempting to roll the year past that limit will result in + * pinning the year at that limit. Note that for most calendars in which era 0 years move forward in time + * (such as Buddhist, Hebrew, or Islamic), it is possible for add or roll to result in negative years for + * era 0 (that is the only way to represent years before the calendar epoch). + * @param cal The UCalendar to which to add. + * @param field The field to which to add the signed value; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @param amount The signed amount to add to field. If the amount causes the value + * to exceed the maximum or minimum values for that field, the field is pinned to a permissible + * value. + * @param status A pointer to an UErrorCode to receive any errors + * @see ucal_add + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_roll(UCalendar* cal, + UCalendarDateFields field, + int32_t amount, + UErrorCode* status); + +/** + * Get the current value of a field from a UCalendar. + * All fields are represented as 32-bit integers. + * @param cal The UCalendar to query. + * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @param status A pointer to an UErrorCode to receive any errors + * @return The value of the desired field.
+ * @see ucal_set + * @see ucal_isSet + * @see ucal_clearField + * @see ucal_clear + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucal_get(const UCalendar* cal, + UCalendarDateFields field, + UErrorCode* status ); + +/** + * Set the value of a field in a UCalendar. + * All fields are represented as 32-bit integers. + * @param cal The UCalendar to set. + * @param field The field to set; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @param value The desired value of field. + * @see ucal_get + * @see ucal_isSet + * @see ucal_clearField + * @see ucal_clear + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_set(UCalendar* cal, + UCalendarDateFields field, + int32_t value); + +/** + * Determine if a field in a UCalendar is set. + * All fields are represented as 32-bit integers. + * @param cal The UCalendar to query. + * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @return TRUE if field is set, FALSE otherwise. + * @see ucal_get + * @see ucal_set + * @see ucal_clearField + * @see ucal_clear + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucal_isSet(const UCalendar* cal, + UCalendarDateFields field); + +/** + * Clear a field in a UCalendar. + * All fields are represented as 32-bit integers. + * @param cal The UCalendar containing the field to clear. 
+ * @param field The field to clear; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @see ucal_get + * @see ucal_set + * @see ucal_isSet + * @see ucal_clear + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_clearField(UCalendar* cal, + UCalendarDateFields field); + +/** + * Clear all fields in a UCalendar. + * All fields are represented as 32-bit integers. + * @param calendar The UCalendar to clear. + * @see ucal_get + * @see ucal_set + * @see ucal_isSet + * @see ucal_clearField + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucal_clear(UCalendar* calendar); + +/** + * Possible limit values for a UCalendar + * @stable ICU 2.0 + */ +enum UCalendarLimitType { + /** Minimum value */ + UCAL_MINIMUM, + /** Maximum value */ + UCAL_MAXIMUM, + /** Greatest minimum value */ + UCAL_GREATEST_MINIMUM, + /** Least maximum value */ + UCAL_LEAST_MAXIMUM, + /** Actual minimum value */ + UCAL_ACTUAL_MINIMUM, + /** Actual maximum value */ + UCAL_ACTUAL_MAXIMUM +}; + +/** @stable ICU 2.0 */ +typedef enum UCalendarLimitType UCalendarLimitType; + +/** + * Determine a limit for a field in a UCalendar. + * A limit is a maximum or minimum value for a field. + * @param cal The UCalendar to query. + * @param field The desired field; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @param type The desired critical point; one of UCAL_MINIMUM, UCAL_MAXIMUM, UCAL_GREATEST_MINIMUM, + * UCAL_LEAST_MAXIMUM, UCAL_ACTUAL_MINIMUM, UCAL_ACTUAL_MAXIMUM + * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The requested value. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getLimit(const UCalendar* cal, + UCalendarDateFields field, + UCalendarLimitType type, + UErrorCode* status); + +/** Get the locale for this calendar object. You can choose between valid and actual locale. + * @param cal The calendar object + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale name + * @stable ICU 2.8 + */ +U_STABLE const char * U_EXPORT2 +ucal_getLocaleByType(const UCalendar *cal, ULocDataLocaleType type, UErrorCode* status); + +/** + * Returns the timezone data version currently used by ICU. + * @param status error code for the operation + * @return the version string, such as "2007f" + * @stable ICU 3.8 + */ +U_STABLE const char * U_EXPORT2 +ucal_getTZDataVersion(UErrorCode* status); + +/** + * Returns the canonical system timezone ID or the normalized + * custom time zone ID for the given time zone ID. + * @param id The input timezone ID to be canonicalized. + * @param len The length of id, or -1 if null-terminated. + * @param result The buffer receives the canonical system timezone ID + * or the custom timezone ID in normalized format. + * @param resultCapacity The capacity of the result buffer. + * @param isSystemID Receives if the given ID is a known system + * timezone ID. + * @param status Receives the status. When the given timezone ID + * is neither a known system time zone ID nor a + * valid custom timezone ID, U_ILLEGAL_ARGUMENT_ERROR + * is set. + * @return The result string length, not including the terminating + * null. + * @stable ICU 4.0 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getCanonicalTimeZoneID(const UChar* id, int32_t len, + UChar* result, int32_t resultCapacity, UBool *isSystemID, UErrorCode* status); +/** + * Get the resource keyword value string designating the calendar type for the UCalendar. + * @param cal The UCalendar to query. 
+ * @param status The error code for the operation. + * @return The resource keyword value string. + * @stable ICU 4.2 + */ +U_STABLE const char * U_EXPORT2 +ucal_getType(const UCalendar *cal, UErrorCode* status); + +/** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param key one of the keys supported by this service. For now, only + * "calendar" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @param status error status + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucal_getKeywordValuesForLocale(const char* key, + const char* locale, + UBool commonlyUsed, + UErrorCode* status); + + +/** Weekday types, as returned by ucal_getDayOfWeekType(). + * @stable ICU 4.4 + */ +enum UCalendarWeekdayType { + /** + * Designates a full weekday (no part of the day is included in the weekend). + * @stable ICU 4.4 + */ + UCAL_WEEKDAY, + /** + * Designates a full weekend day (the entire day is included in the weekend). + * @stable ICU 4.4 + */ + UCAL_WEEKEND, + /** + * Designates a day that starts as a weekday and transitions to the weekend. + * Call ucal_getWeekendTransition() to get the time of transition. + * @stable ICU 4.4 + */ + UCAL_WEEKEND_ONSET, + /** + * Designates a day that starts as the weekend and transitions to a weekday. + * Call ucal_getWeekendTransition() to get the time of transition. 
+ * @stable ICU 4.4 + */ + UCAL_WEEKEND_CEASE +}; + +/** @stable ICU 4.4 */ +typedef enum UCalendarWeekdayType UCalendarWeekdayType; + +/** + * Returns whether the given day of the week is a weekday, a weekend day, + * or a day that transitions from one to the other, for the locale and + * calendar system associated with this UCalendar (the locale's region is + * often the most determinant factor). If a transition occurs at midnight, + * then the days before and after the transition will have the + * type UCAL_WEEKDAY or UCAL_WEEKEND. If a transition occurs at a time + * other than midnight, then the day of the transition will have + * the type UCAL_WEEKEND_ONSET or UCAL_WEEKEND_CEASE. In this case, the + * function ucal_getWeekendTransition() will return the point of + * transition. + * @param cal The UCalendar to query. + * @param dayOfWeek The day of the week whose type is desired (UCAL_SUNDAY..UCAL_SATURDAY). + * @param status The error code for the operation. + * @return The UCalendarWeekdayType for the day of the week. + * @stable ICU 4.4 + */ +U_STABLE UCalendarWeekdayType U_EXPORT2 +ucal_getDayOfWeekType(const UCalendar *cal, UCalendarDaysOfWeek dayOfWeek, UErrorCode* status); + +/** + * Returns the time during the day at which the weekend begins or ends in + * this calendar system. If ucal_getDayOfWeekType() returns UCAL_WEEKEND_ONSET + * for the specified dayOfWeek, return the time at which the weekend begins. + * If ucal_getDayOfWeekType() returns UCAL_WEEKEND_CEASE for the specified dayOfWeek, + * return the time at which the weekend ends. If ucal_getDayOfWeekType() returns + * some other UCalendarWeekdayType for the specified dayOfWeek, it is an error condition + * (U_ILLEGAL_ARGUMENT_ERROR). + * @param cal The UCalendar to query. + * @param dayOfWeek The day of the week for which the weekend transition time is + * desired (UCAL_SUNDAY..UCAL_SATURDAY). + * @param status The error code for the operation.
+ * @return The milliseconds after midnight at which the weekend begins or ends. + * @stable ICU 4.4 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getWeekendTransition(const UCalendar *cal, UCalendarDaysOfWeek dayOfWeek, UErrorCode *status); + +/** + * Returns TRUE if the given UDate is in the weekend in + * this calendar system. + * @param cal The UCalendar to query. + * @param date The UDate in question. + * @param status The error code for the operation. + * @return TRUE if the given UDate is in the weekend in + * this calendar system, FALSE otherwise. + * @stable ICU 4.4 + */ +U_STABLE UBool U_EXPORT2 +ucal_isWeekend(const UCalendar *cal, UDate date, UErrorCode *status); + +/** + * Return the difference between the target time and the time this calendar object is currently set to. + * If the target time is after the current calendar setting, then the returned value will be positive. + * The field parameter specifies the units of the return value. For example, if field is UCAL_MONTH + * and ucal_getFieldDifference returns 3, then the target time is 3 to less than 4 months after the + * current calendar setting. + * + * As a side effect of this call, this calendar is advanced toward target by the given amount. That is, + * calling this function has the side effect of calling ucal_add on this calendar with the specified + * field and an amount equal to the return value from this function. + * + * A typical way of using this function is to call it first with the largest field of interest, then + * with progressively smaller fields. + * + * @param cal The UCalendar to compare and update. + * @param target The target date to compare to the current calendar setting.
+ * @param field The field to compare; one of UCAL_ERA, UCAL_YEAR, UCAL_MONTH, + * UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_DATE, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK, + * UCAL_DAY_OF_WEEK_IN_MONTH, UCAL_AM_PM, UCAL_HOUR, UCAL_HOUR_OF_DAY, UCAL_MINUTE, UCAL_SECOND, + * UCAL_MILLISECOND, UCAL_ZONE_OFFSET, UCAL_DST_OFFSET. + * @param status A pointer to an UErrorCode to receive any errors + * @return The date difference for the specified field. + * @stable ICU 4.8 + */ +U_STABLE int32_t U_EXPORT2 +ucal_getFieldDifference(UCalendar* cal, + UDate target, + UCalendarDateFields field, + UErrorCode* status); + +/** + * Time zone transition types for ucal_getTimeZoneTransitionDate + * @stable ICU 50 + */ +enum UTimeZoneTransitionType { + /** + * Get the next transition after the current date, + * i.e. excludes the current date + * @stable ICU 50 + */ + UCAL_TZ_TRANSITION_NEXT, + /** + * Get the next transition on or after the current date, + * i.e. may include the current date + * @stable ICU 50 + */ + UCAL_TZ_TRANSITION_NEXT_INCLUSIVE, + /** + * Get the previous transition before the current date, + * i.e. excludes the current date + * @stable ICU 50 + */ + UCAL_TZ_TRANSITION_PREVIOUS, + /** + * Get the previous transition on or before the current date, + * i.e. may include the current date + * @stable ICU 50 + */ + UCAL_TZ_TRANSITION_PREVIOUS_INCLUSIVE +}; + +typedef enum UTimeZoneTransitionType UTimeZoneTransitionType; /**< @stable ICU 50 */ + +/** +* Get the UDate for the next/previous time zone transition relative to +* the calendar's current date, in the time zone to which the calendar +* is currently set. If there is no known time zone transition of the +* requested type relative to the calendar's date, the function returns +* FALSE. +* @param cal The UCalendar to query. +* @param type The type of transition desired. +* @param transition A pointer to a UDate to be set to the transition time. +* If the function returns FALSE, the value set is unspecified.
+* @param status A pointer to a UErrorCode to receive any errors. +* @return TRUE if a valid transition time is set in *transition, FALSE +* otherwise. +* @stable ICU 50 +*/ +U_STABLE UBool U_EXPORT2 +ucal_getTimeZoneTransitionDate(const UCalendar* cal, UTimeZoneTransitionType type, + UDate* transition, UErrorCode* status); + +/** +* Converts a system time zone ID to an equivalent Windows time zone ID. For example, +* Windows time zone ID "Pacific Standard Time" is returned for input "America/Los_Angeles". +* +* <p>There are system time zones that cannot be mapped to Windows zones. When the input +* system time zone ID is unknown or unmappable to a Windows time zone, then this +* function returns 0 as the result length, but the operation itself remains successful +* (no error status set on return). +* +* <p>This implementation utilizes <a href="http://unicode.org/cldr/charts/supplemental/zone_tzid.html"> +* Zone-Tzid mapping data</a>. The mapping data is updated from time to time. To get the latest changes, +* please read the ICU user guide section <a href="http://userguide.icu-project.org/datetime/timezone#TOC-Updating-the-Time-Zone-Data"> +* Updating the Time Zone Data</a>. +* +* @param id A system time zone ID. +* @param len The length of <code>id</code>, or -1 if null-terminated. +* @param winid A buffer to receive a Windows time zone ID. +* @param winidCapacity The capacity of the result buffer <code>winid</code>. +* @param status Receives the status. +* @return The result string length, not including the terminating null. +* @see ucal_getTimeZoneIDForWindowsID +* +* @stable ICU 52 +*/ +U_STABLE int32_t U_EXPORT2 +ucal_getWindowsTimeZoneID(const UChar* id, int32_t len, + UChar* winid, int32_t winidCapacity, UErrorCode* status); + +/** +* Converts a Windows time zone ID to an equivalent system time zone ID +* for a region.
For example, system time zone ID "America/Los_Angeles" is returned +* for input Windows ID "Pacific Standard Time" and region "US" (or <code>null</code>), +* "America/Vancouver" is returned for the same Windows ID "Pacific Standard Time" and +* region "CA". +* +* <p>Not all Windows time zones can be mapped to system time zones. When the input +* Windows time zone ID is unknown or unmappable to a system time zone, then this +* function returns 0 as the result length, but the operation itself remains successful +* (no error status set on return). +* +* <p>This implementation utilizes <a href="http://unicode.org/cldr/charts/supplemental/zone_tzid.html"> +* Zone-Tzid mapping data</a>. The mapping data is updated from time to time. To get the latest changes, +* please read the ICU user guide section <a href="http://userguide.icu-project.org/datetime/timezone#TOC-Updating-the-Time-Zone-Data"> +* Updating the Time Zone Data</a>. +* +* @param winid A Windows time zone ID. +* @param len The length of <code>winid</code>, or -1 if null-terminated. +* @param region A null-terminated region code, or <code>NULL</code> if no regional preference. +* @param id A buffer to receive a system time zone ID. +* @param idCapacity The capacity of the result buffer <code>id</code>. +* @param status Receives the status. +* @return The result string length, not including the terminating null. +* @see ucal_getWindowsTimeZoneID +* +* @stable ICU 52 +*/ +U_STABLE int32_t U_EXPORT2 +ucal_getTimeZoneIDForWindowsID(const UChar* winid, int32_t len, const char* region, + UChar* id, int32_t idCapacity, UErrorCode* status); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/ucol.h b/intl/icu/source/i18n/unicode/ucol.h new file mode 100644 index 000000000..eae88f8ce --- /dev/null +++ b/intl/icu/source/i18n/unicode/ucol.h @@ -0,0 +1,1497 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (c) 1996-2015, International Business Machines Corporation and others. +* All Rights Reserved. +******************************************************************************* +*/ + +#ifndef UCOL_H +#define UCOL_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/unorm.h" +#include "unicode/localpointer.h" +#include "unicode/parseerr.h" +#include "unicode/uloc.h" +#include "unicode/uset.h" +#include "unicode/uscript.h" + +/** + * \file + * \brief C API: Collator + * + * <h2> Collator C API </h2> + * + * The C API for Collator performs locale-sensitive + * string comparison. You use this service to build + * searching and sorting routines for natural language text. + * <p> + * For more information about the collation service see + * <a href="http://userguide.icu-project.org/collation">the User Guide</a>. + * <p> + * Collation service provides correct sorting orders for most locales supported in ICU. + * If specific data for a locale is not available, the orders eventually falls back + * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. + * <p> + * Sort ordering may be customized by providing your own set of rules. For more on + * this subject see the <a href="http://userguide.icu-project.org/collation/customization"> + * Collation Customization</a> section of the User Guide. + * <p> + * @see UCollationResult + * @see UNormalizationMode + * @see UCollationStrength + * @see UCollationElements + */ + +/** A collator. +* For usage in C programs. +*/ +struct UCollator; +/** structure representing a collator object instance + * @stable ICU 2.0 + */ +typedef struct UCollator UCollator; + + +/** + * UCOL_LESS is returned if source string is compared to be less than target + * string in the ucol_strcoll() method. 
+ * UCOL_EQUAL is returned if source string is compared to be equal to target + * string in the ucol_strcoll() method. + * UCOL_GREATER is returned if source string is compared to be greater than + * target string in the ucol_strcoll() method. + * @see ucol_strcoll() + * <p> + * Possible values for a comparison result + * @stable ICU 2.0 + */ +typedef enum { + /** string a == string b */ + UCOL_EQUAL = 0, + /** string a > string b */ + UCOL_GREATER = 1, + /** string a < string b */ + UCOL_LESS = -1 +} UCollationResult ; + + +/** Enum containing attribute values for controlling collation behavior. + * Here are all the allowable values. Not every attribute can take every value. The only + * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined + * value for that locale + * @stable ICU 2.0 + */ +typedef enum { + /** accepted by most attributes */ + UCOL_DEFAULT = -1, + + /** Primary collation strength */ + UCOL_PRIMARY = 0, + /** Secondary collation strength */ + UCOL_SECONDARY = 1, + /** Tertiary collation strength */ + UCOL_TERTIARY = 2, + /** Default collation strength */ + UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, + UCOL_CE_STRENGTH_LIMIT, + /** Quaternary collation strength */ + UCOL_QUATERNARY=3, + /** Identical collation strength */ + UCOL_IDENTICAL=15, + UCOL_STRENGTH_LIMIT, + + /** Turn the feature off - works for UCOL_FRENCH_COLLATION, + UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE + & UCOL_DECOMPOSITION_MODE*/ + UCOL_OFF = 16, + /** Turn the feature on - works for UCOL_FRENCH_COLLATION, + UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE + & UCOL_DECOMPOSITION_MODE*/ + UCOL_ON = 17, + + /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ + UCOL_SHIFTED = 20, + /** Valid for UCOL_ALTERNATE_HANDLING.
Alternate handling will be non ignorable */ + UCOL_NON_IGNORABLE = 21, + + /** Valid for UCOL_CASE_FIRST - + lower case sorts before upper case */ + UCOL_LOWER_FIRST = 24, + /** upper case sorts before lower case */ + UCOL_UPPER_FIRST = 25, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UColAttributeValue value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCOL_ATTRIBUTE_VALUE_COUNT +#endif // U_HIDE_DEPRECATED_API +} UColAttributeValue; + +/** + * Enum containing the codes for reordering segments of the collation table that are not script + * codes. These reordering codes are to be used in conjunction with the script codes. + * @see ucol_getReorderCodes + * @see ucol_setReorderCodes + * @see ucol_getEquivalentReorderCodes + * @see UScriptCode + * @stable ICU 4.8 + */ + typedef enum { + /** + * A special reordering code that is used to specify the default + * reordering codes for a locale. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_DEFAULT = -1, + /** + * A special reordering code that is used to specify no reordering codes. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, + /** + * A special reordering code that is used to specify all other codes used for + * reordering except for the codes listed as UColReorderCode values and those + * listed explicitly in a reordering. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, + /** + * Characters with the space property. + * This is equivalent to the rule value "space". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_SPACE = 0x1000, + /** + * The first entry in the enumeration of reordering groups. This is intended for use in + * range checking and enumeration of the reorder codes. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, + /** + * Characters with the punctuation property. + * This is equivalent to the rule value "punct".
+ * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_PUNCTUATION = 0x1001, + /** + * Characters with the symbol property. + * This is equivalent to the rule value "symbol". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_SYMBOL = 0x1002, + /** + * Characters with the currency property. + * This is equivalent to the rule value "currency". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_CURRENCY = 0x1003, + /** + * Characters with the digit property. + * This is equivalent to the rule value "digit". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_DIGIT = 0x1004, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UColReorderCode value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCOL_REORDER_CODE_LIMIT = 0x1005 +#endif // U_HIDE_DEPRECATED_API +} UColReorderCode; + +/** + * Base letter represents a primary difference. Set comparison + * level to UCOL_PRIMARY to ignore secondary and tertiary differences. + * Use this to set the strength of a Collator object. + * Example of primary difference, "abc" < "abd" + * + * Diacritical differences on the same base letter represent a secondary + * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary + * differences. Use this to set the strength of a Collator object. + * Example of secondary difference, "ä" >> "a". + * + * Uppercase and lowercase versions of the same character represents a + * tertiary difference. Set comparison level to UCOL_TERTIARY to include + * all comparison differences. Use this to set the strength of a Collator + * object. + * Example of tertiary difference, "abc" <<< "ABC". + * + * Two characters are considered "identical" when they have the same + * unicode spellings. UCOL_IDENTICAL. + * For example, "ä" == "ä". + * + * UCollationStrength is also used to determine the strength of sort keys + * generated from UCollator objects + * These values can be now found in the UColAttributeValue enum. 
+ * @stable ICU 2.0 + **/ +typedef UColAttributeValue UCollationStrength; + +/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT + * value, as well as the values specific to each one. + * @stable ICU 2.0 + */ +typedef enum { + /** Attribute for direction of secondary weights - used in Canadian French. + * Acceptable values are UCOL_ON, which results in secondary weights + * being considered backwards and UCOL_OFF which treats secondary + * weights in the order they appear. + * @stable ICU 2.0 + */ + UCOL_FRENCH_COLLATION, + /** Attribute for handling variable elements. + * Acceptable values are UCOL_NON_IGNORABLE (default) + * which treats all the codepoints with non-ignorable + * primary weights in the same way, + * and UCOL_SHIFTED which causes codepoints with primary + * weights that are equal or below the variable top value + * to be ignored on primary level and moved to the quaternary + * level. + * @stable ICU 2.0 + */ + UCOL_ALTERNATE_HANDLING, + /** Controls the ordering of upper and lower case letters. + * Acceptable values are UCOL_OFF (default), which orders + * upper and lower case letters in accordance to their tertiary + * weights, UCOL_UPPER_FIRST which forces upper case letters to + * sort before lower case letters, and UCOL_LOWER_FIRST which does + * the opposite. + * @stable ICU 2.0 + */ + UCOL_CASE_FIRST, + /** Controls whether an extra case level (positioned before the third + * level) is generated or not. Acceptable values are UCOL_OFF (default), + * when case level is not generated, and UCOL_ON which causes the case + * level to be generated. Contents of the case level are affected by + * the value of UCOL_CASE_FIRST attribute. A simple way to ignore + * accent differences in a string is to set the strength to UCOL_PRIMARY + * and enable case level. + * @stable ICU 2.0 + */ + UCOL_CASE_LEVEL, + /** Controls whether the normalization check and necessary normalizations + * are performed. 
When set to UCOL_OFF (default) no normalization check + * is performed. The correctness of the result is guaranteed only if the + * input data is in so-called FCD form (see users manual for more info). + * When set to UCOL_ON, an incremental check is performed to see whether + * the input data is in the FCD form. If the data is not in the FCD form, + * incremental NFD normalization is performed. + * @stable ICU 2.0 + */ + UCOL_NORMALIZATION_MODE, + /** An alias for UCOL_NORMALIZATION_MODE attribute. + * @stable ICU 2.0 + */ + UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, + /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, + * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength + * for most locales (except Japanese) is tertiary. + * + * Quaternary strength + * is useful when combined with shifted setting for alternate handling + * attribute and for JIS X 4061 collation, when it is used to distinguish + * between Katakana and Hiragana. + * Otherwise, quaternary level + * is affected only by the number of non-ignorable code points in + * the string. + * + * Identical strength is rarely useful, as it amounts + * to codepoints of the NFD form of the string. + * @stable ICU 2.0 + */ + UCOL_STRENGTH, +#ifndef U_HIDE_DEPRECATED_API + /** When turned on, this attribute positions Hiragana before all + * non-ignorables on quaternary level This is a sneaky way to produce JIS + * sort order. + * + * This attribute was an implementation detail of the CLDR Japanese tailoring. + * Since ICU 50, this attribute is not settable any more via API functions. + * Since CLDR 25/ICU 53, explicit quaternary relations are used + * to achieve the same Japanese sort order. + * + * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 
+ */ + UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, +#endif /* U_HIDE_DEPRECATED_API */ + /** + * When turned on, this attribute makes + * substrings of digits sort according to their numeric values. + * + * This is a way to get '100' to sort AFTER '2'. Note that the longest + * digit substring that can be treated as a single unit is + * 254 digits (not counting leading zeros). If a digit substring is + * longer than that, the digits beyond the limit will be treated as a + * separate digit substring. + * + * A "digit" in this sense is a code point with General_Category=Nd, + * which does not include circled numbers, roman numerals, etc. + * Only a contiguous digit substring is considered, that is, + * non-negative integers without separators. + * There is no support for plus/minus signs, decimals, exponents, etc. + * + * @stable ICU 2.8 + */ + UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, + + // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + // it is needed for layout of RuleBasedCollator object. + /** + * One more than the highest normal UColAttribute value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCOL_ATTRIBUTE_COUNT +} UColAttribute; + +/** Options for retrieving the rule string + * @stable ICU 2.0 + */ +typedef enum { + /** + * Retrieves the tailoring rules only. + * Same as calling the version of getRules() without UColRuleOption. + * @stable ICU 2.0 + */ + UCOL_TAILORING_ONLY, + /** + * Retrieves the "UCA rules" concatenated with the tailoring rules. + * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. + * They are almost never used or useful at runtime and can be removed from the data. + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales + * @stable ICU 2.0 + */ + UCOL_FULL_RULES +} UColRuleOption ; + +/** + * Open a UCollator for comparing strings. 
+ * + * For some languages, multiple collation types are available; + * for example, "de@collation=phonebook". + * Starting with ICU 54, collation attributes can be specified via locale keywords as well, + * in the old locale extension syntax ("el@colCaseFirst=upper") + * or in language tag syntax ("el-u-kf-upper"). + * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>. + * + * The UCollator pointer is used in all the calls to the Collation + * service. After finished, collator must be disposed of by calling + * {@link #ucol_close }. + * @param loc The locale containing the required collation rules. + * Special values for locales can be passed in - + * if NULL is passed for the locale, the default locale + * collation rules will be used. If empty string ("") or + * "root" are passed, the root collator will be returned. + * @param status A pointer to a UErrorCode to receive any errors + * @return A pointer to a UCollator, or 0 if an error occurred. + * @see ucol_openRules + * @see ucol_safeClone + * @see ucol_close + * @stable ICU 2.0 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_open(const char *loc, UErrorCode *status); + +/** + * Produce a UCollator instance according to the rules supplied. + * The rules are used to change the default ordering, defined in the + * UCA in a process called tailoring. The resulting UCollator pointer + * can be used in the same way as the one obtained by {@link #ucol_open }. + * @param rules A string describing the collation rules. For the syntax + * of the rules please see users guide. + * @param rulesLength The length of rules, or -1 if null-terminated.
+ * @param normalizationMode The normalization mode: One of + * UCOL_OFF (expect the text to not need normalization), + * UCOL_ON (normalize), or + * UCOL_DEFAULT (set the mode according to the rules) + * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, + * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. This argument can currently be set + * to NULL, but at users own risk. Please provide a real structure. + * @param status A pointer to a UErrorCode to receive any errors + * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case + * of error - please use status argument to check for errors. + * @see ucol_open + * @see ucol_safeClone + * @see ucol_close + * @stable ICU 2.0 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_openRules( const UChar *rules, + int32_t rulesLength, + UColAttributeValue normalizationMode, + UCollationStrength strength, + UParseError *parseError, + UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Open a collator defined by a short form string. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme + * Attributes are overridden by the subsequent attributes. So, for "S2_S3", final + * strength will be 3. 3066bis locale overrides individual locale parts. + * The call to this function is equivalent to a call to ucol_open, followed by a + * series of calls to ucol_setAttribute and ucol_setVariableTop. + * @param definition A short string containing a locale and a set of attributes. + * Attributes not explicitly mentioned are left at the default + * state for a locale. + * @param parseError if not NULL, structure that will get filled with error's pre + * and post context in case of error.
+ * @param forceDefaults if FALSE, the settings that are the same as the collator + * default settings will not be applied (for example, setting + * French secondary on a French collator would not be executed). + * If TRUE, all the settings will be applied regardless of the + * collator default value. If the definition + * strings are to be cached, should be set to FALSE. + * @param status Error code. Apart from regular error conditions connected to + * instantiating collators (like out of memory or similar), this + * API will return an error if an invalid attribute or attribute/value + * combination is specified. + * @return A pointer to a UCollator or 0 if an error occurred (including an + * invalid attribute). + * @see ucol_open + * @see ucol_setAttribute + * @see ucol_setVariableTop + * @see ucol_getShortDefinitionString + * @see ucol_normalizeShortDefinitionString + * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead. + */ +U_DEPRECATED UCollator* U_EXPORT2 +ucol_openFromShortString( const char *definition, + UBool forceDefaults, + UParseError *parseError, + UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_DEPRECATED_API +/** + * Get a set containing the contractions defined by the collator. The set includes + * both the root collator's contractions and the contractions defined by the collator. This set + * will contain only strings. If a tailoring explicitly suppresses contractions from + * the root collator (like Russian), removed contractions will not be in the resulting set. + * @param coll collator + * @param conts the set to hold the result. It gets emptied before + * contractions are added.
+ * @param status to hold the error code + * @return the size of the contraction set + * + * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead + */ +U_DEPRECATED int32_t U_EXPORT2 +ucol_getContractions( const UCollator *coll, + USet *conts, + UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Get a set containing the expansions defined by the collator. The set includes + * both the root collator's expansions and the expansions defined by the tailoring + * @param coll collator + * @param contractions if not NULL, the set to hold the contractions + * @param expansions if not NULL, the set to hold the expansions + * @param addPrefixes add the prefix contextual elements to contractions + * @param status to hold the error code + * + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ucol_getContractionsAndExpansions( const UCollator *coll, + USet *contractions, USet *expansions, + UBool addPrefixes, UErrorCode *status); + +/** + * Close a UCollator. + * Once closed, a UCollator should not be used. Every open collator should + * be closed. Otherwise, a memory leak will result. + * @param coll The UCollator to close. + * @see ucol_open + * @see ucol_openRules + * @see ucol_safeClone + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_close(UCollator *coll); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCollatorPointer + * "Smart pointer" class, closes a UCollator via ucol_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); + +U_NAMESPACE_END + +#endif + +/** + * Compare two strings. + * The strings will be compared using the options already specified. + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. 
+ * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return The result of comparing the strings; one of UCOL_EQUAL, + * UCOL_GREATER, UCOL_LESS + * @see ucol_greater + * @see ucol_greaterOrEqual + * @see ucol_equal + * @stable ICU 2.0 + */ +U_STABLE UCollationResult U_EXPORT2 +ucol_strcoll( const UCollator *coll, + const UChar *source, + int32_t sourceLength, + const UChar *target, + int32_t targetLength); + +/** +* Compare two strings in UTF-8. +* The strings will be compared using the options already specified. +* Note: When an input string contains a malformed UTF-8 byte sequence, +* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). +* @param coll The UCollator containing the comparison rules. +* @param source The source UTF-8 string. +* @param sourceLength The length of source, or -1 if null-terminated. +* @param target The target UTF-8 string. +* @param targetLength The length of target, or -1 if null-terminated. +* @param status A pointer to a UErrorCode to receive any errors +* @return The result of comparing the strings; one of UCOL_EQUAL, +* UCOL_GREATER, UCOL_LESS +* @see ucol_greater +* @see ucol_greaterOrEqual +* @see ucol_equal +* @stable ICU 50 +*/ +U_STABLE UCollationResult U_EXPORT2 +ucol_strcollUTF8( + const UCollator *coll, + const char *source, + int32_t sourceLength, + const char *target, + int32_t targetLength, + UErrorCode *status); + +/** + * Determine if one string is greater than another. + * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return TRUE if source is greater than target, FALSE otherwise. 
+ * @see ucol_strcoll + * @see ucol_greaterOrEqual + * @see ucol_equal + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucol_greater(const UCollator *coll, + const UChar *source, int32_t sourceLength, + const UChar *target, int32_t targetLength); + +/** + * Determine if one string is greater than or equal to another. + * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return TRUE if source is greater than or equal to target, FALSE otherwise. + * @see ucol_strcoll + * @see ucol_greater + * @see ucol_equal + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucol_greaterOrEqual(const UCollator *coll, + const UChar *source, int32_t sourceLength, + const UChar *target, int32_t targetLength); + +/** + * Compare two strings for equality. + * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return TRUE if source is equal to target, FALSE otherwise + * @see ucol_strcoll + * @see ucol_greater + * @see ucol_greaterOrEqual + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucol_equal(const UCollator *coll, + const UChar *source, int32_t sourceLength, + const UChar *target, int32_t targetLength); + +/** + * Compare two UTF-8 encoded strings. + * The strings will be compared using the options already specified. + * @param coll The UCollator containing the comparison rules. + * @param sIter The source string iterator. + * @param tIter The target string iterator. 
+ * @return The result of comparing the strings; one of UCOL_EQUAL, + * UCOL_GREATER, UCOL_LESS + * @param status A pointer to a UErrorCode to receive any errors + * @see ucol_strcoll + * @stable ICU 2.6 + */ +U_STABLE UCollationResult U_EXPORT2 +ucol_strcollIter( const UCollator *coll, + UCharIterator *sIter, + UCharIterator *tIter, + UErrorCode *status); + +/** + * Get the collation strength used in a UCollator. + * The strength influences how strings are compared. + * @param coll The UCollator to query. + * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, + * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL + * @see ucol_setStrength + * @stable ICU 2.0 + */ +U_STABLE UCollationStrength U_EXPORT2 +ucol_getStrength(const UCollator *coll); + +/** + * Set the collation strength used in a UCollator. + * The strength influences how strings are compared. + * @param coll The UCollator to set. + * @param strength The desired collation strength; one of UCOL_PRIMARY, + * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT + * @see ucol_getStrength + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_setStrength(UCollator *coll, + UCollationStrength strength); + +/** + * Retrieves the reordering codes for this collator. + * These reordering codes are a combination of UScript codes and UColReorderCode entries. + * @param coll The UCollator to query. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function + * will only return the length of the result without writing any codes (pre-flighting). + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a + * failure before the function call. + * @return The number of reordering codes written to the dest array. 
+ * @see ucol_setReorderCodes + * @see ucol_getEquivalentReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getReorderCodes(const UCollator* coll, + int32_t* dest, + int32_t destCapacity, + UErrorCode *pErrorCode); +/** + * Sets the reordering codes for this collator. + * Collation reordering allows scripts and some other groups of characters + * to be moved relative to each other. This reordering is done on top of + * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed + * at the start and/or the end of the collation order. These groups are specified using + * UScript codes and UColReorderCode entries. + * + * <p>By default, reordering codes specified for the start of the order are placed in the + * order given after several special non-script blocks. These special groups of characters + * are space, punctuation, symbol, currency, and digit. These special groups are represented with + * UColReorderCode entries. Script groups can be intermingled with + * these special non-script groups if those special groups are explicitly specified in the reordering. + * + * <p>The special code OTHERS stands for any script that is not explicitly + * mentioned in the list of reordering codes given. Anything that is after OTHERS + * will go at the very end of the reordering in the order given. + * + * <p>The special reorder code DEFAULT will reset the reordering for this collator + * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that + * was specified when this collator was created from resource data or from rules. The + * DEFAULT code <b>must</b> be the sole code supplied when it is used. + * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set. + * + * <p>The special reorder code NONE will remove any reordering for this collator. + * The result of setting no reordering will be to have the DUCET/CLDR ordering used. 
The + * NONE code <b>must</b> be the sole code supplied when it is used. + * + * @param coll The UCollator to set. + * @param reorderCodes An array of script codes in the new order. This can be NULL if the + * length is also set to 0. An empty array will clear any reordering codes on the collator. + * @param reorderCodesLength The length of reorderCodes. + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a + * failure before the function call. + * @see ucol_getReorderCodes + * @see ucol_getEquivalentReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ +U_STABLE void U_EXPORT2 +ucol_setReorderCodes(UCollator* coll, + const int32_t* reorderCodes, + int32_t reorderCodesLength, + UErrorCode *pErrorCode); + +/** + * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder + * codes will be grouped and must reorder together. + * Beginning with ICU 55, scripts only reorder together if they are primary-equal, + * for example Hiragana and Katakana. + * + * @param reorderCode The reorder code to determine equivalence for. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function + * will only return the length of the result without writing any codes (pre-flighting). + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate + * a failure before the function call. + * @return The number of reordering codes written to the dest array. + * @see ucol_setReorderCodes + * @see ucol_getReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getEquivalentReorderCodes(int32_t reorderCode, + int32_t* dest, + int32_t destCapacity, + UErrorCode *pErrorCode); + +/** + * Get the display name for a UCollator. + * The display name is suitable for presentation to a user. 
+ * @param objLoc The locale of the collator in question. + * @param dispLoc The locale for display. + * @param result A pointer to a buffer to receive the attribute. + * @param resultLength The maximum size of result. + * @param status A pointer to a UErrorCode to receive any errors + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getDisplayName( const char *objLoc, + const char *dispLoc, + UChar *result, + int32_t resultLength, + UErrorCode *status); + +/** + * Get a locale for which collation rules are available. + * A UCollator in a locale returned by this function will perform the correct + * collation for the locale. + * @param localeIndex The index of the desired locale. + * @return A locale for which collation rules are available, or 0 if none. + * @see ucol_countAvailable + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +ucol_getAvailable(int32_t localeIndex); + +/** + * Determine how many locales have collation rules available. + * This function is most useful as determining the loop ending condition for + * calls to {@link #ucol_getAvailable }. + * @return The number of locales for which collation rules are available. + * @see ucol_getAvailable + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_countAvailable(void); + +#if !UCONFIG_NO_SERVICE +/** + * Create a string enumerator of all locales for which a valid + * collator may be opened. + * @param status input-output error code + * @return a string enumeration over locale strings. The caller is + * responsible for closing the result. + * @stable ICU 3.0 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_openAvailableLocales(UErrorCode *status); +#endif + +/** + * Create a string enumerator of all possible keywords that are relevant to + * collation. At this point, the only recognized keyword for this + * service is "collation". 
+ * @param status input-output error code + * @return a string enumeration over locale strings. The caller is + * responsible for closing the result. + * @stable ICU 3.0 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_getKeywords(UErrorCode *status); + +/** + * Given a keyword, create a string enumeration of all values + * for that keyword that are currently in use. + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. If any other keyword is passed in, *status is set + * to U_ILLEGAL_ARGUMENT_ERROR. + * @param status input-output error code + * @return a string enumeration over collation keyword values, or NULL + * upon error. The caller is responsible for closing the result. + * @stable ICU 3.0 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_getKeywordValues(const char *keyword, UErrorCode *status); + +/** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param key one of the keys supported by this service. For now, only + * "collation" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @param status error status + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_getKeywordValuesForLocale(const char* key, + const char* locale, + UBool commonlyUsed, + UErrorCode* status); + +/** + * Return the functionally equivalent locale for the specified + * input locale, with respect to given keyword, for the + * collation service. 
If two different input locale + keyword + * combinations produce the same result locale, then collators + * instantiated for these two different input locales will behave + * equivalently. The converse is not always true; two collators + * may in fact be equivalent, but return different results, due to + * internal details. The return result has no other meaning than + * that stated above, and implies nothing as to the relationship + * between the two locales. This is intended for use by + * applications who wish to cache collators, or otherwise reuse + * collators when possible. The functional equivalent may change + * over time. For more information, please see the <a + * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services"> + * Locales and Services</a> section of the ICU User Guide. + * @param result fillin for the functionally equivalent result locale + * @param resultCapacity capacity of the fillin buffer + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. + * @param locale the specified input locale + * @param isAvailable if non-NULL, pointer to a fillin parameter that + * on return indicates whether the specified input locale was 'available' + * to the collation service. A locale is defined as 'available' if it + * physically exists within the collation locale data. + * @param status pointer to input-output error code + * @return the actual buffer size needed for the locale. If greater + * than resultCapacity, the returned full name will be truncated and + * an error code will be returned. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, + const char* keyword, const char* locale, + UBool* isAvailable, UErrorCode* status); + +/** + * Get the collation tailoring rules from a UCollator. + * The rules will follow the rule syntax. + * @param coll The UCollator to query. + * @param length + * @return The collation tailoring rules. 
+ * @stable ICU 2.0 + */ +U_STABLE const UChar* U_EXPORT2 +ucol_getRules( const UCollator *coll, + int32_t *length); + +#ifndef U_HIDE_DEPRECATED_API +/** Get the short definition string for a collator. This API harvests the collator's + * locale and the attribute set and produces a string that can be used for opening + * a collator with the same attributes using the ucol_openFromShortString API. + * This string will be normalized. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme + * This API supports preflighting. + * @param coll a collator + * @param locale a locale that will appear as the collator's locale in the resulting + * short string definition. If NULL, the locale will be harvested + * from the collator. + * @param buffer space to hold the resulting string + * @param capacity capacity of the buffer + * @param status for returning errors. All the preflighting errors are featured + * @return length of the resulting string + * @see ucol_openFromShortString + * @see ucol_normalizeShortDefinitionString + * @deprecated ICU 54 + */ +U_DEPRECATED int32_t U_EXPORT2 +ucol_getShortDefinitionString(const UCollator *coll, + const char *locale, + char *buffer, + int32_t capacity, + UErrorCode *status); + +/** Verifies and normalizes a short definition string. + * Normalized short definition string has all the options sorted by the argument name, + * so that equivalent definition strings are the same. + * This API supports preflighting. + * @param source definition string + * @param destination space to hold the resulting string + * @param capacity capacity of the buffer + * @param parseError if not NULL, structure that will get filled with error's pre + * and post context in case of error. + * @param status Error code. This API will return an error if an invalid attribute + * or attribute/value combination is specified. 
All the preflighting + * errors are also featured + * @return length of the resulting normalized string. + * + * @see ucol_openFromShortString + * @see ucol_getShortDefinitionString + * + * @deprecated ICU 54 + */ + +U_DEPRECATED int32_t U_EXPORT2 +ucol_normalizeShortDefinitionString(const char *source, + char *destination, + int32_t capacity, + UParseError *parseError, + UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + + +/** + * Get a sort key for a string from a UCollator. + * Sort keys may be compared using <TT>strcmp</TT>. + * + * Note that sort keys are often less efficient than simply doing comparison. + * For more details, see the ICU User Guide. + * + * Like ICU functions that write to an output buffer, the buffer contents + * is undefined if the buffer capacity (resultLength parameter) is too small. + * Unlike ICU functions that write a string to an output buffer, + * the terminating zero byte is counted in the sort key length. + * @param coll The UCollator containing the collation rules. + * @param source The string to transform. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param result A pointer to a buffer to receive the attribute. + * @param resultLength The maximum size of result. + * @return The size needed to fully store the sort key. + * If there was an internal error generating the sort key, + * a zero value is returned. + * @see ucol_keyHashCode + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getSortKey(const UCollator *coll, + const UChar *source, + int32_t sourceLength, + uint8_t *result, + int32_t resultLength); + + +/** Gets the next count bytes of a sort key. Caller needs + * to preserve state array between calls and to provide + * the same type of UCharIterator set with the same string. + * The destination buffer provided must be big enough to store + * the number of requested bytes. 
+ * + * The generated sort key may or may not be compatible with + * sort keys generated using ucol_getSortKey(). + * @param coll The UCollator containing the collation rules. + * @param iter UCharIterator containing the string we need + * the sort key to be calculated for. + * @param state Opaque state of sortkey iteration. + * @param dest Buffer to hold the resulting sortkey part + * @param count number of sort key bytes required. + * @param status error code indicator. + * @return the actual number of bytes of a sortkey. It can be + * smaller than count if we have reached the end of + * the sort key. + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucol_nextSortKeyPart(const UCollator *coll, + UCharIterator *iter, + uint32_t state[2], + uint8_t *dest, int32_t count, + UErrorCode *status); + +/** enum that is taken by ucol_getBound API + * See below for explanation + * do not change the values assigned to the + * members of this enum. Underlying code + * depends on them having these numbers + * @stable ICU 2.0 + */ +typedef enum { + /** lower bound */ + UCOL_BOUND_LOWER = 0, + /** upper bound that will match strings of exact size */ + UCOL_BOUND_UPPER = 1, + /** upper bound that will match all the strings that have the same initial substring as the given string */ + UCOL_BOUND_UPPER_LONG = 2, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UColBoundMode value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCOL_BOUND_VALUE_COUNT +#endif // U_HIDE_DEPRECATED_API +} UColBoundMode; + +/** + * Produce a bound for a given sortkey and a number of levels. + * Return value is always the number of bytes needed, regardless of + * whether the result buffer was big enough or even valid.<br> + * Resulting bounds can be used to produce a range of strings that are + * between upper and lower bounds. 
For example, if bounds are produced + * for a sortkey of string "smith", strings between upper and lower + * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> + * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER + * is produced, strings matched would be as above. However, if bound + * produced using UCOL_BOUND_UPPER_LONG is used, the above example will + * also match "Smithsonian" and similar.<br> + * For more on usage, see example in cintltst/capitst.c in procedure + * TestBounds. + * Sort keys may be compared using <TT>strcmp</TT>. + * @param source The source sortkey. + * @param sourceLength The length of source, or -1 if null-terminated. + * (If an unmodified sortkey is passed, it is always null + * terminated). + * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which + * produces a lower inclusive bound, UCOL_BOUND_UPPER, that + * produces upper bound that matches strings of the same length + * or UCOL_BOUND_UPPER_LONG that matches strings that have the + * same starting substring as the source string. + * @param noOfLevels Number of levels required in the resulting bound (for most + * uses, the recommended value is 1). See users guide for + * explanation on number of levels a sortkey can have. + * @param result A pointer to a buffer to receive the resulting sortkey. + * @param resultLength The maximum size of result. + * @param status Used for returning error code if something went wrong. If the + * number of levels requested is higher than the number of levels + * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is + * issued. + * @return The size needed to fully store the bound. + * @see ucol_keyHashCode + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getBound(const uint8_t *source, + int32_t sourceLength, + UColBoundMode boundType, + uint32_t noOfLevels, + uint8_t *result, + int32_t resultLength, + UErrorCode *status); + +/** + * Gets the version information for a Collator. 
Version is currently + * an opaque 32-bit number which depends, among other things, on major + * versions of the collator tailoring and UCA. + * @param coll The UCollator to query. + * @param info the version # information, the result will be filled in + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_getVersion(const UCollator* coll, UVersionInfo info); + +/** + * Gets the UCA version information for a Collator. Version is the + * UCA version number (3.1.1, 4.0). + * @param coll The UCollator to query. + * @param info the version # information, the result will be filled in + * @stable ICU 2.8 + */ +U_STABLE void U_EXPORT2 +ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); + +/** + * Merges two sort keys. The levels are merged with their corresponding counterparts + * (primaries with primaries, secondaries with secondaries etc.). Between the values + * from the same level a separator is inserted. + * + * This is useful, for example, for combining sort keys from first and last names + * to sort such pairs. + * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys + * + * The recommended way to achieve "merged" sorting is by + * concatenating strings with U+FFFE between them. + * The concatenation has the same sort order as the merged sort keys, + * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). + * Using strings with U+FFFE may yield shorter sort keys. + * + * For details about Sort Key Features see + * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features + * + * It is possible to merge multiple sort keys by consecutively merging + * another one with the intermediate result. + * + * The length of the merge result is the sum of the lengths of the input sort keys. 
+ * + * Example (uncompressed): + * <pre>191B1D 01 050505 01 910505 00 + * 1F2123 01 050505 01 910505 00</pre> + * will be merged as + * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> + * + * If the destination buffer is not big enough, then its contents are undefined. + * If any of the source lengths are zero or any of the source pointers are NULL/undefined, + * the result is of size zero. + * + * @param src1 the first sort key + * @param src1Length the length of the first sort key, including the zero byte at the end; + * can be -1 if the function is to find the length + * @param src2 the second sort key + * @param src2Length the length of the second sort key, including the zero byte at the end; + * can be -1 if the function is to find the length + * @param dest the buffer where the merged sort key is written, + * can be NULL if destCapacity==0 + * @param destCapacity the number of bytes in the dest buffer + * @return the length of the merged sort key, src1Length+src2Length; + * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), + * in which cases the contents of dest are undefined + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, + const uint8_t *src2, int32_t src2Length, + uint8_t *dest, int32_t destCapacity); + +/** + * Universal attribute setter + * @param coll collator whose attributes are to be changed + * @param attr attribute type + * @param value attribute value + * @param status to indicate whether the operation went on smoothly or there were errors + * @see UColAttribute + * @see UColAttributeValue + * @see ucol_getAttribute + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); + +/** + * Universal attribute getter + * @param coll collator whose attributes are to be queried + * @param attr attribute type + * @return attribute value + * 
@param status to indicate whether the operation went on smoothly or there were errors + * @see UColAttribute + * @see UColAttributeValue + * @see ucol_setAttribute + * @stable ICU 2.0 + */ +U_STABLE UColAttributeValue U_EXPORT2 +ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); + +/** + * Sets the variable top to the top of the specified reordering group. + * The variable top determines the highest-sorting character + * which is affected by UCOL_ALTERNATE_HANDLING. + * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. + * @param coll the collator + * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, + * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; + * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @see ucol_getMaxVariable + * @stable ICU 53 + */ +U_STABLE void U_EXPORT2 +ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode); + +/** + * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. + * @param coll the collator + * @return the maximum variable reordering group. + * @see ucol_setMaxVariable + * @stable ICU 53 + */ +U_STABLE UColReorderCode U_EXPORT2 +ucol_getMaxVariable(const UCollator *coll); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Sets the variable top to the primary weight of the specified string. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See ucol_setMaxVariable(). 
+ * @param coll the collator + * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param len length of variable top string. If -1 it is considered to be zero terminated. + * @param status error code. If error code is set, the return value is undefined. + * Errors set by this function are:<br> + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> + * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond + * the last reordering group supported by ucol_setMaxVariable() + * @return variable top primary weight + * @see ucol_getVariableTop + * @see ucol_restoreVariableTop + * @deprecated ICU 53 Call ucol_setMaxVariable() instead. + */ +U_DEPRECATED uint32_t U_EXPORT2 +ucol_setVariableTop(UCollator *coll, + const UChar *varTop, int32_t len, + UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Gets the variable top value of a Collator. + * @param coll collator which variable top needs to be retrieved + * @param status error code (not changed by function). If error code is set, + * the return value is undefined. + * @return the variable top primary weight + * @see ucol_getMaxVariable + * @see ucol_setVariableTop + * @see ucol_restoreVariableTop + * @stable ICU 2.0 + */ +U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Sets the variable top to the specified primary weight. + * + * Beginning with ICU 53, the variable top is pinned to + * the top of one of the supported reordering groups, + * and it must not be beyond the last of those groups. + * See ucol_setMaxVariable(). + * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop + * @param status error code + * @see ucol_getVariableTop + * @see ucol_setVariableTop + * @deprecated ICU 53 Call ucol_setMaxVariable() instead. 
+ */ +U_DEPRECATED void U_EXPORT2 +ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Thread safe cloning operation. The result is a clone of a given collator. + * @param coll collator to be cloned + * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> + * user allocated space for the new clone. + * If NULL new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. + * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> + * pointer to size of allocated space. + * If *pBufferSize == 0, a sufficient size for use in cloning will + * be returned ('pre-flighting') + * If *pBufferSize is not enough for a stack-based safe clone, + * new memory will be allocated. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any + * allocations were necessary. + * @return pointer to the new clone + * @see ucol_open + * @see ucol_openRules + * @see ucol_close + * @stable ICU 2.0 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_safeClone(const UCollator *coll, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API + +/** default memory size for the new clone. + * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. + */ +#define U_COL_SAFECLONE_BUFFERSIZE 1 + +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Returns current rules. Delta defines whether full rules are returned or just the tailoring. + * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough + * to store rules, will store up to available space. + * + * ucol_getRules() should normally be used instead. 
+ * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales + * @param coll collator to get the rules from + * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. + * @param buffer buffer to store the result in. If NULL, you'll get no rules. + * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. + * @return current rules + * @stable ICU 2.0 + * @see UCOL_FULL_RULES + */ +U_STABLE int32_t U_EXPORT2 +ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); + +#ifndef U_HIDE_DEPRECATED_API +/** + * gets the locale name of the collator. If the collator + * is instantiated from the rules, then this function returns + * NULL. + * @param coll The UCollator for which the locale is needed + * @param type You can choose between requested, valid and actual + * locale. For description see the definition of + * ULocDataLocaleType in uloc.h + * @param status error code of the operation + * @return real locale name from which the collation data comes. + * If the collator was instantiated from rules, returns + * NULL. + * @deprecated ICU 2.8 Use ucol_getLocaleByType instead + */ +U_DEPRECATED const char * U_EXPORT2 +ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * gets the locale name of the collator. If the collator + * is instantiated from the rules, then this function returns + * NULL. + * @param coll The UCollator for which the locale is needed + * @param type You can choose between requested, valid and actual + * locale. For description see the definition of + * ULocDataLocaleType in uloc.h + * @param status error code of the operation + * @return real locale name from which the collation data comes. + * If the collator was instantiated from rules, returns + * NULL. 
+ * @stable ICU 2.8 + */ +U_STABLE const char * U_EXPORT2 +ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); + +/** + * Get a Unicode set that contains all the characters and sequences tailored in + * this collator. The result must be disposed of by using uset_close. + * @param coll The UCollator for which we want to get tailored chars + * @param status error code of the operation + * @return a pointer to newly created USet. Must be disposed of by using uset_close + * @see ucol_openRules + * @see uset_close + * @stable ICU 2.4 + */ +U_STABLE USet * U_EXPORT2 +ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); + +#ifndef U_HIDE_INTERNAL_API +/** Calculates the set of unsafe code points, given a collator. + * A character is unsafe if you could append any character and cause the ordering to alter significantly. + * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. + * Thus if you have a character like a_umlaut, and you add a lower_dot to it, + * then it normalizes to a_lower_dot + umlaut, and sorts differently. + * @param coll Collator + * @param unsafe a fill-in set to receive the unsafe points + * @param status for catching errors + * @return number of elements in the set + * @internal ICU 3.0 + */ +U_INTERNAL int32_t U_EXPORT2 +ucol_getUnsafeSet( const UCollator *coll, + USet *unsafe, + UErrorCode *status); + +/** Touches all resources needed for instantiating a collator from a short string definition, + * thus filling up the cache. + * @param definition A short string containing a locale and a set of attributes. + * Attributes not explicitly mentioned are left at the default + * state for a locale. + * @param parseError if not NULL, structure that will get filled with error's pre + * and post context in case of error.
+ * @param forceDefaults if FALSE, the settings that are the same as the collator + * default settings will not be applied (for example, setting + * French secondary on a French collator would not be executed). + * If TRUE, all the settings will be applied regardless of the + * collator default value. If the definition + * strings are to be cached, should be set to FALSE. + * @param status Error code. Apart from regular error conditions connected to + * instantiating collators (like out of memory or similar), this + * API will return an error if an invalid attribute or attribute/value + * combination is specified. + * @see ucol_openFromShortString + * @internal ICU 3.2.1 + */ +U_INTERNAL void U_EXPORT2 +ucol_prepareShortStringOpen( const char *definition, + UBool forceDefaults, + UParseError *parseError, + UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + +/** Creates a binary image of a collator. This binary image can be stored and + * later used to instantiate a collator using ucol_openBinary. + * This API supports preflighting. + * @param coll Collator + * @param buffer a fill-in buffer to receive the binary image + * @param capacity capacity of the destination buffer + * @param status for catching errors + * @return size of the image + * @see ucol_openBinary + * @stable ICU 3.2 + */ +U_STABLE int32_t U_EXPORT2 +ucol_cloneBinary(const UCollator *coll, + uint8_t *buffer, int32_t capacity, + UErrorCode *status); + +/** Opens a collator from a collator binary image created using + * ucol_cloneBinary. Binary image used in instantiation of the + * collator remains owned by the user and should stay around for + * the lifetime of the collator. The API also takes a base collator + * which must be the root collator. + * @param bin binary image owned by the user and required through the + * lifetime of the collator + * @param length size of the image. 
If negative, the API will try to + * figure out the length of the image + * @param base Base collator, for lookup of untailored characters. + * Must be the root collator, must not be NULL. + * The base is required to be present through the lifetime of the collator. + * @param status for catching errors + * @return newly created collator + * @see ucol_cloneBinary + * @stable ICU 3.2 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_openBinary(const uint8_t *bin, int32_t length, + const UCollator *base, + UErrorCode *status); + + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/ucoleitr.h b/intl/icu/source/i18n/unicode/ucoleitr.h new file mode 100644 index 000000000..b84104986 --- /dev/null +++ b/intl/icu/source/i18n/unicode/ucoleitr.h @@ -0,0 +1,268 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* File ucoleitr.h +* +* Modification History: +* +* Date Name Description +* 02/15/2001 synwee Modified all methods to process its own function +* instead of calling the equivalent c++ api (coleitr.h) +*******************************************************************************/ + +#ifndef UCOLEITR_H +#define UCOLEITR_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +/** + * This indicates an error has occured during processing or if no more CEs is + * to be returned. + * @stable ICU 2.0 + */ +#define UCOL_NULLORDER ((int32_t)0xFFFFFFFF) + +#include "unicode/ucol.h" + +/** + * The UCollationElements struct. + * For usage in C programs. 
+ * @stable ICU 2.0 + */ +typedef struct UCollationElements UCollationElements; + +/** + * \file + * \brief C API: UCollationElements + * + * The UCollationElements API is used as an iterator to walk through each + * character of an international string. Use the iterator to return the + * ordering priority of the positioned character. The ordering priority of a + * character, which we refer to as a key, defines how a character is collated + * in the given collation object. + * For example, consider the following in Slovak and in traditional Spanish collation: + * <pre> + * . "ca" -> the first key is key('c') and second key is key('a'). + * . "cha" -> the first key is key('ch') and second key is key('a'). + * </pre> + * And in German phonebook collation, + * <pre> + * . "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and + * . the third key is key('b'). + * </pre> + * <p>Example of the iterator usage: (without error checking) + * <pre> + * . void CollationElementIterator_Example() + * . { + * . UChar *s; + * . int32_t order, primaryOrder; + * . UCollationElements *c; + * . UCollator *coll; + * . UErrorCode success = U_ZERO_ERROR; + * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) ); + * . u_uastrcpy(s, "This is a test"); + * . coll = ucol_open(NULL, &success); + * . c = ucol_openElements(coll, s, u_strlen(s), &success); + * . order = ucol_next(c, &success); + * . ucol_reset(c); + * . order = ucol_previous(c, &success); + * . free(s); + * . ucol_close(coll); + * . ucol_closeElements(c); + * . } + * </pre> + * <p> + * ucol_next() returns the collation order of the next character. + * ucol_previous() returns the collation order of the previous character. + * The Collation Element Iterator moves only in one direction between calls to + * ucol_reset. That is, ucol_next() and ucol_previous() cannot be interleaved.
+ * Whenever ucol_previous is to be called after ucol_next() or vice versa, + * ucol_reset has to be called first to reset the status, shifting pointers to + * either the end or the start of the string. Hence at the next call of + * ucol_previous or ucol_next, the first or last collation order will be returned. + * If a change of direction is done without a ucol_reset, the result is + * undefined. + * The result of a forward iterate (ucol_next) and reversed result of the + * backward iterate (ucol_previous) on the same string are equivalent, if + * collation orders with the value 0 are ignored. + * Character based on the comparison level of the collator. A collation order + * consists of primary order, secondary order and tertiary order. The data + * type of the collation order is <strong>int32_t</strong>. + * + * @see UCollator + */ + +/** + * Open the collation elements for a string. + * + * @param coll The collator containing the desired collation rules. + * @param text The text to iterate over. + * @param textLength The number of characters in text, or -1 if null-terminated + * @param status A pointer to a UErrorCode to receive any errors. + * @return a struct containing collation element information + * @stable ICU 2.0 + */ +U_STABLE UCollationElements* U_EXPORT2 +ucol_openElements(const UCollator *coll, + const UChar *text, + int32_t textLength, + UErrorCode *status); + + +/** + * get a hash code for a key... Not very useful! + * @param key the given key. + * @param length the size of the key array. + * @return the hash code. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_keyHashCode(const uint8_t* key, int32_t length); + +/** + * Close a UCollationElements. + * Once closed, a UCollationElements may no longer be used. + * @param elems The UCollationElements to close. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_closeElements(UCollationElements *elems); + +/** + * Reset the collation elements to their initial state.
+ * This will move the 'cursor' to the beginning of the text. + * Property settings for collation will be reset to the current status. + * @param elems The UCollationElements to reset. + * @see ucol_next + * @see ucol_previous + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_reset(UCollationElements *elems); + +/** + * Get the ordering priority of the next collation element in the text. + * A single character may contain more than one collation element. + * @param elems The UCollationElements containing the text. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER + * if an error has occurred or if the end of string has been reached + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_next(UCollationElements *elems, UErrorCode *status); + +/** + * Get the ordering priority of the previous collation element in the text. + * A single character may contain more than one collation element. + * Note that internally a stack is used to store buffered collation elements. + * @param elems The UCollationElements containing the text. + * @param status A pointer to a UErrorCode to receive any errors. Notably + * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack + * buffer has been exhausted. + * @return The previous collation elements ordering, otherwise returns + * UCOL_NULLORDER if an error has occurred or if the start of string has + * been reached. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_previous(UCollationElements *elems, UErrorCode *status); + +/** + * Get the maximum length of any expansion sequences that end with the + * specified comparison order. + * This is useful for .... ? + * @param elems The UCollationElements containing the text. + * @param order A collation order returned by previous or next.
+ * @return maximum size of the expansion sequences ending with the collation + * element or 1 if collation element does not occur at the end of any + * expansion sequence + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); + +/** + * Set the text containing the collation elements. + * Property settings for collation will remain the same. + * In order to reset the iterator to the current collation property settings, + * the API reset() has to be called. + * @param elems The UCollationElements to set. + * @param text The source text containing the collation elements. + * @param textLength The length of text, or -1 if null-terminated. + * @param status A pointer to a UErrorCode to receive any errors. + * @see ucol_getText + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_setText( UCollationElements *elems, + const UChar *text, + int32_t textLength, + UErrorCode *status); + +/** + * Get the offset of the current source character. + * This is an offset into the text of the character containing the current + * collation elements. + * @param elems The UCollationElements to query. + * @return The offset of the current source character. + * @see ucol_setOffset + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getOffset(const UCollationElements *elems); + +/** + * Set the offset of the current source character. + * This is an offset into the text of the character to be processed. + * Property settings for collation will remain the same. + * In order to reset the iterator to the current collation property settings, + * the API reset() has to be called. + * @param elems The UCollationElements to set. + * @param offset The desired character offset. + * @param status A pointer to a UErrorCode to receive any errors. 
+ * @see ucol_getOffset + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_setOffset(UCollationElements *elems, + int32_t offset, + UErrorCode *status); + +/** +* Get the primary order of a collation order. +* @param order the collation order +* @return the primary order of a collation order. +* @stable ICU 2.6 +*/ +U_STABLE int32_t U_EXPORT2 +ucol_primaryOrder (int32_t order); + +/** +* Get the secondary order of a collation order. +* @param order the collation order +* @return the secondary order of a collation order. +* @stable ICU 2.6 +*/ +U_STABLE int32_t U_EXPORT2 +ucol_secondaryOrder (int32_t order); + +/** +* Get the tertiary order of a collation order. +* @param order the collation order +* @return the tertiary order of a collation order. +* @stable ICU 2.6 +*/ +U_STABLE int32_t U_EXPORT2 +ucol_tertiaryOrder (int32_t order); + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/ucsdet.h b/intl/icu/source/i18n/unicode/ucsdet.h new file mode 100644 index 000000000..470cbab61 --- /dev/null +++ b/intl/icu/source/i18n/unicode/ucsdet.h @@ -0,0 +1,415 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (C) 2005-2013, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ********************************************************************** + * file name: ucsdet.h + * encoding: US-ASCII + * indentation:4 + * + * created on: 2005Aug04 + * created by: Andy Heninger + * + * ICU Character Set Detection, API for C + * + * Draft version 18 Oct 2005 + * + */ + +#ifndef __UCSDET_H +#define __UCSDET_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/localpointer.h" +#include "unicode/uenum.h" + +/** + * \file + * \brief C API: Charset Detection API + * + * This API provides a facility for detecting the + * charset or encoding of character data in an unknown text format. + * The input data can be from an array of bytes. + * <p> + * Character set detection is at best an imprecise operation. The detection + * process will attempt to identify the charset that best matches the characteristics + * of the byte data, but the process is partly statistical in nature, and + * the results can not be guaranteed to always be correct. + * <p> + * For best accuracy in charset detection, the input data should be primarily + * in a single language, and a minimum of a few hundred bytes worth of plain text + * in the language are needed. The detection process will attempt to + * ignore html or xml style markup that could otherwise obscure the content. + */ + + +struct UCharsetDetector; +/** + * Structure representing a charset detector + * @stable ICU 3.6 + */ +typedef struct UCharsetDetector UCharsetDetector; + +struct UCharsetMatch; +/** + * Opaque structure representing a match that was identified + * from a charset detection operation. + * @stable ICU 3.6 + */ +typedef struct UCharsetMatch UCharsetMatch; + +/** + * Open a charset detector. + * + * @param status Any error conditions occurring during the open + * operation are reported back in this variable. + * @return the newly opened charset detector. 
+ * @stable ICU 3.6 + */ +U_STABLE UCharsetDetector * U_EXPORT2 +ucsdet_open(UErrorCode *status); + +/** + * Close a charset detector. All storage and any other resources + * owned by this charset detector will be released. Failure to + * close a charset detector when finished with it can result in + * memory leaks in the application. + * + * @param ucsd The charset detector to be closed. + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ucsdet_close(UCharsetDetector *ucsd); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCharsetDetectorPointer + * "Smart pointer" class, closes a UCharsetDetector via ucsdet_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close); + +U_NAMESPACE_END + +#endif + +/** + * Set the input byte data whose charset is to be detected. + * + * Ownership of the input text byte array remains with the caller. + * The input string must not be altered or deleted until the charset + * detector is either closed or reset to refer to different input text. + * + * @param ucsd the charset detector to be used. + * @param textIn the input text of unknown encoding. + * @param len the length of the input text, or -1 if the text + * is NUL terminated. + * @param status any error conditions are reported back in this variable. + * + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status); + + +/** Set the declared encoding for charset detection. + * The declared encoding of an input text is an encoding obtained + * by the user from an http header or xml declaration or similar source that + * can be provided as an additional hint to the charset detector. + * + * How and whether the declared encoding will be used during the + * detection process is TBD.
+ * + * @param ucsd the charset detector to be used. + * @param encoding an encoding for the current data obtained from + * a header or declaration or other source outside + * of the byte data itself. + * @param length the length of the encoding name, or -1 if the name string + * is NUL terminated. + * @param status any error conditions are reported back in this variable. + * + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status); + + +/** + * Return the charset that best matches the supplied input data. + * + * Note though, that because the detection + * only looks at the start of the input data, + * there is a possibility that the returned charset will fail to handle + * the full set of input data. + * <p> + * The returned UCharsetMatch object is owned by the UCharsetDetector. + * It will remain valid until the detector input is reset, or until + * the detector is closed. + * <p> + * The function will fail if + * <ul> + * <li>no charset appears to match the data.</li> + * <li>no input text has been provided</li> + * </ul> + * + * @param ucsd the charset detector to be used. + * @param status any error conditions are reported back in this variable. + * @return a UCharsetMatch representing the best matching charset, + * or NULL if no charset matches the byte data. + * + * @stable ICU 3.6 + */ +U_STABLE const UCharsetMatch * U_EXPORT2 +ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status); + + +/** + * Find all charset matches that appear to be consistent with the input, + * returning an array of results. The results are ordered with the + * best quality match first. + * + * Because the detection only looks at a limited amount of the + * input byte data, some of the returned charsets may fail to handle + * all of the input data. + * <p> + * The returned UCharsetMatch objects are owned by the UCharsetDetector.
+ * They will remain valid until the detector is closed or modified. + * + * <p> + * Return an error if + * <ul> + * <li>no charsets appear to match the input data.</li> + * <li>no input text has been provided</li> + * </ul> + * + * @param ucsd the charset detector to be used. + * @param matchesFound pointer to a variable that will be set to the + * number of charsets identified that are consistent with + * the input data. Output only. + * @param status any error conditions are reported back in this variable. + * @return A pointer to an array of pointers to UCharsetMatch objects. + * This array, and the UCharsetMatch instances to which it refers, + * are owned by the UCharsetDetector, and will remain valid until + * the detector is closed or modified. + * @stable ICU 3.6 + */ +U_STABLE const UCharsetMatch ** U_EXPORT2 +ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status); + + + +/** + * Get the name of the charset represented by a UCharsetMatch. + * + * The storage for the returned name string is owned by the + * UCharsetMatch, and will remain valid while the UCharsetMatch + * is valid. + * + * The name returned is suitable for use with the ICU conversion APIs. + * + * @param ucsm The charset match object. + * @param status Any error conditions are reported back in this variable. + * @return The name of the matching charset. + * + * @stable ICU 3.6 + */ +U_STABLE const char * U_EXPORT2 +ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status); + +/** + * Get a confidence number for the quality of the match of the byte + * data with the charset. Confidence numbers range from zero to 100, + * with 100 representing complete confidence and zero representing + * no confidence. + * + * The confidence values are somewhat arbitrary. They define + * an ordering within the results for any single detection operation + * but are not generally comparable between the results for different input.
+ * + * A confidence value of ten does have a general meaning - it is used + * for charsets that can represent the input data, but for which there + * is no other indication that suggests that the charset is the correct one. + * Pure 7 bit ASCII data, for example, is compatible with a + * great many charsets, most of which will appear as possible matches + * with a confidence of 10. + * + * @param ucsm The charset match object. + * @param status Any error conditions are reported back in this variable. + * @return A confidence number for the charset match. + * + * @stable ICU 3.6 + */ +U_STABLE int32_t U_EXPORT2 +ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status); + +/** + * Get the RFC 3066 code for the language of the input data. + * + * The Charset Detection service is intended primarily for detecting + * charsets, not language. For some, but not all, charsets, a language is + * identified as a byproduct of the detection process, and that is what + * is returned by this function. + * + * CAUTION: + * 1. Language information is not available for input data encoded in + * all charsets. In particular, no language is identified + * for UTF-8 input data. + * + * 2. Closely related languages may sometimes be confused. + * + * If more accurate language detection is required, a linguistic + * analysis package should be used. + * + * The storage for the returned name string is owned by the + * UCharsetMatch, and will remain valid while the UCharsetMatch + * is valid. + * + * @param ucsm The charset match object. + * @param status Any error conditions are reported back in this variable. + * @return The RFC 3066 code for the language of the input data, or + * an empty string if the language could not be determined. + * + * @stable ICU 3.6 + */ +U_STABLE const char * U_EXPORT2 +ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status); + + +/** + * Get the entire input text as a UChar string, placing it into + * a caller-supplied buffer. 
A terminating + * NUL character will be appended to the buffer if space is available. + * + * The number of UChars in the output string, not including the terminating + * NUL, is returned. + * + * If the supplied buffer is smaller than required to hold the output, + * the contents of the buffer are undefined. The full output string length + * (in UChars) is returned as always, and can be used to allocate a buffer + * of the correct size. + * + * + * @param ucsm The charset match object. + * @param buf A UChar buffer to be filled with the converted text data. + * @param cap The capacity of the buffer in UChars. + * @param status Any error conditions are reported back in this variable. + * @return The number of UChars in the output string. + * + * @stable ICU 3.6 + */ +U_STABLE int32_t U_EXPORT2 +ucsdet_getUChars(const UCharsetMatch *ucsm, + UChar *buf, int32_t cap, UErrorCode *status); + + + +/** + * Get an iterator over the set of all detectable charsets - + * over the charsets that are known to the charset detection + * service. + * + * The returned UEnumeration provides access to the names of + * the charsets. + * + * <p> + * The state of the Charset detector that is passed in does not + * affect the result of this function, but requiring a valid, open + * charset detector as a parameter ensures that the charset detection + * service has been safely initialized and that the required detection + * data is available. + * + * <p> + * <b>Note:</b> Multiple different charset encodings in the same family may use + * a single shared name in this implementation. For example, this method returns + * an enumeration including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252" + * (Windows Latin 1). However, actual detection result could be "windows-1252" + * when the input data matches Latin 1 code points with any points only available + * in "windows-1252". + * + * @param ucsd a Charset detector. + * @param status Any error conditions are reported back in this variable.
+ * @return an iterator providing access to the detectable charset names. + * @stable ICU 3.6 + */ +U_STABLE UEnumeration * U_EXPORT2 +ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); + +/** + * Test whether input filtering is enabled for this charset detector. + * Input filtering removes text that appears to be HTML or xml + * markup from the input before applying the code page detection + * heuristics. + * + * @param ucsd The charset detector to check. + * @return TRUE if filtering is enabled. + * @stable ICU 3.6 + */ + +U_STABLE UBool U_EXPORT2 +ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd); + + +/** + * Enable filtering of input text. If filtering is enabled, + * text within angle brackets ("<" and ">") will be removed + * before detection, which will remove most HTML or xml markup. + * + * @param ucsd the charset detector to be modified. + * @param filter <code>true</code> to enable input text filtering. + * @return The previous setting. + * + * @stable ICU 3.6 + */ +U_STABLE UBool U_EXPORT2 +ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter); + +#ifndef U_HIDE_INTERNAL_API +/** + * Get an iterator over the set of detectable charsets - + * over the charsets that are enabled by the specified charset detector. + * + * The returned UEnumeration provides access to the names of + * the charsets. + * + * @param ucsd a Charset detector. + * @param status Any error conditions are reported back in this variable. + * @return an iterator providing access to the detectable charset names by + * the specified charset detector. + * @internal + */ +U_INTERNAL UEnumeration * U_EXPORT2 +ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); + +/** + * Enable or disable individual charset encoding. + * A name of charset encoding must be included in the names returned by + * {@link #getAllDetectableCharsets()}. + * + * @param ucsd a Charset detector. 
+ * @param encoding encoding the name of charset encoding. + * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the + * charset encoding. + * @param status receives the return status. When the name of charset encoding + * is not supported, U_ILLEGAL_ARGUMENT_ERROR is set. + * @internal + */ +U_INTERNAL void U_EXPORT2 +ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + +#endif +#endif /* __UCSDET_H */ + + diff --git a/intl/icu/source/i18n/unicode/udat.h b/intl/icu/source/i18n/unicode/udat.h new file mode 100644 index 000000000..cacfbe850 --- /dev/null +++ b/intl/icu/source/i18n/unicode/udat.h @@ -0,0 +1,1662 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 1996-2016, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************************* +*/ + +#ifndef UDAT_H +#define UDAT_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" +#include "unicode/ucal.h" +#include "unicode/unum.h" +#include "unicode/udisplaycontext.h" +#include "unicode/ufieldpositer.h" +/** + * \file + * \brief C API: DateFormat + * + * <h2> Date Format C API</h2> + * + * Date Format C API consists of functions that convert dates and + * times from their internal representations to textual form and back again in a + * language-independent manner. Converting from the internal representation (milliseconds + * since midnight, January 1, 1970) to text is known as "formatting," and converting + * from text to millis is known as "parsing." 
We currently define only one concrete + * structure UDateFormat, which can handle pretty much all normal + * date formatting and parsing actions. + * <P> + * Date Format helps you to format and parse dates for any locale. Your code can + * be completely independent of the locale conventions for months, days of the + * week, or even the calendar format: lunar vs. solar. + * <P> + * To format a date for the current Locale with default time and date style, + * use one of the static factory methods: + * <pre> + * \code + * UErrorCode status = U_ZERO_ERROR; + * UChar *myString; + * int32_t myStrlen = 0; + * UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, -1, &status); + * myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, NULL, &status); + * if (status==U_BUFFER_OVERFLOW_ERROR){ + * status=U_ZERO_ERROR; + * myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) ); + * udat_format(dfmt, myDate, myString, myStrlen+1, NULL, &status); + * } + * \endcode + * </pre> + * If you are formatting multiple dates, it is more efficient to get the + * format and use it multiple times so that the system doesn't have to fetch the + * information about the local language and country conventions multiple times.
+ * <pre> + * \code + * UErrorCode status = U_ZERO_ERROR; + * int32_t i, myStrlen = 0; + * UChar* myString; + * char buffer[1024]; + * UDate myDateArr[] = { 0.0, 100000000.0, 2000000000.0 }; // test values + * UDateFormat* df = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status); + * for (i = 0; i < 3; i++) { + * myStrlen = udat_format(df, myDateArr[i], NULL, myStrlen, NULL, &status); + * if(status == U_BUFFER_OVERFLOW_ERROR){ + * status = U_ZERO_ERROR; + * myString = (UChar*)malloc(sizeof(UChar) * (myStrlen+1) ); + * udat_format(df, myDateArr[i], myString, myStrlen+1, NULL, &status); + * printf("%s\n", u_austrcpy(buffer, myString) ); + * free(myString); + * } + * } + * \endcode + * </pre> + * To get specific fields of a date, you can use UFieldPosition to + * get specific fields. + * <pre> + * \code + * UErrorCode status = U_ZERO_ERROR; + * UFieldPosition pos; + * UChar *myString; + * int32_t myStrlen = 0; + * char buffer[1024]; + * + * pos.field = 1; // Same as the DateFormat::EField enum + * UDateFormat* dfmt = udat_open(UDAT_DEFAULT, UDAT_DEFAULT, NULL, NULL, -1, NULL, 0, &status); + * myStrlen = udat_format(dfmt, myDate, NULL, myStrlen, &pos, &status); + * if (status==U_BUFFER_OVERFLOW_ERROR){ + * status=U_ZERO_ERROR; + * myString=(UChar*)malloc(sizeof(UChar) * (myStrlen+1) ); + * udat_format(dfmt, myDate, myString, myStrlen+1, &pos, &status); + * } + * printf("date format: %s\n", u_austrcpy(buffer, myString)); + * buffer[pos.endIndex] = 0; // NULL terminate the string. + * printf("UFieldPosition position equals %s\n", &buffer[pos.beginIndex]); + * \endcode + * </pre> + * To format a date for a different Locale, specify it in the call to + * udat_open() + * <pre> + * \code + * UDateFormat* df = udat_open(UDAT_SHORT, UDAT_SHORT, "fr_FR", NULL, -1, NULL, 0, &status); + * \endcode + * </pre> + * You can use a DateFormat API udat_parse() to parse.
+ * <pre> + * \code + * UErrorCode status = U_ZERO_ERROR; + * int32_t parsepos=0; + * UDate myDate = udat_parse(df, myString, u_strlen(myString), &parsepos, &status); + * \endcode + * </pre> + * You can pass in different options for the arguments for date and time style + * to control the length of the result; from SHORT to MEDIUM to LONG to FULL. + * The exact result depends on the locale, but generally: + * see UDateFormatStyle for more details + * <ul type=round> + * <li> UDAT_SHORT is completely numeric, such as 12/13/52 or 3:30pm + * <li> UDAT_MEDIUM is longer, such as Jan 12, 1952 + * <li> UDAT_LONG is longer, such as January 12, 1952 or 3:30:32pm + * <li> UDAT_FULL is pretty completely specified, such as + * Tuesday, April 12, 1952 AD or 3:30:42pm PST. + * </ul> + * You can also set the time zone on the format if you wish. + * <P> + * You can also use forms of the parse and format methods with Parse Position and + * UFieldPosition to allow you to + * <ul type=round> + * <li> Progressively parse through pieces of a string. + * <li> Align any particular field, or find out where it is for selection + * on the screen. + * </ul> + * <p><strong>Date and Time Patterns:</strong></p> + * + * <p>Date and time formats are specified by <em>date and time pattern</em> strings. + * Within date and time pattern strings, all unquoted ASCII letters [A-Za-z] are reserved + * as pattern letters representing calendar fields. <code>UDateFormat</code> supports + * the date and time formatting algorithm and pattern letters defined by + * <a href="http://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table">UTS#35 + * Unicode Locale Data Markup Language (LDML)</a> and further documented for ICU in the + * <a href="https://sites.google.com/site/icuprojectuserguide/formatparse/datetime?pli=1#TOC-Date-Field-Symbol-Table">ICU + * User Guide</a>.</p> + */ + +/** A date formatter. + * For usage in C programs. 
+ * @stable ICU 2.6 + */ +typedef void* UDateFormat; + +/** The possible date/time format styles + * @stable ICU 2.6 + */ +typedef enum UDateFormatStyle { + /** Full style */ + UDAT_FULL, + /** Long style */ + UDAT_LONG, + /** Medium style */ + UDAT_MEDIUM, + /** Short style */ + UDAT_SHORT, + /** Default style */ + UDAT_DEFAULT = UDAT_MEDIUM, + + /** Bitfield for relative date */ + UDAT_RELATIVE = (1 << 7), + + UDAT_FULL_RELATIVE = UDAT_FULL | UDAT_RELATIVE, + + UDAT_LONG_RELATIVE = UDAT_LONG | UDAT_RELATIVE, + + UDAT_MEDIUM_RELATIVE = UDAT_MEDIUM | UDAT_RELATIVE, + + UDAT_SHORT_RELATIVE = UDAT_SHORT | UDAT_RELATIVE, + + + /** No style */ + UDAT_NONE = -1, + + /** + * Use the pattern given in the parameter to udat_open + * @see udat_open + * @stable ICU 50 + */ + UDAT_PATTERN = -2, + +#ifndef U_HIDE_INTERNAL_API + /** @internal alias to UDAT_PATTERN */ + UDAT_IGNORE = UDAT_PATTERN +#endif /* U_HIDE_INTERNAL_API */ +} UDateFormatStyle; + +/* Skeletons for dates. */ + +/** + * Constant for date skeleton with year. + * @stable ICU 4.0 + */ +#define UDAT_YEAR "y" +/** + * Constant for date skeleton with quarter. + * @stable ICU 51 + */ +#define UDAT_QUARTER "QQQQ" +/** + * Constant for date skeleton with abbreviated quarter. + * @stable ICU 51 + */ +#define UDAT_ABBR_QUARTER "QQQ" +/** + * Constant for date skeleton with year and quarter. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_QUARTER "yQQQQ" +/** + * Constant for date skeleton with year and abbreviated quarter. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_ABBR_QUARTER "yQQQ" +/** + * Constant for date skeleton with month. + * @stable ICU 4.0 + */ +#define UDAT_MONTH "MMMM" +/** + * Constant for date skeleton with abbreviated month. + * @stable ICU 4.0 + */ +#define UDAT_ABBR_MONTH "MMM" +/** + * Constant for date skeleton with numeric month. + * @stable ICU 4.0 + */ +#define UDAT_NUM_MONTH "M" +/** + * Constant for date skeleton with year and month. 
+ * @stable ICU 4.0 + */ +#define UDAT_YEAR_MONTH "yMMMM" +/** + * Constant for date skeleton with year and abbreviated month. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_ABBR_MONTH "yMMM" +/** + * Constant for date skeleton with year and numeric month. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_NUM_MONTH "yM" +/** + * Constant for date skeleton with day. + * @stable ICU 4.0 + */ +#define UDAT_DAY "d" +/** + * Constant for date skeleton with year, month, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_MONTH_DAY "yMMMMd" +/** + * Constant for date skeleton with year, abbreviated month, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_ABBR_MONTH_DAY "yMMMd" +/** + * Constant for date skeleton with year, numeric month, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_NUM_MONTH_DAY "yMd" +/** + * Constant for date skeleton with weekday. + * @stable ICU 51 + */ +#define UDAT_WEEKDAY "EEEE" +/** + * Constant for date skeleton with abbreviated weekday. + * @stable ICU 51 + */ +#define UDAT_ABBR_WEEKDAY "E" +/** + * Constant for date skeleton with year, month, weekday, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_MONTH_WEEKDAY_DAY "yMMMMEEEEd" +/** + * Constant for date skeleton with year, abbreviated month, weekday, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY "yMMMEd" +/** + * Constant for date skeleton with year, numeric month, weekday, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_YEAR_NUM_MONTH_WEEKDAY_DAY "yMEd" +/** + * Constant for date skeleton with long month and day. 
+ * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_MONTH_DAY "MMMMd" +/** + * Constant for date skeleton with abbreviated month and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_ABBR_MONTH_DAY "MMMd" +/** + * Constant for date skeleton with numeric month and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_NUM_MONTH_DAY "Md" +/** + * Constant for date skeleton with month, weekday, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_MONTH_WEEKDAY_DAY "MMMMEEEEd" +/** + * Constant for date skeleton with abbreviated month, weekday, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_ABBR_MONTH_WEEKDAY_DAY "MMMEd" +/** + * Constant for date skeleton with numeric month, weekday, and day. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_NUM_MONTH_WEEKDAY_DAY "MEd" + +/* Skeletons for times. */ + +/** + * Constant for date skeleton with hour, with the locale's preferred hour format (12 or 24). + * @stable ICU 4.0 + */ +#define UDAT_HOUR "j" +/** + * Constant for date skeleton with hour in 24-hour presentation. + * @stable ICU 51 + */ +#define UDAT_HOUR24 "H" +/** + * Constant for date skeleton with minute. + * @stable ICU 51 + */ +#define UDAT_MINUTE "m" +/** + * Constant for date skeleton with hour and minute, with the locale's preferred hour format (12 or 24). + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_HOUR_MINUTE "jm" +/** + * Constant for date skeleton with hour and minute in 24-hour presentation. + * Used in combinations date + time, date + time + zone, or time + zone. 
+ * @stable ICU 4.0 + */ +#define UDAT_HOUR24_MINUTE "Hm" +/** + * Constant for date skeleton with second. + * @stable ICU 51 + */ +#define UDAT_SECOND "s" +/** + * Constant for date skeleton with hour, minute, and second, + * with the locale's preferred hour format (12 or 24). + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_HOUR_MINUTE_SECOND "jms" +/** + * Constant for date skeleton with hour, minute, and second in + * 24-hour presentation. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_HOUR24_MINUTE_SECOND "Hms" +/** + * Constant for date skeleton with minute and second. + * Used in combinations date + time, date + time + zone, or time + zone. + * @stable ICU 4.0 + */ +#define UDAT_MINUTE_SECOND "ms" + +/* Skeletons for time zones. */ + +/** + * Constant for <i>generic location format</i>, such as Los Angeles Time; + * used in combinations date + time + zone, or time + zone. + * @see <a href="http://unicode.org/reports/tr35/#Date_Format_Patterns">LDML Date Format Patterns</a> + * @see <a href="http://unicode.org/reports/tr35/#Time_Zone_Fallback">LDML Time Zone Fallback</a> + * @stable ICU 51 + */ +#define UDAT_LOCATION_TZ "VVVV" +/** + * Constant for <i>generic non-location format</i>, such as Pacific Time; + * used in combinations date + time + zone, or time + zone. + * @see <a href="http://unicode.org/reports/tr35/#Date_Format_Patterns">LDML Date Format Patterns</a> + * @see <a href="http://unicode.org/reports/tr35/#Time_Zone_Fallback">LDML Time Zone Fallback</a> + * @stable ICU 51 + */ +#define UDAT_GENERIC_TZ "vvvv" +/** + * Constant for <i>generic non-location format</i>, abbreviated if possible, such as PT; + * used in combinations date + time + zone, or time + zone. 
+ * @see <a href="http://unicode.org/reports/tr35/#Date_Format_Patterns">LDML Date Format Patterns</a> + * @see <a href="http://unicode.org/reports/tr35/#Time_Zone_Fallback">LDML Time Zone Fallback</a> + * @stable ICU 51 + */ +#define UDAT_ABBR_GENERIC_TZ "v" +/** + * Constant for <i>specific non-location format</i>, such as Pacific Daylight Time; + * used in combinations date + time + zone, or time + zone. + * @see <a href="http://unicode.org/reports/tr35/#Date_Format_Patterns">LDML Date Format Patterns</a> + * @see <a href="http://unicode.org/reports/tr35/#Time_Zone_Fallback">LDML Time Zone Fallback</a> + * @stable ICU 51 + */ +#define UDAT_SPECIFIC_TZ "zzzz" +/** + * Constant for <i>specific non-location format</i>, abbreviated if possible, such as PDT; + * used in combinations date + time + zone, or time + zone. + * @see <a href="http://unicode.org/reports/tr35/#Date_Format_Patterns">LDML Date Format Patterns</a> + * @see <a href="http://unicode.org/reports/tr35/#Time_Zone_Fallback">LDML Time Zone Fallback</a> + * @stable ICU 51 + */ +#define UDAT_ABBR_SPECIFIC_TZ "z" +/** + * Constant for <i>localized GMT/UTC format</i>, such as GMT+8:00 or HPG-8:00; + * used in combinations date + time + zone, or time + zone. + * @see <a href="http://unicode.org/reports/tr35/#Date_Format_Patterns">LDML Date Format Patterns</a> + * @see <a href="http://unicode.org/reports/tr35/#Time_Zone_Fallback">LDML Time Zone Fallback</a> + * @stable ICU 51 + */ +#define UDAT_ABBR_UTC_TZ "ZZZZ" + +/* deprecated skeleton constants */ + +#ifndef U_HIDE_DEPRECATED_API +/** + * Constant for date skeleton with standalone month. + * @deprecated ICU 50 Use UDAT_MONTH instead. + */ +#define UDAT_STANDALONE_MONTH "LLLL" +/** + * Constant for date skeleton with standalone abbreviated month. + * @deprecated ICU 50 Use UDAT_ABBR_MONTH instead. + */ +#define UDAT_ABBR_STANDALONE_MONTH "LLL" + +/** + * Constant for date skeleton with hour, minute, and generic timezone. 
+ * @deprecated ICU 50 Use instead UDAT_HOUR_MINUTE UDAT_ABBR_GENERIC_TZ or some other timezone presentation. + */ +#define UDAT_HOUR_MINUTE_GENERIC_TZ "jmv" +/** + * Constant for date skeleton with hour, minute, and timezone. + * @deprecated ICU 50 Use instead UDAT_HOUR_MINUTE UDAT_ABBR_SPECIFIC_TZ or some other timezone presentation. + */ +#define UDAT_HOUR_MINUTE_TZ "jmz" +/** + * Constant for date skeleton with hour and generic timezone. + * @deprecated ICU 50 Use instead UDAT_HOUR UDAT_ABBR_GENERIC_TZ or some other timezone presentation. + */ +#define UDAT_HOUR_GENERIC_TZ "jv" +/** + * Constant for date skeleton with hour and timezone. + * @deprecated ICU 50 Use instead UDAT_HOUR UDAT_ABBR_SPECIFIC_TZ or some other timezone presentation. + */ +#define UDAT_HOUR_TZ "jz" +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * FieldPosition and UFieldPosition selectors for format fields + * defined by DateFormat and UDateFormat. + * @stable ICU 3.0 + */ +typedef enum UDateFormatField { + /** + * FieldPosition and UFieldPosition selector for 'G' field alignment, + * corresponding to the UCAL_ERA field. + * @stable ICU 3.0 + */ + UDAT_ERA_FIELD = 0, + + /** + * FieldPosition and UFieldPosition selector for 'y' field alignment, + * corresponding to the UCAL_YEAR field. + * @stable ICU 3.0 + */ + UDAT_YEAR_FIELD = 1, + + /** + * FieldPosition and UFieldPosition selector for 'M' field alignment, + * corresponding to the UCAL_MONTH field. + * @stable ICU 3.0 + */ + UDAT_MONTH_FIELD = 2, + + /** + * FieldPosition and UFieldPosition selector for 'd' field alignment, + * corresponding to the UCAL_DATE field. + * @stable ICU 3.0 + */ + UDAT_DATE_FIELD = 3, + + /** + * FieldPosition and UFieldPosition selector for 'k' field alignment, + * corresponding to the UCAL_HOUR_OF_DAY field. + * UDAT_HOUR_OF_DAY1_FIELD is used for the one-based 24-hour clock. + * For example, 23:59 + 01:00 results in 24:59. 
+ * @stable ICU 3.0 + */ + UDAT_HOUR_OF_DAY1_FIELD = 4, + + /** + * FieldPosition and UFieldPosition selector for 'H' field alignment, + * corresponding to the UCAL_HOUR_OF_DAY field. + * UDAT_HOUR_OF_DAY0_FIELD is used for the zero-based 24-hour clock. + * For example, 23:59 + 01:00 results in 00:59. + * @stable ICU 3.0 + */ + UDAT_HOUR_OF_DAY0_FIELD = 5, + + /** + * FieldPosition and UFieldPosition selector for 'm' field alignment, + * corresponding to the UCAL_MINUTE field. + * @stable ICU 3.0 + */ + UDAT_MINUTE_FIELD = 6, + + /** + * FieldPosition and UFieldPosition selector for 's' field alignment, + * corresponding to the UCAL_SECOND field. + * @stable ICU 3.0 + */ + UDAT_SECOND_FIELD = 7, + + /** + * FieldPosition and UFieldPosition selector for 'S' field alignment, + * corresponding to the UCAL_MILLISECOND field. + * + * Note: Time formats that use 'S' can display a maximum of three + * significant digits for fractional seconds, corresponding to millisecond + * resolution and a fractional seconds sub-pattern of SSS. If the + * sub-pattern is S or SS, the fractional seconds value will be truncated + * (not rounded) to the number of display places specified. If the + * fractional seconds sub-pattern is longer than SSS, the additional + * display places will be filled with zeros. + * @stable ICU 3.0 + */ + UDAT_FRACTIONAL_SECOND_FIELD = 8, + + /** + * FieldPosition and UFieldPosition selector for 'E' field alignment, + * corresponding to the UCAL_DAY_OF_WEEK field. + * @stable ICU 3.0 + */ + UDAT_DAY_OF_WEEK_FIELD = 9, + + /** + * FieldPosition and UFieldPosition selector for 'D' field alignment, + * corresponding to the UCAL_DAY_OF_YEAR field. + * @stable ICU 3.0 + */ + UDAT_DAY_OF_YEAR_FIELD = 10, + + /** + * FieldPosition and UFieldPosition selector for 'F' field alignment, + * corresponding to the UCAL_DAY_OF_WEEK_IN_MONTH field. 
+ * @stable ICU 3.0 + */ + UDAT_DAY_OF_WEEK_IN_MONTH_FIELD = 11, + + /** + * FieldPosition and UFieldPosition selector for 'w' field alignment, + * corresponding to the UCAL_WEEK_OF_YEAR field. + * @stable ICU 3.0 + */ + UDAT_WEEK_OF_YEAR_FIELD = 12, + + /** + * FieldPosition and UFieldPosition selector for 'W' field alignment, + * corresponding to the UCAL_WEEK_OF_MONTH field. + * @stable ICU 3.0 + */ + UDAT_WEEK_OF_MONTH_FIELD = 13, + + /** + * FieldPosition and UFieldPosition selector for 'a' field alignment, + * corresponding to the UCAL_AM_PM field. + * @stable ICU 3.0 + */ + UDAT_AM_PM_FIELD = 14, + + /** + * FieldPosition and UFieldPosition selector for 'h' field alignment, + * corresponding to the UCAL_HOUR field. + * UDAT_HOUR1_FIELD is used for the one-based 12-hour clock. + * For example, 11:30 PM + 1 hour results in 12:30 AM. + * @stable ICU 3.0 + */ + UDAT_HOUR1_FIELD = 15, + + /** + * FieldPosition and UFieldPosition selector for 'K' field alignment, + * corresponding to the UCAL_HOUR field. + * UDAT_HOUR0_FIELD is used for the zero-based 12-hour clock. + * For example, 11:30 PM + 1 hour results in 00:30 AM. + * @stable ICU 3.0 + */ + UDAT_HOUR0_FIELD = 16, + + /** + * FieldPosition and UFieldPosition selector for 'z' field alignment, + * corresponding to the UCAL_ZONE_OFFSET and + * UCAL_DST_OFFSET fields. + * @stable ICU 3.0 + */ + UDAT_TIMEZONE_FIELD = 17, + + /** + * FieldPosition and UFieldPosition selector for 'Y' field alignment, + * corresponding to the UCAL_YEAR_WOY field. + * @stable ICU 3.0 + */ + UDAT_YEAR_WOY_FIELD = 18, + + /** + * FieldPosition and UFieldPosition selector for 'e' field alignment, + * corresponding to the UCAL_DOW_LOCAL field. + * @stable ICU 3.0 + */ + UDAT_DOW_LOCAL_FIELD = 19, + + /** + * FieldPosition and UFieldPosition selector for 'u' field alignment, + * corresponding to the UCAL_EXTENDED_YEAR field. 
+ * @stable ICU 3.0 + */ + UDAT_EXTENDED_YEAR_FIELD = 20, + + /** + * FieldPosition and UFieldPosition selector for 'g' field alignment, + * corresponding to the UCAL_JULIAN_DAY field. + * @stable ICU 3.0 + */ + UDAT_JULIAN_DAY_FIELD = 21, + + /** + * FieldPosition and UFieldPosition selector for 'A' field alignment, + * corresponding to the UCAL_MILLISECONDS_IN_DAY field. + * @stable ICU 3.0 + */ + UDAT_MILLISECONDS_IN_DAY_FIELD = 22, + + /** + * FieldPosition and UFieldPosition selector for 'Z' field alignment, + * corresponding to the UCAL_ZONE_OFFSET and + * UCAL_DST_OFFSET fields. + * @stable ICU 3.0 + */ + UDAT_TIMEZONE_RFC_FIELD = 23, + + /** + * FieldPosition and UFieldPosition selector for 'v' field alignment, + * corresponding to the UCAL_ZONE_OFFSET field. + * @stable ICU 3.4 + */ + UDAT_TIMEZONE_GENERIC_FIELD = 24, + /** + * FieldPosition selector for 'c' field alignment, + * corresponding to the {@link #UCAL_DOW_LOCAL} field. + * This displays the stand alone day name, if available. + * @stable ICU 3.4 + */ + UDAT_STANDALONE_DAY_FIELD = 25, + + /** + * FieldPosition selector for 'L' field alignment, + * corresponding to the {@link #UCAL_MONTH} field. + * This displays the stand alone month name, if available. + * @stable ICU 3.4 + */ + UDAT_STANDALONE_MONTH_FIELD = 26, + + /** + * FieldPosition selector for "Q" field alignment, + * corresponding to quarters. This is implemented + * using the {@link #UCAL_MONTH} field. This + * displays the quarter. + * @stable ICU 3.6 + */ + UDAT_QUARTER_FIELD = 27, + + /** + * FieldPosition selector for the "q" field alignment, + * corresponding to stand-alone quarters. This is + * implemented using the {@link #UCAL_MONTH} field. + * This displays the stand-alone quarter. + * @stable ICU 3.6 + */ + UDAT_STANDALONE_QUARTER_FIELD = 28, + + /** + * FieldPosition and UFieldPosition selector for 'V' field alignment, + * corresponding to the UCAL_ZONE_OFFSET field. 
+ * @stable ICU 3.8 + */ + UDAT_TIMEZONE_SPECIAL_FIELD = 29, + + /** + * FieldPosition selector for "U" field alignment, + * corresponding to cyclic year names. This is implemented + * using the {@link #UCAL_YEAR} field. This displays + * the cyclic year name, if available. + * @stable ICU 49 + */ + UDAT_YEAR_NAME_FIELD = 30, + + /** + * FieldPosition selector for 'O' field alignment, + * corresponding to the UCAL_ZONE_OFFSET and UCAL_DST_OFFSET fields. + * This displays the localized GMT format. + * @stable ICU 51 + */ + UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD = 31, + + /** + * FieldPosition selector for 'X' field alignment, + * corresponding to the UCAL_ZONE_OFFSET and UCAL_DST_OFFSET fields. + * This displays the ISO 8601 local time offset format or UTC indicator ("Z"). + * @stable ICU 51 + */ + UDAT_TIMEZONE_ISO_FIELD = 32, + + /** + * FieldPosition selector for 'x' field alignment, + * corresponding to the UCAL_ZONE_OFFSET and UCAL_DST_OFFSET fields. + * This displays the ISO 8601 local time offset format. + * @stable ICU 51 + */ + UDAT_TIMEZONE_ISO_LOCAL_FIELD = 33, + +#ifndef U_HIDE_INTERNAL_API + /** + * FieldPosition and UFieldPosition selector for 'r' field alignment, + * no directly corresponding UCAL_ field. + * @internal ICU 53 + */ + UDAT_RELATED_YEAR_FIELD = 34, +#endif /* U_HIDE_INTERNAL_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * FieldPosition selector for 'b' field alignment. + * Displays midnight and noon for 12am and 12pm, respectively, if available; + * otherwise fall back to AM / PM. + * @draft ICU 57 + */ + UDAT_AM_PM_MIDNIGHT_NOON_FIELD = 35, + + /** FieldPosition selector for 'B' field alignment. + * Displays flexible day periods, such as "in the morning", if available. + * @draft ICU 57 + */ + UDAT_FLEXIBLE_DAY_PERIOD_FIELD = 36, +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_INTERNAL_API + /** + * FieldPosition and UFieldPosition selector for time separator, + * no corresponding UCAL_ field.
No pattern character is currently + * defined for this. + * @internal + */ + UDAT_TIME_SEPARATOR_FIELD = 37, +#endif /* U_HIDE_INTERNAL_API */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * Number of FieldPosition and UFieldPosition selectors for + * DateFormat and UDateFormat. + * Valid selectors range from 0 to UDAT_FIELD_COUNT-1. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDAT_FIELD_COUNT = 38 +#endif // U_HIDE_DEPRECATED_API +} UDateFormatField; + + +#ifndef U_HIDE_INTERNAL_API +/** + * Is a pattern character defined for UDAT_TIME_SEPARATOR_FIELD? + * In ICU 55 it was COLON, but that was withdrawn in ICU 56. + * @internal ICU 56 + */ +#define UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR 0 +#endif /* U_HIDE_INTERNAL_API */ + + +/** + * Maps from a UDateFormatField to the corresponding UCalendarDateFields. + * Note: since the mapping is many-to-one, there is no inverse mapping. + * @param field the UDateFormatField. + * @return the UCalendarDateField. This will be UCAL_FIELD_COUNT in case + * of error (e.g., the input field is UDAT_FIELD_COUNT). + * @stable ICU 4.4 + */ +U_STABLE UCalendarDateFields U_EXPORT2 +udat_toCalendarDateField(UDateFormatField field); + + +/** + * Open a new UDateFormat for formatting and parsing dates and times. + * A UDateFormat may be used to format dates in calls to {@link #udat_format }, + * and to parse dates in calls to {@link #udat_parse }. + * @param timeStyle The style used to format times; one of UDAT_FULL, UDAT_LONG, + * UDAT_MEDIUM, UDAT_SHORT, UDAT_DEFAULT, or UDAT_NONE (relative time styles + * are not currently supported). + * When the pattern parameter is used, pass in UDAT_PATTERN for both timeStyle and dateStyle. + * @param dateStyle The style used to format dates; one of UDAT_FULL, UDAT_LONG, + * UDAT_MEDIUM, UDAT_SHORT, UDAT_DEFAULT, UDAT_FULL_RELATIVE, UDAT_LONG_RELATIVE, + * UDAT_MEDIUM_RELATIVE, UDAT_SHORT_RELATIVE, or UDAT_NONE. 
+ * When the pattern parameter is used, pass in UDAT_PATTERN for both timeStyle and dateStyle. + * As currently implemented, + * relative date formatting only affects a limited range of calendar days before or + * after the current date, based on the CLDR <field type="day">/<relative> data: For + * example, in English, "Yesterday", "Today", and "Tomorrow". Outside of this range, + * dates are formatted using the corresponding non-relative style. + * @param locale The locale specifying the formatting conventions + * @param tzID A timezone ID specifying the timezone to use. If 0, use + * the default timezone. + * @param tzIDLength The length of tzID, or -1 if null-terminated. + * @param pattern A pattern specifying the format to use. + * @param patternLength The number of characters in the pattern, or -1 if null-terminated. + * @param status A pointer to an UErrorCode to receive any errors + * @return A pointer to a UDateFormat to use for formatting dates and times, or 0 if + * an error occurred. + * @stable ICU 2.0 + */ +U_STABLE UDateFormat* U_EXPORT2 +udat_open(UDateFormatStyle timeStyle, + UDateFormatStyle dateStyle, + const char *locale, + const UChar *tzID, + int32_t tzIDLength, + const UChar *pattern, + int32_t patternLength, + UErrorCode *status); + + +/** +* Close a UDateFormat. +* Once closed, a UDateFormat may no longer be used. +* @param format The formatter to close. +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_close(UDateFormat* format); + + +/** + * DateFormat boolean attributes + * + * @stable ICU 53 + */ +typedef enum UDateFormatBooleanAttribute { + /** + * indicates whether whitespace is allowed. Includes trailing dot tolerance. + * @stable ICU 53 + */ + UDAT_PARSE_ALLOW_WHITESPACE = 0, + /** + * indicates tolerance of numeric data when String data may be assumed. 
eg: UDAT_YEAR_NAME_FIELD, + * UDAT_STANDALONE_MONTH_FIELD, UDAT_DAY_OF_WEEK_FIELD + * @stable ICU 53 + */ + UDAT_PARSE_ALLOW_NUMERIC = 1, + /** + * indicates tolerance of a partial literal match + * e.g. accepting "--mon-02-march-2011" for a pattern of "'--: 'EEE-WW-MMMM-yyyy" + * @stable ICU 56 + */ + UDAT_PARSE_PARTIAL_LITERAL_MATCH = 2, + /** + * indicates tolerance of pattern mismatch between input data and specified format pattern. + * e.g. accepting "September" for a month pattern of MMM ("Sep") + * @stable ICU 56 + */ + UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH = 3, + + // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + // it is needed for layout of DateFormat object. + /** + * One more than the highest normal UDateFormatBooleanAttribute value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDAT_BOOLEAN_ATTRIBUTE_COUNT = 4 +} UDateFormatBooleanAttribute; + +/** + * Get a boolean attribute associated with a UDateFormat. + * An example would be a true value for a key of UDAT_PARSE_ALLOW_WHITESPACE indicating allowing whitespace leniency. + * If the formatter does not understand the attribute, -1 is returned. + * @param fmt The formatter to query. + * @param attr The attribute to query; e.g. UDAT_PARSE_ALLOW_WHITESPACE. + * @param status A pointer to an UErrorCode to receive any errors + * @return The value of attr. + * @stable ICU 53 + */ +U_STABLE UBool U_EXPORT2 +udat_getBooleanAttribute(const UDateFormat* fmt, UDateFormatBooleanAttribute attr, UErrorCode* status); + +/** + * Set a boolean attribute associated with a UDateFormat. + * An example of a boolean attribute is parse leniency control. If the formatter does not understand + * the attribute, the call is ignored. + * @param fmt The formatter to set. + * @param attr The attribute to set; one of UDAT_PARSE_ALLOW_WHITESPACE or UDAT_PARSE_ALLOW_NUMERIC + * @param newValue The new value of attr. 
+ * @param status A pointer to an UErrorCode to receive any errors + * @stable ICU 53 + */ +U_STABLE void U_EXPORT2 +udat_setBooleanAttribute(UDateFormat *fmt, UDateFormatBooleanAttribute attr, UBool newValue, UErrorCode* status); + + + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUDateFormatPointer + * "Smart pointer" class, closes a UDateFormat via udat_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUDateFormatPointer, UDateFormat, udat_close); + +U_NAMESPACE_END + +#endif + +/** + * Open a copy of a UDateFormat. + * This function performs a deep copy. + * @param fmt The format to copy + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UDateFormat identical to fmt. + * @stable ICU 2.0 + */ +U_STABLE UDateFormat* U_EXPORT2 +udat_clone(const UDateFormat *fmt, + UErrorCode *status); + +/** +* Format a date using a UDateFormat. +* The date will be formatted using the conventions specified in {@link #udat_open } +* @param format The formatter to use +* @param dateToFormat The date to format +* @param result A pointer to a buffer to receive the formatted number. +* @param resultLength The maximum size of result. +* @param position A pointer to a UFieldPosition. On input, position->field +* is read. On output, position->beginIndex and position->endIndex indicate +* the beginning and ending indices of field number position->field, if such +* a field exists. This parameter may be NULL, in which case no field +* position data is returned. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. 
+* @see udat_parse +* @see UFieldPosition +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +udat_format( const UDateFormat* format, + UDate dateToFormat, + UChar* result, + int32_t resultLength, + UFieldPosition* position, + UErrorCode* status); + +/** +* Format a date using an UDateFormat. +* The date will be formatted using the conventions specified in {@link #udat_open } +* @param format The formatter to use +* @param calendar The calendar to format. The calendar instance might be +* mutated if fields are not yet fully calculated, though +* the function won't change the logical date and time held +* by the instance. +* @param result A pointer to a buffer to receive the formatted number. +* @param capacity The maximum size of result. +* @param position A pointer to a UFieldPosition. On input, position->field +* is read. On output, position->beginIndex and position->endIndex indicate +* the beginning and ending indices of field number position->field, if such +* a field exists. This parameter may be NULL, in which case no field +* position data is returned. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than capacity, the output was truncated. +* @see udat_format +* @see udat_parseCalendar +* @see UFieldPosition +* @stable ICU 55 +*/ +U_STABLE int32_t U_EXPORT2 +udat_formatCalendar( const UDateFormat* format, + UCalendar* calendar, + UChar* result, + int32_t capacity, + UFieldPosition* position, + UErrorCode* status); + +/** +* Format a date using a UDateFormat. +* The date will be formatted using the conventions specified in {@link #udat_open} +* @param format +* The formatter to use +* @param dateToFormat +* The date to format +* @param result +* A pointer to a buffer to receive the formatted number. +* @param resultLength +* The maximum size of result.
+* @param fpositer +* A pointer to a UFieldPositionIterator created by {@link #ufieldpositer_open} +* (may be NULL if field position information is not needed). Any +* iteration information already present in the UFieldPositionIterator +* will be deleted, and the iterator will be reset to apply to the +* fields in the formatted string created by this function call; the +* field values provided by {@link #ufieldpositer_next} will be from the +* UDateFormatField enum. +* @param status +* A pointer to a UErrorCode to receive any errors +* @return +* The total buffer size needed; if greater than resultLength, the output was truncated. +* @see udat_parse +* @see UFieldPositionIterator +* @stable ICU 55 +*/ +U_STABLE int32_t U_EXPORT2 +udat_formatForFields( const UDateFormat* format, + UDate dateToFormat, + UChar* result, + int32_t resultLength, + UFieldPositionIterator* fpositer, + UErrorCode* status); + +/** +* Format a date using a UDateFormat. +* The date will be formatted using the conventions specified in {@link #udat_open } +* @param format +* The formatter to use +* @param calendar +* The calendar to format. The calendar instance might be mutated if fields +* are not yet fully calculated, though the function won't change the logical +* date and time held by the instance. +* @param result +* A pointer to a buffer to receive the formatted number. +* @param capacity +* The maximum size of result. +* @param fpositer +* A pointer to a UFieldPositionIterator created by {@link #ufieldpositer_open} +* (may be NULL if field position information is not needed). Any +* iteration information already present in the UFieldPositionIterator +* will be deleted, and the iterator will be reset to apply to the +* fields in the formatted string created by this function call; the +* field values provided by {@link #ufieldpositer_next} will be from the +* UDateFormatField enum. 
+* @param status +* A pointer to a UErrorCode to receive any errors +* @return +* The total buffer size needed; if greater than capacity, the output was truncated. +* @see udat_format +* @see udat_parseCalendar +* @see UFieldPositionIterator +* @stable ICU 55 +*/ +U_STABLE int32_t U_EXPORT2 +udat_formatCalendarForFields( const UDateFormat* format, + UCalendar* calendar, + UChar* result, + int32_t capacity, + UFieldPositionIterator* fpositer, + UErrorCode* status); + + +/** +* Parse a string into a date/time using a UDateFormat. +* The date will be parsed using the conventions specified in {@link #udat_open }. +* <P> +* Note that the normal date formats associated with some calendars - such +* as the Chinese lunar calendar - do not specify enough fields to enable +* dates to be parsed unambiguously. In the case of the Chinese lunar +* calendar, while the year within the current 60-year cycle is specified, +* the number of such cycles since the start date of the calendar (in the +* UCAL_ERA field of the UCalendar object) is not normally part of the format, +* and parsing may assume the wrong era. For cases such as this it is +* recommended that clients parse using udat_parseCalendar with the UCalendar +* passed in set to the current date, or to a date within the era/cycle that +* should be assumed if absent in the format. +* +* @param format The formatter to use. +* @param text The text to parse. +* @param textLength The length of text, or -1 if null-terminated. +* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which +* to begin parsing. If not 0, on output the offset at which parsing ended.
+* @param status A pointer to an UErrorCode to receive any errors +* @return The value of the parsed date/time +* @see udat_format +* @stable ICU 2.0 +*/ +U_STABLE UDate U_EXPORT2 +udat_parse(const UDateFormat* format, + const UChar* text, + int32_t textLength, + int32_t *parsePos, + UErrorCode *status); + +/** +* Parse a string into a date/time using a UDateFormat. +* The date will be parsed using the conventions specified in {@link #udat_open }. +* @param format The formatter to use. +* @param calendar A calendar set on input to the date and time to be used for +* missing values in the date/time string being parsed, and set +* on output to the parsed date/time. When the calendar type is +* different from the internal calendar held by the UDateFormat +* instance, the internal calendar will be cloned to a work +* calendar set to the same milliseconds and time zone as this +* calendar parameter, field values will be parsed based on the +* work calendar, then the result (milliseconds and time zone) +* will be set in this calendar. +* @param text The text to parse. +* @param textLength The length of text, or -1 if null-terminated. +* @param parsePos If not 0, on input a pointer to an integer specifying the offset at which +* to begin parsing. If not 0, on output the offset at which parsing ended. +* @param status A pointer to an UErrorCode to receive any errors +* @see udat_format +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_parseCalendar(const UDateFormat* format, + UCalendar* calendar, + const UChar* text, + int32_t textLength, + int32_t *parsePos, + UErrorCode *status); + +/** +* Determine if an UDateFormat will perform lenient parsing. +* With lenient parsing, the parser may use heuristics to interpret inputs that do not +* precisely match the pattern. With strict parsing, inputs must match the pattern. +* @param fmt The formatter to query +* @return TRUE if fmt is set to perform lenient parsing, FALSE otherwise.
+* @see udat_setLenient +* @stable ICU 2.0 +*/ +U_STABLE UBool U_EXPORT2 +udat_isLenient(const UDateFormat* fmt); + +/** +* Specify whether an UDateFormat will perform lenient parsing. +* With lenient parsing, the parser may use heuristics to interpret inputs that do not +* precisely match the pattern. With strict parsing, inputs must match the pattern. +* @param fmt The formatter to set +* @param isLenient TRUE if fmt should perform lenient parsing, FALSE otherwise. +* @see udat_isLenient +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_setLenient( UDateFormat* fmt, + UBool isLenient); + +/** +* Get the UCalendar associated with an UDateFormat. +* A UDateFormat uses a UCalendar to convert a raw value to, for example, +* the day of the week. +* @param fmt The formatter to query. +* @return A pointer to the UCalendar used by fmt. +* @see udat_setCalendar +* @stable ICU 2.0 +*/ +U_STABLE const UCalendar* U_EXPORT2 +udat_getCalendar(const UDateFormat* fmt); + +/** +* Set the UCalendar associated with an UDateFormat. +* A UDateFormat uses a UCalendar to convert a raw value to, for example, +* the day of the week. +* @param fmt The formatter to set. +* @param calendarToSet A pointer to an UCalendar to be used by fmt. +* @see udat_getCalendar +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_setCalendar( UDateFormat* fmt, + const UCalendar* calendarToSet); + +/** +* Get the UNumberFormat associated with an UDateFormat. +* A UDateFormat uses a UNumberFormat to format numbers within a date, +* for example the day number. +* @param fmt The formatter to query. +* @return A pointer to the UNumberFormat used by fmt to format numbers. +* @see udat_setNumberFormat +* @stable ICU 2.0 +*/ +U_STABLE const UNumberFormat* U_EXPORT2 +udat_getNumberFormat(const UDateFormat* fmt); + +/** +* Get the UNumberFormat for specific field associated with an UDateFormat. +* For example: 'y' for year and 'M' for month +* @param fmt The formatter to query.
+* @param field the field to query +* @return A pointer to the UNumberFormat used by fmt to format field numbers. +* @see udat_adoptNumberFormatForFields +* @stable ICU 54 +*/ +U_STABLE const UNumberFormat* U_EXPORT2 +udat_getNumberFormatForField(const UDateFormat* fmt, UChar field); + +/** +* Set the UNumberFormat for specific field associated with an UDateFormat. +* It can be a single field like: "y"(year) or "M"(month) +* It can be several field combined together: "yM"(year and month) +* Note: +* 1 symbol field is enough for multiple symbol field (so "y" will override "yy", "yyy") +* If the field is not numeric, then override has no effect (like "MMM" will use abbreviation, not numerical field) +* +* @param fields the fields to set +* @param fmt The formatter to set. +* @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt to format numbers. +* @param status error code passed around (memory allocation or invalid fields) +* @see udat_getNumberFormatForField +* @stable ICU 54 +*/ +U_STABLE void U_EXPORT2 +udat_adoptNumberFormatForFields( UDateFormat* fmt, + const UChar* fields, + UNumberFormat* numberFormatToSet, + UErrorCode* status); +/** +* Set the UNumberFormat associated with an UDateFormat. +* A UDateFormat uses a UNumberFormat to format numbers within a date, +* for example the day number. +* This method also clears per field NumberFormat instances previously +* set by {@see udat_adoptNumberFormatForFields} +* @param fmt The formatter to set. +* @param numberFormatToSet A pointer to the UNumberFormat to be used by fmt to format numbers. +* @see udat_getNumberFormat +* @see udat_adoptNumberFormatForFields +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_setNumberFormat( UDateFormat* fmt, + const UNumberFormat* numberFormatToSet); +/** +* Adopt the UNumberFormat associated with an UDateFormat. +* A UDateFormat uses a UNumberFormat to format numbers within a date, +* for example the day number. +* @param fmt The formatter to set.
+* @param numberFormatToAdopt A pointer to the UNumberFormat to be used by fmt to format numbers. +* @see udat_getNumberFormat +* @stable ICU 54 +*/ +U_STABLE void U_EXPORT2 +udat_adoptNumberFormat( UDateFormat* fmt, + UNumberFormat* numberFormatToAdopt); +/** +* Get a locale for which date/time formatting patterns are available. +* A UDateFormat in a locale returned by this function will perform the correct +* formatting and parsing for the locale. +* @param localeIndex The index of the desired locale. +* @return A locale for which date/time formatting patterns are available, or 0 if none. +* @see udat_countAvailable +* @stable ICU 2.0 +*/ +U_STABLE const char* U_EXPORT2 +udat_getAvailable(int32_t localeIndex); + +/** +* Determine how many locales have date/time formatting patterns available. +* This function is most useful as determining the loop ending condition for +* calls to {@link #udat_getAvailable }. +* @return The number of locales for which date/time formatting patterns are available. +* @see udat_getAvailable +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +udat_countAvailable(void); + +/** +* Get the year relative to which all 2-digit years are interpreted. +* For example, if the 2-digit start year is 2100, the year 99 will be +* interpreted as 2199. +* @param fmt The formatter to query. +* @param status A pointer to an UErrorCode to receive any errors +* @return The year relative to which all 2-digit years are interpreted. +* @see udat_set2DigitYearStart +* @stable ICU 2.0 +*/ +U_STABLE UDate U_EXPORT2 +udat_get2DigitYearStart( const UDateFormat *fmt, + UErrorCode *status); + +/** +* Set the year relative to which all 2-digit years will be interpreted. +* For example, if the 2-digit start year is 2100, the year 99 will be +* interpreted as 2199. +* @param fmt The formatter to set. +* @param d The year relative to which all 2-digit years will be interpreted.
+* @param status A pointer to an UErrorCode to receive any errors +* @see udat_get2DigitYearStart +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_set2DigitYearStart( UDateFormat *fmt, + UDate d, + UErrorCode *status); + +/** +* Extract the pattern from a UDateFormat. +* The pattern will follow the pattern syntax rules. +* @param fmt The formatter to query. +* @param localized TRUE if the pattern should be localized, FALSE otherwise. +* @param result A pointer to a buffer to receive the pattern. +* @param resultLength The maximum size of result. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. +* @see udat_applyPattern +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +udat_toPattern( const UDateFormat *fmt, + UBool localized, + UChar *result, + int32_t resultLength, + UErrorCode *status); + +/** +* Set the pattern used by an UDateFormat. +* The pattern should follow the pattern syntax rules. +* @param format The formatter to set. +* @param localized TRUE if the pattern is localized, FALSE otherwise. +* @param pattern The new pattern +* @param patternLength The length of pattern, or -1 if null-terminated. +* @see udat_toPattern +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_applyPattern( UDateFormat *format, + UBool localized, + const UChar *pattern, + int32_t patternLength); + +/** + * The possible types of date format symbols + * @stable ICU 2.6 + */ +typedef enum UDateFormatSymbolType { + /** The era names, for example AD */ + UDAT_ERAS, + /** The month names, for example February */ + UDAT_MONTHS, + /** The short month names, for example Feb. */ + UDAT_SHORT_MONTHS, + /** The CLDR-style format "wide" weekday names, for example Monday */ + UDAT_WEEKDAYS, + /** + * The CLDR-style format "abbreviated" (not "short") weekday names, for example "Mon." + * For the CLDR-style format "short" weekday names, use UDAT_SHORTER_WEEKDAYS.
+ */ + UDAT_SHORT_WEEKDAYS, + /** The AM/PM names, for example AM */ + UDAT_AM_PMS, + /** The localized characters */ + UDAT_LOCALIZED_CHARS, + /** The long era names, for example Anno Domini */ + UDAT_ERA_NAMES, + /** The narrow month names, for example F */ + UDAT_NARROW_MONTHS, + /** The CLDR-style format "narrow" weekday names, for example "M" */ + UDAT_NARROW_WEEKDAYS, + /** Standalone context versions of months */ + UDAT_STANDALONE_MONTHS, + UDAT_STANDALONE_SHORT_MONTHS, + UDAT_STANDALONE_NARROW_MONTHS, + /** The CLDR-style stand-alone "wide" weekday names */ + UDAT_STANDALONE_WEEKDAYS, + /** + * The CLDR-style stand-alone "abbreviated" (not "short") weekday names. + * For the CLDR-style stand-alone "short" weekday names, use UDAT_STANDALONE_SHORTER_WEEKDAYS. + */ + UDAT_STANDALONE_SHORT_WEEKDAYS, + /** The CLDR-style stand-alone "narrow" weekday names */ + UDAT_STANDALONE_NARROW_WEEKDAYS, + /** The quarters, for example 1st Quarter */ + UDAT_QUARTERS, + /** The short quarter names, for example Q1 */ + UDAT_SHORT_QUARTERS, + /** Standalone context versions of quarters */ + UDAT_STANDALONE_QUARTERS, + UDAT_STANDALONE_SHORT_QUARTERS, + /** + * The CLDR-style short weekday names, e.g. "Su", "Mo", etc. + * These are named "SHORTER" to contrast with the constants using _SHORT_ + * above, which actually get the CLDR-style *abbreviated* versions of the + * corresponding names. + * @stable ICU 51 + */ + UDAT_SHORTER_WEEKDAYS, + /** + * Standalone version of UDAT_SHORTER_WEEKDAYS.
+ * @stable ICU 51 + */ + UDAT_STANDALONE_SHORTER_WEEKDAYS, + /** + * Cyclic year names (only supported for some calendars, and only for FORMAT usage; + * udat_setSymbols not supported for UDAT_CYCLIC_YEARS_WIDE) + * @stable ICU 54 + */ + UDAT_CYCLIC_YEARS_WIDE, + /** + * Cyclic year names (only supported for some calendars, and only for FORMAT usage) + * @stable ICU 54 + */ + UDAT_CYCLIC_YEARS_ABBREVIATED, + /** + * Cyclic year names (only supported for some calendars, and only for FORMAT usage; + * udat_setSymbols not supported for UDAT_CYCLIC_YEARS_NARROW) + * @stable ICU 54 + */ + UDAT_CYCLIC_YEARS_NARROW, + /** + * Calendar zodiac names (only supported for some calendars, and only for FORMAT usage; + * udat_setSymbols not supported for UDAT_ZODIAC_NAMES_WIDE) + * @stable ICU 54 + */ + UDAT_ZODIAC_NAMES_WIDE, + /** + * Calendar zodiac names (only supported for some calendars, and only for FORMAT usage) + * @stable ICU 54 + */ + UDAT_ZODIAC_NAMES_ABBREVIATED, + /** + * Calendar zodiac names (only supported for some calendars, and only for FORMAT usage; + * udat_setSymbols not supported for UDAT_ZODIAC_NAMES_NARROW) + * @stable ICU 54 + */ + UDAT_ZODIAC_NAMES_NARROW +} UDateFormatSymbolType; + +struct UDateFormatSymbols; +/** Date format symbols. + * For usage in C programs. + * @stable ICU 2.6 + */ +typedef struct UDateFormatSymbols UDateFormatSymbols; + +/** +* Get the symbols associated with an UDateFormat. +* The symbols are what a UDateFormat uses to represent locale-specific data, +* for example month or day names. +* @param fmt The formatter to query. +* @param type The type of symbols to get. One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS, +* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS +* @param symbolIndex The desired symbol of type type. +* @param result A pointer to a buffer to receive the pattern. +* @param resultLength The maximum size of result. 
+* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. +* @see udat_countSymbols +* @see udat_setSymbols +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +udat_getSymbols(const UDateFormat *fmt, + UDateFormatSymbolType type, + int32_t symbolIndex, + UChar *result, + int32_t resultLength, + UErrorCode *status); + +/** +* Count the number of particular symbols for an UDateFormat. +* This function is most useful for determining the loop termination condition +* for calls to {@link #udat_getSymbols }. +* @param fmt The formatter to query. +* @param type The type of symbols to count. One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS, +* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS +* @return The number of symbols of type type. +* @see udat_getSymbols +* @see udat_setSymbols +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +udat_countSymbols( const UDateFormat *fmt, + UDateFormatSymbolType type); + +/** +* Set the symbols associated with an UDateFormat. +* The symbols are what a UDateFormat uses to represent locale-specific data, +* for example month or day names. +* @param format The formatter to set +* @param type The type of symbols to set. One of UDAT_ERAS, UDAT_MONTHS, UDAT_SHORT_MONTHS, +* UDAT_WEEKDAYS, UDAT_SHORT_WEEKDAYS, UDAT_AM_PMS, or UDAT_LOCALIZED_CHARS +* @param symbolIndex The index of the symbol to set of type type. +* @param value The new value +* @param valueLength The length of value, or -1 if null-terminated +* @param status A pointer to an UErrorCode to receive any errors +* @see udat_getSymbols +* @see udat_countSymbols +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +udat_setSymbols( UDateFormat *format, + UDateFormatSymbolType type, + int32_t symbolIndex, + UChar *value, + int32_t valueLength, + UErrorCode *status); + +/** + * Get the locale for this date format object.
+ * You can choose between valid and actual locale. + * @param fmt The formatter to get the locale from + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale name + * @stable ICU 2.8 + */ +U_STABLE const char* U_EXPORT2 +udat_getLocaleByType(const UDateFormat *fmt, + ULocDataLocaleType type, + UErrorCode* status); + +/** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. + * @param fmt The formatter for which to set a UDisplayContext value. + * @param value The UDisplayContext value to set. + * @param status A pointer to an UErrorCode to receive any errors + * @stable ICU 51 + */ +U_DRAFT void U_EXPORT2 +udat_setContext(UDateFormat* fmt, UDisplayContext value, UErrorCode* status); + +/** + * Get the formatter's UDisplayContext value for the specified UDisplayContextType, + * such as UDISPCTX_TYPE_CAPITALIZATION. + * @param fmt The formatter to query. + * @param type The UDisplayContextType whose value to return + * @param status A pointer to an UErrorCode to receive any errors + * @return The UDisplayContextValue for the specified type. + * @stable ICU 53 + */ +U_STABLE UDisplayContext U_EXPORT2 +udat_getContext(const UDateFormat* fmt, UDisplayContextType type, UErrorCode* status); + +#ifndef U_HIDE_INTERNAL_API +/** +* Extract the date pattern from a UDateFormat set for relative date formatting. +* The pattern will follow the pattern syntax rules. +* @param fmt The formatter to query. +* @param result A pointer to a buffer to receive the pattern. +* @param resultLength The maximum size of result. +* @param status A pointer to a UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. 
+* @see udat_applyPatternRelative +* @internal ICU 4.2 technology preview +*/ +U_INTERNAL int32_t U_EXPORT2 +udat_toPatternRelativeDate(const UDateFormat *fmt, + UChar *result, + int32_t resultLength, + UErrorCode *status); + +/** +* Extract the time pattern from a UDateFormat set for relative date formatting. +* The pattern will follow the pattern syntax rules. +* @param fmt The formatter to query. +* @param result A pointer to a buffer to receive the pattern. +* @param resultLength The maximum size of result. +* @param status A pointer to a UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. +* @see udat_applyPatternRelative +* @internal ICU 4.2 technology preview +*/ +U_INTERNAL int32_t U_EXPORT2 +udat_toPatternRelativeTime(const UDateFormat *fmt, + UChar *result, + int32_t resultLength, + UErrorCode *status); + +/** +* Set the date & time patterns used by a UDateFormat set for relative date formatting. +* The patterns should follow the pattern syntax rules. +* @param format The formatter to set. +* @param datePattern The new date pattern +* @param datePatternLength The length of datePattern, or -1 if null-terminated. +* @param timePattern The new time pattern +* @param timePatternLength The length of timePattern, or -1 if null-terminated. 
+* @param status A pointer to a UErrorCode to receive any errors +* @see udat_toPatternRelativeDate, udat_toPatternRelativeTime +* @internal ICU 4.2 technology preview +*/ +U_INTERNAL void U_EXPORT2 +udat_applyPatternRelative(UDateFormat *format, + const UChar *datePattern, + int32_t datePatternLength, + const UChar *timePattern, + int32_t timePatternLength, + UErrorCode *status); + +/** + * @internal + * @see udat_open + */ +typedef UDateFormat* (U_EXPORT2 *UDateFormatOpener) (UDateFormatStyle timeStyle, + UDateFormatStyle dateStyle, + const char *locale, + const UChar *tzID, + int32_t tzIDLength, + const UChar *pattern, + int32_t patternLength, + UErrorCode *status); + +/** + * Register a provider factory + * @internal ICU 49 + */ +U_INTERNAL void U_EXPORT2 +udat_registerOpener(UDateFormatOpener opener, UErrorCode *status); + +/** + * Un-Register a provider factory + * @internal ICU 49 + */ +U_INTERNAL UDateFormatOpener U_EXPORT2 +udat_unregisterOpener(UDateFormatOpener opener, UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/udateintervalformat.h b/intl/icu/source/i18n/unicode/udateintervalformat.h new file mode 100644 index 000000000..582c43789 --- /dev/null +++ b/intl/icu/source/i18n/unicode/udateintervalformat.h @@ -0,0 +1,186 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2010-2012,2015 International Business Machines +* Corporation and others. All Rights Reserved. 
+***************************************************************************************** +*/ + +#ifndef UDATEINTERVALFORMAT_H +#define UDATEINTERVALFORMAT_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/umisc.h" +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: Format a date interval. + * + * A UDateIntervalFormat is used to format the range between two UDate values + * in a locale-sensitive way, using a skeleton that specifies the precision and + * completeness of the information to show. If the range is smaller than the resolution + * specified by the skeleton, a single date format will be produced. If the range + * is larger than the format specified by the skeleton, a locale-specific fallback + * will be used to format the items missing from the skeleton. + * + * For example, if the range is 2010-03-04 07:56 - 2010-03-04 19:56 (12 hours) + * - The skeleton jm will produce + * for en_US, "7:56 AM - 7:56 PM" + * for en_GB, "7:56 - 19:56" + * - The skeleton MMMd will produce + * for en_US, "Mar 4" + * for en_GB, "4 Mar" + * If the range is 2010-03-04 07:56 - 2010-03-08 16:11 (4 days, 8 hours, 15 minutes) + * - The skeleton jm will produce + * for en_US, "3/4/2010 7:56 AM - 3/8/2010 4:11 PM" + * for en_GB, "4/3/2010 7:56 - 8/3/2010 16:11" + * - The skeleton MMMd will produce + * for en_US, "Mar 4-8" + * for en_GB, "4-8 Mar" + * + * Note: the "-" characters in the above sample output will actually be + * Unicode 2013, EN_DASH, in all but the last example. + * + * Note, in ICU 4.4 the standard skeletons for which date interval format data + * is usually available are as follows; best results will be obtained by using + * skeletons from this set, or those formed by combining these standard skeletons + * (note that for these skeletons, the length of digit field such as d, y, or + * M vs MM is irrelevant (but for non-digit fields such as MMM vs MMMM it is + * relevant).
Note that a skeleton involving h or H generally explicitly requests + * that time style (12- or 24-hour time respectively). For a skeleton that + * requests the locale's default time style (h or H), use 'j' instead of h or H. + * h, H, hm, Hm, + * hv, Hv, hmv, Hmv, + * d, + * M, MMM, MMMM, + * Md, MMMd, + * MEd, MMMEd, + * y, + * yM, yMMM, yMMMM, + * yMd, yMMMd, + * yMEd, yMMMEd + * + * Locales for which ICU 4.4 seems to have a reasonable amount of this data + * include: + * af, am, ar, be, bg, bn, ca, cs, da, de (_AT), el, en (_AU,_CA,_GB,_IE,_IN...), + * eo, es (_AR,_CL,_CO,...,_US) et, fa, fi, fo, fr (_BE,_CH,_CA), fur, gsw, he, + * hr, hu, hy, is, it (_CH), ja, kk, km, ko, lt, lv, mk, ml, mt, nb, nl (_BE), + * nn, pl, pt (_PT), rm, ro, ru (_UA), sk, sl, so, sq, sr, sr_Latn, sv, th, to, + * tr, uk, ur, vi, zh (_SG), zh_Hant (_HK,_MO) + */ + +/** + * Opaque UDateIntervalFormat object for use in C programs. + * @stable ICU 4.8 + */ +struct UDateIntervalFormat; +typedef struct UDateIntervalFormat UDateIntervalFormat; /**< C typedef for struct UDateIntervalFormat. @stable ICU 4.8 */ + +/** + * Open a new UDateIntervalFormat object using the predefined rules for a + * given locale plus a specified skeleton. + * @param locale + * The locale for whose rules should be used; may be NULL for + * default locale. + * @param skeleton + * A pattern containing only the fields desired for the interval + * format, for example "Hm", "yMMMd", or "yMMMEdHm". + * @param skeletonLength + * The length of skeleton; may be -1 if the skeleton is zero-terminated. + * @param tzID + * A timezone ID specifying the timezone to use. If 0, use the default + * timezone. + * @param tzIDLength + * The length of tzID, or -1 if null-terminated. If 0, use the default + * timezone. + * @param status + * A pointer to a UErrorCode to receive any errors. + * @return + * A pointer to a UDateIntervalFormat object for the specified locale, + * or NULL if an error occurred.
+ * @stable ICU 4.8 + */ +U_STABLE UDateIntervalFormat* U_EXPORT2 +udtitvfmt_open(const char* locale, + const UChar* skeleton, + int32_t skeletonLength, + const UChar* tzID, + int32_t tzIDLength, + UErrorCode* status); + +/** + * Close a UDateIntervalFormat object. Once closed it may no longer be used. + * @param formatter + * The UDateIntervalFormat object to close. + * @stable ICU 4.8 + */ +U_STABLE void U_EXPORT2 +udtitvfmt_close(UDateIntervalFormat *formatter); + + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUDateIntervalFormatPointer + * "Smart pointer" class, closes a UDateIntervalFormat via udtitvfmt_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.8 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUDateIntervalFormatPointer, UDateIntervalFormat, udtitvfmt_close); + +U_NAMESPACE_END + +#endif + + +/** + * Formats a date/time range using the conventions established for the + * UDateIntervalFormat object. + * @param formatter + * The UDateIntervalFormat object specifying the format conventions. + * @param fromDate + * The starting point of the range. + * @param toDate + * The ending point of the range. + * @param result + * A pointer to a buffer to receive the formatted range. + * @param resultCapacity + * The maximum size of result. + * @param position + * A pointer to a UFieldPosition. On input, position->field is read. + * On output, position->beginIndex and position->endIndex indicate + * the beginning and ending indices of field number position->field, + * if such a field exists. This parameter may be NULL, in which case + * no field position data is returned. + * There may be multiple instances of a given field type in an + * interval format; in this case the position indices refer to the + * first instance. + * @param status + * A pointer to a UErrorCode to receive any errors. 
+ * @return + * The total buffer size needed; if greater than resultCapacity, the + * output was truncated. + * @stable ICU 4.8 + */ +U_STABLE int32_t U_EXPORT2 +udtitvfmt_format(const UDateIntervalFormat* formatter, + UDate fromDate, + UDate toDate, + UChar* result, + int32_t resultCapacity, + UFieldPosition* position, + UErrorCode* status); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/udatpg.h b/intl/icu/source/i18n/unicode/udatpg.h new file mode 100644 index 000000000..47d3afb1d --- /dev/null +++ b/intl/icu/source/i18n/unicode/udatpg.h @@ -0,0 +1,605 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2007-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: udatpg.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2007jul30 +* created by: Markus W. Scherer +*/ + +#ifndef __UDATPG_H__ +#define __UDATPG_H__ + +#include "unicode/utypes.h" +#include "unicode/uenum.h" +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: Wrapper for icu::DateTimePatternGenerator (unicode/dtptngen.h). + * + * UDateTimePatternGenerator provides flexible generation of date format patterns, + * like "yy-MM-dd". The user can build up the generator by adding successive + * patterns. Once that is done, a query can be made using a "skeleton", which is + * a pattern which just includes the desired fields and lengths. The generator + * will return the "best fit" pattern corresponding to that skeleton. + * <p>The main method people will use is udatpg_getBestPattern, since normally + * UDateTimePatternGenerator is pre-built with data from a particular locale.
+ * However, generators can be built directly from other data as well. + * <p><i>Issue: may be useful to also have a function that returns the list of + * fields in a pattern, in order, since we have that internally. + * That would be useful for getting the UI order of field elements.</i> + */ + +/** + * Opaque type for a date/time pattern generator object. + * @stable ICU 3.8 + */ +typedef void *UDateTimePatternGenerator; + +/** + * Field number constants for udatpg_getAppendItemFormats() and similar functions. + * These constants are separate from UDateFormatField despite semantic overlap + * because some fields are merged for the date/time pattern generator. + * @stable ICU 3.8 + */ +typedef enum UDateTimePatternField { + /** @stable ICU 3.8 */ + UDATPG_ERA_FIELD, + /** @stable ICU 3.8 */ + UDATPG_YEAR_FIELD, + /** @stable ICU 3.8 */ + UDATPG_QUARTER_FIELD, + /** @stable ICU 3.8 */ + UDATPG_MONTH_FIELD, + /** @stable ICU 3.8 */ + UDATPG_WEEK_OF_YEAR_FIELD, + /** @stable ICU 3.8 */ + UDATPG_WEEK_OF_MONTH_FIELD, + /** @stable ICU 3.8 */ + UDATPG_WEEKDAY_FIELD, + /** @stable ICU 3.8 */ + UDATPG_DAY_OF_YEAR_FIELD, + /** @stable ICU 3.8 */ + UDATPG_DAY_OF_WEEK_IN_MONTH_FIELD, + /** @stable ICU 3.8 */ + UDATPG_DAY_FIELD, + /** @stable ICU 3.8 */ + UDATPG_DAYPERIOD_FIELD, + /** @stable ICU 3.8 */ + UDATPG_HOUR_FIELD, + /** @stable ICU 3.8 */ + UDATPG_MINUTE_FIELD, + /** @stable ICU 3.8 */ + UDATPG_SECOND_FIELD, + /** @stable ICU 3.8 */ + UDATPG_FRACTIONAL_SECOND_FIELD, + /** @stable ICU 3.8 */ + UDATPG_ZONE_FIELD, + + // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + // it is needed for layout of DateTimePatternGenerator object. + /** + * One more than the highest normal UDateTimePatternField value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
+ */ + UDATPG_FIELD_COUNT +} UDateTimePatternField; + +/** + * Masks to control forcing the length of specified fields in the returned + * pattern to match those in the skeleton (when this would not happen + * otherwise). These may be combined to force the length of multiple fields. + * Used with udatpg_getBestPatternWithOptions, udatpg_replaceFieldTypesWithOptions. + * @stable ICU 4.4 + */ +typedef enum UDateTimePatternMatchOptions { + /** @stable ICU 4.4 */ + UDATPG_MATCH_NO_OPTIONS = 0, + /** @stable ICU 4.4 */ + UDATPG_MATCH_HOUR_FIELD_LENGTH = 1 << UDATPG_HOUR_FIELD, +#ifndef U_HIDE_INTERNAL_API + /** @internal ICU 4.4 */ + UDATPG_MATCH_MINUTE_FIELD_LENGTH = 1 << UDATPG_MINUTE_FIELD, + /** @internal ICU 4.4 */ + UDATPG_MATCH_SECOND_FIELD_LENGTH = 1 << UDATPG_SECOND_FIELD, +#endif /* U_HIDE_INTERNAL_API */ + /** @stable ICU 4.4 */ + UDATPG_MATCH_ALL_FIELDS_LENGTH = (1 << UDATPG_FIELD_COUNT) - 1 +} UDateTimePatternMatchOptions; + +/** + * Status return values from udatpg_addPattern(). + * @stable ICU 3.8 + */ +typedef enum UDateTimePatternConflict { + /** @stable ICU 3.8 */ + UDATPG_NO_CONFLICT, + /** @stable ICU 3.8 */ + UDATPG_BASE_CONFLICT, + /** @stable ICU 3.8 */ + UDATPG_CONFLICT, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UDateTimePatternConflict value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDATPG_CONFLICT_COUNT +#endif // U_HIDE_DEPRECATED_API +} UDateTimePatternConflict; + +/** + * Open a generator according to a given locale. + * @param locale + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return a pointer to UDateTimePatternGenerator. + * @stable ICU 3.8 + */ +U_STABLE UDateTimePatternGenerator * U_EXPORT2 +udatpg_open(const char *locale, UErrorCode *pErrorCode); + +/** + * Open an empty generator, to be constructed with udatpg_addPattern(...) etc. 
+ * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return a pointer to UDateTimePatternGenerator. + * @stable ICU 3.8 + */ +U_STABLE UDateTimePatternGenerator * U_EXPORT2 +udatpg_openEmpty(UErrorCode *pErrorCode); + +/** + * Close a generator. + * @param dtpg a pointer to UDateTimePatternGenerator. + * @stable ICU 3.8 + */ +U_STABLE void U_EXPORT2 +udatpg_close(UDateTimePatternGenerator *dtpg); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUDateTimePatternGeneratorPointer + * "Smart pointer" class, closes a UDateTimePatternGenerator via udatpg_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUDateTimePatternGeneratorPointer, UDateTimePatternGenerator, udatpg_close); + +U_NAMESPACE_END + +#endif + +/** + * Create a copy of a generator. + * @param dtpg a pointer to UDateTimePatternGenerator to be copied. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return a pointer to a new UDateTimePatternGenerator. + * @stable ICU 3.8 + */ +U_STABLE UDateTimePatternGenerator * U_EXPORT2 +udatpg_clone(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode); + +/** + * Get the best pattern matching the input skeleton. It is guaranteed to + * have all of the fields in the skeleton. + * + * Note that this function uses a non-const UDateTimePatternGenerator: + * It uses a stateful pattern parser which is set up for each generator object, + * rather than creating one for each function call. + * Consecutive calls to this function do not affect each other, + * but this function cannot be used concurrently on a single generator object. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param skeleton + * The skeleton is a pattern containing only the variable fields.
+ * For example, "MMMdd" and "mmhh" are skeletons. + * @param length the length of skeleton + * @param bestPattern + * The best pattern found from the given skeleton. + * @param capacity the capacity of bestPattern. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return the length of bestPattern. + * @stable ICU 3.8 + */ +U_STABLE int32_t U_EXPORT2 +udatpg_getBestPattern(UDateTimePatternGenerator *dtpg, + const UChar *skeleton, int32_t length, + UChar *bestPattern, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Get the best pattern matching the input skeleton. It is guaranteed to + * have all of the fields in the skeleton. + * + * Note that this function uses a non-const UDateTimePatternGenerator: + * It uses a stateful pattern parser which is set up for each generator object, + * rather than creating one for each function call. + * Consecutive calls to this function do not affect each other, + * but this function cannot be used concurrently on a single generator object. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param skeleton + * The skeleton is a pattern containing only the variable fields. + * For example, "MMMdd" and "mmhh" are skeletons. + * @param length the length of skeleton + * @param options + * Options for forcing the length of specified fields in the + * returned pattern to match those in the skeleton (when this + * would not happen otherwise). For default behavior, use + * UDATPG_MATCH_NO_OPTIONS. + * @param bestPattern + * The best pattern found from the given skeleton. + * @param capacity + * the capacity of bestPattern. + * @param pErrorCode + * a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return the length of bestPattern. 
+ * @stable ICU 4.4 + */ +U_STABLE int32_t U_EXPORT2 +udatpg_getBestPatternWithOptions(UDateTimePatternGenerator *dtpg, + const UChar *skeleton, int32_t length, + UDateTimePatternMatchOptions options, + UChar *bestPattern, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Get a unique skeleton from a given pattern. For example, + * both "MMM-dd" and "dd/MMM" produce the skeleton "MMMdd". + * + * Note that this function uses a non-const UDateTimePatternGenerator: + * It uses a stateful pattern parser which is set up for each generator object, + * rather than creating one for each function call. + * Consecutive calls to this function do not affect each other, + * but this function cannot be used concurrently on a single generator object. + * + * @param unusedDtpg a pointer to UDateTimePatternGenerator. + * This parameter is no longer used. Callers may pass NULL. + * @param pattern input pattern, such as "dd/MMM". + * @param length the length of pattern. + * @param skeleton such as "MMMdd" + * @param capacity the capacity of skeleton. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return the length of skeleton. + * @stable ICU 3.8 + */ +U_STABLE int32_t U_EXPORT2 +udatpg_getSkeleton(UDateTimePatternGenerator *unusedDtpg, + const UChar *pattern, int32_t length, + UChar *skeleton, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Get a unique base skeleton from a given pattern. This is the same + * as the skeleton, except that differences in length are minimized so + * as to only preserve the difference between string and numeric form. So + * for example, both "MMM-dd" and "d/MMM" produce the skeleton "MMMd" + * (notice the single d). + * + * Note that this function uses a non-const UDateTimePatternGenerator: + * It uses a stateful pattern parser which is set up for each generator object, + * rather than creating one for each function call. 
+ * Consecutive calls to this function do not affect each other, + * but this function cannot be used concurrently on a single generator object. + * + * @param unusedDtpg a pointer to UDateTimePatternGenerator. + * This parameter is no longer used. Callers may pass NULL. + * @param pattern input pattern, such as "dd/MMM". + * @param length the length of pattern. + * @param baseSkeleton such as "Md" + * @param capacity the capacity of base skeleton. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return the length of baseSkeleton. + * @stable ICU 3.8 + */ +U_STABLE int32_t U_EXPORT2 +udatpg_getBaseSkeleton(UDateTimePatternGenerator *unusedDtpg, + const UChar *pattern, int32_t length, + UChar *baseSkeleton, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Adds a pattern to the generator. If the pattern has the same skeleton as + * an existing pattern, and the override parameter is set, then the previous + * value is overridden. Otherwise, the previous value is retained. In either + * case, the conflicting status is set and the previous value is stored in + * conflictingPattern. + * <p> + * Note that single-field patterns (like "MMM") are automatically added, and + * don't need to be added explicitly! + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param pattern input pattern, such as "dd/MMM" + * @param patternLength the length of pattern. + * @param override When existing values are to be overridden use true, + * otherwise use false. + * @param conflictingPattern Previous pattern with the same skeleton. + * @param capacity the capacity of conflictingPattern. + * @param pLength a pointer to the length of conflictingPattern. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return conflicting status. The value could be UDATPG_NO_CONFLICT, + * UDATPG_BASE_CONFLICT or UDATPG_CONFLICT.
+ * @stable ICU 3.8 + */ +U_STABLE UDateTimePatternConflict U_EXPORT2 +udatpg_addPattern(UDateTimePatternGenerator *dtpg, + const UChar *pattern, int32_t patternLength, + UBool override, + UChar *conflictingPattern, int32_t capacity, int32_t *pLength, + UErrorCode *pErrorCode); + +/** + * An AppendItem format is a pattern used to append a field if there is no + * good match. For example, suppose that the input skeleton is "GyyyyMMMd", + * and there is no matching pattern internally, but there is a pattern + * matching "yyyyMMMd", say "d-MM-yyyy". Then that pattern is used, plus the + * G. The way these two are conjoined is by using the AppendItemFormat for G + * (era). So if that value is, say "{0}, {1}" then the final resulting + * pattern is "d-MM-yyyy, G". + * <p> + * There are actually three available variables: {0} is the pattern so far, + * {1} is the element we are adding, and {2} is the name of the element. + * <p> + * This reflects the way that the CLDR data is organized. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param field UDateTimePatternField, such as UDATPG_ERA_FIELD + * @param value pattern, such as "{0}, {1}" + * @param length the length of value. + * @stable ICU 3.8 + */ +U_STABLE void U_EXPORT2 +udatpg_setAppendItemFormat(UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + const UChar *value, int32_t length); + +/** + * Getter corresponding to setAppendItemFormat. Values below 0 or at or + * above UDATPG_FIELD_COUNT are illegal arguments. + * + * @param dtpg A pointer to UDateTimePatternGenerator. + * @param field UDateTimePatternField, such as UDATPG_ERA_FIELD + * @param pLength A pointer that will receive the length of appendItemFormat. + * @return appendItemFormat for field. 
+ * @stable ICU 3.8 + */ +U_STABLE const UChar * U_EXPORT2 +udatpg_getAppendItemFormat(const UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + int32_t *pLength); + +/** + * Set the name of field, e.g. "era" in English for ERA. These are only + * used if the corresponding AppendItemFormat is used, and if it contains a + * {2} variable. + * <p> + * This reflects the way that the CLDR data is organized. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param field UDateTimePatternField + * @param value name for the field. + * @param length the length of value. + * @stable ICU 3.8 + */ +U_STABLE void U_EXPORT2 +udatpg_setAppendItemName(UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + const UChar *value, int32_t length); + +/** + * Getter corresponding to setAppendItemName. Values below 0 or at or above + * UDATPG_FIELD_COUNT are illegal arguments. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param field UDateTimePatternField, such as UDATPG_ERA_FIELD + * @param pLength A pointer that will receive the length of the name for field. + * @return name for field + * @stable ICU 3.8 + */ +U_STABLE const UChar * U_EXPORT2 +udatpg_getAppendItemName(const UDateTimePatternGenerator *dtpg, + UDateTimePatternField field, + int32_t *pLength); + +/** + * The DateTimeFormat is a message format pattern used to compose date and + * time patterns. The default pattern in the root locale is "{1} {0}", where + * {1} will be replaced by the date pattern and {0} will be replaced by the + * time pattern; however, other locales may specify patterns such as + * "{1}, {0}" or "{1} 'at' {0}", etc. + * <p> + * This is used when the input skeleton contains both date and time fields, + * but there is not a close match among the added patterns. For example, + * suppose that this object was created by adding "dd-MMM" and "hh:mm", and + * its DateTimeFormat is the default "{1} {0}".
Then if the input skeleton + * is "MMMdhmm", there is not an exact match, so the input skeleton is + * broken up into two components "MMMd" and "hmm". There are close matches + * for those two skeletons, so the result is put together with this pattern, + * resulting in "d-MMM h:mm". + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param dtFormat + * message format pattern, here {1} will be replaced by the date + * pattern and {0} will be replaced by the time pattern. + * @param length the length of dtFormat. + * @stable ICU 3.8 + */ +U_STABLE void U_EXPORT2 +udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg, + const UChar *dtFormat, int32_t length); + +/** + * Getter corresponding to setDateTimeFormat. + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param pLength A pointer that will receive the length of the format + * @return dateTimeFormat. + * @stable ICU 3.8 + */ +U_STABLE const UChar * U_EXPORT2 +udatpg_getDateTimeFormat(const UDateTimePatternGenerator *dtpg, + int32_t *pLength); + +/** + * The decimal value is used in formatting fractions of seconds. If the + * skeleton contains fractional seconds, then this is used with the + * fractional seconds. For example, suppose that the input pattern is + * "hhmmssSSSS", and the best matching pattern internally is "H:mm:ss", and + * the decimal string is ",". Then the resulting pattern is modified to be + * "H:mm:ss,SSSS" + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param decimal + * @param length the length of decimal. + * @stable ICU 3.8 + */ +U_STABLE void U_EXPORT2 +udatpg_setDecimal(UDateTimePatternGenerator *dtpg, + const UChar *decimal, int32_t length); + +/** + * Getter corresponding to setDecimal. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param pLength A pointer that will receive the length of the decimal string. + * @return corresponding to the decimal point. 
+ * @stable ICU 3.8 + */ +U_STABLE const UChar * U_EXPORT2 +udatpg_getDecimal(const UDateTimePatternGenerator *dtpg, + int32_t *pLength); + +/** + * Adjusts the field types (width and subtype) of a pattern to match what is + * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a + * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be + * "dd-MMMM hh:mm". This is used internally to get the best match for the + * input skeleton, but can also be used externally. + * + * Note that this function uses a non-const UDateTimePatternGenerator: + * It uses a stateful pattern parser which is set up for each generator object, + * rather than creating one for each function call. + * Consecutive calls to this function do not affect each other, + * but this function cannot be used concurrently on a single generator object. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param pattern Input pattern + * @param patternLength the length of input pattern. + * @param skeleton + * @param skeletonLength the length of input skeleton. + * @param dest pattern adjusted to match the skeleton fields widths and subtypes. + * @param destCapacity the capacity of dest. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return the length of dest. + * @stable ICU 3.8 + */ +U_STABLE int32_t U_EXPORT2 +udatpg_replaceFieldTypes(UDateTimePatternGenerator *dtpg, + const UChar *pattern, int32_t patternLength, + const UChar *skeleton, int32_t skeletonLength, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode); + +/** + * Adjusts the field types (width and subtype) of a pattern to match what is + * in a skeleton. That is, if you supply a pattern like "d-M H:m", and a + * skeleton of "MMMMddhhmm", then the input pattern is adjusted to be + * "dd-MMMM hh:mm". This is used internally to get the best match for the + * input skeleton, but can also be used externally. 
+ * + * Note that this function uses a non-const UDateTimePatternGenerator: + * It uses a stateful pattern parser which is set up for each generator object, + * rather than creating one for each function call. + * Consecutive calls to this function do not affect each other, + * but this function cannot be used concurrently on a single generator object. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param pattern Input pattern + * @param patternLength the length of input pattern. + * @param skeleton + * @param skeletonLength the length of input skeleton. + * @param options + * Options controlling whether the length of specified fields in the + * pattern are adjusted to match those in the skeleton (when this + * would not happen otherwise). For default behavior, use + * UDATPG_MATCH_NO_OPTIONS. + * @param dest pattern adjusted to match the skeleton fields widths and subtypes. + * @param destCapacity the capacity of dest. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return the length of dest. + * @stable ICU 4.4 + */ +U_STABLE int32_t U_EXPORT2 +udatpg_replaceFieldTypesWithOptions(UDateTimePatternGenerator *dtpg, + const UChar *pattern, int32_t patternLength, + const UChar *skeleton, int32_t skeletonLength, + UDateTimePatternMatchOptions options, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode); + +/** + * Return a UEnumeration list of all the skeletons in canonical form. + * Call udatpg_getPatternForSkeleton() to get the corresponding pattern. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call + * @return a UEnumeration list of all the skeletons + * The caller must close the object. 
+ * @stable ICU 3.8 + */ +U_STABLE UEnumeration * U_EXPORT2 +udatpg_openSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode); + +/** + * Return a UEnumeration list of all the base skeletons in canonical form. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param pErrorCode a pointer to the UErrorCode which must not indicate a + * failure before the function call. + * @return a UEnumeration list of all the base skeletons + * The caller must close the object. + * @stable ICU 3.8 + */ +U_STABLE UEnumeration * U_EXPORT2 +udatpg_openBaseSkeletons(const UDateTimePatternGenerator *dtpg, UErrorCode *pErrorCode); + +/** + * Get the pattern corresponding to a given skeleton. + * + * @param dtpg a pointer to UDateTimePatternGenerator. + * @param skeleton + * @param skeletonLength the length of skeleton. + * @param pLength pointer to the length of return pattern. + * @return pattern corresponding to a given skeleton. + * @stable ICU 3.8 + */ +U_STABLE const UChar * U_EXPORT2 +udatpg_getPatternForSkeleton(const UDateTimePatternGenerator *dtpg, + const UChar *skeleton, int32_t skeletonLength, + int32_t *pLength); + +#endif diff --git a/intl/icu/source/i18n/unicode/ufieldpositer.h b/intl/icu/source/i18n/unicode/ufieldpositer.h new file mode 100644 index 000000000..8dfa3df5a --- /dev/null +++ b/intl/icu/source/i18n/unicode/ufieldpositer.h @@ -0,0 +1,121 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2015-2016, International Business Machines +* Corporation and others. All Rights Reserved.
+***************************************************************************************** +*/ + +#ifndef UFIELDPOSITER_H +#define UFIELDPOSITER_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: UFieldPositionIterator for use with format APIs. + * + * Usage: + * ufieldpositer_open creates an empty (unset) UFieldPositionIterator. + * This can be passed to format functions such as {@link #udat_formatForFields}, + * which will set it to apply to the fields in a particular formatted string. + * ufieldpositer_next can then be used to iterate over those fields, + * providing for each field its type (using values that are specific to the + * particular format type, such as date or number formats), as well as the + * start and end positions of the field in the formatted string. + * A given UFieldPositionIterator can be re-used for different format calls; + * each such call resets it to apply to that format string. + * ufieldpositer_close should be called to dispose of the UFieldPositionIterator + * when it is no longer needed. + * + * @see FieldPositionIterator + */ + +/** + * Opaque UFieldPositionIterator object for use in C. + * @stable ICU 55 + */ +struct UFieldPositionIterator; +typedef struct UFieldPositionIterator UFieldPositionIterator; /**< C typedef for struct UFieldPositionIterator. @stable ICU 55 */ + +/** + * Open a new, unset UFieldPositionIterator object. + * @param status + * A pointer to a UErrorCode to receive any errors. + * @return + * A pointer to an empty (unset) UFieldPositionIterator object, + * or NULL if an error occurred. + * @stable ICU 55 + */ +U_STABLE UFieldPositionIterator* U_EXPORT2 +ufieldpositer_open(UErrorCode* status); + +/** + * Close a UFieldPositionIterator object. Once closed it may no longer be used. + * @param fpositer + * A pointer to the UFieldPositionIterator object to close. 
+ * @stable ICU 55 + */ +U_STABLE void U_EXPORT2 +ufieldpositer_close(UFieldPositionIterator *fpositer); + + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUFieldPositionIteratorPointer + * "Smart pointer" class, closes a UFieldPositionIterator via ufieldpositer_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 55 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUFieldPositionIteratorPointer, UFieldPositionIterator, ufieldpositer_close); + +U_NAMESPACE_END + +#endif + +/** + * Get information for the next field in the formatted string to which this + * UFieldPositionIterator currently applies, or return a negative value if there + * are no more fields. + * @param fpositer + * A pointer to the UFieldPositionIterator object containing iteration + * state for the format fields. + * @param beginIndex + * A pointer to an int32_t to receive information about the start offset + * of the field in the formatted string (undefined if the function + * returns a negative value). May be NULL if this information is not needed. + * @param endIndex + * A pointer to an int32_t to receive information about the end offset + * of the field in the formatted string (undefined if the function + * returns a negative value). May be NULL if this information is not needed. + * @return + * The field type (non-negative value), or a negative value if there are + * no more fields for which to provide information. If negative, then any + * values pointed to by beginIndex and endIndex are undefined. + * + * The values for field type depend on what type of formatter the + * UFieldPositionIterator has been set by; for a date formatter, the + * values from the UDateFormatField enum. For more information, see the + * descriptions of format functions that take a UFieldPositionIterator* + * parameter, such as {@link #udat_formatForFields}. 
+ * + * @stable ICU 55 + */ +U_STABLE int32_t U_EXPORT2 +ufieldpositer_next(UFieldPositionIterator *fpositer, + int32_t *beginIndex, int32_t *endIndex); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/uformattable.h b/intl/icu/source/i18n/unicode/uformattable.h new file mode 100644 index 000000000..e4683d56c --- /dev/null +++ b/intl/icu/source/i18n/unicode/uformattable.h @@ -0,0 +1,288 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 2013-2014, International Business Machines Corporation and others. +* All Rights Reserved. +******************************************************************************** +* +* File UFORMATTABLE.H +* +* Modification History: +* +* Date Name Description +* 2013 Jun 7 srl New +******************************************************************************** +*/ + +/** + * \file + * \brief C API: UFormattable is a thin wrapper for primitive types used for formatting and parsing. + * + * This is a C interface to the icu::Formattable class. Static functions on this class convert + * to and from this interface (via reinterpret_cast). Note that Formattables (and thus UFormattables) + * are mutable, and many operations (even getters) may actually modify the internal state. For this + * reason, UFormattables are not thread safe, and should not be shared between threads. + * + * See {@link unum_parseToUFormattable} for example code. + */ + +#ifndef UFORMATTABLE_H +#define UFORMATTABLE_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" + +/** + * Enum designating the type of a UFormattable instance. + * Practically, this indicates which of the getters would return without conversion + * or error. 
+ * @see icu::Formattable::Type + * @stable ICU 52 + */ +typedef enum UFormattableType { + UFMT_DATE = 0, /**< ufmt_getDate() will return without conversion. @see ufmt_getDate*/ + UFMT_DOUBLE, /**< ufmt_getDouble() will return without conversion. @see ufmt_getDouble*/ + UFMT_LONG, /**< ufmt_getLong() will return without conversion. @see ufmt_getLong */ + UFMT_STRING, /**< ufmt_getUChars() will return without conversion. @see ufmt_getUChars*/ + UFMT_ARRAY, /**< ufmt_countArray() and ufmt_getArray() will return the value. @see ufmt_getArrayItemByIndex */ + UFMT_INT64, /**< ufmt_getInt64() will return without conversion. @see ufmt_getInt64 */ + UFMT_OBJECT, /**< ufmt_getObject() will return without conversion. @see ufmt_getObject*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UFormattableType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UFMT_COUNT +#endif // U_HIDE_DEPRECATED_API +} UFormattableType; + + +/** + * Opaque type representing various types of data which may be used for formatting + * and parsing operations. + * @see icu::Formattable + * @stable ICU 52 + */ +typedef void *UFormattable; + +/** + * Initialize a UFormattable to type UFMT_LONG, value 0. + * May return an error if memory allocation failed. + * @param status the error code. + * See {@link unum_parseToUFormattable} for example code. + * @stable ICU 52 + * @return the new UFormattable + * @see ufmt_close + * @see icu::Formattable::Formattable() + */ +U_STABLE UFormattable* U_EXPORT2 +ufmt_open(UErrorCode* status); + +/** + * Cleanup any additional memory allocated by this UFormattable. + * @param fmt the UFormattable object + * @stable ICU 52 + * @see ufmt_open + */ +U_STABLE void U_EXPORT2 +ufmt_close(UFormattable* fmt); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUFormattablePointer + * "Smart pointer" class, closes a UFormattable via ufmt_close().
+ * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 52 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUFormattablePointer, UFormattable, ufmt_close); + +U_NAMESPACE_END + +#endif + +/** + * Return the type of this object + * @param fmt the UFormattable object + * @param status status code - U_ILLEGAL_ARGUMENT_ERROR is returned if the UFormattable contains data not supported by + * the API + * @return the value as a UFormattableType + * @see ufmt_isNumeric + * @see icu::Formattable::getType() const + * @stable ICU 52 + */ +U_STABLE UFormattableType U_EXPORT2 +ufmt_getType(const UFormattable* fmt, UErrorCode *status); + +/** + * Return whether the object is numeric. + * @param fmt the UFormattable object + * @return true if the object is a double, long, or int64 value, else false. + * @see ufmt_getType + * @see icu::Formattable::isNumeric() const + * @stable ICU 52 + */ +U_STABLE UBool U_EXPORT2 +ufmt_isNumeric(const UFormattable* fmt); + +/** + * Gets the UDate value of this object. If the type is not of type UFMT_DATE, + * status is set to U_INVALID_FORMAT_ERROR and the return value is + * undefined. + * @param fmt the UFormattable object + * @param status the error code - any conversion or format errors + * @return the value + * @stable ICU 52 + * @see icu::Formattable::getDate(UErrorCode&) const + */ +U_STABLE UDate U_EXPORT2 +ufmt_getDate(const UFormattable* fmt, UErrorCode *status); + +/** + * Gets the double value of this object. If the type is not a UFMT_DOUBLE, or + * if there are additional significant digits than fit in a double type, + * a conversion is performed with possible loss of precision. + * If the type is UFMT_OBJECT and the + * object is a Measure, then the result of + * getNumber().getDouble(status) is returned. If this object is + * neither a numeric type nor a Measure, then 0 is returned and + * the status is set to U_INVALID_FORMAT_ERROR. 
+ * @param fmt the UFormattable object + * @param status the error code - any conversion or format errors + * @return the value + * @stable ICU 52 + * @see icu::Formattable::getDouble(UErrorCode&) const + */ +U_STABLE double U_EXPORT2 +ufmt_getDouble(UFormattable* fmt, UErrorCode *status); + +/** + * Gets the long (int32_t) value of this object. If the magnitude is too + * large to fit in a long, then the maximum or minimum long value, + * as appropriate, is returned and the status is set to + * U_INVALID_FORMAT_ERROR. If this object is of type UFMT_INT64 and + * it fits within a long, then no precision is lost. If it is of + * type kDouble or kDecimalNumber, then a conversion is performed, with + * truncation of any fractional part. If the type is UFMT_OBJECT and + * the object is a Measure, then the result of + * getNumber().getLong(status) is returned. If this object is + * neither a numeric type nor a Measure, then 0 is returned and + * the status is set to U_INVALID_FORMAT_ERROR. + * @param fmt the UFormattable object + * @param status the error code - any conversion or format errors + * @return the value + * @stable ICU 52 + * @see icu::Formattable::getLong(UErrorCode&) const + */ +U_STABLE int32_t U_EXPORT2 +ufmt_getLong(UFormattable* fmt, UErrorCode *status); + + +/** + * Gets the int64_t value of this object. If this object is of a numeric + * type and the magnitude is too large to fit in an int64, then + * the maximum or minimum int64 value, as appropriate, is returned + * and the status is set to U_INVALID_FORMAT_ERROR. If the + * magnitude fits in an int64, then a casting conversion is + * performed, with truncation of any fractional part. If the type + * is UFMT_OBJECT and the object is a Measure, then the result of + * getNumber().getDouble(status) is returned. If this object is + * neither a numeric type nor a Measure, then 0 is returned and + * the status is set to U_INVALID_FORMAT_ERROR.
+ * @param fmt the UFormattable object + * @param status the error code - any conversion or format errors + * @return the value + * @stable ICU 52 + * @see icu::Formattable::getInt64(UErrorCode&) const + */ +U_STABLE int64_t U_EXPORT2 +ufmt_getInt64(UFormattable* fmt, UErrorCode *status); + +/** + * Returns a pointer to the UObject contained within this + * formattable (as a const void*), or NULL if this object + * is not of type UFMT_OBJECT. + * @param fmt the UFormattable object + * @param status the error code - any conversion or format errors + * @return the value as a const void*. It is a polymorphic C++ object. + * @stable ICU 52 + * @see icu::Formattable::getObject() const + */ +U_STABLE const void *U_EXPORT2 +ufmt_getObject(const UFormattable* fmt, UErrorCode *status); + +/** + * Gets the string value of this object as a UChar string. If the type is not a + * string, status is set to U_INVALID_FORMAT_ERROR and a NULL pointer is returned. + * This function is not thread safe and may modify the UFormattable if need be to terminate the string. + * The returned pointer is not valid if any other functions are called on this UFormattable, or if the UFormattable is closed. + * @param fmt the UFormattable object + * @param status the error code - any conversion or format errors + * @param len if non null, contains the string length on return + * @return the null terminated string value - must not be referenced after any other functions are called on this UFormattable. + * @stable ICU 52 + * @see icu::Formattable::getString(UnicodeString&)const + */ +U_STABLE const UChar* U_EXPORT2 +ufmt_getUChars(UFormattable* fmt, int32_t *len, UErrorCode *status); + +/** + * Get the number of array objects contained, if an array type UFMT_ARRAY + * @param fmt the UFormattable object + * @param status the error code - any conversion or format errors. U_ILLEGAL_ARGUMENT_ERROR if not an array type. 
+ * @return the number of array objects or undefined if not an array type + * @stable ICU 52 + * @see ufmt_getArrayItemByIndex + */ +U_STABLE int32_t U_EXPORT2 +ufmt_getArrayLength(const UFormattable* fmt, UErrorCode *status); + +/** + * Get the specified value from the array of UFormattables. Invalid if the object is not an array type UFMT_ARRAY + * @param fmt the UFormattable object + * @param n the index of the array element to return (0 based). + * @param status the error code - any conversion or format errors. Returns an error if n is out of bounds. + * @return the nth array value, only valid while the containing UFormattable is valid. NULL if not an array. + * @stable ICU 52 + * @see icu::Formattable::getArray(int32_t&, UErrorCode&) const + */ +U_STABLE UFormattable * U_EXPORT2 +ufmt_getArrayItemByIndex(UFormattable* fmt, int32_t n, UErrorCode *status); + +/** + * Returns a numeric string representation of the number contained within this + * formattable, or NULL if this object does not contain numeric type. + * For values obtained by parsing, the returned decimal number retains + * the full precision and range of the original input, unconstrained by + * the limits of a double floating point or a 64 bit int. + * + * This function is not thread safe, and therefore is not declared const, + * even though it is logically const. + * The resulting buffer is owned by the UFormattable and is invalid if any other functions are + * called on the UFormattable. + * + * Possible errors include U_MEMORY_ALLOCATION_ERROR, and + * U_INVALID_STATE_ERROR if the formattable object has not been set to + * a numeric type. + * @param fmt the UFormattable object + * @param len if non-null, on exit contains the string length (not including the terminating null) + * @param status the error code + * @return the character buffer as a NULL terminated string, which is owned by the object and must not be accessed if any other functions are called on this object.
+ * @stable ICU 52 + * @see icu::Formattable::getDecimalNumber(UErrorCode&) + */ +U_STABLE const char * U_EXPORT2 +ufmt_getDecNumChars(UFormattable *fmt, int32_t *len, UErrorCode *status); + +#endif + +#endif diff --git a/intl/icu/source/i18n/unicode/ugender.h b/intl/icu/source/i18n/unicode/ugender.h new file mode 100644 index 000000000..c1e591ed2 --- /dev/null +++ b/intl/icu/source/i18n/unicode/ugender.h @@ -0,0 +1,84 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2010-2013, International Business Machines +* Corporation and others. All Rights Reserved. +***************************************************************************************** +*/ + +#ifndef UGENDER_H +#define UGENDER_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: The purpose of this API is to compute the gender of a list as a + * whole given the gender of each element. + * + */ + +/** + * Genders + * @stable ICU 50 + */ +enum UGender { + /** + * Male gender. + * @stable ICU 50 + */ + UGENDER_MALE, + /** + * Female gender. + * @stable ICU 50 + */ + UGENDER_FEMALE, + /** + * Neutral gender. + * @stable ICU 50 + */ + UGENDER_OTHER +}; +/** + * @stable ICU 50 + */ +typedef enum UGender UGender; + +/** + * Opaque UGenderInfo object for use in C programs. + * @stable ICU 50 + */ +struct UGenderInfo; +typedef struct UGenderInfo UGenderInfo; + +/** + * Opens a new UGenderInfo object given locale. + * @param locale The locale for which the rules are desired. + * @param status UErrorCode pointer + * @return A UGenderInfo for the specified locale, or NULL if an error occurred. 
+ * @stable ICU 50 + */ +U_STABLE const UGenderInfo* U_EXPORT2 +ugender_getInstance(const char *locale, UErrorCode *status); + + +/** + * Given a list, returns the gender of the list as a whole. + * @param genderinfo pointer that ugender_getInstance returns. + * @param genders the gender of each element in the list. + * @param size the size of the list. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The gender of the list. + * @stable ICU 50 + */ +U_STABLE UGender U_EXPORT2 +ugender_getListGender(const UGenderInfo* genderinfo, const UGender *genders, int32_t size, UErrorCode *status); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/ulocdata.h b/intl/icu/source/i18n/unicode/ulocdata.h new file mode 100644 index 000000000..ecf6fdcb3 --- /dev/null +++ b/intl/icu/source/i18n/unicode/ulocdata.h @@ -0,0 +1,296 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* * +* Copyright (C) 2003-2015, International Business Machines * +* Corporation and others. All Rights Reserved. * +* * +****************************************************************************** +* file name: ulocdata.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003Oct21 +* created by: Ram Viswanadha +*/ + +#ifndef __ULOCDATA_H__ +#define __ULOCDATA_H__ + +#include "unicode/ures.h" +#include "unicode/uloc.h" +#include "unicode/uset.h" +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: Provides access to locale data. + */ + +/** Forward declaration of the ULocaleData structure. @stable ICU 3.6 */ +struct ULocaleData; + +/** A locale data object. @stable ICU 3.6 */ +typedef struct ULocaleData ULocaleData; + + + +/** The possible types of exemplar character sets.
+ * @stable ICU 3.4 + */ +typedef enum ULocaleDataExemplarSetType { + /** Basic set @stable ICU 3.4 */ + ULOCDATA_ES_STANDARD=0, + /** Auxiliary set @stable ICU 3.4 */ + ULOCDATA_ES_AUXILIARY=1, + /** Index Character set @stable ICU 4.8 */ + ULOCDATA_ES_INDEX=2, + /** Punctuation set @stable ICU 51 */ + ULOCDATA_ES_PUNCTUATION=3, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal ULocaleDataExemplarSetType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + ULOCDATA_ES_COUNT=4 +#endif // U_HIDE_DEPRECATED_API +} ULocaleDataExemplarSetType; + +/** The possible types of delimiters. + * @stable ICU 3.4 + */ +typedef enum ULocaleDataDelimiterType { + /** Quotation start @stable ICU 3.4 */ + ULOCDATA_QUOTATION_START = 0, + /** Quotation end @stable ICU 3.4 */ + ULOCDATA_QUOTATION_END = 1, + /** Alternate quotation start @stable ICU 3.4 */ + ULOCDATA_ALT_QUOTATION_START = 2, + /** Alternate quotation end @stable ICU 3.4 */ + ULOCDATA_ALT_QUOTATION_END = 3, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal ULocaleDataDelimiterType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + ULOCDATA_DELIMITER_COUNT = 4 +#endif // U_HIDE_DEPRECATED_API +} ULocaleDataDelimiterType; + +/** + * Opens a locale data object for the given locale + * + * @param localeID Specifies the locale associated with this locale + * data object. + * @param status Pointer to error status code. + * @stable ICU 3.4 + */ +U_STABLE ULocaleData* U_EXPORT2 +ulocdata_open(const char *localeID, UErrorCode *status); + +/** + * Closes a locale data object. + * + * @param uld The locale data object to close + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ulocdata_close(ULocaleData *uld); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalULocaleDataPointer + * "Smart pointer" class, closes a ULocaleData via ulocdata_close(). 
+ * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleDataPointer, ULocaleData, ulocdata_close); + +U_NAMESPACE_END + +#endif + +/** + * Sets the "no Substitute" attribute of the locale data + * object. If true, then any methods associated with the + * locale data object will return null when there is no + * data available for that method, given the locale ID + * supplied to ulocdata_open(). + * + * @param uld The locale data object to set. + * @param setting Value of the "no substitute" attribute. + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ulocdata_setNoSubstitute(ULocaleData *uld, UBool setting); + +/** + * Retrieves the current "no Substitute" value of the locale data + * object. If true, then any methods associated with the + * locale data object will return null when there is no + * data available for that method, given the locale ID + * supplied to ulocdata_open(). + * + * @param uld Pointer to the locale data object to query. + * @return UBool Value of the "no substitute" attribute. + * @stable ICU 3.4 + */ +U_STABLE UBool U_EXPORT2 +ulocdata_getNoSubstitute(ULocaleData *uld); + +/** + * Returns the set of exemplar characters for a locale. + * + * @param uld Pointer to the locale data object from which the + * exemplar character set is to be retrieved. + * @param fillIn Pointer to a USet object to receive the + * exemplar character set for the given locale. Previous + * contents of fillIn are lost. <em>If fillIn is NULL, + * then a new USet is created and returned. The caller + * owns the result and must dispose of it by calling + * uset_close.</em> + * @param options Bitmask for options to apply to the exemplar pattern. + * Specify zero to retrieve the exemplar set as it is + * defined in the locale data. Specify + * USET_CASE_INSENSITIVE to retrieve a case-folded + * exemplar set.
See uset_applyPattern for a complete + * list of valid options. The USET_IGNORE_SPACE bit is + * always set, regardless of the value of 'options'. + * @param extype Specifies the type of exemplar set to be retrieved. + * @param status Pointer to an input-output error code value; + * must not be NULL. Will be set to U_MISSING_RESOURCE_ERROR + * if the requested data is not available. + * @return USet* Either fillIn, or if fillIn is NULL, a pointer to + * a newly-allocated USet that the user must close. + * In case of error, NULL is returned. + * @stable ICU 3.4 + */ +U_STABLE USet* U_EXPORT2 +ulocdata_getExemplarSet(ULocaleData *uld, USet *fillIn, + uint32_t options, ULocaleDataExemplarSetType extype, UErrorCode *status); + +/** + * Returns one of the delimiter strings associated with a locale. + * + * @param uld Pointer to the locale data object from which the + * delimiter string is to be retrieved. + * @param type the type of delimiter to be retrieved. + * @param result A pointer to a buffer to receive the result. + * @param resultLength The maximum size of result. + * @param status Pointer to an error code value + * @return int32_t The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +ulocdata_getDelimiter(ULocaleData *uld, ULocaleDataDelimiterType type, UChar *result, int32_t resultLength, UErrorCode *status); + +/** + * Enumeration for representing the measurement systems. + * @stable ICU 2.8 + */ +typedef enum UMeasurementSystem { + UMS_SI, /**< Measurement system specified by SI otherwise known as Metric system. @stable ICU 2.8 */ + UMS_US, /**< Measurement system followed in the United States of America. @stable ICU 2.8 */ + UMS_UK, /**< Mix of metric and imperial units used in Great Britain. @stable ICU 55 */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UMeasurementSystem value. 
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UMS_LIMIT +#endif // U_HIDE_DEPRECATED_API +} UMeasurementSystem; + +/** + * Returns the measurement system used in the locale specified by the localeID. + * Please note that this API will change in ICU 3.6 and will use an ulocdata object. + * + * @param localeID The id of the locale for which the measurement system is to be retrieved. + * @param status Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return UMeasurementSystem the measurement system used in the locale. + * @stable ICU 2.8 + */ +U_STABLE UMeasurementSystem U_EXPORT2 +ulocdata_getMeasurementSystem(const char *localeID, UErrorCode *status); + +/** + * Returns the element that gives the normal business letter size, and customary units. + * The units for the numbers are always in <em>milli-meters</em>. + * For US since 8.5 and 11 do not yield an integral value when converted to milli-meters, + * the values are rounded off. + * So for A4 size paper the height and width are 297 mm and 210 mm respectively, + * and for US letter size the height and width are 279 mm and 216 mm respectively. + * Please note that this API will change in ICU 3.6 and will use an ulocdata object. + * + * @param localeID The id of the locale for which the paper size information is to be retrieved. + * @param height A pointer to int to receive the height information. + * @param width A pointer to int to receive the width information. + * @param status Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @stable ICU 2.8 + */ +U_STABLE void U_EXPORT2 +ulocdata_getPaperSize(const char *localeID, int32_t *height, int32_t *width, UErrorCode *status); + +/** + * Return the current CLDR version used by the library.
+ * @param versionArray fillin that will receive the version number + * @param status error code - could be U_MISSING_RESOURCE_ERROR if the version was not found. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +ulocdata_getCLDRVersion(UVersionInfo versionArray, UErrorCode *status); + +/** + * Returns locale display pattern associated with a locale. + * + * @param uld Pointer to the locale data object from which the + * locale display pattern is to be retrieved. + * @param pattern locale display pattern for locale. + * @param patternCapacity the size of the buffer to store the locale display + * pattern with. + * @param status Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return the actual buffer size needed for localeDisplayPattern. If it's greater + * than patternCapacity, the returned pattern will be truncated. + * + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +ulocdata_getLocaleDisplayPattern(ULocaleData *uld, + UChar *pattern, + int32_t patternCapacity, + UErrorCode *status); + + +/** + * Returns locale separator associated with a locale. + * + * @param uld Pointer to the locale data object from which the + * locale separator is to be retrieved. + * @param separator locale separator for locale. + * @param separatorCapacity the size of the buffer to store the locale + * separator with. + * @param status Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return the actual buffer size needed for localeSeparator. If it's greater + * than separatorCapacity, the returned separator will be truncated.
+ * + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +ulocdata_getLocaleSeparator(ULocaleData *uld, + UChar *separator, + int32_t separatorCapacity, + UErrorCode *status); +#endif diff --git a/intl/icu/source/i18n/unicode/umsg.h b/intl/icu/source/i18n/unicode/umsg.h new file mode 100644 index 000000000..b5f2ae9dc --- /dev/null +++ b/intl/icu/source/i18n/unicode/umsg.h @@ -0,0 +1,625 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2011, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. + ******************************************************************** + * + * file name: umsg.h + * encoding: US-ASCII + * tab size: 8 (not used) + * indentation:4 + * + * Change history: + * + * 08/5/2001 Ram Added C wrappers for C++ API. + ********************************************************************/ + +#ifndef UMSG_H +#define UMSG_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" +#include "unicode/uloc.h" +#include "unicode/parseerr.h" +#include <stdarg.h> + +/** + * \file + * \brief C API: MessageFormat + * + * <h2>MessageFormat C API </h2> + * + * <p>MessageFormat prepares strings for display to users, + * with optional arguments (variables/placeholders). + * The arguments can occur in any order, which is necessary for translation + * into languages with different grammars. + * + * <p>The opaque UMessageFormat type is a thin C wrapper around + * a C++ MessageFormat. It is constructed from a <em>pattern</em> string + * with arguments in {curly braces} which will be replaced by formatted values. + * + * <p>Currently, the C API supports only numbered arguments. + * + * <p>For details about the pattern syntax and behavior, + * especially about the ASCII apostrophe vs. 
the + * real apostrophe (single quote) character \htmlonly’\endhtmlonly (U+2019), + * see the C++ MessageFormat class documentation. + * + * <p>Here are some examples of C API usage: + * Example 1: + * <pre> + * \code + * UChar *result, *tzID, *str; + * UChar pattern[100]; + * int32_t resultLengthOut, resultlength; + * UCalendar *cal; + * UDate d1; + * UDateFormat *def1; + * UErrorCode status = U_ZERO_ERROR; + * + * str=(UChar*)malloc(sizeof(UChar) * (strlen("disturbance in force") +1)); + * u_uastrcpy(str, "disturbance in force"); + * tzID=(UChar*)malloc(sizeof(UChar) * 4); + * u_uastrcpy(tzID, "PST"); + * cal=ucal_open(tzID, u_strlen(tzID), "en_US", UCAL_TRADITIONAL, &status); + * ucal_setDateTime(cal, 1999, UCAL_MARCH, 18, 0, 0, 0, &status); + * d1=ucal_getMillis(cal, &status); + * u_uastrcpy(pattern, "On {0, date, long}, there was a {1} on planet {2,number,integer}"); + * resultlength=0; + * resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, d1, str, 7); + * if(status==U_BUFFER_OVERFLOW_ERROR){ + * status=U_ZERO_ERROR; + * resultlength=resultLengthOut+1; + * result=(UChar*)realloc(result, sizeof(UChar) * resultlength); + * u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, d1, str, 7); + * } + * printf("%s\n", austrdup(result) );//austrdup( a function used to convert UChar* to char*) + * //output>: "On March 18, 1999, there was a disturbance in force on planet 7 + * \endcode + * </pre> + * Typically, the message format will come from resources, and the + * arguments will be dynamically set at runtime. 
+ * <P> + * Example 2: + * <pre> + * \code + * UChar* str; + * UErrorCode status = U_ZERO_ERROR; + * UChar *result; + * UChar pattern[100]; + * int32_t resultlength, resultLengthOut, i; + * double testArgs[]= { 100.0, 1.0, 0.0}; + * + * str=(UChar*)malloc(sizeof(UChar) * 10); + * u_uastrcpy(str, "MyDisk"); + * u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}"); + * for(i=0; i<3; i++){ + * resultlength=0; + * resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str); + * if(status==U_BUFFER_OVERFLOW_ERROR){ + * status=U_ZERO_ERROR; + * resultlength=resultLengthOut+1; + * result=(UChar*)malloc(sizeof(UChar) * resultlength); + * u_formatMessage( "en_US", pattern, u_strlen(pattern), result, resultlength, &status, testArgs[i], str); + * } + * printf("%s\n", austrdup(result) ); //austrdup( a function used to convert UChar* to char*) + * free(result); + * } + * // output, with different testArgs: + * // output: The disk "MyDisk" contains 100 files. + * // output: The disk "MyDisk" contains one file. + * // output: The disk "MyDisk" contains no files.
+ * \endcode + * </pre> + * + * + * Example 3: + * <pre> + * \code + * UChar* str; + * UChar* str1; + * UErrorCode status = U_ZERO_ERROR; + * UChar *result; + * UChar pattern[100]; + * UChar expected[100]; + * int32_t resultlength,resultLengthOut; + + * str=(UChar*)malloc(sizeof(UChar) * 25); + * u_uastrcpy(str, "Kirti"); + * str1=(UChar*)malloc(sizeof(UChar) * 25); + * u_uastrcpy(str1, "female"); + * log_verbose("Testing message format with Select test #1\n:"); + * u_uastrcpy(pattern, "{0} est {1, select, female {all\\u00E9e} other {all\\u00E9}} \\u00E0 Paris."); + * u_uastrcpy(expected, "Kirti est all\\u00E9e \\u00E0 Paris."); + * resultlength=0; + * resultLengthOut=u_formatMessage( "fr", pattern, u_strlen(pattern), NULL, resultlength, &status, str , str1); + * if(status==U_BUFFER_OVERFLOW_ERROR) + * { + * status=U_ZERO_ERROR; + * resultlength=resultLengthOut+1; + * result=(UChar*)malloc(sizeof(UChar) * resultlength); + * u_formatMessage( "fr", pattern, u_strlen(pattern), result, resultlength, &status, str , str1); + * if(u_strcmp(result, expected)==0) + * log_verbose("PASS: MessagFormat successful on Select test#1\n"); + * else{ + * log_err("FAIL: Error in MessageFormat on Select test#1\n GOT %s EXPECTED %s\n", austrdup(result), + * austrdup(expected) ); + * } + * free(result); + * } + * \endcode + * </pre> + */ + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... 
A variable-length argument list containing the arguments specified + * in pattern. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @see u_parseMessage + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_formatMessage(const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + UErrorCode *status, + ...); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param ap A variable-length argument list containing the arguments specified + * in pattern. + * @param status A pointer to an UErrorCode to receive any errors + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @see u_parseMessage + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_vformatMessage( const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + va_list ap, + UErrorCode *status); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse.
+ * @param sourceLength The length of source, or -1 if null-terminated. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_parseMessage( const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + UErrorCode *status, + ...); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param ap A variable-length argument list containing the arguments + * specified in pattern. + * @param status A pointer to an UErrorCode to receive any errors + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_vparseMessage(const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + va_list ap, + UErrorCode *status); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result.
+ * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments specified + * in pattern. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @see u_parseMessage + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_formatMessageWithError( const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + UParseError *parseError, + UErrorCode *status, + ...); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param locale The locale for which the message will be formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @param ap A variable-length argument list containing the arguments specified + * @param status A pointer to an UErrorCode to receive any errors + * in pattern. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_vformatMessageWithError( const char *locale, + const UChar *pattern, + int32_t patternLength, + UChar *result, + int32_t resultLength, + UParseError* parseError, + va_list ap, + UErrorCode *status); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. 
+ * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_parseMessageWithError(const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + UParseError *parseError, + UErrorCode *status, + ...); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #u_formatMessage }. + * @param locale The locale for which the message is formatted + * @param pattern The pattern specifying the message's format + * @param patternLength The length of pattern + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param ap A variable-length argument list containing the arguments + * specified in pattern. + * @param parseError A pointer to UParseError to receive information about errors + * occurred during parsing. + * @param status A pointer to an UErrorCode to receive any errors
+ * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_vparseMessageWithError(const char *locale, + const UChar *pattern, + int32_t patternLength, + const UChar *source, + int32_t sourceLength, + va_list ap, + UParseError *parseError, + UErrorCode* status); + +/*----------------------- New experimental API --------------------------- */ +/** + * The message format object + * @stable ICU 2.0 + */ +typedef void* UMessageFormat; + + +/** + * Open a message formatter with given pattern and for the given locale. + * @param pattern A pattern specifying the format to use. + * @param patternLength Length of the pattern to use + * @param locale The locale for which the messages are formatted. + * @param parseError A pointer to UParseError struct to receive any errors + * occurred during parsing. Can be NULL. + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UMessageFormat to use for formatting + * messages, or 0 if an error occurred. + * @stable ICU 2.0 + */ +U_STABLE UMessageFormat* U_EXPORT2 +umsg_open( const UChar *pattern, + int32_t patternLength, + const char *locale, + UParseError *parseError, + UErrorCode *status); + +/** + * Close a UMessageFormat. + * Once closed, a UMessageFormat may no longer be used. + * @param format The formatter to close. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_close(UMessageFormat* format); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUMessageFormatPointer + * "Smart pointer" class, closes a UMessageFormat via umsg_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUMessageFormatPointer, UMessageFormat, umsg_close); + +U_NAMESPACE_END + +#endif + +/** + * Open a copy of a UMessageFormat. + * This function performs a deep copy.
+ * @param fmt The formatter to copy + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UMessageFormat identical to fmt. + * @stable ICU 2.0 + */ +U_STABLE UMessageFormat U_EXPORT2 +umsg_clone(const UMessageFormat *fmt, + UErrorCode *status); + +/** + * Sets the locale. This locale is used for fetching default number or date + * format information. + * @param fmt The formatter to set + * @param locale The locale the formatter should use. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_setLocale(UMessageFormat *fmt, + const char* locale); + +/** + * Gets the locale. This locale is used for fetching default number or date + * format information. + * @param fmt The formatter to query + * @return the locale. + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +umsg_getLocale(const UMessageFormat *fmt); + +/** + * Sets the pattern. + * @param fmt The formatter to use + * @param pattern The pattern to be applied. + * @param patternLength Length of the pattern to use + * @param parseError Struct to receive information on position + * of error if an error is encountered. Can be NULL. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_applyPattern( UMessageFormat *fmt, + const UChar* pattern, + int32_t patternLength, + UParseError* parseError, + UErrorCode* status); + +/** + * Gets the pattern. + * @param fmt The formatter to use + * @param result A pointer to a buffer to receive the pattern. + * @param resultLength The maximum size of result. + * @param status Output param set to success/failure code on + * exit. If the pattern is invalid, this will be + * set to a failure result.
+ * @return the pattern of the format + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +umsg_toPattern(const UMessageFormat *fmt, + UChar* result, + int32_t resultLength, + UErrorCode* status); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param fmt The formatter to use + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +umsg_format( const UMessageFormat *fmt, + UChar *result, + int32_t resultLength, + UErrorCode *status, + ...); + +/** + * Format a message for a locale. + * This function may perform re-ordering of the arguments depending on the + * locale. For all numeric arguments, double is assumed unless the type is + * explicitly integer. All choice format arguments must be of type double. + * @param fmt The formatter to use + * @param result A pointer to a buffer to receive the formatted message. + * @param resultLength The maximum size of result. + * @param ap A variable-length argument list containing the arguments + * specified in pattern. + * @param status A pointer to an UErrorCode to receive any errors + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +umsg_vformat( const UMessageFormat *fmt, + UChar *result, + int32_t resultLength, + va_list ap, + UErrorCode *status); + +/** + * Parse a message.
+ * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #umsg_format }. + * @param fmt The formatter to use + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param count Output param to receive number of elements returned. + * @param status A pointer to an UErrorCode to receive any errors + * @param ... A variable-length argument list containing the arguments + * specified in pattern. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_parse( const UMessageFormat *fmt, + const UChar *source, + int32_t sourceLength, + int32_t *count, + UErrorCode *status, + ...); + +/** + * Parse a message. + * For numeric arguments, this function will always use doubles. Integer types + * should not be passed. + * This function is not able to parse all output from {@link #umsg_format }. + * @param fmt The formatter to use + * @param source The text to parse. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param count Output param to receive number of elements returned. + * @param ap A variable-length argument list containing the arguments + * specified in pattern. + * @param status A pointer to an UErrorCode to receive any errors + * @see u_formatMessage + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +umsg_vparse(const UMessageFormat *fmt, + const UChar *source, + int32_t sourceLength, + int32_t *count, + va_list ap, + UErrorCode *status); + + +/** + * Convert an 'apostrophe-friendly' pattern into a standard + * pattern. Standard patterns treat all apostrophes as + * quotes, which is problematic in some languages, e.g. + * French, where apostrophe is commonly used. This utility + * assumes that only an unpaired apostrophe immediately before + * a brace is a true quote. Other unpaired apostrophes are paired, + * and the resulting standard pattern string is returned.
+ * + * <p><b>Note</b> it is not guaranteed that the returned pattern + * is indeed a valid pattern. The only effect is to convert + * between patterns having different quoting semantics. + * + * @param pattern the 'apostrophe-friendly' pattern to convert + * @param patternLength the length of pattern, or -1 if unknown and pattern is null-terminated + * @param dest the buffer for the result, or NULL if preflight only + * @param destCapacity the length of the buffer, or 0 if preflighting + * @param ec the error code + * @return the length of the resulting text, not including trailing null + * if buffer has room for the trailing null, it is provided, otherwise + * not + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +umsg_autoQuoteApostrophe(const UChar* pattern, + int32_t patternLength, + UChar* dest, + int32_t destCapacity, + UErrorCode* ec); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/unirepl.h b/intl/icu/source/i18n/unicode/unirepl.h new file mode 100644 index 000000000..1e98ff648 --- /dev/null +++ b/intl/icu/source/i18n/unicode/unirepl.h @@ -0,0 +1,99 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2005, International Business Machines Corporation +* and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 01/14/2002 aliu Creation.
+********************************************************************** +*/ +#ifndef UNIREPL_H +#define UNIREPL_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: UnicodeReplacer + */ + +U_NAMESPACE_BEGIN + +class Replaceable; +class UnicodeString; +class UnicodeSet; + +/** + * <code>UnicodeReplacer</code> defines a protocol for objects that + * replace a range of characters in a Replaceable string with output + * text. The replacement is done via the Replaceable API so as to + * preserve out-of-band data. + * + * <p>This is a mixin class. + * @author Alan Liu + * @stable ICU 2.4 + */ +class U_I18N_API UnicodeReplacer /* not : public UObject because this is an interface/mixin class */ { + + public: + + /** + * Destructor. + * @stable ICU 2.4 + */ + virtual ~UnicodeReplacer(); + + /** + * Replace characters in 'text' from 'start' to 'limit' with the + * output text of this object. Update the 'cursor' parameter to + * give the cursor position and return the length of the + * replacement text. + * + * @param text the text to be matched + * @param start inclusive start index of text to be replaced + * @param limit exclusive end index of text to be replaced; + * must be greater than or equal to start + * @param cursor output parameter for the cursor position. + * Not all replacer objects will update this, but in a complete + * tree of replacer objects, representing the entire output side + * of a transliteration rule, at least one must update it. + * @return the number of 16-bit code units in the text replacing + * the characters at offsets start..(limit-1) in text + * @stable ICU 2.4 + */ + virtual int32_t replace(Replaceable& text, + int32_t start, + int32_t limit, + int32_t& cursor) = 0; + + /** + * Returns a string representation of this replacer. If the + * result of calling this function is passed to the appropriate + * parser, typically TransliteratorParser, it will produce another + * replacer that is equal to this one. 
+ * @param result the string to receive the pattern. Previous + * contents will be deleted. + * @param escapeUnprintable if TRUE then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are defined by + * Utility.isUnprintable(). + * @return a reference to 'result'. + * @stable ICU 2.4 + */ + virtual UnicodeString& toReplacerPattern(UnicodeString& result, + UBool escapeUnprintable) const = 0; + + /** + * Union the set of all characters that may be output by this object + * into the given set. + * @param toUnionTo the set into which to union the output characters + * @stable ICU 2.4 + */ + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0; +}; + +U_NAMESPACE_END + +#endif diff --git a/intl/icu/source/i18n/unicode/unum.h b/intl/icu/source/i18n/unicode/unum.h new file mode 100644 index 000000000..34d54427f --- /dev/null +++ b/intl/icu/source/i18n/unicode/unum.h @@ -0,0 +1,1380 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1997-2015, International Business Machines Corporation and others. +* All Rights Reserved. +* Modification History: +* +* Date Name Description +* 06/24/99 helena Integrated Alan's NF enhancements and Java2 bug fixes +******************************************************************************* +*/ + +#ifndef _UNUM +#define _UNUM + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" +#include "unicode/uloc.h" +#include "unicode/ucurr.h" +#include "unicode/umisc.h" +#include "unicode/parseerr.h" +#include "unicode/uformattable.h" +#include "unicode/udisplaycontext.h" + +/** + * \file + * \brief C API: NumberFormat + * + * <h2> Number Format C API </h2> + * + * Number Format C API Provides functions for + * formatting and parsing a number.
Also provides methods for + * determining which locales have number formats, and what their names + * are. + * <P> + * UNumberFormat helps you to format and parse numbers for any locale. + * Your code can be completely independent of the locale conventions + * for decimal points, thousands-separators, or even the particular + * decimal digits used, or whether the number format is even decimal. + * There are different number format styles like decimal, currency, + * percent and spellout. + * <P> + * To format a number for the current Locale, use one of the static + * factory methods: + * <pre> + * \code + * UChar myString[20]; + * double myNumber = 7.0; + * UErrorCode status = U_ZERO_ERROR; + * UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status); + * unum_formatDouble(nf, myNumber, myString, 20, NULL, &status); + * printf(" Example 1: %s\n", austrdup(myString) ); //austrdup( a function used to convert UChar* to char*) + * \endcode + * </pre> + * If you are formatting multiple numbers, it is more efficient to get + * the format and use it multiple times so that the system doesn't + * have to fetch the information about the local language and country + * conventions multiple times. 
+ * <pre> + * \code + * uint32_t i, resultlength, reslenneeded; + * UErrorCode status = U_ZERO_ERROR; + * UFieldPosition pos; + * uint32_t a[] = { 123, 3333, -1234567 }; + * const uint32_t a_len = sizeof(a) / sizeof(a[0]); + * UNumberFormat* nf; + * UChar* result = NULL; + * + * nf = unum_open(UNUM_DEFAULT, NULL, -1, NULL, NULL, &status); + * for (i = 0; i < a_len; i++) { + * resultlength=0; + * reslenneeded=unum_format(nf, a[i], NULL, resultlength, &pos, &status); + * result = NULL; + * if(status==U_BUFFER_OVERFLOW_ERROR){ + * status=U_ZERO_ERROR; + * resultlength=reslenneeded+1; + * result=(UChar*)malloc(sizeof(UChar) * resultlength); + * unum_format(nf, a[i], result, resultlength, &pos, &status); + * } + * printf( " Example 2: %s\n", austrdup(result)); + * free(result); + * } + * \endcode + * </pre> + * To format a number for a different Locale, specify it in the + * call to unum_open(). + * <pre> + * \code + * UNumberFormat* nf = unum_open(UNUM_DEFAULT, NULL, -1, "fr_FR", NULL, &success) + * \endcode + * </pre> + * You can use a NumberFormat API unum_parse() to parse. + * <pre> + * \code + * UErrorCode status = U_ZERO_ERROR; + * int32_t pos=0; + * int32_t num; + * num = unum_parse(nf, str, u_strlen(str), &pos, &status); + * \endcode + * </pre> + * Use UNUM_DECIMAL to get the normal number format for that country. + * There are other static options available. Use UNUM_CURRENCY + * to get the currency number format for that country. Use UNUM_PERCENT + * to get a format for displaying percentages. With this format, a + * fraction from 0.53 is displayed as 53%. + * <P> + * Use a pattern to create either a DecimalFormat or a RuleBasedNumberFormat + * formatter. The pattern must conform to the syntax defined for those + * formatters. + * <P> + * You can also control the display of numbers with such functions as + * unum_getAttributes() and unum_setAttributes(), which let you set the + * minimum fraction digits, grouping, etc.
+ * @see UNumberFormatAttributes for more details + * <P> + * You can also use forms of the parse and format methods with + * ParsePosition and UFieldPosition to allow you to: + * <ul type=round> + * <li>(a) progressively parse through pieces of a string. + * <li>(b) align the decimal point and other areas. + * </ul> + * <p> + * It is also possible to change or set the symbols used for a particular + * locale like the currency symbol, the grouping seperator , monetary seperator + * etc by making use of functions unum_setSymbols() and unum_getSymbols(). + */ + +/** A number formatter. + * For usage in C programs. + * @stable ICU 2.0 + */ +typedef void* UNumberFormat; + +/** The possible number format styles. + * @stable ICU 2.0 + */ +typedef enum UNumberFormatStyle { + /** + * Decimal format defined by a pattern string. + * @stable ICU 3.0 + */ + UNUM_PATTERN_DECIMAL=0, + /** + * Decimal format ("normal" style). + * @stable ICU 2.0 + */ + UNUM_DECIMAL=1, + /** + * Currency format (generic). + * Defaults to UNUM_CURRENCY_STANDARD style + * (using currency symbol, e.g., "$1.00", with non-accounting + * style for negative values e.g. using minus sign). + * The specific style may be specified using the -cf- locale key. + * @stable ICU 2.0 + */ + UNUM_CURRENCY=2, + /** + * Percent format + * @stable ICU 2.0 + */ + UNUM_PERCENT=3, + /** + * Scientific format + * @stable ICU 2.1 + */ + UNUM_SCIENTIFIC=4, + /** + * Spellout rule-based format. The default ruleset can be specified/changed using + * unum_setTextAttribute with UNUM_DEFAULT_RULESET; the available public rulesets + * can be listed using unum_getTextAttribute with UNUM_PUBLIC_RULESETS. + * @stable ICU 2.0 + */ + UNUM_SPELLOUT=5, + /** + * Ordinal rule-based format . The default ruleset can be specified/changed using + * unum_setTextAttribute with UNUM_DEFAULT_RULESET; the available public rulesets + * can be listed using unum_getTextAttribute with UNUM_PUBLIC_RULESETS. 
+ * @stable ICU 3.0 + */ + UNUM_ORDINAL=6, + /** + * Duration rule-based format + * @stable ICU 3.0 + */ + UNUM_DURATION=7, + /** + * Numbering system rule-based format + * @stable ICU 4.2 + */ + UNUM_NUMBERING_SYSTEM=8, + /** + * Rule-based format defined by a pattern string. + * @stable ICU 3.0 + */ + UNUM_PATTERN_RULEBASED=9, + /** + * Currency format with an ISO currency code, e.g., "USD1.00". + * @stable ICU 4.8 + */ + UNUM_CURRENCY_ISO=10, + /** + * Currency format with a pluralized currency name, + * e.g., "1.00 US dollar" and "3.00 US dollars". + * @stable ICU 4.8 + */ + UNUM_CURRENCY_PLURAL=11, + /** + * Currency format for accounting, e.g., "($3.00)" for + * negative currency amount instead of "-$3.00" ({@link #UNUM_CURRENCY}). + * Overrides any style specified using -cf- key in locale. + * @stable ICU 53 + */ + UNUM_CURRENCY_ACCOUNTING=12, + /** + * Currency format with a currency symbol given CASH usage, e.g., + * "NT$3" instead of "NT$3.23". + * @stable ICU 54 + */ + UNUM_CASH_CURRENCY=13, + /** + * Decimal format expressed using compact notation + * (short form, corresponds to UNumberCompactStyle=UNUM_SHORT) + * e.g. "23K", "45B" + * @stable ICU 56 + */ + UNUM_DECIMAL_COMPACT_SHORT=14, + /** + * Decimal format expressed using compact notation + * (long form, corresponds to UNumberCompactStyle=UNUM_LONG) + * e.g. "23 thousand", "45 billion" + * @stable ICU 56 + */ + UNUM_DECIMAL_COMPACT_LONG=15, + /** + * Currency format with a currency symbol, e.g., "$1.00", + * using non-accounting style for negative values (e.g. minus sign). + * Overrides any style specified using -cf- key in locale. + * @stable ICU 56 + */ + UNUM_CURRENCY_STANDARD=16, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UNumberFormatStyle value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
+ */ + UNUM_FORMAT_STYLE_COUNT=17, +#endif // U_HIDE_DEPRECATED_API + + /** + * Default format + * @stable ICU 2.0 + */ + UNUM_DEFAULT = UNUM_DECIMAL, + /** + * Alias for UNUM_PATTERN_DECIMAL + * @stable ICU 3.0 + */ + UNUM_IGNORE = UNUM_PATTERN_DECIMAL +} UNumberFormatStyle; + +/** The possible number format rounding modes. + * @stable ICU 2.0 + */ +typedef enum UNumberFormatRoundingMode { + UNUM_ROUND_CEILING, + UNUM_ROUND_FLOOR, + UNUM_ROUND_DOWN, + UNUM_ROUND_UP, + /** + * Half-even rounding + * @stable, ICU 3.8 + */ + UNUM_ROUND_HALFEVEN, +#ifndef U_HIDE_DEPRECATED_API + /** + * Half-even rounding, misspelled name + * @deprecated, ICU 3.8 + */ + UNUM_FOUND_HALFEVEN = UNUM_ROUND_HALFEVEN, +#endif /* U_HIDE_DEPRECATED_API */ + UNUM_ROUND_HALFDOWN = UNUM_ROUND_HALFEVEN + 1, + UNUM_ROUND_HALFUP, + /** + * ROUND_UNNECESSARY reports an error if formatted result is not exact. + * @stable ICU 4.8 + */ + UNUM_ROUND_UNNECESSARY +} UNumberFormatRoundingMode; + +/** The possible number format pad positions. + * @stable ICU 2.0 + */ +typedef enum UNumberFormatPadPosition { + UNUM_PAD_BEFORE_PREFIX, + UNUM_PAD_AFTER_PREFIX, + UNUM_PAD_BEFORE_SUFFIX, + UNUM_PAD_AFTER_SUFFIX +} UNumberFormatPadPosition; + +/** + * Constants for specifying short or long format. + * @stable ICU 51 + */ +typedef enum UNumberCompactStyle { + /** @stable ICU 51 */ + UNUM_SHORT, + /** @stable ICU 51 */ + UNUM_LONG + /** @stable ICU 51 */ +} UNumberCompactStyle; + +/** + * Constants for specifying currency spacing + * @stable ICU 4.8 + */ +enum UCurrencySpacing { + /** @stable ICU 4.8 */ + UNUM_CURRENCY_MATCH, + /** @stable ICU 4.8 */ + UNUM_CURRENCY_SURROUNDING_MATCH, + /** @stable ICU 4.8 */ + UNUM_CURRENCY_INSERT, + + // Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, + // it is needed for layout of DecimalFormatSymbols object. + /** + * One more than the highest normal UCurrencySpacing value. 
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UNUM_CURRENCY_SPACING_COUNT +}; +typedef enum UCurrencySpacing UCurrencySpacing; /**< @stable ICU 4.8 */ + + +/** + * FieldPosition and UFieldPosition selectors for format fields + * defined by NumberFormat and UNumberFormat. + * @stable ICU 49 + */ +typedef enum UNumberFormatFields { + /** @stable ICU 49 */ + UNUM_INTEGER_FIELD, + /** @stable ICU 49 */ + UNUM_FRACTION_FIELD, + /** @stable ICU 49 */ + UNUM_DECIMAL_SEPARATOR_FIELD, + /** @stable ICU 49 */ + UNUM_EXPONENT_SYMBOL_FIELD, + /** @stable ICU 49 */ + UNUM_EXPONENT_SIGN_FIELD, + /** @stable ICU 49 */ + UNUM_EXPONENT_FIELD, + /** @stable ICU 49 */ + UNUM_GROUPING_SEPARATOR_FIELD, + /** @stable ICU 49 */ + UNUM_CURRENCY_FIELD, + /** @stable ICU 49 */ + UNUM_PERCENT_FIELD, + /** @stable ICU 49 */ + UNUM_PERMILL_FIELD, + /** @stable ICU 49 */ + UNUM_SIGN_FIELD, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UNumberFormatFields value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UNUM_FIELD_COUNT +#endif // U_HIDE_DEPRECATED_API +} UNumberFormatFields; + + +/** + * Create and return a new UNumberFormat for formatting and parsing + * numbers. A UNumberFormat may be used to format numbers by calling + * {@link #unum_format }, and to parse numbers by calling {@link #unum_parse }. + * The caller must call {@link #unum_close } when done to release resources + * used by this object. + * @param style The type of number format to open: one of + * UNUM_DECIMAL, UNUM_CURRENCY, UNUM_PERCENT, UNUM_SCIENTIFIC, + * UNUM_CURRENCY_ISO, UNUM_CURRENCY_PLURAL, UNUM_SPELLOUT, + * UNUM_ORDINAL, UNUM_DURATION, UNUM_NUMBERING_SYSTEM, + * UNUM_PATTERN_DECIMAL, UNUM_PATTERN_RULEBASED, or UNUM_DEFAULT. 
+ * If UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED is passed then the + * number format is opened using the given pattern, which must conform + * to the syntax described in DecimalFormat or RuleBasedNumberFormat, + * respectively. + * @param pattern A pattern specifying the format to use. + * This parameter is ignored unless the style is + * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED. + * @param patternLength The number of characters in the pattern, or -1 + * if null-terminated. This parameter is ignored unless the style is + * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED. + * @param locale A locale identifier to use to determine formatting + * and parsing conventions, or NULL to use the default locale. + * @param parseErr A pointer to a UParseError struct to receive the + * details of any parsing errors, or NULL if no parsing error details + * are desired. + * @param status A pointer to an input-output UErrorCode. + * @return A pointer to a newly created UNumberFormat, or NULL if an + * error occurred. + * @see unum_close + * @see DecimalFormat + * @stable ICU 2.0 + */ +U_STABLE UNumberFormat* U_EXPORT2 +unum_open( UNumberFormatStyle style, + const UChar* pattern, + int32_t patternLength, + const char* locale, + UParseError* parseErr, + UErrorCode* status); + + +/** +* Close a UNumberFormat. +* Once closed, a UNumberFormat may no longer be used. +* @param fmt The formatter to close. +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +unum_close(UNumberFormat* fmt); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUNumberFormatPointer + * "Smart pointer" class, closes a UNumberFormat via unum_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUNumberFormatPointer, UNumberFormat, unum_close); + +U_NAMESPACE_END + +#endif + +/** + * Open a copy of a UNumberFormat. + * This function performs a deep copy.
+ * @param fmt The format to copy + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UNumberFormat identical to fmt. + * @stable ICU 2.0 + */ +U_STABLE UNumberFormat* U_EXPORT2 +unum_clone(const UNumberFormat *fmt, + UErrorCode *status); + +/** +* Format an integer using a UNumberFormat. +* The integer will be formatted according to the UNumberFormat's locale. +* @param fmt The formatter to use. +* @param number The number to format. +* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If +* the formatted number fits into result but cannot be NULL-terminated (length == resultLength) +* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number +* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR. +* @param resultLength The maximum size of result. +* @param pos A pointer to a UFieldPosition. On input, position->field +* is read. On output, position->beginIndex and position->endIndex indicate +* the beginning and ending indices of field number position->field, if such +* a field exists. This parameter may be NULL, in which case it is ignored. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. +* @see unum_formatInt64 +* @see unum_formatDouble +* @see unum_parse +* @see unum_parseInt64 +* @see unum_parseDouble +* @see UFieldPosition +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_format( const UNumberFormat* fmt, + int32_t number, + UChar* result, + int32_t resultLength, + UFieldPosition *pos, + UErrorCode* status); + +/** +* Format an int64 using a UNumberFormat. +* The int64 will be formatted according to the UNumberFormat's locale. +* @param fmt The formatter to use. +* @param number The number to format. +* @param result A pointer to a buffer to receive the NULL-terminated formatted number.
If +* the formatted number fits into result but cannot be NULL-terminated (length == resultLength) +* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number +* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR. +* @param resultLength The maximum size of result. +* @param pos A pointer to a UFieldPosition. On input, position->field +* is read. On output, position->beginIndex and position->endIndex indicate +* the beginning and ending indices of field number position->field, if such +* a field exists. This parameter may be NULL, in which case it is ignored. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. +* @see unum_format +* @see unum_formatDouble +* @see unum_parse +* @see unum_parseInt64 +* @see unum_parseDouble +* @see UFieldPosition +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_formatInt64(const UNumberFormat *fmt, + int64_t number, + UChar* result, + int32_t resultLength, + UFieldPosition *pos, + UErrorCode* status); + +/** +* Format a double using a UNumberFormat. +* The double will be formatted according to the UNumberFormat's locale. +* @param fmt The formatter to use. +* @param number The number to format. +* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If +* the formatted number fits into result but cannot be NULL-terminated (length == resultLength) +* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number +* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR. +* @param resultLength The maximum size of result. +* @param pos A pointer to a UFieldPosition. On input, position->field +* is read. On output, position->beginIndex and position->endIndex indicate +* the beginning and ending indices of field number position->field, if such +* a field exists.
This parameter may be NULL, in which case it is ignored. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. +* @see unum_format +* @see unum_formatInt64 +* @see unum_parse +* @see unum_parseInt64 +* @see unum_parseDouble +* @see UFieldPosition +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_formatDouble( const UNumberFormat* fmt, + double number, + UChar* result, + int32_t resultLength, + UFieldPosition *pos, /* 0 if ignore */ + UErrorCode* status); + +/** +* Format a decimal number using a UNumberFormat. +* The number will be formatted according to the UNumberFormat's locale. +* The syntax of the input number is a "numeric string" +* as defined in the Decimal Arithmetic Specification, available at +* http://speleotrove.com/decimal +* @param fmt The formatter to use. +* @param number The number to format. +* @param length The length of the input number, or -1 if the input is nul-terminated. +* @param result A pointer to a buffer to receive the NULL-terminated formatted number. If +* the formatted number fits into result but cannot be NULL-terminated (length == resultLength) +* then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number +* doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR. +* @param resultLength The maximum size of result. +* @param pos A pointer to a UFieldPosition. On input, position->field +* is read. On output, position->beginIndex and position->endIndex indicate +* the beginning and ending indices of field number position->field, if such +* a field exists. This parameter may be NULL, in which case it is ignored. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated.
+* @see unum_format +* @see unum_formatInt64 +* @see unum_parse +* @see unum_parseInt64 +* @see unum_parseDouble +* @see UFieldPosition +* @stable ICU 4.4 +*/ +U_STABLE int32_t U_EXPORT2 +unum_formatDecimal( const UNumberFormat* fmt, + const char * number, + int32_t length, + UChar* result, + int32_t resultLength, + UFieldPosition *pos, /* 0 if ignore */ + UErrorCode* status); + +/** + * Format a double currency amount using a UNumberFormat. + * The double will be formatted according to the UNumberFormat's locale. + * @param fmt the formatter to use + * @param number the number to format + * @param currency the 3-letter null-terminated ISO 4217 currency code + * @param result A pointer to a buffer to receive the NULL-terminated formatted number. If + * the formatted number fits into dest but cannot be NULL-terminated (length == resultLength) + * then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number + * doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * @param resultLength the maximum number of UChars to write to result + * @param pos a pointer to a UFieldPosition. On input, + * position->field is read. On output, position->beginIndex and + * position->endIndex indicate the beginning and ending indices of + * field number position->field, if such a field exists. This + * parameter may be NULL, in which case it is ignored. + * @param status a pointer to an input-output UErrorCode + * @return the total buffer size needed; if greater than resultLength, + * the output was truncated. + * @see unum_formatDouble + * @see unum_parseDoubleCurrency + * @see UFieldPosition + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +unum_formatDoubleCurrency(const UNumberFormat* fmt, + double number, + UChar* currency, + UChar* result, + int32_t resultLength, + UFieldPosition* pos, + UErrorCode* status); + +/** + * Format a UFormattable into a string. 
+ * @param fmt the formatter to use + * @param number the number to format, as a UFormattable + * @param result A pointer to a buffer to receive the NULL-terminated formatted number. If + * the formatted number fits into dest but cannot be NULL-terminated (length == resultLength) + * then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the formatted number + * doesn't fit into result then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * @param resultLength the maximum number of UChars to write to result + * @param pos a pointer to a UFieldPosition. On input, + * position->field is read. On output, position->beginIndex and + * position->endIndex indicate the beginning and ending indices of + * field number position->field, if such a field exists. This + * parameter may be NULL, in which case it is ignored. + * @param status a pointer to an input-output UErrorCode + * @return the total buffer size needed; if greater than resultLength, + * the output was truncated. Will return 0 on error. + * @see unum_parseToUFormattable + * @stable ICU 52 + */ +U_STABLE int32_t U_EXPORT2 +unum_formatUFormattable(const UNumberFormat* fmt, + const UFormattable *number, + UChar *result, + int32_t resultLength, + UFieldPosition *pos, + UErrorCode *status); + +/** +* Parse a string into an integer using a UNumberFormat. +* The string will be parsed according to the UNumberFormat's locale. +* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT +* and UNUM_DECIMAL_COMPACT_LONG. +* @param fmt The formatter to use. +* @param text The text to parse. +* @param textLength The length of text, or -1 if null-terminated. +* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which +* to begin parsing. If not NULL, on output the offset at which parsing ended. 
+* @param status A pointer to an UErrorCode to receive any errors +* @return The value of the parsed integer +* @see unum_parseInt64 +* @see unum_parseDouble +* @see unum_format +* @see unum_formatInt64 +* @see unum_formatDouble +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_parse( const UNumberFormat* fmt, + const UChar* text, + int32_t textLength, + int32_t *parsePos /* 0 = start */, + UErrorCode *status); + +/** +* Parse a string into an int64 using a UNumberFormat. +* The string will be parsed according to the UNumberFormat's locale. +* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT +* and UNUM_DECIMAL_COMPACT_LONG. +* @param fmt The formatter to use. +* @param text The text to parse. +* @param textLength The length of text, or -1 if null-terminated. +* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which +* to begin parsing. If not NULL, on output the offset at which parsing ended. +* @param status A pointer to an UErrorCode to receive any errors +* @return The value of the parsed integer +* @see unum_parse +* @see unum_parseDouble +* @see unum_format +* @see unum_formatInt64 +* @see unum_formatDouble +* @stable ICU 2.8 +*/ +U_STABLE int64_t U_EXPORT2 +unum_parseInt64(const UNumberFormat* fmt, + const UChar* text, + int32_t textLength, + int32_t *parsePos /* 0 = start */, + UErrorCode *status); + +/** +* Parse a string into a double using a UNumberFormat. +* The string will be parsed according to the UNumberFormat's locale. +* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT +* and UNUM_DECIMAL_COMPACT_LONG. +* @param fmt The formatter to use. +* @param text The text to parse. +* @param textLength The length of text, or -1 if null-terminated. +* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which +* to begin parsing. If not NULL, on output the offset at which parsing ended. 
+* @param status A pointer to an UErrorCode to receive any errors +* @return The value of the parsed double +* @see unum_parse +* @see unum_parseInt64 +* @see unum_format +* @see unum_formatInt64 +* @see unum_formatDouble +* @stable ICU 2.0 +*/ +U_STABLE double U_EXPORT2 +unum_parseDouble( const UNumberFormat* fmt, + const UChar* text, + int32_t textLength, + int32_t *parsePos /* 0 = start */, + UErrorCode *status); + + +/** +* Parse a number from a string into an unformatted numeric string using a UNumberFormat. +* The input string will be parsed according to the UNumberFormat's locale. +* The syntax of the output is a "numeric string" +* as defined in the Decimal Arithmetic Specification, available at +* http://speleotrove.com/decimal +* Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT +* and UNUM_DECIMAL_COMPACT_LONG. +* @param fmt The formatter to use. +* @param text The text to parse. +* @param textLength The length of text, or -1 if null-terminated. +* @param parsePos If not NULL, on input a pointer to an integer specifying the offset at which +* to begin parsing. If not NULL, on output the offset at which parsing ended. +* @param outBuf A (char *) buffer to receive the parsed number as a string. The output string +* will be nul-terminated if there is sufficient space. +* @param outBufLength The size of the output buffer. May be zero, in which case +* the outBuf pointer may be NULL, and the function will return the +* size of the output string. +* @param status A pointer to an UErrorCode to receive any errors +* @return the length of the output string, not including any terminating nul. 
+* @see unum_parse +* @see unum_parseInt64 +* @see unum_format +* @see unum_formatInt64 +* @see unum_formatDouble +* @stable ICU 4.4 +*/ +U_STABLE int32_t U_EXPORT2 +unum_parseDecimal(const UNumberFormat* fmt, + const UChar* text, + int32_t textLength, + int32_t *parsePos /* 0 = start */, + char *outBuf, + int32_t outBufLength, + UErrorCode *status); + +/** + * Parse a string into a double and a currency using a UNumberFormat. + * The string will be parsed according to the UNumberFormat's locale. + * @param fmt the formatter to use + * @param text the text to parse + * @param textLength the length of text, or -1 if null-terminated + * @param parsePos a pointer to an offset index into text at which to + * begin parsing. On output, *parsePos will point after the last + * parsed character. This parameter may be NULL, in which case parsing + * begins at offset 0. + * @param currency a pointer to the buffer to receive the parsed null- + * terminated currency. This buffer must have a capacity of at least + * 4 UChars. + * @param status a pointer to an input-output UErrorCode + * @return the parsed double + * @see unum_parseDouble + * @see unum_formatDoubleCurrency + * @stable ICU 3.0 + */ +U_STABLE double U_EXPORT2 +unum_parseDoubleCurrency(const UNumberFormat* fmt, + const UChar* text, + int32_t textLength, + int32_t* parsePos, /* 0 = start */ + UChar* currency, + UErrorCode* status); + +/** + * Parse a UChar string into a UFormattable. + * Example code: + * \snippet test/cintltst/cnumtst.c unum_parseToUFormattable + * Note: parsing is not supported for styles UNUM_DECIMAL_COMPACT_SHORT + * and UNUM_DECIMAL_COMPACT_LONG. + * @param fmt the formatter to use + * @param result the UFormattable to hold the result. If NULL, a new UFormattable will be allocated (which the caller must close with ufmt_close). 
+ * @param text the text to parse + * @param textLength the length of text, or -1 if null-terminated + * @param parsePos a pointer to an offset index into text at which to + * begin parsing. On output, *parsePos will point after the last + * parsed character. This parameter may be NULL in which case parsing + * begins at offset 0. + * @param status a pointer to an input-output UErrorCode + * @return the UFormattable. Will be ==result unless NULL was passed in for result, in which case it will be the newly opened UFormattable. + * @see ufmt_getType + * @see ufmt_close + * @stable ICU 52 + */ +U_STABLE UFormattable* U_EXPORT2 +unum_parseToUFormattable(const UNumberFormat* fmt, + UFormattable *result, + const UChar* text, + int32_t textLength, + int32_t* parsePos, /* 0 = start */ + UErrorCode* status); + +/** + * Set the pattern used by a UNumberFormat. This can only be used + * on a DecimalFormat, other formats return U_UNSUPPORTED_ERROR + * in the status. + * @param format The formatter to set. + * @param localized TRUE if the pattern is localized, FALSE otherwise. + * @param pattern The new pattern + * @param patternLength The length of pattern, or -1 if null-terminated. + * @param parseError A pointer to UParseError to receive information + * about errors that occurred during parsing, or NULL if no parse error + * information is desired. + * @param status A pointer to an input-output UErrorCode. + * @see unum_toPattern + * @see DecimalFormat + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +unum_applyPattern( UNumberFormat *format, + UBool localized, + const UChar *pattern, + int32_t patternLength, + UParseError *parseError, + UErrorCode *status + ); + +/** +* Get a locale for which decimal formatting patterns are available. +* A UNumberFormat in a locale returned by this function will perform the correct +* formatting and parsing for the locale. The results of this call are not +* valid for rule-based number formats.
+* @param localeIndex The index of the desired locale. +* @return A locale for which number formatting patterns are available, or 0 if none. +* @see unum_countAvailable +* @stable ICU 2.0 +*/ +U_STABLE const char* U_EXPORT2 +unum_getAvailable(int32_t localeIndex); + +/** +* Determine how many locales have decimal formatting patterns available. The +* results of this call are not valid for rule-based number formats. +* This function is useful for determining the loop ending condition for +* calls to {@link #unum_getAvailable }. +* @return The number of locales for which decimal formatting patterns are available. +* @see unum_getAvailable +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_countAvailable(void); + +#if UCONFIG_HAVE_PARSEALLINPUT +/* The UNumberFormatAttributeValue type cannot be #ifndef U_HIDE_INTERNAL_API, needed for .h variable declaration */ +/** + * @internal + */ +typedef enum UNumberFormatAttributeValue { +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + UNUM_NO = 0, + /** @internal */ + UNUM_YES = 1, + /** @internal */ + UNUM_MAYBE = 2 +#else + /** @internal */ + UNUM_FORMAT_ATTRIBUTE_VALUE_HIDDEN +#endif /* U_HIDE_INTERNAL_API */ +} UNumberFormatAttributeValue; +#endif + +/** The possible UNumberFormat numeric attributes @stable ICU 2.0 */ +typedef enum UNumberFormatAttribute { + /** Parse integers only */ + UNUM_PARSE_INT_ONLY, + /** Use grouping separator */ + UNUM_GROUPING_USED, + /** Always show decimal point */ + UNUM_DECIMAL_ALWAYS_SHOWN, + /** Maximum integer digits */ + UNUM_MAX_INTEGER_DIGITS, + /** Minimum integer digits */ + UNUM_MIN_INTEGER_DIGITS, + /** Integer digits */ + UNUM_INTEGER_DIGITS, + /** Maximum fraction digits */ + UNUM_MAX_FRACTION_DIGITS, + /** Minimum fraction digits */ + UNUM_MIN_FRACTION_DIGITS, + /** Fraction digits */ + UNUM_FRACTION_DIGITS, + /** Multiplier */ + UNUM_MULTIPLIER, + /** Grouping size */ + UNUM_GROUPING_SIZE, + /** Rounding Mode */ + UNUM_ROUNDING_MODE, + /** Rounding increment */ + 
UNUM_ROUNDING_INCREMENT, + /** The width to which the output of <code>format()</code> is padded. */ + UNUM_FORMAT_WIDTH, + /** The position at which padding will take place. */ + UNUM_PADDING_POSITION, + /** Secondary grouping size */ + UNUM_SECONDARY_GROUPING_SIZE, + /** Use significant digits + * @stable ICU 3.0 */ + UNUM_SIGNIFICANT_DIGITS_USED, + /** Minimum significant digits + * @stable ICU 3.0 */ + UNUM_MIN_SIGNIFICANT_DIGITS, + /** Maximum significant digits + * @stable ICU 3.0 */ + UNUM_MAX_SIGNIFICANT_DIGITS, + /** Lenient parse mode used by rule-based formats. + * @stable ICU 3.0 + */ + UNUM_LENIENT_PARSE, +#if UCONFIG_HAVE_PARSEALLINPUT + /** Consume all input. (may use fastpath). Set to UNUM_YES (require fastpath), UNUM_NO (skip fastpath), or UNUM_MAYBE (heuristic). + * This is an internal ICU API. Do not use. + * @internal + */ + UNUM_PARSE_ALL_INPUT = 20, +#endif + /** + * Scale, which adjusts the position of the + * decimal point when formatting. Amounts will be multiplied by 10 ^ (scale) + * before they are formatted. The default value for the scale is 0 ( no adjustment ). + * + * <p>Example: setting the scale to 3, 123 formats as "123,000" + * <p>Example: setting the scale to -4, 123 formats as "0.0123" + * + * @stable ICU 51 */ + UNUM_SCALE = 21, +#ifndef U_HIDE_INTERNAL_API + /** + * Minimum grouping digits, technology preview. + * See DecimalFormat::getMinimumGroupingDigits(). + * + * @internal technology preview + */ + UNUM_MINIMUM_GROUPING_DIGITS = 22, + /* TODO: test C API when it becomes @draft */ +#endif /* U_HIDE_INTERNAL_API */ + + /** + * if this attribute is set to 0, it is set to UNUM_CURRENCY_STANDARD purpose, + * otherwise it is UNUM_CURRENCY_CASH purpose + * Default: 0 (UNUM_CURRENCY_STANDARD purpose) + * @stable ICU 54 + */ + UNUM_CURRENCY_USAGE = 23, + + /* The following cannot be #ifndef U_HIDE_INTERNAL_API, needed in .h file variable declarations */ + /** One below the first bitfield-boolean item.
+ * All items after this one are stored in boolean form. + * @internal */ + UNUM_MAX_NONBOOLEAN_ATTRIBUTE = 0x0FFF, + + /** If 1, specifies that if setting the "max integer digits" attribute would truncate a value, set an error status rather than silently truncating. + * For example, formatting the value 1234 with 4 max int digits would succeed, but formatting 12345 would fail. There is no effect on parsing. + * Default: 0 (not set) + * @stable ICU 50 + */ + UNUM_FORMAT_FAIL_IF_MORE_THAN_MAX_DIGITS = 0x1000, + /** + * if this attribute is set to 1, specifies that, if the pattern doesn't contain an exponent, the exponent will not be parsed. If the pattern does contain an exponent, this attribute has no effect. + * Has no effect on formatting. + * Default: 0 (unset) + * @stable ICU 50 + */ + UNUM_PARSE_NO_EXPONENT, + + /** + * if this attribute is set to 1, specifies that, if the pattern contains a + * decimal mark the input is required to have one. If this attribute is set to 0, + * specifies that input does not have to contain a decimal mark. + * Has no effect on formatting. + * Default: 0 (unset) + * @stable ICU 54 + */ + UNUM_PARSE_DECIMAL_MARK_REQUIRED = 0x1002, + + /* The following cannot be #ifndef U_HIDE_INTERNAL_API, needed in .h file variable declarations */ + /** Limit of boolean attributes. + * @internal */ + UNUM_LIMIT_BOOLEAN_ATTRIBUTE = 0x1003 +} UNumberFormatAttribute; + +/** +* Get a numeric attribute associated with a UNumberFormat. +* An example of a numeric attribute is the number of integer digits a formatter will produce. +* @param fmt The formatter to query.
+* @param attr The attribute to query; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED, +* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS, +* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER, +* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE, +* UNUM_SCALE, UNUM_MINIMUM_GROUPING_DIGITS. +* @return The value of attr. +* @see unum_setAttribute +* @see unum_getDoubleAttribute +* @see unum_setDoubleAttribute +* @see unum_getTextAttribute +* @see unum_setTextAttribute +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_getAttribute(const UNumberFormat* fmt, + UNumberFormatAttribute attr); + +/** +* Set a numeric attribute associated with a UNumberFormat. +* An example of a numeric attribute is the number of integer digits a formatter will produce. If the +* formatter does not understand the attribute, the call is ignored. Rule-based formatters only understand +* the lenient-parse attribute. +* @param fmt The formatter to set. +* @param attr The attribute to set; one of UNUM_PARSE_INT_ONLY, UNUM_GROUPING_USED, +* UNUM_DECIMAL_ALWAYS_SHOWN, UNUM_MAX_INTEGER_DIGITS, UNUM_MIN_INTEGER_DIGITS, UNUM_INTEGER_DIGITS, +* UNUM_MAX_FRACTION_DIGITS, UNUM_MIN_FRACTION_DIGITS, UNUM_FRACTION_DIGITS, UNUM_MULTIPLIER, +* UNUM_GROUPING_SIZE, UNUM_ROUNDING_MODE, UNUM_FORMAT_WIDTH, UNUM_PADDING_POSITION, UNUM_SECONDARY_GROUPING_SIZE, +* UNUM_LENIENT_PARSE, UNUM_SCALE, UNUM_MINIMUM_GROUPING_DIGITS. +* @param newValue The new value of attr. +* @see unum_getAttribute +* @see unum_getDoubleAttribute +* @see unum_setDoubleAttribute +* @see unum_getTextAttribute +* @see unum_setTextAttribute +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +unum_setAttribute( UNumberFormat* fmt, + UNumberFormatAttribute attr, + int32_t newValue); + + +/** +* Get a numeric attribute associated with a UNumberFormat. 
+* An example of a numeric attribute is the number of integer digits a formatter will produce. +* If the formatter does not understand the attribute, -1 is returned. +* @param fmt The formatter to query. +* @param attr The attribute to query; e.g. UNUM_ROUNDING_INCREMENT. +* @return The value of attr. +* @see unum_getAttribute +* @see unum_setAttribute +* @see unum_setDoubleAttribute +* @see unum_getTextAttribute +* @see unum_setTextAttribute +* @stable ICU 2.0 +*/ +U_STABLE double U_EXPORT2 +unum_getDoubleAttribute(const UNumberFormat* fmt, + UNumberFormatAttribute attr); + +/** +* Set a numeric attribute associated with a UNumberFormat. +* An example of a numeric attribute is the number of integer digits a formatter will produce. +* If the formatter does not understand the attribute, this call is ignored. +* @param fmt The formatter to set. +* @param attr The attribute to set; e.g. UNUM_ROUNDING_INCREMENT. +* @param newValue The new value of attr. +* @see unum_getAttribute +* @see unum_setAttribute +* @see unum_getDoubleAttribute +* @see unum_getTextAttribute +* @see unum_setTextAttribute +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +unum_setDoubleAttribute( UNumberFormat* fmt, + UNumberFormatAttribute attr, + double newValue); + +/** The possible UNumberFormat text attributes @stable ICU 2.0*/ +typedef enum UNumberFormatTextAttribute { + /** Positive prefix */ + UNUM_POSITIVE_PREFIX, + /** Positive suffix */ + UNUM_POSITIVE_SUFFIX, + /** Negative prefix */ + UNUM_NEGATIVE_PREFIX, + /** Negative suffix */ + UNUM_NEGATIVE_SUFFIX, + /** The character used to pad to the format width. */ + UNUM_PADDING_CHARACTER, + /** The ISO currency code */ + UNUM_CURRENCY_CODE, + /** + * The default rule set, such as "%spellout-numbering-year:", "%spellout-cardinal:", + * "%spellout-ordinal-masculine-plural:", "%spellout-ordinal-feminine:", or + * "%spellout-ordinal-neuter:". The available public rulesets can be listed using + * unum_getTextAttribute with UNUM_PUBLIC_RULESETS. 
This is only available with + * rule-based formatters. + * @stable ICU 3.0 + */ + UNUM_DEFAULT_RULESET, + /** + * The public rule sets. This is only available with rule-based formatters. + * This is a read-only attribute. The public rulesets are returned as a + * single string, with each ruleset name delimited by ';' (semicolon). See the + * CLDR LDML spec for more information about RBNF rulesets: + * http://www.unicode.org/reports/tr35/tr35-numbers.html#Rule-Based_Number_Formatting + * @stable ICU 3.0 + */ + UNUM_PUBLIC_RULESETS +} UNumberFormatTextAttribute; + +/** +* Get a text attribute associated with a UNumberFormat. +* An example of a text attribute is the suffix for positive numbers. If the formatter +* does not understand the attribute, U_UNSUPPORTED_ERROR is returned as the status. +* Rule-based formatters only understand UNUM_DEFAULT_RULESET and UNUM_PUBLIC_RULESETS. +* @param fmt The formatter to query. +* @param tag The attribute to query; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX, +* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE, +* UNUM_DEFAULT_RULESET, or UNUM_PUBLIC_RULESETS. +* @param result A pointer to a buffer to receive the attribute. +* @param resultLength The maximum size of result. +* @param status A pointer to an UErrorCode to receive any errors +* @return The total buffer size needed; if greater than resultLength, the output was truncated. +* @see unum_setTextAttribute +* @see unum_getAttribute +* @see unum_setAttribute +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_getTextAttribute( const UNumberFormat* fmt, + UNumberFormatTextAttribute tag, + UChar* result, + int32_t resultLength, + UErrorCode* status); + +/** +* Set a text attribute associated with a UNumberFormat. +* An example of a text attribute is the suffix for positive numbers. Rule-based formatters +* only understand UNUM_DEFAULT_RULESET. +* @param fmt The formatter to set. 
+* @param tag The attribute to set; one of UNUM_POSITIVE_PREFIX, UNUM_POSITIVE_SUFFIX, +* UNUM_NEGATIVE_PREFIX, UNUM_NEGATIVE_SUFFIX, UNUM_PADDING_CHARACTER, UNUM_CURRENCY_CODE, +* or UNUM_DEFAULT_RULESET. +* @param newValue The new value of attr. +* @param newValueLength The length of newValue, or -1 if null-terminated. +* @param status A pointer to an UErrorCode to receive any errors +* @see unum_getTextAttribute +* @see unum_getAttribute +* @see unum_setAttribute +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +unum_setTextAttribute( UNumberFormat* fmt, + UNumberFormatTextAttribute tag, + const UChar* newValue, + int32_t newValueLength, + UErrorCode *status); + +/** + * Extract the pattern from a UNumberFormat. The pattern will follow + * the DecimalFormat pattern syntax. + * @param fmt The formatter to query. + * @param isPatternLocalized TRUE if the pattern should be localized, + * FALSE otherwise. This is ignored if the formatter is a rule-based + * formatter. + * @param result A pointer to a buffer to receive the pattern. + * @param resultLength The maximum size of result. + * @param status A pointer to an input-output UErrorCode. + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @see unum_applyPattern + * @see DecimalFormat + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +unum_toPattern( const UNumberFormat* fmt, + UBool isPatternLocalized, + UChar* result, + int32_t resultLength, + UErrorCode* status); + + +/** + * Constants for specifying a number format symbol. 
+ * @stable ICU 2.0 + */ +typedef enum UNumberFormatSymbol { + /** The decimal separator */ + UNUM_DECIMAL_SEPARATOR_SYMBOL = 0, + /** The grouping separator */ + UNUM_GROUPING_SEPARATOR_SYMBOL = 1, + /** The pattern separator */ + UNUM_PATTERN_SEPARATOR_SYMBOL = 2, + /** The percent sign */ + UNUM_PERCENT_SYMBOL = 3, + /** Zero*/ + UNUM_ZERO_DIGIT_SYMBOL = 4, + /** Character representing a digit in the pattern */ + UNUM_DIGIT_SYMBOL = 5, + /** The minus sign */ + UNUM_MINUS_SIGN_SYMBOL = 6, + /** The plus sign */ + UNUM_PLUS_SIGN_SYMBOL = 7, + /** The currency symbol */ + UNUM_CURRENCY_SYMBOL = 8, + /** The international currency symbol */ + UNUM_INTL_CURRENCY_SYMBOL = 9, + /** The monetary separator */ + UNUM_MONETARY_SEPARATOR_SYMBOL = 10, + /** The exponential symbol */ + UNUM_EXPONENTIAL_SYMBOL = 11, + /** Per mill symbol */ + UNUM_PERMILL_SYMBOL = 12, + /** Escape padding character */ + UNUM_PAD_ESCAPE_SYMBOL = 13, + /** Infinity symbol */ + UNUM_INFINITY_SYMBOL = 14, + /** Nan symbol */ + UNUM_NAN_SYMBOL = 15, + /** Significant digit symbol + * @stable ICU 3.0 */ + UNUM_SIGNIFICANT_DIGIT_SYMBOL = 16, + /** The monetary grouping separator + * @stable ICU 3.6 + */ + UNUM_MONETARY_GROUPING_SEPARATOR_SYMBOL = 17, + /** One + * @stable ICU 4.6 + */ + UNUM_ONE_DIGIT_SYMBOL = 18, + /** Two + * @stable ICU 4.6 + */ + UNUM_TWO_DIGIT_SYMBOL = 19, + /** Three + * @stable ICU 4.6 + */ + UNUM_THREE_DIGIT_SYMBOL = 20, + /** Four + * @stable ICU 4.6 + */ + UNUM_FOUR_DIGIT_SYMBOL = 21, + /** Five + * @stable ICU 4.6 + */ + UNUM_FIVE_DIGIT_SYMBOL = 22, + /** Six + * @stable ICU 4.6 + */ + UNUM_SIX_DIGIT_SYMBOL = 23, + /** Seven + * @stable ICU 4.6 + */ + UNUM_SEVEN_DIGIT_SYMBOL = 24, + /** Eight + * @stable ICU 4.6 + */ + UNUM_EIGHT_DIGIT_SYMBOL = 25, + /** Nine + * @stable ICU 4.6 + */ + UNUM_NINE_DIGIT_SYMBOL = 26, + + /** Multiplication sign + * @stable ICU 54 + */ + UNUM_EXPONENT_MULTIPLICATION_SYMBOL = 27, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the 
highest normal UNumberFormatSymbol value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UNUM_FORMAT_SYMBOL_COUNT = 28 +#endif // U_HIDE_DEPRECATED_API +} UNumberFormatSymbol; + +/** +* Get a symbol associated with a UNumberFormat. +* A UNumberFormat uses symbols to represent the special locale-dependent +* characters in a number, for example the percent sign. This API is not +* supported for rule-based formatters. +* @param fmt The formatter to query. +* @param symbol The UNumberFormatSymbol constant for the symbol to get +* @param buffer The string buffer that will receive the symbol string; +* if it is NULL, then only the length of the symbol is returned +* @param size The size of the string buffer +* @param status A pointer to an UErrorCode to receive any errors +* @return The length of the symbol; the buffer is not modified if +* <code>length>=size</code> +* @see unum_setSymbol +* @stable ICU 2.0 +*/ +U_STABLE int32_t U_EXPORT2 +unum_getSymbol(const UNumberFormat *fmt, + UNumberFormatSymbol symbol, + UChar *buffer, + int32_t size, + UErrorCode *status); + +/** +* Set a symbol associated with a UNumberFormat. +* A UNumberFormat uses symbols to represent the special locale-dependent +* characters in a number, for example the percent sign. This API is not +* supported for rule-based formatters. +* @param fmt The formatter to set. +* @param symbol The UNumberFormatSymbol constant for the symbol to set +* @param value The string to set the symbol to +* @param length The length of the string, or -1 for a zero-terminated string +* @param status A pointer to an UErrorCode to receive any errors. +* @see unum_getSymbol +* @stable ICU 2.0 +*/ +U_STABLE void U_EXPORT2 +unum_setSymbol(UNumberFormat *fmt, + UNumberFormatSymbol symbol, + const UChar *value, + int32_t length, + UErrorCode *status); + + +/** + * Get the locale for this number format object. + * You can choose between valid and actual locale. 
+ * @param fmt The formatter to get the locale from + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale name + * @stable ICU 2.8 + */ +U_STABLE const char* U_EXPORT2 +unum_getLocaleByType(const UNumberFormat *fmt, + ULocDataLocaleType type, + UErrorCode* status); + +/** + * Set a particular UDisplayContext value in the formatter, such as + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. + * @param fmt The formatter for which to set a UDisplayContext value. + * @param value The UDisplayContext value to set. + * @param status A pointer to an UErrorCode to receive any errors + * @stable ICU 53 + */ +U_STABLE void U_EXPORT2 +unum_setContext(UNumberFormat* fmt, UDisplayContext value, UErrorCode* status); + +/** + * Get the formatter's UDisplayContext value for the specified UDisplayContextType, + * such as UDISPCTX_TYPE_CAPITALIZATION. + * @param fmt The formatter to query. + * @param type The UDisplayContextType whose value to return + * @param status A pointer to an UErrorCode to receive any errors + * @return The UDisplayContextValue for the specified type. + * @stable ICU 53 + */ +U_STABLE UDisplayContext U_EXPORT2 +unum_getContext(const UNumberFormat *fmt, UDisplayContextType type, UErrorCode* status); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/unumsys.h b/intl/icu/source/i18n/unicode/unumsys.h new file mode 100644 index 000000000..46289c0f9 --- /dev/null +++ b/intl/icu/source/i18n/unicode/unumsys.h @@ -0,0 +1,172 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2013-2014, International Business Machines +* Corporation and others. All Rights Reserved. 
+***************************************************************************************** +*/ + +#ifndef UNUMSYS_H +#define UNUMSYS_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uenum.h" +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: UNumberingSystem, information about numbering systems + * + * Defines numbering systems. A numbering system describes the scheme by which + * numbers are to be presented to the end user. In its simplest form, a numbering + * system describes the set of digit characters that are to be used to display + * numbers, such as Western digits, Thai digits, Arabic-Indic digits, etc., in a + * positional numbering system with a specified radix (typically 10). + * More complicated numbering systems are algorithmic in nature, and require use + * of an RBNF formatter (rule based number formatter), in order to calculate + * the characters to be displayed for a given number. Examples of algorithmic + * numbering systems include Roman numerals, Chinese numerals, and Hebrew numerals. + * Formatting rules for many commonly used numbering systems are included in + * the ICU package, based on the numbering system rules defined in CLDR. + * Alternate numbering systems can be specified to a locale by using the + * numbers locale keyword. + */ + +/** + * Opaque UNumberingSystem object for use in C programs. + * @stable ICU 52 + */ +struct UNumberingSystem; +typedef struct UNumberingSystem UNumberingSystem; /**< C typedef for struct UNumberingSystem. @stable ICU 52 */ + +/** + * Opens a UNumberingSystem object using the default numbering system for the specified + * locale. + * @param locale The locale for which the default numbering system should be opened. + * @param status A pointer to a UErrorCode to receive any errors. For example, this + * may be U_UNSUPPORTED_ERROR for a locale such as "en@numbers=xyz" that + * specifies a numbering system unknown to ICU. 
+ * @return A UNumberingSystem for the specified locale, or NULL if an error + * occurred. + * @stable ICU 52 + */ +U_STABLE UNumberingSystem * U_EXPORT2 +unumsys_open(const char *locale, UErrorCode *status); + +/** + * Opens a UNumberingSystem object using the name of one of the predefined numbering + * systems specified by CLDR and known to ICU, such as "latn", "arabext", or "hanidec"; + * the full list is returned by unumsys_openAvailableNames. Note that some of the names + * listed at http://unicode.org/repos/cldr/tags/latest/common/bcp47/number.xml - e.g. + * default, native, traditional, finance - do not identify specific numbering systems, + * but rather key values that may only be used as part of a locale, which in turn + * defines how they are mapped to a specific numbering system such as "latn" or "hant". + * + * @param name The name of the numbering system for which a UNumberingSystem object + * should be opened. + * @param status A pointer to a UErrorCode to receive any errors. For example, this + * may be U_UNSUPPORTED_ERROR for a numbering system such as "xyz" that + * is unknown to ICU. + * @return A UNumberingSystem for the specified name, or NULL if an error + * occurred. + * @stable ICU 52 + */ +U_STABLE UNumberingSystem * U_EXPORT2 +unumsys_openByName(const char *name, UErrorCode *status); + +/** + * Close a UNumberingSystem object. Once closed it may no longer be used. + * @param unumsys The UNumberingSystem object to close. + * @stable ICU 52 + */ +U_STABLE void U_EXPORT2 +unumsys_close(UNumberingSystem *unumsys); + +#if U_SHOW_CPLUSPLUS_API +U_NAMESPACE_BEGIN + +/** + * \class LocalUNumberingSystemPointer + * "Smart pointer" class, closes a UNumberingSystem via unumsys_close(). + * For most methods see the LocalPointerBase base class. 
+ * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 52 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUNumberingSystemPointer, UNumberingSystem, unumsys_close); + +U_NAMESPACE_END +#endif + +/** + * Returns an enumeration over the names of all of the predefined numbering systems known + * to ICU. + * @param status A pointer to a UErrorCode to receive any errors. + * @return A pointer to a UEnumeration that must be closed with uenum_close(), + * or NULL if an error occurred. + * @stable ICU 52 + */ +U_STABLE UEnumeration * U_EXPORT2 +unumsys_openAvailableNames(UErrorCode *status); + +/** + * Returns the name of the specified UNumberingSystem object (if it is one of the + * predefined names known to ICU). + * @param unumsys The UNumberingSystem whose name is desired. + * @return A pointer to the name of the specified UNumberingSystem object, or + * NULL if the name is not one of the ICU predefined names. The pointer + * is only valid for the lifetime of the UNumberingSystem object. + * @stable ICU 52 + */ +U_STABLE const char * U_EXPORT2 +unumsys_getName(const UNumberingSystem *unumsys); + +/** + * Returns whether the given UNumberingSystem object is for an algorithmic (not purely + * positional) system. + * @param unumsys The UNumberingSystem whose algorithmic status is desired. + * @return TRUE if the specified UNumberingSystem object is for an algorithmic + * system. + * @stable ICU 52 + */ +U_STABLE UBool U_EXPORT2 +unumsys_isAlgorithmic(const UNumberingSystem *unumsys); + +/** + * Returns the radix of the specified UNumberingSystem object. Simple positional + * numbering systems typically have radix 10, but might have a radix of e.g. 16 for + * hexadecimal. The radix is less well-defined for non-positional algorithmic systems. + * @param unumsys The UNumberingSystem whose radix is desired. + * @return The radix of the specified UNumberingSystem object. 
+ * @stable ICU 52 + */ +U_STABLE int32_t U_EXPORT2 +unumsys_getRadix(const UNumberingSystem *unumsys); + +/** + * Get the description string of the specified UNumberingSystem object. For simple + * positional systems this is the ordered string of digits (with length matching + * the radix), e.g. "\u3007\u4E00\u4E8C\u4E09\u56DB\u4E94\u516D\u4E03\u516B\u4E5D" + * for "hanidec"; it would be "0123456789ABCDEF" for hexadecimal. For + * algorithmic systems this is the name of the RBNF ruleset used for formatting, + * e.g. "zh/SpelloutRules/%spellout-cardinal" for "hans" or "%greek-upper" for + * "grek". + * @param unumsys The UNumberingSystem whose description string is desired. + * @param result A pointer to a buffer to receive the description string. + * @param resultLength The maximum size of result. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The total buffer size needed; if greater than resultLength, the + * output was truncated. + * @stable ICU 52 + */ +U_STABLE int32_t U_EXPORT2 +unumsys_getDescription(const UNumberingSystem *unumsys, UChar *result, + int32_t resultLength, UErrorCode *status); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/upluralrules.h b/intl/icu/source/i18n/unicode/upluralrules.h new file mode 100644 index 000000000..52e34d8d2 --- /dev/null +++ b/intl/icu/source/i18n/unicode/upluralrules.h @@ -0,0 +1,149 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2010-2013, International Business Machines +* Corporation and others. All Rights Reserved. 
+***************************************************************************************** +*/ + +#ifndef UPLURALRULES_H +#define UPLURALRULES_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: Plural rules, select plural keywords for numeric values. + * + * A UPluralRules object defines rules for mapping non-negative numeric + * values onto a small set of keywords. Rules are constructed from a text + * description, consisting of a series of keywords and conditions. + * The uplrules_select function examines each condition in order and + * returns the keyword for the first condition that matches the number. + * If none match, the default rule(other) is returned. + * + * For more information, see the LDML spec, C.11 Language Plural Rules: + * http://www.unicode.org/reports/tr35/#Language_Plural_Rules + * + * Keywords: ICU locale data has 6 predefined values - + * 'zero', 'one', 'two', 'few', 'many' and 'other'. Callers need to check + * the value of keyword returned by the uplrules_select function. + * + * These are based on CLDR <i>Language Plural Rules</i>. For these + * predefined rules, see the CLDR page at + * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html + */ + +/** + * Type of plurals and PluralRules. + * @stable ICU 50 + */ +enum UPluralType { + /** + * Plural rules for cardinal numbers: 1 file vs. 2 files. + * @stable ICU 50 + */ + UPLURAL_TYPE_CARDINAL, + /** + * Plural rules for ordinal numbers: 1st file, 2nd file, 3rd file, 4th file, etc. + * @stable ICU 50 + */ + UPLURAL_TYPE_ORDINAL, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UPluralType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
+ */ + UPLURAL_TYPE_COUNT +#endif // U_HIDE_DEPRECATED_API +}; +/** + * @stable ICU 50 + */ +typedef enum UPluralType UPluralType; + +/** + * Opaque UPluralRules object for use in C programs. + * @stable ICU 4.8 + */ +struct UPluralRules; +typedef struct UPluralRules UPluralRules; /**< C typedef for struct UPluralRules. @stable ICU 4.8 */ + +/** + * Opens a new UPluralRules object using the predefined cardinal-number plural rules for a + * given locale. + * Same as uplrules_openForType(locale, UPLURAL_TYPE_CARDINAL, status). + * @param locale The locale for which the rules are desired. + * @param status A pointer to a UErrorCode to receive any errors. + * @return A UPluralRules for the specified locale, or NULL if an error occurred. + * @stable ICU 4.8 + */ +U_STABLE UPluralRules* U_EXPORT2 +uplrules_open(const char *locale, UErrorCode *status); + +/** + * Opens a new UPluralRules object using the predefined plural rules for a + * given locale and the plural type. + * @param locale The locale for which the rules are desired. + * @param type The plural type (e.g., cardinal or ordinal). + * @param status A pointer to a UErrorCode to receive any errors. + * @return A UPluralRules for the specified locale, or NULL if an error occurred. + * @stable ICU 50 + */ +U_DRAFT UPluralRules* U_EXPORT2 +uplrules_openForType(const char *locale, UPluralType type, UErrorCode *status); + +/** + * Closes a UPluralRules object. Once closed it may no longer be used. + * @param uplrules The UPluralRules object to close. + * @stable ICU 4.8 + */ +U_STABLE void U_EXPORT2 +uplrules_close(UPluralRules *uplrules); + + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUPluralRulesPointer + * "Smart pointer" class, closes a UPluralRules via uplrules_close(). + * For most methods see the LocalPointerBase base class. 
+ * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.8 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUPluralRulesPointer, UPluralRules, uplrules_close); + +U_NAMESPACE_END + +#endif + + +/** + * Given a number, returns the keyword of the first rule that + * applies to the number, according to the supplied UPluralRules object. + * @param uplrules The UPluralRules object specifying the rules. + * @param number The number for which the rule has to be determined. + * @param keyword The keyword of the rule that applies to number. + * @param capacity The capacity of keyword. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The length of keyword. + * @stable ICU 4.8 + */ +U_STABLE int32_t U_EXPORT2 +uplrules_select(const UPluralRules *uplrules, + double number, + UChar *keyword, int32_t capacity, + UErrorCode *status); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/uregex.h b/intl/icu/source/i18n/unicode/uregex.h new file mode 100644 index 000000000..ab85719db --- /dev/null +++ b/intl/icu/source/i18n/unicode/uregex.h @@ -0,0 +1,1614 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2004-2016, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* file name: uregex.h +* encoding: US-ASCII +* indentation:4 +* +* created on: 2004mar09 +* created by: Andy Heninger +* +* ICU Regular Expressions, API for C +*/ + +/** + * \file + * \brief C API: Regular Expressions + * + * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p> + */ + +#ifndef UREGEX_H +#define UREGEX_H + +#include "unicode/utext.h" +#include "unicode/utypes.h" + +#if !UCONFIG_NO_REGULAR_EXPRESSIONS + +#include "unicode/localpointer.h" +#include "unicode/parseerr.h" + +struct URegularExpression; +/** + * Structure representing a compiled regular expression, plus the results + * of a match operation. + * @stable ICU 3.0 + */ +typedef struct URegularExpression URegularExpression; + + +/** + * Constants for Regular Expression Match Modes. + * @stable ICU 2.4 + */ +typedef enum URegexpFlag{ + +#ifndef U_HIDE_DRAFT_API + /** Forces normalization of pattern and strings. + Not implemented yet, just a placeholder, hence draft. + @draft ICU 2.4 */ + UREGEX_CANON_EQ = 128, +#endif /* U_HIDE_DRAFT_API */ + /** Enable case insensitive matching. @stable ICU 2.4 */ + UREGEX_CASE_INSENSITIVE = 2, + + /** Allow white space and comments within patterns @stable ICU 2.4 */ + UREGEX_COMMENTS = 4, + + /** If set, '.' matches line terminators, otherwise '.' matching stops at line end. + * @stable ICU 2.4 */ + UREGEX_DOTALL = 32, + + /** If set, treat the entire pattern as a literal string. + * Metacharacters or escape sequences in the input sequence will be given + * no special meaning. + * + * The flag UREGEX_CASE_INSENSITIVE retains its impact + * on matching when used in conjunction with this flag. + * The other flags become superfluous. + * + * @stable ICU 4.0 + */ + UREGEX_LITERAL = 16, + + /** Control behavior of "$" and "^" + * If set, recognize line terminators within string, + * otherwise, match only at start and end of input string. 
+ * @stable ICU 2.4 */ + UREGEX_MULTILINE = 8, + + /** Unix-only line endings. + * When this mode is enabled, only \\u000a is recognized as a line ending + * in the behavior of ., ^, and $. + * @stable ICU 4.0 + */ + UREGEX_UNIX_LINES = 1, + + /** Unicode word boundaries. + * If set, \b uses the Unicode TR 29 definition of word boundaries. + * Warning: Unicode word boundaries are quite different from + * traditional regular expression word boundaries. See + * http://unicode.org/reports/tr29/#Word_Boundaries + * @stable ICU 2.8 + */ + UREGEX_UWORD = 256, + + /** Error on Unrecognized backslash escapes. + * If set, fail with an error on patterns that contain + * backslash-escaped ASCII letters without a known special + * meaning. If this flag is not set, these + * escaped letters represent themselves. + * @stable ICU 4.0 + */ + UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512 + +} URegexpFlag; + +/** + * Open (compile) an ICU regular expression. Compiles the regular expression in + * string form into an internal representation using the specified match mode flags. + * The resulting regular expression handle can then be used to perform various + * matching operations. + * + * + * @param pattern The Regular Expression pattern to be compiled. + * @param patternLength The length of the pattern, or -1 if the pattern is + * NUL terminated. + * @param flags Flags that alter the default matching behavior for + * the regular expression, UREGEX_CASE_INSENSITIVE, for + * example. For default behavior, set this parameter to zero. + * See <code>enum URegexpFlag</code>. All desired flags + * are bitwise-ORed together. + * @param pe Receives the position (line and column numbers) of any syntax + * error within the source regular expression string. If this + * information is not wanted, pass NULL for this parameter. + * @param status Receives error detected by this function. 
+ * @stable ICU 3.0 + * + */ +U_STABLE URegularExpression * U_EXPORT2 +uregex_open( const UChar *pattern, + int32_t patternLength, + uint32_t flags, + UParseError *pe, + UErrorCode *status); + +/** + * Open (compile) an ICU regular expression. Compiles the regular expression in + * string form into an internal representation using the specified match mode flags. + * The resulting regular expression handle can then be used to perform various + * matching operations. + * <p> + * The contents of the pattern UText will be extracted and saved. Ownership of the + * UText struct itself remains with the caller. This is to match the behavior of + * uregex_open(). + * + * @param pattern The Regular Expression pattern to be compiled. + * @param flags Flags that alter the default matching behavior for + * the regular expression, UREGEX_CASE_INSENSITIVE, for + * example. For default behavior, set this parameter to zero. + * See <code>enum URegexpFlag</code>. All desired flags + * are bitwise-ORed together. + * @param pe Receives the position (line and column numbers) of any syntax + * error within the source regular expression string. If this + * information is not wanted, pass NULL for this parameter. + * @param status Receives error detected by this function. + * + * @stable ICU 4.6 + */ +U_STABLE URegularExpression * U_EXPORT2 +uregex_openUText(UText *pattern, + uint32_t flags, + UParseError *pe, + UErrorCode *status); + +/** + * Open (compile) an ICU regular expression. The resulting regular expression + * handle can then be used to perform various matching operations. + * <p> + * This function is the same as uregex_open, except that the pattern + * is supplied as an 8 bit char * string in the default code page. + * + * @param pattern The Regular Expression pattern to be compiled, + * NUL terminated. + * @param flags Flags that alter the default matching behavior for + * the regular expression, UREGEX_CASE_INSENSITIVE, for + * example. 
For default behavior, set this parameter to zero. + * See <code>enum URegexpFlag</code>. All desired flags + * are bitwise-ORed together. + * @param pe Receives the position (line and column numbers) of any syntax + * error within the source regular expression string. If this + * information is not wanted, pass NULL for this parameter. + * @param status Receives errors detected by this function. + * @return The URegularExpression object representing the compiled + * pattern. + * + * @stable ICU 3.0 + */ +#if !UCONFIG_NO_CONVERSION +U_STABLE URegularExpression * U_EXPORT2 +uregex_openC( const char *pattern, + uint32_t flags, + UParseError *pe, + UErrorCode *status); +#endif + + + +/** + * Close the regular expression, recovering all resources (memory) it + * was holding. + * + * @param regexp The regular expression to be closed. + * @stable ICU 3.0 + */ +U_STABLE void U_EXPORT2 +uregex_close(URegularExpression *regexp); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalURegularExpressionPointer + * "Smart pointer" class, closes a URegularExpression via uregex_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close); + +U_NAMESPACE_END + +#endif + +/** + * Make a copy of a compiled regular expression. Cloning a regular + * expression is faster than opening a second instance from the source + * form of the expression, and requires less memory. + * <p> + * Note that the current input string and the position of any matched text + * within it are not cloned; only the pattern itself and the + * match mode flags are copied. + * <p> + * Cloning can be particularly useful to threaded applications that perform + * multiple match operations in parallel. Each concurrent RE + * operation requires its own instance of a URegularExpression. 
+ * + * @param regexp The compiled regular expression to be cloned. + * @param status Receives indication of any errors encountered + * @return the cloned copy of the compiled regular expression. + * @stable ICU 3.0 + */ +U_STABLE URegularExpression * U_EXPORT2 +uregex_clone(const URegularExpression *regexp, UErrorCode *status); + +/** + * Returns a pointer to the source form of the pattern for this regular expression. + * This function will work even if the pattern was originally specified as a UText. + * + * @param regexp The compiled regular expression. + * @param patLength This output parameter will be set to the length of the + * pattern string. A NULL pointer may be used here if the + * pattern length is not needed, as would be the case if + * the pattern is known in advance to be a NUL terminated + * string. + * @param status Receives errors detected by this function. + * @return a pointer to the pattern string. The storage for the string is + * owned by the regular expression object, and must not be + * altered or deleted by the application. The returned string + * will remain valid until the regular expression is closed. + * @stable ICU 3.0 + */ +U_STABLE const UChar * U_EXPORT2 +uregex_pattern(const URegularExpression *regexp, + int32_t *patLength, + UErrorCode *status); + +/** + * Returns the source text of the pattern for this regular expression. + * This function will work even if the pattern was originally specified as a UChar string. + * + * @param regexp The compiled regular expression. + * @param status Receives errors detected by this function. + * @return the pattern text. The storage for the text is owned by the regular expression + * object, and must not be altered or deleted. + * + * @stable ICU 4.6 + */ +U_STABLE UText * U_EXPORT2 +uregex_patternUText(const URegularExpression *regexp, + UErrorCode *status); + +/** + * Get the match mode flags that were specified when compiling this regular expression. 
+ * @param regexp The compiled regular expression. + * @param status Receives errors detected by this function. + * @return The match mode flags. + * @see URegexpFlag + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_flags(const URegularExpression *regexp, + UErrorCode *status); + + +/** + * Set the subject text string upon which the regular expression will look for matches. + * This function may be called any number of times, allowing the regular + * expression pattern to be applied to different strings. + * <p> + * Regular expression matching operations work directly on the application's + * string data. No copy is made. The subject string data must not be + * altered after calling this function until after all regular expression + * operations involving this string data are completed. + * <p> + * Zero length strings are permitted. In this case, no subsequent match + * operation will dereference the text string pointer. + * + * @param regexp The compiled regular expression. + * @param text The subject text string. + * @param textLength The length of the subject text, or -1 if the string + * is NUL terminated. + * @param status Receives errors detected by this function. + * @stable ICU 3.0 + */ +U_STABLE void U_EXPORT2 +uregex_setText(URegularExpression *regexp, + const UChar *text, + int32_t textLength, + UErrorCode *status); + + +/** + * Set the subject text string upon which the regular expression will look for matches. + * This function may be called any number of times, allowing the regular + * expression pattern to be applied to different strings. + * <p> + * Regular expression matching operations work directly on the application's + * string data; only a shallow clone is made. The subject string data must not be + * altered after calling this function until after all regular expression + * operations involving this string data are completed. + * + * @param regexp The compiled regular expression. + * @param text The subject text string. 
+ * @param status Receives errors detected by this function. + * + * @stable ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_setUText(URegularExpression *regexp, + UText *text, + UErrorCode *status); + +/** + * Get the subject text that is currently associated with this + * regular expression object. If the input was supplied using uregex_setText(), + * that pointer will be returned. Otherwise, the characters in the input will + * be extracted to a buffer and returned. In either case, ownership remains + * with the regular expression object. + * + * This function will work even if the input was originally specified as a UText. + * + * @param regexp The compiled regular expression. + * @param textLength The length of the string is returned in this output parameter. + * A NULL pointer may be used here if the + * text length is not needed, as would be the case if + * the text is known in advance to be a NUL terminated + * string. + * @param status Receives errors detected by this function. + * @return Pointer to the subject text string currently associated with + * this regular expression. + * @stable ICU 3.0 + */ +U_STABLE const UChar * U_EXPORT2 +uregex_getText(URegularExpression *regexp, + int32_t *textLength, + UErrorCode *status); + +/** + * Get the subject text that is currently associated with this + * regular expression object. + * + * This function will work even if the input was originally specified as a UChar string. + * + * @param regexp The compiled regular expression. + * @param dest A mutable UText in which to store the current input. + * If NULL, a new UText will be created as an immutable shallow clone + * of the actual input string. + * @param status Receives errors detected by this function. + * @return The subject text currently associated with this regular expression. + * If a pre-allocated UText was provided, it will always be used and returned. 
+ * + * @stable ICU 4.6 + */ +U_STABLE UText * U_EXPORT2 +uregex_getUText(URegularExpression *regexp, + UText *dest, + UErrorCode *status); + +/** + * Set the subject text string upon which the regular expression is looking for matches + * without changing any other aspect of the matching state. + * The new and previous text strings must have the same content. + * + * This function is intended for use in environments where ICU is operating on + * strings that may move around in memory. It provides a mechanism for notifying + * ICU that the string has been relocated, and providing a new UText to access the + * string in its new position. + * + * Note that the regular expression implementation never copies the underlying text + * of a string being matched, but always operates directly on the original text + * provided by the user. Refreshing simply drops the references to the old text + * and replaces them with references to the new. + * + * Caution: this function is normally used only by very specialized + * system-level code. One example use case is with garbage collection + * that moves the text in memory. + * + * @param regexp The compiled regular expression. + * @param text The new (moved) text string. + * @param status Receives errors detected by this function. + * + * @stable ICU 4.8 + */ +U_STABLE void U_EXPORT2 +uregex_refreshUText(URegularExpression *regexp, + UText *text, + UErrorCode *status); + +/** + * Attempts to match the input string against the pattern. + * To succeed, the match must extend to the end of the string, + * or cover the complete match region. + * + * If startIndex >= zero the match operation starts at the specified + * index and must extend to the end of the input string. Any region + * that has been specified is reset. + * + * If startIndex == -1 the match must cover the input region, or the entire + * input string if no region has been set. 
This directly corresponds to + * Matcher.matches() in Java + * + * @param regexp The compiled regular expression. + * @param startIndex The input string (native) index at which to begin matching, or -1 + * to match the input Region. + * @param status Receives errors detected by this function. + * @return TRUE if there is a match + * @stable ICU 3.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_matches(URegularExpression *regexp, + int32_t startIndex, + UErrorCode *status); + +/** + * 64bit version of uregex_matches. + * Attempts to match the input string against the pattern. + * To succeed, the match must extend to the end of the string, + * or cover the complete match region. + * + * If startIndex >= zero the match operation starts at the specified + * index and must extend to the end of the input string. Any region + * that has been specified is reset. + * + * If startIndex == -1 the match must cover the input region, or the entire + * input string if no region has been set. This directly corresponds to + * Matcher.matches() in Java + * + * @param regexp The compiled regular expression. + * @param startIndex The input string (native) index at which to begin matching, or -1 + * to match the input Region. + * @param status Receives errors detected by this function. + * @return TRUE if there is a match + * @stable ICU 4.6 + */ +U_STABLE UBool U_EXPORT2 +uregex_matches64(URegularExpression *regexp, + int64_t startIndex, + UErrorCode *status); + +/** + * Attempts to match the input string, starting from the specified index, against the pattern. + * The match may be of any length, and is not required to extend to the end + * of the input string. Contrast with uregex_matches(). + * + * <p>If startIndex is >= 0 any input region that was set for this + * URegularExpression is reset before the operation begins. + * + * <p>If the specified starting index == -1 the match begins at the start of the input + * region, or at the start of the full string if no region has been specified. 
+ * This corresponds directly with Matcher.lookingAt() in Java. + * + * <p>If the match succeeds then more information can be obtained via the + * <code>uregex_start()</code>, <code>uregex_end()</code>, + * and <code>uregex_group()</code> functions.</p> + * + * @param regexp The compiled regular expression. + * @param startIndex The input string (native) index at which to begin matching, or + * -1 to match the Input Region + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if there is a match. + * @stable ICU 3.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_lookingAt(URegularExpression *regexp, + int32_t startIndex, + UErrorCode *status); + +/** + * 64bit version of uregex_lookingAt. + * Attempts to match the input string, starting from the specified index, against the pattern. + * The match may be of any length, and is not required to extend to the end + * of the input string. Contrast with uregex_matches(). + * + * <p>If startIndex is >= 0 any input region that was set for this + * URegularExpression is reset before the operation begins. + * + * <p>If the specified starting index == -1 the match begins at the start of the input + * region, or at the start of the full string if no region has been specified. + * This corresponds directly with Matcher.lookingAt() in Java. + * + * <p>If the match succeeds then more information can be obtained via the + * <code>uregex_start()</code>, <code>uregex_end()</code>, + * and <code>uregex_group()</code> functions.</p> + * + * @param regexp The compiled regular expression. + * @param startIndex The input string (native) index at which to begin matching, or + * -1 to match the Input Region + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if there is a match. 
+ * @stable ICU 4.6 + */ +U_STABLE UBool U_EXPORT2 +uregex_lookingAt64(URegularExpression *regexp, + int64_t startIndex, + UErrorCode *status); + +/** + * Find the first matching substring of the input string that matches the pattern. + * If startIndex is >= zero the search for a match begins at the specified index, + * and any match region is reset. This corresponds directly with + * Matcher.find(startIndex) in Java. + * + * If startIndex == -1 the search begins at the start of the input region, + * or at the start of the full string if no region has been specified. + * + * If a match is found, <code>uregex_start(), uregex_end()</code>, and + * <code>uregex_group()</code> will provide more information regarding the match. + * + * @param regexp The compiled regular expression. + * @param startIndex The position (native) in the input string to begin the search, or + * -1 to search within the Input Region. + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if a match is found. + * @stable ICU 3.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_find(URegularExpression *regexp, + int32_t startIndex, + UErrorCode *status); + +/** + * 64bit version of uregex_find. + * Find the first matching substring of the input string that matches the pattern. + * If startIndex is >= zero the search for a match begins at the specified index, + * and any match region is reset. This corresponds directly with + * Matcher.find(startIndex) in Java. + * + * If startIndex == -1 the search begins at the start of the input region, + * or at the start of the full string if no region has been specified. + * + * If a match is found, <code>uregex_start(), uregex_end()</code>, and + * <code>uregex_group()</code> will provide more information regarding the match. + * + * @param regexp The compiled regular expression. + * @param startIndex The position (native) in the input string to begin the search, or + * -1 to search within the Input Region. 
+ * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if a match is found. + * @stable ICU 4.6 + */ +U_STABLE UBool U_EXPORT2 +uregex_find64(URegularExpression *regexp, + int64_t startIndex, + UErrorCode *status); + +/** + * Find the next pattern match in the input string. Begin searching + * the input at the location following the end of the previous match, + * or at the start of the string (or region) if there is no + * previous match. If a match is found, <code>uregex_start(), uregex_end()</code>, and + * <code>uregex_group()</code> will provide more information regarding the match. + * + * @param regexp The compiled regular expression. + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if a match is found. + * @see uregex_reset + * @stable ICU 3.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_findNext(URegularExpression *regexp, + UErrorCode *status); + +/** + * Get the number of capturing groups in this regular expression's pattern. + * @param regexp The compiled regular expression. + * @param status A reference to a UErrorCode to receive any errors. + * @return the number of capture groups + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_groupCount(URegularExpression *regexp, + UErrorCode *status); + +/** + * Get the group number corresponding to a named capture group. + * The returned number can be used with any function that accesses + * capture groups by number. + * + * The function returns an error status if the specified name does not + * appear in the pattern. + * + * @param regexp The compiled regular expression. + * @param groupName The capture group name. + * @param nameLength The length of the name, or -1 if the name is a + * nul-terminated string. + * @param status A pointer to a UErrorCode to receive any errors. 
+ * + * @stable ICU 55 + */ +U_STABLE int32_t U_EXPORT2 +uregex_groupNumberFromName(URegularExpression *regexp, + const UChar *groupName, + int32_t nameLength, + UErrorCode *status); + + +/** + * Get the group number corresponding to a named capture group. + * The returned number can be used with any function that access + * capture groups by number. + * + * The function returns an error status if the specified name does not + * appear in the pattern. + * + * @param regexp The compiled regular expression. + * @param groupName The capture group name, + * platform invariant characters only. + * @param nameLength The length of the name, or -1 if the name is + * nul-terminated. + * @param status A pointer to a UErrorCode to receive any errors. + * + * @stable ICU 55 + */ +U_STABLE int32_t U_EXPORT2 +uregex_groupNumberFromCName(URegularExpression *regexp, + const char *groupName, + int32_t nameLength, + UErrorCode *status); + +/** Extract the string for the specified matching expression or subexpression. + * Group #0 is the complete string of matched text. + * Group #1 is the text matched by the first set of capturing parentheses. + * + * @param regexp The compiled regular expression. + * @param groupNum The capture group to extract. Group 0 is the complete + * match. The value of this parameter must be + * less than or equal to the number of capture groups in + * the pattern. + * @param dest Buffer to receive the matching string data + * @param destCapacity Capacity of the dest buffer. + * @param status A reference to a UErrorCode to receive any errors. + * @return Length of matching data, + * or -1 if no applicable match. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_group(URegularExpression *regexp, + int32_t groupNum, + UChar *dest, + int32_t destCapacity, + UErrorCode *status); + +/** Returns a shallow immutable clone of the entire input string with the current index set + * to the beginning of the requested capture group. 
The capture group length is also + * returned via groupLength. + * Group #0 is the complete string of matched text. + * Group #1 is the text matched by the first set of capturing parentheses. + * + * @param regexp The compiled regular expression. + * @param groupNum The capture group to extract. Group 0 is the complete + * match. The value of this parameter must be + * less than or equal to the number of capture groups in + * the pattern. + * @param dest A mutable UText in which to store the current input. + * If NULL, a new UText will be created as an immutable shallow clone + * of the entire input string. + * @param groupLength The group length of the desired capture group. Output parameter. + * @param status A reference to a UErrorCode to receive any errors. + * @return The subject text currently associated with this regular expression. + * If a pre-allocated UText was provided, it will always be used and returned. + + * + * @stable ICU 4.6 + */ +U_STABLE UText * U_EXPORT2 +uregex_groupUText(URegularExpression *regexp, + int32_t groupNum, + UText *dest, + int64_t *groupLength, + UErrorCode *status); + +/** + * Returns the index in the input string of the start of the text matched by the + * specified capture group during the previous match operation. Return -1 if + * the capture group was not part of the last match. + * Group #0 refers to the complete range of matched text. + * Group #1 refers to the text matched by the first set of capturing parentheses. + * + * @param regexp The compiled regular expression. + * @param groupNum The capture group number + * @param status A reference to a UErrorCode to receive any errors. + * @return the starting (native) position in the input of the text matched + * by the specified group. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_start(URegularExpression *regexp, + int32_t groupNum, + UErrorCode *status); + +/** + * 64bit version of uregex_start. 
+ * Returns the index in the input string of the start of the text matched by the + * specified capture group during the previous match operation. Return -1 if + * the capture group was not part of the last match. + * Group #0 refers to the complete range of matched text. + * Group #1 refers to the text matched by the first set of capturing parentheses. + * + * @param regexp The compiled regular expression. + * @param groupNum The capture group number + * @param status A reference to a UErrorCode to receive any errors. + * @return the starting (native) position in the input of the text matched + * by the specified group. + * @stable ICU 4.6 + */ +U_STABLE int64_t U_EXPORT2 +uregex_start64(URegularExpression *regexp, + int32_t groupNum, + UErrorCode *status); + +/** + * Returns the index in the input string of the position following the end + * of the text matched by the specified capture group. + * Return -1 if the capture group was not part of the last match. + * Group #0 refers to the complete range of matched text. + * Group #1 refers to the text matched by the first set of capturing parentheses. + * + * @param regexp The compiled regular expression. + * @param groupNum The capture group number + * @param status A reference to a UErrorCode to receive any errors. + * @return the (native) index of the position following the last matched character. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_end(URegularExpression *regexp, + int32_t groupNum, + UErrorCode *status); + +/** + * 64bit version of uregex_end. + * Returns the index in the input string of the position following the end + * of the text matched by the specified capture group. + * Return -1 if the capture group was not part of the last match. + * Group #0 refers to the complete range of matched text. + * Group #1 refers to the text matched by the first set of capturing parentheses. + * + * @param regexp The compiled regular expression. 
+ * @param groupNum The capture group number + * @param status A reference to a UErrorCode to receive any errors. + * @return the (native) index of the position following the last matched character. + * @stable ICU 4.6 + */ +U_STABLE int64_t U_EXPORT2 +uregex_end64(URegularExpression *regexp, + int32_t groupNum, + UErrorCode *status); + +/** + * Reset any saved state from the previous match. Has the effect of + * causing uregex_findNext to begin at the specified index, and causing + * uregex_start(), uregex_end() and uregex_group() to return an error + * indicating that there is no match information available. Clears any + * match region that may have been set. + * + * @param regexp The compiled regular expression. + * @param index The position (native) in the text at which a + * uregex_findNext() should begin searching. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 3.0 + */ +U_STABLE void U_EXPORT2 +uregex_reset(URegularExpression *regexp, + int32_t index, + UErrorCode *status); + +/** + * 64bit version of uregex_reset. + * Reset any saved state from the previous match. Has the effect of + * causing uregex_findNext to begin at the specified index, and causing + * uregex_start(), uregex_end() and uregex_group() to return an error + * indicating that there is no match information available. Clears any + * match region that may have been set. + * + * @param regexp The compiled regular expression. + * @param index The position (native) in the text at which a + * uregex_findNext() should begin searching. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_reset64(URegularExpression *regexp, + int64_t index, + UErrorCode *status); + +/** + * Sets the limits of the matching region for this URegularExpression. + * The region is the part of the input string that will be considered when matching. 
+ * Invoking this method resets any saved state from the previous match, + * then sets the region to start at the index specified by the start parameter + * and end at the index specified by the end parameter. + * + * Depending on the transparency and anchoring being used (see useTransparentBounds + * and useAnchoringBounds), certain constructs such as anchors may behave differently + * at or around the boundaries of the region + * + * The function will fail if start is greater than limit, or if either index + * is less than zero or greater than the length of the string being matched. + * + * @param regexp The compiled regular expression. + * @param regionStart The (native) index to begin searches at. + * @param regionLimit The (native) index to end searches at (exclusive). + * @param status A pointer to a UErrorCode to receive any errors. + * @stable ICU 4.0 + */ +U_STABLE void U_EXPORT2 +uregex_setRegion(URegularExpression *regexp, + int32_t regionStart, + int32_t regionLimit, + UErrorCode *status); + +/** + * 64bit version of uregex_setRegion. + * Sets the limits of the matching region for this URegularExpression. + * The region is the part of the input string that will be considered when matching. + * Invoking this method resets any saved state from the previous match, + * then sets the region to start at the index specified by the start parameter + * and end at the index specified by the end parameter. + * + * Depending on the transparency and anchoring being used (see useTransparentBounds + * and useAnchoringBounds), certain constructs such as anchors may behave differently + * at or around the boundaries of the region + * + * The function will fail if start is greater than limit, or if either index + * is less than zero or greater than the length of the string being matched. + * + * @param regexp The compiled regular expression. + * @param regionStart The (native) index to begin searches at. 
+ * @param regionLimit The (native) index to end searches at (exclusive). + * @param status A pointer to a UErrorCode to receive any errors. + * @stable ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_setRegion64(URegularExpression *regexp, + int64_t regionStart, + int64_t regionLimit, + UErrorCode *status); + +/** + * Set the matching region and the starting index for subsequent matches + * in a single operation. + * This is useful because the usual function for setting the starting + * index, uregex_reset(), also resets any region limits. + * + * @param regexp The compiled regular expression. + * @param regionStart The (native) index to begin searches at. + * @param regionLimit The (native) index to end searches at (exclusive). + * @param startIndex The index in the input text at which the next + * match operation should begin. + * @param status A pointer to a UErrorCode to receive any errors. + * @stable ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_setRegionAndStart(URegularExpression *regexp, + int64_t regionStart, + int64_t regionLimit, + int64_t startIndex, + UErrorCode *status); + +/** + * Reports the start index of the matching region. Any matches found are limited to + * the region bounded by regionStart (inclusive) and regionEnd (exclusive). + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The starting (native) index of this matcher's region. + * @stable ICU 4.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_regionStart(const URegularExpression *regexp, + UErrorCode *status); + +/** + * 64bit version of uregex_regionStart. + * Reports the start index of the matching region. Any matches found are limited to + * the region bounded by regionStart (inclusive) and regionEnd (exclusive). + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The starting (native) index of this matcher's region.
+ * @stable ICU 4.6 + */ +U_STABLE int64_t U_EXPORT2 +uregex_regionStart64(const URegularExpression *regexp, + UErrorCode *status); + +/** + * Reports the end index (exclusive) of the matching region for this URegularExpression. + * Any matches found are limited to the region bounded by regionStart (inclusive) + * and regionEnd (exclusive). + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The ending point (native) of this matcher's region. + * @stable ICU 4.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_regionEnd(const URegularExpression *regexp, + UErrorCode *status); + +/** + * 64bit version of uregex_regionEnd. + * Reports the end index (exclusive) of the matching region for this URegularExpression. + * Any matches found are limited to the region bounded by regionStart (inclusive) + * and regionEnd (exclusive). + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The ending point (native) of this matcher's region. + * @stable ICU 4.6 + */ +U_STABLE int64_t U_EXPORT2 +uregex_regionEnd64(const URegularExpression *regexp, + UErrorCode *status); + +/** + * Queries the transparency of region bounds for this URegularExpression. + * See useTransparentBounds for a description of transparent and opaque bounds. + * By default, matching boundaries are opaque. + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return TRUE if this matcher is using transparent bounds, FALSE if it is using opaque bounds. + * @stable ICU 4.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_hasTransparentBounds(const URegularExpression *regexp, + UErrorCode *status); + + +/** + * Sets the transparency of region bounds for this URegularExpression. + * Invoking this function with an argument of TRUE will set matches to use transparent bounds.
+ * If the boolean argument is FALSE, then opaque bounds will be used. + * + * Using transparent bounds, the boundaries of the matching region are transparent + * to lookahead, lookbehind, and boundary matching constructs. Those constructs can + * see text beyond the boundaries of the region while checking for a match. + * + * With opaque bounds, no text outside of the matching region is visible to lookahead, + * lookbehind, and boundary matching constructs. + * + * By default, opaque bounds are used. + * + * @param regexp The compiled regular expression. + * @param b TRUE for transparent bounds; FALSE for opaque bounds + * @param status A pointer to a UErrorCode to receive any errors. + * @stable ICU 4.0 + **/ +U_STABLE void U_EXPORT2 +uregex_useTransparentBounds(URegularExpression *regexp, + UBool b, + UErrorCode *status); + + +/** + * Return TRUE if this URegularExpression is using anchoring bounds. + * By default, anchoring region bounds are used. + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return TRUE if this matcher is using anchoring bounds. + * @stable ICU 4.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_hasAnchoringBounds(const URegularExpression *regexp, + UErrorCode *status); + + +/** + * Set whether this URegularExpression is using Anchoring Bounds for its region. + * With anchoring bounds, pattern anchors such as ^ and $ will match at the start + * and end of the region. Without Anchoring Bounds, anchors will only match at + * the positions they would in the complete text. + * + * Anchoring Bounds are the default for regions. + * + * @param regexp The compiled regular expression. + * @param b TRUE to enable anchoring bounds; FALSE to disable them. + * @param status A pointer to a UErrorCode to receive any errors.
+ * @stable ICU 4.0 + */ +U_STABLE void U_EXPORT2 +uregex_useAnchoringBounds(URegularExpression *regexp, + UBool b, + UErrorCode *status); + +/** + * Return TRUE if the most recent matching operation touched the + * end of the text being processed. In this case, additional input text could + * change the results of that match. + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return TRUE if the most recent match hit the end of input. + * @stable ICU 4.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_hitEnd(const URegularExpression *regexp, + UErrorCode *status); + +/** + * Return TRUE if the most recent match succeeded and additional input could cause + * it to fail. If this function returns FALSE and a match was found, then more input + * might change the match but the match won't be lost. If a match was not found, + * then requireEnd has no meaning. + * + * @param regexp The compiled regular expression. + * @param status A pointer to a UErrorCode to receive any errors. + * @return TRUE if more input could cause the most recent match to no longer match. + * @stable ICU 4.0 + */ +U_STABLE UBool U_EXPORT2 +uregex_requireEnd(const URegularExpression *regexp, + UErrorCode *status); + + + + + +/** + * Replaces every substring of the input that matches the pattern + * with the given replacement string. This is a convenience function that + * provides a complete find-and-replace-all operation. + * + * This method scans the input string looking for matches of the pattern. + * Input that is not part of any match is copied unchanged to the + * destination buffer. Matched regions are replaced in the output + * buffer by the replacement string. The replacement string may contain + * references to capture groups; these take the form of $1, $2, etc. + * + * @param regexp The compiled regular expression. + * @param replacementText A string containing the replacement text.
+ * @param replacementLength The length of the replacement string, or + * -1 if it is NUL terminated. + * @param destBuf A (UChar *) buffer that will receive the result. + * @param destCapacity The capacity of the destination buffer. + * @param status A reference to a UErrorCode to receive any errors. + * @return The length of the string resulting from the find + * and replace operation. In the event that the + * destination capacity is inadequate, the return value + * is still the full length of the untruncated string. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_replaceAll(URegularExpression *regexp, + const UChar *replacementText, + int32_t replacementLength, + UChar *destBuf, + int32_t destCapacity, + UErrorCode *status); + +/** + * Replaces every substring of the input that matches the pattern + * with the given replacement string. This is a convenience function that + * provides a complete find-and-replace-all operation. + * + * This method scans the input string looking for matches of the pattern. + * Input that is not part of any match is copied unchanged to the + * destination buffer. Matched regions are replaced in the output + * buffer by the replacement string. The replacement string may contain + * references to capture groups; these take the form of $1, $2, etc. + * + * @param regexp The compiled regular expression. + * @param replacement A string containing the replacement text. + * @param dest A mutable UText that will receive the result. + * If NULL, a new UText will be created (which may not be mutable). + * @param status A reference to a UErrorCode to receive any errors. + * @return A UText containing the results of the find and replace. + * If a pre-allocated UText was provided, it will always be used and returned. 
+ * + * @stable ICU 4.6 + */ +U_STABLE UText * U_EXPORT2 +uregex_replaceAllUText(URegularExpression *regexp, + UText *replacement, + UText *dest, + UErrorCode *status); + +/** + * Replaces the first substring of the input that matches the pattern + * with the given replacement string. This is a convenience function that + * provides a complete find-and-replace operation. + * + * This method scans the input string looking for a match of the pattern. + * All input that is not part of the match is copied unchanged to the + * destination buffer. The matched region is replaced in the output + * buffer by the replacement string. The replacement string may contain + * references to capture groups; these take the form of $1, $2, etc. + * + * @param regexp The compiled regular expression. + * @param replacementText A string containing the replacement text. + * @param replacementLength The length of the replacement string, or + * -1 if it is NUL terminated. + * @param destBuf A (UChar *) buffer that will receive the result. + * @param destCapacity The capacity of the destination buffer. + * @param status a reference to a UErrorCode to receive any errors. + * @return The length of the string resulting from the find + * and replace operation. In the event that the + * destination capacity is inadequate, the return value + * is still the full length of the untruncated string. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_replaceFirst(URegularExpression *regexp, + const UChar *replacementText, + int32_t replacementLength, + UChar *destBuf, + int32_t destCapacity, + UErrorCode *status); + +/** + * Replaces the first substring of the input that matches the pattern + * with the given replacement string. This is a convenience function that + * provides a complete find-and-replace operation. + * + * This method scans the input string looking for a match of the pattern. + * All input that is not part of the match is copied unchanged to the + * destination buffer. 
The matched region is replaced in the output + * buffer by the replacement string. The replacement string may contain + * references to capture groups; these take the form of $1, $2, etc. + * + * @param regexp The compiled regular expression. + * @param replacement A string containing the replacement text. + * @param dest A mutable UText that will receive the result. + * If NULL, a new UText will be created (which may not be mutable). + * @param status A reference to a UErrorCode to receive any errors. + * @return A UText containing the results of the find and replace. + * If a pre-allocated UText was provided, it will always be used and returned. + * + * @stable ICU 4.6 + */ +U_STABLE UText * U_EXPORT2 +uregex_replaceFirstUText(URegularExpression *regexp, + UText *replacement, + UText *dest, + UErrorCode *status); + +/** + * Implements a replace operation intended to be used as part of an + * incremental find-and-replace. + * + * <p>The input string, starting from the end of the previous match and ending at + * the start of the current match, is appended to the destination string. Then the + * replacement string is appended to the output string, + * including handling any substitutions of captured text.</p> + * + * <p>A note on preflight computation of buffer size and error handling: + * Calls to uregex_appendReplacement() and uregex_appendTail() are + * designed to be chained, one after another, with the destination + * buffer pointer and buffer capacity updated after each in preparation + * for the next. If the destination buffer is exhausted partway through such a + * sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal + * ICU conventions are for a function to perform no action if it is + * called with an error status, but for this one case, uregex_appendReplacement() + * will operate normally so that buffer size computations will complete + * correctly.</p>
+ * + * <p>For simple, prepackaged, non-incremental find-and-replace + * operations, see uregex_replaceFirst() or uregex_replaceAll().</p> + * + * @param regexp The regular expression object. + * @param replacementText The string that will replace the matched portion of the + * input string as it is copied to the destination buffer. + * The replacement text may contain references ($1, for + * example) to capture groups from the match. + * @param replacementLength The length of the replacement text string, + * or -1 if the string is NUL terminated. + * @param destBuf The buffer into which the results of the + * find-and-replace are placed. On return, this pointer + * will be updated to refer to the beginning of the + * unused portion of buffer, leaving it in position for + * a subsequent call to this function. + * @param destCapacity The size of the output buffer. On return, this + * parameter will be updated to reflect the space remaining + * unused in the output buffer. + * @param status A reference to a UErrorCode to receive any errors. + * @return The length of the result string. In the event that + * destCapacity is inadequate, the full length of the + * untruncated output string is returned. + * + * @stable ICU 3.0 + * + */ +U_STABLE int32_t U_EXPORT2 +uregex_appendReplacement(URegularExpression *regexp, + const UChar *replacementText, + int32_t replacementLength, + UChar **destBuf, + int32_t *destCapacity, + UErrorCode *status); + +/** + * Implements a replace operation intended to be used as part of an + * incremental find-and-replace. + * + * <p>The input string, starting from the end of the previous match and ending at + * the start of the current match, is appended to the destination string.
Then the + * replacement string is appended to the output string, + * including handling any substitutions of captured text.</p> + * + * <p>For simple, prepackaged, non-incremental find-and-replace + * operations, see replaceFirst() or replaceAll().</p> + * + * @param regexp The regular expression object. + * @param replacementText The string that will replace the matched portion of the + * input string as it is copied to the destination buffer. + * The replacement text may contain references ($1, for + * example) to capture groups from the match. + * @param dest A mutable UText that will receive the result. Must not be NULL. + * @param status A reference to a UErrorCode to receive any errors. + * + * @stable ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_appendReplacementUText(URegularExpression *regexp, + UText *replacementText, + UText *dest, + UErrorCode *status); + +/** + * As the final step in a find-and-replace operation, append the remainder + * of the input string, starting at the position following the last match, + * to the destination string. <code>uregex_appendTail()</code> is intended + * to be invoked after one or more invocations of the + * <code>uregex_appendReplacement()</code> function. + * + * @param regexp The regular expression object. This is needed to + * obtain the input string and with the position + * of the last match within it. + * @param destBuf The buffer in which the results of the + * find-and-replace are placed. On return, the pointer + * will be updated to refer to the beginning of the + * unused portion of buffer. + * @param destCapacity The size of the output buffer, On return, this + * value will be updated to reflect the space remaining + * unused in the output buffer. + * @param status A reference to a UErrorCode to receive any errors. + * @return The length of the result string. In the event that + * destCapacity is inadequate, the full length of the + * untruncated output string is returned. 
+ * + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_appendTail(URegularExpression *regexp, + UChar **destBuf, + int32_t *destCapacity, + UErrorCode *status); + +/** + * As the final step in a find-and-replace operation, append the remainder + * of the input string, starting at the position following the last match, + * to the destination string. <code>uregex_appendTailUText()</code> is intended + * to be invoked after one or more invocations of the + * <code>uregex_appendReplacementUText()</code> function. + * + * @param regexp The regular expression object. This is needed to + * obtain the input string along with the position + * of the last match within it. + * @param dest A mutable UText that will receive the result. Must not be NULL. + * + * @param status A reference to a UErrorCode to receive any errors. + * + * @return The destination UText. + * + * @stable ICU 4.6 + */ +U_STABLE UText * U_EXPORT2 +uregex_appendTailUText(URegularExpression *regexp, + UText *dest, + UErrorCode *status); + + /** + * Split a string into fields. Somewhat like split() from Perl. + * The pattern matches identify delimiters that separate the input + * into fields. The input data between the matches becomes the + * fields themselves. + * + * Each of the fields is copied from the input string to the destination + * buffer, and NUL terminated. The position of each field within + * the destination buffer is returned in the destFields array. + * + * If the delimiter pattern includes capture groups, the captured text will + * also appear in the destination array of output strings, interspersed + * with the fields. This is similar to Perl, but differs from Java, + * which ignores the presence of capture groups in the pattern. + * + * Trailing empty fields will always be returned, assuming sufficient + * destination capacity. This differs from the default behavior for Java + * and Perl where trailing empty fields are not returned. + * + * The number of strings produced by the split operation is returned.
+ * This count includes the strings from capture groups in the delimiter pattern. + * This behavior differs from Java, which ignores capture groups. + * + * @param regexp The compiled regular expression. + * @param destBuf A (UChar *) buffer to receive the fields that + * are extracted from the input string. These + * field pointers will refer to positions within the + * destination buffer supplied by the caller. Any + * extra positions within the destFields array will be + * set to NULL. + * @param destCapacity The capacity of the destBuf. + * @param requiredCapacity The actual capacity required of the destBuf. + * If destCapacity is too small, requiredCapacity will return + * the total capacity required to hold all of the output, and + * a U_BUFFER_OVERFLOW_ERROR will be returned. + * @param destFields An array to be filled with the position of each + * of the extracted fields within destBuf. + * @param destFieldsCapacity The number of elements in the destFields array. + * If the number of fields found is less than destFieldsCapacity, + * the extra destFields elements are set to zero. + * If destFieldsCapacity is too small, the trailing part of the + * input, including any field delimiters, is treated as if it + * were the last field - it is copied to the destBuf, and + * its position in the destBuf is stored in the last element + * of destFields. This behavior mimics that of Perl. It is not + * an error condition, and no error status is returned when all destField + * positions are used. + * @param status A reference to a UErrorCode to receive any errors. + * @return The number of fields into which the input string was split. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_split( URegularExpression *regexp, + UChar *destBuf, + int32_t destCapacity, + int32_t *requiredCapacity, + UChar *destFields[], + int32_t destFieldsCapacity, + UErrorCode *status); + + /** + * Split a string into fields. Somewhat like split() from Perl.
+ * The pattern matches identify delimiters that separate the input + * into fields. The input data between the matches becomes the + * fields themselves. + * <p> + * The behavior of this function is not very closely aligned with uregex_split(); + * instead, it is based on (and implemented directly on top of) the C++ split method. + * + * @param regexp The compiled regular expression. + * @param destFields An array of mutable UText structs to receive the results of the split. + * If a field is NULL, a new UText is allocated to contain the results for + * that field. This new UText is not guaranteed to be mutable. + * @param destFieldsCapacity The number of elements in the destination array. + * If the number of fields found is less than destFieldsCapacity, the + * extra strings in the destination array are not altered. + * If the number of destination strings is less than the number + * of fields, the trailing part of the input string, including any + * field delimiters, is placed in the last destination string. + * This behavior mimics that of Perl. It is not an error condition, and no + * error status is returned when all destField positions are used. + * @param status A reference to a UErrorCode to receive any errors. + * @return The number of fields into which the input string was split. + * + * @stable ICU 4.6 + */ +U_STABLE int32_t U_EXPORT2 +uregex_splitUText(URegularExpression *regexp, + UText *destFields[], + int32_t destFieldsCapacity, + UErrorCode *status); + +/** + * Set a processing time limit for match operations with this URegularExpression. + * + * Some patterns, when matching certain strings, can run in exponential time. + * For practical purposes, the match operation may appear to be in an + * infinite loop. + * When a limit is set a match operation will fail with an error if the + * limit is exceeded. + * <p> + * The units of the limit are steps of the match engine.
+ * Correspondence with actual processor time will depend on the speed + * of the processor and the details of the specific pattern, but will + * typically be on the order of milliseconds. + * <p> + * By default, the matching time is not limited. + * <p> + * + * @param regexp The compiled regular expression. + * @param limit The limit value, or 0 for no limit. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.0 + */ +U_STABLE void U_EXPORT2 +uregex_setTimeLimit(URegularExpression *regexp, + int32_t limit, + UErrorCode *status); + +/** + * Get the time limit for matches with this URegularExpression. + * A return value of zero indicates that there is no limit. + * + * @param regexp The compiled regular expression. + * @param status A reference to a UErrorCode to receive any errors. + * @return the maximum allowed time for a match, in units of processing steps. + * @stable ICU 4.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_getTimeLimit(const URegularExpression *regexp, + UErrorCode *status); + +/** + * Set the amount of heap storage available for use by the match backtracking stack. + * <p> + * ICU uses a backtracking regular expression engine, with the backtrack stack + * maintained on the heap. This function sets the limit to the amount of memory + * that can be used for this purpose. A backtracking stack overflow will + * result in an error from the match operation that caused it. + * <p> + * A limit is desirable because a malicious or poorly designed pattern can use + * excessive memory, potentially crashing the process. A limit is enabled + * by default. + * <p> + * @param regexp The compiled regular expression. + * @param limit The maximum size, in bytes, of the matching backtrack stack. + * A value of zero means no limit. + * The limit must be greater than or equal to zero. + * @param status A reference to a UErrorCode to receive any errors.
+ * + * @stable ICU 4.0 + */ +U_STABLE void U_EXPORT2 +uregex_setStackLimit(URegularExpression *regexp, + int32_t limit, + UErrorCode *status); + +/** + * Get the size of the heap storage available for use by the back tracking stack. + * + * @return the maximum backtracking stack size, in bytes, or zero if the + * stack size is unlimited. + * @stable ICU 4.0 + */ +U_STABLE int32_t U_EXPORT2 +uregex_getStackLimit(const URegularExpression *regexp, + UErrorCode *status); + + +/** + * Function pointer for a regular expression matching callback function. + * When set, a callback function will be called periodically during matching + * operations. If the call back function returns FALSE, the matching + * operation will be terminated early. + * + * Note: the callback function must not call other functions on this + * URegularExpression. + * + * @param context context pointer. The callback function will be invoked + * with the context specified at the time that + * uregex_setMatchCallback() is called. + * @param steps the accumulated processing time, in match steps, + * for this matching operation. + * @return TRUE to continue the matching operation. + * FALSE to terminate the matching operation. + * @stable ICU 4.0 + */ +U_CDECL_BEGIN +typedef UBool U_CALLCONV URegexMatchCallback ( + const void *context, + int32_t steps); +U_CDECL_END + +/** + * Set a callback function for this URegularExpression. + * During matching operations the function will be called periodically, + * giving the application the opportunity to terminate a long-running + * match. + * + * @param regexp The compiled regular expression. + * @param callback A pointer to the user-supplied callback function. + * @param context User context pointer. The value supplied at the + * time the callback function is set will be saved + * and passed to the callback each time that it is called. + * @param status A reference to a UErrorCode to receive any errors. 
+ * @stable ICU 4.0 + */ +U_STABLE void U_EXPORT2 +uregex_setMatchCallback(URegularExpression *regexp, + URegexMatchCallback *callback, + const void *context, + UErrorCode *status); + + +/** + * Get the callback function for this URegularExpression. + * + * @param regexp The compiled regular expression. + * @param callback Out parameter, receives a pointer to the user-supplied + * callback function. + * @param context Out parameter, receives the user context pointer that + * was set when uregex_setMatchCallback() was called. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.0 + */ +U_STABLE void U_EXPORT2 +uregex_getMatchCallback(const URegularExpression *regexp, + URegexMatchCallback **callback, + const void **context, + UErrorCode *status); + +/** + * Function pointer for a regular expression find callback function. + * + * When set, a callback function will be called during a find operation + * and for operations that depend on find, such as findNext, split and some replace + * operations like replaceFirst. + * The callback will usually be called after each attempt at a match, but this is not a + * guarantee that the callback will be invoked at each character. For finds where the + * match engine is invoked at each character, this may be close to true, but less likely + * for more optimized loops where the pattern is known to only start, and the match + * engine invoked, at certain characters. + * When invoked, this callback will specify the index at which a match operation is about + * to be attempted, giving the application the opportunity to terminate a long-running + * find operation. + * + * If the call back function returns FALSE, the find operation will be terminated early. + * + * Note: the callback function must not call other functions on this + * URegularExpression + * + * @param context context pointer. 
The callback function will be invoked + * with the context specified at the time that + * uregex_setFindProgressCallback() is called. + * @param matchIndex the next index at which a match attempt will be attempted for this + * find operation. If this callback interrupts the search, this is the + * index at which a find/findNext operation may be re-initiated. + * @return TRUE to continue the matching operation. + * FALSE to terminate the matching operation. + * @stable ICU 4.6 + */ +U_CDECL_BEGIN +typedef UBool U_CALLCONV URegexFindProgressCallback ( + const void *context, + int64_t matchIndex); +U_CDECL_END + + +/** + * Set the find progress callback function for this URegularExpression. + * + * @param regexp The compiled regular expression. + * @param callback A pointer to the user-supplied callback function. + * @param context User context pointer. The value supplied at the + * time the callback function is set will be saved + * and passed to the callback each time that it is called. + * @param status A reference to a UErrorCode to receive any errors. + * @stable ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_setFindProgressCallback(URegularExpression *regexp, + URegexFindProgressCallback *callback, + const void *context, + UErrorCode *status); + +/** + * Get the find progress callback function for this URegularExpression. + * + * @param regexp The compiled regular expression. + * @param callback Out parameter, receives a pointer to the user-supplied + * callback function. + * @param context Out parameter, receives the user context pointer that + * was set when uregex_setFindProgressCallback() was called. + * @param status A reference to a UErrorCode to receive any errors. 
+ * @stable ICU 4.6 + */ +U_STABLE void U_EXPORT2 +uregex_getFindProgressCallback(const URegularExpression *regexp, + URegexFindProgressCallback **callback, + const void **context, + UErrorCode *status); + +#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ +#endif /* UREGEX_H */ diff --git a/intl/icu/source/i18n/unicode/uregion.h b/intl/icu/source/i18n/unicode/uregion.h new file mode 100644 index 000000000..1b0f146e8 --- /dev/null +++ b/intl/icu/source/i18n/unicode/uregion.h @@ -0,0 +1,252 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +***************************************************************************************** +*/ + +#ifndef UREGION_H +#define UREGION_H + +#include "unicode/utypes.h" +#include "unicode/uenum.h" + +/** + * \file + * \brief C API: URegion (territory containment and mapping) + * + * URegion objects represent data associated with a particular Unicode Region Code, also known as a + * Unicode Region Subtag, which is defined based upon the BCP 47 standard. These include: + * * Two-letter codes defined by ISO 3166-1, with special LDML treatment of certain private-use or + * reserved codes; + * * A subset of 3-digit numeric codes defined by UN M.49. + * URegion objects can also provide mappings to and from additional codes. There are different types + * of regions that are important to distinguish: + * <p> + * Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or + * selected economic and other grouping" as defined in UN M.49. These are typically 3-digit codes, + * but contain some 2-letter codes for LDML extensions, such as "QO" for Outlying Oceania. 
+ * Macroregions are represented in ICU by one of three region types: WORLD (code 001), + * CONTINENTS (regions contained directly by WORLD), and SUBCONTINENTS (regions contained directly + * by a continent ). + * <p> + * TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also + * include areas that are not separate countries, such as the code "AQ" for Antarctica or the code + * "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate + * codes. The codes are typically 2-letter codes aligned with ISO 3166, but BCP47 allows for the use + * of 3-digit codes in the future. + * <p> + * UNKNOWN - The code ZZ is defined by Unicode LDML for use in indicating that region is unknown, + * or that the value supplied as a region was invalid. + * <p> + * DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage, + * usually due to a country splitting into multiple territories or changing its name. + * <p> + * GROUPING - A widely understood grouping of territories that has a well defined membership such + * that a region code has been assigned for it. Some of these are UN M.49 codes that don't fall into + * the world/continent/sub-continent hierarchy, while others are just well-known groupings that have + * their own region code. Region "EU" (European Union) is one such region code that is a grouping. + * Groupings will never be returned by the uregion_getContainingRegion, since a different type of region + * (WORLD, CONTINENT, or SUBCONTINENT) will always be the containing region instead. + * + * URegion objects are const/immutable, owned and maintained by ICU itself, so there are not functions + * to open or close them. + */ + +/** + * URegionType is an enumeration defining the different types of regions. Current possible + * values are URGN_WORLD, URGN_CONTINENT, URGN_SUBCONTINENT, URGN_TERRITORY, URGN_GROUPING, + * URGN_DEPRECATED, and URGN_UNKNOWN. 
+ * + * @stable ICU 51 + */ +typedef enum URegionType { + /** + * Type representing the unknown region. + * @stable ICU 51 + */ + URGN_UNKNOWN, + + /** + * Type representing a territory. + * @stable ICU 51 + */ + URGN_TERRITORY, + + /** + * Type representing the whole world. + * @stable ICU 51 + */ + URGN_WORLD, + + /** + * Type representing a continent. + * @stable ICU 51 + */ + URGN_CONTINENT, + + /** + * Type representing a sub-continent. + * @stable ICU 51 + */ + URGN_SUBCONTINENT, + + /** + * Type representing a grouping of territories that is not to be used in + * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree. + * @stable ICU 51 + */ + URGN_GROUPING, + + /** + * Type representing a region whose code has been deprecated, usually + * due to a country splitting into multiple territories or changing its name. + * @stable ICU 51 + */ + URGN_DEPRECATED, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal URegionType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + URGN_LIMIT +#endif // U_HIDE_DEPRECATED_API +} URegionType; + +#if !UCONFIG_NO_FORMATTING + +/** + * Opaque URegion object for use in C programs. + * @stable ICU 52 + */ +struct URegion; +typedef struct URegion URegion; /**< @stable ICU 52 */ + +/** + * Returns a pointer to a URegion for the specified region code: A 2-letter or 3-letter ISO 3166 + * code, UN M.49 numeric code (superset of ISO 3166 numeric codes), or other valid Unicode Region + * Code as defined by the LDML specification. The code will be canonicalized internally. If the + * region code is NULL or not recognized, the appropriate error code will be set + * (U_ILLEGAL_ARGUMENT_ERROR). + * @stable ICU 52 + */ +U_STABLE const URegion* U_EXPORT2 +uregion_getRegionFromCode(const char *regionCode, UErrorCode *status); + +/** + * Returns a pointer to a URegion for the specified numeric region code. 
If the numeric region + * code is not recognized, the appropriate error code will be set (U_ILLEGAL_ARGUMENT_ERROR). + * @stable ICU 52 + */ +U_STABLE const URegion* U_EXPORT2 +uregion_getRegionFromNumericCode (int32_t code, UErrorCode *status); + +/** + * Returns an enumeration over the canonical codes of all known regions that match the given type. + * The enumeration must be closed with uenum_close(). + * @stable ICU 52 + */ +U_STABLE UEnumeration* U_EXPORT2 +uregion_getAvailable(URegionType type, UErrorCode *status); + +/** + * Returns true if the specified uregion is equal to the specified otherRegion. + * @stable ICU 52 + */ +U_STABLE UBool U_EXPORT2 +uregion_areEqual(const URegion* uregion, const URegion* otherRegion); + +/** + * Returns a pointer to the URegion that contains the specified uregion. Returns NULL if the + * specified uregion is code "001" (World) or "ZZ" (Unknown region). For example, calling + * this method with region "IT" (Italy) returns the URegion for "039" (Southern Europe). + * @stable ICU 52 + */ +U_STABLE const URegion* U_EXPORT2 +uregion_getContainingRegion(const URegion* uregion); + +/** + * Return a pointer to the URegion that geographically contains this uregion and matches the + * specified type, moving multiple steps up the containment chain if necessary. Returns NULL if no + * containing region can be found that matches the specified type. Will return NULL if URegionType + * is URGN_GROUPING, URGN_DEPRECATED, or URGN_UNKNOWN which are not appropriate for this API. + * For example, calling this method with uregion "IT" (Italy) for type URGN_CONTINENT returns the + * URegion "150" (Europe). + * @stable ICU 52 + */ +U_STABLE const URegion* U_EXPORT2 +uregion_getContainingRegionOfType(const URegion* uregion, URegionType type); + +/** + * Return an enumeration over the canonical codes of all the regions that are immediate children + * of the specified uregion in the region hierarchy.
These returned regions could be either macro + * regions, territories, or a mixture of the two, depending on the containment data as defined in + * CLDR. This API returns NULL if this uregion doesn't have any sub-regions. For example, calling + * this function for uregion "150" (Europe) returns an enumeration containing the various + * sub-regions of Europe: "039" (Southern Europe), "151" (Eastern Europe), "154" (Northern Europe), + * and "155" (Western Europe). The enumeration must be closed with uenum_close(). + * @stable ICU 52 + */ +U_STABLE UEnumeration* U_EXPORT2 +uregion_getContainedRegions(const URegion* uregion, UErrorCode *status); + +/** + * Returns an enumeration over the canonical codes of all the regions that are children of the + * specified uregion anywhere in the region hierarchy and match the given type. This API may return + * an empty enumeration if this uregion doesn't have any sub-regions that match the given type. + * For example, calling this method with region "150" (Europe) and type URGN_TERRITORY returns an + * enumeration containing all the territories in Europe: "FR" (France), "IT" (Italy), "DE" (Germany), + * etc. The enumeration must be closed with uenum_close(). + * @stable ICU 52 + */ +U_STABLE UEnumeration* U_EXPORT2 +uregion_getContainedRegionsOfType(const URegion* uregion, URegionType type, UErrorCode *status); + +/** + * Returns true if the specified uregion contains the specified otherRegion anywhere in the region + * hierarchy. + * @stable ICU 52 + */ +U_STABLE UBool U_EXPORT2 +uregion_contains(const URegion* uregion, const URegion* otherRegion); + +/** + * If the specified uregion is deprecated, returns an enumeration over the canonical codes of the + * regions that are the preferred replacement regions for the specified uregion. If the specified + * uregion is not deprecated, returns NULL.
For example, calling this method with uregion + * "SU" (Soviet Union) returns a list of the regions containing "RU" (Russia), "AM" (Armenia), + * "AZ" (Azerbaijan), etc... The enumeration must be closed with uenum_close(). + * @stable ICU 52 + */ +U_STABLE UEnumeration* U_EXPORT2 +uregion_getPreferredValues(const URegion* uregion, UErrorCode *status); + +/** + * Returns the specified uregion's canonical code. + * @stable ICU 52 + */ +U_STABLE const char* U_EXPORT2 +uregion_getRegionCode(const URegion* uregion); + +/** + * Returns the specified uregion's numeric code, or a negative value if there is no numeric code + * for the specified uregion. + * @stable ICU 52 + */ +U_STABLE int32_t U_EXPORT2 +uregion_getNumericCode(const URegion* uregion); + +/** + * Returns the URegionType of the specified uregion. + * @stable ICU 52 + */ +U_STABLE URegionType U_EXPORT2 +uregion_getType(const URegion* uregion); + + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/intl/icu/source/i18n/unicode/ureldatefmt.h b/intl/icu/source/i18n/unicode/ureldatefmt.h new file mode 100644 index 000000000..40b3d5931 --- /dev/null +++ b/intl/icu/source/i18n/unicode/ureldatefmt.h @@ -0,0 +1,371 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2016, International Business Machines +* Corporation and others. All Rights Reserved. +***************************************************************************************** +*/ + +#ifndef URELDATEFMT_H +#define URELDATEFMT_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/unum.h" +#include "unicode/udisplaycontext.h" +#include "unicode/localpointer.h" + +/** + * \file + * \brief C API: URelativeDateTimeFormatter, relative date formatting of unit + numeric offset.
+ * + * Provides simple formatting of relative dates, in two ways + * <ul> + * <li>relative dates with a quantity e.g "in 5 days"</li> + * <li>relative dates without a quantity e.g "next Tuesday"</li> + * </ul> + * <p> + * This does not provide compound formatting for multiple units, + * other than the ability to combine a time string with a relative date, + * as in "next Tuesday at 3:45 PM". It also does not provide support + * for determining which unit to use, such as deciding between "in 7 days" + * and "in 1 week". + * + * @draft ICU 57 + */ + +/** + * The formatting style + * @stable ICU 54 + */ +typedef enum UDateRelativeDateTimeFormatterStyle { + /** + * Everything spelled out. + * @stable ICU 54 + */ + UDAT_STYLE_LONG, + + /** + * Abbreviations used when possible. + * @stable ICU 54 + */ + UDAT_STYLE_SHORT, + + /** + * Use the shortest possible form. + * @stable ICU 54 + */ + UDAT_STYLE_NARROW, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UDateRelativeDateTimeFormatterStyle value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDAT_STYLE_COUNT +#endif // U_HIDE_DEPRECATED_API +} UDateRelativeDateTimeFormatterStyle; + +#ifndef U_HIDE_DRAFT_API +/** + * Represents the unit for formatting a relative date. e.g "in 5 days" + * or "next year" + * @draft ICU 57 + */ +typedef enum URelativeDateTimeUnit { + /** + * Specifies that relative unit is year, e.g. "last year", + * "in 5 years". + * @draft ICU 57 + */ + UDAT_REL_UNIT_YEAR, + /** + * Specifies that relative unit is quarter, e.g. "last quarter", + * "in 5 quarters". + * @draft ICU 57 + */ + UDAT_REL_UNIT_QUARTER, + /** + * Specifies that relative unit is month, e.g. "last month", + * "in 5 months". + * @draft ICU 57 + */ + UDAT_REL_UNIT_MONTH, + /** + * Specifies that relative unit is week, e.g. "last week", + * "in 5 weeks". + * @draft ICU 57 + */ + UDAT_REL_UNIT_WEEK, + /** + * Specifies that relative unit is day, e.g. 
"yesterday", + * "in 5 days". + * @draft ICU 57 + */ + UDAT_REL_UNIT_DAY, + /** + * Specifies that relative unit is hour, e.g. "1 hour ago", + * "in 5 hours". + * @draft ICU 57 + */ + UDAT_REL_UNIT_HOUR, + /** + * Specifies that relative unit is minute, e.g. "1 minute ago", + * "in 5 minutes". + * @draft ICU 57 + */ + UDAT_REL_UNIT_MINUTE, + /** + * Specifies that relative unit is second, e.g. "1 second ago", + * "in 5 seconds". + * @draft ICU 57 + */ + UDAT_REL_UNIT_SECOND, + /** + * Specifies that relative unit is Sunday, e.g. "last Sunday", + * "this Sunday", "next Sunday", "in 5 Sundays". + * @draft ICU 57 + */ + UDAT_REL_UNIT_SUNDAY, + /** + * Specifies that relative unit is Monday, e.g. "last Monday", + * "this Monday", "next Monday", "in 5 Mondays". + * @draft ICU 57 + */ + UDAT_REL_UNIT_MONDAY, + /** + * Specifies that relative unit is Tuesday, e.g. "last Tuesday", + * "this Tuesday", "next Tuesday", "in 5 Tuesdays". + * @draft ICU 57 + */ + UDAT_REL_UNIT_TUESDAY, + /** + * Specifies that relative unit is Wednesday, e.g. "last Wednesday", + * "this Wednesday", "next Wednesday", "in 5 Wednesdays". + * @draft ICU 57 + */ + UDAT_REL_UNIT_WEDNESDAY, + /** + * Specifies that relative unit is Thursday, e.g. "last Thursday", + * "this Thursday", "next Thursday", "in 5 Thursdays". + * @draft ICU 57 + */ + UDAT_REL_UNIT_THURSDAY, + /** + * Specifies that relative unit is Friday, e.g. "last Friday", + * "this Friday", "next Friday", "in 5 Fridays". + * @draft ICU 57 + */ + UDAT_REL_UNIT_FRIDAY, + /** + * Specifies that relative unit is Saturday, e.g. "last Saturday", + * "this Saturday", "next Saturday", "in 5 Saturdays". + * @draft ICU 57 + */ + UDAT_REL_UNIT_SATURDAY, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal URelativeDateTimeUnit value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
+ */ + UDAT_REL_UNIT_COUNT +#endif // U_HIDE_DEPRECATED_API +} URelativeDateTimeUnit; +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + +/** + * Opaque URelativeDateTimeFormatter object for use in C programs. + * @draft ICU 57 + */ +struct URelativeDateTimeFormatter; +typedef struct URelativeDateTimeFormatter URelativeDateTimeFormatter; /**< C typedef for struct URelativeDateTimeFormatter. @draft ICU 57 */ + + +/** + * Open a new URelativeDateTimeFormatter object for a given locale using the + * specified width and capitalizationContext, along with a number formatter + * (if desired) to override the default formatter that would be used for + * display of numeric field offsets. The default formatter typically rounds + * toward 0 and has a minimum of 0 fraction digits and a maximum of 3 + * fraction digits (i.e. it will show as many decimal places as necessary + * up to 3, without showing trailing 0s). + * + * @param locale + * The locale + * @param nfToAdopt + * A number formatter to set for this URelativeDateTimeFormatter + * object (instead of the default decimal formatter). Ownership of + * this UNumberFormat object will pass to the URelativeDateTimeFormatter + * object (the URelativeDateTimeFormatter adopts the UNumberFormat), + * which becomes responsible for closing it. If the caller wishes to + * retain ownership of the UNumberFormat object, the caller must clone + * it (with unum_clone) and pass the clone to ureldatefmt_open. May be + * NULL to use the default decimal formatter. + * @param width + * The width - wide, short, narrow, etc. + * @param capitalizationContext + * A value from UDisplayContext that pertains to capitalization, e.g. + * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE. + * @param status + * A pointer to a UErrorCode to receive any errors. + * @return + * A pointer to a URelativeDateTimeFormatter object for the specified locale, + * or NULL if an error occurred. 
+ * @draft ICU 57 + */ +U_DRAFT URelativeDateTimeFormatter* U_EXPORT2 +ureldatefmt_open( const char* locale, + UNumberFormat* nfToAdopt, + UDateRelativeDateTimeFormatterStyle width, + UDisplayContext capitalizationContext, + UErrorCode* status ); + +/** + * Close a URelativeDateTimeFormatter object. Once closed it may no longer be used. + * @param reldatefmt + * The URelativeDateTimeFormatter object to close. + * @draft ICU 57 + */ +U_DRAFT void U_EXPORT2 +ureldatefmt_close(URelativeDateTimeFormatter *reldatefmt); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalURelativeDateTimeFormatterPointer + * "Smart pointer" class, closes a URelativeDateTimeFormatter via ureldatefmt_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 57 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalURelativeDateTimeFormatterPointer, URelativeDateTimeFormatter, ureldatefmt_close); + +U_NAMESPACE_END + +#endif + +/** + * Format a combination of URelativeDateTimeUnit and numeric + * offset using a numeric style, e.g. "1 week ago", "in 1 week", + * "5 weeks ago", "in 5 weeks". + * + * @param reldatefmt + * The URelativeDateTimeFormatter object specifying the + * format conventions. + * @param offset + * The signed offset for the specified unit. This will + * be formatted according to this object's UNumberFormat + * object. + * @param unit + * The unit to use when formatting the relative + * date, e.g. UDAT_REL_UNIT_WEEK, UDAT_REL_UNIT_FRIDAY. + * @param result + * A pointer to a buffer to receive the formatted result. + * @param resultCapacity + * The maximum size of result. + * @param status + * A pointer to a UErrorCode to receive any errors. In + * case of error status, the contents of result are + * undefined. + * @return + * The length of the formatted result; may be greater + * than resultCapacity, in which case an error is returned. 
+ * @draft ICU 57 + */ +U_DRAFT int32_t U_EXPORT2 +ureldatefmt_formatNumeric( const URelativeDateTimeFormatter* reldatefmt, + double offset, + URelativeDateTimeUnit unit, + UChar* result, + int32_t resultCapacity, + UErrorCode* status); + +/** + * Format a combination of URelativeDateTimeUnit and numeric offset + * using a text style if possible, e.g. "last week", "this week", + * "next week", "yesterday", "tomorrow". Falls back to numeric + * style if no appropriate text term is available for the specified + * offset in the object's locale. + * + * @param reldatefmt + * The URelativeDateTimeFormatter object specifying the + * format conventions. + * @param offset + * The signed offset for the specified unit. + * @param unit + * The unit to use when formatting the relative + * date, e.g. UDAT_REL_UNIT_WEEK, UDAT_REL_UNIT_FRIDAY. + * @param result + * A pointer to a buffer to receive the formatted result. + * @param resultCapacity + * The maximum size of result. + * @param status + * A pointer to a UErrorCode to receive any errors. In + * case of error status, the contents of result are + * undefined. + * @return + * The length of the formatted result; may be greater + * than resultCapacity, in which case an error is returned. + * @draft ICU 57 + */ +U_DRAFT int32_t U_EXPORT2 +ureldatefmt_format( const URelativeDateTimeFormatter* reldatefmt, + double offset, + URelativeDateTimeUnit unit, + UChar* result, + int32_t resultCapacity, + UErrorCode* status); + +/** + * Combines a relative date string and a time string in this object's + * locale. This is done with the same date-time separator used for the + * default calendar in this locale to produce a result such as + * "yesterday at 3:45 PM". + * + * @param reldatefmt + * The URelativeDateTimeFormatter object specifying the format conventions. + * @param relativeDateString + * The relative date string. 
+ * @param relativeDateStringLen + * The length of relativeDateString; may be -1 if relativeDateString + * is zero-terminated. + * @param timeString + * The time string. + * @param timeStringLen + * The length of timeString; may be -1 if timeString is zero-terminated. + * @param result + * A pointer to a buffer to receive the formatted result. + * @param resultCapacity + * The maximum size of result. + * @param status + * A pointer to a UErrorCode to receive any errors. In case of error status, + * the contents of result are undefined. + * @return + * The length of the formatted result; may be greater than resultCapacity, + * in which case an error is returned. + * @draft ICU 57 + */ +U_DRAFT int32_t U_EXPORT2 +ureldatefmt_combineDateAndTime( const URelativeDateTimeFormatter* reldatefmt, + const UChar * relativeDateString, + int32_t relativeDateStringLen, + const UChar * timeString, + int32_t timeStringLen, + UChar* result, + int32_t resultCapacity, + UErrorCode* status ); + +#endif /* U_HIDE_DRAFT_API */ + +#endif /* !UCONFIG_NO_FORMATTING && !UCONFIG_NO_BREAK_ITERATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/usearch.h b/intl/icu/source/i18n/unicode/usearch.h new file mode 100644 index 000000000..b1d53f512 --- /dev/null +++ b/intl/icu/source/i18n/unicode/usearch.h @@ -0,0 +1,891 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2011,2014 IBM and others. All rights reserved. +********************************************************************** +* Date Name Description +* 06/28/2001 synwee Creation. 
+********************************************************************** +*/ +#ifndef USEARCH_H +#define USEARCH_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/localpointer.h" +#include "unicode/ucol.h" +#include "unicode/ucoleitr.h" +#include "unicode/ubrk.h" + +/** + * \file + * \brief C API: StringSearch + * + * C Apis for an engine that provides language-sensitive text searching based + * on the comparison rules defined in a <tt>UCollator</tt> data struct, + * see <tt>ucol.h</tt>. This ensures that language eccentricity can be + * handled, e.g. for the German collator, characters ß and SS will be matched + * if case is chosen to be ignored. + * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> + * "ICU Collation Design Document"</a> for more information. + * <p> + * The implementation may use a linear search or a modified form of the Boyer-Moore + * search; for more information on the latter see + * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html"> + * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i> + * in February, 1999. + * <p> + * There are 2 match options for selection:<br> + * Let S' be the sub-string of a text string S between the offsets start and + * end <start, end>. + * <br> + * A pattern string P matches a text string S at the offsets <start, end> + * if + * <pre> + * option 1. Some canonical equivalent of P matches some canonical equivalent + * of S' + * option 2. P matches S' and if P starts or ends with a combining mark, + * there exists no non-ignorable combining mark before or after S' + * in S respectively. + * </pre> + * Option 2. will be the default. + * <p> + * This search has APIs similar to that of other text iteration mechanisms + * such as the break iterators in <tt>ubrk.h</tt>. 
Using these + * APIs, it is easy to scan through text looking for all occurrences of + * a given pattern. This search iterator allows changing of direction by + * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. + * Though a direction change can occur without calling <tt>reset</tt> first, + * this operation comes with some speed penalty. + * Generally, match results in the forward direction will match the result + * matches in the backwards direction in the reverse order. + * <p> + * <tt>usearch.h</tt> provides APIs to specify the starting position + * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>, + * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the + * starting position will be set as it is specified, please take note that + * there are some dangerous positions which the search may render incorrect + * results: + * <ul> + * <li> The midst of a substring that requires normalization. + * <li> If the following match is to be found, the position should not be the + * second character which requires to be swapped with the preceding + * character. Vice versa, if the preceding match is to be found, + * position to search from should not be the first character which + * requires to be swapped with the next character. E.g. certain Thai and + * Lao characters require swapping. + * <li> If a following pattern match is to be found, any position within a + * contracting sequence except the first will fail. Vice versa if a + * preceding pattern match is to be found, an invalid starting point + * would be any character within a contracting sequence except the last. + * </ul> + * <p> + * A breakiterator can be used if only matches at logical breaks are desired. + * Using a breakiterator will only give you results that exactly match the + * boundaries given by the breakiterator. For instance the pattern "e" will + * not be found in the string "\u00e9" if a character break iterator is used.
+ * <p> + * Options are provided to handle overlapping matches. + * E.g. In English, overlapping matches produces the result 0 and 2 + * for the pattern "abab" in the text "ababab", where else mutually + * exclusive matches only produce the result of 0. + * <p> + * Options are also provided to implement "asymmetric search" as described in + * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search"> + * UTS #10 Unicode Collation Algorithm</a>, specifically the USearchAttribute + * USEARCH_ELEMENT_COMPARISON and its values. + * <p> + * Though collator attributes will be taken into consideration while + * performing matches, there are no APIs here for setting and getting the + * attributes. These attributes can be set by getting the collator + * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>. + * Lastly to update String Search to the new collator attributes, + * usearch_reset() has to be called. + * <p> + * Restriction: <br> + * Currently there are no composite characters that consists of a + * character with combining class > 0 before a character with combining + * class == 0. However, if such a character exists in the future, the + * search mechanism does not guarantee the results for option 1. 
+ * + * <p> + * Example of use:<br> + * <pre><code> + * char *tgtstr = "The quick brown fox jumped over the lazy fox"; + * char *patstr = "fox"; + * UChar target[64]; + * UChar pattern[16]; + * UErrorCode status = U_ZERO_ERROR; + * u_uastrcpy(target, tgtstr); + * u_uastrcpy(pattern, patstr); + * + * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US", + * NULL, &status); + * if (U_SUCCESS(status)) { + * for (int pos = usearch_first(search, &status); + * pos != USEARCH_DONE; + * pos = usearch_next(search, &status)) + * { + * printf("Found match at %d pos, length is %d\n", pos, + * usearch_getMatchedLength(search)); + * } + * } + * + * usearch_close(search); + * </code></pre> + * @stable ICU 2.4 + */ + +/** +* DONE is returned by previous() and next() after all valid matches have +* been returned, and by first() and last() if there are no matches at all. +* @stable ICU 2.4 +*/ +#define USEARCH_DONE -1 + +/** +* Data structure for searching +* @stable ICU 2.4 +*/ +struct UStringSearch; +/** +* Data structure for searching +* @stable ICU 2.4 +*/ +typedef struct UStringSearch UStringSearch; + +/** +* @stable ICU 2.4 +*/ +typedef enum { + /** + * Option for overlapping matches + * @stable ICU 2.4 + */ + USEARCH_OVERLAP = 0, +#ifndef U_HIDE_DEPRECATED_API + /** + * Option for canonical matches; option 1 in header documentation. + * The default value will be USEARCH_OFF. + * Note: Setting this option to USEARCH_ON currently has no effect on + * search behavior, and this option is deprecated. Instead, to control + * canonical match behavior, you must set UCOL_NORMALIZATION_MODE + * appropriately (to UCOL_OFF or UCOL_ON) in the UCollator used by + * the UStringSearch object. + * @see usearch_openFromCollator + * @see usearch_getCollator + * @see usearch_setCollator + * @see ucol_getAttribute + * @deprecated ICU 53 + */ + USEARCH_CANONICAL_MATCH = 1, +#endif /* U_HIDE_DEPRECATED_API */ + /** + * Option to control how collation elements are compared.
+ * The default value will be USEARCH_STANDARD_ELEMENT_COMPARISON. + * @stable ICU 4.4 + */ + USEARCH_ELEMENT_COMPARISON = 2, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal USearchAttribute value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + USEARCH_ATTRIBUTE_COUNT = 3 +#endif // U_HIDE_DEPRECATED_API +} USearchAttribute; + +/** +* @stable ICU 2.4 +*/ +typedef enum { + /** + * Default value for any USearchAttribute + * @stable ICU 2.4 + */ + USEARCH_DEFAULT = -1, + /** + * Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH + * @stable ICU 2.4 + */ + USEARCH_OFF, + /** + * Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH + * @stable ICU 2.4 + */ + USEARCH_ON, + /** + * Value (default) for USEARCH_ELEMENT_COMPARISON; + * standard collation element comparison at the specified collator + * strength. + * @stable ICU 4.4 + */ + USEARCH_STANDARD_ELEMENT_COMPARISON, + /** + * Value for USEARCH_ELEMENT_COMPARISON; + * collation element comparison is modified to effectively provide + * behavior between the specified strength and strength - 1. Collation + * elements in the pattern that have the base weight for the specified + * strength are treated as "wildcards" that match an element with any + * other weight at that collation level in the searched text. For + * example, with a secondary-strength English collator, a plain 'e' in + * the pattern will match a plain e or an e with any diacritic in the + * searched text, but an e with diacritic in the pattern will only + * match an e with the same diacritic in the searched text. + * + * This supports "asymmetric search" as described in + * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search"> + * UTS #10 Unicode Collation Algorithm</a>. + * + * @stable ICU 4.4 + */ + USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, + /** + * Value for USEARCH_ELEMENT_COMPARISON. 
+ * collation element comparison is modified to effectively provide + * behavior between the specified strength and strength - 1. Collation + * elements in either the pattern or the searched text that have the + * base weight for the specified strength are treated as "wildcards" + * that match an element with any other weight at that collation level. + * For example, with a secondary-strength English collator, a plain 'e' + * in the pattern will match a plain e or an e with any diacritic in the + * searched text, but an e with diacritic in the pattern will only + * match an e with the same diacritic or a plain e in the searched text. + * + * This option is similar to "asymmetric search" as described in + * <a href="http://www.unicode.org/reports/tr10/#Asymmetric_Search"> + * UTS #10 Unicode Collation Algorithm</a>, but also allows unmarked + * characters in the searched text to match marked or unmarked versions of + * that character in the pattern. + * + * @stable ICU 4.4 + */ + USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal USearchAttributeValue value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + USEARCH_ATTRIBUTE_VALUE_COUNT +#endif // U_HIDE_DEPRECATED_API +} USearchAttributeValue; + +/* open and close ------------------------------------------------------ */ + +/** +* Creating a search iterator data struct using the argument locale language +* rule set. A collator will be created in the process, which will be owned by +* this search and will be deleted in <tt>usearch_close</tt>. +* @param pattern for matching +* @param patternlength length of the pattern, -1 for null-termination +* @param text text string +* @param textlength length of the text string, -1 for null-termination +* @param locale name of locale for the rules to be used +* @param breakiter A BreakIterator that will be used to restrict the points +* at which matches are detected.
If a match is found, but +* the match's start or end index is not a boundary as +* determined by the <tt>BreakIterator</tt>, the match will +* be rejected and another will be searched for. +* If this parameter is <tt>NULL</tt>, no break detection is +* attempted. +* @param status for errors if it occurs. If pattern or text is NULL, or if +* patternlength or textlength is 0 then an +* U_ILLEGAL_ARGUMENT_ERROR is returned. +* @return search iterator data structure, or NULL if there is an error. +* @stable ICU 2.4 +*/ +U_STABLE UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern, + int32_t patternlength, + const UChar *text, + int32_t textlength, + const char *locale, + UBreakIterator *breakiter, + UErrorCode *status); + +/** +* Creating a search iterator data struct using the argument collator language +* rule set. Note, user retains the ownership of this collator, thus the +* responsibility of deletion lies with the user. +* NOTE: string search cannot be instantiated from a collator that has +* collate digits as numbers (CODAN) turned on. +* @param pattern for matching +* @param patternlength length of the pattern, -1 for null-termination +* @param text text string +* @param textlength length of the text string, -1 for null-termination +* @param collator used for the language rules +* @param breakiter A BreakIterator that will be used to restrict the points +* at which matches are detected. If a match is found, but +* the match's start or end index is not a boundary as +* determined by the <tt>BreakIterator</tt>, the match will +* be rejected and another will be searched for. +* If this parameter is <tt>NULL</tt>, no break detection is +* attempted. +* @param status for errors if it occurs. If collator, pattern or text is NULL, +* or if patternlength or textlength is 0 then an +* U_ILLEGAL_ARGUMENT_ERROR is returned. +* @return search iterator data structure, or NULL if there is an error. 
+* @stable ICU 2.4 +*/ +U_STABLE UStringSearch * U_EXPORT2 usearch_openFromCollator( + const UChar *pattern, + int32_t patternlength, + const UChar *text, + int32_t textlength, + const UCollator *collator, + UBreakIterator *breakiter, + UErrorCode *status); + +/** +* Destroying and cleaning up the search iterator data struct. +* If a collator is created in <tt>usearch_open</tt>, it will be destroyed here. +* @param searchiter data struct to clean up +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_close(UStringSearch *searchiter); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUStringSearchPointer + * "Smart pointer" class, closes a UStringSearch via usearch_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringSearchPointer, UStringSearch, usearch_close); + +U_NAMESPACE_END + +#endif + +/* get and set methods -------------------------------------------------- */ + +/** +* Sets the current position in the text string which the next search will +* start from. Clears previous states. +* This method takes the argument index and sets the position in the text +* string accordingly without checking if the index is pointing to a +* valid starting point to begin searching. +* Search positions that may render incorrect results are highlighted in the +* header comments +* @param strsrch search iterator data struct +* @param position position to start next search from. If position is less +* than or greater than the text range for searching, +* an U_INDEX_OUTOFBOUNDS_ERROR will be returned +* @param status error status if any. +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, + int32_t position, + UErrorCode *status); + +/** +* Return the current index in the string text being searched. 
+* If the iteration has gone past the end of the text (or past the beginning +* for a backwards search), <tt>USEARCH_DONE</tt> is returned. +* @param strsrch search iterator data struct +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch); + +/** +* Sets the text searching attributes located in the enum USearchAttribute +* with values from the enum USearchAttributeValue. +* <tt>USEARCH_DEFAULT</tt> can be used for all attributes for resetting. +* @param strsrch search iterator data struct +* @param attribute text attribute to be set +* @param value text attribute value +* @param status for errors if it occurs +* @see #usearch_getAttribute +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch, + USearchAttribute attribute, + USearchAttributeValue value, + UErrorCode *status); + +/** +* Gets the text searching attributes. +* @param strsrch search iterator data struct +* @param attribute text attribute to be retrieve +* @return text attribute value +* @see #usearch_setAttribute +* @stable ICU 2.4 +*/ +U_STABLE USearchAttributeValue U_EXPORT2 usearch_getAttribute( + const UStringSearch *strsrch, + USearchAttribute attribute); + +/** +* Returns the index to the match in the text string that was searched. +* This call returns a valid result only after a successful call to +* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, +* or <tt>usearch_last</tt>. +* Just after construction, or after a searching method returns +* <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>. +* <p> +* Use <tt>usearch_getMatchedLength</tt> to get the matched string length. +* @param strsrch search iterator data struct +* @return index to a substring within the text string that is being +* searched. 
+* @see #usearch_first +* @see #usearch_next +* @see #usearch_previous +* @see #usearch_last +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart( + const UStringSearch *strsrch); + +/** +* Returns the length of text in the string which matches the search pattern. +* This call returns a valid result only after a successful call to +* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, +* or <tt>usearch_last</tt>. +* Just after construction, or after a searching method returns +* <tt>USEARCH_DONE</tt>, this method will return 0. +* @param strsrch search iterator data struct +* @return The length of the match in the string text, or 0 if there is no +* match currently. +* @see #usearch_first +* @see #usearch_next +* @see #usearch_previous +* @see #usearch_last +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength( + const UStringSearch *strsrch); + +/** +* Returns the text that was matched by the most recent call to +* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>, +* or <tt>usearch_last</tt>. +* If the iterator is not pointing at a valid match (e.g. just after +* construction or after <tt>USEARCH_DONE</tt> has been returned), returns +* an empty string. If result is not large enough to store the matched text, +* result will be filled with the partial text and an U_BUFFER_OVERFLOW_ERROR +* will be returned in status. result will be null-terminated whenever +* possible. If the buffer fits the matched text exactly, a null-termination +* is not possible, so a U_STRING_NOT_TERMINATED_ERROR is set in status. +* Pre-flighting can be either done with resultCapacity = 0 or the API +* <tt>usearch_getMatchedLength</tt>.
+* @param strsrch search iterator data struct +* @param result UChar buffer to store the matched string +* @param resultCapacity length of the result buffer +* @param status error returned if result is not large enough +* @return exact length of the matched text, not counting the null-termination +* @see #usearch_first +* @see #usearch_next +* @see #usearch_previous +* @see #usearch_last +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch, + UChar *result, + int32_t resultCapacity, + UErrorCode *status); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** +* Set the BreakIterator that will be used to restrict the points at which +* matches are detected. +* @param strsrch search iterator data struct +* @param breakiter A BreakIterator that will be used to restrict the points +* at which matches are detected. If a match is found, but +* the match's start or end index is not a boundary as +* determined by the <tt>BreakIterator</tt>, the match will +* be rejected and another will be searched for. +* If this parameter is <tt>NULL</tt>, no break detection is +* attempted. +* @param status for errors if it occurs +* @see #usearch_getBreakIterator +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch, + UBreakIterator *breakiter, + UErrorCode *status); + +/** +* Returns the BreakIterator that is used to restrict the points at which +* matches are detected. This will be the same object that was passed to the +* constructor or to <tt>usearch_setBreakIterator</tt>. Note that +* <tt>NULL</tt> +* is a legal value; it means that break detection should not be attempted. +* @param strsrch search iterator data struct +* @return break iterator used +* @see #usearch_setBreakIterator +* @stable ICU 2.4 +*/ +U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator( + const UStringSearch *strsrch); + +#endif + +/** +* Set the string text to be searched. 
Text iteration will hence begin at the +* start of the text string. This method is useful if you want to re-use an +* iterator to search for the same pattern within a different body of text. +* @param strsrch search iterator data struct +* @param text new string to look for match +* @param textlength length of the new string, -1 for null-termination +* @param status for errors if it occurs. If text is NULL, or textlength is 0 +* then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change +* done to strsrch. +* @see #usearch_getText +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_setText( UStringSearch *strsrch, + const UChar *text, + int32_t textlength, + UErrorCode *status); + +/** +* Return the string text to be searched. +* @param strsrch search iterator data struct +* @param length returned string text length +* @return string text +* @see #usearch_setText +* @stable ICU 2.4 +*/ +U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch, + int32_t *length); + +/** +* Gets the collator used for the language rules. +* <p> +* Deleting the returned <tt>UCollator</tt> before calling +* <tt>usearch_close</tt> would cause the string search to fail. +* <tt>usearch_close</tt> will delete the collator if this search owns it. +* @param strsrch search iterator data struct +* @return collator +* @stable ICU 2.4 +*/ +U_STABLE UCollator * U_EXPORT2 usearch_getCollator( + const UStringSearch *strsrch); + +/** +* Sets the collator used for the language rules. User retains the ownership +* of this collator, thus the responsibility of deletion lies with the user. +* This method causes internal data such as Boyer-Moore shift tables to +* be recalculated, but the iterator's position is unchanged. 
+* @param strsrch search iterator data struct +* @param collator to be used +* @param status for errors if it occurs +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch, + const UCollator *collator, + UErrorCode *status); + +/** +* Sets the pattern used for matching. +* Internal data like the Boyer Moore table will be recalculated, but the +* iterator's position is unchanged. +* @param strsrch search iterator data struct +* @param pattern string +* @param patternlength pattern length, -1 for null-terminated string +* @param status for errors if it occurs. If pattern is NULL, or patternlength is 0 +* then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change +* done to strsrch. +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch, + const UChar *pattern, + int32_t patternlength, + UErrorCode *status); + +/** +* Gets the search pattern +* @param strsrch search iterator data struct +* @param length return length of the pattern, -1 indicates that the pattern +* is null-terminated +* @return pattern string +* @stable ICU 2.4 +*/ +U_STABLE const UChar * U_EXPORT2 usearch_getPattern( + const UStringSearch *strsrch, + int32_t *length); + +/* methods ------------------------------------------------------------- */ + +/** +* Returns the first index at which the string text matches the search +* pattern. +* The iterator is adjusted so that its current index (as returned by +* <tt>usearch_getOffset</tt>) is the match position if one was found. +* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and +* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. +* @param strsrch search iterator data struct +* @param status for errors if it occurs +* @return The character index of the first match, or +* <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch, + UErrorCode *status); + +/** +* Returns the first index equal or greater than <tt>position</tt> at which +* the string text +* matches the search pattern. The iterator is adjusted so that its current +* index (as returned by <tt>usearch_getOffset</tt>) is the match position if +* one was found. +* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and +* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. +* <p> +* Search positions that may render incorrect results are highlighted in the +* header comments. If position is less than or greater than the text range +* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned. +* @param strsrch search iterator data struct +* @param position to start the search at +* @param status for errors if it occurs +* @return The character index of the first match following <tt>position</tt>, +* or <tt>USEARCH_DONE</tt> if there are no matches. +* @see #usearch_getOffset +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch, + int32_t position, + UErrorCode *status); + +/** +* Returns the last index in the target text at which it matches the search +* pattern. The iterator is adjusted so that its current +* index (as returned by <tt>usearch_getOffset</tt>) is the match position if +* one was found. +* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and +* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. +* @param strsrch search iterator data struct +* @param status for errors if it occurs +* @return The index of the last match, or <tt>USEARCH_DONE</tt> if there +* are no matches.
+* @see #usearch_getOffset +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch, + UErrorCode *status); + +/** +* Returns the first index less than <tt>position</tt> at which the string text +* matches the search pattern. The iterator is adjusted so that its current +* index (as returned by <tt>usearch_getOffset</tt>) is the match position if +* one was found. +* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and +* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. +* <p> +* Search positions that may render incorrect results are highlighted in the +* header comments. If position is less than or greater than the text range +* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned. +* <p> +* When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the +* result match is always less than <tt>position</tt>. +* When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across +* <tt>position</tt>. +* @param strsrch search iterator data struct +* @param position index position the search is to begin at +* @param status for errors if it occurs +* @return The character index of the first match preceding <tt>position</tt>, +* or <tt>USEARCH_DONE</tt> if there are no matches. +* @see #usearch_getOffset +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch, + int32_t position, + UErrorCode *status); + +/** +* Returns the index of the next point at which the string text matches the +* search pattern, starting from the current position. +* The iterator is adjusted so that its current +* index (as returned by <tt>usearch_getOffset</tt>) is the match position if +* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and +* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. +* @param strsrch search iterator data struct +* @param status for errors if it occurs +* @return The index of the next match after the current position, or +* <tt>USEARCH_DONE</tt> if there are no more matches. +* @see #usearch_first +* @see #usearch_getOffset +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, + UErrorCode *status); + +/** +* Returns the index of the previous point at which the string text matches +* the search pattern, starting at the current position. +* The iterator is adjusted so that its current +* index (as returned by <tt>usearch_getOffset</tt>) is the match position if +* one was found. +* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and +* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>. +* @param strsrch search iterator data struct +* @param status for errors if it occurs +* @return The index of the previous match before the current position, +* or <tt>USEARCH_DONE</tt> if there are no more matches. +* @see #usearch_last +* @see #usearch_getOffset +* @see #USEARCH_DONE +* @stable ICU 2.4 +*/ +U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch, + UErrorCode *status); + +/** +* Reset the iteration. +* Search will begin at the start of the text string if a forward iteration +* is initiated before a backwards iteration. Otherwise if a backwards +* iteration is initiated before a forwards iteration, the search will begin +* at the end of the text string. +* @param strsrch search iterator data struct +* @see #usearch_first +* @stable ICU 2.4 +*/ +U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch); + +#ifndef U_HIDE_INTERNAL_API +/** + * Simple forward search for the pattern, starting at a specified index, + * and using a default set of search options.
+ * + * This is an experimental function, and is not an official part of the + * ICU API. + * + * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored. + * + * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and + * any Break Iterator are ignored. + * + * Matches obey the following constraints: + * + * Characters at the start or end positions of a match that are ignorable + * for collation are not included as part of the match, unless they + * are part of a combining sequence, as described below. + * + * A match will not include a partial combining sequence. Combining + * character sequences are considered to be inseparable units, + * and either match the pattern completely, or are considered to not match + * at all. Thus, for example, an A followed by a combining accent mark will + * not be found when searching for a plain (unaccented) A (unless + * the collation strength has been set to ignore all accents). + * + * When beginning a search, the initial starting position, startIdx, + * is assumed to be an acceptable match boundary with respect to + * combining characters. A combining sequence that spans across the + * starting point will not suppress a match beginning at startIdx. + * + * Characters that expand to multiple collation elements + * (German sharp-S becoming 'ss', or the composed forms of accented + * characters, for example) also must match completely. + * Searching for a single 's' in a string containing only a sharp-s will + * find no match. + * + * + * @param strsrch the UStringSearch struct, which references both + * the text to be searched and the pattern being sought. + * @param startIdx The index into the text to begin the search. + * @param matchStart An out parameter, the starting index of the matched text. + * This parameter may be NULL. + * A value of -1 will be returned if no match was found. + * @param matchLimit Out parameter, the index of the first position following the matched text.
+ * The matchLimit will be at a suitable position for beginning a subsequent search + * in the input text. + * This parameter may be NULL. + * A value of -1 will be returned if no match was found. + * + * @param status Report any errors. Note that no match found is not an error. + * @return TRUE if a match was found, FALSE otherwise. + * + * @internal + */ +U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch, + int32_t startIdx, + int32_t *matchStart, + int32_t *matchLimit, + UErrorCode *status); + +/** + * Simple backwards search for the pattern, starting at a specified index, + * and using a default set of search options. + * + * This is an experimental function, and is not an official part of the + * ICU API. + * + * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored. + * + * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and + * any Break Iterator are ignored. + * + * Matches obey the following constraints: + * + * Characters at the start or end positions of a match that are ignorable + * for collation are not included as part of the match, unless they + * are part of a combining sequence, as described below. + * + * A match will not include a partial combining sequence. Combining + * character sequences are considered to be inseparable units, + * and either match the pattern completely, or are considered to not match + * at all. Thus, for example, an A followed by a combining accent mark will + * not be found when searching for a plain (unaccented) A (unless + * the collation strength has been set to ignore all accents). + * + * When beginning a search, the initial starting position, startIdx, + * is assumed to be an acceptable match boundary with respect to + * combining characters. A combining sequence that spans across the + * starting point will not suppress a match beginning at startIdx.
+ * + * Characters that expand to multiple collation elements + * (German sharp-S becoming 'ss', or the composed forms of accented + * characters, for example) also must match completely. + * Searching for a single 's' in a string containing only a sharp-s will + * find no match. + * + * + * @param strsrch the UStringSearch struct, which references both + * the text to be searched and the pattern being sought. + * @param startIdx The index into the text to begin the search. + * @param matchStart An out parameter, the starting index of the matched text. + * This parameter may be NULL. + * A value of -1 will be returned if no match was found. + * @param matchLimit Out parameter, the index of the first position following the matched text. + * The matchLimit will be at a suitable position for beginning a subsequent search + * in the input text. + * This parameter may be NULL. + * A value of -1 will be returned if no match was found. + * + * @param status Report any errors. Note that no match found is not an error. + * @return TRUE if a match was found, FALSE otherwise. + * + * @internal + */ +U_INTERNAL UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch, + int32_t startIdx, + int32_t *matchStart, + int32_t *matchLimit, + UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + +#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/uspoof.h b/intl/icu/source/i18n/unicode/uspoof.h new file mode 100644 index 000000000..40b73380c --- /dev/null +++ b/intl/icu/source/i18n/unicode/uspoof.h @@ -0,0 +1,1581 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 2008-2016, International Business Machines Corporation +* and others. All Rights Reserved. 
+*************************************************************************** +* file name: uspoof.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2008Feb13 +* created by: Andy Heninger +* +* Unicode Spoof Detection +*/ + +#ifndef USPOOF_H +#define USPOOF_H + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "unicode/parseerr.h" +#include "unicode/localpointer.h" + +#if !UCONFIG_NO_NORMALIZATION + + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/unistr.h" +#include "unicode/uniset.h" +#endif + + +/** + * \file + * \brief Unicode Security and Spoofing Detection, C API. + * + * <p> + * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and + * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions: + * + * <ol> + * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and + * "Ηarvest", where the second string starts with the Greek capital letter Eta.</li> + * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof + * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li> + * </ol> + * + * <p> + * Although originally designed as a method for flagging suspicious identifier strings such as URLs, + * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word + * content filters. + * + * <p> + * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++. 
+ * + * <h2>Confusables</h2> + * + * <p> + * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings: + * + * \code{.c} + * UErrorCode status = U_ZERO_ERROR; + * UChar* str1 = (UChar*) u"Harvest"; + * UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA + * + * USpoofChecker* sc = uspoof_open(&status); + * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status); + * + * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status); + * UBool result = bitmask != 0; + * // areConfusable: 1 (status: U_ZERO_ERROR) + * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status)); + * uspoof_close(sc); + * \endcode + * + * <p> + * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks} + * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the + * confusability test; and the following line extracts the result out of the return value. For best performance, + * the instance should be created once (e.g., upon application startup), and the efficient + * {@link uspoof_areConfusable} method can be used at runtime. + * + * <p> + * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call + * {@link uspoof_close} when the object goes out of scope: + * + * \code{.cpp} + * UErrorCode status = U_ZERO_ERROR; + * LocalUSpoofCheckerPointer sc(uspoof_open(&status)); + * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status); + * // ... + * \endcode + * + * <p> + * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can + * be thought of as a "hash code". 
{@link uspoof_getSkeleton} computes the skeleton for a particular string, so + * the following snippet is equivalent to the example above: + * + * \code{.c} + * UErrorCode status = U_ZERO_ERROR; + * UChar* str1 = (UChar*) u"Harvest"; + * UChar* str2 = (UChar*) u"\u0397arvest"; // with U+0397 GREEK CAPITAL LETTER ETA + * + * USpoofChecker* sc = uspoof_open(&status); + * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status); + * + * // Get skeleton 1 + * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status); + * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar)); + * status = U_ZERO_ERROR; + * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status); + * + * // Get skeleton 2 + * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status); + * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar)); + * status = U_ZERO_ERROR; + * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status); + * + * // Are the skeletons the same? + * UBool result = u_strcmp(skel1, skel2) == 0; + * // areConfusable: 1 (status: U_ZERO_ERROR) + * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status)); + * uspoof_close(sc); + * free(skel1); + * free(skel2); + * \endcode + * + * <p> + * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling + * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below: + * + * \code{.c} + * UErrorCode status = U_ZERO_ERROR; + * #define DICTIONARY_LENGTH 2 + * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" }; + * UChar* skeletons[DICTIONARY_LENGTH]; + * UChar* str = (UChar*) u"1orern"; + * + * // Setup: + * USpoofChecker* sc = uspoof_open(&status); + * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status); + * for (size_t i=0; i<DICTIONARY_LENGTH; i++) { + * UChar* word = dictionary[i]; + * int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, 
&status); + * skeletons[i] = (UChar*) malloc(++len * sizeof(UChar)); + * status = U_ZERO_ERROR; + * uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status); + * } + * + * // Live Check: + * { + * int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status); + * UChar* skel = (UChar*) malloc(++len * sizeof(UChar)); + * status = U_ZERO_ERROR; + * uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status); + * UBool result = FALSE; + * for (size_t i=0; i<DICTIONARY_LENGTH; i++) { + * result = u_strcmp(skel, skeletons[i]) == 0; + * if (result == TRUE) { break; } + * } + * // Has confusable in dictionary: 1 (status: U_ZERO_ERROR) + * printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status)); + * free(skel); + * } + * + * for (size_t i=0; i<DICTIONARY_LENGTH; i++) { + * free(skeletons[i]); + * } + * uspoof_close(sc); + * \endcode + * + * <p> + * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em> + * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons + * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons. 
+ * + * <h2>Spoof Detection</h2> + * + * <p> + * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a + * string: + * + * \code{.c} + * UErrorCode status = U_ZERO_ERROR; + * UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A + * + * // Get the default set of allowable characters: + * USet* allowed = uset_openEmpty(); + * uset_addAll(allowed, uspoof_getRecommendedSet(&status)); + * uset_addAll(allowed, uspoof_getInclusionSet(&status)); + * + * USpoofChecker* sc = uspoof_open(&status); + * uspoof_setAllowedChars(sc, allowed, &status); + * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE); + * + * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status); + * UBool result = bitmask != 0; + * // fails checks: 1 (status: U_ZERO_ERROR) + * printf("fails checks: %d (status: %s)\n", result, u_errorName(status)); + * uspoof_close(sc); + * uset_close(allowed); + * \endcode + * + * <p> + * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at + * startup, and call the cheaper {@link uspoof_check} online. We specify the set of + * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39. + * + * <p> + * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings, + * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers. + * + * <p> + * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks + * is available in the returned bitmask. 
For complete information, use the {@link uspoof_check2} class of functions + * with a {@link USpoofCheckResult} parameter: + * + * \code{.c} + * UErrorCode status = U_ZERO_ERROR; + * UChar* str = (UChar*) u"p\u0430ypal"; // with U+0430 CYRILLIC SMALL LETTER A + * + * // Get the default set of allowable characters: + * USet* allowed = uset_openEmpty(); + * uset_addAll(allowed, uspoof_getRecommendedSet(&status)); + * uset_addAll(allowed, uspoof_getInclusionSet(&status)); + * + * USpoofChecker* sc = uspoof_open(&status); + * uspoof_setAllowedChars(sc, allowed, &status); + * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE); + * + * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status); + * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status); + * + * int32_t failures1 = bitmask; + * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status); + * assert(failures1 == failures2); + * // checks that failed: 0x00000010 (status: U_ZERO_ERROR) + * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status)); + * + * // Cleanup: + * uspoof_close(sc); + * uset_close(allowed); + * uspoof_closeCheckResult(checkResult); + * \endcode + * + * C++ users can take advantage of a few syntactical conveniences. 
The following snippet is functionally + * equivalent to the one above: + * + * \code{.cpp} + * UErrorCode status = U_ZERO_ERROR; + * UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A + * + * // Get the default set of allowable characters: + * UnicodeSet allowed; + * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status)); + * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status)); + * + * LocalUSpoofCheckerPointer sc(uspoof_open(&status)); + * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status); + * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE); + * + * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status)); + * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status); + * + * int32_t failures1 = bitmask; + * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status); + * assert(failures1 == failures2); + * // checks that failed: 0x00000010 (status: U_ZERO_ERROR) + * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status)); + * + * // Explicit cleanup not necessary. + * \endcode + * + * <p> + * The return value is a bitmask of the checks that failed. In this case, there was one check that failed: + * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). 
The possible checks are: + * + * <ul> + * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the + * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS + * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li> + * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character + * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li> + * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable + * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li> + * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li> + * </ul> + * + * <p> + * These checks can be enabled independently of each other. For example, if you were interested in checking for only the + * INVISIBLE and MIXED_NUMBERS conditions, you could do: + * + * \code{.c} + * UErrorCode status = U_ZERO_ERROR; + * UChar* str = (UChar*) u"8\u09EA"; // 8 mixed with U+09EA BENGALI DIGIT FOUR + * + * USpoofChecker* sc = uspoof_open(&status); + * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status); + * + * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status); + * UBool result = bitmask != 0; + * // fails checks: 1 (status: U_ZERO_ERROR) + * printf("fails checks: %d (status: %s)\n", result, u_errorName(status)); + * uspoof_close(sc); + * \endcode + * + * <p> + * Here is an example in C++ showing how to compute the restriction level of a string: + * + * \code{.cpp} + * UErrorCode status = U_ZERO_ERROR; + * UnicodeString str((UChar*) u"p\u0430ypal"); // with U+0430 CYRILLIC SMALL LETTER A + * + * // Get the default set of allowable characters: + * UnicodeSet allowed; + * 
allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status)); + * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status)); + * + * LocalUSpoofCheckerPointer sc(uspoof_open(&status)); + * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status); + * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE); + * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status); + * + * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status)); + * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status); + * + * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status); + * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask: + * assert((restrictionLevel & bitmask) == restrictionLevel); + * // Restriction level: 0x50000000 (status: U_ZERO_ERROR) + * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status)); + * \endcode + * + * <p> + * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE. Since + * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check. + * + * <p> + * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in + * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings + * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have + * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is + * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed + * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. 
For more details on + * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of + * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code + * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple + * scripts. + * + * <h2>Additional Information</h2> + * + * <p> + * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers. + * + * <p> + * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether + * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads, + * using the same USpoofChecker instance. + * + * <p> + * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are + * thread safe. Those that take a non-const USpoofChecker are not thread safe. + * + * @stable ICU 4.6 + */ + +struct USpoofChecker; +typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker */ + +#ifndef U_HIDE_DRAFT_API +/** + * @see uspoof_openCheckResult + */ +struct USpoofCheckResult; +/** + * @see uspoof_openCheckResult + */ +typedef struct USpoofCheckResult USpoofCheckResult; +#endif /* U_HIDE_DRAFT_API */ + +/** + * Enum for the kinds of checks that USpoofChecker can perform. + * These enum values are used both to select the set of checks that + * will be performed, and to report results from the check function. + * + * @stable ICU 4.2 + */ +typedef enum USpoofChecks { + /** + * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates + * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section + * 4. 
+ * + * @see uspoof_areConfusable + * @stable ICU 4.2 + */ + USPOOF_SINGLE_SCRIPT_CONFUSABLE = 1, + + /** + * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates + * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS + * 39 section 4. + * + * @see uspoof_areConfusable + * @stable ICU 4.2 + */ + USPOOF_MIXED_SCRIPT_CONFUSABLE = 2, + + /** + * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates + * that the two strings are visually confusable and that they are not from the same script but both of them are + * single-script strings, according to UTS 39 section 4. + * + * @see uspoof_areConfusable + * @stable ICU 4.2 + */ + USPOOF_WHOLE_SCRIPT_CONFUSABLE = 4, + +#ifndef U_HIDE_DRAFT_API + /** + * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables. You may set + * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to + * make {@link uspoof_areConfusable} return only those types of confusables. + * + * @see uspoof_areConfusable + * @see uspoof_getSkeleton + * @draft ICU 58 + */ + USPOOF_CONFUSABLE = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * This flag is deprecated and no longer affects the behavior of SpoofChecker. + * + * @deprecated ICU 58 Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated. + */ + USPOOF_ANY_CASE = 8, +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Check that an identifier is no looser than the specified RestrictionLevel. + * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE. 
+ * + * If USPOOF_AUX_INFO is enabled the actual restriction level of the + * identifier being tested will also be returned by uspoof_check(). + * + * @see URestrictionLevel + * @see uspoof_setRestrictionLevel + * @see USPOOF_AUX_INFO + * + * @stable ICU 51 + */ + USPOOF_RESTRICTION_LEVEL = 16, + +#ifndef U_HIDE_DEPRECATED_API + /** Check that an identifier contains only characters from a + * single script (plus chars from the common and inherited scripts.) + * Applies to checks of a single identifier only. + * @deprecated ICU 51 Use RESTRICTION_LEVEL instead. + */ + USPOOF_SINGLE_SCRIPT = USPOOF_RESTRICTION_LEVEL, +#endif /* U_HIDE_DEPRECATED_API */ + + /** Check an identifier for the presence of invisible characters, + * such as zero-width spaces, or character sequences that are + * likely not to display, such as multiple occurrences of the same + * non-spacing mark. This check does not test the input string as a whole + * for conformance to any particular syntax for identifiers. + */ + USPOOF_INVISIBLE = 32, + + /** Check that an identifier contains only characters from a specified set + * of acceptable characters. See {@link uspoof_setAllowedChars} and + * {@link uspoof_setAllowedLocales}. Note that a string that fails this check + * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check. + */ + USPOOF_CHAR_LIMIT = 64, + + /** + * Check that an identifier does not mix numbers from different numbering systems. + * For more information, see UTS 39 section 5.3. + * + * @stable ICU 51 + */ + USPOOF_MIXED_NUMBERS = 128, + + /** + * Enable all spoof checks. + * + * @stable ICU 4.6 + */ + USPOOF_ALL_CHECKS = 0xFFFF, + + /** + * Enable the return of auxiliary (non-error) information in the + * upper bits of the check results value. + * + * If this "check" is not enabled, the results of {@link uspoof_check} will be + * zero when an identifier passes all of the enabled checks. 
+ * + * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will + * be zero when an identifier passes all checks. + * + * @stable ICU 51 + */ + USPOOF_AUX_INFO = 0x40000000 + + } USpoofChecks; + + + /** + * Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and + * for returned identifier restriction levels in check results. + * + * @stable ICU 51 + * + * @see uspoof_setRestrictionLevel + * @see uspoof_check + */ + typedef enum URestrictionLevel { + /** + * All characters in the string are in the identifier profile and all characters in the string are in the + * ASCII range. + * + * @stable ICU 51 + */ + USPOOF_ASCII = 0x10000000, + /** + * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and + * the string is single-script, according to the definition in UTS 39 section 5.1. + * + * @stable ICU 53 + */ + USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000, + /** + * The string classifies as Single Script, or all characters in the string are in the identifier profile and + * the string is covered by any of the following sets of scripts, according to the definition in UTS 39 + * section 5.1: + * <ul> + * <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li> + * <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li> + * <li>Latin + Han + Hangul (or equivalently: Latn +Kore)</li> + * </ul> + * This is the default restriction in ICU. + * + * @stable ICU 51 + */ + USPOOF_HIGHLY_RESTRICTIVE = 0x30000000, + /** + * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile + * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic, + * Greek, and Cherokee. + * + * @stable ICU 51 + */ + USPOOF_MODERATELY_RESTRICTIVE = 0x40000000, + /** + * All characters in the string are in the identifier profile. Allow arbitrary mixtures of scripts. 
+ * + * @stable ICU 51 + */ + USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000, + /** + * Any valid identifiers, including characters outside of the Identifier Profile. + * + * @stable ICU 51 + */ + USPOOF_UNRESTRICTIVE = 0x60000000, + /** + * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}. + * + * @stable ICU 53 + */ + USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000, +#ifndef U_HIDE_INTERNAL_API + /** + * An undefined restriction level. + * @internal + */ + USPOOF_UNDEFINED_RESTRICTIVE = -1 +#endif /* U_HIDE_INTERNAL_API */ + } URestrictionLevel; + +/** + * Create a Unicode Spoof Checker, configured to perform all + * checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT. + * Note that additional checks may be added in the future, + * resulting in the changes to the default checking behavior. + * + * @param status The error code, set if this function encounters a problem. + * @return the newly created Spoof Checker + * @stable ICU 4.2 + */ +U_STABLE USpoofChecker * U_EXPORT2 +uspoof_open(UErrorCode *status); + + +/** + * Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory. + * Inverse of uspoof_serialize(). + * The memory containing the serialized data must remain valid and unchanged + * as long as the spoof checker, or any cloned copies of the spoof checker, + * are in use. Ownership of the memory remains with the caller. + * The spoof checker (and any clones) must be closed prior to deleting the + * serialized data. + * + * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data + * @param length the number of bytes available at data; + * can be more than necessary + * @param pActualLength receives the actual number of bytes at data taken up by the data; + * can be NULL + * @param pErrorCode ICU error code + * @return the spoof checker. 
+ * + * @see uspoof_open + * @see uspoof_serialize + * @stable ICU 4.2 + */ +U_STABLE USpoofChecker * U_EXPORT2 +uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode); + +/** + * Open a Spoof Checker from the source form of the spoof data. + * The input corresponds to the Unicode data file confusables.txt + * as described in Unicode UAX #39. The syntax of the source data + * is as described in UAX #39 for this file, and the content of + * this file is acceptable input. + * + * The character encoding of the (char *) input text is UTF-8. + * + * @param confusables a pointer to the confusable characters definitions, + * as found in file confusables.txt from unicode.org. + * @param confusablesLen The length of the confusables text, or -1 if the + * input string is zero terminated. + * @param confusablesWholeScript + * Deprecated in ICU 58. No longer used. + * @param confusablesWholeScriptLen + * Deprecated in ICU 58. No longer used. + * @param errType In the event of an error in the input, indicates + * which of the input files contains the error. + * The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or + * USPOOF_WHOLE_SCRIPT_CONFUSABLE, or + * zero if no errors are found. + * @param pe In the event of an error in the input, receives the position + * in the input text (line, offset) of the error. + * @param status an in/out ICU UErrorCode. Among the possible errors is + * U_PARSE_ERROR, which is used to report syntax errors + * in the input. + * @return A spoof checker that uses the rules from the input files. + * @stable ICU 4.2 + */ +U_STABLE USpoofChecker * U_EXPORT2 +uspoof_openFromSource(const char *confusables, int32_t confusablesLen, + const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, + int32_t *errType, UParseError *pe, UErrorCode *status); + + +/** + * Close a Spoof Checker, freeing any memory that was being held by + * its implementation. 
+ * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uspoof_close(USpoofChecker *sc); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUSpoofCheckerPointer + * "Smart pointer" class, closes a USpoofChecker via uspoof_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close); + +U_NAMESPACE_END + +#endif + +/** + * Clone a Spoof Checker. The clone will be set to perform the same checks + * as the original source. + * + * @param sc The source USpoofChecker + * @param status The error code, set if this function encounters a problem. + * @return The clone of the source USpoofChecker. + * @stable ICU 4.2 + */ +U_STABLE USpoofChecker * U_EXPORT2 +uspoof_clone(const USpoofChecker *sc, UErrorCode *status); + + +/** + * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method + * overwrites any checks that may have already been enabled. By default, all checks are enabled. + * + * To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For + * example, to fail strings containing characters outside of the set specified by {@link uspoof_setAllowedChars} and + * also strings that contain digits from mixed numbering systems: + * + * <pre> + * {@code + * uspoof_setChecks(USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS); + * } + * </pre> + * + * To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from + * ALL_CHECKS. 
For example, if you are not planning to use the {@link uspoof_areConfusable} functionality, + * it is good practice to disable the CONFUSABLE check: + * + * <pre> + * {@code + * uspoof_setChecks(USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE); + * } + * </pre> + * + * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and + * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they + * enable onto the existing bitmask specified by this method. For more details, see the documentation of those + * methods. + * + * @param sc The USpoofChecker + * @param checks The set of checks that this spoof checker will perform. + * The value is a bit set, obtained by OR-ing together + * values from enum USpoofChecks. + * @param status The error code, set if this function encounters a problem. + * @stable ICU 4.2 + * + */ +U_STABLE void U_EXPORT2 +uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status); + +/** + * Get the set of checks that this Spoof Checker has been configured to perform. + * + * @param sc The USpoofChecker + * @param status The error code, set if this function encounters a problem. + * @return The set of checks that this spoof checker will perform. + * The value is a bit set, obtained by OR-ing together + * values from enum USpoofChecks. + * @stable ICU 4.2 + * + */ +U_STABLE int32_t U_EXPORT2 +uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status); + +/** + * Set the loosest restriction level allowed for strings. The default if this is not called is + * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and + * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.1 and 5.2 of UTS 39. To customize which checks are + * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}. + * + * @param sc The USpoofChecker + * @param restrictionLevel The loosest restriction level allowed. 
+ * @see URestrictionLevel + * @stable ICU 51 + */ +U_STABLE void U_EXPORT2 +uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel); + + +/** + * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}. + * + * @return The restriction level + * @see URestrictionLevel + * @stable ICU 51 + */ +U_STABLE URestrictionLevel U_EXPORT2 +uspoof_getRestrictionLevel(const USpoofChecker *sc); + +/** + * Limit characters that are acceptable in identifiers being checked to those + * normally used with the languages associated with the specified locales. + * Any previously specified list of locales is replaced by the new settings. + * + * A set of languages is determined from the locale(s), and + * from those a set of acceptable Unicode scripts is determined. + * Characters from this set of scripts, along with characters from + * the "common" and "inherited" Unicode Script categories + * will be permitted. + * + * Supplying an empty string removes all restrictions; + * characters from any script will be allowed. + * + * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this + * USpoofChecker when calling this function with a non-empty list + * of locales. + * + * The Unicode Set of characters that will be allowed is accessible + * via the uspoof_getAllowedChars() function. uspoof_setAllowedLocales() + * will <i>replace</i> any previously applied set of allowed characters. + * + * Adjustments, such as additions or deletions of certain classes of characters, + * can be made to the result of uspoof_setAllowedLocales() by + * fetching the resulting set with uspoof_getAllowedChars(), + * manipulating it with the Unicode Set API, then resetting the + * spoof detector's limits with uspoof_setAllowedChars(). + * + * @param sc The USpoofChecker + * @param localesList A list of locales, from which the language + * and associated script are extracted. 
The locales + * are comma-separated if there is more than one. + * White space may not appear within an individual locale, + * but is ignored otherwise. + * The locales are syntactically like those from the + * HTTP Accept-Language header. + * If the localesList is empty, no restrictions will be placed on + * the allowed characters. + * + * @param status The error code, set if this function encounters a problem. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status); + +/** + * Get a list of locales for the scripts that are acceptable in strings + * to be checked. If no limitations on scripts have been specified, + * an empty string will be returned. + * + * uspoof_setAllowedChars() will reset the list of allowed locales to be empty. + * + * The format of the returned list is the same as that supplied to + * uspoof_setAllowedLocales(), but the returned list may not be identical + * to the originally specified string; the string may be reformatted, + * and information other than languages from + * the originally specified locales may be omitted. + * + * @param sc The USpoofChecker + * @param status The error code, set if this function encounters a problem. + * @return A string containing a list of locales corresponding + * to the acceptable scripts, formatted like an + * HTTP Accept-Language value. + * + * @stable ICU 4.2 + */ +U_STABLE const char * U_EXPORT2 +uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status); + + +/** + * Limit the acceptable characters to those specified by a Unicode Set. + * Any previously specified character limit + * is replaced by the new settings. This includes limits on + * characters that were set with the uspoof_setAllowedLocales() function. + * + * The USPOOF_CHAR_LIMIT test is automatically enabled for this + * USpoofChecker by this function. 
+ * + * @param sc The USpoofChecker + * @param chars A Unicode Set containing the list of + * characters that are permitted. Ownership of the set + * remains with the caller. The incoming set is cloned by + * this function, so there are no restrictions on modifying + * or deleting the USet after calling this function. + * @param status The error code, set if this function encounters a problem. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status); + + +/** + * Get a USet for the characters permitted in an identifier. + * This corresponds to the limits imposed by the Set Allowed Characters + * functions. Limitations imposed by other checks will not be + * reflected in the set returned by this function. + * + * The returned set will be frozen, meaning that it cannot be modified + * by the caller. + * + * Ownership of the returned set remains with the Spoof Detector. The + * returned set will become invalid if the spoof detector is closed, + * or if a new set of allowed characters is specified. + * + * + * @param sc The USpoofChecker + * @param status The error code, set if this function encounters a problem. + * @return A USet containing the characters that are permitted by + * the USPOOF_CHAR_LIMIT test. + * @stable ICU 4.2 + */ +U_STABLE const USet * U_EXPORT2 +uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status); + + +#if U_SHOW_CPLUSPLUS_API +/** + * Limit the acceptable characters to those specified by a Unicode Set. + * Any previously specified character limit + * is replaced by the new settings. This includes limits on + * characters that were set with the uspoof_setAllowedLocales() function. + * + * The USPOOF_CHAR_LIMIT test is automatically enabled for this + * USpoofChecker by this function. + * + * @param sc The USpoofChecker + * @param chars A Unicode Set containing the list of + * characters that are permitted. Ownership of the set + * remains with the caller.
The incoming set is cloned by + * this function, so there are no restrictions on modifying + * or deleting the UnicodeSet after calling this function. + * @param status The error code, set if this function encounters a problem. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status); + + +/** + * Get a UnicodeSet for the characters permitted in an identifier. + * This corresponds to the limits imposed by the Set Allowed Characters / + * UnicodeSet functions. Limitations imposed by other checks will not be + * reflected in the set returned by this function. + * + * The returned set will be frozen, meaning that it cannot be modified + * by the caller. + * + * Ownership of the returned set remains with the Spoof Detector. The + * returned set will become invalid if the spoof detector is closed, + * or if a new set of allowed characters is specified. + * + * + * @param sc The USpoofChecker + * @param status The error code, set if this function encounters a problem. + * @return A UnicodeSet containing the characters that are permitted by + * the USPOOF_CHAR_LIMIT test. + * @stable ICU 4.2 + */ +U_STABLE const icu::UnicodeSet * U_EXPORT2 +uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status); +#endif + + +/** + * Check the specified string for possible security issues. + * The text to be checked will typically be an identifier of some sort. + * The set of checks to be performed is specified with uspoof_setChecks(). + * + * \note + * Consider using the newer API, {@link uspoof_check2}, instead. + * The newer API exposes additional information from the check procedure + * and is otherwise identical to this method. + * + * @param sc The USpoofChecker + * @param id The identifier to be checked for possible security issues, + * in UTF-16 format. 
+ * @param length the length of the string to be checked, expressed in + * 16 bit UTF-16 code units, or -1 if the string is + * zero terminated. + * @param position Deprecated in ICU 51. Always returns zero. + * Originally, an out parameter for the index of the first + * string position that failed a check. + * This parameter may be NULL. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Spoofing or security issues detected with the input string are + * not reported here, but through the function's return value. + * @return An integer value with bits set for any potential security + * or spoofing issues detected. The bits are defined by + * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) + * will be zero if the input string passes all of the + * enabled checks. + * @see uspoof_check2 + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uspoof_check(const USpoofChecker *sc, + const UChar *id, int32_t length, + int32_t *position, + UErrorCode *status); + + +/** + * Check the specified string for possible security issues. + * The text to be checked will typically be an identifier of some sort. + * The set of checks to be performed is specified with uspoof_setChecks(). + * + * \note + * Consider using the newer API, {@link uspoof_check2UTF8}, instead. + * The newer API exposes additional information from the check procedure + * and is otherwise identical to this method. + * + * @param sc The USpoofChecker + * @param id A identifier to be checked for possible security issues, in UTF8 format. + * @param length the length of the string to be checked, or -1 if the string is + * zero terminated. + * @param position Deprecated in ICU 51. Always returns zero. + * Originally, an out parameter for the index of the first + * string position that failed a check. + * This parameter may be NULL. + * @param status The error code, set if an error occurred while attempting to + * perform the check. 
+ * Spoofing or security issues detected with the input string are + * not reported here, but through the function's return value. + * If the input contains invalid UTF-8 sequences, + * a status of U_INVALID_CHAR_FOUND will be returned. + * @return An integer value with bits set for any potential security + * or spoofing issues detected. The bits are defined by + * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) + * will be zero if the input string passes all of the + * enabled checks. + * @see uspoof_check2UTF8 + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uspoof_checkUTF8(const USpoofChecker *sc, + const char *id, int32_t length, + int32_t *position, + UErrorCode *status); + + +#if U_SHOW_CPLUSPLUS_API +/** + * Check the specified string for possible security issues. + * The text to be checked will typically be an identifier of some sort. + * The set of checks to be performed is specified with uspoof_setChecks(). + * + * \note + * Consider using the newer API, {@link uspoof_check2UnicodeString}, instead. + * The newer API exposes additional information from the check procedure + * and is otherwise identical to this method. + * + * @param sc The USpoofChecker + * @param id A identifier to be checked for possible security issues. + * @param position Deprecated in ICU 51. Always returns zero. + * Originally, an out parameter for the index of the first + * string position that failed a check. + * This parameter may be NULL. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Spoofing or security issues detected with the input string are + * not reported here, but through the function's return value. + * @return An integer value with bits set for any potential security + * or spoofing issues detected. The bits are defined by + * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) + * will be zero if the input string passes all of the + * enabled checks. 
+ * @see uspoof_check2UnicodeString + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uspoof_checkUnicodeString(const USpoofChecker *sc, + const icu::UnicodeString &id, + int32_t *position, + UErrorCode *status); +#endif + + +#ifndef U_HIDE_DRAFT_API +/** + * Check the specified string for possible security issues. + * The text to be checked will typically be an identifier of some sort. + * The set of checks to be performed is specified with uspoof_setChecks(). + * + * @param sc The USpoofChecker + * @param id The identifier to be checked for possible security issues, + * in UTF-16 format. + * @param length the length of the string to be checked, or -1 if the string is + * zero terminated. + * @param checkResult An instance of USpoofCheckResult to be filled with + * details about the identifier. Can be NULL. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Spoofing or security issues detected with the input string are + * not reported here, but through the function's return value. + * @return An integer value with bits set for any potential security + * or spoofing issues detected. The bits are defined by + * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) + * will be zero if the input string passes all of the + * enabled checks. Any information in this bitmask will be + * consistent with the information saved in the optional + * checkResult parameter. + * @see uspoof_openCheckResult + * @see uspoof_check2UTF8 + * @see uspoof_check2UnicodeString + * @draft ICU 58 + */ +U_DRAFT int32_t U_EXPORT2 +uspoof_check2(const USpoofChecker *sc, + const UChar* id, int32_t length, + USpoofCheckResult* checkResult, + UErrorCode *status); + +/** + * Check the specified string for possible security issues. + * The text to be checked will typically be an identifier of some sort. + * The set of checks to be performed is specified with uspoof_setChecks(). 
+ * + * This version of {@link uspoof_check} accepts a USpoofCheckResult, which + * returns additional information about the identifier. For more + * information, see {@link uspoof_openCheckResult}. + * + * @param sc The USpoofChecker + * @param id An identifier to be checked for possible security issues, in UTF8 format. + * @param length the length of the string to be checked, or -1 if the string is + * zero terminated. + * @param checkResult An instance of USpoofCheckResult to be filled with + * details about the identifier. Can be NULL. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Spoofing or security issues detected with the input string are + * not reported here, but through the function's return value. + * @return An integer value with bits set for any potential security + * or spoofing issues detected. The bits are defined by + * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) + * will be zero if the input string passes all of the + * enabled checks. Any information in this bitmask will be + * consistent with the information saved in the optional + * checkResult parameter. + * @see uspoof_openCheckResult + * @see uspoof_check2 + * @see uspoof_check2UnicodeString + * @draft ICU 58 + */ +U_DRAFT int32_t U_EXPORT2 +uspoof_check2UTF8(const USpoofChecker *sc, + const char *id, int32_t length, + USpoofCheckResult* checkResult, + UErrorCode *status); + +#if U_SHOW_CPLUSPLUS_API +/** + * Check the specified string for possible security issues. + * The text to be checked will typically be an identifier of some sort. + * The set of checks to be performed is specified with uspoof_setChecks(). + * + * @param sc The USpoofChecker + * @param id An identifier to be checked for possible security issues. + * @param checkResult An instance of USpoofCheckResult to be filled with + * details about the identifier. Can be NULL.
+ * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Spoofing or security issues detected with the input string are + * not reported here, but through the function's return value. + * @return An integer value with bits set for any potential security + * or spoofing issues detected. The bits are defined by + * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) + * will be zero if the input string passes all of the + * enabled checks. Any information in this bitmask will be + * consistent with the information saved in the optional + * checkResult parameter. + * @see uspoof_openCheckResult + * @see uspoof_check2 + * @see uspoof_check2UTF8 + * @draft ICU 58 + */ +U_DRAFT int32_t U_EXPORT2 +uspoof_check2UnicodeString(const USpoofChecker *sc, + const icu::UnicodeString &id, + USpoofCheckResult* checkResult, + UErrorCode *status); +#endif + +/** + * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return + * information about the identifier. Information includes: + * <ul> + * <li>A bitmask of the checks that failed</li> + * <li>The identifier's restriction level (UTS 39 section 5.2)</li> + * <li>The set of numerics in the string (UTS 39 section 5.3)</li> + * </ul> + * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call + * of {@link uspoof_check2}. + * + * @param status The error code, set if this function encounters a problem. + * @return the newly created USpoofCheckResult + * @see uspoof_check2 + * @see uspoof_check2UTF8 + * @see uspoof_check2UnicodeString + * @draft ICU 58 + */ +U_DRAFT USpoofCheckResult* U_EXPORT2 +uspoof_openCheckResult(UErrorCode *status); + +/** + * Close a USpoofCheckResult, freeing any memory that was being held by + * its implementation. 
+ * + * @param checkResult The instance of USpoofCheckResult to close + * @draft ICU 58 + */ +U_DRAFT void U_EXPORT2 +uspoof_closeCheckResult(USpoofCheckResult *checkResult); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUSpoofCheckResultPointer + * "Smart pointer" class, closes a USpoofCheckResult via {@link uspoof_closeCheckResult}. + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 58 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult); + +U_NAMESPACE_END + +#endif + +/** + * Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests + * in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on. + * + * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult} + * @param status The error code, set if an error occurred. + * @return An integer value with bits set for any potential security + * or spoofing issues detected. The bits are defined by + * enum USpoofChecks. (returned_value & USPOOF_ALL_CHECKS) + * will be zero if the input string passes all of the + * enabled checks. + * @see uspoof_setChecks + * @draft ICU 58 + */ +U_DRAFT int32_t U_EXPORT2 +uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status); + +/** + * Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check + * was enabled; otherwise, undefined. + * + * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult} + * @param status The error code, set if an error occurred. 
+ * @return The restriction level contained in the USpoofCheckResult + * @see uspoof_setRestrictionLevel + * @draft ICU 58 + */ +U_DRAFT URestrictionLevel U_EXPORT2 +uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status); + +/** + * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled; + * otherwise, undefined. The set will contain the zero digit from each decimal number system found + * in the input string. Ownership of the returned USet remains with the USpoofCheckResult. + * The USet will be freed when {@link uspoof_closeCheckResult} is called. + * + * @param checkResult The instance of USpoofCheckResult created by {@link uspoof_openCheckResult} + * @return The set of numerics contained in the USpoofCheckResult + * @param status The error code, set if an error occurred. + * @draft ICU 58 + */ +U_DRAFT const USet* U_EXPORT2 +uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status); +#endif /* U_HIDE_DRAFT_API */ + + +/** + * Check whether two specified strings are visually confusable. + * + * If the strings are confusable, the return value will be nonzero, as long as + * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks(). + * + * The bits in the return value correspond to flags for each of the classes of + * confusables applicable to the two input strings. According to UTS 39 + * section 4, the possible flags are: + * + * <ul> + * <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li> + * <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li> + * <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li> + * </ul> + * + * If one or more of the above flags were not listed in uspoof_setChecks(), this + * function will never report that class of confusable. The check + * {@link USPOOF_CONFUSABLE} enables all three flags. + * + * + * @param sc The USpoofChecker + * @param id1 The first of the two identifiers to be compared for + * confusability.
The strings are in UTF-16 format. + * @param length1 the length of the first identifier, expressed in + * 16 bit UTF-16 code units, or -1 if the string is + * nul terminated. + * @param id2 The second of the two identifiers to be compared for + * confusability. The identifiers are in UTF-16 format. + * @param length2 The length of the second identifier, expressed in + * 16 bit UTF-16 code units, or -1 if the string is + * nul terminated. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Confusability of the identifiers is not reported here, + * but through this function's return value. + * @return An integer value with bit(s) set corresponding to + * the type of confusability found, as defined by + * enum USpoofChecks. Zero is returned if the identifiers + * are not confusable. + * + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uspoof_areConfusable(const USpoofChecker *sc, + const UChar *id1, int32_t length1, + const UChar *id2, int32_t length2, + UErrorCode *status); + + + +/** + * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format. + * + * @param sc The USpoofChecker + * @param id1 The first of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param length1 the length of the first identifier, in bytes, or -1 + * if the string is nul terminated. + * @param id2 The second of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param length2 The length of the second string in bytes, or -1 + * if the string is nul terminated. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Confusability of the strings is not reported here, + * but through this function's return value. + * @return An integer value with bit(s) set corresponding to + * the type of confusability found, as defined by + * enum USpoofChecks.
Zero is returned if the strings + * are not confusable. + * + * @stable ICU 4.2 + * + * @see uspoof_areConfusable + */ +U_STABLE int32_t U_EXPORT2 +uspoof_areConfusableUTF8(const USpoofChecker *sc, + const char *id1, int32_t length1, + const char *id2, int32_t length2, + UErrorCode *status); + + + + +#if U_SHOW_CPLUSPLUS_API +/** + * A version of {@link uspoof_areConfusable} accepting UnicodeStrings. + * + * @param sc The USpoofChecker + * @param s1 The first of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param s2 The second of the two identifiers to be compared for + * confusability. The strings are in UTF-8 format. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * Confusability of the identifiers is not reported here, + * but through this function's return value. + * @return An integer value with bit(s) set corresponding to + * the type of confusability found, as defined by + * enum USpoofChecks. Zero is returned if the identifiers + * are not confusable. + * + * @stable ICU 4.2 + * + * @see uspoof_areConfusable + */ +U_STABLE int32_t U_EXPORT2 +uspoof_areConfusableUnicodeString(const USpoofChecker *sc, + const icu::UnicodeString &s1, + const icu::UnicodeString &s2, + UErrorCode *status); +#endif + + +/** + * Get the "skeleton" for an identifier. + * Skeletons are a transformation of the input identifier; + * Two identifiers are confusable if their skeletons are identical. + * See Unicode UAX #39 for additional information. + * + * Using skeletons directly makes it possible to quickly check + * whether an identifier is confusable with any of some large + * set of existing identifiers, by creating an efficiently + * searchable collection of the skeletons. + * + * @param sc The USpoofChecker + * @param type Deprecated in ICU 58. You may pass any number. + * Originally, controlled which of the Unicode confusable data + * tables to use. 
+ * @param id The input identifier whose skeleton will be computed. + * @param length The length of the input identifier, expressed in 16 bit + * UTF-16 code units, or -1 if the string is zero terminated. + * @param dest The output buffer, to receive the skeleton string. + * @param destCapacity The length of the output buffer, in 16 bit units. + * The destCapacity may be zero, in which case the function will + * return the actual length of the skeleton. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * @return The length of the skeleton string. The returned length + * is always that of the complete skeleton, even when the + * supplied buffer is too small (or of zero length) + * + * @stable ICU 4.2 + * @see uspoof_areConfusable + */ +U_STABLE int32_t U_EXPORT2 +uspoof_getSkeleton(const USpoofChecker *sc, + uint32_t type, + const UChar *id, int32_t length, + UChar *dest, int32_t destCapacity, + UErrorCode *status); + +/** + * Get the "skeleton" for an identifier. + * Skeletons are a transformation of the input identifier; + * Two identifiers are confusable if their skeletons are identical. + * See Unicode UAX #39 for additional information. + * + * Using skeletons directly makes it possible to quickly check + * whether an identifier is confusable with any of some large + * set of existing identifiers, by creating an efficiently + * searchable collection of the skeletons. + * + * @param sc The USpoofChecker + * @param type Deprecated in ICU 58. You may pass any number. + * Originally, controlled which of the Unicode confusable data + * tables to use. + * @param id The UTF-8 format identifier whose skeleton will be computed. + * @param length The length of the input string, in bytes, + * or -1 if the string is zero terminated. + * @param dest The output buffer, to receive the skeleton string. + * @param destCapacity The length of the output buffer, in bytes. 
+ * The destCapacity may be zero, in which case the function will + * return the actual length of the skeleton. + * @param status The error code, set if an error occurred while attempting to + * perform the check. Possible Errors include U_INVALID_CHAR_FOUND + * for invalid UTF-8 sequences, and + * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small + * to hold the complete skeleton. + * @return The length of the skeleton string, in bytes. The returned length + * is always that of the complete skeleton, even when the + * supplied buffer is too small (or of zero length) + * + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uspoof_getSkeletonUTF8(const USpoofChecker *sc, + uint32_t type, + const char *id, int32_t length, + char *dest, int32_t destCapacity, + UErrorCode *status); + +#if U_SHOW_CPLUSPLUS_API +/** + * Get the "skeleton" for an identifier. + * Skeletons are a transformation of the input identifier; + * Two identifiers are confusable if their skeletons are identical. + * See Unicode UAX #39 for additional information. + * + * Using skeletons directly makes it possible to quickly check + * whether an identifier is confusable with any of some large + * set of existing identifiers, by creating an efficiently + * searchable collection of the skeletons. + * + * @param sc The USpoofChecker. + * @param type Deprecated in ICU 58. You may pass any number. + * Originally, controlled which of the Unicode confusable data + * tables to use. + * @param id The input identifier whose skeleton will be computed. + * @param dest The output identifier, to receive the skeleton string. + * @param status The error code, set if an error occurred while attempting to + * perform the check. + * @return A reference to the destination (skeleton) string. 
+ * + * @stable ICU 4.2 + */ +U_I18N_API icu::UnicodeString & U_EXPORT2 +uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, + uint32_t type, + const icu::UnicodeString &id, + icu::UnicodeString &dest, + UErrorCode *status); +#endif /* U_SHOW_CPLUSPLUS_API */ + +/** + * Get the set of Candidate Characters for Inclusion in Identifiers, as defined + * in http://unicode.org/Public/security/latest/xidmodifications.txt + * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms. + * + * The returned set is frozen. Ownership of the set remains with the ICU library; it must not + * be deleted by the caller. + * + * @param status The error code, set if a problem occurs while creating the set. + * + * @stable ICU 51 + */ +U_STABLE const USet * U_EXPORT2 +uspoof_getInclusionSet(UErrorCode *status); + +/** + * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined + * in http://unicode.org/Public/security/latest/xidmodifications.txt + * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms. + * + * The returned set is frozen. Ownership of the set remains with the ICU library; it must not + * be deleted by the caller. + * + * @param status The error code, set if a problem occurs while creating the set. + * + * @stable ICU 51 + */ +U_STABLE const USet * U_EXPORT2 +uspoof_getRecommendedSet(UErrorCode *status); + +#if U_SHOW_CPLUSPLUS_API + +/** + * Get the set of Candidate Characters for Inclusion in Identifiers, as defined + * in http://unicode.org/Public/security/latest/xidmodifications.txt + * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms. + * + * The returned set is frozen. Ownership of the set remains with the ICU library; it must not + * be deleted by the caller. + * + * @param status The error code, set if a problem occurs while creating the set. 
+ * + * @stable ICU 51 + */ +U_STABLE const icu::UnicodeSet * U_EXPORT2 +uspoof_getInclusionUnicodeSet(UErrorCode *status); + +/** + * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined + * in http://unicode.org/Public/security/latest/xidmodifications.txt + * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms. + * + * The returned set is frozen. Ownership of the set remains with the ICU library; it must not + * be deleted by the caller. + * + * @param status The error code, set if a problem occurs while creating the set. + * + * @stable ICU 51 + */ +U_STABLE const icu::UnicodeSet * U_EXPORT2 +uspoof_getRecommendedUnicodeSet(UErrorCode *status); + +#endif /* U_SHOW_CPLUSPLUS_API */ + +/** + * Serialize the data for a spoof detector into a chunk of memory. + * The flattened spoof detection tables can later be used to efficiently + * instantiate a new Spoof Detector. + * + * The serialized spoof checker includes only the data compiled from the + * Unicode data tables by uspoof_openFromSource(); it does not + * include any other state or configuration that may have been set. + * + * @param sc the Spoof Detector whose data is to be serialized.
+ * @param data a pointer to 32-bit-aligned memory to be filled with the data, + * can be NULL if capacity==0 + * @param capacity the number of bytes available at data, + * or 0 for preflighting + * @param status an in/out ICU UErrorCode; possible errors include: + * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization + * - U_ILLEGAL_ARGUMENT_ERROR the data or capacity parameters are bad + * @return the number of bytes written or needed for the spoof data + * + * @see utrie2_openFromSerialized() + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uspoof_serialize(USpoofChecker *sc, + void *data, int32_t capacity, + UErrorCode *status); + + +#endif + +#endif /* USPOOF_H */ diff --git a/intl/icu/source/i18n/unicode/utmscale.h b/intl/icu/source/i18n/unicode/utmscale.h new file mode 100644 index 000000000..8f3c42a9e --- /dev/null +++ b/intl/icu/source/i18n/unicode/utmscale.h @@ -0,0 +1,483 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2004 - 2008, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ + +#ifndef UTMSCALE_H +#define UTMSCALE_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +/** + * \file + * \brief C API: Universal Time Scale + * + * There are quite a few different conventions for binary datetime, depending on different + * platforms and protocols. Some of these have severe drawbacks. For example, people using + * Unix time (seconds since Jan 1, 1970) think that they are safe until near the year 2038. + * But cases can and do arise where arithmetic manipulations causes serious problems. 
Consider + * the computation of the average of two datetimes, for example: if one calculates them with + * <code>averageTime = (time1 + time2)/2</code>, there will be overflow even with dates + * around the present. Moreover, even if these problems don't occur, there is the issue of + * conversion back and forth between different systems. + * + * <p> + * Binary datetimes differ in a number of ways: the datatype, the unit, + * and the epoch (origin). We'll refer to these as time scales. For example: + * + * <table border="1" cellspacing="0" cellpadding="4"> + * <caption>Table 1: Binary Time Scales</caption> + * <tr> + * <th align="left">Source</th> + * <th align="left">Datatype</th> + * <th align="left">Unit</th> + * <th align="left">Epoch</th> + * </tr> + * + * <tr> + * <td>UDTS_JAVA_TIME</td> + * <td>int64_t</td> + * <td>milliseconds</td> + * <td>Jan 1, 1970</td> + * </tr> + * <tr> + * + * <td>UDTS_UNIX_TIME</td> + * <td>int32_t or int64_t</td> + * <td>seconds</td> + * <td>Jan 1, 1970</td> + * </tr> + * <tr> + * <td>UDTS_ICU4C_TIME</td> + * + * <td>double</td> + * <td>milliseconds</td> + * <td>Jan 1, 1970</td> + * </tr> + * <tr> + * <td>UDTS_WINDOWS_FILE_TIME</td> + * <td>int64_t</td> + * + * <td>ticks (100 nanoseconds)</td> + * <td>Jan 1, 1601</td> + * </tr> + * <tr> + * <td>UDTS_DOTNET_DATE_TIME</td> + * <td>int64_t</td> + * <td>ticks (100 nanoseconds)</td> + * + * <td>Jan 1, 0001</td> + * </tr> + * <tr> + * <td>UDTS_MAC_OLD_TIME</td> + * <td>int32_t or int64_t</td> + * <td>seconds</td> + * <td>Jan 1, 1904</td> + * + * </tr> + * <tr> + * <td>UDTS_MAC_TIME</td> + * <td>double</td> + * <td>seconds</td> + * <td>Jan 1, 2001</td> + * </tr> + * + * <tr> + * <td>UDTS_EXCEL_TIME</td> + * <td>?</td> + * <td>days</td> + * <td>Dec 31, 1899</td> + * </tr> + * <tr> + * + * <td>UDTS_DB2_TIME</td> + * <td>?</td> + * <td>days</td> + * <td>Dec 31, 1899</td> + * </tr> + * + * <tr> + * <td>UDTS_UNIX_MICROSECONDS_TIME</td> + * <td>int64_t</td> + * <td>microseconds</td> + * <td>Jan 
1, 1970</td> + * </tr> + * </table> + * + * <p> + * All of the epochs start at 00:00 am (the earliest possible time on the day in question), + * and are assumed to be UTC. + * + * <p> + * The ranges for different datatypes are given in the following table (all values in years). + * The range of years includes the entire range expressible with positive and negative + * values of the datatype. The range of years for double is the range that would be allowed + * without losing precision to the corresponding unit. + * + * <table border="1" cellspacing="0" cellpadding="4"> + * <tr> + * <th align="left">Units</th> + * <th align="left">int64_t</th> + * <th align="left">double</th> + * <th align="left">int32_t</th> + * </tr> + * + * <tr> + * <td>1 sec</td> + * <td align="right">5.84542x10<sup>11</sup></td> + * <td align="right">285,420,920.94</td> + * <td align="right">136.10</td> + * </tr> + * <tr> + * + * <td>1 millisecond</td> + * <td align="right">584,542,046.09</td> + * <td align="right">285,420.92</td> + * <td align="right">0.14</td> + * </tr> + * <tr> + * <td>1 microsecond</td> + * + * <td align="right">584,542.05</td> + * <td align="right">285.42</td> + * <td align="right">0.00</td> + * </tr> + * <tr> + * <td>100 nanoseconds (tick)</td> + * <td align="right">58,454.20</td> + * <td align="right">28.54</td> + * <td align="right">0.00</td> + * </tr> + * <tr> + * <td>1 nanosecond</td> + * <td align="right">584.5420461</td> + * <td align="right">0.2854</td> + * <td align="right">0.00</td> + * </tr> + * </table> + * + * <p> + * These functions implement a universal time scale which can be used as a 'pivot', + * and provide conversion functions to and from all other major time scales. + * This allows datetimes to be converted to the pivot time, safely manipulated, + * and converted back to any other datetime time scale. + * + *<p> + * So what to use for this pivot?
Java time has plenty of range, but cannot represent + * .NET <code>System.DateTime</code> values without severe loss of precision. ICU4C time addresses this by using a + * <code>double</code> that is otherwise equivalent to the Java time. However, there are disadvantages + * with <code>doubles</code>. They provide for much more graceful degradation in arithmetic operations. + * But they only have 53 bits of accuracy, which means that they will lose precision when + * converting back and forth to ticks. What would really be nice would be a + * <code>long double</code> (80 bits -- 64 bit mantissa), but that is not supported on most systems. + * + *<p> + * The Unix extended time uses a structure with two components: time in seconds and a + * fractional field (microseconds). However, this is clumsy, slow, and + * prone to error (you always have to keep track of overflow and underflow in the + * fractional field). <code>BigDecimal</code> would allow for arbitrary precision and arbitrary range, + * but we do not want to use this as the normal type, because it is slow and does not + * have a fixed size. + * + *<p> + * Because of these issues, we ended up concluding that the .NET framework's + * <code>System.DateTime</code> would be the best pivot. However, we use the full range + * allowed by the datatype, allowing for datetimes back to 29,000 BC and up to 29,000 AD. + * This time scale is very fine grained, does not lose precision, and covers a range that + * will meet almost all requirements. It will not handle the range that Java times do, + * but frankly, being able to handle dates before 29,000 BC or after 29,000 AD is of very limited interest. + * + */ + +/** + * <code>UDateTimeScale</code> values are used to specify the time scale used for + * conversion into or out of the universal time scale. + * + * @stable ICU 3.2 + */ +typedef enum UDateTimeScale { + /** + * Used in the JDK. Data is a Java <code>long</code> (<code>int64_t</code>). 
Value + * is milliseconds since January 1, 1970. + * + * @stable ICU 3.2 + */ + UDTS_JAVA_TIME = 0, + + /** + * Used on Unix systems. Data is <code>int32_t</code> or <code>int64_t</code>. Value + * is seconds since January 1, 1970. + * + * @stable ICU 3.2 + */ + UDTS_UNIX_TIME, + + /** + * Used in ICU4C. Data is a <code>double</code>. Value + * is milliseconds since January 1, 1970. + * + * @stable ICU 3.2 + */ + UDTS_ICU4C_TIME, + + /** + * Used in Windows for file times. Data is an <code>int64_t</code>. Value + * is ticks (1 tick == 100 nanoseconds) since January 1, 1601. + * + * @stable ICU 3.2 + */ + UDTS_WINDOWS_FILE_TIME, + + /** + * Used in the .NET framework's <code>System.DateTime</code> structure. Data is an <code>int64_t</code>. Value + * is ticks (1 tick == 100 nanoseconds) since January 1, 0001. + * + * @stable ICU 3.2 + */ + UDTS_DOTNET_DATE_TIME, + + /** + * Used in older Macintosh systems. Data is <code>int32_t</code> or <code>int64_t</code>. Value + * is seconds since January 1, 1904. + * + * @stable ICU 3.2 + */ + UDTS_MAC_OLD_TIME, + + /** + * Used in newer Macintosh systems. Data is a <code>double</code>. Value + * is seconds since January 1, 2001. + * + * @stable ICU 3.2 + */ + UDTS_MAC_TIME, + + /** + * Used in Excel. Data is an <code>?unknown?</code>. Value + * is days since December 31, 1899. + * + * @stable ICU 3.2 + */ + UDTS_EXCEL_TIME, + + /** + * Used in DB2. Data is an <code>?unknown?</code>. Value + * is days since December 31, 1899. + * + * @stable ICU 3.2 + */ + UDTS_DB2_TIME, + + /** + * Data is an <code>int64_t</code>. Value is microseconds since January 1, 1970. + * Similar to Unix time (linear value from 1970) and struct timeval + * (microseconds resolution). + * + * @stable ICU 3.8 + */ + UDTS_UNIX_MICROSECONDS_TIME, + + /** + * The first unused time scale value. 
The limit of this enum + */ + UDTS_MAX_SCALE +} UDateTimeScale; + +/** + * <code>UTimeScaleValue</code> values are used to specify the time scale values + * to <code>utmscale_getTimeScaleValue</code>. + * + * @see utmscale_getTimeScaleValue + * + * @stable ICU 3.2 + */ +typedef enum UTimeScaleValue { + /** + * The constant used to select the units value + * for a time scale. + * + * @see utmscale_getTimeScaleValue + * + * @stable ICU 3.2 + */ + UTSV_UNITS_VALUE = 0, + + /** + * The constant used to select the epoch offset value + * for a time scale. + * + * @see utmscale_getTimeScaleValue + * + * @stable ICU 3.2 + */ + UTSV_EPOCH_OFFSET_VALUE=1, + + /** + * The constant used to select the minimum from value + * for a time scale. + * + * @see utmscale_getTimeScaleValue + * + * @stable ICU 3.2 + */ + UTSV_FROM_MIN_VALUE=2, + + /** + * The constant used to select the maximum from value + * for a time scale. + * + * @see utmscale_getTimeScaleValue + * + * @stable ICU 3.2 + */ + UTSV_FROM_MAX_VALUE=3, + + /** + * The constant used to select the minimum to value + * for a time scale. + * + * @see utmscale_getTimeScaleValue + * + * @stable ICU 3.2 + */ + UTSV_TO_MIN_VALUE=4, + + /** + * The constant used to select the maximum to value + * for a time scale. + * + * @see utmscale_getTimeScaleValue + * + * @stable ICU 3.2 + */ + UTSV_TO_MAX_VALUE=5, + +#ifndef U_HIDE_INTERNAL_API + /** + * The constant used to select the epoch plus one value + * for a time scale. + * + * NOTE: This is an internal value. DO NOT USE IT. May not + * actually be equal to the epoch offset value plus one. + * + * @see utmscale_getTimeScaleValue + * + * @internal ICU 3.2 + */ + UTSV_EPOCH_OFFSET_PLUS_1_VALUE=6, + + /** + * The constant used to select the epoch minus one value + * for a time scale. + * + * NOTE: This is an internal value. DO NOT USE IT. May not + * actually be equal to the epoch offset value minus one. 
+ * + * @see utmscale_getTimeScaleValue + * + * @internal ICU 3.2 + */ + UTSV_EPOCH_OFFSET_MINUS_1_VALUE=7, + + /** + * The constant used to select the units round value + * for a time scale. + * + * NOTE: This is an internal value. DO NOT USE IT. + * + * @see utmscale_getTimeScaleValue + * + * @internal ICU 3.2 + */ + UTSV_UNITS_ROUND_VALUE=8, + + /** + * The constant used to select the minimum safe rounding value + * for a time scale. + * + * NOTE: This is an internal value. DO NOT USE IT. + * + * @see utmscale_getTimeScaleValue + * + * @internal ICU 3.2 + */ + UTSV_MIN_ROUND_VALUE=9, + + /** + * The constant used to select the maximum safe rounding value + * for a time scale. + * + * NOTE: This is an internal value. DO NOT USE IT. + * + * @see utmscale_getTimeScaleValue + * + * @internal ICU 3.2 + */ + UTSV_MAX_ROUND_VALUE=10, + +#endif /* U_HIDE_INTERNAL_API */ + + /** + * The number of time scale values, in other words limit of this enum. + * + * @see utmscale_getTimeScaleValue + */ + UTSV_MAX_SCALE_VALUE=11 + +} UTimeScaleValue; + +/** + * Get a value associated with a particular time scale. + * + * @param timeScale The time scale + * @param value A constant representing the value to get + * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if arguments are invalid. + * @return - the value. + * + * @stable ICU 3.2 + */ +U_STABLE int64_t U_EXPORT2 + utmscale_getTimeScaleValue(UDateTimeScale timeScale, UTimeScaleValue value, UErrorCode *status); + +/* Conversion to 'universal time scale' */ + +/** + * Convert a <code>int64_t</code> datetime from the given time scale to the universal time scale. + * + * @param otherTime The <code>int64_t</code> datetime + * @param timeScale The time scale to convert from + * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the conversion is out of range. 
+ * + * @return The datetime converted to the universal time scale + * + * @stable ICU 3.2 + */ +U_STABLE int64_t U_EXPORT2 + utmscale_fromInt64(int64_t otherTime, UDateTimeScale timeScale, UErrorCode *status); + +/* Conversion from 'universal time scale' */ + +/** + * Convert a datetime from the universal time scale to a <code>int64_t</code> in the given time scale. + * + * @param universalTime The datetime in the universal time scale + * @param timeScale The time scale to convert to + * @param status The status code. Set to <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the conversion is out of range. + * + * @return The datetime converted to the given time scale + * + * @stable ICU 3.2 + */ +U_STABLE int64_t U_EXPORT2 + utmscale_toInt64(int64_t universalTime, UDateTimeScale timeScale, UErrorCode *status); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif + diff --git a/intl/icu/source/i18n/unicode/utrans.h b/intl/icu/source/i18n/unicode/utrans.h new file mode 100644 index 000000000..9d70eeb83 --- /dev/null +++ b/intl/icu/source/i18n/unicode/utrans.h @@ -0,0 +1,658 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1997-2011,2014-2015 International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* Date Name Description +* 06/21/00 aliu Creation. 
+******************************************************************************* +*/ + +#ifndef UTRANS_H +#define UTRANS_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_TRANSLITERATION + +#include "unicode/localpointer.h" +#include "unicode/urep.h" +#include "unicode/parseerr.h" +#include "unicode/uenum.h" +#include "unicode/uset.h" + +/******************************************************************** + * General Notes + ******************************************************************** + */ +/** + * \file + * \brief C API: Transliterator + * + * <h2> Transliteration </h2> + * The data structures and functions described in this header provide + * transliteration services. Transliteration services are implemented + * as C++ classes. The comments and documentation in this header + * assume the reader is familiar with the C++ headers translit.h and + * associated documentation. + * + * A significant but incomplete subset of the C++ transliteration + * services are available to C code through this header. In order to + * access more complex transliteration services, refer to the C++ + * headers and documentation. + * + * There are two sets of functions for working with transliterator IDs: + * + * An old, deprecated set uses char * IDs, which works for true and pure + * identifiers that these APIs were designed for, + * for example "Cyrillic-Latin". + * It does not work when the ID contains filters ("[:Script=Cyrl:]") + * or even a complete set of rules because then the ID string contains more + * than just "invariant" characters (see utypes.h). + * + * A new set of functions replaces the old ones and uses UChar * IDs, + * paralleling the UnicodeString IDs in the C++ API. (New in ICU 2.8.) + */ + +/******************************************************************** + * Data Structures + ********************************************************************/ + +/** + * An opaque transliterator for use in C. 
Open with utrans_openxxx() + * and close with utrans_close() when done. Equivalent to the C++ class + * Transliterator and its subclasses. + * @see Transliterator + * @stable ICU 2.0 + */ +typedef void* UTransliterator; + +/** + * Direction constant indicating the direction in a transliterator, + * e.g., the forward or reverse rules of a RuleBasedTransliterator. + * Specified when a transliterator is opened. An "A-B" transliterator + * transliterates A to B when operating in the forward direction, and + * B to A when operating in the reverse direction. + * @stable ICU 2.0 + */ +typedef enum UTransDirection { + + /** + * UTRANS_FORWARD means from <source> to <target> for a + * transliterator with ID <source>-<target>. For a transliterator + * opened using a rule, it means forward direction rules, e.g., + * "A > B". + */ + UTRANS_FORWARD, + + /** + * UTRANS_REVERSE means from <target> to <source> for a + * transliterator with ID <source>-<target>. For a transliterator + * opened using a rule, it means reverse direction rules, e.g., + * "A < B". + */ + UTRANS_REVERSE + +} UTransDirection; + +/** + * Position structure for utrans_transIncremental() incremental + * transliteration. This structure defines two substrings of the text + * being transliterated. The first region, [contextStart, + * contextLimit), defines what characters the transliterator will read + * as context. The second region, [start, limit), defines what + * characters will actually be transliterated. The second region + * should be a subset of the first. + * + * <p>After a transliteration operation, some of the indices in this + * structure will be modified. See the field descriptions for + * details. + * + * <p>contextStart <= start <= limit <= contextLimit + * + * <p>Note: All index values in this structure must be at code point + * boundaries. That is, none of them may occur between two code units + * of a surrogate pair. If any index does split a surrogate pair, + * results are unspecified. 
+ * + * @stable ICU 2.0 + */ +typedef struct UTransPosition { + + /** + * Beginning index, inclusive, of the context to be considered for + * a transliteration operation. The transliterator will ignore + * anything before this index. INPUT/OUTPUT parameter: This parameter + * is updated by a transliteration operation to reflect the maximum + * amount of antecontext needed by a transliterator. + * @stable ICU 2.4 + */ + int32_t contextStart; + + /** + * Ending index, exclusive, of the context to be considered for a + * transliteration operation. The transliterator will ignore + * anything at or after this index. INPUT/OUTPUT parameter: This + * parameter is updated to reflect changes in the length of the + * text, but points to the same logical position in the text. + * @stable ICU 2.4 + */ + int32_t contextLimit; + + /** + * Beginning index, inclusive, of the text to be transliterated. + * INPUT/OUTPUT parameter: This parameter is advanced past + * characters that have already been transliterated by a + * transliteration operation. + * @stable ICU 2.4 + */ + int32_t start; + + /** + * Ending index, exclusive, of the text to be transliterated. + * INPUT/OUTPUT parameter: This parameter is updated to reflect + * changes in the length of the text, but points to the same + * logical position in the text. + * @stable ICU 2.4 + */ + int32_t limit; + +} UTransPosition; + +/******************************************************************** + * General API + ********************************************************************/ + +/** + * Open a custom transliterator, given a custom rules string + * OR + * a system transliterator, given its ID. + * Any non-NULL result from this function should later be closed with + * utrans_close(). + * + * @param id a valid transliterator ID + * @param idLength the length of the ID string, or -1 if NUL-terminated + * @param dir the desired direction + * @param rules the transliterator rules. See the C++ header rbt.h for + * rules syntax. 
If NULL then a system transliterator matching + * the ID is returned. + * @param rulesLength the length of the rules, or -1 if the rules + * are NUL-terminated. + * @param parseError a pointer to a UParseError struct to receive the details + * of any parsing errors. This parameter may be NULL if no + * parsing error details are desired. + * @param pErrorCode a pointer to the UErrorCode + * @return a transliterator pointer that may be passed to other + * utrans_xxx() functions, or NULL if the open call fails. + * @stable ICU 2.8 + */ +U_STABLE UTransliterator* U_EXPORT2 +utrans_openU(const UChar *id, + int32_t idLength, + UTransDirection dir, + const UChar *rules, + int32_t rulesLength, + UParseError *parseError, + UErrorCode *pErrorCode); + +/** + * Open an inverse of an existing transliterator. For this to work, + * the inverse must be registered with the system. For example, if + * the Transliterator "A-B" is opened, and then its inverse is opened, + * the result is the Transliterator "B-A", if such a transliterator is + * registered with the system. Otherwise the result is NULL and a + * failing UErrorCode is set. Any non-NULL result from this function + * should later be closed with utrans_close(). + * + * @param trans the transliterator to open the inverse of. + * @param status a pointer to the UErrorCode + * @return a pointer to a newly-opened transliterator that is the + * inverse of trans, or NULL if the open call fails. + * @stable ICU 2.0 + */ +U_STABLE UTransliterator* U_EXPORT2 +utrans_openInverse(const UTransliterator* trans, + UErrorCode* status); + +/** + * Create a copy of a transliterator. Any non-NULL result from this + * function should later be closed with utrans_close(). + * + * @param trans the transliterator to be copied. + * @param status a pointer to the UErrorCode + * @return a transliterator pointer that may be passed to other + * utrans_xxx() functions, or NULL if the clone call fails. 
+ * @stable ICU 2.0 + */ +U_STABLE UTransliterator* U_EXPORT2 +utrans_clone(const UTransliterator* trans, + UErrorCode* status); + +/** + * Close a transliterator. Any non-NULL pointer returned by + * utrans_openXxx() or utrans_clone() should eventually be closed. + * @param trans the transliterator to be closed. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_close(UTransliterator* trans); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUTransliteratorPointer + * "Smart pointer" class, closes a UTransliterator via utrans_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUTransliteratorPointer, UTransliterator, utrans_close); + +U_NAMESPACE_END + +#endif + +/** + * Return the programmatic identifier for this transliterator. + * If this identifier is passed to utrans_openU(), it will open + * a transliterator equivalent to this one, if the ID has been + * registered. + * + * @param trans the transliterator to return the ID of. + * @param resultLength pointer to an output variable receiving the length + * of the ID string; can be NULL + * @return the NUL-terminated ID string. This pointer remains + * valid until utrans_close() is called on this transliterator. + * + * @stable ICU 2.8 + */ +U_STABLE const UChar * U_EXPORT2 +utrans_getUnicodeID(const UTransliterator *trans, + int32_t *resultLength); + +/** + * Register an open transliterator with the system. When + * utrans_open() is called with an ID string that is equal to that + * returned by utrans_getID(adoptedTrans,...), then + * utrans_clone(adoptedTrans,...) is returned. + * + * <p>NOTE: After this call the system owns the adoptedTrans and will + * close it. The user must not call utrans_close() on adoptedTrans. + * + * @param adoptedTrans a transliterator, typically the result of + * utrans_openRules(), to be registered with the system. 
+ * @param status a pointer to the UErrorCode + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_register(UTransliterator* adoptedTrans, + UErrorCode* status); + +/** + * Unregister a transliterator from the system. After this call the + * system will no longer recognize the given ID when passed to + * utrans_open(). If the ID is invalid then nothing is done. + * + * @param id an ID to unregister + * @param idLength the length of id, or -1 if id is zero-terminated + * @stable ICU 2.8 + */ +U_STABLE void U_EXPORT2 +utrans_unregisterID(const UChar* id, int32_t idLength); + +/** + * Set the filter used by a transliterator. A filter can be used to + * make the transliterator pass certain characters through untouched. + * The filter is expressed using a UnicodeSet pattern. If the + * filterPattern is NULL or the empty string, then the transliterator + * will be reset to use no filter. + * + * @param trans the transliterator + * @param filterPattern a pattern string, in the form accepted by + * UnicodeSet, specifying which characters to apply the + * transliteration to. May be NULL or the empty string to indicate no + * filter. + * @param filterPatternLen the length of filterPattern, or -1 if + * filterPattern is zero-terminated + * @param status a pointer to the UErrorCode + * @see UnicodeSet + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_setFilter(UTransliterator* trans, + const UChar* filterPattern, + int32_t filterPatternLen, + UErrorCode* status); + +/** + * Return the number of system transliterators. + * It is recommended to use utrans_openIDs() instead. + * + * @return the number of system transliterators. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +utrans_countAvailableIDs(void); + +/** + * Return a UEnumeration for the available transliterators. + * + * @param pErrorCode Pointer to the UErrorCode in/out parameter. + * @return UEnumeration for the available transliterators. + * Close with uenum_close(). 
+ * + * @stable ICU 2.8 + */ +U_STABLE UEnumeration * U_EXPORT2 +utrans_openIDs(UErrorCode *pErrorCode); + +/******************************************************************** + * Transliteration API + ********************************************************************/ + +/** + * Transliterate a segment of a UReplaceable string. The string is + * passed in as a UReplaceable pointer rep and a UReplaceableCallbacks + * function pointer struct repFunc. Functions in the repFunc struct + * will be called in order to modify the rep string. + * + * @param trans the transliterator + * @param rep a pointer to the string. This will be passed to the + * repFunc functions. + * @param repFunc a set of function pointers that will be used to + * modify the string pointed to by rep. + * @param start the beginning index, inclusive; <code>0 <= start <= + * limit</code>. + * @param limit pointer to the ending index, exclusive; <code>start <= + * limit <= repFunc->length(rep)</code>. Upon return, *limit will + * contain the new limit index. The text previously occupying + * <code>[start, limit)</code> has been transliterated, possibly to a + * string of a different length, at <code>[start, + * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em> + * is the return value. + * @param status a pointer to the UErrorCode + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_trans(const UTransliterator* trans, + UReplaceable* rep, + UReplaceableCallbacks* repFunc, + int32_t start, + int32_t* limit, + UErrorCode* status); + +/** + * Transliterate the portion of the UReplaceable text buffer that can + * be transliterated unambiguously. This method is typically called + * after new text has been inserted, e.g. as a result of a keyboard + * event. The transliterator will try to transliterate characters of + * <code>rep</code> between <code>index.cursor</code> and + * <code>index.limit</code>. Characters before + * <code>index.cursor</code> will not be changed. 
+ * + * <p>Upon return, values in <code>index</code> will be updated. + * <code>index.start</code> will be advanced to the first + * character that future calls to this method will read. + * <code>index.cursor</code> and <code>index.limit</code> will + * be adjusted to delimit the range of text that future calls to + * this method may change. + * + * <p>Typical usage of this method begins with an initial call + * with <code>index.start</code> and <code>index.limit</code> + * set to indicate the portion of <code>text</code> to be + * transliterated, and <code>index.cursor == index.start</code>. + * Thereafter, <code>index</code> can be used without + * modification in future calls, provided that all changes to + * <code>text</code> are made via this method. + * + * <p>This method assumes that future calls may be made that will + * insert new text into the buffer. As a result, it only performs + * unambiguous transliterations. After the last call to this method, + * there may be untransliterated text that is waiting for more input + * to resolve an ambiguity. In order to perform these pending + * transliterations, clients should call utrans_trans() with a start + * of index.start and a limit of index.end after the last call to this + * method has been made. + * + * @param trans the transliterator + * @param rep a pointer to the string. This will be passed to the + * repFunc functions. + * @param repFunc a set of function pointers that will be used to + * modify the string pointed to by rep. + * @param pos a struct containing the start and limit indices of the + * text to be read and the text to be transliterated + * @param status a pointer to the UErrorCode + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_transIncremental(const UTransliterator* trans, + UReplaceable* rep, + UReplaceableCallbacks* repFunc, + UTransPosition* pos, + UErrorCode* status); + +/** + * Transliterate a segment of a UChar* string. The string is passed + * in in a UChar* buffer. 
The string is modified in place. If the + * result is longer than textCapacity, it is truncated. The actual + * length of the result is returned in *textLength, if textLength is + * non-NULL. *textLength may be greater than textCapacity, but only + * textCapacity UChars will be written to *text, including the zero + * terminator. + * + * @param trans the transliterator + * @param text a pointer to a buffer containing the text to be + * transliterated on input and the result text on output. + * @param textLength a pointer to the length of the string in text. + * If the length is -1 then the string is assumed to be + * zero-terminated. Upon return, the new length is stored in + * *textLength. If textLength is NULL then the string is assumed to + * be zero-terminated. + * @param textCapacity the length of the text buffer + * @param start the beginning index, inclusive; <code>0 <= start <= + * limit</code>. + * @param limit pointer to the ending index, exclusive; <code>start <= + * limit <= repFunc->length(rep)</code>. Upon return, *limit will + * contain the new limit index. The text previously occupying + * <code>[start, limit)</code> has been transliterated, possibly to a + * string of a different length, at <code>[start, + * </code><em>new-limit</em><code>)</code>, where <em>new-limit</em> + * is the return value. + * @param status a pointer to the UErrorCode + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_transUChars(const UTransliterator* trans, + UChar* text, + int32_t* textLength, + int32_t textCapacity, + int32_t start, + int32_t* limit, + UErrorCode* status); + +/** + * Transliterate the portion of the UChar* text buffer that can be + * transliterated unambiguously. See utrans_transIncremental(). The + * string is passed in in a UChar* buffer. The string is modified in + * place. If the result is longer than textCapacity, it is truncated. 
+ * The actual length of the result is returned in *textLength, if + * textLength is non-NULL. *textLength may be greater than + * textCapacity, but only textCapacity UChars will be written to + * *text, including the zero terminator. See utrans_transIncremental() + * for usage details. + * + * @param trans the transliterator + * @param text a pointer to a buffer containing the text to be + * transliterated on input and the result text on output. + * @param textLength a pointer to the length of the string in text. + * If the length is -1 then the string is assumed to be + * zero-terminated. Upon return, the new length is stored in + * *textLength. If textLength is NULL then the string is assumed to + * be zero-terminated. + * @param textCapacity the length of the text buffer + * @param pos a struct containing the start and limit indices of the + * text to be read and the text to be transliterated + * @param status a pointer to the UErrorCode + * @see utrans_transIncremental + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +utrans_transIncrementalUChars(const UTransliterator* trans, + UChar* text, + int32_t* textLength, + int32_t textCapacity, + UTransPosition* pos, + UErrorCode* status); + +/** + * Create a rule string that can be passed to utrans_openU to recreate this + * transliterator. + * + * @param trans The transliterator + * @param escapeUnprintable if TRUE then convert unprintable characters to their + * hex escape representations, \\uxxxx or \\Uxxxxxxxx. + * Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @param result A pointer to a buffer to receive the rules. + * @param resultLength The maximum size of result. + * @param status A pointer to the UErrorCode. In case of error status, the + * contents of result are undefined. + * @return int32_t The length of the rule string (may be greater than resultLength, + * in which case an error is returned). 
+ * @stable ICU 53 + */ +U_STABLE int32_t U_EXPORT2 +utrans_toRules( const UTransliterator* trans, + UBool escapeUnprintable, + UChar* result, int32_t resultLength, + UErrorCode* status); + +/** + * Returns the set of all characters that may be modified in the input text by + * this UTransliterator, optionally ignoring the transliterator's current filter. + * @param trans The transliterator. + * @param ignoreFilter If FALSE, the returned set incorporates the + * UTransliterator's current filter; if the filter is changed, + * the return value of this function will change. If TRUE, the + * returned set ignores the effect of the UTransliterator's + * current filter. + * @param fillIn Pointer to a USet object to receive the modifiable characters + * set. Previous contents of fillIn are lost. <em>If fillIn is + * NULL, then a new USet is created and returned. The caller + * owns the result and must dispose of it by calling uset_close.</em> + * @param status A pointer to the UErrorCode. + * @return USet* Either fillIn, or if fillIn is NULL, a pointer to a + * newly-allocated USet that the user must close. In case of + * error, NULL is returned. + * @stable ICU 53 + */ +U_STABLE USet* U_EXPORT2 +utrans_getSourceSet(const UTransliterator* trans, + UBool ignoreFilter, + USet* fillIn, + UErrorCode* status); + +/* deprecated API ----------------------------------------------------------- */ + +#ifndef U_HIDE_DEPRECATED_API + +/* see utrans.h documentation for why these functions are deprecated */ + +/** + * Deprecated, use utrans_openU() instead. + * Open a custom transliterator, given a custom rules string + * OR + * a system transliterator, given its ID. + * Any non-NULL result from this function should later be closed with + * utrans_close(). + * + * @param id a valid ID, as returned by utrans_getAvailableID() + * @param dir the desired direction + * @param rules the transliterator rules. See the C++ header rbt.h + * for rules syntax. 
If NULL then a system transliterator matching + * the ID is returned. + * @param rulesLength the length of the rules, or -1 if the rules + * are zero-terminated. + * @param parseError a pointer to a UParseError struct to receive the + * details of any parsing errors. This parameter may be NULL if no + * parsing error details are desired. + * @param status a pointer to the UErrorCode + * @return a transliterator pointer that may be passed to other + * utrans_xxx() functions, or NULL if the open call fails. + * @deprecated ICU 2.8 Use utrans_openU() instead, see utrans.h + */ +U_DEPRECATED UTransliterator* U_EXPORT2 +utrans_open(const char* id, + UTransDirection dir, + const UChar* rules, /* may be Null */ + int32_t rulesLength, /* -1 if null-terminated */ + UParseError* parseError, /* may be Null */ + UErrorCode* status); + +/** + * Deprecated, use utrans_getUnicodeID() instead. + * Return the programmatic identifier for this transliterator. + * If this identifier is passed to utrans_open(), it will open + * a transliterator equivalent to this one, if the ID has been + * registered. + * @param trans the transliterator to return the ID of. + * @param buf the buffer in which to receive the ID. This may be + * NULL, in which case no characters are copied. + * @param bufCapacity the capacity of the buffer. Ignored if buf is + * NULL. + * @return the actual length of the ID, not including + * zero-termination. This may be greater than bufCapacity. + * @deprecated ICU 2.8 Use utrans_getUnicodeID() instead, see utrans.h + */ +U_DEPRECATED int32_t U_EXPORT2 +utrans_getID(const UTransliterator* trans, + char* buf, + int32_t bufCapacity); + +/** + * Deprecated, use utrans_unregisterID() instead. + * Unregister a transliterator from the system. After this call the + * system will no longer recognize the given ID when passed to + * utrans_open(). If the id is invalid then nothing is done. 
+ * + * @param id a zero-terminated ID + * @deprecated ICU 2.8 Use utrans_unregisterID() instead, see utrans.h + */ +U_DEPRECATED void U_EXPORT2 +utrans_unregister(const char* id); + +/** + * Deprecated, use utrans_openIDs() instead. + * Return the ID of the index-th system transliterator. The result + * is placed in the given buffer. If the given buffer is too small, + * the initial substring is copied to buf. The result in buf is + * always zero-terminated. + * + * @param index the number of the transliterator to return. Must + * satisfy 0 <= index < utrans_countAvailableIDs(). If index is out + * of range then it is treated as if it were 0. + * @param buf the buffer in which to receive the ID. This may be + * NULL, in which case no characters are copied. + * @param bufCapacity the capacity of the buffer. Ignored if buf is + * NULL. + * @return the actual length of the index-th ID, not including + * zero-termination. This may be greater than bufCapacity. + * @deprecated ICU 2.8 Use utrans_openIDs() instead, see utrans.h + */ +U_DEPRECATED int32_t U_EXPORT2 +utrans_getAvailableID(int32_t index, + char* buf, + int32_t bufCapacity); + +#endif /* U_HIDE_DEPRECATED_API */ + +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ + +#endif diff --git a/intl/icu/source/i18n/unicode/vtzone.h b/intl/icu/source/i18n/unicode/vtzone.h new file mode 100644 index 000000000..75a5fca9c --- /dev/null +++ b/intl/icu/source/i18n/unicode/vtzone.h @@ -0,0 +1,457 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2007-2013, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +*/ +#ifndef VTZONE_H +#define VTZONE_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: RFC2445 VTIMEZONE support + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/basictz.h" + +U_NAMESPACE_BEGIN + +class VTZWriter; +class VTZReader; +class UVector; + +/** + * <code>VTimeZone</code> is a class implementing RFC2445 VTIMEZONE. You can create a + * <code>VTimeZone</code> instance from a time zone ID supported by <code>TimeZone</code>. + * With the <code>VTimeZone</code> instance created from the ID, you can write out the rule + * in RFC2445 VTIMEZONE format. Also, you can create a <code>VTimeZone</code> instance + * from RFC2445 VTIMEZONE data stream, which allows you to calculate time + * zone offset by the rules defined by the data. Or, you can create a + * <code>VTimeZone</code> from any other ICU <code>BasicTimeZone</code>. + * <br><br> + * Note: The consumer of this class reading or writing VTIMEZONE data is responsible to + * decode or encode Non-ASCII text. Methods reading/writing VTIMEZONE data in this class + * do nothing with MIME encoding. + * @stable ICU 3.8 + */ +class U_I18N_API VTimeZone : public BasicTimeZone { +public: + /** + * Copy constructor. + * @param source The <code>VTimeZone</code> object to be copied. + * @stable ICU 3.8 + */ + VTimeZone(const VTimeZone& source); + + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~VTimeZone(); + + /** + * Assignment operator. + * @param right The object to be copied. + * @stable ICU 3.8 + */ + VTimeZone& operator=(const VTimeZone& right); + + /** + * Return true if the given <code>TimeZone</code> objects are + * semantically equal. Objects of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZone</code> objects are + *semantically equal. 
+ * @stable ICU 3.8 + */ + virtual UBool operator==(const TimeZone& that) const; + + /** + * Return true if the given <code>TimeZone</code> objects are + * semantically unequal. Objects of different subclasses are considered unequal. + * @param that The object to be compared with. + * @return true if the given <code>TimeZone</code> objects are + * semantically unequal. + * @stable ICU 3.8 + */ + virtual UBool operator!=(const TimeZone& that) const; + + /** + * Create a <code>VTimeZone</code> instance by the time zone ID. + * @param ID The time zone ID, such as America/New_York + * @return A <code>VTimeZone</code> object initialized by the time zone ID, + * or NULL when the ID is unknown. + * @stable ICU 3.8 + */ + static VTimeZone* createVTimeZoneByID(const UnicodeString& ID); + + /** + * Create a <code>VTimeZone</code> instance using a basic time zone. + * @param basicTZ The basic time zone instance + * @param status Output param to be filled in with a success or an error. + * @return A <code>VTimeZone</code> object initialized by the basic time zone. + * @stable ICU 4.6 + */ + static VTimeZone* createVTimeZoneFromBasicTimeZone(const BasicTimeZone& basicTZ, + UErrorCode &status); + + /** + * Create a <code>VTimeZone</code> instance by RFC2445 VTIMEZONE data + * + * @param vtzdata The string including VTIMEZONE data block + * @param status Output param to be filled in with a success or an error. + * @return A <code>VTimeZone</code> initialized by the VTIMEZONE data or + * NULL if failed to load the rule from the VTIMEZONE data. + * @stable ICU 3.8 + */ + static VTimeZone* createVTimeZone(const UnicodeString& vtzdata, UErrorCode& status); + + /** + * Gets the RFC2445 TZURL property value. When a <code>VTimeZone</code> instance was + * created from VTIMEZONE data, the initial value is set by the TZURL property value + * in the data. Otherwise, the initial value is not set. + * @param url Receives the RFC2445 TZURL property value. 
+ * @return TRUE if TZURL attribute is available and value is set. + * @stable ICU 3.8 + */ + UBool getTZURL(UnicodeString& url) const; + + /** + * Sets the RFC2445 TZURL property value. + * @param url The TZURL property value. + * @stable ICU 3.8 + */ + void setTZURL(const UnicodeString& url); + + /** + * Gets the RFC2445 LAST-MODIFIED property value. When a <code>VTimeZone</code> instance + * was created from VTIMEZONE data, the initial value is set by the LAST-MODIFIED property + * value in the data. Otherwise, the initial value is not set. + * @param lastModified Receives the last modified date. + * @return TRUE if lastModified attribute is available and value is set. + * @stable ICU 3.8 + */ + UBool getLastModified(UDate& lastModified) const; + + /** + * Sets the RFC2445 LAST-MODIFIED property value. + * @param lastModified The LAST-MODIFIED date. + * @stable ICU 3.8 + */ + void setLastModified(UDate lastModified); + + /** + * Writes RFC2445 VTIMEZONE data for this time zone + * @param result Output param to be filled in with the VTIMEZONE data. + * @param status Output param to be filled in with a success or an error. + * @stable ICU 3.8 + */ + void write(UnicodeString& result, UErrorCode& status) const; + + /** + * Writes RFC2445 VTIMEZONE data for this time zone applicable + * for dates after the specified start time. + * @param start The start date. + * @param result Output param to be filled in with the VTIMEZONE data. + * @param status Output param to be filled in with a success or an error. + * @stable ICU 3.8 + */ + void write(UDate start, UnicodeString& result, UErrorCode& status) const; + + /** + * Writes RFC2445 VTIMEZONE data applicable for the specified date. + * Some common iCalendar implementations can only handle a single time + * zone property or a pair of standard and daylight time properties using + * BYDAY rule with day of week (such as BYDAY=1SUN). This method produces + * the VTIMEZONE data which can be handled by these implementations. 
The rules + * produced by this method can be used only for calculating time zone offset + * around the specified date. + * @param time The date used for rule extraction. + * @param result Output param to be filled in with the VTIMEZONE data. + * @param status Output param to be filled in with a success or an error. + * @stable ICU 3.8 + */ + void writeSimple(UDate time, UnicodeString& result, UErrorCode& status) const; + + /** + * Clones TimeZone objects polymorphically. Clients are responsible for deleting + * the TimeZone object cloned. + * @return A new copy of this TimeZone object. + * @stable ICU 3.8 + */ + virtual TimeZone* clone(void) const; + + /** + * Returns the TimeZone's adjusted GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time in this time zone, taking daylight savings time into + * account) as of a particular reference date. The reference date is used to determine + * whether daylight savings time is in effect and needs to be figured into the offset + * that is returned (in other words, what is the adjusted GMT offset in this time zone + * at this particular date and time?). For the time zones produced by createTimeZone(), + * the reference date is specified according to the Gregorian calendar, and the date + * and time fields are local standard time. + * + * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload, + * which returns both the raw and the DST offset for a given time. This method + * is retained only for backward compatibility. + * + * @param era The reference date's era + * @param year The reference date's year + * @param month The reference date's month (0-based; 0 is January) + * @param day The reference date's day-in-month (1-based) + * @param dayOfWeek The reference date's day-of-week (1-based; 1 is Sunday) + * @param millis The reference date's milliseconds in day, local standard time + * @param status Output param to be filled in with a success or an error. 
+ * @return The offset in milliseconds to add to GMT to get local time. + * @stable ICU 3.8 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, UErrorCode& status) const; + + /** + * Gets the time zone offset, for current date, modified in case of + * daylight savings. This is the offset to add *to* UTC to get local time. + * + * <p>Note: Don't call this method. Instead, call the getOffset(UDate...) overload, + * which returns both the raw and the DST offset for a given time. This method + * is retained only for backward compatibility. + * + * @param era The reference date's era + * @param year The reference date's year + * @param month The reference date's month (0-based; 0 is January) + * @param day The reference date's day-in-month (1-based) + * @param dayOfWeek The reference date's day-of-week (1-based; 1 is Sunday) + * @param millis The reference date's milliseconds in day, local standard time + * @param monthLength The length of the given month in days. + * @param status Output param to be filled in with a success or an error. + * @return The offset in milliseconds to add to GMT to get local time. + * @stable ICU 3.8 + */ + virtual int32_t getOffset(uint8_t era, int32_t year, int32_t month, int32_t day, + uint8_t dayOfWeek, int32_t millis, + int32_t monthLength, UErrorCode& status) const; + + /** + * Returns the time zone raw and DST offsets for the given moment + * in time. Upon return, local-millis = GMT-millis + rawOffset + + * dstOffset. All computations are performed in the proleptic + * Gregorian calendar. The default implementation in the TimeZone + * class delegates to the 8-argument getOffset(). + * + * @param date moment in time for which to return offsets, in + * units of milliseconds from January 1, 1970 0:00 GMT, either GMT + * time or local wall time, depending on `local'. + * @param local if true, `date' is local wall time; otherwise it + * is in GMT time. 
+ * @param rawOffset output parameter to receive the raw offset, that + * is, the offset not including DST adjustments + * @param dstOffset output parameter to receive the DST offset, + * that is, the offset to be added to `rawOffset' to obtain the + * total offset between local and GMT time. If DST is not in + * effect, this value is zero; otherwise it is a positive value, + * typically one hour. + * @param ec input-output error code + * @stable ICU 3.8 + */ + virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, + int32_t& dstOffset, UErrorCode& ec) const; + + /** + * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @param offsetMillis The new raw GMT offset for this time zone. + * @stable ICU 3.8 + */ + virtual void setRawOffset(int32_t offsetMillis); + + /** + * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add + * to GMT to get local time, before taking daylight savings time into account). + * + * @return The TimeZone's raw GMT offset. + * @stable ICU 3.8 + */ + virtual int32_t getRawOffset(void) const; + + /** + * Queries if this time zone uses daylight savings time. + * @return true if this time zone uses daylight savings time, + * false, otherwise. + * @stable ICU 3.8 + */ + virtual UBool useDaylightTime(void) const; + + /** + * Queries if the given date is in daylight savings time in + * this time zone. + * This method is wasteful since it creates a new GregorianCalendar and + * deletes it each time it is called. This is a deprecated method + * and provided only for Java compatibility. + * + * @param date the given UDate. + * @param status Output param filled in with success/error code. + * @return true if the given date is in daylight savings time, + * false, otherwise. + * @deprecated ICU 2.4. Use Calendar::inDaylightTime() instead. 
+ */ + virtual UBool inDaylightTime(UDate date, UErrorCode& status) const; + + /** + * Returns true if this zone has the same rule and offset as another zone. + * That is, if this zone differs only in ID, if at all. + * @param other the <code>TimeZone</code> object to be compared with + * @return true if the given zone is the same as this one, + * with the possible exception of the ID + * @stable ICU 3.8 + */ + virtual UBool hasSameRules(const TimeZone& other) const; + + /** + * Gets the first time zone transition after the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first transition after the base time. + * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + + /** + * Gets the most recent time zone transition before the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent transition before the base time. + * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) const; + + /** + * Returns the number of <code>TimeZoneRule</code>s which represents time transitions, + * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except + * <code>InitialTimeZoneRule</code>. The return value range is 0 or any positive value. + * @param status Receives error status code. + * @return The number of <code>TimeZoneRule</code>s representing time transitions. + * @stable ICU 3.8 + */ + virtual int32_t countTransitionRules(UErrorCode& status) const; + + /** + * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code> + * which represent time transitions for this time zone. 
On successful return, + * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and + * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code> + * instances up to the size specified by trscount. The results are referencing the + * rule instance held by this time zone instance. Therefore, after this time zone + * is destructed, they are no longer available. + * @param initial Receives the initial timezone rule + * @param trsrules Receives the timezone transition rules + * @param trscount On input, specify the size of the array 'trsrules' receiving + * the timezone transition rules. On output, the actual number of + * rules filled in the array will be set. + * @param status Receives error status code. + * @stable ICU 3.8 + */ + virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const; + +private: + enum { DEFAULT_VTIMEZONE_LINES = 100 }; + + /** + * Default constructor. 
+ */ + VTimeZone(); + static VTimeZone* createVTimeZone(VTZReader* reader); + void write(VTZWriter& writer, UErrorCode& status) const; + void write(UDate start, VTZWriter& writer, UErrorCode& status) const; + void writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) const; + void load(VTZReader& reader, UErrorCode& status); + void parse(UErrorCode& status); + + void writeZone(VTZWriter& w, BasicTimeZone& basictz, UVector* customProps, + UErrorCode& status) const; + + void writeHeaders(VTZWriter& w, UErrorCode& status) const; + void writeFooter(VTZWriter& writer, UErrorCode& status) const; + + void writeZonePropsByTime(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, UDate time, UBool withRDATE, + UErrorCode& status) const; + void writeZonePropsByDOM(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t dayOfMonth, UDate startTime, UDate untilTime, + UErrorCode& status) const; + void writeZonePropsByDOW(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t weekInMonth, int32_t dayOfWeek, + UDate startTime, UDate untilTime, UErrorCode& status) const; + void writeZonePropsByDOW_GEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + UDate startTime, UDate untilTime, UErrorCode& status) const; + void writeZonePropsByDOW_GEQ_DOM_sub(VTZWriter& writer, int32_t month, int32_t dayOfMonth, + int32_t dayOfWeek, int32_t numDays, + UDate untilTime, int32_t fromOffset, UErrorCode& status) const; + void writeZonePropsByDOW_LEQ_DOM(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, + int32_t month, int32_t dayOfMonth, int32_t dayOfWeek, + UDate startTime, UDate untilTime, UErrorCode& status) const; + void 
writeFinalRule(VTZWriter& writer, UBool isDst, const AnnualTimeZoneRule* rule, + int32_t fromRawOffset, int32_t fromDSTSavings, + UDate startTime, UErrorCode& status) const; + + void beginZoneProps(VTZWriter& writer, UBool isDst, const UnicodeString& zonename, + int32_t fromOffset, int32_t toOffset, UDate startTime, UErrorCode& status) const; + void endZoneProps(VTZWriter& writer, UBool isDst, UErrorCode& status) const; + void beginRRULE(VTZWriter& writer, int32_t month, UErrorCode& status) const; + void appendUNTIL(VTZWriter& writer, const UnicodeString& until, UErrorCode& status) const; + + BasicTimeZone *tz; + UVector *vtzlines; + UnicodeString tzurl; + UDate lastmod; + UnicodeString olsonzid; + UnicodeString icutzver; + +public: + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 3.8 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 3.8 + */ + virtual UClassID getDynamicClassID(void) const; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // VTZONE_H +//eof |