diff options
Diffstat (limited to 'js/src/builtin/Intl.h')
-rw-r--r-- | js/src/builtin/Intl.h | 409 |
1 files changed, 409 insertions, 0 deletions
diff --git a/js/src/builtin/Intl.h b/js/src/builtin/Intl.h new file mode 100644 index 000000000..54764605b --- /dev/null +++ b/js/src/builtin/Intl.h @@ -0,0 +1,409 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set ts=8 sts=4 et sw=4 tw=99: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef builtin_Intl_h +#define builtin_Intl_h + +#include "mozilla/HashFunctions.h" +#include "mozilla/MemoryReporting.h" + +#include "jsalloc.h" +#include "NamespaceImports.h" + +#include "js/GCAPI.h" +#include "js/GCHashTable.h" + +#if ENABLE_INTL_API +#include "unicode/utypes.h" +#endif + +/* + * The Intl module specified by standard ECMA-402, + * ECMAScript Internationalization API Specification. + */ + +namespace js { + +/** + * Initializes the Intl Object and its standard built-in properties. + * Spec: ECMAScript Internationalization API Specification, 8.0, 8.1 + */ +extern JSObject* +InitIntlClass(JSContext* cx, HandleObject obj); + +/** + * Stores Intl data which can be shared across compartments (but not contexts). + * + * Used for data which is expensive when computed repeatedly or is not + * available through ICU. + */ +class SharedIntlData +{ + /** + * Information tracking the set of the supported time zone names, derived + * from the IANA time zone database <https://www.iana.org/time-zones>. + * + * There are two kinds of IANA time zone names: Zone and Link (denoted as + * such in database source files). Zone names are the canonical, preferred + * name for a time zone, e.g. Asia/Kolkata. Link names simply refer to + * target Zone names for their meaning, e.g. Asia/Calcutta targets + * Asia/Kolkata. That a name is a Link doesn't *necessarily* reflect a + * sense of deprecation: some Link names also exist partly for convenience, + * e.g. 
UTC and GMT as Link names targeting the Zone name Etc/UTC. + * + * Two data sources determine the time zone names we support: those ICU + * supports and IANA's zone information. + * + * Unfortunately the names ICU and IANA support, and their Link + * relationships from name to target, aren't identical, so we can't simply + * implicitly trust ICU's name handling. We must perform various + * preprocessing of user-provided zone names and post-processing of + * ICU-provided zone names to implement ECMA-402's IANA-consistent behavior. + * + * Also see <https://ssl.icu-project.org/trac/ticket/12044> and + * <http://unicode.org/cldr/trac/ticket/9892>. + */ + + using TimeZoneName = JSAtom*; + /** + * Hash policy for the time zone tables below, whose keys are TimeZoneName + * values (JSAtom pointers). + * + * A Lookup bundles a character pointer (Latin-1 or two-byte), a length and + * a precomputed hash; hash() simply returns that stored hash. + * + * NOTE(review): the Lookup constructor and match() are only declared here. + * Presumably |isLatin1| says which union member is active, |nogc| keeps the + * raw character pointer usable while the Lookup is alive, and match() + * compares the characters of |key| with those of |lookup| -- confirm + * against the definitions. + */ + struct TimeZoneHasher + { + struct Lookup + { + union { + const JS::Latin1Char* latin1Chars; + const char16_t* twoByteChars; + }; + bool isLatin1; + size_t length; + JS::AutoCheckCannotGC nogc; + HashNumber hash; + + explicit Lookup(JSFlatString* timeZone); + }; + + static js::HashNumber hash(const Lookup& lookup) { return lookup.hash; } + static bool match(TimeZoneName key, const Lookup& lookup); + }; + /** Set of time zone names, hashed via TimeZoneHasher. */ + using TimeZoneSet = js::GCHashSet<TimeZoneName, + TimeZoneHasher, + js::SystemAllocPolicy>; + /** Map from one time zone name to another, hashed via TimeZoneHasher. */ + using TimeZoneMap = js::GCHashMap<TimeZoneName, + TimeZoneName, + TimeZoneHasher, + js::SystemAllocPolicy>; + + /** + * As a threshold matter, available time zones are those time zones ICU + * supports, via ucal_openTimeZones. But ICU supports additional non-IANA + * time zones described in intl/icu/source/tools/tzcode/icuzones (listed in + * IntlTimeZoneData.cpp's |legacyICUTimeZones|) for its own backwards + * compatibility purposes. This set consists of ICU's supported time zones, + * minus all backwards-compatibility time zones. + */ + TimeZoneSet availableTimeZones; + + /** + * IANA treats some time zone names as Zones, that ICU instead treats as + * Links. 
For example, IANA considers "America/Indiana/Indianapolis" to be + * a Zone and "America/Fort_Wayne" a Link that targets it, but ICU + * considers the former a Link that targets "America/Indianapolis" (which + * IANA treats as a Link). + * + * ECMA-402 requires that we respect IANA data, so if we're asked to + * canonicalize a time zone name in this set, we must *not* return ICU's + * canonicalization. + */ + TimeZoneSet ianaZonesTreatedAsLinksByICU; + + /** + * IANA treats some time zone names as Links to one target, that ICU + * instead treats as either Zones, or Links to different targets. An + * example of the former is "Asia/Calcutta", which IANA assigns the target + * "Asia/Kolkata" but ICU considers its own Zone. An example of the latter + * is "America/Virgin", which IANA assigns the target + * "America/Port_of_Spain" but ICU assigns the target "America/St_Thomas". + * + * ECMA-402 requires that we respect IANA data, so if we're asked to + * canonicalize a time zone name that's a key in this map, we *must* return + * the corresponding value and *must not* return ICU's canonicalization. + */ + TimeZoneMap ianaLinksCanonicalizedDifferentlyByICU; + /** + * Starts out false. NOTE(review): presumably set by ensureTimeZones() once + * the three tables above have been populated, so that later calls can + * return early -- confirm against the definition. + */ + bool timeZoneDataInitialized = false; + + /** + * Precomputes the available time zone names, because it's too expensive to + * call ucal_openTimeZones() repeatedly. + */ + bool ensureTimeZones(JSContext* cx); + + public: + /** + * Returns the validated time zone name in |result|. If the input time zone + * isn't a valid IANA time zone name, |result| remains unchanged. + * + * NOTE(review): the bool result presumably reports failure (for example a + * pending exception on |cx|) rather than whether the name was valid -- + * confirm against the definition. + */ + bool validateTimeZoneName(JSContext* cx, JS::HandleString timeZone, + JS::MutableHandleString result); + + /** + * Returns the canonical time zone name in |result|. If no canonical name + * was found, |result| remains unchanged. + * + * This method only handles time zones which are canonicalized differently + * by ICU when compared to IANA. 
+ */ + bool tryCanonicalizeTimeZoneConsistentWithIANA(JSContext* cx, JS::HandleString timeZone, + JS::MutableHandleString result); + /** + * Teardown, GC tracing and memory-reporting entry points; only declared + * here. NOTE(review): presumably destroyInstance() releases the tables + * above, trace() traces the atoms they hold, and sizeOfExcludingThis() + * reports their heap usage -- confirm against the definitions. + */ + void destroyInstance(); + + void trace(JSTracer* trc); + + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const; +}; + +/* + * The following functions are for use by self-hosted code. + * + * They all share the signature (JSContext* cx, unsigned argc, Value* vp) and + * a MOZ_MUST_USE bool result. The "Usage:" line in a comment, where present, + * shows the call shape as seen by the caller. + */ + + +/******************** Collator ********************/ + +/** + * Returns a new instance of the standard built-in Collator constructor. + * Self-hosted code cannot cache this constructor (as it does for others in + * Utilities.js) because it is initialized after self-hosted code is compiled. + * + * Usage: collator = intl_Collator(locales, options) + */ +extern MOZ_MUST_USE bool +intl_Collator(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns an object indicating the supported locales for collation + * by having a true-valued property for each such locale with the + * canonicalized language tag as the property name. The object has no + * prototype. + * + * Usage: availableLocales = intl_Collator_availableLocales() + */ +extern MOZ_MUST_USE bool +intl_Collator_availableLocales(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns an array with the collation type identifiers per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * collations supported for the given locale. "standard" and "search" are + * excluded. + * + * Usage: collations = intl_availableCollations(locale) + */ +extern MOZ_MUST_USE bool +intl_availableCollations(JSContext* cx, unsigned argc, Value* vp); + +/** + * Compares x and y (which must be String values), and returns a number less + * than 0 if x < y, 0 if x = y, or a number greater than 0 if x > y according + * to the sort order for the locale and collation options of the given + * Collator. + * + * Spec: ECMAScript Internationalization API Specification, 10.3.2. 
+ * + * Usage: result = intl_CompareStrings(collator, x, y) + */ +extern MOZ_MUST_USE bool +intl_CompareStrings(JSContext* cx, unsigned argc, Value* vp); + + +/******************** NumberFormat ********************/ + +/** + * Returns a new instance of the standard built-in NumberFormat constructor. + * Self-hosted code cannot cache this constructor (as it does for others in + * Utilities.js) because it is initialized after self-hosted code is compiled. + * + * Usage: numberFormat = intl_NumberFormat(locales, options) + */ +extern MOZ_MUST_USE bool +intl_NumberFormat(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns an object indicating the supported locales for number formatting + * by having a true-valued property for each such locale with the + * canonicalized language tag as the property name. The object has no + * prototype. + * + * Usage: availableLocales = intl_NumberFormat_availableLocales() + */ +extern MOZ_MUST_USE bool +intl_NumberFormat_availableLocales(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns the numbering system type identifier per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * default numbering system for the given locale. + * + * Usage: defaultNumberingSystem = intl_numberingSystem(locale) + */ +extern MOZ_MUST_USE bool +intl_numberingSystem(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns a string representing the number x according to the effective + * locale and the formatting options of the given NumberFormat. + * + * Spec: ECMAScript Internationalization API Specification, 11.3.2. + * + * Usage: formatted = intl_FormatNumber(numberFormat, x) + */ +extern MOZ_MUST_USE bool +intl_FormatNumber(JSContext* cx, unsigned argc, Value* vp); + + +/******************** DateTimeFormat ********************/ + +/** + * Returns a new instance of the standard built-in DateTimeFormat constructor. 
+ * Self-hosted code cannot cache this constructor (as it does for others in + * Utilities.js) because it is initialized after self-hosted code is compiled. + * + * Usage: dateTimeFormat = intl_DateTimeFormat(locales, options) + */ +extern MOZ_MUST_USE bool +intl_DateTimeFormat(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns an object indicating the supported locales for date and time + * formatting by having a true-valued property for each such locale with the + * canonicalized language tag as the property name. The object has no + * prototype. + * + * Usage: availableLocales = intl_DateTimeFormat_availableLocales() + */ +extern MOZ_MUST_USE bool +intl_DateTimeFormat_availableLocales(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns an array with the calendar type identifiers per Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * supported calendars for the given locale. The default calendar is + * element 0. + * + * Usage: calendars = intl_availableCalendars(locale) + */ +extern MOZ_MUST_USE bool +intl_availableCalendars(JSContext* cx, unsigned argc, Value* vp); + +/** + * 6.4.1 IsValidTimeZoneName ( timeZone ) + * + * Verifies that the given string is a valid time zone name. If it is a valid + * time zone name, its IANA time zone name is returned. Otherwise returns null. + * + * ES2017 Intl draft rev 4a23f407336d382ed5e3471200c690c9b020b5f3 + * + * Usage: ianaTimeZone = intl_IsValidTimeZoneName(timeZone) + */ +extern MOZ_MUST_USE bool +intl_IsValidTimeZoneName(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns the canonicalized time zone name. Canonicalization resolves link + * names to their target time zones. + * + * Usage: ianaTimeZone = intl_canonicalizeTimeZone(timeZone) + */ +extern MOZ_MUST_USE bool +intl_canonicalizeTimeZone(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns the default time zone name. The time zone name is not canonicalized. 
+ * + * Usage: icuDefaultTimeZone = intl_defaultTimeZone() + */ +extern MOZ_MUST_USE bool +intl_defaultTimeZone(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns the raw offset from GMT in milliseconds for the default time zone. + * + * Usage: defaultTimeZoneOffset = intl_defaultTimeZoneOffset() + */ +extern MOZ_MUST_USE bool +intl_defaultTimeZoneOffset(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns a pattern in the date-time format pattern language of Unicode + * Technical Standard 35, Unicode Locale Data Markup Language, for the + * best-fit date-time format pattern corresponding to skeleton for the + * given locale. + * + * Usage: pattern = intl_patternForSkeleton(locale, skeleton) + */ +extern MOZ_MUST_USE bool +intl_patternForSkeleton(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns a String value representing x (which must be a Number value) + * according to the effective locale and the formatting options of the + * given DateTimeFormat. + * + * Spec: ECMAScript Internationalization API Specification, 12.3.2. + * + * Usage: formatted = intl_FormatDateTime(dateTimeFormat, x) + */ +extern MOZ_MUST_USE bool +intl_FormatDateTime(JSContext* cx, unsigned argc, Value* vp); + +/** + * Returns a plain object with calendar information for a single valid locale + * (callers must perform this validation). The object will have these + * properties: + * + * firstDayOfWeek + * an integer in the range 1=Sunday to 7=Saturday indicating the day + * considered the first day of the week in calendars, e.g. 1 for en-US, + * 2 for en-GB, 1 for bn-IN + * minDays + * an integer in the range of 1 to 7 indicating the minimum number + * of days required in the first week of the year, e.g. 1 for en-US, 4 for de + * weekendStart + * an integer in the range 1=Sunday to 7=Saturday indicating the day + * considered the beginning of a weekend, e.g. 
7 for en-US, 7 for en-GB, + * 1 for bn-IN + * weekendEnd + * an integer in the range 1=Sunday to 7=Saturday indicating the day + * considered the end of a weekend, e.g. 1 for en-US, 1 for en-GB, + * 1 for bn-IN (note that "weekend" is *not* necessarily two days) + * + * NOTE: "calendar" and "locale" properties are *not* added to the object. + */ +extern MOZ_MUST_USE bool +intl_GetCalendarInfo(JSContext* cx, unsigned argc, Value* vp); + +#if ENABLE_INTL_API +/** + * Cast char16_t* strings to UChar* strings used by ICU. + * + * This is a pure pointer reinterpretation: no characters are copied or + * converted. NOTE(review): it relies on UChar having the same size and + * representation as char16_t -- confirm for the ICU configuration in use. + */ +inline const UChar* +Char16ToUChar(const char16_t* chars) +{ + return reinterpret_cast<const UChar*>(chars); +} +/** Overload of the cast above for pointers to mutable characters. */ +inline UChar* +Char16ToUChar(char16_t* chars) +{ + return reinterpret_cast<UChar*>(chars); +} +#endif // ENABLE_INTL_API + +} // namespace js + +#endif /* builtin_Intl_h */ |