diff options
Diffstat (limited to 'intl/icu/source/common/loclikely.cpp')
-rw-r--r-- | intl/icu/source/common/loclikely.cpp | 1385 |
1 files changed, 1385 insertions, 0 deletions
diff --git a/intl/icu/source/common/loclikely.cpp b/intl/icu/source/common/loclikely.cpp new file mode 100644 index 000000000..c13b37e1a --- /dev/null +++ b/intl/icu/source/common/loclikely.cpp @@ -0,0 +1,1385 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: loclikely.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010feb25 +* created by: Markus W. Scherer +* +* Code for likely and minimized locale subtags, separated out from other .cpp files +* that then do not depend on resource bundle code and likely-subtags data. +*/ + +#include "unicode/utypes.h" +#include "unicode/locid.h" +#include "unicode/putil.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "unicode/uscript.h" +#include "cmemory.h" +#include "cstring.h" +#include "ulocimp.h" +#include "ustr_imp.h" + +/** + * This function looks for the localeID in the likelySubtags resource. + * + * @param localeID The tag to find. + * @param buffer A buffer to hold the matching entry + * @param bufferLength The length of the output buffer + * @return A pointer to "buffer" if found, or a null pointer if not. + */ +static const char* U_CALLCONV +findLikelySubtags(const char* localeID, + char* buffer, + int32_t bufferLength, + UErrorCode* err) { + const char* result = NULL; + + if (!U_FAILURE(*err)) { + int32_t resLen = 0; + const UChar* s = NULL; + UErrorCode tmpErr = U_ZERO_ERROR; + UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); + if (U_SUCCESS(tmpErr)) { + s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); + + if (U_FAILURE(tmpErr)) { + /* + * If a resource is missing, it's not really an error, it's + * just that we don't have any data for that particular locale ID. + */ + if (tmpErr != U_MISSING_RESOURCE_ERROR) { + *err = tmpErr; + } + } + else if (resLen >= bufferLength) { + /* The buffer should never overflow. */ + *err = U_INTERNAL_PROGRAM_ERROR; + } + else { + u_UCharsToChars(s, buffer, resLen + 1); + result = buffer; + } + + ures_close(subtags); + } else { + *err = tmpErr; + } + } + + return result; +} + +/** + * Append a tag to a buffer, adding the separator if necessary. The buffer + * must be large enough to contain the resulting tag plus any separator + * necessary. The tag must not be a zero-length string. + * + * @param tag The tag to add. + * @param tagLength The length of the tag. + * @param buffer The output buffer. + * @param bufferLength The length of the output buffer. This is an input/ouput parameter. + **/ +static void U_CALLCONV +appendTag( + const char* tag, + int32_t tagLength, + char* buffer, + int32_t* bufferLength) { + + if (*bufferLength > 0) { + buffer[*bufferLength] = '_'; + ++(*bufferLength); + } + + uprv_memmove( + &buffer[*bufferLength], + tag, + tagLength); + + *bufferLength += tagLength; +} + +/** + * These are the canonical strings for unknown languages, scripts and regions. + **/ +static const char* const unknownLanguage = "und"; +static const char* const unknownScript = "Zzzz"; +static const char* const unknownRegion = "ZZ"; + +/** + * Create a tag string from the supplied parameters. The lang, script and region + * parameters may be NULL pointers. If they are, their corresponding length parameters + * must be less than or equal to 0. + * + * If any of the language, script or region parameters are empty, and the alternateTags + * parameter is not NULL, it will be parsed for potential language, script and region tags + * to be used when constructing the new tag. If the alternateTags parameter is NULL, or + * it contains no language tag, the default tag for the unknown language is used. + * + * If the length of the new string exceeds the capacity of the output buffer, + * the function copies as many bytes to the output buffer as it can, and returns + * the error U_BUFFER_OVERFLOW_ERROR. + * + * If an illegal argument is provided, the function returns the error + * U_ILLEGAL_ARGUMENT_ERROR. + * + * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if + * the tag string fits in the output buffer, but the null terminator doesn't. + * + * @param lang The language tag to use. + * @param langLength The length of the language tag. + * @param script The script tag to use. + * @param scriptLength The length of the script tag. + * @param region The region tag to use. + * @param regionLength The length of the region tag. + * @param trailing Any trailing data to append to the new tag. + * @param trailingLength The length of the trailing data. + * @param alternateTags A string containing any alternate tags. + * @param tag The output buffer. + * @param tagCapacity The capacity of the output buffer. + * @param err A pointer to a UErrorCode for error reporting. + * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. + **/ +static int32_t U_CALLCONV +createTagStringWithAlternates( + const char* lang, + int32_t langLength, + const char* script, + int32_t scriptLength, + const char* region, + int32_t regionLength, + const char* trailing, + int32_t trailingLength, + const char* alternateTags, + char* tag, + int32_t tagCapacity, + UErrorCode* err) { + + if (U_FAILURE(*err)) { + goto error; + } + else if (tag == NULL || + tagCapacity <= 0 || + langLength >= ULOC_LANG_CAPACITY || + scriptLength >= ULOC_SCRIPT_CAPACITY || + regionLength >= ULOC_COUNTRY_CAPACITY) { + goto error; + } + else { + /** + * ULOC_FULLNAME_CAPACITY will provide enough capacity + * that we can build a string that contains the language, + * script and region code without worrying about overrunning + * the user-supplied buffer. + **/ + char tagBuffer[ULOC_FULLNAME_CAPACITY]; + int32_t tagLength = 0; + int32_t capacityRemaining = tagCapacity; + UBool regionAppended = FALSE; + + if (langLength > 0) { + appendTag( + lang, + langLength, + tagBuffer, + &tagLength); + } + else if (alternateTags == NULL) { + /* + * Append the value for an unknown language, if + * we found no language. + */ + appendTag( + unknownLanguage, + (int32_t)uprv_strlen(unknownLanguage), + tagBuffer, + &tagLength); + } + else { + /* + * Parse the alternateTags string for the language. + */ + char alternateLang[ULOC_LANG_CAPACITY]; + int32_t alternateLangLength = sizeof(alternateLang); + + alternateLangLength = + uloc_getLanguage( + alternateTags, + alternateLang, + alternateLangLength, + err); + if(U_FAILURE(*err) || + alternateLangLength >= ULOC_LANG_CAPACITY) { + goto error; + } + else if (alternateLangLength == 0) { + /* + * Append the value for an unknown language, if + * we found no language. + */ + appendTag( + unknownLanguage, + (int32_t)uprv_strlen(unknownLanguage), + tagBuffer, + &tagLength); + } + else { + appendTag( + alternateLang, + alternateLangLength, + tagBuffer, + &tagLength); + } + } + + if (scriptLength > 0) { + appendTag( + script, + scriptLength, + tagBuffer, + &tagLength); + } + else if (alternateTags != NULL) { + /* + * Parse the alternateTags string for the script. + */ + char alternateScript[ULOC_SCRIPT_CAPACITY]; + + const int32_t alternateScriptLength = + uloc_getScript( + alternateTags, + alternateScript, + sizeof(alternateScript), + err); + + if (U_FAILURE(*err) || + alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { + goto error; + } + else if (alternateScriptLength > 0) { + appendTag( + alternateScript, + alternateScriptLength, + tagBuffer, + &tagLength); + } + } + + if (regionLength > 0) { + appendTag( + region, + regionLength, + tagBuffer, + &tagLength); + + regionAppended = TRUE; + } + else if (alternateTags != NULL) { + /* + * Parse the alternateTags string for the region. + */ + char alternateRegion[ULOC_COUNTRY_CAPACITY]; + + const int32_t alternateRegionLength = + uloc_getCountry( + alternateTags, + alternateRegion, + sizeof(alternateRegion), + err); + if (U_FAILURE(*err) || + alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { + goto error; + } + else if (alternateRegionLength > 0) { + appendTag( + alternateRegion, + alternateRegionLength, + tagBuffer, + &tagLength); + + regionAppended = TRUE; + } + } + + { + const int32_t toCopy = + tagLength >= tagCapacity ? tagCapacity : tagLength; + + /** + * Copy the partial tag from our internal buffer to the supplied + * target. + **/ + uprv_memcpy( + tag, + tagBuffer, + toCopy); + + capacityRemaining -= toCopy; + } + + if (trailingLength > 0) { + if (*trailing != '@' && capacityRemaining > 0) { + tag[tagLength++] = '_'; + --capacityRemaining; + if (capacityRemaining > 0 && !regionAppended) { + /* extra separator is required */ + tag[tagLength++] = '_'; + --capacityRemaining; + } + } + + if (capacityRemaining > 0) { + /* + * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we + * don't know if the user-supplied buffers overlap. + */ + const int32_t toCopy = + trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; + + uprv_memmove( + &tag[tagLength], + trailing, + toCopy); + } + } + + tagLength += trailingLength; + + return u_terminateChars( + tag, + tagCapacity, + tagLength, + err); + } + +error: + + /** + * An overflow indicates the locale ID passed in + * is ill-formed. If we got here, and there was + * no previous error, it's an implicit overflow. + **/ + if (*err == U_BUFFER_OVERFLOW_ERROR || + U_SUCCESS(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + return -1; +} + +/** + * Create a tag string from the supplied parameters. The lang, script and region + * parameters may be NULL pointers. If they are, their corresponding length parameters + * must be less than or equal to 0. If the lang parameter is an empty string, the + * default value for an unknown language is written to the output buffer. + * + * If the length of the new string exceeds the capacity of the output buffer, + * the function copies as many bytes to the output buffer as it can, and returns + * the error U_BUFFER_OVERFLOW_ERROR. + * + * If an illegal argument is provided, the function returns the error + * U_ILLEGAL_ARGUMENT_ERROR. + * + * @param lang The language tag to use. + * @param langLength The length of the language tag. + * @param script The script tag to use. + * @param scriptLength The length of the script tag. + * @param region The region tag to use. + * @param regionLength The length of the region tag. + * @param trailing Any trailing data to append to the new tag. + * @param trailingLength The length of the trailing data. + * @param tag The output buffer. + * @param tagCapacity The capacity of the output buffer. + * @param err A pointer to a UErrorCode for error reporting. + * @return The length of the tag string, which may be greater than tagCapacity. + **/ +static int32_t U_CALLCONV +createTagString( + const char* lang, + int32_t langLength, + const char* script, + int32_t scriptLength, + const char* region, + int32_t regionLength, + const char* trailing, + int32_t trailingLength, + char* tag, + int32_t tagCapacity, + UErrorCode* err) +{ + return createTagStringWithAlternates( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + trailing, + trailingLength, + NULL, + tag, + tagCapacity, + err); +} + +/** + * Parse the language, script, and region subtags from a tag string, and copy the + * results into the corresponding output parameters. The buffers are null-terminated, + * unless overflow occurs. + * + * The langLength, scriptLength, and regionLength parameters are input/output + * parameters, and must contain the capacity of their corresponding buffers on + * input. On output, they will contain the actual length of the buffers, not + * including the null terminator. + * + * If the length of any of the output subtags exceeds the capacity of the corresponding + * buffer, the function copies as many bytes to the output buffer as it can, and returns + * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow + * occurs. + * + * If an illegal argument is provided, the function returns the error + * U_ILLEGAL_ARGUMENT_ERROR. + * + * @param localeID The locale ID to parse. + * @param lang The language tag buffer. + * @param langLength The length of the language tag. + * @param script The script tag buffer. + * @param scriptLength The length of the script tag. + * @param region The region tag buffer. + * @param regionLength The length of the region tag. + * @param err A pointer to a UErrorCode for error reporting. + * @return The number of chars of the localeID parameter consumed. + **/ +static int32_t U_CALLCONV +parseTagString( + const char* localeID, + char* lang, + int32_t* langLength, + char* script, + int32_t* scriptLength, + char* region, + int32_t* regionLength, + UErrorCode* err) +{ + const char* position = localeID; + int32_t subtagLength = 0; + + if(U_FAILURE(*err) || + localeID == NULL || + lang == NULL || + langLength == NULL || + script == NULL || + scriptLength == NULL || + region == NULL || + regionLength == NULL) { + goto error; + } + + subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); + u_terminateChars(lang, *langLength, subtagLength, err); + + /* + * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING + * to be an error, because it indicates the user-supplied tag is + * not well-formed. + */ + if(U_FAILURE(*err)) { + goto error; + } + + *langLength = subtagLength; + + /* + * If no language was present, use the value of unknownLanguage + * instead. Otherwise, move past any separator. + */ + if (*langLength == 0) { + uprv_strcpy( + lang, + unknownLanguage); + *langLength = (int32_t)uprv_strlen(lang); + } + else if (_isIDSeparator(*position)) { + ++position; + } + + subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); + u_terminateChars(script, *scriptLength, subtagLength, err); + + if(U_FAILURE(*err)) { + goto error; + } + + *scriptLength = subtagLength; + + if (*scriptLength > 0) { + if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { + /** + * If the script part is the "unknown" script, then don't return it. + **/ + *scriptLength = 0; + } + + /* + * Move past any separator. + */ + if (_isIDSeparator(*position)) { + ++position; + } + } + + subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); + u_terminateChars(region, *regionLength, subtagLength, err); + + if(U_FAILURE(*err)) { + goto error; + } + + *regionLength = subtagLength; + + if (*regionLength > 0) { + if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { + /** + * If the region part is the "unknown" region, then don't return it. + **/ + *regionLength = 0; + } + } else if (*position != 0 && *position != '@') { + /* back up over consumed trailing separator */ + --position; + } + +exit: + + return (int32_t)(position - localeID); + +error: + + /** + * If we get here, we have no explicit error, it's the result of an + * illegal argument. + **/ + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + goto exit; +} + +static int32_t U_CALLCONV +createLikelySubtagsString( + const char* lang, + int32_t langLength, + const char* script, + int32_t scriptLength, + const char* region, + int32_t regionLength, + const char* variants, + int32_t variantsLength, + char* tag, + int32_t tagCapacity, + UErrorCode* err) +{ + /** + * ULOC_FULLNAME_CAPACITY will provide enough capacity + * that we can build a string that contains the language, + * script and region code without worrying about overrunning + * the user-supplied buffer. + **/ + char tagBuffer[ULOC_FULLNAME_CAPACITY]; + char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; + + if(U_FAILURE(*err)) { + goto error; + } + + /** + * Try the language with the script and region first. + **/ + if (scriptLength > 0 && regionLength > 0) { + + const char* likelySubtags = NULL; + + createTagString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + NULL, + 0, + tagBuffer, + sizeof(tagBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer, + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + return createTagStringWithAlternates( + NULL, + 0, + NULL, + 0, + NULL, + 0, + variants, + variantsLength, + likelySubtags, + tag, + tagCapacity, + err); + } + } + + /** + * Try the language with just the script. + **/ + if (scriptLength > 0) { + + const char* likelySubtags = NULL; + + createTagString( + lang, + langLength, + script, + scriptLength, + NULL, + 0, + NULL, + 0, + tagBuffer, + sizeof(tagBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer, + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + return createTagStringWithAlternates( + NULL, + 0, + NULL, + 0, + region, + regionLength, + variants, + variantsLength, + likelySubtags, + tag, + tagCapacity, + err); + } + } + + /** + * Try the language with just the region. + **/ + if (regionLength > 0) { + + const char* likelySubtags = NULL; + + createTagString( + lang, + langLength, + NULL, + 0, + region, + regionLength, + NULL, + 0, + tagBuffer, + sizeof(tagBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer, + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + return createTagStringWithAlternates( + NULL, + 0, + script, + scriptLength, + NULL, + 0, + variants, + variantsLength, + likelySubtags, + tag, + tagCapacity, + err); + } + } + + /** + * Finally, try just the language. + **/ + { + const char* likelySubtags = NULL; + + createTagString( + lang, + langLength, + NULL, + 0, + NULL, + 0, + NULL, + 0, + tagBuffer, + sizeof(tagBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer, + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + return createTagStringWithAlternates( + NULL, + 0, + script, + scriptLength, + region, + regionLength, + variants, + variantsLength, + likelySubtags, + tag, + tagCapacity, + err); + } + } + + return u_terminateChars( + tag, + tagCapacity, + 0, + err); + +error: + + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + return -1; +} + +#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ + { int32_t count = 0; \ + int32_t i; \ + for (i = 0; i < trailingLength; i++) { \ + if (trailing[i] == '-' || trailing[i] == '_') { \ + count = 0; \ + if (count > 8) { \ + goto error; \ + } \ + } else if (trailing[i] == '@') { \ + break; \ + } else if (count > 8) { \ + goto error; \ + } else { \ + count++; \ + } \ + } \ + } + +static int32_t +_uloc_addLikelySubtags(const char* localeID, + char* maximizedLocaleID, + int32_t maximizedLocaleIDCapacity, + UErrorCode* err) +{ + char lang[ULOC_LANG_CAPACITY]; + int32_t langLength = sizeof(lang); + char script[ULOC_SCRIPT_CAPACITY]; + int32_t scriptLength = sizeof(script); + char region[ULOC_COUNTRY_CAPACITY]; + int32_t regionLength = sizeof(region); + const char* trailing = ""; + int32_t trailingLength = 0; + int32_t trailingIndex = 0; + int32_t resultLength = 0; + + if(U_FAILURE(*err)) { + goto error; + } + else if (localeID == NULL || + maximizedLocaleID == NULL || + maximizedLocaleIDCapacity <= 0) { + goto error; + } + + trailingIndex = parseTagString( + localeID, + lang, + &langLength, + script, + &scriptLength, + region, + ®ionLength, + err); + if(U_FAILURE(*err)) { + /* Overflow indicates an illegal argument error */ + if (*err == U_BUFFER_OVERFLOW_ERROR) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + goto error; + } + + /* Find the length of the trailing portion. */ + while (_isIDSeparator(localeID[trailingIndex])) { + trailingIndex++; + } + trailing = &localeID[trailingIndex]; + trailingLength = (int32_t)uprv_strlen(trailing); + + CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); + + resultLength = + createLikelySubtagsString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + trailing, + trailingLength, + maximizedLocaleID, + maximizedLocaleIDCapacity, + err); + + if (resultLength == 0) { + const int32_t localIDLength = (int32_t)uprv_strlen(localeID); + + /* + * If we get here, we need to return localeID. + */ + uprv_memcpy( + maximizedLocaleID, + localeID, + localIDLength <= maximizedLocaleIDCapacity ? + localIDLength : maximizedLocaleIDCapacity); + + resultLength = + u_terminateChars( + maximizedLocaleID, + maximizedLocaleIDCapacity, + localIDLength, + err); + } + + return resultLength; + +error: + + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + return -1; +} + +static int32_t +_uloc_minimizeSubtags(const char* localeID, + char* minimizedLocaleID, + int32_t minimizedLocaleIDCapacity, + UErrorCode* err) +{ + /** + * ULOC_FULLNAME_CAPACITY will provide enough capacity + * that we can build a string that contains the language, + * script and region code without worrying about overrunning + * the user-supplied buffer. + **/ + char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; + int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); + + char lang[ULOC_LANG_CAPACITY]; + int32_t langLength = sizeof(lang); + char script[ULOC_SCRIPT_CAPACITY]; + int32_t scriptLength = sizeof(script); + char region[ULOC_COUNTRY_CAPACITY]; + int32_t regionLength = sizeof(region); + const char* trailing = ""; + int32_t trailingLength = 0; + int32_t trailingIndex = 0; + + if(U_FAILURE(*err)) { + goto error; + } + else if (localeID == NULL || + minimizedLocaleID == NULL || + minimizedLocaleIDCapacity <= 0) { + goto error; + } + + trailingIndex = + parseTagString( + localeID, + lang, + &langLength, + script, + &scriptLength, + region, + ®ionLength, + err); + if(U_FAILURE(*err)) { + + /* Overflow indicates an illegal argument error */ + if (*err == U_BUFFER_OVERFLOW_ERROR) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + goto error; + } + + /* Find the spot where the variants or the keywords begin, if any. */ + while (_isIDSeparator(localeID[trailingIndex])) { + trailingIndex++; + } + trailing = &localeID[trailingIndex]; + trailingLength = (int32_t)uprv_strlen(trailing); + + CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); + + createTagString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + NULL, + 0, + maximizedTagBuffer, + maximizedTagBufferLength, + err); + if(U_FAILURE(*err)) { + goto error; + } + + /** + * First, we need to first get the maximization + * from AddLikelySubtags. + **/ + maximizedTagBufferLength = + uloc_addLikelySubtags( + maximizedTagBuffer, + maximizedTagBuffer, + maximizedTagBufferLength, + err); + + if(U_FAILURE(*err)) { + goto error; + } + + /** + * Start first with just the language. + **/ + { + char tagBuffer[ULOC_FULLNAME_CAPACITY]; + + const int32_t tagBufferLength = + createLikelySubtagsString( + lang, + langLength, + NULL, + 0, + NULL, + 0, + NULL, + 0, + tagBuffer, + sizeof(tagBuffer), + err); + + if(U_FAILURE(*err)) { + goto error; + } + else if (uprv_strnicmp( + maximizedTagBuffer, + tagBuffer, + tagBufferLength) == 0) { + + return createTagString( + lang, + langLength, + NULL, + 0, + NULL, + 0, + trailing, + trailingLength, + minimizedLocaleID, + minimizedLocaleIDCapacity, + err); + } + } + + /** + * Next, try the language and region. + **/ + if (regionLength > 0) { + + char tagBuffer[ULOC_FULLNAME_CAPACITY]; + + const int32_t tagBufferLength = + createLikelySubtagsString( + lang, + langLength, + NULL, + 0, + region, + regionLength, + NULL, + 0, + tagBuffer, + sizeof(tagBuffer), + err); + + if(U_FAILURE(*err)) { + goto error; + } + else if (uprv_strnicmp( + maximizedTagBuffer, + tagBuffer, + tagBufferLength) == 0) { + + return createTagString( + lang, + langLength, + NULL, + 0, + region, + regionLength, + trailing, + trailingLength, + minimizedLocaleID, + minimizedLocaleIDCapacity, + err); + } + } + + /** + * Finally, try the language and script. This is our last chance, + * since trying with all three subtags would only yield the + * maximal version that we already have. + **/ + if (scriptLength > 0 && regionLength > 0) { + char tagBuffer[ULOC_FULLNAME_CAPACITY]; + + const int32_t tagBufferLength = + createLikelySubtagsString( + lang, + langLength, + script, + scriptLength, + NULL, + 0, + NULL, + 0, + tagBuffer, + sizeof(tagBuffer), + err); + + if(U_FAILURE(*err)) { + goto error; + } + else if (uprv_strnicmp( + maximizedTagBuffer, + tagBuffer, + tagBufferLength) == 0) { + + return createTagString( + lang, + langLength, + script, + scriptLength, + NULL, + 0, + trailing, + trailingLength, + minimizedLocaleID, + minimizedLocaleIDCapacity, + err); + } + } + + { + /** + * If we got here, return the locale ID parameter. + **/ + const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); + + uprv_memcpy( + minimizedLocaleID, + localeID, + localeIDLength <= minimizedLocaleIDCapacity ? + localeIDLength : minimizedLocaleIDCapacity); + + return u_terminateChars( + minimizedLocaleID, + minimizedLocaleIDCapacity, + localeIDLength, + err); + } + +error: + + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + return -1; + + +} + +static UBool +do_canonicalize(const char* localeID, + char* buffer, + int32_t bufferCapacity, + UErrorCode* err) +{ + uloc_canonicalize( + localeID, + buffer, + bufferCapacity, + err); + + if (*err == U_STRING_NOT_TERMINATED_WARNING || + *err == U_BUFFER_OVERFLOW_ERROR) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + + return FALSE; + } + else if (U_FAILURE(*err)) { + + return FALSE; + } + else { + return TRUE; + } +} + +U_CAPI int32_t U_EXPORT2 +uloc_addLikelySubtags(const char* localeID, + char* maximizedLocaleID, + int32_t maximizedLocaleIDCapacity, + UErrorCode* err) +{ + char localeBuffer[ULOC_FULLNAME_CAPACITY]; + + if (!do_canonicalize( + localeID, + localeBuffer, + sizeof(localeBuffer), + err)) { + return -1; + } + else { + return _uloc_addLikelySubtags( + localeBuffer, + maximizedLocaleID, + maximizedLocaleIDCapacity, + err); + } +} + +U_CAPI int32_t U_EXPORT2 +uloc_minimizeSubtags(const char* localeID, + char* minimizedLocaleID, + int32_t minimizedLocaleIDCapacity, + UErrorCode* err) +{ + char localeBuffer[ULOC_FULLNAME_CAPACITY]; + + if (!do_canonicalize( + localeID, + localeBuffer, + sizeof(localeBuffer), + err)) { + return -1; + } + else { + return _uloc_minimizeSubtags( + localeBuffer, + minimizedLocaleID, + minimizedLocaleIDCapacity, + err); + } +} + +// Pairs of (language subtag, + or -) for finding out fast if common languages +// are LTR (minus) or RTL (plus). +static const char* LANG_DIR_STRING = + "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; + +// Implemented here because this calls uloc_addLikelySubtags(). +U_CAPI UBool U_EXPORT2 +uloc_isRightToLeft(const char *locale) { + UErrorCode errorCode = U_ZERO_ERROR; + char script[8]; + int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || + scriptLength == 0) { + // Fastpath: We know the likely scripts and their writing direction + // for some common languages. + errorCode = U_ZERO_ERROR; + char lang[8]; + int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || + langLength == 0) { + return FALSE; + } + const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); + if (langPtr != NULL) { + switch (langPtr[langLength]) { + case '-': return FALSE; + case '+': return TRUE; + default: break; // partial match of a longer code + } + } + // Otherwise, find the likely script. + errorCode = U_ZERO_ERROR; + char likely[ULOC_FULLNAME_CAPACITY]; + (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode); + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { + return FALSE; + } + scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode); + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || + scriptLength == 0) { + return FALSE; + } + } + UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); + return uscript_isRightToLeft(scriptCode); +} + +U_NAMESPACE_BEGIN + +UBool +Locale::isRightToLeft() const { + return uloc_isRightToLeft(getBaseName()); +} + +U_NAMESPACE_END + +// The following must at least allow for rg key value (6) plus terminator (1). +#define ULOC_RG_BUFLEN 8 + +U_CAPI int32_t U_EXPORT2 +ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, + char *region, int32_t regionCapacity, UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + char rgBuf[ULOC_RG_BUFLEN]; + UErrorCode rgStatus = U_ZERO_ERROR; + + // First check for rg keyword value + int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); + if (U_FAILURE(rgStatus) || rgLen != 6) { + rgLen = 0; + } else { + // rgBuf guaranteed to be zero terminated here, with text len 6 + char *rgPtr = rgBuf; + for (; *rgPtr!= 0; rgPtr++) { + *rgPtr = uprv_toupper(*rgPtr); + } + rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; + } + + if (rgLen == 0) { + // No valid rg keyword value, try for unicode_region_subtag + rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status); + if (U_FAILURE(*status)) { + rgLen = 0; + } else if (rgLen == 0 && inferRegion) { + // no unicode_region_subtag but inferRegion TRUE, try likely subtags + char locBuf[ULOC_FULLNAME_CAPACITY]; + rgStatus = U_ZERO_ERROR; + (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus); + if (U_SUCCESS(rgStatus)) { + rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status); + if (U_FAILURE(*status)) { + rgLen = 0; + } + } + } + } + + rgBuf[rgLen] = 0; + uprv_strncpy(region, rgBuf, regionCapacity); + return u_terminateChars(region, regionCapacity, rgLen, status); +} + |