diff options
author | wolfbeast <mcwerewolf@gmail.com> | 2018-02-09 08:53:46 +0100 |
---|---|---|
committer | wolfbeast <mcwerewolf@gmail.com> | 2018-02-09 08:53:46 +0100 |
commit | 8cecf8d5208f3945b35f879bba3015bb1a11bec6 (patch) | |
tree | 0926f5c21f9d10cf929e4c35e7d7e8e8c084dbf5 /netwerk | |
parent | 8cd777888a40e987ad536ab68421068f5c06d83b (diff) | |
parent | 92104eb6828ba026550e1f4a3c6890c5b8254d36 (diff) | |
download | UXP-8cecf8d5208f3945b35f879bba3015bb1a11bec6.tar UXP-8cecf8d5208f3945b35f879bba3015bb1a11bec6.tar.gz UXP-8cecf8d5208f3945b35f879bba3015bb1a11bec6.tar.lz UXP-8cecf8d5208f3945b35f879bba3015bb1a11bec6.tar.xz UXP-8cecf8d5208f3945b35f879bba3015bb1a11bec6.zip |
Merge branch 'ported-upstream'
Diffstat (limited to 'netwerk')
-rw-r--r-- | netwerk/dns/nsIDNService.cpp | 54 |
1 files changed, 47 insertions, 7 deletions
diff --git a/netwerk/dns/nsIDNService.cpp b/netwerk/dns/nsIDNService.cpp index d4f31027e..49beecbb3 100644 --- a/netwerk/dns/nsIDNService.cpp +++ b/netwerk/dns/nsIDNService.cpp @@ -26,6 +26,7 @@ const bool kIDNA2008_TransitionalProcessing = false; #include "ICUUtils.h" +#include "unicode/uscript.h" #endif using namespace mozilla::unicode; @@ -797,6 +798,7 @@ bool nsIDNService::isLabelSafe(const nsAString &label) Script lastScript = Script::INVALID; uint32_t previousChar = 0; + uint32_t baseChar = 0; // last non-diacritic seen (base char for marks) uint32_t savedNumberingSystem = 0; // Simplified/Traditional Chinese check temporarily disabled -- bug 857481 #if 0 @@ -834,8 +836,8 @@ bool nsIDNService::isLabelSafe(const nsAString &label) } // Check for mixed numbering systems - if (GetGeneralCategory(ch) == - HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) { + auto genCat = GetGeneralCategory(ch); + if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) { uint32_t zeroCharacter = ch - GetNumericValue(ch); if (savedNumberingSystem == 0) { // If we encounter a decimal number, save the zero character from that @@ -846,11 +848,49 @@ bool nsIDNService::isLabelSafe(const nsAString &label) } } - // Check for consecutive non-spacing marks - if (previousChar != 0 && - previousChar == ch && - GetGeneralCategory(ch) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) { - return false; + if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) { + // Check for consecutive non-spacing marks + if (previousChar != 0 && previousChar == ch) { + return false; + } + // Check for marks whose expected script doesn't match the base script. + if (lastScript != Script::INVALID) { + const size_t kMaxScripts = 32; // more than ample for current values + // of ScriptExtensions property + UScriptCode scripts[kMaxScripts]; + UErrorCode errorCode = U_ZERO_ERROR; + int nScripts = uscript_getScriptExtensions(ch, scripts, kMaxScripts, + &errorCode); + MOZ_ASSERT(U_SUCCESS(errorCode), "uscript_getScriptExtensions failed"); + if (U_FAILURE(errorCode)) { + return false; + } + // nScripts will always be >= 1, because even for undefined characters + // uscript_getScriptExtensions will return Script::INVALID. + // If the mark just has script=COMMON or INHERITED, we can't check any + // more carefully, but if it has specific scriptExtension codes, then + // assume those are the only valid scripts to use it with. + if (nScripts > 1 || + (Script(scripts[0]) != Script::COMMON && + Script(scripts[0]) != Script::INHERITED)) { + while (--nScripts >= 0) { + if (Script(scripts[nScripts]) == lastScript) { + break; + } + } + if (nScripts == -1) { + return false; + } + } + } + // Check for diacritics on dotless-i or dotless-j, which would be + // indistinguishable from normal accented letter. + if ((baseChar == 0x0237 || baseChar == 0x0131) && + ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) { + return false; + } + } else { + baseChar = ch; } // Simplified/Traditional Chinese check temporarily disabled -- bug 857481 |