From 424eeaaa5994fa1582e24ec945189f6eeb738111 Mon Sep 17 00:00:00 2001 From: wolfbeast Date: Thu, 8 Feb 2018 19:47:21 +0100 Subject: Check ScriptExtensions property of combining marks when available. --- netwerk/dns/nsIDNService.cpp | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/netwerk/dns/nsIDNService.cpp b/netwerk/dns/nsIDNService.cpp index 73a189b67..49beecbb3 100644 --- a/netwerk/dns/nsIDNService.cpp +++ b/netwerk/dns/nsIDNService.cpp @@ -26,6 +26,7 @@ const bool kIDNA2008_TransitionalProcessing = false; #include "ICUUtils.h" +#include "unicode/uscript.h" #endif using namespace mozilla::unicode; @@ -835,8 +836,8 @@ bool nsIDNService::isLabelSafe(const nsAString &label) } // Check for mixed numbering systems - if (GetGeneralCategory(ch) == - HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) { + auto genCat = GetGeneralCategory(ch); + if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) { uint32_t zeroCharacter = ch - GetNumericValue(ch); if (savedNumberingSystem == 0) { // If we encounter a decimal number, save the zero character from that @@ -847,11 +848,41 @@ bool nsIDNService::isLabelSafe(const nsAString &label) } } - if (GetGeneralCategory(ch) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) { + if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) { // Check for consecutive non-spacing marks if (previousChar != 0 && previousChar == ch) { return false; } + // Check for marks whose expected script doesn't match the base script. + if (lastScript != Script::INVALID) { + const size_t kMaxScripts = 32; // more than ample for current values + // of ScriptExtensions property + UScriptCode scripts[kMaxScripts]; + UErrorCode errorCode = U_ZERO_ERROR; + int nScripts = uscript_getScriptExtensions(ch, scripts, kMaxScripts, + &errorCode); + MOZ_ASSERT(U_SUCCESS(errorCode), "uscript_getScriptExtensions failed"); + if (U_FAILURE(errorCode)) { + return false; + } + // nScripts will always be >= 1, because even for undefined characters + // uscript_getScriptExtensions will return Script::INVALID. + // If the mark just has script=COMMON or INHERITED, we can't check any + // more carefully, but if it has specific scriptExtension codes, then + // assume those are the only valid scripts to use it with. + if (nScripts > 1 || + (Script(scripts[0]) != Script::COMMON && + Script(scripts[0]) != Script::INHERITED)) { + while (--nScripts >= 0) { + if (Script(scripts[nScripts]) == lastScript) { + break; + } + } + if (nScripts == -1) { + return false; + } + } + } // Check for diacritics on dotless-i or dotless-j, which would be // indistinguishable from normal accented letter. if ((baseChar == 0x0237 || baseChar == 0x0131) && -- cgit v1.2.3