diff options
Diffstat (limited to 'netwerk/mime/nsMIMEHeaderParamImpl.cpp')
-rw-r--r-- | netwerk/mime/nsMIMEHeaderParamImpl.cpp | 1346 |
1 files changed, 1346 insertions, 0 deletions
diff --git a/netwerk/mime/nsMIMEHeaderParamImpl.cpp b/netwerk/mime/nsMIMEHeaderParamImpl.cpp new file mode 100644 index 000000000..8d668f64f --- /dev/null +++ b/netwerk/mime/nsMIMEHeaderParamImpl.cpp @@ -0,0 +1,1346 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=4 ts=8 et tw=80 : */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <string.h> +#include "prmem.h" +#include "prprf.h" +#include "plstr.h" +#include "plbase64.h" +#include "nsCRT.h" +#include "nsMemory.h" +#include "nsTArray.h" +#include "nsCOMPtr.h" +#include "nsEscape.h" +#include "nsIUTF8ConverterService.h" +#include "nsUConvCID.h" +#include "nsIServiceManager.h" +#include "nsMIMEHeaderParamImpl.h" +#include "nsReadableUtils.h" +#include "nsNativeCharsetUtils.h" +#include "nsError.h" +#include "nsIUnicodeDecoder.h" +#include "mozilla/dom/EncodingUtils.h" + +using mozilla::dom::EncodingUtils; + +// static functions declared below are moved from mailnews/mime/src/comi18n.cpp + +static char *DecodeQ(const char *, uint32_t); +static bool Is7bitNonAsciiString(const char *, uint32_t); +static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &); +static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&); +static nsresult internalDecodeParameter(const nsACString&, const char*, + const char*, bool, bool, nsACString&); + +// XXX The chance of UTF-7 being used in the message header is really +// low, but in theory it's possible. 
+#define IS_7BIT_NON_ASCII_CHARSET(cset) \ + (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \ + !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \ + !nsCRT::strncasecmp((cset), "UTF-7", 5)) + +NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam) + +NS_IMETHODIMP +nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, + const char *aParamName, + const nsACString& aFallbackCharset, + bool aTryLocaleCharset, + char **aLang, nsAString& aResult) +{ + return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING, + aFallbackCharset, aTryLocaleCharset, aLang, aResult); +} + +NS_IMETHODIMP +nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, + const char *aParamName, + const nsACString& aFallbackCharset, + bool aTryLocaleCharset, + char **aLang, nsAString& aResult) +{ + return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING, + aFallbackCharset, aTryLocaleCharset, aLang, aResult); +} + +// XXX : aTryLocaleCharset is not yet effective. +nsresult +nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, + const char *aParamName, + ParamDecoding aDecoding, + const nsACString& aFallbackCharset, + bool aTryLocaleCharset, + char **aLang, nsAString& aResult) +{ + aResult.Truncate(); + nsresult rv; + + // get parameter (decode RFC 2231/5987 when applicable, as specified by + // aDecoding (5987 being a subset of 2231) and return charset.) + nsXPIDLCString med; + nsXPIDLCString charset; + rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, + aDecoding, getter_Copies(charset), aLang, + getter_Copies(med)); + if (NS_FAILED(rv)) + return rv; + + // convert to UTF-8 after charset conversion and RFC 2047 decoding + // if necessary. 
// Remove backslash escapes from the content of a quoted-string.
//
// The buffer is rewritten in place and may get shorter: every backslash that
// is followed by another character is dropped and the following character is
// kept verbatim. A backslash that is the very last character is kept as is.
//
// @param src  NUL-terminated, writable buffer; must not be null.
void RemoveQuotedStringEscapes(char *src)
{
  char *dst = src;

  for (char *c = src; *c; ++c)
  {
    if (c[0] == '\\' && c[1])
    {
      // skip backslash if not at end
      ++c;
    }
    *dst++ = *c;
  }
  *dst = 0;
}

// Returns true if aChar is an ASCII hexadecimal digit (0-9, a-f, A-F).
bool IsHexDigit(char aChar)
{
  return (aChar >= '0' && aChar <= '9') ||
         (aChar >= 'a' && aChar <= 'f') ||
         (aChar >= 'A' && aChar <= 'F');
}

// Validate that the first |len| characters of aValue contain only
// syntactically valid %-escapes, i.e. every '%' is followed by two hex digits.
//
// Both hex digits have to lie inside the [0, len) window. An escape that is
// cut off by the end of the window is rejected instead of being validated
// against whatever bytes happen to follow the value in memory.
//
// @param aValue  start of the value; need not be NUL-terminated at |len|.
// @param len     number of characters to inspect; values <= 0 yield true.
// @return true if no malformed or truncated %-escape was found.
bool IsValidPercentEscaped(const char *aValue, int32_t len)
{
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }
    // i < len holds here, so (len - i) cannot overflow; we need '%' plus two
    // more characters inside the window.
    if (len - i < 3 ||
        !IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
aNeedsPercentDecoding, bool aWasQuotedString) { + value = aValue; + length = aLength; + needsPercentDecoding = aNeedsPercentDecoding; + wasQuotedString = aWasQuotedString; + } + Continuation() { + // empty constructor needed for nsTArray + value = 0L; + length = 0; + needsPercentDecoding = false; + wasQuotedString = false; + } + ~Continuation() = default; + + const char *value; + uint32_t length; + bool needsPercentDecoding; + bool wasQuotedString; +}; + +// combine segments into a single string, returning the allocated string +// (or nullptr) while emptying the list +char *combineContinuations(nsTArray<Continuation>& aArray) +{ + // Sanity check + if (aArray.Length() == 0) + return nullptr; + + // Get an upper bound for the length + uint32_t length = 0; + for (uint32_t i = 0; i < aArray.Length(); i++) { + length += aArray[i].length; + } + + // Allocate + char *result = (char *) moz_xmalloc(length + 1); + + // Concatenate + if (result) { + *result = '\0'; + + for (uint32_t i = 0; i < aArray.Length(); i++) { + Continuation cont = aArray[i]; + if (! 
cont.value) break; + + char *c = result + strlen(result); + strncat(result, cont.value, cont.length); + if (cont.needsPercentDecoding) { + nsUnescape(c); + } + if (cont.wasQuotedString) { + RemoveQuotedStringEscapes(c); + } + } + + // return null if empty value + if (*result == '\0') { + free(result); + result = nullptr; + } + } else { + // Handle OOM + NS_WARNING("Out of memory\n"); + } + + return result; +} + +// add a continuation, return false on error if segment already has been seen +bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex, + const char *aValue, uint32_t aLength, + bool aNeedsPercentDecoding, bool aWasQuotedString) +{ + if (aIndex < aArray.Length() && aArray[aIndex].value) { + NS_WARNING("duplicate RC2231 continuation segment #\n"); + return false; + } + + if (aIndex > MAX_CONTINUATIONS) { + NS_WARNING("RC2231 continuation segment # exceeds limit\n"); + return false; + } + + if (aNeedsPercentDecoding && aWasQuotedString) { + NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n"); + return false; + } + + Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString); + + if (aArray.Length() <= aIndex) { + aArray.SetLength(aIndex + 1); + } + aArray[aIndex] = cont; + + return true; +} + +// parse a segment number; return -1 on error +int32_t parseSegmentNumber(const char *aValue, int32_t aLen) +{ + if (aLen < 1) { + NS_WARNING("segment number missing\n"); + return -1; + } + + if (aLen > 1 && aValue[0] == '0') { + NS_WARNING("leading '0' not allowed in segment number\n"); + return -1; + } + + int32_t segmentNumber = 0; + + for (int32_t i = 0; i < aLen; i++) { + if (! 
(aValue[i] >= '0' && aValue[i] <= '9')) { + NS_WARNING("invalid characters in segment number\n"); + return -1; + } + + segmentNumber *= 10; + segmentNumber += aValue[i] - '0'; + if (segmentNumber > MAX_CONTINUATIONS) { + NS_WARNING("Segment number exceeds sane size\n"); + return -1; + } + } + + return segmentNumber; +} + +// validate a given octet sequence for compliance with the specified +// encoding +bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets) +{ + nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService + (NS_UTF8CONVERTERSERVICE_CONTRACTID)); + if (!cvtUTF8) { + NS_WARNING("Can't get UTF8ConverterService\n"); + return false; + } + + nsAutoCString tmpRaw; + tmpRaw.Assign(aOctets); + nsAutoCString tmpDecoded; + + nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw, + PromiseFlatCString(aCharset).get(), + false, false, 1, tmpDecoded); + + if (rv != NS_OK) { + // we can't decode; charset may be unsupported, or the octet sequence + // is broken (illegal or incomplete octet sequence contained) + NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n"); + return false; + } + + return true; +} + +// moved almost verbatim from mimehdrs.cpp +// char * +// MimeHeaders_get_parameter (const char *header_value, const char *parm_name, +// char **charset, char **language) +// +// The format of these header lines is +// <token> [ ';' <token> '=' <token-or-quoted-string> ]* +NS_IMETHODIMP +nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, + const char *aParamName, + char **aCharset, + char **aLang, + char **aResult) +{ + return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING, + aCharset, aLang, aResult); +} + + +nsresult +nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, + const char *aParamName, + ParamDecoding aDecoding, + char **aCharset, + char **aLang, + char **aResult) +{ + + if (!aHeaderValue || !*aHeaderValue || !aResult) + return 
NS_ERROR_INVALID_ARG; + + *aResult = nullptr; + + if (aCharset) *aCharset = nullptr; + if (aLang) *aLang = nullptr; + + nsAutoCString charset; + + // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable + // them for HTTP header fields later on, see bug 776324 + bool acceptContinuations = true; + + const char *str = aHeaderValue; + + // skip leading white space. + for (; *str && nsCRT::IsAsciiSpace(*str); ++str) + ; + const char *start = str; + + // aParamName is empty. return the first (possibly) _unnamed_ 'parameter' + // For instance, return 'inline' in the following case: + // Content-Disposition: inline; filename=..... + if (!aParamName || !*aParamName) + { + for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str) + ; + if (str == start) + return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY; + + *aResult = (char *) nsMemory::Clone(start, (str - start) + 1); + NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY); + (*aResult)[str - start] = '\0'; // null-terminate + return NS_OK; + } + + /* Skip forward to first ';' */ + for (; *str && *str != ';' && *str != ','; ++str) + ; + if (*str) + str++; + /* Skip over following whitespace */ + for (; *str && nsCRT::IsAsciiSpace(*str); ++str) + ; + + // Some broken http servers just specify parameters + // like 'filename' without specifying disposition + // method. Rewind to the first non-white-space + // character. + + if (!*str) + str = start; + + // RFC2231 - The legitimate parm format can be: + // A. title=ThisIsTitle + // B. title*=us-ascii'en-us'This%20is%20wierd. + // C. title*0*=us-ascii'en'This%20is%20wierd.%20We + // title*1*=have%20to%20support%20this. + // title*2="Else..." + // D. title*0="Hey, what you think you are doing?" + // title*1="There is no charset and lang info." 
+ // RFC5987: only A and B + + // collect results for the different algorithms (plain filename, + // RFC5987/2231-encoded filename, + continuations) separately and decide + // which to use at the end + char *caseAResult = nullptr; + char *caseBResult = nullptr; + char *caseCDResult = nullptr; + + // collect continuation segments + nsTArray<Continuation> segments; + + + // our copies of the charset parameter, kept separately as they might + // differ for the two formats + nsDependentCSubstring charsetB, charsetCD; + + nsDependentCSubstring lang; + + int32_t paramLen = strlen(aParamName); + + while (*str) { + // find name/value + + const char *nameStart = str; + const char *nameEnd = nullptr; + const char *valueStart = str; + const char *valueEnd = nullptr; + bool isQuotedString = false; + + NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace."); + + // Skip forward to the end of this token. + for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++) + ; + nameEnd = str; + + int32_t nameLen = nameEnd - nameStart; + + // Skip over whitespace, '=', and whitespace + while (nsCRT::IsAsciiSpace(*str)) ++str; + if (!*str) { + break; + } + if (*str++ != '=') { + // don't accept parameters without "=" + goto increment_str; + } + while (nsCRT::IsAsciiSpace(*str)) ++str; + + if (*str != '"') { + // The value is a token, not a quoted string. + valueStart = str; + for (valueEnd = str; + *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';'; + valueEnd++) + ; + str = valueEnd; + } else { + isQuotedString = true; + + ++str; + valueStart = str; + for (valueEnd = str; *valueEnd; ++valueEnd) { + if (*valueEnd == '\\' && *(valueEnd + 1)) + ++valueEnd; + else if (*valueEnd == '"') + break; + } + str = valueEnd; + // *valueEnd != null means that *valueEnd is quote character. + if (*valueEnd) + str++; + } + + // See if this is the simplest case (case A above), + // a 'single' line value with no charset and lang. 
+ // If so, copy it and return. + if (nameLen == paramLen && + !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) { + + if (caseAResult) { + // we already have one caseA result, ignore subsequent ones + goto increment_str; + } + + // if the parameter spans across multiple lines we have to strip out the + // line continuation -- jht 4/29/98 + nsAutoCString tempStr(valueStart, valueEnd - valueStart); + tempStr.StripChars("\r\n"); + char *res = ToNewCString(tempStr); + NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY); + + if (isQuotedString) + RemoveQuotedStringEscapes(res); + + caseAResult = res; + // keep going, we may find a RFC 2231/5987 encoded alternative + } + // case B, C, and D + else if (nameLen > paramLen && + !nsCRT::strncasecmp(nameStart, aParamName, paramLen) && + *(nameStart + paramLen) == '*') { + + // 1st char past '*' + const char *cp = nameStart + paramLen + 1; + + // if param name ends in "*" we need do to RFC5987 "ext-value" decoding + bool needExtDecoding = *(nameEnd - 1) == '*'; + + bool caseB = nameLen == paramLen + 1; + bool caseCStart = (*cp == '0') && needExtDecoding; + + // parse the segment number + int32_t segmentNumber = -1; + if (!caseB) { + int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0); + segmentNumber = parseSegmentNumber(cp, segLen); + + if (segmentNumber == -1) { + acceptContinuations = false; + goto increment_str; + } + } + + // CaseB and start of CaseC: requires charset and optional language + // in quotes (quotes required even if lang is blank) + if (caseB || (caseCStart && acceptContinuations)) { + // look for single quotation mark(') + const char *sQuote1 = PL_strchr(valueStart, 0x27); + const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr; + + // Two single quotation marks must be present even in + // absence of charset and lang. 
+ if (!sQuote1 || !sQuote2) { + NS_WARNING("Mandatory two single quotes are missing in header parameter\n"); + } + + const char *charsetStart = nullptr; + int32_t charsetLength = 0; + const char *langStart = nullptr; + int32_t langLength = 0; + const char *rawValStart = nullptr; + int32_t rawValLength = 0; + + if (sQuote2 && sQuote1) { + // both delimiters present: charSet'lang'rawVal + rawValStart = sQuote2 + 1; + rawValLength = valueEnd - rawValStart; + + langStart = sQuote1 + 1; + langLength = sQuote2 - langStart; + + charsetStart = valueStart; + charsetLength = sQuote1 - charsetStart; + } + else if (sQuote1) { + // one delimiter; assume charset'rawVal + rawValStart = sQuote1 + 1; + rawValLength = valueEnd - rawValStart; + + charsetStart = valueStart; + charsetLength = sQuote1 - valueStart; + } + else { + // no delimiter: just rawVal + rawValStart = valueStart; + rawValLength = valueEnd - valueStart; + } + + if (langLength != 0) { + lang.Assign(langStart, langLength); + } + + // keep the charset for later + if (caseB) { + charsetB.Assign(charsetStart, charsetLength); + } else { + // if caseCorD + charsetCD.Assign(charsetStart, charsetLength); + } + + // non-empty value part + if (rawValLength > 0) { + if (!caseBResult && caseB) { + if (!IsValidPercentEscaped(rawValStart, rawValLength)) { + goto increment_str; + } + + // allocate buffer for the raw value + char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1); + if (!tmpResult) { + goto increment_str; + } + *(tmpResult + rawValLength) = 0; + + nsUnescape(tmpResult); + caseBResult = tmpResult; + } else { + // caseC + bool added = addContinuation(segments, 0, rawValStart, + rawValLength, needExtDecoding, + isQuotedString); + + if (!added) { + // continuation not added, stop processing them + acceptContinuations = false; + } + } + } + } // end of if-block : title*0*= or title*= + // caseD: a line of multiline param with no need for unescaping : title*[0-9]= + // or 2nd or later lines of a caseC 
param : title*[1-9]*= + else if (acceptContinuations && segmentNumber != -1) { + uint32_t valueLength = valueEnd - valueStart; + + bool added = addContinuation(segments, segmentNumber, valueStart, + valueLength, needExtDecoding, + isQuotedString); + + if (!added) { + // continuation not added, stop processing them + acceptContinuations = false; + } + } // end of if-block : title*[0-9]= or title*[1-9]*= + } + + // str now points after the end of the value. + // skip over whitespace, ';', whitespace. +increment_str: + while (nsCRT::IsAsciiSpace(*str)) ++str; + if (*str == ';') { + ++str; + } else { + // stop processing the header field; either we are done or the + // separator was missing + break; + } + while (nsCRT::IsAsciiSpace(*str)) ++str; + } + + caseCDResult = combineContinuations(segments); + + if (caseBResult && !charsetB.IsEmpty()) { + // check that the 2231/5987 result decodes properly given the + // specified character set + if (!IsValidOctetSequenceForCharset(charsetB, caseBResult)) + caseBResult = nullptr; + } + + if (caseCDResult && !charsetCD.IsEmpty()) { + // check that the 2231/5987 result decodes properly given the + // specified character set + if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult)) + caseCDResult = nullptr; + } + + if (caseBResult) { + // prefer simple 5987 format over 2231 with continuations + *aResult = caseBResult; + caseBResult = nullptr; + charset.Assign(charsetB); + } + else if (caseCDResult) { + // prefer 2231/5987 with or without continuations over plain format + *aResult = caseCDResult; + caseCDResult = nullptr; + charset.Assign(charsetCD); + } + else if (caseAResult) { + *aResult = caseAResult; + caseAResult = nullptr; + } + + // free unused stuff + free(caseAResult); + free(caseBResult); + free(caseCDResult); + + // if we have a result + if (*aResult) { + // then return charset and lang as well + if (aLang && !lang.IsEmpty()) { + uint32_t len = lang.Length(); + *aLang = (char *) 
nsMemory::Clone(lang.BeginReading(), len + 1); + if (*aLang) { + *(*aLang + len) = 0; + } + } + if (aCharset && !charset.IsEmpty()) { + uint32_t len = charset.Length(); + *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1); + if (*aCharset) { + *(*aCharset + len) = 0; + } + } + } + + return *aResult ? NS_OK : NS_ERROR_INVALID_ARG; +} + +nsresult +internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset, + bool aOverrideCharset, bool aEatContinuations, + nsACString& aResult) +{ + aResult.Truncate(); + if (!aHeaderVal) + return NS_ERROR_INVALID_ARG; + if (!*aHeaderVal) + return NS_OK; + + + // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but + // aDefaultCharset is specified, decodes RFC 2047 encoding and converts + // to UTF-8. Otherwise, just strips away CRLF. + if (PL_strstr(aHeaderVal, "=?") || + (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || + Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) { + DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult); + } else if (aEatContinuations && + (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) { + aResult = aHeaderVal; + } else { + aEatContinuations = false; + aResult = aHeaderVal; + } + + if (aEatContinuations) { + nsAutoCString temp(aResult); + temp.ReplaceSubstring("\n\t", " "); + temp.ReplaceSubstring("\r\t", " "); + temp.StripChars("\r\n"); + aResult = temp; + } + + return NS_OK; +} + +NS_IMETHODIMP +nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, + const char* aDefaultCharset, + bool aOverrideCharset, + bool aEatContinuations, + nsACString& aResult) +{ + return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset, + aOverrideCharset, aEatContinuations, + aResult); +} + +// true if the character is allowed in a RFC 5987 value +// see RFC 5987, Section 3.2.1, "attr-char" +bool IsRFC5987AttrChar(char aChar) +{ + char c = aChar; + + return (c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') 
|| + (c >= '0' && c <= '9') || + (c == '!' || c == '#' || c == '$' || c == '&' || + c == '+' || c == '-' || c == '.' || c == '^' || + c == '_' || c == '`' || c == '|' || c == '~'); +} + +// percent-decode a value +// returns false on failure +bool PercentDecode(nsACString& aValue) +{ + char *c = (char *) moz_xmalloc(aValue.Length() + 1); + if (!c) { + return false; + } + + strcpy(c, PromiseFlatCString(aValue).get()); + nsUnescape(c); + aValue.Assign(c); + free(c); + + return true; +} + +// Decode a parameter value using the encoding defined in RFC 5987 +// +// charset "'" [ language ] "'" value-chars +NS_IMETHODIMP +nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal, + nsACString& aLang, + nsAString& aResult) +{ + nsAutoCString charset; + nsAutoCString language; + nsAutoCString value; + + uint32_t delimiters = 0; + const nsCString& encoded = PromiseFlatCString(aParamVal); + const char *c = encoded.get(); + + while (*c) { + char tc = *c++; + + if (tc == '\'') { + // single quote + delimiters++; + } else if (((unsigned char)tc) >= 128) { + // fail early, not ASCII + NS_WARNING("non-US-ASCII character in RFC5987-encoded param"); + return NS_ERROR_INVALID_ARG; + } else { + if (delimiters == 0) { + // valid characters are checked later implicitly + charset.Append(tc); + } else if (delimiters == 1) { + // no value checking for now + language.Append(tc); + } else if (delimiters == 2) { + if (IsRFC5987AttrChar(tc)) { + value.Append(tc); + } else if (tc == '%') { + if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) { + // we expect two more characters + NS_WARNING("broken %-escape in RFC5987-encoded param"); + return NS_ERROR_INVALID_ARG; + } + value.Append(tc); + // we consume two more + value.Append(*c++); + value.Append(*c++); + } else { + // character not allowed here + NS_WARNING("invalid character in RFC5987-encoded param"); + return NS_ERROR_INVALID_ARG; + } + } + } + } + + if (delimiters != 2) { + NS_WARNING("missing delimiters in RFC5987-encoded 
param"); + return NS_ERROR_INVALID_ARG; + } + + // abort early for unsupported encodings + if (!charset.LowerCaseEqualsLiteral("utf-8")) { + NS_WARNING("unsupported charset in RFC5987-encoded param"); + return NS_ERROR_INVALID_ARG; + } + + // percent-decode + if (!PercentDecode(value)) { + return NS_ERROR_OUT_OF_MEMORY; + } + + // return the encoding + aLang.Assign(language); + + // finally convert octet sequence to UTF-8 and be done + nsresult rv = NS_OK; + nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 = + do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + nsAutoCString utf8; + rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8); + NS_ENSURE_SUCCESS(rv, rv); + + CopyUTF8toUTF16(utf8, aResult); + return NS_OK; +} + +nsresult +internalDecodeParameter(const nsACString& aParamValue, const char* aCharset, + const char* aDefaultCharset, bool aOverrideCharset, + bool aDecode2047, nsACString& aResult) +{ + aResult.Truncate(); + // If aCharset is given, aParamValue was obtained from RFC2231/5987 + // encoding and we're pretty sure that it's in aCharset. + if (aCharset && *aCharset) + { + nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); + if (cvtUTF8) + return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset, + true, true, 1, aResult); + } + + const nsAFlatCString& param = PromiseFlatCString(aParamValue); + nsAutoCString unQuoted; + nsACString::const_iterator s, e; + param.BeginReading(s); + param.EndReading(e); + + // strip '\' when used to quote CR, LF, '"' and '\' + for ( ; s != e; ++s) { + if ((*s == '\\')) { + if (++s == e) { + --s; // '\' is at the end. move back and append '\'. + } + else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') { + --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\' + } + // else : skip '\' and append the quoted character. 
+ } + unQuoted.Append(*s); + } + + aResult = unQuoted; + nsresult rv = NS_OK; + + if (aDecode2047) { + nsAutoCString decoded; + + // Try RFC 2047 encoding, instead. + rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset, + aOverrideCharset, true, decoded); + + if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) + aResult = decoded; + } + + return rv; +} + +NS_IMETHODIMP +nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue, + const char* aCharset, + const char* aDefaultCharset, + bool aOverrideCharset, + nsACString& aResult) +{ + return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset, + aOverrideCharset, true, aResult); +} + +#define ISHEXCHAR(c) \ + ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \ + (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \ + (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66)) + +// Decode Q encoding (RFC 2047). +// static +char *DecodeQ(const char *in, uint32_t length) +{ + char *out, *dest = 0; + + out = dest = (char *)PR_Calloc(length + 1, sizeof(char)); + if (dest == nullptr) + return nullptr; + while (length > 0) { + unsigned c = 0; + switch (*in) { + case '=': + // check if |in| in the form of '=hh' where h is [0-9a-fA-F]. + if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) + goto badsyntax; + PR_sscanf(in + 1, "%2X", &c); + *out++ = (char) c; + in += 3; + length -= 3; + break; + + case '_': + *out++ = ' '; + in++; + length--; + break; + + default: + if (*in & 0x80) goto badsyntax; + *out++ = *in++; + length--; + } + } + *out++ = '\0'; + + for (out = dest; *out ; ++out) { + if (*out == '\t') + *out = ' '; + } + + return dest; + + badsyntax: + PR_Free(dest); + return nullptr; +} + +// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842)) +// or has ESC which may be an indication that it's in one of many ISO +// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554). 
// Check whether the first |len| bytes of |input| look like text in a 7-bit
// non-ASCII encoding: either HZ (a 7-bit encoding for simplified Chinese,
// RFC 1842) or anything containing ESC, which may indicate one of the
// ISO 2022 7-bit encodings (e.g. ISO-2022-JP(-2)/CN: RFC 1468, 1922, 1554).
//
// The byte following a '~' is only inspected while it is still inside the
// |len| window; a '~' that is the last byte of the window is treated as not
// being legal HZ. This keeps the scan inside the buffer and prevents the
// unsigned length counter from wrapping around on a trailing "~~".
//
// @param input  bytes to scan; need not be NUL-terminated.
// @param len    number of bytes to scan.
// @return true if an ESC byte is found, or if at least one complete HZ
//         sequence was seen and nothing contradicted HZ; false if a byte
//         with the high bit set is found or neither condition holds.
// static
bool Is7bitNonAsciiString(const char *input, uint32_t len)
{
  int32_t c;

  enum { hz_initial, // No HZ seen yet
         hz_escaped, // Inside an HZ ~{ escape sequence
         hz_seen, // Have seen at least one complete HZ sequence
         hz_notpresent // Have seen something that is not legal HZ
  } hz_state;

  hz_state = hz_initial;
  while (len) {
    c = uint8_t(*input++);
    len--;
    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c == '~') {
      // Look ahead only if there is a byte left in the window. For
      // NUL-terminated input whose length is |len| this yields the same
      // value ('\0') that dereferencing the terminator would.
      const char next = len ? *input : '\0';

      switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (next == '{') {
          hz_state = hz_escaped;
        } else if (next == '~') {
          // ~~ is the HZ encoding of ~.  Skip over second ~ as well
          hz_state = hz_seen;
          input++;
          len--;
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (next == '}') hz_state = hz_seen;
        break;
      default:
        break;
      }
    }
  }
  return hz_state == hz_seen;
}
An ESC may indicate ISO 2022 + // A ~ may indicate it is HZ + while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) { + aOutput.Append(char(c)); + aLen--; + } + if (!aLen) { + return; + } + aInput--; + + // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii + // string and aDefaultCharset is a 7bit non-ascii charset. + bool skipCheck = (c == 0x1B || c == '~') && + IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset); + + // If not UTF-8, treat as default charset + nsCOMPtr<nsIUTF8ConverterService> + cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); + nsAutoCString utf8Text; + if (cvtUTF8 && + NS_SUCCEEDED( + cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), + aDefaultCharset, skipCheck, true, 1, + utf8Text))) { + aOutput.Append(utf8Text); + } else { // replace each octet with Unicode replacement char in UTF-8. + for (uint32_t i = 0; i < aLen; i++) { + c = uint8_t(*aInput++); + if (c & 0x80) + aOutput.Append(REPLACEMENT_CHAR); + else + aOutput.Append(char(c)); + } + } +} + +nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64, + const char *aCharset, nsACString &aResult) +{ + char *decodedText; + NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'"); + if(aQOrBase64 == 'Q') + decodedText = DecodeQ(aEncoded, aLen); + else if (aQOrBase64 == 'B') { + decodedText = PL_Base64Decode(aEncoded, aLen, nullptr); + } else { + return NS_ERROR_INVALID_ARG; + } + + if (!decodedText) { + return NS_ERROR_INVALID_ARG; + } + + nsresult rv; + nsCOMPtr<nsIUTF8ConverterService> + cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv)); + nsAutoCString utf8Text; + if (NS_SUCCEEDED(rv)) { + // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset. 
+ rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText), + aCharset, + IS_7BIT_NON_ASCII_CHARSET(aCharset), + true, 1, utf8Text); + } + PR_Free(decodedText); + if (NS_FAILED(rv)) { + return rv; + } + aResult.Append(utf8Text); + + return NS_OK; +} + +static const char especials[] = R"(()<>@,;:\"/[]?.=)"; + +// |decode_mime_part2_str| taken from comi18n.c +// Decode RFC2047-encoded words in the input and convert the result to UTF-8. +// If aOverrideCharset is true, charset in RFC2047-encoded words is +// ignored and aDefaultCharset is assumed, instead. aDefaultCharset +// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8. +//static +nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, + bool aOverrideCharset, nsACString &aResult) +{ + const char *p, *q = nullptr, *r; + const char *begin; // tracking pointer for where we are in the input buffer + int32_t isLastEncodedWord = 0; + const char *charsetStart, *charsetEnd; + nsAutoCString prevCharset, curCharset; + nsAutoCString encodedText; + char prevEncoding = '\0', curEncoding; + nsresult rv; + + begin = aHeader; + + // To avoid buffer realloc, if possible, set capacity in advance. No + // matter what, more than 3x expansion can never happen for all charsets + // supported by Mozilla. SCSU/BCSU with the sliding window set to a + // non-BMP block may be exceptions, but Mozilla does not support them. + // Neither any known mail/news program use them. Even if there's, we're + // safe because we don't use a raw *char any more. + aResult.SetCapacity(3 * strlen(aHeader)); + + while ((p = PL_strstr(begin, "=?")) != 0) { + if (isLastEncodedWord) { + // See if it's all whitespace. 
+ for (q = begin; q < p; ++q) { + if (!PL_strchr(" \t\r\n", *q)) break; + } + } + + if (!isLastEncodedWord || q < p) { + if (!encodedText.IsEmpty()) { + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), + prevEncoding, prevCharset.get(), aResult); + if (NS_FAILED(rv)) { + aResult.Append(encodedText); + } + encodedText.Truncate(); + prevCharset.Truncate(); + prevEncoding = '\0'; + } + // copy the part before the encoded-word + CopyRawHeader(begin, p - begin, aDefaultCharset, aResult); + begin = p; + } + + p += 2; + + // Get charset info + charsetStart = p; + charsetEnd = 0; + for (q = p; *q != '?'; q++) { + if (*q <= ' ' || PL_strchr(especials, *q)) { + goto badsyntax; + } + + // RFC 2231 section 5 + if (!charsetEnd && *q == '*') { + charsetEnd = q; + } + } + if (!charsetEnd) { + charsetEnd = q; + } + + q++; + curEncoding = nsCRT::ToUpper(*q); + if (curEncoding != 'Q' && curEncoding != 'B') + goto badsyntax; + + if (q[1] != '?') + goto badsyntax; + + // loop-wise, keep going until we hit "?=". the inner check handles the + // nul terminator should the string terminate before we hit the right + // marker. (And the r[1] will never reach beyond the end of the string + // because *r != '?' is true if r is the nul character.) + for (r = q + 2; *r != '?' || r[1] != '='; r++) { + if (*r < ' ') goto badsyntax; + } + if (r == q + 2) { + // it's empty, skip + begin = r + 2; + isLastEncodedWord = 1; + continue; + } + + curCharset.Assign(charsetStart, charsetEnd - charsetStart); + // Override charset if requested. Never override labeled UTF-8. + // Use default charset instead of UNKNOWN-8BIT + if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) + || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT")) + ) { + curCharset = aDefaultCharset; + } + + const char *R; + R = r; + if (curEncoding == 'B') { + // bug 227290. ignore an extraneous '=' at the end. 
+ // (# of characters in B-encoded part has to be a multiple of 4) + int32_t n = r - (q + 2); + R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0; + } + // Bug 493544. Don't decode the encoded text until it ends + if (R[-1] != '=' + && (prevCharset.IsEmpty() + || (curCharset == prevCharset && curEncoding == prevEncoding)) + ) { + encodedText.Append(q + 2, R - (q + 2)); + prevCharset = curCharset; + prevEncoding = curEncoding; + + begin = r + 2; + isLastEncodedWord = 1; + continue; + } + + bool bDecoded; // If the current line has been decoded. + bDecoded = false; + if (!encodedText.IsEmpty()) { + if (curCharset == prevCharset && curEncoding == prevEncoding) { + encodedText.Append(q + 2, R - (q + 2)); + bDecoded = true; + } + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), + prevEncoding, prevCharset.get(), aResult); + if (NS_FAILED(rv)) { + aResult.Append(encodedText); + } + encodedText.Truncate(); + prevCharset.Truncate(); + prevEncoding = '\0'; + } + if (!bDecoded) { + rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, + curCharset.get(), aResult); + if (NS_FAILED(rv)) { + aResult.Append(encodedText); + } + } + + begin = r + 2; + isLastEncodedWord = 1; + continue; + + badsyntax: + if (!encodedText.IsEmpty()) { + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), + prevEncoding, prevCharset.get(), aResult); + if (NS_FAILED(rv)) { + aResult.Append(encodedText); + } + encodedText.Truncate(); + prevCharset.Truncate(); + } + // copy the part before the encoded-word + aResult.Append(begin, p - begin); + begin = p; + isLastEncodedWord = 0; + } + + if (!encodedText.IsEmpty()) { + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), + prevEncoding, prevCharset.get(), aResult); + if (NS_FAILED(rv)) { + aResult.Append(encodedText); + } + } + + // put the tail back + CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult); + + nsAutoCString tempStr(aResult); + tempStr.ReplaceChar('\t', ' '); + aResult = tempStr; 
+ + return NS_OK; +} |