summaryrefslogtreecommitdiffstats
path: root/netwerk/mime/nsMIMEHeaderParamImpl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'netwerk/mime/nsMIMEHeaderParamImpl.cpp')
-rw-r--r--netwerk/mime/nsMIMEHeaderParamImpl.cpp1346
1 files changed, 1346 insertions, 0 deletions
diff --git a/netwerk/mime/nsMIMEHeaderParamImpl.cpp b/netwerk/mime/nsMIMEHeaderParamImpl.cpp
new file mode 100644
index 000000000..8d668f64f
--- /dev/null
+++ b/netwerk/mime/nsMIMEHeaderParamImpl.cpp
@@ -0,0 +1,1346 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set sw=4 ts=8 et tw=80 : */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <string.h>
+#include "prmem.h"
+#include "prprf.h"
+#include "plstr.h"
+#include "plbase64.h"
+#include "nsCRT.h"
+#include "nsMemory.h"
+#include "nsTArray.h"
+#include "nsCOMPtr.h"
+#include "nsEscape.h"
+#include "nsIUTF8ConverterService.h"
+#include "nsUConvCID.h"
+#include "nsIServiceManager.h"
+#include "nsMIMEHeaderParamImpl.h"
+#include "nsReadableUtils.h"
+#include "nsNativeCharsetUtils.h"
+#include "nsError.h"
+#include "nsIUnicodeDecoder.h"
+#include "mozilla/dom/EncodingUtils.h"
+
+using mozilla::dom::EncodingUtils;
+
+// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
+
+static char *DecodeQ(const char *, uint32_t);
+static bool Is7bitNonAsciiString(const char *, uint32_t);
+static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
+static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
+static nsresult internalDecodeParameter(const nsACString&, const char*,
+ const char*, bool, bool, nsACString&);
+
+// XXX The chance of UTF-7 being used in the message header is really
+// low, but in theory it's possible.
+#define IS_7BIT_NON_ASCII_CHARSET(cset) \
+ (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
+ !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \
+ !nsCRT::strncasecmp((cset), "UTF-7", 5))
+
+NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
+
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
+ const char *aParamName,
+ const nsACString& aFallbackCharset,
+ bool aTryLocaleCharset,
+ char **aLang, nsAString& aResult)
+{
+ return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
+ aFallbackCharset, aTryLocaleCharset, aLang, aResult);
+}
+
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
+ const char *aParamName,
+ const nsACString& aFallbackCharset,
+ bool aTryLocaleCharset,
+ char **aLang, nsAString& aResult)
+{
+ return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
+ aFallbackCharset, aTryLocaleCharset, aLang, aResult);
+}
+
+// XXX : aTryLocaleCharset is not yet effective.
+nsresult
+nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
+ const char *aParamName,
+ ParamDecoding aDecoding,
+ const nsACString& aFallbackCharset,
+ bool aTryLocaleCharset,
+ char **aLang, nsAString& aResult)
+{
+ aResult.Truncate();
+ nsresult rv;
+
+ // get parameter (decode RFC 2231/5987 when applicable, as specified by
+ // aDecoding (5987 being a subset of 2231) and return charset.)
+ nsXPIDLCString med;
+ nsXPIDLCString charset;
+ rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
+ aDecoding, getter_Copies(charset), aLang,
+ getter_Copies(med));
+ if (NS_FAILED(rv))
+ return rv;
+
+ // convert to UTF-8 after charset conversion and RFC 2047 decoding
+ // if necessary.
+
+ nsAutoCString str1;
+ rv = internalDecodeParameter(med, charset.get(), nullptr, false,
+ // was aDecoding == MIME_FIELD_ENCODING
+ // see bug 875615
+ true,
+ str1);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (!aFallbackCharset.IsEmpty())
+ {
+ nsAutoCString charset;
+ EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset);
+ nsAutoCString str2;
+ nsCOMPtr<nsIUTF8ConverterService>
+ cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
+ if (cvtUTF8 &&
+ NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1,
+ PromiseFlatCString(aFallbackCharset).get(), false,
+ !charset.EqualsLiteral("UTF-8"),
+ 1, str2))) {
+ CopyUTF8toUTF16(str2, aResult);
+ return NS_OK;
+ }
+ }
+
+ if (IsUTF8(str1)) {
+ CopyUTF8toUTF16(str1, aResult);
+ return NS_OK;
+ }
+
+ if (aTryLocaleCharset && !NS_IsNativeUTF8())
+ return NS_CopyNativeToUnicode(str1, aResult);
+
+ CopyASCIItoUTF16(str1, aResult);
+ return NS_OK;
+}
+
+// remove backslash-encoded sequences from quoted-strings
+// modifies string in place, potentially shortening it
+void RemoveQuotedStringEscapes(char *src)
+{
+ char *dst = src;
+
+ for (char *c = src; *c; ++c)
+ {
+ if (c[0] == '\\' && c[1])
+ {
+ // skip backslash if not at end
+ ++c;
+ }
+ *dst++ = *c;
+ }
+ *dst = 0;
+}
+
+// true is character is a hex digit
+bool IsHexDigit(char aChar)
+{
+ char c = aChar;
+
+ return (c >= 'a' && c <= 'f') ||
+ (c >= 'A' && c <= 'F') ||
+ (c >= '0' && c <= '9');
+}
+
+// validate that a C String containing %-escapes is syntactically valid
+bool IsValidPercentEscaped(const char *aValue, int32_t len)
+{
+ for (int32_t i = 0; i < len; i++) {
+ if (aValue[i] == '%') {
+ if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Support for continuations (RFC 2231, Section 3)
+
+// only a sane number supported
+#define MAX_CONTINUATIONS 999
+
+// part of a continuation
+
+class Continuation {
+ public:
+ Continuation(const char *aValue, uint32_t aLength,
+ bool aNeedsPercentDecoding, bool aWasQuotedString) {
+ value = aValue;
+ length = aLength;
+ needsPercentDecoding = aNeedsPercentDecoding;
+ wasQuotedString = aWasQuotedString;
+ }
+ Continuation() {
+ // empty constructor needed for nsTArray
+ value = 0L;
+ length = 0;
+ needsPercentDecoding = false;
+ wasQuotedString = false;
+ }
+ ~Continuation() = default;
+
+ const char *value;
+ uint32_t length;
+ bool needsPercentDecoding;
+ bool wasQuotedString;
+};
+
+// combine segments into a single string, returning the allocated string
+// (or nullptr) while emptying the list
+char *combineContinuations(nsTArray<Continuation>& aArray)
+{
+ // Sanity check
+ if (aArray.Length() == 0)
+ return nullptr;
+
+ // Get an upper bound for the length
+ uint32_t length = 0;
+ for (uint32_t i = 0; i < aArray.Length(); i++) {
+ length += aArray[i].length;
+ }
+
+ // Allocate
+ char *result = (char *) moz_xmalloc(length + 1);
+
+ // Concatenate
+ if (result) {
+ *result = '\0';
+
+ for (uint32_t i = 0; i < aArray.Length(); i++) {
+ Continuation cont = aArray[i];
+ if (! cont.value) break;
+
+ char *c = result + strlen(result);
+ strncat(result, cont.value, cont.length);
+ if (cont.needsPercentDecoding) {
+ nsUnescape(c);
+ }
+ if (cont.wasQuotedString) {
+ RemoveQuotedStringEscapes(c);
+ }
+ }
+
+ // return null if empty value
+ if (*result == '\0') {
+ free(result);
+ result = nullptr;
+ }
+ } else {
+ // Handle OOM
+ NS_WARNING("Out of memory\n");
+ }
+
+ return result;
+}
+
+// add a continuation, return false on error if segment already has been seen
+bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
+ const char *aValue, uint32_t aLength,
+ bool aNeedsPercentDecoding, bool aWasQuotedString)
+{
+ if (aIndex < aArray.Length() && aArray[aIndex].value) {
+ NS_WARNING("duplicate RC2231 continuation segment #\n");
+ return false;
+ }
+
+ if (aIndex > MAX_CONTINUATIONS) {
+ NS_WARNING("RC2231 continuation segment # exceeds limit\n");
+ return false;
+ }
+
+ if (aNeedsPercentDecoding && aWasQuotedString) {
+ NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
+ return false;
+ }
+
+ Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
+
+ if (aArray.Length() <= aIndex) {
+ aArray.SetLength(aIndex + 1);
+ }
+ aArray[aIndex] = cont;
+
+ return true;
+}
+
+// parse a segment number; return -1 on error
+int32_t parseSegmentNumber(const char *aValue, int32_t aLen)
+{
+ if (aLen < 1) {
+ NS_WARNING("segment number missing\n");
+ return -1;
+ }
+
+ if (aLen > 1 && aValue[0] == '0') {
+ NS_WARNING("leading '0' not allowed in segment number\n");
+ return -1;
+ }
+
+ int32_t segmentNumber = 0;
+
+ for (int32_t i = 0; i < aLen; i++) {
+ if (! (aValue[i] >= '0' && aValue[i] <= '9')) {
+ NS_WARNING("invalid characters in segment number\n");
+ return -1;
+ }
+
+ segmentNumber *= 10;
+ segmentNumber += aValue[i] - '0';
+ if (segmentNumber > MAX_CONTINUATIONS) {
+ NS_WARNING("Segment number exceeds sane size\n");
+ return -1;
+ }
+ }
+
+ return segmentNumber;
+}
+
+// validate a given octet sequence for compliance with the specified
+// encoding
+bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
+{
+ nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
+ (NS_UTF8CONVERTERSERVICE_CONTRACTID));
+ if (!cvtUTF8) {
+ NS_WARNING("Can't get UTF8ConverterService\n");
+ return false;
+ }
+
+ nsAutoCString tmpRaw;
+ tmpRaw.Assign(aOctets);
+ nsAutoCString tmpDecoded;
+
+ nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
+ PromiseFlatCString(aCharset).get(),
+ false, false, 1, tmpDecoded);
+
+ if (rv != NS_OK) {
+ // we can't decode; charset may be unsupported, or the octet sequence
+ // is broken (illegal or incomplete octet sequence contained)
+ NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
+ return false;
+ }
+
+ return true;
+}
+
+// moved almost verbatim from mimehdrs.cpp
+// char *
+// MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
+// char **charset, char **language)
+//
+// The format of these header lines is
+// <token> [ ';' <token> '=' <token-or-quoted-string> ]*
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
+ const char *aParamName,
+ char **aCharset,
+ char **aLang,
+ char **aResult)
+{
+ return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
+ aCharset, aLang, aResult);
+}
+
+
+nsresult
+nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
+ const char *aParamName,
+ ParamDecoding aDecoding,
+ char **aCharset,
+ char **aLang,
+ char **aResult)
+{
+
+ if (!aHeaderValue || !*aHeaderValue || !aResult)
+ return NS_ERROR_INVALID_ARG;
+
+ *aResult = nullptr;
+
+ if (aCharset) *aCharset = nullptr;
+ if (aLang) *aLang = nullptr;
+
+ nsAutoCString charset;
+
+ // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
+ // them for HTTP header fields later on, see bug 776324
+ bool acceptContinuations = true;
+
+ const char *str = aHeaderValue;
+
+ // skip leading white space.
+ for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
+ ;
+ const char *start = str;
+
+ // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
+ // For instance, return 'inline' in the following case:
+ // Content-Disposition: inline; filename=.....
+ if (!aParamName || !*aParamName)
+ {
+ for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
+ ;
+ if (str == start)
+ return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
+
+ *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
+ NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
+ (*aResult)[str - start] = '\0'; // null-terminate
+ return NS_OK;
+ }
+
+ /* Skip forward to first ';' */
+ for (; *str && *str != ';' && *str != ','; ++str)
+ ;
+ if (*str)
+ str++;
+ /* Skip over following whitespace */
+ for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
+ ;
+
+ // Some broken http servers just specify parameters
+ // like 'filename' without specifying disposition
+ // method. Rewind to the first non-white-space
+ // character.
+
+ if (!*str)
+ str = start;
+
+ // RFC2231 - The legitimate parm format can be:
+ // A. title=ThisIsTitle
+ // B. title*=us-ascii'en-us'This%20is%20wierd.
+ // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
+ // title*1*=have%20to%20support%20this.
+ // title*2="Else..."
+ // D. title*0="Hey, what you think you are doing?"
+ // title*1="There is no charset and lang info."
+ // RFC5987: only A and B
+
+ // collect results for the different algorithms (plain filename,
+ // RFC5987/2231-encoded filename, + continuations) separately and decide
+ // which to use at the end
+ char *caseAResult = nullptr;
+ char *caseBResult = nullptr;
+ char *caseCDResult = nullptr;
+
+ // collect continuation segments
+ nsTArray<Continuation> segments;
+
+
+ // our copies of the charset parameter, kept separately as they might
+ // differ for the two formats
+ nsDependentCSubstring charsetB, charsetCD;
+
+ nsDependentCSubstring lang;
+
+ int32_t paramLen = strlen(aParamName);
+
+ while (*str) {
+ // find name/value
+
+ const char *nameStart = str;
+ const char *nameEnd = nullptr;
+ const char *valueStart = str;
+ const char *valueEnd = nullptr;
+ bool isQuotedString = false;
+
+ NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
+
+ // Skip forward to the end of this token.
+ for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
+ ;
+ nameEnd = str;
+
+ int32_t nameLen = nameEnd - nameStart;
+
+ // Skip over whitespace, '=', and whitespace
+ while (nsCRT::IsAsciiSpace(*str)) ++str;
+ if (!*str) {
+ break;
+ }
+ if (*str++ != '=') {
+ // don't accept parameters without "="
+ goto increment_str;
+ }
+ while (nsCRT::IsAsciiSpace(*str)) ++str;
+
+ if (*str != '"') {
+ // The value is a token, not a quoted string.
+ valueStart = str;
+ for (valueEnd = str;
+ *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
+ valueEnd++)
+ ;
+ str = valueEnd;
+ } else {
+ isQuotedString = true;
+
+ ++str;
+ valueStart = str;
+ for (valueEnd = str; *valueEnd; ++valueEnd) {
+ if (*valueEnd == '\\' && *(valueEnd + 1))
+ ++valueEnd;
+ else if (*valueEnd == '"')
+ break;
+ }
+ str = valueEnd;
+ // *valueEnd != null means that *valueEnd is quote character.
+ if (*valueEnd)
+ str++;
+ }
+
+ // See if this is the simplest case (case A above),
+ // a 'single' line value with no charset and lang.
+ // If so, copy it and return.
+ if (nameLen == paramLen &&
+ !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
+
+ if (caseAResult) {
+ // we already have one caseA result, ignore subsequent ones
+ goto increment_str;
+ }
+
+ // if the parameter spans across multiple lines we have to strip out the
+ // line continuation -- jht 4/29/98
+ nsAutoCString tempStr(valueStart, valueEnd - valueStart);
+ tempStr.StripChars("\r\n");
+ char *res = ToNewCString(tempStr);
+ NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
+
+ if (isQuotedString)
+ RemoveQuotedStringEscapes(res);
+
+ caseAResult = res;
+ // keep going, we may find a RFC 2231/5987 encoded alternative
+ }
+ // case B, C, and D
+ else if (nameLen > paramLen &&
+ !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
+ *(nameStart + paramLen) == '*') {
+
+ // 1st char past '*'
+ const char *cp = nameStart + paramLen + 1;
+
+ // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
+ bool needExtDecoding = *(nameEnd - 1) == '*';
+
+ bool caseB = nameLen == paramLen + 1;
+ bool caseCStart = (*cp == '0') && needExtDecoding;
+
+ // parse the segment number
+ int32_t segmentNumber = -1;
+ if (!caseB) {
+ int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
+ segmentNumber = parseSegmentNumber(cp, segLen);
+
+ if (segmentNumber == -1) {
+ acceptContinuations = false;
+ goto increment_str;
+ }
+ }
+
+ // CaseB and start of CaseC: requires charset and optional language
+ // in quotes (quotes required even if lang is blank)
+ if (caseB || (caseCStart && acceptContinuations)) {
+ // look for single quotation mark(')
+ const char *sQuote1 = PL_strchr(valueStart, 0x27);
+ const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
+
+ // Two single quotation marks must be present even in
+ // absence of charset and lang.
+ if (!sQuote1 || !sQuote2) {
+ NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
+ }
+
+ const char *charsetStart = nullptr;
+ int32_t charsetLength = 0;
+ const char *langStart = nullptr;
+ int32_t langLength = 0;
+ const char *rawValStart = nullptr;
+ int32_t rawValLength = 0;
+
+ if (sQuote2 && sQuote1) {
+ // both delimiters present: charSet'lang'rawVal
+ rawValStart = sQuote2 + 1;
+ rawValLength = valueEnd - rawValStart;
+
+ langStart = sQuote1 + 1;
+ langLength = sQuote2 - langStart;
+
+ charsetStart = valueStart;
+ charsetLength = sQuote1 - charsetStart;
+ }
+ else if (sQuote1) {
+ // one delimiter; assume charset'rawVal
+ rawValStart = sQuote1 + 1;
+ rawValLength = valueEnd - rawValStart;
+
+ charsetStart = valueStart;
+ charsetLength = sQuote1 - valueStart;
+ }
+ else {
+ // no delimiter: just rawVal
+ rawValStart = valueStart;
+ rawValLength = valueEnd - valueStart;
+ }
+
+ if (langLength != 0) {
+ lang.Assign(langStart, langLength);
+ }
+
+ // keep the charset for later
+ if (caseB) {
+ charsetB.Assign(charsetStart, charsetLength);
+ } else {
+ // if caseCorD
+ charsetCD.Assign(charsetStart, charsetLength);
+ }
+
+ // non-empty value part
+ if (rawValLength > 0) {
+ if (!caseBResult && caseB) {
+ if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
+ goto increment_str;
+ }
+
+ // allocate buffer for the raw value
+ char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1);
+ if (!tmpResult) {
+ goto increment_str;
+ }
+ *(tmpResult + rawValLength) = 0;
+
+ nsUnescape(tmpResult);
+ caseBResult = tmpResult;
+ } else {
+ // caseC
+ bool added = addContinuation(segments, 0, rawValStart,
+ rawValLength, needExtDecoding,
+ isQuotedString);
+
+ if (!added) {
+ // continuation not added, stop processing them
+ acceptContinuations = false;
+ }
+ }
+ }
+ } // end of if-block : title*0*= or title*=
+ // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
+ // or 2nd or later lines of a caseC param : title*[1-9]*=
+ else if (acceptContinuations && segmentNumber != -1) {
+ uint32_t valueLength = valueEnd - valueStart;
+
+ bool added = addContinuation(segments, segmentNumber, valueStart,
+ valueLength, needExtDecoding,
+ isQuotedString);
+
+ if (!added) {
+ // continuation not added, stop processing them
+ acceptContinuations = false;
+ }
+ } // end of if-block : title*[0-9]= or title*[1-9]*=
+ }
+
+ // str now points after the end of the value.
+ // skip over whitespace, ';', whitespace.
+increment_str:
+ while (nsCRT::IsAsciiSpace(*str)) ++str;
+ if (*str == ';') {
+ ++str;
+ } else {
+ // stop processing the header field; either we are done or the
+ // separator was missing
+ break;
+ }
+ while (nsCRT::IsAsciiSpace(*str)) ++str;
+ }
+
+ caseCDResult = combineContinuations(segments);
+
+ if (caseBResult && !charsetB.IsEmpty()) {
+ // check that the 2231/5987 result decodes properly given the
+ // specified character set
+ if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
+ caseBResult = nullptr;
+ }
+
+ if (caseCDResult && !charsetCD.IsEmpty()) {
+ // check that the 2231/5987 result decodes properly given the
+ // specified character set
+ if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
+ caseCDResult = nullptr;
+ }
+
+ if (caseBResult) {
+ // prefer simple 5987 format over 2231 with continuations
+ *aResult = caseBResult;
+ caseBResult = nullptr;
+ charset.Assign(charsetB);
+ }
+ else if (caseCDResult) {
+ // prefer 2231/5987 with or without continuations over plain format
+ *aResult = caseCDResult;
+ caseCDResult = nullptr;
+ charset.Assign(charsetCD);
+ }
+ else if (caseAResult) {
+ *aResult = caseAResult;
+ caseAResult = nullptr;
+ }
+
+ // free unused stuff
+ free(caseAResult);
+ free(caseBResult);
+ free(caseCDResult);
+
+ // if we have a result
+ if (*aResult) {
+ // then return charset and lang as well
+ if (aLang && !lang.IsEmpty()) {
+ uint32_t len = lang.Length();
+ *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1);
+ if (*aLang) {
+ *(*aLang + len) = 0;
+ }
+ }
+ if (aCharset && !charset.IsEmpty()) {
+ uint32_t len = charset.Length();
+ *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1);
+ if (*aCharset) {
+ *(*aCharset + len) = 0;
+ }
+ }
+ }
+
+ return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
+}
+
+nsresult
+internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
+ bool aOverrideCharset, bool aEatContinuations,
+ nsACString& aResult)
+{
+ aResult.Truncate();
+ if (!aHeaderVal)
+ return NS_ERROR_INVALID_ARG;
+ if (!*aHeaderVal)
+ return NS_OK;
+
+
+ // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
+ // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
+ // to UTF-8. Otherwise, just strips away CRLF.
+ if (PL_strstr(aHeaderVal, "=?") ||
+ (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
+ Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
+ DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
+ } else if (aEatContinuations &&
+ (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
+ aResult = aHeaderVal;
+ } else {
+ aEatContinuations = false;
+ aResult = aHeaderVal;
+ }
+
+ if (aEatContinuations) {
+ nsAutoCString temp(aResult);
+ temp.ReplaceSubstring("\n\t", " ");
+ temp.ReplaceSubstring("\r\t", " ");
+ temp.StripChars("\r\n");
+ aResult = temp;
+ }
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
+ const char* aDefaultCharset,
+ bool aOverrideCharset,
+ bool aEatContinuations,
+ nsACString& aResult)
+{
+ return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset,
+ aOverrideCharset, aEatContinuations,
+ aResult);
+}
+
+// true if the character is allowed in a RFC 5987 value
+// see RFC 5987, Section 3.2.1, "attr-char"
+bool IsRFC5987AttrChar(char aChar)
+{
+ char c = aChar;
+
+ return (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '!' || c == '#' || c == '$' || c == '&' ||
+ c == '+' || c == '-' || c == '.' || c == '^' ||
+ c == '_' || c == '`' || c == '|' || c == '~');
+}
+
+// percent-decode a value
+// returns false on failure
+bool PercentDecode(nsACString& aValue)
+{
+ char *c = (char *) moz_xmalloc(aValue.Length() + 1);
+ if (!c) {
+ return false;
+ }
+
+ strcpy(c, PromiseFlatCString(aValue).get());
+ nsUnescape(c);
+ aValue.Assign(c);
+ free(c);
+
+ return true;
+}
+
+// Decode a parameter value using the encoding defined in RFC 5987
+//
+// charset "'" [ language ] "'" value-chars
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
+ nsACString& aLang,
+ nsAString& aResult)
+{
+ nsAutoCString charset;
+ nsAutoCString language;
+ nsAutoCString value;
+
+ uint32_t delimiters = 0;
+ const nsCString& encoded = PromiseFlatCString(aParamVal);
+ const char *c = encoded.get();
+
+ while (*c) {
+ char tc = *c++;
+
+ if (tc == '\'') {
+ // single quote
+ delimiters++;
+ } else if (((unsigned char)tc) >= 128) {
+ // fail early, not ASCII
+ NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
+ return NS_ERROR_INVALID_ARG;
+ } else {
+ if (delimiters == 0) {
+ // valid characters are checked later implicitly
+ charset.Append(tc);
+ } else if (delimiters == 1) {
+ // no value checking for now
+ language.Append(tc);
+ } else if (delimiters == 2) {
+ if (IsRFC5987AttrChar(tc)) {
+ value.Append(tc);
+ } else if (tc == '%') {
+ if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
+ // we expect two more characters
+ NS_WARNING("broken %-escape in RFC5987-encoded param");
+ return NS_ERROR_INVALID_ARG;
+ }
+ value.Append(tc);
+ // we consume two more
+ value.Append(*c++);
+ value.Append(*c++);
+ } else {
+ // character not allowed here
+ NS_WARNING("invalid character in RFC5987-encoded param");
+ return NS_ERROR_INVALID_ARG;
+ }
+ }
+ }
+ }
+
+ if (delimiters != 2) {
+ NS_WARNING("missing delimiters in RFC5987-encoded param");
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ // abort early for unsupported encodings
+ if (!charset.LowerCaseEqualsLiteral("utf-8")) {
+ NS_WARNING("unsupported charset in RFC5987-encoded param");
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ // percent-decode
+ if (!PercentDecode(value)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ // return the encoding
+ aLang.Assign(language);
+
+ // finally convert octet sequence to UTF-8 and be done
+ nsresult rv = NS_OK;
+ nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 =
+ do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ nsAutoCString utf8;
+ rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ CopyUTF8toUTF16(utf8, aResult);
+ return NS_OK;
+}
+
+nsresult
+internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
+ const char* aDefaultCharset, bool aOverrideCharset,
+ bool aDecode2047, nsACString& aResult)
+{
+ aResult.Truncate();
+ // If aCharset is given, aParamValue was obtained from RFC2231/5987
+ // encoding and we're pretty sure that it's in aCharset.
+ if (aCharset && *aCharset)
+ {
+ nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
+ if (cvtUTF8)
+ return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
+ true, true, 1, aResult);
+ }
+
+ const nsAFlatCString& param = PromiseFlatCString(aParamValue);
+ nsAutoCString unQuoted;
+ nsACString::const_iterator s, e;
+ param.BeginReading(s);
+ param.EndReading(e);
+
+ // strip '\' when used to quote CR, LF, '"' and '\'
+ for ( ; s != e; ++s) {
+ if ((*s == '\\')) {
+ if (++s == e) {
+ --s; // '\' is at the end. move back and append '\'.
+ }
+ else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
+ --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
+ }
+ // else : skip '\' and append the quoted character.
+ }
+ unQuoted.Append(*s);
+ }
+
+ aResult = unQuoted;
+ nsresult rv = NS_OK;
+
+ if (aDecode2047) {
+ nsAutoCString decoded;
+
+ // Try RFC 2047 encoding, instead.
+ rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
+ aOverrideCharset, true, decoded);
+
+ if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
+ aResult = decoded;
+ }
+
+ return rv;
+}
+
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
+ const char* aCharset,
+ const char* aDefaultCharset,
+ bool aOverrideCharset,
+ nsACString& aResult)
+{
+ return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
+ aOverrideCharset, true, aResult);
+}
+
+#define ISHEXCHAR(c) \
+ ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
+ (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
+ (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
+
+// Decode Q encoding (RFC 2047).
+// static
+char *DecodeQ(const char *in, uint32_t length)
+{
+ char *out, *dest = 0;
+
+ out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
+ if (dest == nullptr)
+ return nullptr;
+ while (length > 0) {
+ unsigned c = 0;
+ switch (*in) {
+ case '=':
+ // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
+ if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
+ goto badsyntax;
+ PR_sscanf(in + 1, "%2X", &c);
+ *out++ = (char) c;
+ in += 3;
+ length -= 3;
+ break;
+
+ case '_':
+ *out++ = ' ';
+ in++;
+ length--;
+ break;
+
+ default:
+ if (*in & 0x80) goto badsyntax;
+ *out++ = *in++;
+ length--;
+ }
+ }
+ *out++ = '\0';
+
+ for (out = dest; *out ; ++out) {
+ if (*out == '\t')
+ *out = ' ';
+ }
+
+ return dest;
+
+ badsyntax:
+ PR_Free(dest);
+ return nullptr;
+}
+
+// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
+// or has ESC which may be an indication that it's in one of many ISO
+// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
+// static
+bool Is7bitNonAsciiString(const char *input, uint32_t len)
+{
+ int32_t c;
+
+ enum { hz_initial, // No HZ seen yet
+ hz_escaped, // Inside an HZ ~{ escape sequence
+ hz_seen, // Have seen at least one complete HZ sequence
+ hz_notpresent // Have seen something that is not legal HZ
+ } hz_state;
+
+ hz_state = hz_initial;
+ while (len) {
+ c = uint8_t(*input++);
+ len--;
+ if (c & 0x80) return false;
+ if (c == 0x1B) return true;
+ if (c == '~') {
+ switch (hz_state) {
+ case hz_initial:
+ case hz_seen:
+ if (*input == '{') {
+ hz_state = hz_escaped;
+ } else if (*input == '~') {
+ // ~~ is the HZ encoding of ~. Skip over second ~ as well
+ hz_state = hz_seen;
+ input++;
+ len--;
+ } else {
+ hz_state = hz_notpresent;
+ }
+ break;
+
+ case hz_escaped:
+ if (*input == '}') hz_state = hz_seen;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return hz_state == hz_seen;
+}
+
+#define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
+
+// copy 'raw' sequences of octets in aInput to aOutput.
+// If aDefaultCharset is specified, the input is assumed to be in the
+// charset and converted to UTF-8. Otherwise, a blind copy is made.
+// If aDefaultCharset is specified, but the conversion to UTF-8
+// is not successful, each octet is replaced by Unicode replacement
+// chars. *aOutput is advanced by the number of output octets.
+// static
+void CopyRawHeader(const char *aInput, uint32_t aLen,
+ const char *aDefaultCharset, nsACString &aOutput)
+{
+ int32_t c;
+
+ // If aDefaultCharset is not specified, make a blind copy.
+ if (!aDefaultCharset || !*aDefaultCharset) {
+ aOutput.Append(aInput, aLen);
+ return;
+ }
+
+ // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
+ // A ~ may indicate it is HZ
+ while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
+ aOutput.Append(char(c));
+ aLen--;
+ }
+ if (!aLen) {
+ return;
+ }
+ aInput--;
+
+ // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
+ // string and aDefaultCharset is a 7bit non-ascii charset.
+ bool skipCheck = (c == 0x1B || c == '~') &&
+ IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
+
+ // If not UTF-8, treat as default charset
+ nsCOMPtr<nsIUTF8ConverterService>
+ cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
+ nsAutoCString utf8Text;
+ if (cvtUTF8 &&
+ NS_SUCCEEDED(
+ cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
+ aDefaultCharset, skipCheck, true, 1,
+ utf8Text))) {
+ aOutput.Append(utf8Text);
+ } else { // replace each octet with Unicode replacement char in UTF-8.
+ for (uint32_t i = 0; i < aLen; i++) {
+ c = uint8_t(*aInput++);
+ if (c & 0x80)
+ aOutput.Append(REPLACEMENT_CHAR);
+ else
+ aOutput.Append(char(c));
+ }
+ }
+}
+
+nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64,
+ const char *aCharset, nsACString &aResult)
+{
+ char *decodedText;
+ NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
+ if(aQOrBase64 == 'Q')
+ decodedText = DecodeQ(aEncoded, aLen);
+ else if (aQOrBase64 == 'B') {
+ decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
+ } else {
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ if (!decodedText) {
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ nsresult rv;
+ nsCOMPtr<nsIUTF8ConverterService>
+ cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
+ nsAutoCString utf8Text;
+ if (NS_SUCCEEDED(rv)) {
+ // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
+ rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
+ aCharset,
+ IS_7BIT_NON_ASCII_CHARSET(aCharset),
+ true, 1, utf8Text);
+ }
+ PR_Free(decodedText);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+ aResult.Append(utf8Text);
+
+ return NS_OK;
+}
+
+static const char especials[] = R"(()<>@,;:\"/[]?.=)";
+
+// |decode_mime_part2_str| taken from comi18n.c
+// Decode RFC2047-encoded words in the input and convert the result to UTF-8.
+// If aOverrideCharset is true, charset in RFC2047-encoded words is
+// ignored and aDefaultCharset is assumed, instead. aDefaultCharset
+// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
+//static
+nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset,
+ bool aOverrideCharset, nsACString &aResult)
+{
+ const char *p, *q = nullptr, *r;
+ const char *begin; // tracking pointer for where we are in the input buffer
+ int32_t isLastEncodedWord = 0;
+ const char *charsetStart, *charsetEnd;
+ nsAutoCString prevCharset, curCharset;
+ nsAutoCString encodedText;
+ char prevEncoding = '\0', curEncoding;
+ nsresult rv;
+
+ begin = aHeader;
+
+ // To avoid buffer realloc, if possible, set capacity in advance. No
+ // matter what, more than 3x expansion can never happen for all charsets
+ // supported by Mozilla. SCSU/BCSU with the sliding window set to a
+ // non-BMP block may be exceptions, but Mozilla does not support them.
+ // Neither any known mail/news program use them. Even if there's, we're
+ // safe because we don't use a raw *char any more.
+ aResult.SetCapacity(3 * strlen(aHeader));
+
+ while ((p = PL_strstr(begin, "=?")) != 0) {
+ if (isLastEncodedWord) {
+ // See if it's all whitespace.
+ for (q = begin; q < p; ++q) {
+ if (!PL_strchr(" \t\r\n", *q)) break;
+ }
+ }
+
+ if (!isLastEncodedWord || q < p) {
+ if (!encodedText.IsEmpty()) {
+ rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+ prevEncoding, prevCharset.get(), aResult);
+ if (NS_FAILED(rv)) {
+ aResult.Append(encodedText);
+ }
+ encodedText.Truncate();
+ prevCharset.Truncate();
+ prevEncoding = '\0';
+ }
+ // copy the part before the encoded-word
+ CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
+ begin = p;
+ }
+
+ p += 2;
+
+ // Get charset info
+ charsetStart = p;
+ charsetEnd = 0;
+ for (q = p; *q != '?'; q++) {
+ if (*q <= ' ' || PL_strchr(especials, *q)) {
+ goto badsyntax;
+ }
+
+ // RFC 2231 section 5
+ if (!charsetEnd && *q == '*') {
+ charsetEnd = q;
+ }
+ }
+ if (!charsetEnd) {
+ charsetEnd = q;
+ }
+
+ q++;
+ curEncoding = nsCRT::ToUpper(*q);
+ if (curEncoding != 'Q' && curEncoding != 'B')
+ goto badsyntax;
+
+ if (q[1] != '?')
+ goto badsyntax;
+
+ // loop-wise, keep going until we hit "?=". the inner check handles the
+ // nul terminator should the string terminate before we hit the right
+ // marker. (And the r[1] will never reach beyond the end of the string
+ // because *r != '?' is true if r is the nul character.)
+ for (r = q + 2; *r != '?' || r[1] != '='; r++) {
+ if (*r < ' ') goto badsyntax;
+ }
+ if (r == q + 2) {
+ // it's empty, skip
+ begin = r + 2;
+ isLastEncodedWord = 1;
+ continue;
+ }
+
+ curCharset.Assign(charsetStart, charsetEnd - charsetStart);
+ // Override charset if requested. Never override labeled UTF-8.
+ // Use default charset instead of UNKNOWN-8BIT
+ if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8"))
+ || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))
+ ) {
+ curCharset = aDefaultCharset;
+ }
+
+ const char *R;
+ R = r;
+ if (curEncoding == 'B') {
+ // bug 227290. ignore an extraneous '=' at the end.
+ // (# of characters in B-encoded part has to be a multiple of 4)
+ int32_t n = r - (q + 2);
+ R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
+ }
+ // Bug 493544. Don't decode the encoded text until it ends
+ if (R[-1] != '='
+ && (prevCharset.IsEmpty()
+ || (curCharset == prevCharset && curEncoding == prevEncoding))
+ ) {
+ encodedText.Append(q + 2, R - (q + 2));
+ prevCharset = curCharset;
+ prevEncoding = curEncoding;
+
+ begin = r + 2;
+ isLastEncodedWord = 1;
+ continue;
+ }
+
+ bool bDecoded; // If the current line has been decoded.
+ bDecoded = false;
+ if (!encodedText.IsEmpty()) {
+ if (curCharset == prevCharset && curEncoding == prevEncoding) {
+ encodedText.Append(q + 2, R - (q + 2));
+ bDecoded = true;
+ }
+ rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+ prevEncoding, prevCharset.get(), aResult);
+ if (NS_FAILED(rv)) {
+ aResult.Append(encodedText);
+ }
+ encodedText.Truncate();
+ prevCharset.Truncate();
+ prevEncoding = '\0';
+ }
+ if (!bDecoded) {
+ rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding,
+ curCharset.get(), aResult);
+ if (NS_FAILED(rv)) {
+ aResult.Append(encodedText);
+ }
+ }
+
+ begin = r + 2;
+ isLastEncodedWord = 1;
+ continue;
+
+ badsyntax:
+ if (!encodedText.IsEmpty()) {
+ rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+ prevEncoding, prevCharset.get(), aResult);
+ if (NS_FAILED(rv)) {
+ aResult.Append(encodedText);
+ }
+ encodedText.Truncate();
+ prevCharset.Truncate();
+ }
+ // copy the part before the encoded-word
+ aResult.Append(begin, p - begin);
+ begin = p;
+ isLastEncodedWord = 0;
+ }
+
+ if (!encodedText.IsEmpty()) {
+ rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+ prevEncoding, prevCharset.get(), aResult);
+ if (NS_FAILED(rv)) {
+ aResult.Append(encodedText);
+ }
+ }
+
+ // put the tail back
+ CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
+
+ nsAutoCString tempStr(aResult);
+ tempStr.ReplaceChar('\t', ' ');
+ aResult = tempStr;
+
+ return NS_OK;
+}