1 files changed, 633 insertions, 0 deletions
diff --git a/xpcom/io/nsEscape.cpp b/xpcom/io/nsEscape.cpp
new file mode 100644
index 000000000..f16edc4ce
--- /dev/null
+++ b/xpcom/io/nsEscape.cpp
@@ -0,0 +1,633 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsEscape.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/BinarySearch.h"
+#include "nsTArray.h"
+#include "nsCRT.h"
+#include "plstr.h"
+
+static const char hexCharsUpper[] = "0123456789ABCDEF";
+static const char hexCharsUpperLower[] = "0123456789ABCDEFabcdef";
+
+static const int netCharType[256] =
+/*  Bit 0       xalpha      -- the alphas
+**  Bit 1       xpalpha     -- as xalpha but
+**                             converts spaces to plus and plus to %2B
+**  Bit 3 ...   path        -- as xalphas but doesn't escape '/'
+*/
+  /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
+  {  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,   /* 0x */
+     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,   /* 1x */
+     0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,   /* 2x   !"#$%&'()*+,-./  */
+     7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,   /* 3x  0123456789:;<=>?  */
+     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,   /* 4x  @ABCDEFGHIJKLMNO  */
+     /* bits for '@' changed from 7 to 0 so '@' can be escaped   */
+     /* in usernames and passwords in publishing.                */
+     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,   /* 5X  PQRSTUVWXYZ[\]^_  */
+     0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,   /* 6x  `abcdefghijklmno  */
+     7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,   /* 7X  pqrstuvwxyz{\}~  DEL */
+     0, };
+
+/* decode % escaped hex codes into character values
+ */
+#define UNHEX(C) \
+    ((C >= '0' && C <= '9') ? C - '0' : \
+     ((C >= 'A' && C <= 'F') ? C - 'A' + 10 : \
+     ((C >= 'a' && C <= 'f') ? C - 'a' + 10 : 0)))
+
+
+#define IS_OK(C) (netCharType[((unsigned int)(C))] & (aFlags))
+#define HEX_ESCAPE '%'
+
+static const uint32_t ENCODE_MAX_LEN = 6; // %uABCD
+
+static uint32_t
+AppendPercentHex(char* aBuffer, unsigned char aChar)
+{
+  uint32_t i = 0;
+  aBuffer[i++] = '%';
+  aBuffer[i++] = hexCharsUpper[aChar >> 4]; // high nibble
+  aBuffer[i++] = hexCharsUpper[aChar & 0xF]; // low nibble
+  return i;
+}
+
+static uint32_t
+AppendPercentHex(char16_t* aBuffer, char16_t aChar)
+{
+  uint32_t i = 0;
+  aBuffer[i++] = '%';
+  if (aChar & 0xff00) {
+    aBuffer[i++] = 'u';
+    aBuffer[i++] = hexCharsUpper[aChar >> 12]; // high-byte high nibble
+    aBuffer[i++] = hexCharsUpper[(aChar >> 8) & 0xF]; // high-byte low nibble
+  }
+  aBuffer[i++] = hexCharsUpper[(aChar >> 4) & 0xF]; // low-byte high nibble
+  aBuffer[i++] = hexCharsUpper[aChar & 0xF]; // low-byte low nibble
+  return i;
+}
+
+//----------------------------------------------------------------------------------------
+char*
+nsEscape(const char* aStr, size_t aLength, size_t* aOutputLength,
+         nsEscapeMask aFlags)
+//----------------------------------------------------------------------------------------
+{
+  if (!aStr) {
+    return nullptr;
+  }
+
+  size_t charsToEscape = 0;
+
+  const unsigned char* src = (const unsigned char*)aStr;
+  for (size_t i = 0; i < aLength; ++i) {
+    if (!IS_OK(src[i])) {
+      charsToEscape++;
+    }
+  }
+
+  // calculate how much memory should be allocated
+  // original length + 2 bytes for each escaped character + terminating '\0'
+  // do the sum in steps to check for overflow
+  size_t dstSize = aLength + 1 + charsToEscape;
+  if (dstSize <= aLength) {
+    return nullptr;
+  }
+  dstSize += charsToEscape;
+  if (dstSize < aLength) {
+    return nullptr;
+  }
+
+  // fail if we need more than 4GB
+  if (dstSize > UINT32_MAX) {
+    return nullptr;
+  }
+
+  char* result = (char*)moz_xmalloc(dstSize);
+  if (!result) {
+    return nullptr;
+  }
+
+  unsigned char* dst = (unsigned char*)result;
+  src = (const unsigned char*)aStr;
+  if (aFlags == url_XPAlphas) {
+    for (size_t i = 0; i < aLength; ++i) {
+      unsigned char c = *src++;
+      if (IS_OK(c)) {
+        *dst++ = c;
+      } else if (c == ' ') {
+        *dst++ = '+';  /* convert spaces to pluses */
+      } else {
+        *dst++ = HEX_ESCAPE;
+        *dst++ = hexCharsUpper[c >> 4];  /* high nibble */
+        *dst++ = hexCharsUpper[c & 0x0f];  /* low nibble */
+      }
+    }
+  } else {
+    for (size_t i = 0; i < aLength; ++i) {
+      unsigned char c = *src++;
+      if (IS_OK(c)) {
+        *dst++ = c;
+      } else {
+        *dst++ = HEX_ESCAPE;
+        *dst++ = hexCharsUpper[c >> 4];  /* high nibble */
+        *dst++ = hexCharsUpper[c & 0x0f];  /* low nibble */
+      }
+    }
+  }
+
+  *dst = '\0';     /* tack on eos */
+  if (aOutputLength) {
+    *aOutputLength = dst - (unsigned char*)result;
+  }
+
+  return result;
+}
+
+//----------------------------------------------------------------------------------------
+char*
+nsUnescape(char* aStr)
+//----------------------------------------------------------------------------------------
+{
+  nsUnescapeCount(aStr);
+  return aStr;
+}
+
+//----------------------------------------------------------------------------------------
+int32_t
+nsUnescapeCount(char* aStr)
+//----------------------------------------------------------------------------------------
+{
+  char* src = aStr;
+  char* dst = aStr;
+
+  char c1[] = " ";
+  char c2[] = " ";
+  char* const pc1 = c1;
+  char* const pc2 = c2;
+
+  if (!*src) {
+    // A null string was passed in.  Nothing to escape.
+    // Returns early as the string might not actually be mutable with
+    // length 0.
+    return 0;
+  }
+
+  while (*src) {
+    c1[0] = *(src + 1);
+    if (*(src + 1) == '\0') {
+      c2[0] = '\0';
+    } else {
+      c2[0] = *(src + 2);
+    }
+
+    if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexCharsUpperLower) == 0 ||
+        PL_strpbrk(pc2, hexCharsUpperLower) == 0) {
+      *dst++ = *src++;
+    } else {
+      src++; /* walk over escape */
+      if (*src) {
+        *dst = UNHEX(*src) << 4;
+        src++;
+      }
+      if (*src) {
+        *dst = (*dst + UNHEX(*src));
+        src++;
+      }
+      dst++;
+    }
+  }
+
+  *dst = 0;
+  return (int)(dst - aStr);
+
+} /* NET_UnEscapeCnt */
+
+
+char*
+nsEscapeHTML(const char* aString)
+{
+  char* rv = nullptr;
+  /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
+  uint32_t len = strlen(aString);
+  if (len >= (UINT32_MAX / 6)) {
+    return nullptr;
+  }
+
+  rv = (char*)moz_xmalloc((6 * len) + 1);
+  char* ptr = rv;
+
+  if (rv) {
+    for (; *aString != '\0'; ++aString) {
+      if (*aString == '<') {
+        *ptr++ = '&';
+        *ptr++ = 'l';
+        *ptr++ = 't';
+        *ptr++ = ';';
+      } else if (*aString == '>') {
+        *ptr++ = '&';
+        *ptr++ = 'g';
+        *ptr++ = 't';
+        *ptr++ = ';';
+      } else if (*aString == '&') {
+        *ptr++ = '&';
+        *ptr++ = 'a';
+        *ptr++ = 'm';
+        *ptr++ = 'p';
+        *ptr++ = ';';
+      } else if (*aString == '"') {
+        *ptr++ = '&';
+        *ptr++ = 'q';
+        *ptr++ = 'u';
+        *ptr++ = 'o';
+        *ptr++ = 't';
+        *ptr++ = ';';
+      } else if (*aString == '\'') {
+        *ptr++ = '&';
+        *ptr++ = '#';
+        *ptr++ = '3';
+        *ptr++ = '9';
+        *ptr++ = ';';
+      } else {
+        *ptr++ = *aString;
+      }
+    }
+    *ptr = '\0';
+  }
+
+  return rv;
+}
+
+char16_t*
+nsEscapeHTML2(const char16_t* aSourceBuffer, int32_t aSourceBufferLen)
+{
+  // Calculate the length, if the caller didn't.
+  if (aSourceBufferLen < 0) {
+    aSourceBufferLen = NS_strlen(aSourceBuffer);
+  }
+
+  /* XXX Hardcoded max entity len. */
+  if (uint32_t(aSourceBufferLen) >=
+      ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t)))) {
+    return nullptr;
+  }
+
+  char16_t* resultBuffer = (char16_t*)moz_xmalloc(
+    aSourceBufferLen * 6 * sizeof(char16_t) + sizeof(char16_t('\0')));
+  char16_t* ptr = resultBuffer;
+
+  if (resultBuffer) {
+    int32_t i;
+
+    for (i = 0; i < aSourceBufferLen; ++i) {
+      if (aSourceBuffer[i] == '<') {
+        *ptr++ = '&';
+        *ptr++ = 'l';
+        *ptr++ = 't';
+        *ptr++ = ';';
+      } else if (aSourceBuffer[i] == '>') {
+        *ptr++ = '&';
+        *ptr++ = 'g';
+        *ptr++ = 't';
+        *ptr++ = ';';
+      } else if (aSourceBuffer[i] == '&') {
+        *ptr++ = '&';
+        *ptr++ = 'a';
+        *ptr++ = 'm';
+        *ptr++ = 'p';
+        *ptr++ = ';';
+      } else if (aSourceBuffer[i] == '"') {
+        *ptr++ = '&';
+        *ptr++ = 'q';
+        *ptr++ = 'u';
+        *ptr++ = 'o';
+        *ptr++ = 't';
+        *ptr++ = ';';
+      } else if (aSourceBuffer[i] == '\'') {
+        *ptr++ = '&';
+        *ptr++ = '#';
+        *ptr++ = '3';
+        *ptr++ = '9';
+        *ptr++ = ';';
+      } else {
+        *ptr++ = aSourceBuffer[i];
+      }
+    }
+    *ptr = 0;
+  }
+
+  return resultBuffer;
+}
+
+//----------------------------------------------------------------------------------------
+//
+// The following table encodes which characters needs to be escaped for which
+// parts of an URL.  The bits are the "url components" in the enum EscapeMask,
+// see nsEscape.h.
+//
+// esc_Scheme        =     1
+// esc_Username      =     2
+// esc_Password      =     4
+// esc_Host          =     8
+// esc_Directory     =    16
+// esc_FileBaseName  =    32
+// esc_FileExtension =    64
+// esc_Param         =   128
+// esc_Query         =   256
+// esc_Ref           =   512
+
+static const uint32_t EscapeChars[256] =
+//   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+{
+     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  // 0x
+     0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  // 1x
+     0,1023,   0, 512,1023,   0,1023, 112,1023,1023,1023,1023,1023,1023, 953, 784,  // 2x   !"#$%&'()*+,-./
+  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008,   0,1008,   0, 768,  // 3x  0123456789:;<=>?
+  1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,  // 4x  @ABCDEFGHIJKLMNO
+  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008, 896,1008, 896,1023,  // 5x  PQRSTUVWXYZ[\]^_
+   384,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,  // 6x  `abcdefghijklmno
+  1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,  // 7x  pqrstuvwxyz{|}~ DEL
+     0                                                                              // 80 to FF are zero
+};
+
+static uint16_t dontNeedEscape(unsigned char aChar, uint32_t aFlags)
+{
+  return EscapeChars[(uint32_t)aChar] & aFlags;
+}
+static uint16_t dontNeedEscape(uint16_t aChar, uint32_t aFlags)
+{
+  return aChar < mozilla::ArrayLength(EscapeChars) ?
+    (EscapeChars[(uint32_t)aChar]  & aFlags) : 0;
+}
+
+//----------------------------------------------------------------------------------------
+
+/**
+ * Templated helper for URL escaping a portion of a string.
+ *
+ * @param aPart The pointer to the beginning of the portion of the string to
+ *  escape.
+ * @param aPartLen The length of the string to escape.
+ * @param aFlags Flags used to configure escaping. @see EscapeMask
+ * @param aResult String that has the URL escaped portion appended to. Only
+ *  altered if the string is URL escaped or |esc_AlwaysCopy| is specified.
+ * @param aDidAppend Indicates whether or not data was appended to |aResult|.
+ * @return NS_ERROR_INVALID_ARG, NS_ERROR_OUT_OF_MEMORY on failure.
+ */
+template<class T>
+static nsresult
+T_EscapeURL(const typename T::char_type* aPart, size_t aPartLen,
+            uint32_t aFlags, T& aResult, bool& aDidAppend)
+{
+  typedef nsCharTraits<typename T::char_type> traits;
+  typedef typename traits::unsigned_char_type unsigned_char_type;
+  static_assert(sizeof(*aPart) == 1 || sizeof(*aPart) == 2,
+                "unexpected char type");
+
+  if (!aPart) {
+    NS_NOTREACHED("null pointer");
+    return NS_ERROR_INVALID_ARG;
+  }
+
+  bool forced = !!(aFlags & esc_Forced);
+  bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
+  bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
+  bool writing = !!(aFlags & esc_AlwaysCopy);
+  bool colon = !!(aFlags & esc_Colon);
+
+  auto src = reinterpret_cast<const unsigned_char_type*>(aPart);
+
+  typename T::char_type tempBuffer[100];
+  unsigned int tempBufferPos = 0;
+
+  bool previousIsNonASCII = false;
+  for (size_t i = 0; i < aPartLen; ++i) {
+    unsigned_char_type c = *src++;
+
+    // if the char has not to be escaped or whatever follows % is
+    // a valid escaped string, just copy the char.
+    //
+    // Also the % will not be escaped until forced
+    // See bugzilla bug 61269 for details why we changed this
+    //
+    // And, we will not escape non-ascii characters if requested.
+    // On special request we will also escape the colon even when
+    // not covered by the matrix.
+    // ignoreAscii is not honored for control characters (C0 and DEL)
+    //
+    // And, we should escape the '|' character when it occurs after any
+    // non-ASCII character as it may be aPart of a multi-byte character.
+    //
+    // 0x20..0x7e are the valid ASCII characters. We also escape spaces
+    // (0x20) since they are not legal in URLs.
+    if ((dontNeedEscape(c, aFlags) || (c == HEX_ESCAPE && !forced)
+         || (c > 0x7f && ignoreNonAscii)
+         || (c > 0x20 && c < 0x7f && ignoreAscii))
+        && !(c == ':' && colon)
+        && !(previousIsNonASCII && c == '|' && !ignoreNonAscii)) {
+      if (writing) {
+        tempBuffer[tempBufferPos++] = c;
+      }
+    } else { /* do the escape magic */
+      if (!writing) {
+        if (!aResult.Append(aPart, i, fallible)) {
+          return NS_ERROR_OUT_OF_MEMORY;
+        }
+        writing = true;
+      }
+      uint32_t len = ::AppendPercentHex(tempBuffer + tempBufferPos, c);
+      tempBufferPos += len;
+      MOZ_ASSERT(len <= ENCODE_MAX_LEN, "potential buffer overflow");
+    }
+
+    // Flush the temp buffer if it doesnt't have room for another encoded char.
+    if (tempBufferPos >= mozilla::ArrayLength(tempBuffer) - ENCODE_MAX_LEN) {
+      NS_ASSERTION(writing, "should be writing");
+      if (!aResult.Append(tempBuffer, tempBufferPos, fallible)) {
+        return NS_ERROR_OUT_OF_MEMORY;
+      }
+      tempBufferPos = 0;
+    }
+
+    previousIsNonASCII = (c > 0x7f);
+  }
+  if (writing) {
+    if (!aResult.Append(tempBuffer, tempBufferPos, fallible)) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+  }
+  aDidAppend = writing;
+  return NS_OK;
+}
+
+bool
+NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
+             nsACString& aResult)
+{
+  if (aPartLen < 0) {
+    aPartLen = strlen(aPart);
+  }
+
+  bool result = false;
+  nsresult rv = T_EscapeURL(aPart, aPartLen, aFlags, aResult, result);
+  if (NS_FAILED(rv)) {
+    ::NS_ABORT_OOM(aResult.Length() * sizeof(nsACString::char_type));
+  }
+
+  return result;
+}
+
+nsresult
+NS_EscapeURL(const nsCSubstring& aStr, uint32_t aFlags, nsCSubstring& aResult,
+             const mozilla::fallible_t&)
+{
+  bool appended = false;
+  nsresult rv = T_EscapeURL(aStr.Data(), aStr.Length(), aFlags, aResult, appended);
+  if (NS_FAILED(rv)) {
+    aResult.Truncate();
+    return rv;
+  }
+
+  if (!appended) {
+    aResult = aStr;
+  }
+
+  return rv;
+}
+
+const nsSubstring&
+NS_EscapeURL(const nsSubstring& aStr, uint32_t aFlags, nsSubstring& aResult)
+{
+  bool result = false;
+  nsresult rv = T_EscapeURL<nsSubstring>(aStr.Data(), aStr.Length(), aFlags, aResult, result);
+
+  if (NS_FAILED(rv)) {
+    ::NS_ABORT_OOM(aResult.Length() * sizeof(nsSubstring::char_type));
+  }
+
+  if (result) {
+    return aResult;
+  }
+  return aStr;
+}
+
+// Starting at aStr[aStart] find the first index in aStr that matches any
+// character in aForbidden. Return false if not found.
+static bool
+FindFirstMatchFrom(const nsAFlatString& aStr, size_t aStart,
+                   const nsTArray<char16_t>& aForbidden, size_t* aIndex)
+{
+  const size_t len = aForbidden.Length();
+  for (size_t j = aStart, l = aStr.Length(); j < l; ++j) {
+    size_t unused;
+    if (mozilla::BinarySearch(aForbidden, 0, len, aStr[j], &unused)) {
+      *aIndex = j;
+      return true;
+    }
+  }
+  return false;
+}
+
+const nsSubstring&
+NS_EscapeURL(const nsAFlatString& aStr, const nsTArray<char16_t>& aForbidden,
+             nsSubstring& aResult)
+{
+  bool didEscape = false;
+  for (size_t i = 0, strLen = aStr.Length(); i < strLen; ) {
+    size_t j;
+    if (MOZ_UNLIKELY(FindFirstMatchFrom(aStr, i, aForbidden, &j))) {
+      if (i == 0) {
+        didEscape = true;
+        aResult.Truncate();
+        aResult.SetCapacity(aStr.Length());
+      }
+      if (j != i) {
+        // The substring from 'i' up to 'j' that needs no escaping.
+        aResult.Append(nsDependentSubstring(aStr, i, j - i));
+      }
+      char16_t buffer[ENCODE_MAX_LEN];
+      uint32_t bufferLen = ::AppendPercentHex(buffer, aStr[j]);
+      MOZ_ASSERT(bufferLen <= ENCODE_MAX_LEN, "buffer overflow");
+      aResult.Append(buffer, bufferLen);
+      i = j + 1;
+    } else {
+      if (MOZ_UNLIKELY(didEscape)) {
+        // The tail of the string that needs no escaping.
+        aResult.Append(nsDependentSubstring(aStr, i, strLen - i));
+      }
+      break;
+    }
+  }
+  if (MOZ_UNLIKELY(didEscape)) {
+    return aResult;
+  }
+  return aStr;
+}
+
+#define ISHEX(c) memchr(hexCharsUpperLower, c, sizeof(hexCharsUpperLower)-1)
+
+bool
+NS_UnescapeURL(const char* aStr, int32_t aLen, uint32_t aFlags,
+               nsACString& aResult)
+{
+  if (!aStr) {
+    NS_NOTREACHED("null pointer");
+    return false;
+  }
+
+  MOZ_ASSERT(aResult.IsEmpty(),
+             "Passing a non-empty string as an out parameter!");
+
+  if (aLen < 0) {
+    aLen = strlen(aStr);
+  }
+
+  bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
+  bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
+  bool writing = !!(aFlags & esc_AlwaysCopy);
+  bool skipControl = !!(aFlags & esc_SkipControl);
+  bool skipInvalidHostChar = !!(aFlags & esc_Host);
+
+  if (writing) {
+    aResult.SetCapacity(aLen);
+  }
+
+  const char* last = aStr;
+  const char* p = aStr;
+
+  for (int i = 0; i < aLen; ++i, ++p) {
+    if (*p == HEX_ESCAPE && i < aLen - 2) {
+      unsigned char c1 = *((unsigned char*)p + 1);
+      unsigned char c2 = *((unsigned char*)p + 2);
+      unsigned char u = (UNHEX(c1) << 4) + UNHEX(c2);
+      if (ISHEX(c1) && ISHEX(c2) &&
+          (!skipInvalidHostChar || dontNeedEscape(u, aFlags) || c1 >= '8') &&
+          ((c1 < '8' && !ignoreAscii) || (c1 >= '8' && !ignoreNonAscii)) &&
+          !(skipControl &&
+            (c1 < '2' || (c1 == '7' && (c2 == 'f' || c2 == 'F'))))) {
+        if (!writing) {
+          writing = true;
+          aResult.SetCapacity(aLen);
+        }
+        if (p > last) {
+          aResult.Append(last, p - last);
+          last = p;
+        }
+        aResult.Append(u);
+        i += 2;
+        p += 2;
+        last += 3;
+      }
+    }
+  }
+  if (writing && last < aStr + aLen) {
+    aResult.Append(last, aStr + aLen - last);
+  }
+
+  return writing;
+}