summaryrefslogtreecommitdiffstats
path: root/netwerk/base/nsURLHelper.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'netwerk/base/nsURLHelper.cpp')
-rw-r--r--netwerk/base/nsURLHelper.cpp1168
1 files changed, 1168 insertions, 0 deletions
diff --git a/netwerk/base/nsURLHelper.cpp b/netwerk/base/nsURLHelper.cpp
new file mode 100644
index 000000000..8def697da
--- /dev/null
+++ b/netwerk/base/nsURLHelper.cpp
@@ -0,0 +1,1168 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* vim:set ts=4 sw=4 sts=4 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/RangedPtr.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "nsURLHelper.h"
+#include "nsIFile.h"
+#include "nsIURLParser.h"
+#include "nsCOMPtr.h"
+#include "nsCRT.h"
+#include "nsNetCID.h"
+#include "mozilla/Preferences.h"
+#include "prnetdb.h"
+#include "mozilla/Tokenizer.h"
+
+using namespace mozilla;
+
+//----------------------------------------------------------------------------
+// Init/Shutdown
+//----------------------------------------------------------------------------
+
+// True once InitGlobals() has run; reset by net_ShutdownURLHelper().
+static bool gInitialized = false;
+// URL parser services cached by InitGlobals(). Every non-null pointer holds
+// a reference taken with NS_ADDREF and dropped in net_ShutdownURLHelper().
+static nsIURLParser *gNoAuthURLParser = nullptr;
+static nsIURLParser *gAuthURLParser = nullptr;
+static nsIURLParser *gStdURLParser = nullptr;
+// Maximum accepted URL length. InitGlobals() registers this variable as a
+// cache of the "network.standard-url.max-length" preference.
+static int32_t gMaxLength = 1048576; // Default: 1MB
+
+// Looks up the three URL parser services, stores an owning reference to each
+// one that could be obtained in the matching global, marks the module as
+// initialized and registers gMaxLength as a cache of the
+// "network.standard-url.max-length" preference.
+static void
+InitGlobals()
+{
+    nsCOMPtr<nsIURLParser> noAuth =
+        do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
+    NS_ASSERTION(noAuth, "failed getting 'noauth' url parser");
+    if (noAuth) {
+        // the global keeps its own reference beyond the nsCOMPtr's lifetime
+        NS_ADDREF(gNoAuthURLParser = noAuth.get());
+    }
+
+    nsCOMPtr<nsIURLParser> auth =
+        do_GetService(NS_AUTHURLPARSER_CONTRACTID);
+    NS_ASSERTION(auth, "failed getting 'auth' url parser");
+    if (auth) {
+        NS_ADDREF(gAuthURLParser = auth.get());
+    }
+
+    nsCOMPtr<nsIURLParser> std =
+        do_GetService(NS_STDURLPARSER_CONTRACTID);
+    NS_ASSERTION(std, "failed getting 'std' url parser");
+    if (std) {
+        NS_ADDREF(gStdURLParser = std.get());
+    }
+
+    gInitialized = true;
+    Preferences::AddIntVarCache(&gMaxLength,
+                                "network.standard-url.max-length",
+                                1048576);
+}
+
+// Releases the cached parser services and marks the module uninitialized.
+// Does nothing when InitGlobals() has not run since the last shutdown.
+void
+net_ShutdownURLHelper()
+{
+    if (!gInitialized) {
+        return;
+    }
+
+    // NS_IF_RELEASE tolerates parsers that could not be obtained
+    NS_IF_RELEASE(gNoAuthURLParser);
+    NS_IF_RELEASE(gAuthURLParser);
+    NS_IF_RELEASE(gStdURLParser);
+    gInitialized = false;
+}
+
+// Returns the current value of gMaxLength. This accessor does not call
+// InitGlobals() itself, so the preference cache is only in effect once one
+// of the parser getters has triggered initialization.
+int32_t
+net_GetURLMaxLength()
+{
+    return gMaxLength;
+}
+
+//----------------------------------------------------------------------------
+// nsIURLParser getters
+//----------------------------------------------------------------------------
+
+// Returns the cached 'auth' URL parser, running InitGlobals() on first use.
+// The result is nullptr if the service could not be obtained.
+nsIURLParser *
+net_GetAuthURLParser()
+{
+    if (!gInitialized) {
+        InitGlobals();
+    }
+    return gAuthURLParser;
+}
+
+// Returns the cached 'noauth' URL parser, running InitGlobals() on first
+// use. The result is nullptr if the service could not be obtained.
+nsIURLParser *
+net_GetNoAuthURLParser()
+{
+    if (!gInitialized) {
+        InitGlobals();
+    }
+    return gNoAuthURLParser;
+}
+
+// Returns the cached 'std' URL parser, running InitGlobals() on first use.
+// The result is nullptr if the service could not be obtained.
+nsIURLParser *
+net_GetStdURLParser()
+{
+    if (!gInitialized) {
+        InitGlobals();
+    }
+    return gStdURLParser;
+}
+
+//---------------------------------------------------------------------------
+// GetFileFromURLSpec implementations
+//---------------------------------------------------------------------------
+// Builds the URL spec for |aFile| via net_GetURLSpecFromActualFile() and
+// makes sure it ends with '/'. |result| is only written on success.
+nsresult
+net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result)
+{
+    nsAutoCString spec;
+    const nsresult rv = net_GetURLSpecFromActualFile(aFile, spec);
+    if (NS_FAILED(rv)) {
+        return rv;
+    }
+
+    const bool hasTrailingSlash = (spec.Last() == '/');
+    if (!hasTrailingSlash) {
+        spec += '/';
+    }
+
+    result = spec;
+    return NS_OK;
+}
+
+// Builds the URL spec for |aFile| via net_GetURLSpecFromActualFile().
+// |result| is only written on success.
+//
+// A spec that refers to a directory must end with a slash, because that
+// changes how relative URLs resolve against it when it is used as a base
+// URL. When IsDirectory() fails (for instance because the file does not
+// exist) no assumption is made and the spec is left as it is.
+nsresult
+net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result)
+{
+    nsAutoCString spec;
+    const nsresult rv = net_GetURLSpecFromActualFile(aFile, spec);
+    if (NS_FAILED(rv)) {
+        return rv;
+    }
+
+    if (spec.Last() != '/') {
+        bool isDir = false;
+        // a failing IsDirectory() is deliberately not reported to the caller
+        if (NS_SUCCEEDED(aFile->IsDirectory(&isDir)) && isDir) {
+            spec += '/';
+        }
+    }
+
+    result = spec;
+    return NS_OK;
+}
+
+//----------------------------------------------------------------------------
+// file:// URL parsing
+//----------------------------------------------------------------------------
+
+// Splits a file:// URL into directory, file base name and file extension.
+//
+// Returns NS_ERROR_MALFORMED_URI when |inURL| is longer than gMaxLength,
+// NS_ERROR_UNEXPECTED when the scheme is not "file" or the 'noauth' parser
+// is unavailable, and otherwise the first failure reported by the scheme
+// extraction or by the parser. The three out strings are truncated before
+// parsing starts and are only filled for components of positive length.
+nsresult
+net_ParseFileURL(const nsACString &inURL,
+                 nsACString &outDirectory,
+                 nsACString &outFileBaseName,
+                 nsACString &outFileExtension)
+{
+    if (inURL.Length() > static_cast<uint32_t>(gMaxLength)) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    outDirectory.Truncate();
+    outFileBaseName.Truncate();
+    outFileExtension.Truncate();
+
+    const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
+    const char *spec = flatURL.get();
+
+    nsAutoCString scheme;
+    nsresult rv = net_ExtractURLScheme(flatURL, scheme);
+    if (NS_FAILED(rv)) {
+        return rv;
+    }
+
+    if (!scheme.EqualsLiteral("file")) {
+        NS_ERROR("must be a file:// url");
+        return NS_ERROR_UNEXPECTED;
+    }
+
+    nsIURLParser *parser = net_GetNoAuthURLParser();
+    NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
+
+    // step 1: URL -> path (scheme and authority are not needed)
+    uint32_t pathPos;
+    int32_t pathLen;
+    rv = parser->ParseURL(spec, flatURL.Length(),
+                          nullptr, nullptr,
+                          nullptr, nullptr,
+                          &pathPos, &pathLen);
+    if (NS_FAILED(rv)) {
+        return rv;
+    }
+
+    // step 2: path -> filepath (query and ref are not needed)
+    uint32_t filepathPos;
+    int32_t filepathLen;
+    rv = parser->ParsePath(spec + pathPos, pathLen,
+                           &filepathPos, &filepathLen,
+                           nullptr, nullptr,
+                           nullptr, nullptr);
+    if (NS_FAILED(rv)) {
+        return rv;
+    }
+
+    // make the filepath position relative to the start of the URL
+    filepathPos += pathPos;
+
+    // step 3: filepath -> directory, basename, extension
+    uint32_t directoryPos, basenamePos, extensionPos;
+    int32_t directoryLen, basenameLen, extensionLen;
+    rv = parser->ParseFilePath(spec + filepathPos, filepathLen,
+                               &directoryPos, &directoryLen,
+                               &basenamePos, &basenameLen,
+                               &extensionPos, &extensionLen);
+    if (NS_FAILED(rv)) {
+        return rv;
+    }
+
+    if (directoryLen > 0) {
+        outDirectory =
+            Substring(inURL, filepathPos + directoryPos, directoryLen);
+    }
+    if (basenameLen > 0) {
+        outFileBaseName =
+            Substring(inURL, filepathPos + basenamePos, basenameLen);
+    }
+    if (extensionLen > 0) {
+        outFileExtension =
+            Substring(inURL, filepathPos + extensionPos, extensionLen);
+    }
+    // The no-auth parser never reports a host.
+    // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
+
+    return NS_OK;
+}
+
+//----------------------------------------------------------------------------
+// path manipulation functions
+//----------------------------------------------------------------------------
+
+// Normalizes the NUL-terminated |path| in place. Only the part before the
+// first '?' or '#' is rewritten; the rest is copied through unchanged.
+//
+//  1. A first pass rewrites "%2E"/"%2e" to ".", stopping at the last '/'
+//     located by the backwards scan below.
+//  2. A second pass drops "/./" segments and lets "/../" remove the
+//     preceding segment; a trailing "/." is reduced to "/".
+//
+// flags:
+//  NET_COALESCE_ALLOW_RELATIVE_ROOT   ".." segments that would climb above
+//                                     the root are kept in the path instead
+//                                     of being dropped.
+//  NET_COALESCE_DOUBLE_SLASH_IS_ROOT  a leading "//" or "/%2F" is remembered
+//                                     as a special root marker.
+void
+net_CoalesceDirs(netCoalesceFlags flags, char* path)
+{
+ /* Stolen from the old netlib's mkparse.c.
+ *
+ * modifies a url of the form /foo/../foo1 -> /foo1
+ * and /foo/./foo1 -> /foo/foo1
+ * and /foo/foo1/.. -> /foo/
+ */
+ // fwdPtr is the read cursor, urlPtr the write cursor into the same buffer.
+ char *fwdPtr = path;
+ char *urlPtr = path;
+ char *lastslash = path;
+ // number of path segments seen so far that a ".." may still remove
+ uint32_t traversal = 0;
+ // length of the special root marker: 0 (none), 2 ("//") or 4 ("/%2F")
+ uint32_t special_ftp_len = 0;
+
+ /* Remember if this url is a special ftp one: */
+ if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)
+ {
+ /* some schemes (for example ftp) have the speciality that
+ the path can begin // or /%2F to mark the root of the
+ servers filesystem, a simple / only marks the root relative
+ to the user loging in. We remember the length of the marker */
+ if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
+ special_ftp_len = 4;
+ else if (nsCRT::strncmp(path,"//",2) == 0 )
+ special_ftp_len = 2;
+ }
+
+ /* find the last slash before # or ? */
+ for(; (*fwdPtr != '\0') &&
+ (*fwdPtr != '?') &&
+ (*fwdPtr != '#'); ++fwdPtr)
+ {
+ }
+
+ /* found nothing, but go back one only */
+ /* if there is something to go back to */
+ if (fwdPtr != path && *fwdPtr == '\0')
+ {
+ --fwdPtr;
+ }
+
+ /* search the slash */
+ for(; (fwdPtr != path) &&
+ (*fwdPtr != '/'); --fwdPtr)
+ {
+ }
+ lastslash = fwdPtr;
+ fwdPtr = path;
+
+ /* replace all %2E or %2e with . in the path */
+ /* but stop at lastchar if non null */
+ for(; (*fwdPtr != '\0') &&
+ (*fwdPtr != '?') &&
+ (*fwdPtr != '#') &&
+ (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
+ {
+ // the && chain short-circuits, so the look-ahead never reads past the NUL
+ if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&
+ (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
+ {
+ *urlPtr++ = '.';
+ ++fwdPtr;
+ ++fwdPtr;
+ }
+ else
+ {
+ *urlPtr++ = *fwdPtr;
+ }
+ }
+ // Copy remaining stuff past the #?;
+ for (; *fwdPtr != '\0'; ++fwdPtr)
+ {
+ *urlPtr++ = *fwdPtr;
+ }
+ *urlPtr = '\0'; // terminate the url
+
+ // start again, this time for real
+ fwdPtr = path;
+ urlPtr = path;
+
+ for(; (*fwdPtr != '\0') &&
+ (*fwdPtr != '?') &&
+ (*fwdPtr != '#'); ++fwdPtr)
+ {
+ if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
+ {
+ // remove . followed by slash
+ ++fwdPtr;
+ }
+ else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&
+ (*(fwdPtr+3) == '/' ||
+ *(fwdPtr+3) == '\0' || // This will take care of
+ *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag
+ *(fwdPtr+3) == '#'))
+ {
+ // remove foo/..
+ // reverse the urlPtr to the previous slash if possible
+ // if url does not allow relative root then drop .. above root
+ // otherwise retain them in the path
+ if(traversal > 0 || !(flags &
+ NET_COALESCE_ALLOW_RELATIVE_ROOT))
+ {
+ if (urlPtr != path)
+ urlPtr--; // we must be going back at least by one
+ for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
+ ; // null body
+ // NOTE: traversal is unsigned. It can only be 0 here when
+ // NET_COALESCE_ALLOW_RELATIVE_ROOT is unset; the wrap-around is
+ // harmless then because the condition above no longer depends on it.
+ --traversal; // count back
+ // forward the fwdPtr past the ../
+ fwdPtr += 2;
+ // if we have reached the beginning of the path
+ // while searching for the previous / and we remember
+ // that it is an url that begins with /%2F then
+ // advance urlPtr again by 3 chars because /%2F already
+ // marks the root of the path
+ if (urlPtr == path && special_ftp_len > 3)
+ {
+ ++urlPtr;
+ ++urlPtr;
+ ++urlPtr;
+ }
+ // special case if we have reached the end
+ // to preserve the last /
+ if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
+ ++urlPtr;
+ }
+ else
+ {
+ // there are too many /.. in this path, just copy them instead.
+ // forward the urlPtr past the /.. and copying it
+
+ // However if we remember it is an url that starts with
+ // /%2F and urlPtr just points at the "F" of "/%2F" then do
+ // not overwrite it with the /, just copy .. and move forward
+ // urlPtr.
+ if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
+ ++urlPtr;
+ else
+ *urlPtr++ = *fwdPtr;
+ ++fwdPtr;
+ *urlPtr++ = *fwdPtr;
+ ++fwdPtr;
+ *urlPtr++ = *fwdPtr;
+ }
+ }
+ else
+ {
+ // count the hierarchy, but only if we have not reached
+ // the root of some special urls with a special root marker
+ if (*fwdPtr == '/' && *(fwdPtr+1) != '.' &&
+ (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
+ traversal++;
+ // copy the url incrementally
+ *urlPtr++ = *fwdPtr;
+ }
+ }
+
+ /*
+ * Now lets remove trailing . case
+ * /foo/foo1/. -> /foo/foo1/
+ */
+
+ if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
+ urlPtr--;
+
+ // Copy remaining stuff past the #?;
+ for (; *fwdPtr != '\0'; ++fwdPtr)
+ {
+ *urlPtr++ = *fwdPtr;
+ }
+ *urlPtr = '\0'; // terminate the url
+}
+
+// Resolves |relativePath| against |basePath| and stores the outcome in
+// |result|.
+//
+// The relative path is consumed segment by segment up to the first '#' or
+// '?' (or its end): ".." pops the last segment of the path built so far,
+// "." and empty segments are ignored, and anything else is appended. The
+// text from the '#' or '?' onwards is appended verbatim.
+//
+// Returns NS_ERROR_MALFORMED_URI when a ".." is met while the path built so
+// far is too short to pop from; |result| is not written in that case.
+nsresult
+net_ResolveRelativePath(const nsACString &relativePath,
+                        const nsACString &basePath,
+                        nsACString &result)
+{
+    nsAutoCString segment;
+    nsAutoCString resolved(basePath);
+
+    // A '/' has to be inserted before the next appended segment unless the
+    // path is empty or already ends with one.
+    bool needsDelim = !resolved.IsEmpty() && resolved.Last() != '/';
+
+    nsACString::const_iterator cur, end;
+    relativePath.BeginReading(cur);
+    relativePath.EndReading(end);
+
+    char c;
+    for (bool done = false; !done; ++cur) {
+        // the end of the input is handled like a NUL terminator
+        c = (cur == end) ? '\0' : *cur;
+
+        const bool terminator = (c == '\0' || c == '#' || c == '?');
+        if (!terminator && c != '/') {
+            // ordinary character: keep collecting the current segment
+            segment += c;
+            continue;
+        }
+        done = terminator;
+
+        // a delimiter closes the current segment
+        if (segment.EqualsLiteral("..")) {
+            // Pop the last segment. When the path already ends with the
+            // delimiter, skip over it while searching to the left.
+            int32_t offset = resolved.Length() - (needsDelim ? 1 : 2);
+            if (offset < 0) {
+                return NS_ERROR_MALFORMED_URI;
+            }
+            const int32_t slash = resolved.RFind("/", false, offset);
+            if (slash >= 0) {
+                resolved.Truncate(slash + 1);
+            } else {
+                resolved.Truncate();
+            }
+        } else if (!segment.IsEmpty() && !segment.EqualsLiteral(".")) {
+            if (needsDelim) {
+                resolved += '/';
+            }
+            resolved += segment;
+            needsDelim = true;
+        }
+        segment.Truncate();
+    }
+
+    // The loop stepped one past the character that stopped it; go back so
+    // the '#' or '?' itself is included in what gets appended.
+    if (c != '\0') {
+        resolved += Substring(--cur, end);
+    }
+
+    result = resolved;
+    return NS_OK;
+}
+
+//----------------------------------------------------------------------------
+// scheme fu
+//----------------------------------------------------------------------------
+
+// Plain-function wrapper around nsCRT::IsAsciiAlpha so it can be handed to
+// Tokenizer::CheckChar as a classifier.
+static bool
+isAsciiAlpha(char c)
+{
+    return nsCRT::IsAsciiAlpha(c);
+}
+
+// True for the characters accepted inside a scheme: ASCII letters, ASCII
+// digits, '+', '.' and '-'.
+static bool
+net_IsValidSchemeChar(const char aChar)
+{
+    const bool alphaNumeric =
+        nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar);
+    return alphaNumeric || aChar == '+' || aChar == '.' || aChar == '-';
+}
+
+/* Extract the scheme of |inURI| into |scheme|.
+ *
+ * Leading characters <= 0x20 are skipped. The scheme has to start with an
+ * ASCII letter, may continue with scheme characters interleaved with
+ * \r, \n and \t, and has to be followed by ':'. The \r\n\t characters are
+ * removed from the extracted value.
+ *
+ * Returns NS_ERROR_MALFORMED_URI (leaving |scheme| untouched) when no such
+ * prefix exists.
+ */
+nsresult
+net_ExtractURLScheme(const nsACString &inURI,
+                     nsACString& scheme)
+{
+    nsACString::const_iterator cur, end;
+    inURI.BeginReading(cur);
+    inURI.EndReading(end);
+
+    // skip C0 controls and space at the beginning
+    while (cur != end && static_cast<uint8_t>(*cur) <= 0x20) {
+        ++cur;
+    }
+
+    Tokenizer p(Substring(cur, end), "\r\n\t");
+    p.Record();
+
+    // the first character must be a letter
+    if (!p.CheckChar(isAsciiAlpha)) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    // consume scheme characters and any interleaved \r\n\t
+    for (;;) {
+        if (!p.CheckChar(net_IsValidSchemeChar) && !p.CheckWhite()) {
+            break;
+        }
+    }
+
+    if (!p.CheckChar(':')) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    p.Claim(scheme);
+    scheme.StripChars("\r\n\t");
+    return NS_OK;
+}
+
+// Returns true when the |schemeLen| characters at |scheme| form a valid
+// scheme: an ASCII letter followed by any number of ASCII letters, ASCII
+// digits, '+', '.' or '-'.
+//
+// A null pointer or a zero length yields false. The length is checked
+// before the first character is read, so an empty range is never
+// dereferenced and an empty scheme is never reported as valid.
+bool
+net_IsValidScheme(const char *scheme, uint32_t schemeLen)
+{
+    if (!scheme || schemeLen == 0)
+        return false;
+
+    // first char must be alpha
+    if (!nsCRT::IsAsciiAlpha(*scheme))
+        return false;
+
+    // nsCStrings may have embedded nulls -- reject those too. The first
+    // character was already checked above.
+    for (++scheme, --schemeLen; schemeLen; ++scheme, --schemeLen) {
+        if (!net_IsValidSchemeChar(*scheme))
+            return false;
+    }
+
+    return true;
+}
+
+// Returns true when |uri|, after skipping leading characters <= 0x20,
+// consists of a scheme, ':' and two '/' characters. \r, \n and \t are
+// tolerated inside the scheme, after the ':' and between the two slashes.
+bool
+net_IsAbsoluteURL(const nsACString& uri)
+{
+    nsACString::const_iterator cur, end;
+    uri.BeginReading(cur);
+    uri.EndReading(end);
+
+    // skip C0 controls and space at the beginning
+    while (cur != end && static_cast<uint8_t>(*cur) <= 0x20) {
+        ++cur;
+    }
+
+    Tokenizer p(Substring(cur, end), "\r\n\t");
+
+    // the scheme has to start with a letter
+    if (!p.CheckChar(isAsciiAlpha)) {
+        return false;
+    }
+
+    // consume scheme characters and any interleaved \r\n\t
+    for (;;) {
+        if (!p.CheckChar(net_IsValidSchemeChar) && !p.CheckWhite()) {
+            break;
+        }
+    }
+
+    if (!p.CheckChar(':')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (!p.CheckChar('/')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    // a second slash makes the spec absolute
+    return p.CheckChar('/');
+}
+
+// Copies |input| to |result| with leading and trailing characters <= 0x20
+// trimmed off and every \r, \n and \t in between removed. When nothing has
+// to be trimmed or removed, |result| is assigned from |input| directly.
+//
+// NOTE(review): |result| is truncated before |input| is read, so passing
+// the same string object for both looks unsafe -- confirm against callers.
+void
+net_FilterURIString(const nsACString& input, nsACString& result)
+{
+    static const char kCharsToStrip[] = "\r\n\t";
+    // std::end() of the array points past its NUL terminator. The search
+    // set must stop before it, otherwise an embedded NUL in the input
+    // counts as a character to strip and forces the copy path below even
+    // though StripChars() would not remove anything.
+    const char* const stripBegin = std::begin(kCharsToStrip);
+    const char* const stripEnd = std::end(kCharsToStrip) - 1;
+
+    result.Truncate();
+
+    auto start = input.BeginReading();
+    auto end = input.EndReading();
+
+    // Trim off leading and trailing invalid chars.
+    auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
+    auto newStart = std::find_if(start, end, charFilter);
+    auto newEnd = std::find_if(
+        std::reverse_iterator<decltype(end)>(end),
+        std::reverse_iterator<decltype(newStart)>(newStart),
+        charFilter).base();
+
+    // Check if chars need to be stripped.
+    auto itr = std::find_first_of(newStart, newEnd, stripBegin, stripEnd);
+    const bool needsStrip = itr != newEnd;
+
+    // Just use the passed in string rather than creating new copies if no
+    // changes are necessary.
+    if (newStart == start && newEnd == end && !needsStrip) {
+        result = input;
+        return;
+    }
+
+    result.Assign(Substring(newStart, newEnd));
+    if (needsStrip) {
+        result.StripChars(kCharsToStrip);
+    }
+}
+
+
+#if defined(XP_WIN)
+// Appends |aURL| to |aResultBuf| with every backslash turned into a forward
+// slash, but only if |aURL| contains at least one backslash. Returns
+// whether a backslash was found; |aResultBuf| is left untouched otherwise.
+bool
+net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
+{
+    nsACString::const_iterator first, last;
+    aURL.BeginReading(first);
+    aURL.EndReading(last);
+
+    const char *const urlEnd = last.get();
+    const char *chunk = first.get();  // start of the run not yet copied
+    bool sawBackslash = false;
+
+    const char *cur = chunk;
+    for (; cur != urlEnd; ++cur) {
+        if (*cur != '\\') {
+            continue;
+        }
+
+        sawBackslash = true;
+        // flush the run in front of the backslash, then the replacement
+        if (cur > chunk) {
+            aResultBuf.Append(chunk, cur - chunk);
+        }
+        aResultBuf += '/';
+        chunk = cur + 1;
+    }
+
+    // flush whatever follows the last backslash
+    if (sawBackslash && cur > chunk) {
+        aResultBuf.Append(chunk, cur - chunk);
+    }
+
+    return sawBackslash;
+}
+#endif
+
+//----------------------------------------------------------------------------
+// miscellaneous (i.e., stuff that should really be elsewhere)
+//----------------------------------------------------------------------------
+
+// Lower-cases |c| in place when it is an ASCII upper-case letter; any
+// other value is left alone.
+static inline
+void ToLower(char &c)
+{
+    const bool isUpper = (c >= 'A' && c <= 'Z');
+    if (isUpper) {
+        c += 'a' - 'A';
+    }
+}
+
+// Lower-cases the ASCII upper-case letters among the first |length|
+// characters of |str| in place. Embedded NULs do not stop the conversion.
+void
+net_ToLowerCase(char *str, uint32_t length)
+{
+    for (uint32_t i = 0; i < length; ++i) {
+        ToLower(str[i]);
+    }
+}
+
+// Lower-cases the ASCII upper-case letters of the NUL-terminated |str| in
+// place.
+void
+net_ToLowerCase(char *str)
+{
+    while (*str) {
+        ToLower(*str);
+        ++str;
+    }
+}
+
+// Scans [iter, stop) for the first character contained in the
+// NUL-terminated |set|. The scan also ends at a NUL character in the input.
+// Returns a pointer to the match, or to where the scan ended (|stop| or the
+// NUL) when there is none.
+char *
+net_FindCharInSet(const char *iter, const char *stop, const char *set)
+{
+    while (iter != stop && *iter) {
+        for (const char *member = set; *member; ++member) {
+            if (*member == *iter) {
+                return const_cast<char *>(iter);
+            }
+        }
+        ++iter;
+    }
+    return const_cast<char *>(iter);
+}
+
+// Scans [iter, stop) for the first character that is NOT contained in the
+// NUL-terminated |set|. Returns a pointer to it, or |stop| when every
+// character of the range is in the set.
+//
+// The range bound is tested before each dereference, so an empty range
+// returns |stop| without reading *stop and the scan can never run past
+// |stop|.
+char *
+net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
+{
+    for (; iter != stop; ++iter) {
+        bool inSet = false;
+        for (const char *s = set; *s; ++s) {
+            if (*iter == *s) {
+                inSet = true;
+                break;
+            }
+        }
+        if (!inSet)
+            break;
+    }
+    return (char *) iter;
+}
+
+// Reverse counterpart of net_FindCharNotInSet: walks backwards from
+// |iter| - 1 down to |stop| looking for the last character that is NOT
+// contained in the NUL-terminated |set|. Note the parameter order: |stop|
+// (the low end) comes first. Returns a pointer to that character, or
+// |stop| - 1 when the range is empty or consists of set characters only.
+char *
+net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
+{
+    // move both bounds onto the characters they delimit
+    --iter;
+    --stop;
+
+    while (iter != stop) {
+        bool inSet = false;
+        for (const char *member = set; *member; ++member) {
+            if (*member == *iter) {
+                inSet = true;
+                break;
+            }
+        }
+        if (!inSet) {
+            break;
+        }
+        --iter;
+    }
+    return const_cast<char *>(iter);
+}
+
+#define HTTP_LWS " \t"
+
+// Returns the index of the quote that closes the quoted string starting at
+// |stringStart| (which must be the index of the opening |stringDelim|).
+// A backslash escapes the character after it. Returns flatStr.Length()
+// when the string is not closed.
+static uint32_t
+net_FindStringEnd(const nsCString& flatStr,
+                  uint32_t stringStart,
+                  char stringDelim)
+{
+    NS_ASSERTION(stringStart < flatStr.Length() &&
+                 flatStr.CharAt(stringStart) == stringDelim &&
+                 (stringDelim == '"' || stringDelim == '\''),
+                 "Invalid stringStart");
+
+    const char stopChars[] = { stringDelim, '\\', '\0' };
+    const uint32_t length = flatStr.Length();
+
+    for (;;) {
+        // stringStart is either the opening quote or the last escaped
+        // character, so the search resumes right behind it
+        const uint32_t hit = flatStr.FindCharInSet(stopChars, stringStart + 1);
+        if (hit == uint32_t(kNotFound)) {
+            return length;
+        }
+
+        if (flatStr.CharAt(hit) != '\\') {
+            // closing quote
+            return hit;
+        }
+
+        // backslash: step onto the escaped character and keep looking
+        stringStart = hit + 1;
+        if (stringStart == length) {
+            return stringStart;
+        }
+    }
+
+    NS_NOTREACHED("How did we get here?");
+    return flatStr.Length();
+}
+
+
+// Returns the index of the first |delimiter| at or after |searchStart|
+// that is not inside a double-quoted string, or flatStr.Length() when
+// there is none (including when a quoted string is left unterminated).
+static uint32_t
+net_FindMediaDelimiter(const nsCString& flatStr,
+                       uint32_t searchStart,
+                       char delimiter)
+{
+    const char stopChars[] = { delimiter, '"', '\0' };
+    const uint32_t length = flatStr.Length();
+
+    for (;;) {
+        const uint32_t hit = flatStr.FindCharInSet(stopChars, searchStart);
+        if (hit == uint32_t(kNotFound)) {
+            return length;
+        }
+
+        const char found = flatStr.CharAt(hit);
+        if (found == delimiter) {
+            return hit;
+        }
+
+        // Opening quote: jump to the end of the quoted string.
+        searchStart = net_FindStringEnd(flatStr, hit, found);
+        if (searchStart == length) {
+            return searchStart;
+        }
+
+        // resume the search right behind the closing quote
+        ++searchStart;
+    }
+
+    NS_NOTREACHED("How did we get here?");
+    return flatStr.Length();
+}
+
+// Parses a single media-type ("type/subtype" plus optional ';'-separated
+// parameters) from aMediaTypeStr and merges it into the in/out arguments.
+//
+// aContentType     in/out; replaced by the lower-cased type when the parsed
+//                  type is accepted and differs case-insensitively from the
+//                  current value.
+// aContentCharset  in/out; updated when a "charset=" parameter was found,
+//                  or when the type changed and *aHadCharset was already
+//                  set (the charset is then reset to empty).
+// aOffset should be added to aCharsetStart and aCharsetEnd if this
+// function sets them.
+// aHadCharset      in/out; set to true whenever aContentCharset is updated.
+// aCharsetStart,
+// aCharsetEnd      out; position of the charset parameter, or the spot
+//                  where one would be inserted.
+// aStrict          when true the type is accepted only if nothing but LWS
+//                  follows the last consumed position; when false it is
+//                  accepted unless it compares equal to "*/*" over the
+//                  length of the type (which also rejects "*/").
+//
+// A type is never accepted when it is empty or contains no '/'; in that
+// case none of the in/out arguments is modified.
+static void
+net_ParseMediaType(const nsACString &aMediaTypeStr,
+ nsACString &aContentType,
+ nsACString &aContentCharset,
+ int32_t aOffset,
+ bool *aHadCharset,
+ int32_t *aCharsetStart,
+ int32_t *aCharsetEnd,
+ bool aStrict)
+{
+ const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
+ const char* start = flatStr.get();
+ const char* end = start + flatStr.Length();
+
+ // Trim LWS leading and trailing whitespace from type. We include '(' in
+ // the trailing trim set to catch media-type comments, which are not at all
+ // standard, but may occur in rare cases.
+ const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
+ const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
+
+ const char* charset = "";
+ const char* charsetEnd = charset;
+ int32_t charsetParamStart = 0;
+ int32_t charsetParamEnd = 0;
+
+ // Number of characters handled so far; only consulted in strict mode.
+ uint32_t consumed = typeEnd - type;
+
+ // Iterate over parameters
+ bool typeHasCharset = false;
+ uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
+ if (paramStart != uint32_t(kNotFound)) {
+ // We have parameters. Iterate over them.
+ uint32_t curParamStart = paramStart + 1;
+ do {
+ uint32_t curParamEnd =
+ net_FindMediaDelimiter(flatStr, curParamStart, ';');
+
+ const char* paramName = net_FindCharNotInSet(start + curParamStart,
+ start + curParamEnd,
+ HTTP_LWS);
+ static const char charsetStr[] = "charset=";
+ // A later charset parameter overrides an earlier one.
+ if (PL_strncasecmp(paramName, charsetStr,
+ sizeof(charsetStr) - 1) == 0) {
+ charset = paramName + sizeof(charsetStr) - 1;
+ charsetEnd = start + curParamEnd;
+ typeHasCharset = true;
+ // include the ';' in front of the parameter
+ charsetParamStart = curParamStart - 1;
+ charsetParamEnd = curParamEnd;
+ }
+
+ consumed = curParamEnd;
+ curParamStart = curParamEnd + 1;
+ } while (curParamStart < flatStr.Length());
+ }
+
+ bool charsetNeedsQuotedStringUnescaping = false;
+ if (typeHasCharset) {
+ // Trim LWS leading and trailing whitespace from charset. We include
+ // '(' in the trailing trim set to catch media-type comments, which are
+ // not at all standard, but may occur in rare cases.
+ charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
+ if (*charset == '"') {
+ charsetNeedsQuotedStringUnescaping = true;
+ charsetEnd =
+ start + net_FindStringEnd(flatStr, charset - start, *charset);
+ // step over the opening quote
+ charset++;
+ NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
+ } else {
+ charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
+ }
+ }
+
+ // if the server sent "*/*", it is meaningless, so do not store it.
+ // also, if type is the same as aContentType, then just update the
+ // charset. however, if charset is empty and aContentType hasn't
+ // changed, then don't wipe-out an existing aContentCharset. We
+ // also want to reject a mime-type if it does not include a slash.
+ // some servers give junk after the charset parameter, which may
+ // include a comma, so this check makes us a bit more tolerant.
+
+ if (type != typeEnd &&
+ memchr(type, '/', typeEnd - type) != nullptr &&
+ (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end) :
+ (strncmp(type, "*/*", typeEnd - type) != 0))) {
+ // Common case here is that aContentType is empty
+ bool eq = !aContentType.IsEmpty() &&
+ aContentType.Equals(Substring(type, typeEnd),
+ nsCaseInsensitiveCStringComparator());
+ if (!eq) {
+ aContentType.Assign(type, typeEnd - type);
+ ToLowerCase(aContentType);
+ }
+
+ if ((!eq && *aHadCharset) || typeHasCharset) {
+ *aHadCharset = true;
+ if (charsetNeedsQuotedStringUnescaping) {
+ // parameters using the "quoted-string" syntax need
+ // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
+ aContentCharset.Truncate();
+ for (const char *c = charset; c != charsetEnd; c++) {
+ if (*c == '\\' && c + 1 != charsetEnd) {
+ // eat escape
+ c++;
+ }
+ aContentCharset.Append(*c);
+ }
+ }
+ else {
+ aContentCharset.Assign(charset, charsetEnd - charset);
+ }
+ if (typeHasCharset) {
+ *aCharsetStart = charsetParamStart + aOffset;
+ *aCharsetEnd = charsetParamEnd + aOffset;
+ }
+ }
+ // Only set a new charset position if this is a different type
+ // from the last one we had and it doesn't already have a
+ // charset param. If this is the same type, we probably want
+ // to leave the charset position on its first occurrence.
+ if (!eq && !typeHasCharset) {
+ int32_t charsetStart = int32_t(paramStart);
+ if (charsetStart == kNotFound)
+ charsetStart = flatStr.Length();
+
+ *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
+ }
+ }
+}
+
+#undef HTTP_LWS
+
+// Convenience overload for callers that do not need the position of the
+// charset parameter; forwards to the six-argument overload.
+void
+net_ParseContentType(const nsACString &aHeaderStr,
+                     nsACString &aContentType,
+                     nsACString &aContentCharset,
+                     bool *aHadCharset)
+{
+    int32_t ignoredStart, ignoredEnd;
+    net_ParseContentType(aHeaderStr,
+                         aContentType,
+                         aContentCharset,
+                         aHadCharset,
+                         &ignoredStart,
+                         &ignoredEnd);
+}
+
+// Parses a Content-Type header value that may list several media-types
+// separated by ','. Each media-type is handed to net_ParseMediaType() in
+// non-strict mode, in order, so later entries can update what earlier ones
+// stored in aContentType / aContentCharset. *aHadCharset is reset to false
+// before parsing starts.
+//
+// Augmented BNF (from RFC 2616 section 3.7):
+//
+//   header-value = media-type *( LWS "," LWS media-type )
+//   media-type = type "/" subtype *( LWS ";" LWS parameter )
+//   type = token
+//   subtype = token
+//   parameter = attribute "=" value
+//   attribute = token
+//   value = token | quoted-string
+//
+// Examples:
+//
+//   text/html
+//   text/html, text/html
+//   text/html,text/html; charset=ISO-8859-1
+//   text/html,text/html; charset="ISO-8859-1"
+//   text/html;charset=ISO-8859-1, text/html
+//   text/html;charset='ISO-8859-1', text/html
+//   application/octet-stream
+void
+net_ParseContentType(const nsACString &aHeaderStr,
+                     nsACString &aContentType,
+                     nsACString &aContentCharset,
+                     bool *aHadCharset,
+                     int32_t *aCharsetStart,
+                     int32_t *aCharsetEnd)
+{
+    *aHadCharset = false;
+    const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
+    const uint32_t headerLen = flatStr.Length();
+
+    // Walk the media-types one by one. ',' can also occur inside quoted
+    // strings, which net_FindMediaDelimiter() takes care of. The body runs
+    // at least once, even for an empty header.
+    uint32_t typeStart = 0;
+    for (;;) {
+        const uint32_t typeEnd =
+            net_FindMediaDelimiter(flatStr, typeStart, ',');
+
+        // [typeStart, typeEnd) is the current media-type
+        net_ParseMediaType(Substring(flatStr, typeStart,
+                                     typeEnd - typeStart),
+                           aContentType, aContentCharset, typeStart,
+                           aHadCharset, aCharsetStart, aCharsetEnd, false);
+
+        // continue behind the ','
+        typeStart = typeEnd + 1;
+        if (typeStart >= headerLen) {
+            break;
+        }
+    }
+}
+
+// Parses a request Content-Type header value, which must hold exactly one
+// media-type. The outputs are cleared first and stay cleared when the
+// header contains a top-level ','. Otherwise the value is parsed with
+// net_ParseMediaType() in strict mode and the outcome is copied to the
+// outputs.
+//
+// Augmented BNF (from RFC 7231 section 3.1.1.1):
+//
+//   media-type = type "/" subtype *( OWS ";" OWS parameter )
+//   type = token
+//   subtype = token
+//   parameter = token "=" ( token / quoted-string )
+//
+// Examples:
+//
+//   text/html
+//   text/html; charset=ISO-8859-1
+//   text/html; charset="ISO-8859-1"
+//   application/octet-stream
+void
+net_ParseRequestContentType(const nsACString &aHeaderStr,
+                            nsACString &aContentType,
+                            nsACString &aContentCharset,
+                            bool *aHadCharset)
+{
+    aContentType.Truncate();
+    aContentCharset.Truncate();
+    *aHadCharset = false;
+
+    const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
+
+    // A ',' outside of a quoted string means there is more than one
+    // media-type, which is not valid for a request Content-Type header.
+    const uint32_t firstComma = net_FindMediaDelimiter(flatStr, 0, ',');
+    if (firstComma != flatStr.Length()) {
+        return;
+    }
+
+    // Parse into locals so the outputs are written in one go afterwards.
+    nsAutoCString parsedType, parsedCharset;
+    bool parsedHadCharset = false;
+    int32_t ignoredStart, ignoredEnd;
+    net_ParseMediaType(flatStr, parsedType, parsedCharset, 0,
+                       &parsedHadCharset, &ignoredStart, &ignoredEnd, true);
+
+    aContentType = parsedType;
+    aContentCharset = parsedCharset;
+    *aHadCharset = parsedHadCharset;
+}
+
+// Returns true when |host| may be handed to lower-level DNS logic: either
+// it consists only of whitelisted characters, or PR_StringToNetAddr()
+// accepts it as an address literal.
+bool
+net_IsValidHostName(const nsCSubstring &host)
+{
+    // An explicit whitelist is more self-documenting than a blacklist and
+    // can be slightly faster, since DNS names are the common case and the
+    // commonest characters tend to be near the start of the list.
+    //
+    // Characters of DNS names (RFC 1035) plus "$+_", which were added for
+    // pragmatic reasons,
+    // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
+    static const char kAllowedChars[] =
+        "abcdefghijklmnopqrstuvwxyz"
+        ".-0123456789"
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_";
+
+    const char *begin = host.BeginReading();
+    const char *end = host.EndReading();
+    const bool onlyAllowedChars =
+        net_FindCharNotInSet(begin, end, kAllowedChars) == end;
+    if (onlyAllowedChars) {
+        return true;
+    }
+
+    // Might be a valid IPv6 link-local address containing a percent sign
+    PRNetAddr addr;
+    nsAutoCString flatHost(host);
+    return PR_StringToNetAddr(flatHost.get(), &addr) == PR_SUCCESS;
+}
+
+// Returns true when the |addrLen| characters at |addr| are a dotted-quad
+// IPv4 address: exactly four decimal octets in the range 0-255 separated
+// by '.', none of them empty. An octet starting with '0' may not have any
+// further digit.
+bool
+net_IsValidIPv4Addr(const char *addr, int32_t addrLen)
+{
+    RangedPtr<const char> p(addr, addrLen);
+
+    int32_t octet = -1;  // value of the current octet, -1: no digit yet
+    int32_t dots = 0;    // number of '.' seen so far
+
+    for (; addrLen; ++p, --addrLen) {
+        const char ch = *p;
+
+        if (ch == '.') {
+            ++dots;
+            if (octet == -1) {
+                // empty octet
+                return false;
+            }
+            octet = -1;
+            continue;
+        }
+
+        if (ch < '0' || ch > '9') {
+            // invalid character
+            return false;
+        }
+
+        if (octet == 0) {
+            // a digit after a leading 0 is not allowed
+            return false;
+        }
+
+        if (octet == -1) {
+            octet = ch - '0';
+            continue;
+        }
+
+        octet = octet * 10 + (ch - '0');
+        if (octet > 255) {
+            return false;
+        }
+    }
+
+    return dots == 3 && octet != -1;
+}
+
+// Returns true when the |addrLen| characters at |addr| are an IPv6 address
+// literal without brackets: blocks of 1-4 hex digits separated by ':',
+// with at most one "::", optionally ending in an embedded dotted-quad IPv4
+// address. Eight blocks are required without "::" (six in front of an
+// IPv4 tail); with "::" there have to be fewer.
+bool
+net_IsValidIPv6Addr(const char *addr, int32_t addrLen)
+{
+    RangedPtr<const char> p(addr, addrLen);
+
+    int32_t digits = 0;     // hex digits in the current block
+    int32_t colons = 0;     // consecutive colons just seen
+    int32_t blocks = 0;     // completed hexadecimal blocks
+    bool haveZeros = false; // a "::" has been seen
+
+    for (; addrLen; ++p, --addrLen) {
+        const char ch = *p;
+
+        if (ch == ':') {
+            switch (colons) {
+            case 0:
+                // a colon closes the block in front of it, if any
+                if (digits != 0) {
+                    digits = 0;
+                    blocks++;
+                }
+                break;
+            case 1:
+                // second colon in a row: this is the "::"
+                if (haveZeros) {
+                    return false; // only one occurrence is allowed
+                }
+                haveZeros = true;
+                break;
+            default:
+                // too many colons in a row
+                return false;
+            }
+            colons++;
+            continue;
+        }
+
+        const bool isHexDigit = (ch >= '0' && ch <= '9') ||
+                                (ch >= 'a' && ch <= 'f') ||
+                                (ch >= 'A' && ch <= 'F');
+        if (isHexDigit) {
+            if (colons == 1 && blocks == 0) {
+                // starts with a single colon
+                return false;
+            }
+            if (digits == 4) {
+                // too many digits
+                return false;
+            }
+            colons = 0;
+            digits++;
+            continue;
+        }
+
+        if (ch == '.') {
+            // The digits collected for the current block are the first
+            // octet of an embedded IPv4 address; validate from there.
+            if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits)) {
+                return false;
+            }
+            return haveZeros ? (blocks < 6) : (blocks == 6);
+        }
+
+        // invalid character
+        return false;
+    }
+
+    if (colons == 1) {
+        // ends with a single colon
+        return false;
+    }
+
+    if (digits) {
+        // there is a block at the end
+        blocks++;
+    }
+
+    return haveZeros ? (blocks < 8) : (blocks == 8);
+}