diff options
Diffstat (limited to 'netwerk/base/nsURLParsers.cpp')
-rw-r--r-- | netwerk/base/nsURLParsers.cpp | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/netwerk/base/nsURLParsers.cpp b/netwerk/base/nsURLParsers.cpp new file mode 100644 index 000000000..b75ee0c4d --- /dev/null +++ b/netwerk/base/nsURLParsers.cpp @@ -0,0 +1,702 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <string.h> + +#include "mozilla/RangedPtr.h" + +#include "nsURLParsers.h" +#include "nsURLHelper.h" +#include "nsString.h" +#include "nsCRT.h" + +using namespace mozilla; + +//---------------------------------------------------------------------------- + +static uint32_t +CountConsecutiveSlashes(const char *str, int32_t len) +{ + RangedPtr<const char> p(str, len); + uint32_t count = 0; + while (len-- && *p++ == '/') ++count; + return count; +} + +//---------------------------------------------------------------------------- +// nsBaseURLParser implementation +//---------------------------------------------------------------------------- + +NS_IMPL_ISUPPORTS(nsAuthURLParser, nsIURLParser) +NS_IMPL_ISUPPORTS(nsNoAuthURLParser, nsIURLParser) + +#define SET_RESULT(component, pos, len) \ + PR_BEGIN_MACRO \ + if (component ## Pos) \ + *component ## Pos = uint32_t(pos); \ + if (component ## Len) \ + *component ## Len = int32_t(len); \ + PR_END_MACRO + +#define OFFSET_RESULT(component, offset) \ + PR_BEGIN_MACRO \ + if (component ## Pos) \ + *component ## Pos += offset; \ + PR_END_MACRO + +NS_IMETHODIMP +nsBaseURLParser::ParseURL(const char *spec, int32_t specLen, + uint32_t *schemePos, int32_t *schemeLen, + uint32_t *authorityPos, int32_t *authorityLen, + uint32_t *pathPos, int32_t *pathLen) +{ + if (NS_WARN_IF(!spec)) { + return NS_ERROR_INVALID_POINTER; + } + + if (specLen < 0) + specLen = strlen(spec); + + const char *stop = nullptr; + const char *colon = nullptr; + const char *slash = nullptr; + const char *p = spec; + uint32_t offset = 0; + int32_t len = specLen; + + // skip leading whitespace + while (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') { + spec++; + specLen--; + offset++; + + p++; + len--; + } + + for (; len && *p && !colon && !slash; ++p, --len) { + switch (*p) { + case ':': + if (!colon) + colon = p; + break; + case '/': // start of filepath + case '?': // start of query + case '#': // start of ref + if (!slash) + slash = p; + break; + case '@': // username@hostname + case '[': // start of IPv6 address literal + if (!stop) + stop = p; + break; + } + } + // disregard the first colon if it follows an '@' or a '[' + if (colon && stop && colon > stop) + colon = nullptr; + + // if the spec only contained whitespace ... + if (specLen == 0) { + SET_RESULT(scheme, 0, -1); + SET_RESULT(authority, 0, 0); + SET_RESULT(path, 0, 0); + return NS_OK; + } + + // ignore trailing whitespace and control characters + for (p = spec + specLen - 1; ((unsigned char) *p <= ' ') && (p != spec); --p) + ; + + specLen = p - spec + 1; + + if (colon && (colon < slash || !slash)) { + // + // spec = <scheme>:/<the-rest> + // + // or + // + // spec = <scheme>:<authority> + // spec = <scheme>:<path-no-slashes> + // + if (!net_IsValidScheme(spec, colon - spec) || (*(colon+1) == ':')) { + return NS_ERROR_MALFORMED_URI; + } + SET_RESULT(scheme, offset, colon - spec); + if (authorityLen || pathLen) { + uint32_t schemeLen = colon + 1 - spec; + offset += schemeLen; + ParseAfterScheme(colon + 1, specLen - schemeLen, + authorityPos, authorityLen, + pathPos, pathLen); + OFFSET_RESULT(authority, offset); + OFFSET_RESULT(path, offset); + } + } + else { + // + // spec = <authority-no-port-or-password>/<path> + // spec = <path> + // + // or + // + // spec = <authority-no-port-or-password>/<path-with-colon> + // spec = <path-with-colon> + // + // or + // + // spec = <authority-no-port-or-password> + // spec = <path-no-slashes-or-colon> + // + SET_RESULT(scheme, 0, -1); + if (authorityLen || pathLen) { + ParseAfterScheme(spec, specLen, + authorityPos, authorityLen, + pathPos, pathLen); + OFFSET_RESULT(authority, offset); + OFFSET_RESULT(path, offset); + } + } + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseAuthority(const char *auth, int32_t authLen, + uint32_t *usernamePos, int32_t *usernameLen, + uint32_t *passwordPos, int32_t *passwordLen, + uint32_t *hostnamePos, int32_t *hostnameLen, + int32_t *port) +{ + if (NS_WARN_IF(!auth)) { + return NS_ERROR_INVALID_POINTER; + } + + if (authLen < 0) + authLen = strlen(auth); + + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + SET_RESULT(hostname, 0, authLen); + if (port) + *port = -1; + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseUserInfo(const char *userinfo, int32_t userinfoLen, + uint32_t *usernamePos, int32_t *usernameLen, + uint32_t *passwordPos, int32_t *passwordLen) +{ + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseServerInfo(const char *serverinfo, int32_t serverinfoLen, + uint32_t *hostnamePos, int32_t *hostnameLen, + int32_t *port) +{ + SET_RESULT(hostname, 0, -1); + if (port) + *port = -1; + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParsePath(const char *path, int32_t pathLen, + uint32_t *filepathPos, int32_t *filepathLen, + uint32_t *queryPos, int32_t *queryLen, + uint32_t *refPos, int32_t *refLen) +{ + if (NS_WARN_IF(!path)) { + return NS_ERROR_INVALID_POINTER; + } + + if (pathLen < 0) + pathLen = strlen(path); + + // path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref> + + // XXX PL_strnpbrk would be nice, but it's buggy + + // search for first occurrence of either ? or # + const char *query_beg = 0, *query_end = 0; + const char *ref_beg = 0; + const char *p = 0; + for (p = path; p < path + pathLen; ++p) { + // only match the query string if it precedes the reference fragment + if (!ref_beg && !query_beg && *p == '?') + query_beg = p + 1; + else if (*p == '#') { + ref_beg = p + 1; + if (query_beg) + query_end = p; + break; + } + } + + if (query_beg) { + if (query_end) + SET_RESULT(query, query_beg - path, query_end - query_beg); + else + SET_RESULT(query, query_beg - path, pathLen - (query_beg - path)); + } + else + SET_RESULT(query, 0, -1); + + if (ref_beg) + SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path)); + else + SET_RESULT(ref, 0, -1); + + const char *end; + if (query_beg) + end = query_beg - 1; + else if (ref_beg) + end = ref_beg - 1; + else + end = path + pathLen; + + // an empty file path is no file path + if (end != path) + SET_RESULT(filepath, 0, end - path); + else + SET_RESULT(filepath, 0, -1); + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseFilePath(const char *filepath, int32_t filepathLen, + uint32_t *directoryPos, int32_t *directoryLen, + uint32_t *basenamePos, int32_t *basenameLen, + uint32_t *extensionPos, int32_t *extensionLen) +{ + if (NS_WARN_IF(!filepath)) { + return NS_ERROR_INVALID_POINTER; + } + + if (filepathLen < 0) + filepathLen = strlen(filepath); + + if (filepathLen == 0) { + SET_RESULT(directory, 0, -1); + SET_RESULT(basename, 0, 0); // assume a zero length file basename + SET_RESULT(extension, 0, -1); + return NS_OK; + } + + const char *p; + const char *end = filepath + filepathLen; + + // search backwards for filename + for (p = end - 1; *p != '/' && p > filepath; --p) + ; + if (*p == '/') { + // catch /.. and /. + if ((p+1 < end && *(p+1) == '.') && + (p+2 == end || (*(p+2) == '.' && p+3 == end))) + p = end - 1; + // filepath = <directory><filename>.<extension> + SET_RESULT(directory, 0, p - filepath + 1); + ParseFileName(p + 1, end - (p + 1), + basenamePos, basenameLen, + extensionPos, extensionLen); + OFFSET_RESULT(basename, p + 1 - filepath); + OFFSET_RESULT(extension, p + 1 - filepath); + } + else { + // filepath = <filename>.<extension> + SET_RESULT(directory, 0, -1); + ParseFileName(filepath, filepathLen, + basenamePos, basenameLen, + extensionPos, extensionLen); + } + return NS_OK; +} + +nsresult +nsBaseURLParser::ParseFileName(const char *filename, int32_t filenameLen, + uint32_t *basenamePos, int32_t *basenameLen, + uint32_t *extensionPos, int32_t *extensionLen) +{ + if (NS_WARN_IF(!filename)) { + return NS_ERROR_INVALID_POINTER; + } + + if (filenameLen < 0) + filenameLen = strlen(filename); + + // no extension if filename ends with a '.' + if (filename[filenameLen-1] != '.') { + // ignore '.' at the beginning + for (const char *p = filename + filenameLen - 1; p > filename; --p) { + if (*p == '.') { + // filename = <basename.extension> + SET_RESULT(basename, 0, p - filename); + SET_RESULT(extension, p + 1 - filename, filenameLen - (p - filename + 1)); + return NS_OK; + } + } + } + // filename = <basename> + SET_RESULT(basename, 0, filenameLen); + SET_RESULT(extension, 0, -1); + return NS_OK; +} + +//---------------------------------------------------------------------------- +// nsNoAuthURLParser implementation +//---------------------------------------------------------------------------- + +NS_IMETHODIMP +nsNoAuthURLParser::ParseAuthority(const char *auth, int32_t authLen, + uint32_t *usernamePos, int32_t *usernameLen, + uint32_t *passwordPos, int32_t *passwordLen, + uint32_t *hostnamePos, int32_t *hostnameLen, + int32_t *port) +{ + NS_NOTREACHED("Shouldn't parse auth in a NoAuthURL!"); + return NS_ERROR_UNEXPECTED; +} + +void +nsNoAuthURLParser::ParseAfterScheme(const char *spec, int32_t specLen, + uint32_t *authPos, int32_t *authLen, + uint32_t *pathPos, int32_t *pathLen) +{ + NS_PRECONDITION(specLen >= 0, "unexpected"); + + // everything is the path + uint32_t pos = 0; + switch (CountConsecutiveSlashes(spec, specLen)) { + case 0: + case 1: + break; + case 2: + { + const char *p = nullptr; + if (specLen > 2) { + // looks like there is an authority section +#if defined(XP_WIN) + // if the authority looks like a drive number then we + // really want to treat it as part of the path + // [a-zA-Z][:|]{/\} + // i.e one of: c: c:\foo c:/foo c| c|\foo c|/foo + if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') && + nsCRT::IsAsciiAlpha(spec[2]) && + ((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) { + pos = 1; + break; + } +#endif + // Ignore apparent authority; path is everything after it + for (p = spec + 2; p < spec + specLen; ++p) { + if (*p == '/' || *p == '?' || *p == '#') + break; + } + } + SET_RESULT(auth, 0, -1); + if (p && p != spec+specLen) + SET_RESULT(path, p - spec, specLen - (p - spec)); + else + SET_RESULT(path, 0, -1); + return; + } + default: + pos = 2; + break; + } + SET_RESULT(auth, pos, 0); + SET_RESULT(path, pos, specLen - pos); +} + +#if defined(XP_WIN) +NS_IMETHODIMP +nsNoAuthURLParser::ParseFilePath(const char *filepath, int32_t filepathLen, + uint32_t *directoryPos, int32_t *directoryLen, + uint32_t *basenamePos, int32_t *basenameLen, + uint32_t *extensionPos, int32_t *extensionLen) +{ + if (NS_WARN_IF(!filepath)) { + return NS_ERROR_INVALID_POINTER; + } + + if (filepathLen < 0) + filepathLen = strlen(filepath); + + // look for a filepath consisting of only a drive number, which may or + // may not have a leading slash. + if (filepathLen > 1 && filepathLen < 4) { + const char *end = filepath + filepathLen; + const char *p = filepath; + if (*p == '/') + p++; + if ((end-p == 2) && (p[1]==':' || p[1]=='|') && nsCRT::IsAsciiAlpha(*p)) { + // filepath = <drive-number>: + SET_RESULT(directory, 0, filepathLen); + SET_RESULT(basename, 0, -1); + SET_RESULT(extension, 0, -1); + return NS_OK; + } + } + + // otherwise fallback on common implementation + return nsBaseURLParser::ParseFilePath(filepath, filepathLen, + directoryPos, directoryLen, + basenamePos, basenameLen, + extensionPos, extensionLen); +} +#endif + +//---------------------------------------------------------------------------- +// nsAuthURLParser implementation +//---------------------------------------------------------------------------- + +NS_IMETHODIMP +nsAuthURLParser::ParseAuthority(const char *auth, int32_t authLen, + uint32_t *usernamePos, int32_t *usernameLen, + uint32_t *passwordPos, int32_t *passwordLen, + uint32_t *hostnamePos, int32_t *hostnameLen, + int32_t *port) +{ + nsresult rv; + + if (NS_WARN_IF(!auth)) { + return NS_ERROR_INVALID_POINTER; + } + + if (authLen < 0) + authLen = strlen(auth); + + if (authLen == 0) { + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + SET_RESULT(hostname, 0, 0); + if (port) + *port = -1; + return NS_OK; + } + + // search backwards for @ + const char *p = auth + authLen - 1; + for (; (*p != '@') && (p > auth); --p) { + continue; + } + if ( *p == '@' ) { + // auth = <user-info@server-info> + rv = ParseUserInfo(auth, p - auth, + usernamePos, usernameLen, + passwordPos, passwordLen); + if (NS_FAILED(rv)) return rv; + rv = ParseServerInfo(p + 1, authLen - (p - auth + 1), + hostnamePos, hostnameLen, + port); + if (NS_FAILED(rv)) return rv; + OFFSET_RESULT(hostname, p + 1 - auth); + + // malformed if has a username or password + // but no host info, such as: http://u:p@/ + if ((usernamePos || passwordPos) && (!hostnamePos || !*hostnameLen)) { + return NS_ERROR_MALFORMED_URI; + } + } + else { + // auth = <server-info> + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + rv = ParseServerInfo(auth, authLen, + hostnamePos, hostnameLen, + port); + if (NS_FAILED(rv)) return rv; + } + return NS_OK; +} + +NS_IMETHODIMP +nsAuthURLParser::ParseUserInfo(const char *userinfo, int32_t userinfoLen, + uint32_t *usernamePos, int32_t *usernameLen, + uint32_t *passwordPos, int32_t *passwordLen) +{ + if (NS_WARN_IF(!userinfo)) { + return NS_ERROR_INVALID_POINTER; + } + + if (userinfoLen < 0) + userinfoLen = strlen(userinfo); + + if (userinfoLen == 0) { + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + return NS_OK; + } + + const char *p = (const char *) memchr(userinfo, ':', userinfoLen); + if (p) { + // userinfo = <username:password> + if (p == userinfo) { + // must have a username! + return NS_ERROR_MALFORMED_URI; + } + SET_RESULT(username, 0, p - userinfo); + SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1)); + } + else { + // userinfo = <username> + SET_RESULT(username, 0, userinfoLen); + SET_RESULT(password, 0, -1); + } + return NS_OK; +} + +NS_IMETHODIMP +nsAuthURLParser::ParseServerInfo(const char *serverinfo, int32_t serverinfoLen, + uint32_t *hostnamePos, int32_t *hostnameLen, + int32_t *port) +{ + if (NS_WARN_IF(!serverinfo)) { + return NS_ERROR_INVALID_POINTER; + } + + if (serverinfoLen < 0) + serverinfoLen = strlen(serverinfo); + + if (serverinfoLen == 0) { + SET_RESULT(hostname, 0, 0); + if (port) + *port = -1; + return NS_OK; + } + + // search backwards for a ':' but stop on ']' (IPv6 address literal + // delimiter). check for illegal characters in the hostname. + const char *p = serverinfo + serverinfoLen - 1; + const char *colon = nullptr, *bracket = nullptr; + for (; p > serverinfo; --p) { + switch (*p) { + case ']': + bracket = p; + break; + case ':': + if (bracket == nullptr) + colon = p; + break; + case ' ': + // hostname must not contain a space + return NS_ERROR_MALFORMED_URI; + } + } + + if (colon) { + // serverinfo = <hostname:port> + SET_RESULT(hostname, 0, colon - serverinfo); + if (port) { + // XXX unfortunately ToInteger is not defined for substrings + nsAutoCString buf(colon+1, serverinfoLen - (colon + 1 - serverinfo)); + if (buf.Length() == 0) { + *port = -1; + } + else { + const char* nondigit = NS_strspnp("0123456789", buf.get()); + if (nondigit && *nondigit) + return NS_ERROR_MALFORMED_URI; + + nsresult err; + *port = buf.ToInteger(&err); + if (NS_FAILED(err) || *port < 0 || *port > std::numeric_limits<uint16_t>::max()) + return NS_ERROR_MALFORMED_URI; + } + } + } + else { + // serverinfo = <hostname> + SET_RESULT(hostname, 0, serverinfoLen); + if (port) + *port = -1; + } + + // In case of IPv6 address check its validity + if (*hostnameLen > 1 && *(serverinfo + *hostnamePos) == '[' && + *(serverinfo + *hostnamePos + *hostnameLen - 1) == ']' && + !net_IsValidIPv6Addr(serverinfo + *hostnamePos + 1, *hostnameLen - 2)) + return NS_ERROR_MALFORMED_URI; + + return NS_OK; +} + +void +nsAuthURLParser::ParseAfterScheme(const char *spec, int32_t specLen, + uint32_t *authPos, int32_t *authLen, + uint32_t *pathPos, int32_t *pathLen) +{ + NS_PRECONDITION(specLen >= 0, "unexpected"); + + uint32_t nslash = CountConsecutiveSlashes(spec, specLen); + + // search for the end of the authority section + const char *end = spec + specLen; + const char *p; + for (p = spec + nslash; p < end; ++p) { + if (*p == '/' || *p == '?' || *p == '#') + break; + } + if (p < end) { + // spec = [/]<auth><path> + SET_RESULT(auth, nslash, p - (spec + nslash)); + SET_RESULT(path, p - spec, specLen - (p - spec)); + } + else { + // spec = [/]<auth> + SET_RESULT(auth, nslash, specLen - nslash); + SET_RESULT(path, 0, -1); + } +} + +//---------------------------------------------------------------------------- +// nsStdURLParser implementation +//---------------------------------------------------------------------------- + +void +nsStdURLParser::ParseAfterScheme(const char *spec, int32_t specLen, + uint32_t *authPos, int32_t *authLen, + uint32_t *pathPos, int32_t *pathLen) +{ + NS_PRECONDITION(specLen >= 0, "unexpected"); + + uint32_t nslash = CountConsecutiveSlashes(spec, specLen); + + // search for the end of the authority section + const char *end = spec + specLen; + const char *p; + for (p = spec + nslash; p < end; ++p) { + if (strchr("/?#;", *p)) + break; + } + switch (nslash) { + case 0: + case 2: + if (p < end) { + // spec = (//)<auth><path> + SET_RESULT(auth, nslash, p - (spec + nslash)); + SET_RESULT(path, p - spec, specLen - (p - spec)); + } + else { + // spec = (//)<auth> + SET_RESULT(auth, nslash, specLen - nslash); + SET_RESULT(path, 0, -1); + } + break; + case 1: + // spec = /<path> + SET_RESULT(auth, 0, -1); + SET_RESULT(path, 0, specLen); + break; + default: + // spec = ///[/]<path> + SET_RESULT(auth, 2, 0); + SET_RESULT(path, 2, specLen - 2); + } +} |