netwerk/base/nsURLHelper.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsURLHelper_h__
#define nsURLHelper_h__

#include "nsString.h"

class nsIFile;
class nsIURLParser;

enum netCoalesceFlags
{
  NET_COALESCE_NORMAL = 0,

  /**
   * retains /../ that reach above dir root (useful for FTP
   * servers in which the root of the FTP URL is not necessarily
   * the root of the FTP filesystem).
   */
  NET_COALESCE_ALLOW_RELATIVE_ROOT = 1<<0,

  /**
   * recognizes /%2F and // as markers for the root directory
   * and handles them properly.
   */
  NET_COALESCE_DOUBLE_SLASH_IS_ROOT = 1<<1
};

//----------------------------------------------------------------------------
// This module contains some private helper functions related to URL parsing.
//----------------------------------------------------------------------------

/* shutdown frees URL parser */
void net_ShutdownURLHelper();
#ifdef XP_MACOSX
void net_ShutdownURLHelperOSX();
#endif

/* access URL parsers */
nsIURLParser * net_GetAuthURLParser();
nsIURLParser * net_GetNoAuthURLParser();
nsIURLParser * net_GetStdURLParser();

/* convert between nsIFile and file:// URL spec 
 * net_GetURLSpecFromFile does an extra stat, so callers should
 * avoid it if possible in favor of net_GetURLSpecFromActualFile
 * and net_GetURLSpecFromDir */
nsresult net_GetURLSpecFromFile(nsIFile *, nsACString &);
nsresult net_GetURLSpecFromDir(nsIFile *, nsACString &);
nsresult net_GetURLSpecFromActualFile(nsIFile *, nsACString &);
nsresult net_GetFileFromURLSpec(const nsACString &, nsIFile **);

/* extract file path components from file:// URL */
nsresult net_ParseFileURL(const nsACString &inURL,
                                      nsACString &outDirectory,
                                      nsACString &outFileBaseName,
                                      nsACString &outFileExtension);

/* handle .. in dirs while resolving URLs (path is UTF-8) */
void net_CoalesceDirs(netCoalesceFlags flags, char* path);

/**
 * Resolves a relative path string containing "." and ".."
 * with respect to a base path (assumed to already be resolved). 
 * For example, resolving "../../foo/./bar/../baz.html" w.r.t.
 * "/a/b/c/d/e/" yields "/a/b/c/foo/baz.html". Attempting to 
 * ascend above the base results in the NS_ERROR_MALFORMED_URI
 * exception. If basePath is null, it treats it as "/".
 *
 * @param relativePath  a relative URI
 * @param basePath      a base URI
 *
 * @return a new string, representing canonical uri
 */
nsresult net_ResolveRelativePath(const nsACString &relativePath,
                                             const nsACString &basePath,
                                             nsACString &result);

/**
 * Check if a URL is absolute
 *
 * @param inURL     URL spec
 * @return true if the given spec represents an absolute URL
 */
bool net_IsAbsoluteURL(const nsACString& inURL);

/**
 * Extract URI-Scheme if possible
 *
 * @param inURI     URI spec
 * @param scheme    scheme copied to this buffer on return (may be null)
 */
nsresult net_ExtractURLScheme(const nsACString &inURI,
                              nsACString &scheme);

/* check that the given scheme conforms to RFC 2396 */
bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);

inline bool net_IsValidScheme(const nsAFlatCString &scheme)
{
    return net_IsValidScheme(scheme.get(), scheme.Length());
}

/**
 * This function strips out all C0 controls and space at the beginning and end
 * of the URL and filters out \r, \n, \t from the middle of the URL.  This makes
 * it safe to call on things like javascript: urls or data: urls, where we may
 * in fact run into whitespace that is not properly encoded.
 *
 * @param input the URL spec we want to filter
 * @param result the out param to write to if filtering happens
 */
void net_FilterURIString(const nsACString& input, nsACString& result);

#if defined(XP_WIN)
/**
 * On Win32 and OS/2 system's a back-slash in a file:// URL is equivalent to a
 * forward-slash.  This function maps any back-slashes to forward-slashes.
 *
 * @param aURL
 *        The URL string to normalize (UTF-8 encoded).  This can be a
 *        relative URL segment.
 * @param aResultBuf 
 *        The resulting string is appended to this string.  If the input URL
 *        is already normalized, then aResultBuf is unchanged.
 *
 * @returns false if aURL is already normalized.  Otherwise, returns true.
 */
bool net_NormalizeFileURL(const nsACString &aURL,
                                        nsCString &aResultBuf);
#endif

/*****************************************************************************
 * generic string routines follow (XXX move to someplace more generic).
 */

/* convert to lower case */
void net_ToLowerCase(char* str, uint32_t length);
void net_ToLowerCase(char* str);

/**
 * returns pointer to first character of |str| in the given set.  if not found,
 * then |end| is returned.  stops prematurely if a null byte is encountered,
 * and returns the address of the null byte.
 */
char * net_FindCharInSet(const char *str, const char *end, const char *set);

/**
 * returns pointer to first character of |str| NOT in the given set.  if all
 * characters are in the given set, then |end| is returned.  if '\0' is not
 * included in |set|, then stops prematurely if a null byte is encountered,
 * and returns the address of the null byte.
 */
char * net_FindCharNotInSet(const char *str, const char *end, const char *set);

/**
 * returns pointer to last character of |str| NOT in the given set.  if all
 * characters are in the given set, then |str - 1| is returned.
 */
char * net_RFindCharNotInSet(const char *str, const char *end, const char *set);

/**
 * Parses a content-type header and returns the content type and
 * charset (if any).  aCharset is not modified if no charset is
 * specified in anywhere in aHeaderStr.  In that case (no charset
 * specified), aHadCharset is set to false.  Otherwise, it's set to
 * true.  Note that aContentCharset can be empty even if aHadCharset
 * is true.
 *
 * This parsing is suitable for HTTP request.  Use net_ParseContentType
 * for parsing this header in HTTP responses.
 */
void net_ParseRequestContentType(const nsACString &aHeaderStr,
                                 nsACString       &aContentType,
                                 nsACString       &aContentCharset,
                                 bool*          aHadCharset);

/**
 * Parses a content-type header and returns the content type and
 * charset (if any).  aCharset is not modified if no charset is
 * specified in anywhere in aHeaderStr.  In that case (no charset
 * specified), aHadCharset is set to false.  Otherwise, it's set to
 * true.  Note that aContentCharset can be empty even if aHadCharset
 * is true.
 */
void net_ParseContentType(const nsACString &aHeaderStr,
                          nsACString       &aContentType,
                          nsACString       &aContentCharset,
                          bool*          aHadCharset);
/**
 * As above, but also returns the start and end indexes for the charset
 * parameter in aHeaderStr.  These are indices for the entire parameter, NOT
 * just the value.  If there is "effectively" no charset parameter (e.g. if an
 * earlier type with one is overridden by a later type without one),
 * *aHadCharset will be true but *aCharsetStart will be set to -1.  Note that
 * it's possible to have aContentCharset empty and *aHadCharset true when
 * *aCharsetStart is nonnegative; this corresponds to charset="".
 */
void net_ParseContentType(const nsACString &aHeaderStr,
                          nsACString       &aContentType,
                          nsACString       &aContentCharset,
                          bool             *aHadCharset,
                          int32_t          *aCharsetStart,
                          int32_t          *aCharsetEnd);

/* inline versions */

/* remember the 64-bit platforms ;-) */
#define NET_MAX_ADDRESS (((char*)0)-1)

inline char *net_FindCharInSet(const char *str, const char *set)
{
    return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
}
inline char *net_FindCharNotInSet(const char *str, const char *set)
{
    return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
}
inline char *net_RFindCharNotInSet(const char *str, const char *set)
{
    return net_RFindCharNotInSet(str, str + strlen(str), set);
}

/**
 * This function returns true if the given hostname does not include any
 * restricted characters.  Otherwise, false is returned.
 */
bool net_IsValidHostName(const nsCSubstring &host);

/**
 * Checks whether the IPv4 address is valid according to RFC 3986 section 3.2.2.
 */
bool net_IsValidIPv4Addr(const char *addr, int32_t addrLen);

/**
 * Checks whether the IPv6 address is valid according to RFC 3986 section 3.2.2.
 */
bool net_IsValidIPv6Addr(const char *addr, int32_t addrLen);


/**
 * Returns the max length of a URL. The default is 1048576 (1 MB).
 * Can be changed by pref "network.standard-url.max-length"
 */
int32_t net_GetURLMaxLength();

#endif // !nsURLHelper_h__