dom/security/nsCSPParser.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsCSPParser_h___
#define nsCSPParser_h___

#include "nsCSPUtils.h"
#include "nsIURI.h"
#include "nsString.h"

/**
 * How does the parsing work?
 *
 * We generate tokens by splitting the policy-string by whitespace and semicolon.
 * Internally the tokens are represented as an array of string-arrays:
 *
 *  [
 *    [ name, src, src, src, ... ],
 *    [ name, src, src, src, ... ],
 *    [ name, src, src, src, ... ]
 *  ]
 *
 * for example:
 *  [
 *    [ img-src, http://www.example.com, http://www.test.com ],
 *    [ default-src, 'self'],
 *    [ script-src, 'unsafe-eval', 'unsafe-inline' ],
 *  ]
 *
 * The first element of each array has to be a valid directive-name, otherwise we can
 * ignore the remaining elements of the array. Also, if the
 * directive already exists in the current policy, we can ignore
 * the remaining elements of that array. (http://www.w3.org/TR/CSP/#parsing)
 */

// One inner array per directive: element 0 is the directive name, the
// remaining elements are that directive's source expressions.
using cspTokens = nsTArray<nsTArray<nsString>>;

/**
 * Splits a policy string into per-directive token arrays (cspTokens).
 *
 * The constructor and destructor are private, so the static
 * tokenizeCSPPolicy() is the only way for outside code to use this class.
 * The inline helpers below operate on a cursor (mCurChar) that walks the
 * half-open character range [mCurChar, mEndChar).
 */
class nsCSPTokenizer {

  public:
    /**
     * Tokenizes aPolicyString and stores the result in outTokens.
     * (Defined out of line.)
     */
    static void tokenizeCSPPolicy(const nsAString &aPolicyString, cspTokens& outTokens);

  private:
    nsCSPTokenizer(const char16_t* aStart, const char16_t* aEnd);
    ~nsCSPTokenizer();

    // True once the cursor has reached (or passed) the end of the input.
    inline bool atEnd() const
    {
      return mCurChar >= mEndChar;
    }

    // Moves the cursor past any run of HTML whitespace and leaves mCurToken
    // empty. The skipped characters are never part of a token, so they are
    // not accumulated anywhere.
    inline void skipWhiteSpace()
    {
      while (!atEnd() && nsContentUtils::IsHTMLWhitespace(*mCurChar)) {
        ++mCurChar;
      }
      mCurToken.Truncate();
    }

    // Same as skipWhiteSpace(), but additionally skips ';' characters.
    inline void skipWhiteSpaceAndSemicolon()
    {
      while (!atEnd() && (*mCurChar == ';' ||
             nsContentUtils::IsHTMLWhitespace(*mCurChar))) {
        ++mCurChar;
      }
      mCurToken.Truncate();
    }

    // If the character under the cursor equals aChar, appends it to mCurToken,
    // advances the cursor and returns true; otherwise returns false and leaves
    // all state untouched.
    inline bool accept(char16_t aChar)
    {
      // Callers are expected to check atEnd() first; flag violations in debug
      // builds.
      NS_ASSERTION(mCurChar < mEndChar, "Trying to dereference mEndChar");
      // The assertion above does not stop execution, so guard the dereference
      // explicitly: never read the character at mEndChar.
      if (atEnd() || *mCurChar != aChar) {
        return false;
      }
      mCurToken.Append(*mCurChar++);
      return true;
    }

    // Defined out of line.
    void generateNextToken();
    void generateTokens(cspTokens& outTokens);

    const char16_t* mCurChar;   // current read position
    const char16_t* mEndChar;   // end of input; must not be dereferenced
    nsString        mCurToken;  // token currently being assembled
};


class nsCSPParser {

  public:
    /**
     * parseContentSecurityPolicy is the single publicly accessible function
     * of the CSP parser. Internally the input string is split into string
     * tokens and policy() is invoked to start parsing. From there the parser
     * calls one function after the other, following the source-list grammar
     * from http://www.w3.org/TR/CSP11/#source-list. For example, port() can
     * only be called after any possible host has been processed in host(),
     * similar to a finite state machine.
     */
    static nsCSPPolicy* parseContentSecurityPolicy(const nsAString &aPolicyString,
                                                   nsIURI *aSelfURI,
                                                   bool aReportOnly,
                                                   nsCSPContext* aCSPContext,
                                                   bool aDeliveredViaMetaTag);

  private:
    nsCSPParser(cspTokens& aTokens,
                nsIURI* aSelfURI,
                nsCSPContext* aCSPContext,
                bool aDeliveredViaMetaTag);

    static bool sCSPExperimentalEnabled;
    static bool sStrictDynamicEnabled;

    ~nsCSPParser();

    // Parsing functions, named after the productions of the source-list
    // grammar at http://www.w3.org/TR/CSP11/#source-list
    nsCSPPolicy* policy();
    void directive();
    nsCSPDirective* directiveName();
    void directiveValue(nsTArray<nsCSPBaseSrc*>& outSrcs);
    void requireSRIForDirectiveValue(nsRequireSRIForDirective* aDir);
    void referrerDirectiveValue(nsCSPDirective* aDir);
    void reportURIList(nsCSPDirective* aDir);
    void sandboxFlagList(nsCSPDirective* aDir);
    void sourceList(nsTArray<nsCSPBaseSrc*>& outSrcs);
    nsCSPBaseSrc* sourceExpression();
    nsCSPSchemeSrc* schemeSource();
    nsCSPHostSrc* hostSource();
    nsCSPBaseSrc* keywordSource();
    nsCSPNonceSrc* nonceSource();
    nsCSPHashSrc* hashSource();
    nsCSPHostSrc* appHost(); // helper to support app specific hosts
    nsCSPHostSrc* host();
    bool hostChar();
    bool schemeChar();
    bool port();
    bool path(nsCSPHostSrc* aCspHost);

    bool subHost();                       // helper to parse subDomains
    bool atValidUnreservedChar();         // helper to parse unreserved
    bool atValidSubDelimChar();           // helper to parse sub-delims
    bool atValidPctEncodedChar();         // helper to parse pct-encoded
    bool subPath(nsCSPHostSrc* aCspHost); // helper to parse paths

    // True when every character of the current token has been consumed.
    bool atEnd()
    {
      return mCurChar >= mEndChar;
    }

    // Consumes the current character if it equals aSymbol; reports whether a
    // character was consumed.
    bool accept(char16_t aSymbol)
    {
      return peek(aSymbol) && advance();
    }

    // Consumes the current character if aClassifier accepts it; reports
    // whether a character was consumed.
    bool accept(bool (*aClassifier) (char16_t))
    {
      return peek(aClassifier) && advance();
    }

    // Tests, without consuming, whether the current character equals aSymbol.
    // Always false at the end of the token.
    bool peek(char16_t aSymbol)
    {
      return !atEnd() && *mCurChar == aSymbol;
    }

    // Tests, without consuming, whether aClassifier accepts the current
    // character. Always false at the end of the token.
    bool peek(bool (*aClassifier) (char16_t))
    {
      return !atEnd() && aClassifier(*mCurChar);
    }

    // Appends the current character to mCurValue and steps to the next one.
    // Returns false (changing nothing) at the end of the token.
    bool advance()
    {
      if (mCurChar >= mEndChar) {
        return false;
      }
      mCurValue.Append(*mCurChar);
      ++mCurChar;
      return true;
    }

    // Discards whatever has been accumulated in mCurValue.
    void resetCurValue()
    {
      mCurValue.Truncate();
    }

    bool atEndOfPath();
    bool atValidPathChar();

    void resetCurChar(const nsAString& aToken);

    void logWarningErrorToConsole(uint32_t aSeverityFlag,
                                  const char* aProperty,
                                  const char16_t* aParams[],
                                  uint32_t aParamsLength);

    /**
     * While parsing a policy, the parser relies on the helper members below,
     * which are used and reset as parsing progresses. The following example
     * walks through how they are used.
     *
     * The tokenizer separates all input into arrays of arrays of strings,
     * which are stored in mTokens, for example:
     *   mTokens = [ [ script-src, http://www.example.com, 'self' ], ... ]
     *
     * When parsing starts, mCurDir always holds the array of strings that is
     * currently being processed. In our example:
     *   mCurDir = [ script-src, http://www.example.com, 'self' ]
     *
     * During parsing, one string of that array is processed/consumed at a
     * time. mCurToken is set to the string currently being processed; in the
     * first case that would be:
     *   mCurToken = script-src
     * which allows simple string comparisons to check whether mCurToken is a
     * valid directive.
     *
     * Continuing, the parser consumes the next string of that array,
     * resetting:
     *   mCurToken = "http://www.example.com"
     *                ^                     ^
     *                mCurChar              mEndChar (points *after* the 'm')
     *   mCurValue = ""
     *
     * After the first call to advance(), the helpers hold these values:
     *   mCurToken = "http://www.example.com"
     *                 ^                    ^
     *                 mCurChar             mEndChar (points *after* the 'm')
     *   mCurValue = "h"
     *
     * Parsing continues until all strings of one directive are consumed; then
     * mCurDir is reset to hold the next array of strings and the process
     * starts all over.
     */

    const char16_t*    mCurChar;
    const char16_t*    mEndChar;
    nsString           mCurValue;
    nsString           mCurToken;
    nsTArray<nsString> mCurDir;

    // Helpers that allow srcs within script-src and style-src to be
    // invalidated if either 'strict-dynamic' or at least one hash or nonce
    // is present.
    bool               mHasHashOrNonce; // false, if no hash or nonce is defined
    bool               mStrictDynamic;  // false, if 'strict-dynamic' is not defined
    nsCSPKeywordSrc*   mUnsafeInlineKeywordSrc; // null, otherwise invalidate()

    // Cache variables for child-src, frame-src and worker-src handling.
    // In CSP 3 child-src is deprecated. For backwards compatibility
    // child-src needs to restrict:
    //   (*) frames, in case frame-src is not explicitly specified
    //   (*) workers, in case worker-src is not explicitly specified
    // If neither worker-src nor child-src is present, then script-src
    // needs to govern workers.
    nsCSPChildSrcDirective*  mChildSrc;
    nsCSPDirective*          mFrameSrc;
    nsCSPDirective*          mWorkerSrc;
    nsCSPScriptSrcDirective* mScriptSrc;

    // Cache variable that lets nsCSPHostSrc know it is being parsed within
    // the frame-ancestors directive.
    bool                    mParsingFrameAncestorsDir;

    cspTokens          mTokens;
    nsIURI*            mSelfURI;
    nsCSPPolicy*       mPolicy;
    nsCSPContext*      mCSPContext; // used for console logging
    bool               mDeliveredViaMetaTag;
};

#endif /* nsCSPParser_h___ */