/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef nsCSPParser_h___ #define nsCSPParser_h___ #include "nsCSPContext.h" #include "nsCSPUtils.h" #include "nsIURI.h" #include "nsString.h" /** * How does the parsing work? * * We generate tokens by splitting the policy-string by whitespace and semicolon. * Interally the tokens are represented as an array of string-arrays: * * [ * [ name, src, src, src, ... ], * [ name, src, src, src, ... ], * [ name, src, src, src, ... ] * ] * * for example: * [ * [ img-src, http://www.example.com, http:www.test.com ], * [ default-src, 'self'], * [ script-src, 'unsafe-eval', 'unsafe-inline' ], * ] * * The first element of each array has to be a valid directive-name, otherwise we can * ignore the remaining elements of the array. Also, if the * directive already exists in the current policy, we can ignore * the remaining elements of that array. (http://www.w3.org/TR/CSP/#parsing) */ typedef nsTArray< nsTArray<nsString> > cspTokens; class nsCSPTokenizer { public: static void tokenizeCSPPolicy(const nsAString &aPolicyString, cspTokens& outTokens); private: nsCSPTokenizer(const char16_t* aStart, const char16_t* aEnd); ~nsCSPTokenizer(); inline bool atEnd() { return mCurChar >= mEndChar; } inline void skipWhiteSpace() { while (mCurChar < mEndChar && nsContentUtils::IsHTMLWhitespace(*mCurChar)) { mCurToken.Append(*mCurChar++); } mCurToken.Truncate(); } inline void skipWhiteSpaceAndSemicolon() { while (mCurChar < mEndChar && (*mCurChar == ';' || nsContentUtils::IsHTMLWhitespace(*mCurChar))){ mCurToken.Append(*mCurChar++); } mCurToken.Truncate(); } inline bool accept(char16_t aChar) { NS_ASSERTION(mCurChar < mEndChar, "Trying to dereference mEndChar"); if (*mCurChar == aChar) { mCurToken.Append(*mCurChar++); return true; } return false; } void generateNextToken(); void generateTokens(cspTokens& outTokens); const char16_t* mCurChar; const char16_t* mEndChar; nsString mCurToken; }; class nsCSPParser { public: /** * The CSP parser only has one publicly accessible function, which is parseContentSecurityPolicy. * Internally the input string is separated into string tokens and policy() is called, which starts * parsing the policy. The parser calls one function after the other according the the source-list * from http://www.w3.org/TR/CSP11/#source-list. E.g., the parser can only call port() after the parser * has already processed any possible host in host(), similar to a finite state machine. */ static nsCSPPolicy* parseContentSecurityPolicy(const nsAString &aPolicyString, nsIURI *aSelfURI, bool aReportOnly, nsCSPContext* aCSPContext, bool aDeliveredViaMetaTag); private: nsCSPParser(cspTokens& aTokens, nsIURI* aSelfURI, nsCSPContext* aCSPContext, bool aDeliveredViaMetaTag); static bool sCSPExperimentalEnabled; static bool sStrictDynamicEnabled; ~nsCSPParser(); // Parsing the CSP using the source-list from http://www.w3.org/TR/CSP11/#source-list nsCSPPolicy* policy(); void directive(); nsCSPDirective* directiveName(); void directiveValue(nsTArray<nsCSPBaseSrc*>& outSrcs); void requireSRIForDirectiveValue(nsRequireSRIForDirective* aDir); void referrerDirectiveValue(nsCSPDirective* aDir); void reportURIList(nsCSPDirective* aDir); void sandboxFlagList(nsCSPDirective* aDir); void sourceList(nsTArray<nsCSPBaseSrc*>& outSrcs); nsCSPBaseSrc* sourceExpression(); nsCSPSchemeSrc* schemeSource(); nsCSPHostSrc* hostSource(); nsCSPBaseSrc* keywordSource(); nsCSPNonceSrc* nonceSource(); nsCSPHashSrc* hashSource(); nsCSPHostSrc* appHost(); // helper function to support app specific hosts nsCSPHostSrc* host(); bool hostChar(); bool schemeChar(); bool port(); bool path(nsCSPHostSrc* aCspHost); bool subHost(); // helper function to parse subDomains bool atValidUnreservedChar(); // helper function to parse unreserved bool atValidSubDelimChar(); // helper function to parse sub-delims bool atValidPctEncodedChar(); // helper function to parse pct-encoded bool subPath(nsCSPHostSrc* aCspHost); // helper function to parse paths inline bool atEnd() { return mCurChar >= mEndChar; } inline bool accept(char16_t aSymbol) { if (atEnd()) { return false; } return (*mCurChar == aSymbol) && advance(); } inline bool accept(bool (*aClassifier) (char16_t)) { if (atEnd()) { return false; } return (aClassifier(*mCurChar)) && advance(); } inline bool peek(char16_t aSymbol) { if (atEnd()) { return false; } return *mCurChar == aSymbol; } inline bool peek(bool (*aClassifier) (char16_t)) { if (atEnd()) { return false; } return aClassifier(*mCurChar); } inline bool advance() { if (atEnd()) { return false; } mCurValue.Append(*mCurChar++); return true; } inline void resetCurValue() { mCurValue.Truncate(); } bool atEndOfPath(); bool atValidPathChar(); void resetCurChar(const nsAString& aToken); void logWarningErrorToConsole(uint32_t aSeverityFlag, const char* aProperty, const char16_t* aParams[], uint32_t aParamsLength); /** * When parsing the policy, the parser internally uses the following helper * variables/members which are used/reset during parsing. The following * example explains how they are used. * The tokenizer separats all input into arrays of arrays of strings, which * are stored in mTokens, for example: * mTokens = [ [ script-src, http://www.example.com, 'self' ], ... ] * * When parsing starts, mCurdir always holds the currently processed array of strings. * In our example: * mCurDir = [ script-src, http://www.example.com, 'self' ] * * During parsing, we process/consume one string at a time of that array. * We set mCurToken to the string we are currently processing; in the first case * that would be: * mCurToken = script-src * which allows to do simple string comparisons to see if mCurToken is a valid directive. * * Continuing parsing, the parser consumes the next string of that array, resetting: * mCurToken = "http://www.example.com" * ^ ^ * mCurChar mEndChar (points *after* the 'm') * mCurValue = "" * * After calling advance() the first time, helpers would hold the following values: * mCurToken = "http://www.example.com" * ^ ^ * mCurChar mEndChar (points *after* the 'm') * mCurValue = "h" * * We continue parsing till all strings of one directive are consumed, then we reset * mCurDir to hold the next array of strings and start the process all over. */ const char16_t* mCurChar; const char16_t* mEndChar; nsString mCurValue; nsString mCurToken; nsTArray<nsString> mCurDir; // helpers to allow invalidation of srcs within script-src and style-src // if either 'strict-dynamic' or at least a hash or nonce is present. bool mHasHashOrNonce; // false, if no hash or nonce is defined bool mStrictDynamic; // false, if 'strict-dynamic' is not defined nsCSPKeywordSrc* mUnsafeInlineKeywordSrc; // null, otherwise invlidate() // cache variables for child-src, frame-src and worker-src handling; // in CSP 3 child-src is deprecated. For backwards compatibility // child-src needs to restrict: // (*) frames, in case frame-src is not expicitly specified // (*) workers, in case worker-src is not expicitly specified // If neither worker-src, nor child-src is present, then script-src // needs to govern workers. nsCSPChildSrcDirective* mChildSrc; nsCSPDirective* mFrameSrc; nsCSPDirective* mWorkerSrc; nsCSPScriptSrcDirective* mScriptSrc; // cache variable to let nsCSPHostSrc know that it's within // the frame-ancestors directive. bool mParsingFrameAncestorsDir; cspTokens mTokens; nsIURI* mSelfURI; nsCSPPolicy* mPolicy; nsCSPContext* mCSPContext; // used for console logging bool mDeliveredViaMetaTag; }; #endif /* nsCSPParser_h___ */