/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * vim: set ts=8 sts=4 et sw=4 tw=99: */ // Copyright 2012 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef V8_REGEXP_AST_H_ #define V8_REGEXP_AST_H_ // Prevent msvc build failures as indicated in bug 1205328 #ifdef min # undef min #endif #ifdef max # undef max #endif #include "irregexp/RegExpEngine.h" namespace js { namespace irregexp { class RegExpCompiler; class RegExpNode; class RegExpVisitor { public: virtual ~RegExpVisitor() { } #define MAKE_CASE(Name) \ virtual void* Visit##Name(RegExp##Name*, void* data) = 0; FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) #undef MAKE_CASE }; class RegExpTree { public: static const int kInfinity = INT32_MAX; virtual ~RegExpTree() {} virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) = 0; virtual bool IsTextElement() { return false; } virtual bool IsAnchoredAtStart() { return false; } virtual bool IsAnchoredAtEnd() { return false; } virtual int min_match() = 0; virtual int max_match() = 0; // Returns the interval of registers used for captures within this // expression. virtual Interval CaptureRegisters() { return Interval::Empty(); } virtual void AppendToText(RegExpText* text) { MOZ_CRASH("Bad call"); } #define MAKE_ASTYPE(Name) \ virtual RegExp##Name* As##Name(); \ virtual bool Is##Name(); FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) #undef MAKE_ASTYPE }; typedef InfallibleVector RegExpTreeVector; class RegExpDisjunction : public RegExpTree { public: explicit RegExpDisjunction(RegExpTreeVector* alternatives); virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpDisjunction* AsDisjunction(); virtual Interval CaptureRegisters(); virtual bool IsDisjunction(); virtual bool IsAnchoredAtStart(); virtual bool IsAnchoredAtEnd(); virtual int min_match() { return min_match_; } virtual int max_match() { return max_match_; } const RegExpTreeVector& alternatives() { return *alternatives_; } private: RegExpTreeVector* alternatives_; int min_match_; int max_match_; }; class RegExpAlternative : public RegExpTree { public: explicit RegExpAlternative(RegExpTreeVector* nodes); virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpAlternative* AsAlternative(); virtual Interval CaptureRegisters(); virtual bool IsAlternative(); virtual bool IsAnchoredAtStart(); virtual bool IsAnchoredAtEnd(); virtual int min_match() { return min_match_; } virtual int max_match() { return max_match_; } const RegExpTreeVector& nodes() { return *nodes_; } private: RegExpTreeVector* nodes_; int min_match_; int max_match_; }; class RegExpAssertion : public RegExpTree { public: enum AssertionType { START_OF_LINE, START_OF_INPUT, END_OF_LINE, END_OF_INPUT, BOUNDARY, NON_BOUNDARY, NOT_AFTER_LEAD_SURROGATE, NOT_IN_SURROGATE_PAIR }; explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { } virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpAssertion* AsAssertion(); virtual bool IsAssertion(); virtual bool IsAnchoredAtStart(); virtual bool IsAnchoredAtEnd(); virtual int min_match() { return 0; } virtual int max_match() { return 0; } AssertionType assertion_type() { return assertion_type_; } private: AssertionType assertion_type_; }; class CharacterSet { public: explicit CharacterSet(char16_t standard_set_type) : ranges_(nullptr), standard_set_type_(standard_set_type) {} explicit CharacterSet(CharacterRangeVector* ranges) : ranges_(ranges), standard_set_type_(0) {} CharacterRangeVector& ranges(LifoAlloc* alloc); char16_t standard_set_type() { return standard_set_type_; } void set_standard_set_type(char16_t special_set_type) { standard_set_type_ = special_set_type; } bool is_standard() { return standard_set_type_ != 0; } void Canonicalize(); private: CharacterRangeVector* ranges_; // If non-zero, the value represents a standard set (e.g., all whitespace // characters) without having to expand the ranges. char16_t standard_set_type_; }; class RegExpCharacterClass : public RegExpTree { public: RegExpCharacterClass(CharacterRangeVector* ranges, bool is_negated) : set_(ranges), is_negated_(is_negated) {} explicit RegExpCharacterClass(char16_t type) : set_(type), is_negated_(false) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpCharacterClass* AsCharacterClass(); virtual bool IsCharacterClass(); virtual bool IsTextElement() { return true; } virtual int min_match() { return 1; } virtual int max_match() { return 1; } virtual void AppendToText(RegExpText* text); CharacterSet character_set() { return set_; } // TODO(lrn): Remove need for complex version if is_standard that // recognizes a mangled standard set and just do { return set_.is_special(); } bool is_standard(LifoAlloc* alloc); // Returns a value representing the standard character set if is_standard() // returns true. // Currently used values are: // s : unicode whitespace // S : unicode non-whitespace // w : ASCII word character (digit, letter, underscore) // W : non-ASCII word character // d : ASCII digit // D : non-ASCII digit // . : non-unicode non-newline // * : All characters char16_t standard_type() { return set_.standard_set_type(); } CharacterRangeVector& ranges(LifoAlloc* alloc) { return set_.ranges(alloc); } bool is_negated() { return is_negated_; } private: CharacterSet set_; bool is_negated_; }; typedef InfallibleVector CharacterVector; class RegExpAtom : public RegExpTree { public: explicit RegExpAtom(CharacterVector* data) : data_(data) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpAtom* AsAtom(); virtual bool IsAtom(); virtual bool IsTextElement() { return true; } virtual int min_match() { return data_->length(); } virtual int max_match() { return data_->length(); } virtual void AppendToText(RegExpText* text); const CharacterVector& data() { return *data_; } int length() { return data_->length(); } private: CharacterVector* data_; }; class RegExpText : public RegExpTree { public: explicit RegExpText(LifoAlloc* alloc) : elements_(*alloc), length_(0) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpText* AsText(); virtual bool IsText(); virtual bool IsTextElement() { return true; } virtual int min_match() { return length_; } virtual int max_match() { return length_; } virtual void AppendToText(RegExpText* text); void AddElement(TextElement elm) { elements_.append(elm); length_ += elm.length(); } const TextElementVector& elements() { return elements_; } private: TextElementVector elements_; int length_; }; class RegExpQuantifier : public RegExpTree { public: enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) : body_(body), min_(min), max_(max), min_match_(min * body->min_match()), quantifier_type_(type) { if (max > 0 && body->max_match() > kInfinity / max) { max_match_ = kInfinity; } else { max_match_ = max * body->max_match(); } } virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body, RegExpCompiler* compiler, RegExpNode* on_success, bool not_at_start = false); virtual RegExpQuantifier* AsQuantifier(); virtual Interval CaptureRegisters(); virtual bool IsQuantifier(); virtual int min_match() { return min_match_; } virtual int max_match() { return max_match_; } int min() { return min_; } int max() { return max_; } bool is_possessive() { return quantifier_type_ == POSSESSIVE; } bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } bool is_greedy() { return quantifier_type_ == GREEDY; } RegExpTree* body() { return body_; } private: RegExpTree* body_; int min_; int max_; int min_match_; int max_match_; QuantifierType quantifier_type_; }; class RegExpCapture : public RegExpTree { public: explicit RegExpCapture(RegExpTree* body, int index) : body_(body), index_(index) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); static RegExpNode* ToNode(RegExpTree* body, int index, RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpCapture* AsCapture(); virtual bool IsAnchoredAtStart(); virtual bool IsAnchoredAtEnd(); virtual Interval CaptureRegisters(); virtual bool IsCapture(); virtual int min_match() { return body_->min_match(); } virtual int max_match() { return body_->max_match(); } RegExpTree* body() { return body_; } int index() { return index_; } static int StartRegister(int index) { return index * 2; } static int EndRegister(int index) { return index * 2 + 1; } private: RegExpTree* body_; int index_; }; class RegExpLookahead : public RegExpTree { public: RegExpLookahead(RegExpTree* body, bool is_positive, int capture_count, int capture_from) : body_(body), is_positive_(is_positive), capture_count_(capture_count), capture_from_(capture_from) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpLookahead* AsLookahead(); virtual Interval CaptureRegisters(); virtual bool IsLookahead(); virtual bool IsAnchoredAtStart(); virtual int min_match() { return 0; } virtual int max_match() { return 0; } RegExpTree* body() { return body_; } bool is_positive() { return is_positive_; } int capture_count() { return capture_count_; } int capture_from() { return capture_from_; } private: RegExpTree* body_; bool is_positive_; int capture_count_; int capture_from_; }; typedef InfallibleVector RegExpCaptureVector; class RegExpBackReference : public RegExpTree { public: explicit RegExpBackReference(RegExpCapture* capture) : capture_(capture) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpBackReference* AsBackReference(); virtual bool IsBackReference(); virtual int min_match() { return 0; } virtual int max_match() { return capture_->max_match(); } int index() { return capture_->index(); } RegExpCapture* capture() { return capture_; } private: RegExpCapture* capture_; }; class RegExpEmpty : public RegExpTree { public: RegExpEmpty() {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); virtual RegExpEmpty* AsEmpty(); virtual bool IsEmpty(); virtual int min_match() { return 0; } virtual int max_match() { return 0; } static RegExpEmpty* GetInstance() { static RegExpEmpty instance; return &instance; } }; } } // namespace js::irregexp #endif // V8_REGEXP_AST_H_