diff options
author | Michal Kubecek <mkubecek@suse.cz> | 2015-04-13 09:21:39 +0200 |
---|---|---|
committer | Michal Kubecek <mkubecek@suse.cz> | 2015-04-13 09:21:39 +0200 |
commit | e2bc6f4153813cc570ae814c8ddb74628009b488 (patch) | |
tree | a40b171be1d859c2232ccc94f758010f9ae54d3c /src/parser/scanner.lxx | |
download | twinkle-e2bc6f4153813cc570ae814c8ddb74628009b488.tar twinkle-e2bc6f4153813cc570ae814c8ddb74628009b488.tar.gz twinkle-e2bc6f4153813cc570ae814c8ddb74628009b488.tar.lz twinkle-e2bc6f4153813cc570ae814c8ddb74628009b488.tar.xz twinkle-e2bc6f4153813cc570ae814c8ddb74628009b488.zip |
initial checkin
Check in contents of upstream 1.4.2 tarball, exclude generated files.
Diffstat (limited to 'src/parser/scanner.lxx')
-rw-r--r-- | src/parser/scanner.lxx | 333 |
1 files changed, 333 insertions, 0 deletions
/*
    Copyright (C) 2005-2009  Michel de Boer <michel@twinklephone.com>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

/*
    Lexical scanner (flex) for SIP messages.

    The scanner is context driven: on every call of the scanning routine
    the value of t_parser::context selects the start condition, so the
    same input characters can be tokenized differently depending on what
    the parser expects next (URI, number, date, comment, ...).

    String valued tokens are returned in yylval.yyt_str as a heap allocated
    std::string that is registered through MEMMAN_NEW; numeric tokens are
    returned in yylval.yyt_ulong or yylval.yyt_int.
    NOTE(review): the receiver of a yyt_str token presumably has to delete
    it -- confirm against the parser actions.
*/

%{
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <math.h>
#include <string>
#include "parse_ctrl.h"
#include "parser.h"
#include "util.h"
#include "audits/memman.h"

using namespace std;
%}

%option noyywrap
%option stack

/* Character classes */
DIGIT		[0-9]
HEXDIG		[0-9a-fA-F]
ALPHA		[a-zA-Z]
CAPITALS	[A-Z]
ALNUM		[a-zA-Z0-9]
TOKEN_SYM	[[:alnum:]\-\.!%\*_\+\`\'~]
WORD_SYM	[[:alnum:]\-\.!%\*_\+\`\'~\(\)<>:\\\"\/\[\]\?\{\}]

/*
    Exclusive start conditions.
    All except C_QSTRING and C_RPAREN mirror a t_parser::X_* context.
    C_QSTRING is entered from within the scanner when an opening quote is
    seen; C_RPAREN is entered from C_COMMENT to consume a nested ")".
*/
%x C_URI
%x C_URI_SPECIAL
%x C_QSTRING
%x C_LANG
%x C_WORD
%x C_NUM
%x C_DATE
%x C_LINE
%x C_COMMENT
%x C_NEW
%x C_AUTH_SCHEME
%x C_RPAREN
%x C_IPV6ADDR
%x C_PARAMVAL

%%
	/* Executed each time the scanning routine is entered: map the
	   context requested by the parser to the matching start condition.
	   Any context without a dedicated start condition uses INITIAL. */
	switch (t_parser::context) {
	case t_parser::X_URI:		BEGIN(C_URI); break;
	case t_parser::X_URI_SPECIAL:	BEGIN(C_URI_SPECIAL); break;
	case t_parser::X_LANG:		BEGIN(C_LANG); break;
	case t_parser::X_WORD:		BEGIN(C_WORD); break;
	case t_parser::X_NUM:		BEGIN(C_NUM); break;
	case t_parser::X_DATE:		BEGIN(C_DATE); break;
	case t_parser::X_LINE:		BEGIN(C_LINE); break;
	case t_parser::X_COMMENT:	BEGIN(C_COMMENT); break;
	case t_parser::X_NEW:		BEGIN(C_NEW); break;
	case t_parser::X_AUTH_SCHEME:	BEGIN(C_AUTH_SCHEME); break;
	case t_parser::X_IPV6ADDR:	BEGIN(C_IPV6ADDR); break;
	case t_parser::X_PARAMVAL:	BEGIN(C_PARAMVAL); break;
	default:			BEGIN(INITIAL);
	}

	/* Headers.
	   A header name is only recognized at the beginning of a line.
	   The single letter alternatives are the compact header forms.
	   In flex the ^ anchor applies to the complete rule, so both
	   alternatives of (Name|x) are anchored to the start of the line.
	   A longer token at the start of a line (e.g. an unknown header
	   starting with a known name) is matched by the T_HDR_UNKNOWN rule
	   because flex prefers the longest match. */
^Accept				{ return T_HDR_ACCEPT; }
^Accept-Encoding		{ return T_HDR_ACCEPT_ENCODING; }
^Accept-Language		{ return T_HDR_ACCEPT_LANGUAGE; }
^Alert-Info			{ return T_HDR_ALERT_INFO; }
^Allow				{ return T_HDR_ALLOW; }
^(Allow-Events|u)		{ return T_HDR_ALLOW_EVENTS; }
^Authentication-Info		{ return T_HDR_AUTHENTICATION_INFO; }
^Authorization			{ return T_HDR_AUTHORIZATION; }
^(Call-ID|i)			{ return T_HDR_CALL_ID; }
^Call-Info			{ return T_HDR_CALL_INFO; }
^(Contact|m)			{ return T_HDR_CONTACT; }
^Content-Disposition		{ return T_HDR_CONTENT_DISP; }
^(Content-Encoding|e)		{ return T_HDR_CONTENT_ENCODING; }
^Content-Language		{ return T_HDR_CONTENT_LANGUAGE; }
^(Content-Length|l)		{ return T_HDR_CONTENT_LENGTH; }
^(Content-Type|c)		{ return T_HDR_CONTENT_TYPE; }
^CSeq				{ return T_HDR_CSEQ; }
^Date				{ return T_HDR_DATE; }
^Error-Info			{ return T_HDR_ERROR_INFO; }
^(Event|o)			{ return T_HDR_EVENT; }
^Expires			{ return T_HDR_EXPIRES; }
^(From|f)			{ return T_HDR_FROM; }
^In-Reply-To			{ return T_HDR_IN_REPLY_TO; }
^Max-Forwards			{ return T_HDR_MAX_FORWARDS; }
^Min-Expires			{ return T_HDR_MIN_EXPIRES; }
^MIME-Version			{ return T_HDR_MIME_VERSION; }
^Organization			{ return T_HDR_ORGANIZATION; }
^P-Asserted-Identity		{ return T_HDR_P_ASSERTED_IDENTITY; }
^P-Preferred-Identity		{ return T_HDR_P_PREFERRED_IDENTITY; }
^Priority			{ return T_HDR_PRIORITY; }
^Privacy			{ return T_HDR_PRIVACY; }
^Proxy-Authenticate		{ return T_HDR_PROXY_AUTHENTICATE; }
^Proxy-Authorization		{ return T_HDR_PROXY_AUTHORIZATION; }
^Proxy-Require			{ return T_HDR_PROXY_REQUIRE; }
^RAck				{ return T_HDR_RACK; }
^Record-Route			{ return T_HDR_RECORD_ROUTE; }
^Service-Route			{ return T_HDR_SERVICE_ROUTE; }
^Refer-Sub			{ return T_HDR_REFER_SUB; }
^(Refer-To|r)			{ return T_HDR_REFER_TO; }
^(Referred-By|b)		{ return T_HDR_REFERRED_BY; }
^Replaces			{ return T_HDR_REPLACES; }
^Reply-To			{ return T_HDR_REPLY_TO; }
^Require			{ return T_HDR_REQUIRE; }
^(Request-Disposition|d)	{ return T_HDR_REQUEST_DISPOSITION; }
^Retry-After			{ return T_HDR_RETRY_AFTER; }
^Route				{ return T_HDR_ROUTE; }
^RSeq				{ return T_HDR_RSEQ; }
^Server				{ return T_HDR_SERVER; }
^SIP-ETag			{ return T_HDR_SIP_ETAG; }
^SIP-If-Match			{ return T_HDR_SIP_IF_MATCH; }
^(Subject|s)			{ return T_HDR_SUBJECT; }
^Subscription-State		{ return T_HDR_SUBSCRIPTION_STATE; }
^(Supported|k)			{ return T_HDR_SUPPORTED; }
^Timestamp			{ return T_HDR_TIMESTAMP; }
^(To|t)				{ return T_HDR_TO; }
	/* Accept the canonical capitalization like all other header names;
	   the lower case spelling is kept for backward compatibility.
	   NOTE(review): if the scanner is generated case-insensitively
	   (flex -i) both spellings were already accepted -- confirm the
	   build flags. */
^[Uu]nsupported			{ return T_HDR_UNSUPPORTED; }
^User-Agent			{ return T_HDR_USER_AGENT; }
^(Via|v)			{ return T_HDR_VIA; }
^Warning			{ return T_HDR_WARNING; }
^WWW-Authenticate		{ return T_HDR_WWW_AUTHENTICATE; }
	/* Any other token at the start of a line is an unknown header */
^{TOKEN_SYM}+			{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_HDR_UNKNOWN; }

	/* Token as defined in RFC 3261 */
{TOKEN_SYM}+			{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_TOKEN; }

	/* Switch to quoted string context.
	   The state is pushed so that the end of the quoted string can
	   return to whatever start condition was active. */
\"				{ yy_push_state(C_QSTRING); }

	/* End of line */
\r\n				{ return T_CRLF; }
\n				{ return T_CRLF; }

[[:blank:]]			/* Skip white space */

	/* Single character token */
.				{ return yytext[0]; }

	/* URI.
	   This context scans a URI including parameters.
	   Text followed by < is returned as a display name; text followed
	   by optional blanks and > is returned as a URI.
	   The syntax of a URI will be checked outside the scanner.
	*/
<C_URI>\"			{ yy_push_state(C_QSTRING); }
<C_URI>{TOKEN_SYM}({TOKEN_SYM}|[[:blank:]])*/<	{
				  yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_DISPLAY; }
<C_URI>[^[:blank:]<>\r\n]+/[[:blank:]]*>	{
				  yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_URI; }
<C_URI>\*			{ return T_URI_WILDCARD; }
<C_URI>[^[:blank:]<>\"\r\n]+	{
				  yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_URI; }
<C_URI>[[:blank:]]		/* Skip white space */
<C_URI>.			{ return yytext[0]; }
<C_URI>\n			{ return T_ERROR; }

	/* URI special case.
	   In several headers (e.g. From, To, Contact, Reply-To) the URI
	   can be enclosed by < and >.
	   If it is enclosed then parameters belong to the URI, if it
	   is not enclosed then parameters belong to the header.
	   Parameters are separated by a semi-colon.
	   For the URI special case, parameters belong to the header,
	   therefore a semi-colon terminates the URI token here.
	   If the parser receives a < from the scanner, then the parser
	   will switch to the normal URI case.
	   The syntax of a URI will be checked outside the scanner.
	*/
<C_URI_SPECIAL>\"		{ yy_push_state(C_QSTRING); }
<C_URI_SPECIAL>{TOKEN_SYM}({TOKEN_SYM}|[[:blank:]])*/<	{
				  yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_DISPLAY; }
<C_URI_SPECIAL>\*		{ return T_URI_WILDCARD; }
<C_URI_SPECIAL>[^[:blank:]<>;\"\r\n]+	{
				  yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_URI; }
<C_URI_SPECIAL>[[:blank:]]	/* Skip white space */
<C_URI_SPECIAL>.		{ return yytext[0]; }
<C_URI_SPECIAL>\n		{ return T_ERROR; }

	/* Quoted string (starting after the open quote; the closing quote
	   will be consumed but not returned).
	   The string is collected piecewise: yymore() makes the next match
	   append to yytext instead of replacing it.
	   A backslash escapes the character following it (quoted-pair in
	   RFC 3261), whatever that character is. Matching the backslash
	   together with the escaped character makes sure that an escaped
	   backslash directly before the closing quote does not turn that
	   quote into an escaped quote, and that an escape preceded by
	   ordinary text is not rejected.
	   Every rule that ends the string pops back to the start condition
	   that was active when the open quote was seen.
	*/
	/* A backslash that is not followed by an escapable character
	   (i.e. followed by CR, LF or end of input) */
<C_QSTRING>\\				{ yymore(); }
	/* Ordinary text up to and including an escaped character */
<C_QSTRING>[^\"\\\r\n]*\\[^\r\n]	{ yymore(); }
	/* Ordinary text up to and including the closing quote */
<C_QSTRING>[^\"\\\r\n]*\"	{ yy_pop_state();
				  /* Strip the closing quote */
				  yytext[strlen(yytext)-1] = '\0';
				  yylval.yyt_str = new string(unescape(string(yytext)));
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_QSTRING; }
	/* End of line before the closing quote: unterminated string */
<C_QSTRING>[^\"\\\n]*\n		{ yy_pop_state(); return T_ERROR; }
<C_QSTRING>.			{ yy_pop_state(); return T_ERROR; }

	/* Comment (starting after LPAREN till RPAREN).
	   Comments can be nested; the nesting level is tracked through
	   t_parser::inc_comment_level() and t_parser::dec_comment_level().
	   As in a quoted string, a backslash escapes the character that
	   follows it, so escaped parentheses do not change the nesting
	   level and an escaped backslash does not escape a parenthesis
	   that follows it.
	*/
	/* A backslash that is not followed by an escapable character */
<C_COMMENT>\\				{ yymore(); }
	/* Ordinary text up to and including an escaped character */
<C_COMMENT>[^\(\)\\\r\n]*\\[^\r\n]	{ yymore(); }
	/* Start of a nested comment */
<C_COMMENT>[^\(\)\\\r\n]*\(	{ t_parser::inc_comment_level(); yymore(); }
	/* Text up to, but not including, a closing parenthesis.
	   When dec_comment_level() returns true the parenthesis is part of
	   the comment text: C_RPAREN consumes it and scanning of the
	   comment continues. Otherwise the comment is complete and the
	   parenthesis is left in the input.
	   NOTE(review): true presumably means that a nested comment was
	   closed and the outer comment is still open -- confirm in
	   t_parser. */
<C_COMMENT>[^\(\)\\\r\n]*/\)	{ if (t_parser::dec_comment_level()) {
					BEGIN(C_RPAREN);
					yymore();
				  } else {
					yylval.yyt_str = new string(yytext);
					MEMMAN_NEW(yylval.yyt_str);
					return T_COMMENT;
				  }
				}
	/* End of line inside a comment: unterminated comment */
<C_COMMENT>[^\(\)\\\n]*\n	{ return T_ERROR; }
<C_COMMENT>.			{ return T_ERROR; }
	/* Consume the closing parenthesis of a nested comment */
<C_RPAREN>\)			{ BEGIN(C_COMMENT); yymore(); }

	/* Language tag: groups of 1 to 8 letters separated by hyphens */
<C_LANG>{ALPHA}{1,8}(\-{ALPHA}{1,8})*	{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_LANG; }
<C_LANG>[[:blank:]]		/* Skip white space */
<C_LANG>.			{ return yytext[0]; }
<C_LANG>\r\n			{ return T_CRLF; }
<C_LANG>\n			{ return T_CRLF; }

	/* Word: like a token, but with a larger set of allowed symbols */
<C_WORD>{WORD_SYM}+		{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_WORD; }
<C_WORD>[[:blank:]]		/* Skip white space */
<C_WORD>.			{ return yytext[0]; }
<C_WORD>\r\n			{ return T_CRLF; }
<C_WORD>\n			{ return T_CRLF; }

	/* Number: unsigned decimal, converted with strtoul.
	   NOTE(review): values that do not fit an unsigned long are not
	   detected here. */
<C_NUM>{DIGIT}+			{ yylval.yyt_ulong = strtoul(yytext, NULL, 10); return T_NUM; }
<C_NUM>[[:blank:]]		/* Skip white space */
<C_NUM>.			{ return yytext[0]; }
<C_NUM>\r\n			{ return T_CRLF; }
<C_NUM>\n			{ return T_CRLF; }

	/* Date.
	   Week days are numbered Sun = 0 .. Sat = 6 and months
	   Jan = 0 .. Dec = 11.
	*/
<C_DATE>Mon			{ yylval.yyt_int = 1; return T_WKDAY; }
<C_DATE>Tue			{ yylval.yyt_int = 2; return T_WKDAY; }
<C_DATE>Wed			{ yylval.yyt_int = 3; return T_WKDAY; }
<C_DATE>Thu			{ yylval.yyt_int = 4; return T_WKDAY; }
<C_DATE>Fri			{ yylval.yyt_int = 5; return T_WKDAY; }
<C_DATE>Sat			{ yylval.yyt_int = 6; return T_WKDAY; }
<C_DATE>Sun			{ yylval.yyt_int = 0; return T_WKDAY; }
<C_DATE>Jan			{ yylval.yyt_int = 0; return T_MONTH; }
<C_DATE>Feb			{ yylval.yyt_int = 1; return T_MONTH; }
<C_DATE>Mar			{ yylval.yyt_int = 2; return T_MONTH; }
<C_DATE>Apr			{ yylval.yyt_int = 3; return T_MONTH; }
<C_DATE>May			{ yylval.yyt_int = 4; return T_MONTH; }
<C_DATE>Jun			{ yylval.yyt_int = 5; return T_MONTH; }
<C_DATE>Jul			{ yylval.yyt_int = 6; return T_MONTH; }
<C_DATE>Aug			{ yylval.yyt_int = 7; return T_MONTH; }
<C_DATE>Sep			{ yylval.yyt_int = 8; return T_MONTH; }
<C_DATE>Oct			{ yylval.yyt_int = 9; return T_MONTH; }
<C_DATE>Nov			{ yylval.yyt_int = 10; return T_MONTH; }
<C_DATE>Dec			{ yylval.yyt_int = 11; return T_MONTH; }
<C_DATE>GMT			{ return T_GMT; }
<C_DATE>{DIGIT}+		{ yylval.yyt_ulong = strtoul(yytext, NULL, 10); return T_NUM; }
<C_DATE>[[:blank:]]		/* Skip white space */
<C_DATE>.			{ return yytext[0]; }
<C_DATE>\r\n			{ return T_CRLF; }
<C_DATE>\n			{ return T_CRLF; }

	/* Get all text till end of line.
	   A lone CR is treated as an end of line as well. */
<C_LINE>[^\r\n]+		{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_LINE; }
<C_LINE>\r\n			{ return T_CRLF; }
<C_LINE>\n			{ return T_CRLF; }
<C_LINE>\r			{ return T_CRLF; }

	/* Start of a new message.
	   Exactly "SIP" is returned as T_SIP; any other run of capitals is
	   returned as a method name. */
<C_NEW>SIP			{ return T_SIP; }
<C_NEW>{CAPITALS}+		{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_METHOD; }
<C_NEW>[[:blank:]]		/* Skip white space */
<C_NEW>.			{ return T_ERROR; }
<C_NEW>\r\n			{ return T_CRLF; }
<C_NEW>\n			{ return T_CRLF; }

	/* Authorization scheme: Digest or any other token */
<C_AUTH_SCHEME>Digest		{ return T_AUTH_DIGEST; }
<C_AUTH_SCHEME>{TOKEN_SYM}+	{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_AUTH_OTHER; }
<C_AUTH_SCHEME>[[:blank:]]	/* Skip white space */
<C_AUTH_SCHEME>.		{ return T_ERROR; }
<C_AUTH_SCHEME>\r\n		{ return T_CRLF; }
<C_AUTH_SCHEME>\n		{ return T_CRLF; }

	/* IPv6 address
	 * NOTE: the validity of the format is not checked here; any run of
	 * hexadecimal digits, colons and dots is accepted.
	 */
<C_IPV6ADDR>({HEXDIG}|[:\.])+	{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_IPV6ADDR; }
<C_IPV6ADDR>[[:blank:]]		/* Skip white space */
<C_IPV6ADDR>.			{ return T_ERROR; }
<C_IPV6ADDR>\r\n		{ return T_CRLF; }
<C_IPV6ADDR>\n			{ return T_CRLF; }

	/* Parameter values may contain an IPv6 address or reference,
	   therefore colons and square brackets are accepted in addition to
	   the token symbols. */
<C_PARAMVAL>({TOKEN_SYM}|[:\[\]])+	{ yylval.yyt_str = new string(yytext);
				  MEMMAN_NEW(yylval.yyt_str);
				  return T_PARAMVAL; }
<C_PARAMVAL>\"			{ yy_push_state(C_QSTRING); }
<C_PARAMVAL>[[:blank:]]		/* Skip white space */
<C_PARAMVAL>.			{ return T_ERROR; }
<C_PARAMVAL>\r\n		{ return T_CRLF; }
<C_PARAMVAL>\n			{ return T_CRLF; }