diff options
Diffstat (limited to 'testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c')
-rw-r--r-- | testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c | 560 |
1 files changed, 560 insertions, 0 deletions
diff --git a/testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c b/testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c new file mode 100644 index 000000000..9f9103386 --- /dev/null +++ b/testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c @@ -0,0 +1,560 @@ +/*********************************************************************** + * $Id$ + * Copyright 2009 Aplix Corporation. All rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ***********************************************************************/ +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include "lex.h" +#include "misc.h" +#include "node.h" +#include "process.h" + +struct file { + struct file *next; + const char *filename; + char *buf; + const char *pos, *end; + unsigned int linenum; +}; + +const char keywords[] = KEYWORDS; + +static struct file *file, *firstfile; +static struct tok tok; + +/*********************************************************************** + * readinput : read all input files into memory + * + * Enter: argv = 0-terminated array of filenames + */ +void +readinput(const char *const *argv) +{ + struct file **pfile = &file; + for (;;) { + struct file *file; + const char *filename = *argv++; + char *buf = 0; + int len = 0, thislen, isstdin; + FILE *handle; + if (!filename) + break; + /* Read the file. */ + isstdin = !strcmp(filename, "-"); + if (isstdin) { + handle = stdin; + filename = "<stdin>"; + } else { + handle = fopen(filename, "rb"); + if (!handle) + errorexit("%s: %s", filename, strerror(errno)); + } + for (;;) { + thislen = len ? len * 2 : 4096; + buf = memrealloc(buf, len + thislen + 1); + thislen = fread(buf + len, 1, thislen, handle); + if (!thislen) + break; + len += thislen; + } + if (ferror(handle)) + errorexit("%s: I/O error", filename); + if (!isstdin) + fclose(handle); + buf[len] = 0; + buf = memrealloc(buf, len + 1); + /* Create the file struct for it. */ + file = memalloc(sizeof(struct file)); + *pfile = file; + pfile = &file->next; + file->filename = filename; + file->pos = file->buf = buf; + file->end = buf + len; + file->linenum = 1; + } + *pfile = 0; + firstfile = file; +} + +/*********************************************************************** + * lexerrorexit : error and exit with line number + */ +static void +lexerrorexit(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + vlocerrorexit(file->filename, file->linenum, format, ap); + va_end(ap); +} + +/*********************************************************************** + * lexblockcomment : lex a block comment + * + * Enter: start = start of comment + * + * Return: tok struct, lifetime until next call to lex + */ +static struct tok * +lexblockcomment(const char *start) +{ + const char *p = start + 1; + tok.filename = file->filename; + tok.linenum = file->linenum; + for (;;) { + int ch = *++p; + if (!ch) + lexerrorexit("unterminated block comment"); + if (ch != '*') { + if (ch == '\n') + file->linenum++; + continue; + } + ch = p[1]; + if (!ch) + lexerrorexit("unterminated block comment"); + if (ch == '/') + break; + } + p += 2; + file->pos = p; + tok.type = TOK_BLOCKCOMMENT; + tok.start = start + 2; + tok.len = p - start - 4; + return &tok; +} + +/*********************************************************************** + * lexinlinecomment : lex an inline comment + * + * Enter: start = start of comment, starts with "//" + * + * Return: tok struct, lifetime until next call to lex + */ +static struct tok * +lexinlinecomment(const char *start) +{ + const char *p = start + 2; + p = start + 1; + for (;;) { + int ch = *++p; + if (!ch || ch == '\n') + break; + } + p++; + file->pos = p; + tok.type = TOK_INLINECOMMENT; + tok.start = start + 2; + tok.len = p - start - 2; + tok.filename = file->filename; + tok.linenum = file->linenum++; + return &tok; +} + +/*********************************************************************** + * lexnumber : lex a number (or just a '-' symbol) + * + * Enter: start = start of token + * + * Return: tok struct, lifetime until next call to lex + * + * The IDL grammar seems to say that a float can't start with a + * decimal point, so that's what we have implemented here. + */ +static struct tok * +lexnumber(const char *start) +{ + for (;;) { + const char *p = start; + const char *octalend = start; + int ch = *p; + enum { STATE_START, STATE_INT, STATE_HEX, STATE_OCTAL, STATE_BADOCTAL, + STATE_DP, STATE_EXPSTART, STATE_EXPSIGN, STATE_EXP + } state = STATE_START; + if (ch == '-') { + ch = *++p; + if (ch == 'I') { // starts of Infinity + char * infinity = "-Infinity"; + unsigned int len = strlen(infinity); + if (!memcmp(start, infinity, len)) { + tok.type = TOK_minusinfinity; + tok.start = start; + tok.len = len; + tok.filename = file->filename; + tok.linenum = file->linenum; + file->pos = start + len; + return &tok; + } + } + } + if (ch == '0') { + state = STATE_OCTAL; + ch = *++p; + if ((ch & ~0x20) == 'X') { + state = STATE_HEX; + ch = *++p; + } + } + + for (;;) { + if ((unsigned)(ch - '0') >= 8) { + if ((ch & -2) == '8') { + if (state == STATE_OCTAL) { + state = STATE_BADOCTAL; + octalend = p; + } + } else if ((unsigned)((ch & ~0x20) - 'A') <= 'F' - 'A') { + if (state != STATE_HEX) { + if ((ch & ~0x20) != 'E') + break; + if (state == STATE_HEX || state >= STATE_EXPSTART || state == STATE_START) + break; + state = STATE_EXPSTART; + } + } else if (ch == '.') { + if (state == STATE_HEX || state >= STATE_DP) + break; + state = STATE_DP; + } else if (ch == '-') { + if (state != STATE_EXPSTART) + break; + state = STATE_EXPSIGN; + } else + break; + } + ch = *++p; + if (state == STATE_START) + state = STATE_INT; + else if (state == STATE_EXPSTART || state == STATE_EXPSIGN) + state = STATE_EXP; + } + switch (state) { + case STATE_START: + /* Must have just been a - character by itself. */ + tok.type = '-'; + p = start + 1; + break; + case STATE_BADOCTAL: + p = octalend; + /* fall through... */ + case STATE_INT: + case STATE_OCTAL: + tok.type = TOK_INTEGER; + break; + case STATE_HEX: + if (p - start == 2 || (p - start == 3 && *start == '-')) + p = start + 1; + tok.type = TOK_INTEGER; + break; + case STATE_EXP: + case STATE_DP: + tok.type = TOK_FLOAT; + break; + case STATE_EXPSIGN: + p--; + /* fall through... */ + case STATE_EXPSTART: + p--; + tok.type = TOK_FLOAT; + break; + } + tok.start = start; + tok.len = p - start; + tok.filename = file->filename; + tok.linenum = file->linenum; + file->pos = p; + return &tok; + } +} + +/*********************************************************************** + * lexstring : lex a quoted string + * + * Enter: start = start of token + * + * Return: tok struct, lifetime until next call to lex + */ +static struct tok * +lexstring(const char *start) +{ + for (;;) { + const char *p = start + 1; + int ch = *p; + for (;;) { + if (!ch || ch == '\n') + lexerrorexit("unterminated string"); + if (ch == '"') { + tok.type = TOK_STRING; + tok.start = start + 1; + tok.len = p - start - 1; + tok.filename = file->filename; + tok.linenum = file->linenum; + file->pos = p + 1; + return &tok; + } + /* Note the IDL spec doesn't seem to allow for escape sequences + * in strings. */ + ch = *++p; + } + } +} + +/*********************************************************************** + * lexidentifier : lex an identifier + * + * Enter: start = start of token + * + * Return: tok struct, lifetime until next call to lex + */ +static struct tok * +lexidentifier(const char *start) +{ + const char *p = start + 1; + for (;;) { + int ch = *p; + if (ch != '_' && (unsigned)(ch - '0') >= 10 + && (unsigned)((ch & ~0x20) - 'A') > 'Z' - 'A') + { + break; + } + p++; + } + tok.type = TOK_IDENTIFIER; + tok.start = start; + tok.len = p - start; + tok.filename = file->filename; + tok.linenum = file->linenum; + file->pos = p; + /* See if this is a keyword. (This search is a bit n-squared.) */ + { + unsigned int type = TOK_DOMString; + p = keywords; + for (;;) { + unsigned int len = strlen(p); + if (!len) + break; + if (len == tok.len && !memcmp(start, p, len)) { + tok.type = type; + break; + } + p += len + 1; + type++; + } + } + return &tok; +} + +/*********************************************************************** + * lex : retrieve next token + * + * Return: tok struct, lifetime until next call to lex + */ +struct tok * +lex(void) +{ + const char *p; + int ch; + for (;;) { + if (!file) { + tok.type = TOK_EOF; + tok.start = "end of file"; + tok.len = strlen(tok.start); + return &tok; + } + tok.prestart = p = file->pos; + /* Flush whitespace. */ + for (;;) { + ch = *p++; + switch (ch) { + case ' ': + case '\t': + case '\r': + continue; + case '\n': + ++file->linenum; + tok.prestart = p; + continue; + } + break; + } + p--; + if (ch) + break; + if (p != file->end) + lexerrorexit("\\0 byte not allowed"); + file = file->next; + } + /* See if we have a comment. */ + tok.start = p; + if (ch == '/') { + switch (*++p) { + case '*': + return lexblockcomment(p - 1); + case '/': + return lexinlinecomment(p - 1); + } + tok.type = '/'; + } else { + /* Handle things that start with '-', which is either '-' as a token, + * or a number. Handle numbers. */ + if (ch == '-' || (unsigned)(ch - '0') < 10) + return lexnumber(p); + /* Handle string. */ + if (ch == '"') + return lexstring(p); + /* Handle identifier. */ + if (ch == '_' || (unsigned)((ch & ~0x20) - 'A') <= 'Z' - 'A') + return lexidentifier(p); + /* The only multi-symbol token are ... and [] */ + if (ch == '.') { + tok.type = '.'; + if (*++p == '.' && p[1] == '.') { + tok.type = TOK_ELLIPSIS; + p += 2; + } + goto done; + } + if (ch == '[') { + tok.type = '['; + if (*++p == ']') { + tok.type = TOK_DOUBLEBRACKET; + p++; + } + goto done; + } + } + /* Single symbol token. */ + tok.type = ch; + p++; +done: + tok.filename = file->filename; + tok.linenum = file->linenum; + tok.len = p - tok.start; + file->pos = p; + return &tok; +} + +/*********************************************************************** + * outputwidl : output literal Web IDL input that node was parsed from + * + * Enter: node = parse node to output literal Web IDL for + */ +void +outputwidl(struct node *node) +{ + const char *start = node->wsstart, *end = node->end; + /* Find the file that start is in. */ + struct file *file = firstfile; + while (start < file->buf || start >= file->end) { + file = file->next; + assert(file); + } + /* Find the (current or) next node that has node->start set. Any such + * node needs to be put inside a <ref> element. */ + while (node && !node->start) + node = nodewalk(node); + /* Output until we get to the end. This has to cope with the text + * spanning multiple input files. */ + for (;;) { + int final = end >= file->buf && end <= file->end; + const char *thisend = final ? end : file->end; + /* Output the Web IDL, omitting comments. */ + while (start != end) { + const char *p, *p2, *comment, *endcomment; + int ch; + if (node && start == node->start) { + /* We are on the start of the present node in the tree + * walk. Put it in a <ref>. */ + fputs("<ref>", stdout); + printtext(node->start, node->end - node->start, 1); + fputs("</ref>", stdout); + start = node->end; + /* Skip to the next node with node->start set if any. */ + do + node = nodewalk(node); + while (node && !node->start); + continue; + } + p2 = thisend; + if (node && node->start >= file->buf && node->start < p2) + p2 = node->start; + p = memchr(start, '/', p2 - start); + if (!p) { + printtext(start, p2 - start, 1); + if (p2 != thisend) { + start = p2; + continue; + } + break; + } + /* See if we're at the start of a comment. If so find the end. */ + comment = 0; + if (p + 1 != thisend) { + switch (p[1]) { + case '*': + /* Block comment. */ + comment = p; + p++; + do + p = memchr(p + 1, '*', thisend - p - 1); + while (p[1] != '/'); + endcomment = p + 2; + break; + case '/': + /* Inline comment. */ + comment = p; + p = memchr(p, '\n', thisend - p); + if (!p) + p = thisend; + endcomment = p; + break; + } + } + if (!comment) { + /* Not at start of comment. */ + p++; + printtext(start, p - start, 1); + start = p; + assert(start <= end); + continue; + } + /* If the comment has only whitespace before it on the line, + * eat that up. */ + p = comment; + while (p != start && ((ch = p[-1]) == ' ' || ch == '\t')) + p--; + if (p == start || p[-1] == '\n') { + comment = p; + /* If the comment has only whitespace after it to the end + * of the line, eat that and the newline up. This always + * happens for an inline comment on a line by itself. */ + p = endcomment; + while (p != thisend && ((ch = *p) == ' ' || ch == '\t')) + p++; + if (p != thisend && *p == '\n') + p++; + endcomment = p; + } + printtext(start, comment - start, 1); + start = endcomment; + if (start > thisend) + start = thisend; + } + if (final) + break; + file = file->next; + assert(file); + start = file->buf; + } +} |