summaryrefslogtreecommitdiffstats
path: root/testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c')
-rw-r--r--testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c560
1 files changed, 560 insertions, 0 deletions
diff --git a/testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c b/testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c
new file mode 100644
index 000000000..9f9103386
--- /dev/null
+++ b/testing/web-platform/tests/resources/webidl2/test/widlproc/src/lex.c
@@ -0,0 +1,560 @@
+/***********************************************************************
+ * $Id$
+ * Copyright 2009 Aplix Corporation. All rights reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ***********************************************************************/
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include "lex.h"
+#include "misc.h"
+#include "node.h"
+#include "process.h"
+
+struct file {
+ struct file *next;
+ const char *filename;
+ char *buf;
+ const char *pos, *end;
+ unsigned int linenum;
+};
+
+const char keywords[] = KEYWORDS;
+
+static struct file *file, *firstfile;
+static struct tok tok;
+
+/***********************************************************************
+ * readinput : read all input files into memory
+ *
+ * Enter: argv = 0-terminated array of filenames
+ */
+void
+readinput(const char *const *argv)
+{
+ struct file **pfile = &file;
+ for (;;) {
+ struct file *file;
+ const char *filename = *argv++;
+ char *buf = 0;
+ int len = 0, thislen, isstdin;
+ FILE *handle;
+ if (!filename)
+ break;
+ /* Read the file. */
+ isstdin = !strcmp(filename, "-");
+ if (isstdin) {
+ handle = stdin;
+ filename = "<stdin>";
+ } else {
+ handle = fopen(filename, "rb");
+ if (!handle)
+ errorexit("%s: %s", filename, strerror(errno));
+ }
+ for (;;) {
+ thislen = len ? len * 2 : 4096;
+ buf = memrealloc(buf, len + thislen + 1);
+ thislen = fread(buf + len, 1, thislen, handle);
+ if (!thislen)
+ break;
+ len += thislen;
+ }
+ if (ferror(handle))
+ errorexit("%s: I/O error", filename);
+ if (!isstdin)
+ fclose(handle);
+ buf[len] = 0;
+ buf = memrealloc(buf, len + 1);
+ /* Create the file struct for it. */
+ file = memalloc(sizeof(struct file));
+ *pfile = file;
+ pfile = &file->next;
+ file->filename = filename;
+ file->pos = file->buf = buf;
+ file->end = buf + len;
+ file->linenum = 1;
+ }
+ *pfile = 0;
+ firstfile = file;
+}
+
+/***********************************************************************
+ * lexerrorexit : error and exit with line number
+ */
+static void
+lexerrorexit(const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ vlocerrorexit(file->filename, file->linenum, format, ap);
+ va_end(ap);
+}
+
+/***********************************************************************
+ * lexblockcomment : lex a block comment
+ *
+ * Enter: start = start of comment
+ *
+ * Return: tok struct, lifetime until next call to lex
+ */
+static struct tok *
+lexblockcomment(const char *start)
+{
+ const char *p = start + 1;
+ tok.filename = file->filename;
+ tok.linenum = file->linenum;
+ for (;;) {
+ int ch = *++p;
+ if (!ch)
+ lexerrorexit("unterminated block comment");
+ if (ch != '*') {
+ if (ch == '\n')
+ file->linenum++;
+ continue;
+ }
+ ch = p[1];
+ if (!ch)
+ lexerrorexit("unterminated block comment");
+ if (ch == '/')
+ break;
+ }
+ p += 2;
+ file->pos = p;
+ tok.type = TOK_BLOCKCOMMENT;
+ tok.start = start + 2;
+ tok.len = p - start - 4;
+ return &tok;
+}
+
+/***********************************************************************
+ * lexinlinecomment : lex an inline comment
+ *
+ * Enter: start = start of comment, starts with "//"
+ *
+ * Return: tok struct, lifetime until next call to lex
+ */
+static struct tok *
+lexinlinecomment(const char *start)
+{
+ const char *p = start + 2;
+ p = start + 1;
+ for (;;) {
+ int ch = *++p;
+ if (!ch || ch == '\n')
+ break;
+ }
+ p++;
+ file->pos = p;
+ tok.type = TOK_INLINECOMMENT;
+ tok.start = start + 2;
+ tok.len = p - start - 2;
+ tok.filename = file->filename;
+ tok.linenum = file->linenum++;
+ return &tok;
+}
+
+/***********************************************************************
+ * lexnumber : lex a number (or just a '-' symbol)
+ *
+ * Enter: start = start of token
+ *
+ * Return: tok struct, lifetime until next call to lex
+ *
+ * The IDL grammar seems to say that a float can't start with a
+ * decimal point, so that's what we have implemented here.
+ */
+static struct tok *
+lexnumber(const char *start)
+{
+ for (;;) {
+ const char *p = start;
+ const char *octalend = start;
+ int ch = *p;
+ enum { STATE_START, STATE_INT, STATE_HEX, STATE_OCTAL, STATE_BADOCTAL,
+ STATE_DP, STATE_EXPSTART, STATE_EXPSIGN, STATE_EXP
+ } state = STATE_START;
+ if (ch == '-') {
+ ch = *++p;
+ if (ch == 'I') { // starts of Infinity
+ char * infinity = "-Infinity";
+ unsigned int len = strlen(infinity);
+ if (!memcmp(start, infinity, len)) {
+ tok.type = TOK_minusinfinity;
+ tok.start = start;
+ tok.len = len;
+ tok.filename = file->filename;
+ tok.linenum = file->linenum;
+ file->pos = start + len;
+ return &tok;
+ }
+ }
+ }
+ if (ch == '0') {
+ state = STATE_OCTAL;
+ ch = *++p;
+ if ((ch & ~0x20) == 'X') {
+ state = STATE_HEX;
+ ch = *++p;
+ }
+ }
+
+ for (;;) {
+ if ((unsigned)(ch - '0') >= 8) {
+ if ((ch & -2) == '8') {
+ if (state == STATE_OCTAL) {
+ state = STATE_BADOCTAL;
+ octalend = p;
+ }
+ } else if ((unsigned)((ch & ~0x20) - 'A') <= 'F' - 'A') {
+ if (state != STATE_HEX) {
+ if ((ch & ~0x20) != 'E')
+ break;
+ if (state == STATE_HEX || state >= STATE_EXPSTART || state == STATE_START)
+ break;
+ state = STATE_EXPSTART;
+ }
+ } else if (ch == '.') {
+ if (state == STATE_HEX || state >= STATE_DP)
+ break;
+ state = STATE_DP;
+ } else if (ch == '-') {
+ if (state != STATE_EXPSTART)
+ break;
+ state = STATE_EXPSIGN;
+ } else
+ break;
+ }
+ ch = *++p;
+ if (state == STATE_START)
+ state = STATE_INT;
+ else if (state == STATE_EXPSTART || state == STATE_EXPSIGN)
+ state = STATE_EXP;
+ }
+ switch (state) {
+ case STATE_START:
+ /* Must have just been a - character by itself. */
+ tok.type = '-';
+ p = start + 1;
+ break;
+ case STATE_BADOCTAL:
+ p = octalend;
+ /* fall through... */
+ case STATE_INT:
+ case STATE_OCTAL:
+ tok.type = TOK_INTEGER;
+ break;
+ case STATE_HEX:
+ if (p - start == 2 || (p - start == 3 && *start == '-'))
+ p = start + 1;
+ tok.type = TOK_INTEGER;
+ break;
+ case STATE_EXP:
+ case STATE_DP:
+ tok.type = TOK_FLOAT;
+ break;
+ case STATE_EXPSIGN:
+ p--;
+ /* fall through... */
+ case STATE_EXPSTART:
+ p--;
+ tok.type = TOK_FLOAT;
+ break;
+ }
+ tok.start = start;
+ tok.len = p - start;
+ tok.filename = file->filename;
+ tok.linenum = file->linenum;
+ file->pos = p;
+ return &tok;
+ }
+}
+
+/***********************************************************************
+ * lexstring : lex a quoted string
+ *
+ * Enter: start = start of token
+ *
+ * Return: tok struct, lifetime until next call to lex
+ */
+static struct tok *
+lexstring(const char *start)
+{
+ for (;;) {
+ const char *p = start + 1;
+ int ch = *p;
+ for (;;) {
+ if (!ch || ch == '\n')
+ lexerrorexit("unterminated string");
+ if (ch == '"') {
+ tok.type = TOK_STRING;
+ tok.start = start + 1;
+ tok.len = p - start - 1;
+ tok.filename = file->filename;
+ tok.linenum = file->linenum;
+ file->pos = p + 1;
+ return &tok;
+ }
+ /* Note the IDL spec doesn't seem to allow for escape sequences
+ * in strings. */
+ ch = *++p;
+ }
+ }
+}
+
+/***********************************************************************
+ * lexidentifier : lex an identifier
+ *
+ * Enter: start = start of token
+ *
+ * Return: tok struct, lifetime until next call to lex
+ */
+static struct tok *
+lexidentifier(const char *start)
+{
+ const char *p = start + 1;
+ for (;;) {
+ int ch = *p;
+ if (ch != '_' && (unsigned)(ch - '0') >= 10
+ && (unsigned)((ch & ~0x20) - 'A') > 'Z' - 'A')
+ {
+ break;
+ }
+ p++;
+ }
+ tok.type = TOK_IDENTIFIER;
+ tok.start = start;
+ tok.len = p - start;
+ tok.filename = file->filename;
+ tok.linenum = file->linenum;
+ file->pos = p;
+ /* See if this is a keyword. (This search is a bit n-squared.) */
+ {
+ unsigned int type = TOK_DOMString;
+ p = keywords;
+ for (;;) {
+ unsigned int len = strlen(p);
+ if (!len)
+ break;
+ if (len == tok.len && !memcmp(start, p, len)) {
+ tok.type = type;
+ break;
+ }
+ p += len + 1;
+ type++;
+ }
+ }
+ return &tok;
+}
+
+/***********************************************************************
+ * lex : retrieve next token
+ *
+ * Return: tok struct, lifetime until next call to lex
+ */
+struct tok *
+lex(void)
+{
+ const char *p;
+ int ch;
+ for (;;) {
+ if (!file) {
+ tok.type = TOK_EOF;
+ tok.start = "end of file";
+ tok.len = strlen(tok.start);
+ return &tok;
+ }
+ tok.prestart = p = file->pos;
+ /* Flush whitespace. */
+ for (;;) {
+ ch = *p++;
+ switch (ch) {
+ case ' ':
+ case '\t':
+ case '\r':
+ continue;
+ case '\n':
+ ++file->linenum;
+ tok.prestart = p;
+ continue;
+ }
+ break;
+ }
+ p--;
+ if (ch)
+ break;
+ if (p != file->end)
+ lexerrorexit("\\0 byte not allowed");
+ file = file->next;
+ }
+ /* See if we have a comment. */
+ tok.start = p;
+ if (ch == '/') {
+ switch (*++p) {
+ case '*':
+ return lexblockcomment(p - 1);
+ case '/':
+ return lexinlinecomment(p - 1);
+ }
+ tok.type = '/';
+ } else {
+ /* Handle things that start with '-', which is either '-' as a token,
+ * or a number. Handle numbers. */
+ if (ch == '-' || (unsigned)(ch - '0') < 10)
+ return lexnumber(p);
+ /* Handle string. */
+ if (ch == '"')
+ return lexstring(p);
+ /* Handle identifier. */
+ if (ch == '_' || (unsigned)((ch & ~0x20) - 'A') <= 'Z' - 'A')
+ return lexidentifier(p);
+ /* The only multi-symbol token are ... and [] */
+ if (ch == '.') {
+ tok.type = '.';
+ if (*++p == '.' && p[1] == '.') {
+ tok.type = TOK_ELLIPSIS;
+ p += 2;
+ }
+ goto done;
+ }
+ if (ch == '[') {
+ tok.type = '[';
+ if (*++p == ']') {
+ tok.type = TOK_DOUBLEBRACKET;
+ p++;
+ }
+ goto done;
+ }
+ }
+ /* Single symbol token. */
+ tok.type = ch;
+ p++;
+done:
+ tok.filename = file->filename;
+ tok.linenum = file->linenum;
+ tok.len = p - tok.start;
+ file->pos = p;
+ return &tok;
+}
+
+/***********************************************************************
+ * outputwidl : output literal Web IDL input that node was parsed from
+ *
+ * Enter: node = parse node to output literal Web IDL for
+ */
+void
+outputwidl(struct node *node)
+{
+ const char *start = node->wsstart, *end = node->end;
+ /* Find the file that start is in. */
+ struct file *file = firstfile;
+ while (start < file->buf || start >= file->end) {
+ file = file->next;
+ assert(file);
+ }
+ /* Find the (current or) next node that has node->start set. Any such
+ * node needs to be put inside a <ref> element. */
+ while (node && !node->start)
+ node = nodewalk(node);
+ /* Output until we get to the end. This has to cope with the text
+ * spanning multiple input files. */
+ for (;;) {
+ int final = end >= file->buf && end <= file->end;
+ const char *thisend = final ? end : file->end;
+ /* Output the Web IDL, omitting comments. */
+ while (start != end) {
+ const char *p, *p2, *comment, *endcomment;
+ int ch;
+ if (node && start == node->start) {
+ /* We are on the start of the present node in the tree
+ * walk. Put it in a <ref>. */
+ fputs("<ref>", stdout);
+ printtext(node->start, node->end - node->start, 1);
+ fputs("</ref>", stdout);
+ start = node->end;
+ /* Skip to the next node with node->start set if any. */
+ do
+ node = nodewalk(node);
+ while (node && !node->start);
+ continue;
+ }
+ p2 = thisend;
+ if (node && node->start >= file->buf && node->start < p2)
+ p2 = node->start;
+ p = memchr(start, '/', p2 - start);
+ if (!p) {
+ printtext(start, p2 - start, 1);
+ if (p2 != thisend) {
+ start = p2;
+ continue;
+ }
+ break;
+ }
+ /* See if we're at the start of a comment. If so find the end. */
+ comment = 0;
+ if (p + 1 != thisend) {
+ switch (p[1]) {
+ case '*':
+ /* Block comment. */
+ comment = p;
+ p++;
+ do
+ p = memchr(p + 1, '*', thisend - p - 1);
+ while (p[1] != '/');
+ endcomment = p + 2;
+ break;
+ case '/':
+ /* Inline comment. */
+ comment = p;
+ p = memchr(p, '\n', thisend - p);
+ if (!p)
+ p = thisend;
+ endcomment = p;
+ break;
+ }
+ }
+ if (!comment) {
+ /* Not at start of comment. */
+ p++;
+ printtext(start, p - start, 1);
+ start = p;
+ assert(start <= end);
+ continue;
+ }
+ /* If the comment has only whitespace before it on the line,
+ * eat that up. */
+ p = comment;
+ while (p != start && ((ch = p[-1]) == ' ' || ch == '\t'))
+ p--;
+ if (p == start || p[-1] == '\n') {
+ comment = p;
+ /* If the comment has only whitespace after it to the end
+ * of the line, eat that and the newline up. This always
+ * happens for an inline comment on a line by itself. */
+ p = endcomment;
+ while (p != thisend && ((ch = *p) == ' ' || ch == '\t'))
+ p++;
+ if (p != thisend && *p == '\n')
+ p++;
+ endcomment = p;
+ }
+ printtext(start, comment - start, 1);
+ start = endcomment;
+ if (start > thisend)
+ start = thisend;
+ }
+ if (final)
+ break;
+ file = file->next;
+ assert(file);
+ start = file->buf;
+ }
+}