summaryrefslogtreecommitdiffstats
path: root/dom/webidl/CSSLexer.webidl
blob: 86fbfcfffe735be990a689387f8619133564f936 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// The possible values for CSSToken.tokenType.  Each value names one
// kind of token produced by the lexer.  Where a token kind carries
// extra data, the comment says which CSSToken member holds it.
enum CSSTokenType {
  // Whitespace.
  "whitespace",
  // A CSS comment.
  "comment",
  // An identifier.  |text| holds the identifier text.
  "ident",
  // A function token.  |text| holds the function name.  Note that the
  // function token includes (i.e., consumes) the "(" -- but this is
  // not included in |text|.
  "function",
  // "@word".  |text| holds "word", without the "@".
  "at",
  // "#word", where "word" would have been a valid "ident" token
  // without the "#".  |text| holds "word", without the "#".
  "id",
  // "#word", where "word" would NOT have been a valid "ident" token
  // without the "#" (otherwise "id" is used instead).  |text| holds
  // "word", without the "#".
  "hash",
  // A number.  The value is described by CSSToken's |number|,
  // |hasSign| and |isInteger| members.
  "number",
  // A dimensioned number.  The numeric part is described by |number|,
  // |hasSign| and |isInteger|; |text| holds the dimension.
  "dimension",
  // A percentage.  The numeric part is described by |number|,
  // |hasSign| and |isInteger|.
  "percentage",
  // A string.  |text| holds the string contents after escape
  // processing.
  "string",
  // A "bad string".  This can only be returned when a string is
  // unterminated at EOF.  (However, currently the lexer returns
  // ordinary "string" tokens in this situation.)  |text| holds the
  // string contents after escape processing.
  "bad_string",
  // A URL.  |text| holds the URL after escape processing.
  "url",
  // A "bad URL".  This is a URL that is unterminated at EOF.  |text|
  // holds the URL after escape processing.
  "bad_url",
  // A "symbol" is any one-character symbol.  This corresponds to the
  // DELIM token in the CSS specification.  |text| holds the symbol
  // text.
  "symbol",
  // The "~=" token.
  "includes",
  // The "|=" token.
  "dashmatch",
  // The "^=" token.
  "beginsmatch",
  // The "$=" token.
  "endsmatch",
  // The "*=" token.
  "containsmatch",
  // A unicode-range token.  This is currently not fully represented
  // by CSSToken.
  "urange",
  // HTML comment delimiters, either "<!--" or "-->".  Note that each
  // is emitted as a separate token, and the intervening text is lexed
  // as normal; whereas ordinary CSS comments are lexed as a unit.
  "htmlcomment"
};

// A single token, as returned by CSSLexer.nextToken.
//
// |tokenType|, |startOffset| and |endOffset| declare default values
// and so are always present.  The remaining members are optional and
// are only present for the token types noted on each member.
dictionary CSSToken {
  // The token type.
  CSSTokenType tokenType = "whitespace";

  // Offset of the first character of the token.
  unsigned long startOffset = 0;
  // Offset of the character after the final character of the token
  // (i.e., an exclusive end).  This is chosen so that the two offsets
  // can be passed to |substring| to yield the exact contents of the
  // token.
  unsigned long endOffset = 0;

  // If the token is a number, percentage, or dimension, this holds
  // the value.  This is not present for other token types.
  double number;
  // If the token is a number, percentage, or dimension, this is true
  // if and only if the number had an explicit sign.  This is not
  // present for other token types.
  boolean hasSign;
  // If the token is a number, percentage, or dimension, this is true
  // if and only if the number was specified as an integer.  This is
  // not present for other token types.
  boolean isInteger;

  // Text associated with the token.  This is only present for the
  // token types listed below, where it holds:
  //
  // Token type    Meaning
  // ===============================
  //    ident      The identifier.
  //    function   The function name.  Note that the "(" is part
  //               of the token but is not present in |text|.
  //    at         The word (without the leading "@").
  //    id         The word (without the leading "#").
  //    hash       The word (without the leading "#").
  //    dimension  The dimension.
  //    string     The string contents after escape processing.
  //    bad_string Ditto.
  //    url        The URL after escape processing.
  //    bad_url    Ditto.
  //    symbol     The symbol text.
  DOMString text;
};

/**
 * CSSLexer is an interface to the CSS lexer.  It tokenizes an
 * input stream and returns CSS tokens, one per call to nextToken.
 *
 * The interface is marked [ChromeOnly], so it is only exposed to
 * privileged (chrome) code and not to web content.
 *
 * @see inIDOMUtils.getCSSLexer to create an instance of the lexer.
 */
[ChromeOnly]
interface CSSLexer
{
  /**
   * The line number of the most recently returned token.  Line
   * numbers are 0-based.
   */
  readonly attribute unsigned long lineNumber;

  /**
   * The column number of the most recently returned token.  Column
   * numbers are 0-based.
   */
  readonly attribute unsigned long columnNumber;

  /**
   * When EOF is reached, the last token might be unterminated in some
   * ways.  This method takes an input string and returns it with the
   * needed terminators appended.  In particular:
   *
   * 1. If EOF occurs mid-string, this will append the correct quote.
   * 2. If EOF occurs in a url token, this will append the close paren.
   * 3. If EOF occurs in a comment this will append the comment closer.
   *
   * A trailing backslash might also have been present in the input
   * string.  This is handled in different ways, depending on the
   * context and on preserveBackslash:
   *
   * - If preserveBackslash is true, the existing backslash at the
   *   end of inputString is preserved, and a new backslash is
   *   appended.  That is, the input |\| is transformed to |\\|, and
   *   the input |'\| is transformed to |'\\'|.
   *
   * - If preserveBackslash is false and the backslash appears in a
   *   string context, the trailing backslash is dropped from
   *   inputString.  That is, |"\| is transformed to |""|.
   *
   * - If preserveBackslash is false and the backslash appears outside
   *   of a string context, U+FFFD is appended.  That is, |\| is
   *   transformed to a string with two characters: backslash followed
   *   by U+FFFD.
   *
   * Passing false for preserveBackslash makes the result conform to
   * the CSS Syntax specification.  However, passing true may give
   * somewhat more intuitive behavior.
   *
   * NOTE(review): presumably the fixup is based on the state this
   * lexer reached at EOF, so inputString would be the text that was
   * lexed and nextToken would already have returned null -- confirm
   * against the implementation.
   *
   * @param inputString the input string
   * @param preserveBackslash how to handle trailing backslashes;
   *        see above
   * @return the input string with the termination characters appended
   */
  DOMString performEOFFixup(DOMString inputString, boolean preserveBackslash);

  /**
   * Return the next token, or null at EOF.
   *
   * After a token has been returned, lineNumber and columnNumber
   * describe that token.
   *
   * @return a CSSToken describing the next token in the input, or
   *         null if the end of the input has been reached
   */
  CSSToken? nextToken();
};