// path: js/src/irregexp/RegExpCharacters.cpp
// blob: 096c027605fc877f5030efbbc01ea24ee8e08911
/* Generated by make_unicode.py DO NOT MODIFY */
/* Unicode version: 9.0.0 */
#include "irregexp/RegExpCharacters.h"

#include "mozilla/Assertions.h"

// Returns the Latin1 character that the non-Latin1 code unit |c| is
// case-equivalent to, or 0 when |c| has no such counterpart.
//
// Two disjoint groups of characters are recognized:
//   - simple case mappings, which apply whatever the value of |unicode|;
//   - case foldings, which apply only when |unicode| is true.
char16_t
js::irregexp::ConvertNonLatin1ToLatin1(char16_t c, bool unicode)
{
    MOZ_ASSERT(c > 0xFF, "Character mustn't be Latin1");

    // Case mappings: honoured in every mode.
    switch (c) {
      case 0x0178:       // LATIN CAPITAL LETTER Y WITH DIAERESIS
        return 0xFF;     // -> LATIN SMALL LETTER Y WITH DIAERESIS
      case 0x039C:       // GREEK CAPITAL LETTER MU
      case 0x03BC:       // GREEK SMALL LETTER MU
        return 0xB5;     // -> MICRO SIGN
      default:
        break;
    }

    // Everything below is a case folding, which only counts in unicode mode.
    if (!unicode)
        return 0;

    switch (c) {
      case 0x017F:       // LATIN SMALL LETTER LONG S
        return 0x73;     // -> LATIN SMALL LETTER S
      case 0x1E9E:       // LATIN CAPITAL LETTER SHARP S
        return 0xDF;     // -> LATIN SMALL LETTER SHARP S
      case 0x212A:       // KELVIN SIGN
        return 0x6B;     // -> LATIN SMALL LETTER K
      case 0x212B:       // ANGSTROM SIGN
        return 0xE5;     // -> LATIN SMALL LETTER A WITH RING ABOVE
      default:
        return 0;
    }
}

// Table of the UTF-16 code units this file lists as space characters.
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list.
// NOTE(review): presumably this backs the regexp \s class -- confirm against
// the users of this table.
const int js::irregexp::kSpaceRanges[] = {
    0x0009, 0x000D + 1, // CHARACTER TABULATION..CARRIAGE RETURN (CR)
    0x0020, 0x0020 + 1, // SPACE
    0x00A0, 0x00A0 + 1, // NO-BREAK SPACE
    0x1680, 0x1680 + 1, // OGHAM SPACE MARK
    0x2000, 0x200A + 1, // EN QUAD..HAIR SPACE
    0x2028, 0x2029 + 1, // LINE SEPARATOR..PARAGRAPH SEPARATOR
    0x202F, 0x202F + 1, // NARROW NO-BREAK SPACE
    0x205F, 0x205F + 1, // MEDIUM MATHEMATICAL SPACE
    0x3000, 0x3000 + 1, // IDEOGRAPHIC SPACE
    0xFEFF, 0xFEFF + 1, // ZERO WIDTH NO-BREAK SPACE
    0xFFFF + 1
};
// Number of entries in kSpaceRanges: 10 pairs plus the trailing entry.
const int js::irregexp::kSpaceRangeCount = 21;

// Same space ranges as kSpaceRanges, with the surrogate code units
// 0xD800..0xDFFF added as one extra range (kept in ascending position).
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list.
const int js::irregexp::kSpaceAndSurrogateRanges[] = {
    0x0009, 0x000D + 1, // CHARACTER TABULATION..CARRIAGE RETURN (CR)
    0x0020, 0x0020 + 1, // SPACE
    0x00A0, 0x00A0 + 1, // NO-BREAK SPACE
    0x1680, 0x1680 + 1, // OGHAM SPACE MARK
    0x2000, 0x200A + 1, // EN QUAD..HAIR SPACE
    0x2028, 0x2029 + 1, // LINE SEPARATOR..PARAGRAPH SEPARATOR
    0x202F, 0x202F + 1, // NARROW NO-BREAK SPACE
    0x205F, 0x205F + 1, // MEDIUM MATHEMATICAL SPACE
    0x3000, 0x3000 + 1, // IDEOGRAPHIC SPACE
    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
    0xFEFF, 0xFEFF + 1, // ZERO WIDTH NO-BREAK SPACE
    0xFFFF + 1
};
// Number of entries in kSpaceAndSurrogateRanges: 11 pairs plus the trailing
// entry.
const int js::irregexp::kSpaceAndSurrogateRangeCount = 23;

// Table of the ASCII word characters: digits, upper- and lower-case Latin
// letters, and LOW LINE ('_').
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list.
// NOTE(review): presumably this backs the regexp \w class -- confirm against
// the users of this table.
const int js::irregexp::kWordRanges[] = {
    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
    0x0041, 0x005A + 1, // LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
    0x005F, 0x005F + 1, // LOW LINE
    0x0061, 0x007A + 1, // LATIN SMALL LETTER A..LATIN SMALL LETTER Z
    0xFFFF + 1
};
// Number of entries in kWordRanges: 4 pairs plus the trailing entry.
const int js::irregexp::kWordRangeCount = 9;

// The ASCII word characters of kWordRanges plus two non-ASCII characters:
// U+017F (LATIN SMALL LETTER LONG S) and U+212A (KELVIN SIGN). These are the
// characters that ConvertNonLatin1ToLatin1, in unicode mode, case folds to
// the ASCII letters 's' (0x73) and 'k' (0x6B).
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list.
const int js::irregexp::kIgnoreCaseWordRanges[] = {
    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
    0x0041, 0x005A + 1, // LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
    0x005F, 0x005F + 1, // LOW LINE
    0x0061, 0x007A + 1, // LATIN SMALL LETTER A..LATIN SMALL LETTER Z
    0x017F, 0x017F + 1, // LATIN SMALL LETTER LONG S
    0x212A, 0x212A + 1, // KELVIN SIGN
    0xFFFF + 1
};
// Number of entries in kIgnoreCaseWordRanges: 6 pairs plus the trailing entry.
const int js::irregexp::kIgnoreCaseWordRangeCount = 13;

// Same ASCII word ranges as kWordRanges, with the surrogate code units
// 0xD800..0xDFFF added as one extra range.
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list.
const int js::irregexp::kWordAndSurrogateRanges[] = {
    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
    0x0041, 0x005A + 1, // LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
    0x005F, 0x005F + 1, // LOW LINE
    0x0061, 0x007A + 1, // LATIN SMALL LETTER A..LATIN SMALL LETTER Z
    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
    0xFFFF + 1
};
// Number of entries in kWordAndSurrogateRanges: 5 pairs plus the trailing
// entry.
const int js::irregexp::kWordAndSurrogateRangeCount = 11;

// Complement, within 0x0000..0xFFFF, of the ignore-case word characters
// (kIgnoreCaseWordRanges) together with the surrogate code units
// 0xD800..0xDFFF: every code unit that is in neither set is covered here.
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list. Because the last range
// runs up to 0xFFFF, its exclusive end has the same value as that trailing
// entry, so 0xFFFF + 1 appears twice in a row.
const int js::irregexp::kNegatedIgnoreCaseWordAndSurrogateRanges[] = {
    0x0000, 0x002F + 1, // NULL..SOLIDUS
    0x003A, 0x0040 + 1, // COLON..COMMERCIAL AT
    0x005B, 0x005E + 1, // LEFT SQUARE BRACKET..CIRCUMFLEX ACCENT
    0x0060, 0x0060 + 1, // GRAVE ACCENT
    0x007B, 0x017E + 1, // LEFT CURLY BRACKET..LATIN SMALL LETTER Z WITH CARON
    0x0180, 0x2129 + 1, // LATIN SMALL LETTER B WITH STROKE..TURNED GREEK SMALL LETTER IOTA
    0x212B, 0xD7FF + 1, // ANGSTROM SIGN..<Unused>
    0xE000, 0xFFFF + 1, // Private Use..<Unused>
    0xFFFF + 1
};
// Number of entries in kNegatedIgnoreCaseWordAndSurrogateRanges: 8 pairs plus
// the trailing entry.
const int js::irregexp::kNegatedIgnoreCaseWordAndSurrogateRangeCount = 17;

// Table of the ASCII decimal digits '0'..'9'.
//
// Layout: one range written as the pair {first, last + 1} (i.e. the end value
// is exclusive), followed by a single trailing 0xFFFF + 1 entry that closes
// the list.
// NOTE(review): presumably this backs the regexp \d class -- confirm against
// the users of this table.
const int js::irregexp::kDigitRanges[] = {
    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
    0xFFFF + 1
};
// Number of entries in kDigitRanges: 1 pair plus the trailing entry.
const int js::irregexp::kDigitRangeCount = 3;

// The ASCII decimal digits '0'..'9' plus the surrogate code units
// 0xD800..0xDFFF.
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list.
const int js::irregexp::kDigitAndSurrogateRanges[] = {
    0x0030, 0x0039 + 1, // DIGIT ZERO..DIGIT NINE
    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
    0xFFFF + 1
};
// Number of entries in kDigitAndSurrogateRanges: 2 pairs plus the trailing
// entry.
const int js::irregexp::kDigitAndSurrogateRangeCount = 5;

// Table holding only the surrogate code units 0xD800..0xDFFF (lead and trail
// surrogates as one contiguous range).
//
// Layout: one range written as the pair {first, last + 1} (i.e. the end value
// is exclusive), followed by a single trailing 0xFFFF + 1 entry that closes
// the list.
const int js::irregexp::kSurrogateRanges[] = {
    0xD800, 0xDFFF + 1, // <Lead Surrogate Min>..<Trail Surrogate Max>
    0xFFFF + 1
};
// Number of entries in kSurrogateRanges: 1 pair plus the trailing entry.
const int js::irregexp::kSurrogateRangeCount = 3;

// Table of the line terminator characters: LF, CR, LINE SEPARATOR and
// PARAGRAPH SEPARATOR.
//
// Layout: a flat, ascending list of boundary values. Each line holds one
// range as the pair {first, last + 1} (i.e. the end value is exclusive), and
// a single trailing 0xFFFF + 1 entry closes the list.
const int js::irregexp::kLineTerminatorRanges[] = {
    0x000A, 0x000A + 1, // LINE FEED (LF)
    0x000D, 0x000D + 1, // CARRIAGE RETURN (CR)
    0x2028, 0x2029 + 1, // LINE SEPARATOR..PARAGRAPH SEPARATOR
    0xFFFF + 1
};
// Number of entries in kLineTerminatorRanges: 3 pairs plus the trailing entry.
const int js::irregexp::kLineTerminatorRangeCount = 7;