testing/web-platform/tests/html/webappapis/atob/base64.html


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306

<!doctype html>
<!-- Originally developed by Aryeh Gregor, funded by Google.  Copyright belongs
to Google. -->
<title>atob()/btoa() tests</title>
<meta charset=utf-8>
<div id=log></div>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<script>
/**
 * Reference btoa() used to compute the expected results for the tests below.
 * Follows the HTML spec, which mostly just defers to RFC4648.
 *
 * The argument is converted with String() (as WebIDL requires).  Instead of
 * throwing, the sentinel string "INVALID_CHARACTER_ERR" is returned when any
 * code unit of the converted string is greater than U+00FF.  Otherwise the
 * padded base64 encoding of the string is returned.
 */
function mybtoa(s) {
    var text = String(s);
    var length = text.length;
    var pos;

    // "The btoa() method must throw an INVALID_CHARACTER_ERR exception if the
    // method's first argument contains any character whose code point is
    // greater than U+00FF."
    for (pos = 0; pos < length; pos++) {
        if (text.charCodeAt(pos) > 255) {
            return "INVALID_CHARACTER_ERR";
        }
    }

    // Consume the input three code units (24 bits) at a time and emit four
    // output characters per group.  A group that runs past the end of the
    // input gets "=" padding in place of the missing six-bit values.
    var encoded = "";
    for (pos = 0; pos < length; pos += 3) {
        var hasSecond = pos + 1 < length;
        var hasThird = pos + 2 < length;
        var first = text.charCodeAt(pos);
        // Missing code units contribute zero bits to the partially-filled
        // six-bit value that precedes the padding.
        var second = hasSecond ? text.charCodeAt(pos + 1) : 0;
        var third = hasThird ? text.charCodeAt(pos + 2) : 0;

        encoded += btoaLookup(first >> 2);
        encoded += btoaLookup(((first & 0x03) << 4) | (second >> 4));
        encoded += hasSecond
            ? btoaLookup(((second & 0x0f) << 2) | (third >> 6))
            : "=";
        encoded += hasThird ? btoaLookup(third & 0x3f) : "=";
    }
    return encoded;
}

/**
 * Base64 alphabet lookup for mybtoa(): maps a six-bit number (0..63) to its
 * ASCII character.  0-25 are A-Z, 26-51 are a-z, 52-61 are 0-9, 62 is "+"
 * and 63 is "/".
 */
function btoaLookup(idx) {
    var upperA = 'A'.charCodeAt(0);
    var lowerA = 'a'.charCodeAt(0);
    var zero = '0'.charCodeAt(0);
    var code;

    if (idx < 26) {
        code = idx + upperA;
    } else if (idx < 52) {
        code = idx - 26 + lowerA;
    } else if (idx < 62) {
        code = idx - 52 + zero;
    } else {
        // Anything other than 62 or 63 yields undefined.  A real
        // implementation would throw INVALID_CHARACTER_ERR here, but this
        // case won't be hit in the tests.
        return idx == 62 ? '+' : (idx == 63 ? '/' : undefined);
    }
    return String.fromCharCode(code);
}

/**
 * Reference atob() following the HTML spec steps quoted below, except that
 * null is returned where the spec says to throw INVALID_CHARACTER_ERR.
 *
 * The argument is converted with String() (WebIDL DOMString conversion).
 * Returns the decoded string, or null if the input is not valid base64.
 */
function myatob(input) {
    // "Remove all space characters from input."
    var data = String(input).replace(/[ \t\n\f\r]/g, "");

    // "If the length of input divides by 4 leaving no remainder, then: if
    // input ends with one or two U+003D EQUALS SIGN (=) characters, remove
    // them from input."
    //
    // replace() leaves the string untouched when there is no trailing "=",
    // so no separate check for the padding is needed.
    if (data.length % 4 == 0) {
        data = data.replace(/==?$/, "");
    }

    // "If the length of input divides by 4 leaving a remainder of 1, throw an
    // INVALID_CHARACTER_ERR exception and abort these steps."
    if (data.length % 4 == 1) {
        return null;
    }

    // "If input contains a character that is not in the following list of
    // characters and character ranges, throw an INVALID_CHARACTER_ERR
    // exception and abort these steps:
    //
    // U+002B PLUS SIGN (+)
    // U+002F SOLIDUS (/)
    // U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9)
    // U+0041 LATIN CAPITAL LETTER A to U+005A LATIN CAPITAL LETTER Z
    // U+0061 LATIN SMALL LETTER A to U+007A LATIN SMALL LETTER Z"
    if (!/^[+/0-9A-Za-z]*$/.test(data)) {
        return null;
    }

    // "Let output be a string, initially empty."
    var decoded = "";

    // "Let buffer be a buffer that can have bits appended to it, initially
    // empty."
    //
    // The buffer is an integer that grows by shift-and-or; bitCount records
    // how many bits it currently holds.
    var bits = 0;
    var bitCount = 0;

    // "While position does not point past the end of input, run these
    // substeps:"
    var pos = 0;
    while (pos < data.length) {
        // "Append to buffer the six bits corresponding to number, most
        // significant bit first."  atobLookup() implements the spec's table.
        bits = (bits << 6) | atobLookup(data[pos]);
        bitCount += 6;

        // "If buffer has accumulated 24 bits, interpret them as three 8-bit
        // big-endian numbers. Append the three characters with code points
        // equal to those numbers to output, in the same order, and then empty
        // buffer."
        if (bitCount == 24) {
            decoded += String.fromCharCode(
                (bits >> 16) & 0xff,
                (bits >> 8) & 0xff,
                bits & 0xff);
            bits = 0;
            bitCount = 0;
        }

        // "Advance position by one character."
        pos++;
    }

    // "If buffer is not empty, it contains either 12 or 18 bits. If it
    // contains 12 bits, discard the last four and interpret the remaining
    // eight as an 8-bit big-endian number. If it contains 18 bits, discard the
    // last two and interpret the remaining 16 as two 8-bit big-endian numbers.
    // Append the one or two characters with code points equal to those one or
    // two numbers to output, in the same order."
    if (bitCount == 12) {
        decoded += String.fromCharCode(bits >> 4);
    } else if (bitCount == 18) {
        decoded += String.fromCharCode((bits >> 10) & 0xff, (bits >> 2) & 0xff);
    }

    // "Return output."
    return decoded;
}

/**
 * Inverse base64 alphabet lookup for myatob(): maps an ASCII character to the
 * corresponding six-bit number.  A-Z give 0-25, a-z give 26-51, 0-9 give
 * 52-61, "+" gives 62 and "/" gives 63.
 */
function atobLookup(chr) {
    // Each row: [pattern identifying the range, first character of the range,
    // six-bit value assigned to that first character].
    var ranges = [
        [/[A-Z]/, "A", 0],
        [/[a-z]/, "a", 26],
        [/[0-9]/, "0", 52]
    ];

    for (var k = 0; k < ranges.length; k++) {
        var range = ranges[k];
        if (range[0].test(chr)) {
            return chr.charCodeAt(0) - range[1].charCodeAt(0) + range[2];
        }
    }

    // Any other character yields undefined.  A real implementation would
    // throw here, but this should not be hit in the tests.
    return chr == "+" ? 62 : (chr == "/" ? 63 : undefined);
}

/**
 * Returns true if String(input) contains any code unit greater than U+00FF
 * (the condition under which btoa() must throw), false otherwise.
 */
function btoaException(input) {
    // Without the "u" flag a character class matches individual UTF-16 code
    // units, so this finds any code unit outside the range 0..255.
    return /[^\u0000-\u00ff]/.test(String(input));
}

/**
 * Test body for a single btoa() input.  If String(input) contains a code unit
 * above 255, asserts that btoa(input) throws InvalidCharacterError.  Otherwise
 * asserts that btoa(input) matches the reference mybtoa(), and that
 * atob(btoa(input)) gives back String(input).
 */
function testBtoa(input) {
    var asString = String(input);

    // "The btoa() method must throw an INVALID_CHARACTER_ERR exception if the
    // method's first argument contains any character whose code point is
    // greater than U+00FF."
    //
    // Find the first offending code unit, if there is one.
    var offending = -1;
    for (var idx = 0; idx < asString.length; idx++) {
        if (asString.charCodeAt(idx) > 255) {
            offending = idx;
            break;
        }
    }

    if (offending != -1) {
        var description = "Code unit " + offending + " has value " +
            asString.charCodeAt(offending) + ", which is greater than 255";
        assert_throws("InvalidCharacterError", function() { btoa(input); },
            description);
        return;
    }

    var actual = btoa(input);
    var expected = mybtoa(input);
    assert_equals(actual, expected);

    var roundTripped = atob(btoa(input));
    assert_equals(roundTripped, String(input), "atob(btoa(input)) must be the same as String(input)");
}

// Inputs for the btoa() tests.  Each entry is later turned into a
// [test name, input] pair and handed to generate_tests() together with
// testBtoa.
var tests = ["עברית", "", "ab", "abc", "abcd", "abcde",
    // This one is thrown in because IE9 seems to fail atob(btoa()) on it.  Or
    // possibly to fail btoa().  I actually can't tell what's happening here,
    // but it doesn't hurt.
    "\xff\xff\xc0",
    // Is your DOM implementation binary-safe?
    "\0a", "a\0b",
    // WebIDL tests.
    undefined, null, 7, 12, 1.5, true, false, NaN, +Infinity, -Infinity, 0, -0,
    {toString: function() { return "foo" }},
];
// Every single code unit from 0 through 257.  The last two (256 and 257) are
// above the 255 limit, so mybtoa() reports them as errors.
for (var i = 0; i < 258; i++) {
    tests.push(String.fromCharCode(i));
}
tests.push(String.fromCharCode(10000));
tests.push(String.fromCharCode(65534));
tests.push(String.fromCharCode(65535));

// This is supposed to be U+10000.
tests.push(String.fromCharCode(0xd800, 0xdc00));

// Turn each raw input into a [test name, input] pair.  The name states the
// outcome predicted by the reference implementation; the reference result is
// computed once per input and reused for the name.
tests = tests.map(
    function(elem) {
        var expected = mybtoa(elem);
        if (expected === "INVALID_CHARACTER_ERR") {
            return ["btoa("  + format_value(elem) + ") must raise INVALID_CHARACTER_ERR", elem];
        }
        return ["btoa(" + format_value(elem) + ") == " + format_value(expected), elem];
    }
);

// One more case: all 256 code units that btoa() accepts, in a single string.
var everything = "";
for (var i = 0; i < 256; i++) {
    everything += String.fromCharCode(i);
}
tests.push(["btoa(first 256 code points concatenated)", everything]);

generate_tests(testBtoa, tests);

/**
 * Test body for a single atob() input.  When the reference myatob() returns
 * null (invalid input), asserts that atob(input) throws InvalidCharacterError;
 * otherwise asserts that atob(input) equals the reference result.
 */
function testAtob(input) {
    var expected = myatob(input);

    if (expected !== null) {
        assert_equals(atob(input), expected);
        return;
    }

    // null from the reference implementation stands for "must throw".
    assert_throws("InvalidCharacterError", function() { atob(input) });
}

// Inputs for the atob() tests.  Each entry is later turned into a
// [test name, input] pair and handed to generate_tests() together with
// testAtob.
var tests = ["", "abcd", " abcd", "abcd ", " abcd===", "abcd=== ",
    "abcd ===", "a", "ab", "abc", "abcde", String.fromCharCode(0xd800, 0xdc00),
    "=", "==", "===", "====", "=====",
    "a=", "a==", "a===", "a====", "a=====",
    "ab=", "ab==", "ab===", "ab====", "ab=====",
    "abc=", "abc==", "abc===", "abc====", "abc=====",
    "abcd=", "abcd==", "abcd===", "abcd====", "abcd=====",
    "abcde=", "abcde==", "abcde===", "abcde====", "abcde=====",
    "=a", "=a=", "a=b", "a=b=", "ab=c", "ab=c=", "abc=d", "abc=d=",
    // With whitespace
    "ab\tcd", "ab\ncd", "ab\fcd", "ab\rcd", "ab cd", "ab\u00a0cd",
    "ab\t\n\f\r cd", " \t\n\f\r ab\t\n\f\r cd\t\n\f\r ",
    "ab\t\n\f\r =\t\n\f\r =\t\n\f\r ",
    // Test if any bits are set at the end.  These should all be fine, since
    // they end with A, which becomes 0:
    "A", "/A", "//A", "///A", "////A",
    // These are all bad, since they end in / (= 63, all bits set) but their
    // length isn't a multiple of four characters, so they can't be output by
    // btoa().  Thus one might expect some UAs to throw exceptions or otherwise
    // object, since they could never be output by btoa(), so they're good to
    // test.
    "/", "A/", "AA/", "AAAA/",
    // But this one is possible:
    "AAA/",
    // Binary-safety tests
    "\0nonsense", "abcd\0nonsense",
    // WebIDL tests
    undefined, null, 7, 12, 1.5, true, false, NaN, +Infinity, -Infinity, 0, -0,
    {toString: function() { return "foo" }},
    {toString: function() { return "abcd" }},
];

// Turn each raw input into a [test name, input] pair.  The name states the
// outcome predicted by the reference implementation, which is run once per
// input: null means atob() must throw, anything else is the expected output.
tests = tests.map(
    function(elem) {
        var expected = myatob(elem);
        if (expected === null) {
            return ["atob(" + format_value(elem) + ") must raise InvalidCharacterError", elem];
        }
        return ["atob(" + format_value(elem) + ") == " + format_value(expected), elem];
    }
);

generate_tests(testAtob, tests);
</script>