diff options
Diffstat (limited to 'testing/web-platform/tests/encoding')
25 files changed, 1452 insertions, 0 deletions
diff --git a/testing/web-platform/tests/encoding/OWNERS b/testing/web-platform/tests/encoding/OWNERS new file mode 100644 index 000000000..4917e2665 --- /dev/null +++ b/testing/web-platform/tests/encoding/OWNERS @@ -0,0 +1,2 @@ +@inexorabletash +@sideshowbarker diff --git a/testing/web-platform/tests/encoding/api-basics.html b/testing/web-platform/tests/encoding/api-basics.html new file mode 100644 index 000000000..83670ce7d --- /dev/null +++ b/testing/web-platform/tests/encoding/api-basics.html @@ -0,0 +1,58 @@ +<!DOCTYPE html> +<title>Encoding API: Basics</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +test(function() { + assert_equals((new TextEncoder).encoding, 'utf-8', 'default encoding is utf-8'); + assert_equals((new TextDecoder).encoding, 'utf-8', 'default encoding is utf-8'); +}, 'Default encodings'); + +test(function() { + assert_array_equals(new TextEncoder().encode(), [], 'input default should be empty string') + assert_array_equals(new TextEncoder().encode(undefined), [], 'input default should be empty string') +}, 'Default inputs'); + + +function testDecodeSample(encoding, string, bytes) { + test(function() { + assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string); + assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string); + }, 'Decode sample: ' + encoding); +} + +// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34), +// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD) +// byte-swapped BOM (non-character U+FFFE) +var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE'; + +test(function() { + var encoding = 'utf-8'; + var string = sample; + var bytes = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE]; + var encoded = new TextEncoder().encode(string); + assert_array_equals([].slice.call(encoded), bytes); + assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string); + assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string); +}, 'Encode/decode round trip: utf-8'); + +testDecodeSample( + 'utf-16le', + sample, + [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF] +); + +testDecodeSample( + 'utf-16be', + sample, + [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE] +); + +testDecodeSample( + 'utf-16', + sample, + [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF] +); + +</script> diff --git a/testing/web-platform/tests/encoding/api-invalid-label.html b/testing/web-platform/tests/encoding/api-invalid-label.html new file mode 100644 index 000000000..f15c184aa --- /dev/null +++ b/testing/web-platform/tests/encoding/api-invalid-label.html @@ -0,0 +1,30 @@ +<!DOCTYPE html> +<title>Encoding API: invalid label</title> +<meta name="timeout" content="long"> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script src="resources/encodings.js"></script> +<script> +var tests = ["invalid-invalidLabel"]; +setup(function() { + encodings_table.forEach(function(section) { + section.encodings.filter(function(encoding) { + return encoding.name !== 'replacement'; + }).forEach(function(encoding) { + encoding.labels.forEach(function(label) { + ["\u0000", "\u000b", "\u00a0", "\u2028", "\u2029"].forEach(function(ws) { + tests.push(ws + label); + tests.push(label + ws); + tests.push(ws + label + ws); + }); + }); + }); + }); +}); + +tests.forEach(function(input) { + test(function() { + assert_throws(new RangeError(), function() { new TextDecoder(input); }); + }, 'Invalid label ' + format_value(input) + ' should be rejected by TextDecoder.'); +}); +</script> diff --git a/testing/web-platform/tests/encoding/api-replacement-encodings.html b/testing/web-platform/tests/encoding/api-replacement-encodings.html new file mode 100644 index 000000000..2dffd72e7 --- /dev/null +++ b/testing/web-platform/tests/encoding/api-replacement-encodings.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<title>Encoding API: replacement encoding</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script src="resources/encodings.js"></script> +<script> + +test(function() { + assert_throws(new RangeError(), function() { new TextDecoder('replacement'); }); +}, 'The "replacement" label should not be a known encoding.'); + +encodings_table.forEach(function(section) { + section.encodings.filter(function(encoding) { + return encoding.name === 'replacement'; + }).forEach(function(encoding) { + encoding.labels.forEach(function(label) { + test(function() { + assert_throws(new RangeError(), function() { new TextDecoder(label); }); + }, 'Label for "replacement" should be rejected by API: ' + label); + }); + }); +}); + +</script> diff --git a/testing/web-platform/tests/encoding/api-surrogates-utf8.html b/testing/web-platform/tests/encoding/api-surrogates-utf8.html new file mode 100644 index 000000000..ef0ad4075 --- /dev/null +++ b/testing/web-platform/tests/encoding/api-surrogates-utf8.html @@ -0,0 +1,54 @@ +<!DOCTYPE html> +<title>Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +var badStrings = [ + { + input: 'abc123', + expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33], + decoded: 'abc123', + name: 'Sanity check' + }, + { + input: '\uD800', + expected: [0xef, 0xbf, 0xbd], + decoded: '\uFFFD', + name: 'Surrogate half (low)' + }, + { + input: '\uDC00', + expected: [0xef, 0xbf, 0xbd], + decoded: '\uFFFD', + name: 'Surrogate half (high)' + }, + { + input: 'abc\uD800123', + expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33], + decoded: 'abc\uFFFD123', + name: 'Surrogate half (low), in a string' + }, + { + input: 'abc\uDC00123', + expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33], + decoded: 'abc\uFFFD123', + name: 'Surrogate half (high), in a string' + }, + { + input: '\uDC00\uD800', + expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd], + decoded: '\uFFFD\uFFFD', + name: 'Wrong order' + } +]; + +badStrings.forEach(function(t) { + test(function() { + var encoded = new TextEncoder().encode(t.input); + assert_array_equals([].slice.call(encoded), t.expected); + assert_equals(new TextDecoder('utf-8').decode(encoded), t.decoded); + }, 'Invalid surrogates encoded into UTF-8: ' + t.name); +}); + +</script> diff --git a/testing/web-platform/tests/encoding/big5-encoder.html b/testing/web-platform/tests/encoding/big5-encoder.html new file mode 100644 index 000000000..7260b6b15 --- /dev/null +++ b/testing/web-platform/tests/encoding/big5-encoder.html @@ -0,0 +1,33 @@ +<!doctype html> +<meta charset=big5> <!-- test breaks if the server overrides this --> +<script src=/resources/testharness.js></script> +<script src=/resources/testharnessreport.js></script> +<div id=log></div> +<script> + function encode(input, output, desc) { + test(function() { + var a = document.createElement("a"); // <a> uses document encoding for URL's query + // Append and prepend X to test for off-by-one errors + a.href = "https://example.com/?X" + input + "X"; + assert_equals(a.search.substr(1), "X" + output + "X"); // remove leading "?" + }, "big5 encoder: " + desc); + } + + encode("ab", "ab", "very basic") + // edge cases + encode("\u9EA6", "%26%2340614%3B", "Highest-pointer BMP character excluded from encoder"); + encode("\uD858\uDE6B", "%26%23156267%3B", "Highest-pointer character excluded from encoder"); + encode("\u3000", "%A1@", "Lowest-pointer character included in encoder"); + encode("\u20AC", "%A3%E1", "Euro; the highest-pointer character before a range of 30 unmapped pointers"); + encode("\u4E00", "%A4@", "The lowest-pointer character after the range of 30 unmapped pointers"); + encode("\uD85D\uDE07", "%C8%A4", "The highest-pointer character before a range of 41 unmapped pointers"); + encode("\uFFE2", "%C8%CD", "The lowest-pointer character after the range of 41 unmapped pointers"); + encode("\u79D4", "%FE%FE", "The last character in the index"); + // not in index + encode("\u2603", "%26%239731%3B", "The canonical BMP test character that is not in the index"); + encode("\uD83D\uDCA9", "%26%23128169%3B", "The canonical astral test character that is not in the index"); + // duplicate low bits + encode("\uD840\uDFB5", "%FDj", "A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer"); + // prefer last + encode("\u2550", "%F9%F9", "A duplicate-mapped code point that prefers the highest pointer in the encoder"); +</script> diff --git a/testing/web-platform/tests/encoding/gb18030-encoder.html b/testing/web-platform/tests/encoding/gb18030-encoder.html new file mode 100644 index 000000000..799d69274 --- /dev/null +++ b/testing/web-platform/tests/encoding/gb18030-encoder.html @@ -0,0 +1,21 @@ +<!doctype html> +<meta charset=gb18030> <!-- if the server overrides this, it is stupid, as this is a testsuite --> +<script src=/resources/testharness.js></script> +<script src=/resources/testharnessreport.js></script> +<div id=log></div> +<script> + function encode(input, output, desc) { + test(function() { + var a = document.createElement("a") // <a> uses document encoding for URL's query + a.href = "https://example.com/?" + input + assert_equals(a.search.substr(1), output) // remove leading "?" + }, "gb18030 encoder: " + desc) + } + + encode("s", "s", "very basic") + encode("\u20AC", "%A2%E3", "Euro") + encode("\u4E02", "%81@", "character") + encode("\uE4C6", "%A1@", "PUA") + encode("\uE4C5", "%FE%FE", "PUA #2") + encode("\ud83d\udca9", "%949%DA3", "poo") +</script> diff --git a/testing/web-platform/tests/encoding/gbk-encoder.html b/testing/web-platform/tests/encoding/gbk-encoder.html new file mode 100644 index 000000000..a6074f975 --- /dev/null +++ b/testing/web-platform/tests/encoding/gbk-encoder.html @@ -0,0 +1,21 @@ +<!doctype html> +<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite --> +<script src=/resources/testharness.js></script> +<script src=/resources/testharnessreport.js></script> +<div id=log></div> +<script> + function encode(input, output, desc) { + test(function() { + var a = document.createElement("a") // <a> uses document encoding for URL's query + a.href = "https://example.com/?" + input + assert_equals(a.search.substr(1), output) // remove leading "?" + }, "gbk encoder: " + desc) + } + + encode("s", "s", "very basic") + encode("\u20AC", "%80", "Euro") + encode("\u4E02", "%81@", "character") + encode("\uE4C6", "%A1@", "PUA") + encode("\uE4C5", "%FE%FE", "PUA #2") + encode("\ud83d\udca9", "%26%23128169%3B", "poo") +</script> diff --git a/testing/web-platform/tests/encoding/idlharness.html b/testing/web-platform/tests/encoding/idlharness.html new file mode 100644 index 000000000..c010df3fa --- /dev/null +++ b/testing/web-platform/tests/encoding/idlharness.html @@ -0,0 +1,66 @@ +<!DOCTYPE html> +<meta charset="utf-8"> +<title>idlharness test: Encoding Living Standard API</title> +<link rel="author" title="Joshua Bell" href="mailto:jsbell@google.com" /> +<link rel="help" href="https://encoding.spec.whatwg.org/#api"/> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script src="/resources/WebIDLParser.js"></script> +<script src="/resources/idlharness.js"></script> + +<h1>idlharness test</h1> +<p>This test validates the WebIDL included in the Encoding Living Standard.</p> + +<script type="text/plain" class="untested-idl"> +interface Window {}; +</script> + +<script type="text/plain" class="idl"> +// 8.1 Interface TextDecoder + +dictionary TextDecoderOptions { + boolean fatal = false; + boolean ignoreBOM = false; +}; + +dictionary TextDecodeOptions { + boolean stream = false; +}; + +[Constructor(optional DOMString label = "utf-8", optional TextDecoderOptions options), + Exposed=Window,Worker] +interface TextDecoder { + readonly attribute DOMString encoding; + readonly attribute boolean fatal; + readonly attribute boolean ignoreBOM; + USVString decode(optional BufferSource input, optional TextDecodeOptions options); +}; + +// 8.2 Interface TextDecoder + +[Constructor, + Exposed=Window,Worker] +interface TextEncoder { + readonly attribute DOMString encoding; + [NewObject] Uint8Array encode(optional USVString input = ""); +}; +</script> + +<script> +function select(selector) { + return [].slice.call(document.querySelectorAll(selector)) + .map(function(e) { return e.textContent; }) + .join('\n\n'); +} + +var idl = select('.idl') +var untested = select('.untested-idl'); +var idl_array = new IdlArray(); +idl_array.add_untested_idls(untested); +idl_array.add_idls(idl); +idl_array.add_objects({ + TextEncoder: ['new TextEncoder()'], + TextDecoder: ['new TextDecoder()'] +}); +idl_array.test(); +</script> diff --git a/testing/web-platform/tests/encoding/iso-2022-jp-decoder.html b/testing/web-platform/tests/encoding/iso-2022-jp-decoder.html new file mode 100644 index 000000000..c86ffc158 --- /dev/null +++ b/testing/web-platform/tests/encoding/iso-2022-jp-decoder.html @@ -0,0 +1,57 @@ +<!doctype html> +<meta charset=utf-8> +<script src=/resources/testharness.js></script> +<script src=/resources/testharnessreport.js></script> +<div id=log></div> +<script> + function decode(input, output, desc) { + test(function() { + var d = new TextDecoder("iso-2022-jp"), + buffer = new ArrayBuffer(input.length), + view = new Int8Array(buffer) + for(var i = 0, l = input.length; i < l; i++) { + view[i] = input[i] + } + assert_equals(d.decode(view), output) + }, "iso-2022-jp decoder: " + desc) + } + decode([0x1b, 0x24], "�$", "Error ESC") + decode([0x1b, 0x24, 0x50], "�$P", "Error ESC, character") + decode([0x1b, 0x28, 0x42, 0x50], "�P", "ASCII ESC, character") + decode([0x1b, 0x28, 0x42, 0x1b, 0x28, 0x42, 0x50], "��P", "Double ASCII ESC, character") + decode([0x50, 0x1b, 0x28, 0x42, 0x50], "PP", "character, ASCII ESC, character") + decode([0x5C, 0x5D, 0x7E], "\\]~", "characters") + decode([0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "SO / SI") + + decode([0x1b, 0x28, 0x4A, 0x5C, 0x5D, 0x7E], "¥]‾", "Roman ESC, characters") + decode([0x1b, 0x28, 0x4A, 0x0D, 0x0E, 0x0F, 0x10], "\x0D��\x10", "Roman ESC, SO / SI") + decode([0x1b, 0x28, 0x4A, 0x1b, 0x1b, 0x28, 0x49, 0x50], "�ミ", "Roman ESC, error ESC, Katakana ESC") + + decode([0x1b, 0x28, 0x49, 0x50], "ミ", "Katakana ESC, character") + decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Katakana ESC, multibyte ESC, character") + decode([0x1b, 0x28, 0x49, 0x1b, 0x50], "�ミ", "Katakana ESC, error ESC, character") + decode([0x1b, 0x28, 0x49, 0x1b, 0x24, 0x50], "�、ミ", "Katakana ESC, error ESC #2, character") + decode([0x1b, 0x28, 0x49, 0x50, 0x1b, 0x28, 0x49, 0x50], "ミミ", "Katakana ESC, character, Katakana ESC, character") + decode([0x1b, 0x28, 0x49, 0x0D, 0x0E, 0x0F, 0x10], "����", "Katakana ESC, SO / SI") + + decode([0x1b, 0x24, 0x40, 0x50, 0x50], "佩", "Multibyte ESC, character") + decode([0x1b, 0x24, 0x42, 0x50, 0x50], "佩", "Multibyte ESC #2, character") + decode([0x1b, 0x24, 0x42, 0x1b, 0x50, 0x50], "�佩", "Multibyte ESC, error ESC, character") + decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40], "�", "Double multibyte ESC") + decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Double multibyte ESC, character") + decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x42, 0x50, 0x50], "�佩", "Double multibyte ESC #2, character") + decode([0x1b, 0x24, 0x40, 0x1b, 0x24, 0x50, 0x50], "�ば�", "Multibyte ESC, error ESC #2, character") + + decode([0x1b, 0x24, 0x40, 0x50, 0x1b, 0x24, 0x40, 0x50, 0x50], "�佩", "Multibyte ESC, single byte, multibyte ESC, character") + decode([0x1b, 0x24, 0x40, 0x20, 0x50], "��", "Multibyte ESC, lead error byte") + decode([0x1b, 0x24, 0x40, 0x50, 0x20], "�", "Multibyte ESC, trail error byte") + + decode([0x50, 0x1b], "P�", "character, error ESC") + decode([0x50, 0x1b, 0x24], "P�$", "character, error ESC #2") + decode([0x50, 0x1b, 0x50], "P�P", "character, error ESC #3") + decode([0x50, 0x1b, 0x28, 0x42], "P", "character, ASCII ESC") + decode([0x50, 0x1b, 0x28, 0x4A], "P", "character, Roman ESC") + decode([0x50, 0x1b, 0x28, 0x49], "P", "character, Katakana ESC") + decode([0x50, 0x1b, 0x24, 0x40], "P", "character, Multibyte ESC") + decode([0x50, 0x1b, 0x24, 0x42], "P", "character, Multibyte ESC #2") +</script> diff --git a/testing/web-platform/tests/encoding/iso-2022-jp-encoder.html b/testing/web-platform/tests/encoding/iso-2022-jp-encoder.html new file mode 100644 index 000000000..d3124e5f5 --- /dev/null +++ b/testing/web-platform/tests/encoding/iso-2022-jp-encoder.html @@ -0,0 +1,18 @@ +<!doctype html> +<meta charset=iso-2022-jp> <!-- if the server overrides this, it is stupid, as this is a testsuite --> +<script src=/resources/testharness.js></script> +<script src=/resources/testharnessreport.js></script> +<div id=log></div> +<script> + function encode(input, output, desc) { + test(function() { + var a = document.createElement("a") // <a> uses document encoding for URL's query + a.href = "https://example.com/?" + input + assert_equals(a.search.substr(1), output) // remove leading "?" + }, "iso-2022-jp encoder: " + desc) + } + + encode("s", "s", "very basic") + encode("\u00A5\u203Es\\\uFF90\u4F69", "%1B(J\\~s%1B(B\\%1B$B%_PP%1B(B", "basics") + encode("\x0E\x0F\x1Bx", "%0E%0F%1Bx", "SO/SI ESC") +</script> diff --git a/testing/web-platform/tests/encoding/resources/encodings.js b/testing/web-platform/tests/encoding/resources/encodings.js new file mode 100644 index 000000000..1cdf585ee --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/encodings.js @@ -0,0 +1,490 @@ +// Based on https://dom.spec.whatwg.org/#dom-document-characterset + +var compatibility_names = { + "utf-8": "UTF-8", + "ibm866": "IBM866", + "iso-8859-2": "ISO-8859-2", + "iso-8859-3": "ISO-8859-3", + "iso-8859-4": "ISO-8859-4", + "iso-8859-5": "ISO-8859-5", + "iso-8859-6": "ISO-8859-6", + "iso-8859-7": "ISO-8859-7", + "iso-8859-8": "ISO-8859-8", + "iso-8859-8-i": "ISO-8859-8-I", + "iso-8859-10": "ISO-8859-10", + "iso-8859-13": "ISO-8859-13", + "iso-8859-14": "ISO-8859-14", + "iso-8859-15": "ISO-8859-15", + "iso-8859-16": "ISO-8859-16", + "koi8-r": "KOI8-R", + "koi8-u": "KOI8-U", + "gbk": "GBK", + "big5": "Big5", + "euc-jp": "EUC-JP", + "iso-2022-jp": "ISO-2022-JP", + "shift_jis": "Shift_JIS", + "euc-kr": "EUC-KR", + "utf-16be": "UTF-16BE", + "utf-16le": "UTF-16LE" +}; + +// Based on https://encoding.spec.whatwg.org/ + +var utf_encodings = ['utf-8', 'utf-16le', 'utf-16be']; + +var encodings_table = +[ + { + "encodings": [ + { + "labels": [ + "unicode-1-1-utf-8", + "utf-8", + "utf8" + ], + "name": "utf-8" + } + ], + "heading": "The Encoding" + }, + { + "encodings": [ + { + "labels": [ + "866", + "cp866", + "csibm866", + "ibm866" + ], + "name": "ibm866" + }, + { + "labels": [ + "csisolatin2", + "iso-8859-2", + "iso-ir-101", + "iso8859-2", + "iso88592", + "iso_8859-2", + "iso_8859-2:1987", + "l2", + "latin2" + ], + "name": "iso-8859-2" + }, + { + "labels": [ + "csisolatin3", + "iso-8859-3", + "iso-ir-109", + "iso8859-3", + "iso88593", + "iso_8859-3", + "iso_8859-3:1988", + "l3", + "latin3" + ], + "name": "iso-8859-3" + }, + { + "labels": [ + "csisolatin4", + "iso-8859-4", + "iso-ir-110", + "iso8859-4", + "iso88594", + "iso_8859-4", + "iso_8859-4:1988", + "l4", + "latin4" + ], + "name": "iso-8859-4" + }, + { + "labels": [ + "csisolatincyrillic", + "cyrillic", + "iso-8859-5", + "iso-ir-144", + "iso8859-5", + "iso88595", + "iso_8859-5", + "iso_8859-5:1988" + ], + "name": "iso-8859-5" + }, + { + "labels": [ + "arabic", + "asmo-708", + "csiso88596e", + "csiso88596i", + "csisolatinarabic", + "ecma-114", + "iso-8859-6", + "iso-8859-6-e", + "iso-8859-6-i", + "iso-ir-127", + "iso8859-6", + "iso88596", + "iso_8859-6", + "iso_8859-6:1987" + ], + "name": "iso-8859-6" + }, + { + "labels": [ + "csisolatingreek", + "ecma-118", + "elot_928", + "greek", + "greek8", + "iso-8859-7", + "iso-ir-126", + "iso8859-7", + "iso88597", + "iso_8859-7", + "iso_8859-7:1987", + "sun_eu_greek" + ], + "name": "iso-8859-7" + }, + { + "labels": [ + "csiso88598e", + "csisolatinhebrew", + "hebrew", + "iso-8859-8", + "iso-8859-8-e", + "iso-ir-138", + "iso8859-8", + "iso88598", + "iso_8859-8", + "iso_8859-8:1988", + "visual" + ], + "name": "iso-8859-8" + }, + { + "labels": [ + "csiso88598i", + "iso-8859-8-i", + "logical" + ], + "name": "iso-8859-8-i" + }, + { + "labels": [ + "csisolatin6", + "iso-8859-10", + "iso-ir-157", + "iso8859-10", + "iso885910", + "l6", + "latin6" + ], + "name": "iso-8859-10" + }, + { + "labels": [ + "iso-8859-13", + "iso8859-13", + "iso885913" + ], + "name": "iso-8859-13" + }, + { + "labels": [ + "iso-8859-14", + "iso8859-14", + "iso885914" + ], + "name": "iso-8859-14" + }, + { + "labels": [ + "csisolatin9", + "iso-8859-15", + "iso8859-15", + "iso885915", + "iso_8859-15", + "l9" + ], + "name": "iso-8859-15" + }, + { + "labels": [ + "iso-8859-16" + ], + "name": "iso-8859-16" + }, + { + "labels": [ + "cskoi8r", + "koi", + "koi8", + "koi8-r", + "koi8_r" + ], + "name": "koi8-r" + }, + { + "labels": [ + "koi8-u" + ], + "name": "koi8-u" + }, + { + "labels": [ + "csmacintosh", + "mac", + "macintosh", + "x-mac-roman" + ], + "name": "macintosh" + }, + { + "labels": [ + "dos-874", + "iso-8859-11", + "iso8859-11", + "iso885911", + "tis-620", + "windows-874" + ], + "name": "windows-874" + }, + { + "labels": [ + "cp1250", + "windows-1250", + "x-cp1250" + ], + "name": "windows-1250" + }, + { + "labels": [ + "cp1251", + "windows-1251", + "x-cp1251" + ], + "name": "windows-1251" + }, + { + "labels": [ + "ansi_x3.4-1968", + "ascii", + "cp1252", + "cp819", + "csisolatin1", + "ibm819", + "iso-8859-1", + "iso-ir-100", + "iso8859-1", + "iso88591", + "iso_8859-1", + "iso_8859-1:1987", + "l1", + "latin1", + "us-ascii", + "windows-1252", + "x-cp1252" + ], + "name": "windows-1252" + }, + { + "labels": [ + "cp1253", + "windows-1253", + "x-cp1253" + ], + "name": "windows-1253" + }, + { + "labels": [ + "cp1254", + "csisolatin5", + "iso-8859-9", + "iso-ir-148", + "iso8859-9", + "iso88599", + "iso_8859-9", + "iso_8859-9:1989", + "l5", + "latin5", + "windows-1254", + "x-cp1254" + ], + "name": "windows-1254" + }, + { + "labels": [ + "cp1255", + "windows-1255", + "x-cp1255" + ], + "name": "windows-1255" + }, + { + "labels": [ + "cp1256", + "windows-1256", + "x-cp1256" + ], + "name": "windows-1256" + }, + { + "labels": [ + "cp1257", + "windows-1257", + "x-cp1257" + ], + "name": "windows-1257" + }, + { + "labels": [ + "cp1258", + "windows-1258", + "x-cp1258" + ], + "name": "windows-1258" + }, + { + "labels": [ + "x-mac-cyrillic", + "x-mac-ukrainian" + ], + "name": "x-mac-cyrillic" + } + ], + "heading": "Legacy single-byte encodings" + }, + { + "encodings": [ + { + "labels": [ + "chinese", + "csgb2312", + "csiso58gb231280", + "gb2312", + "gb_2312", + "gb_2312-80", + "gbk", + "iso-ir-58", + "x-gbk" + ], + "name": "gbk" + }, + { + "labels": [ + "gb18030" + ], + "name": "gb18030" + } + ], + "heading": "Legacy multi-byte Chinese (simplified) encodings" + }, + { + "encodings": [ + { + "labels": [ + "big5", + "big5-hkscs", + "cn-big5", + "csbig5", + "x-x-big5" + ], + "name": "big5" + } + ], + "heading": "Legacy multi-byte Chinese (traditional) encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseucpkdfmtjapanese", + "euc-jp", + "x-euc-jp" + ], + "name": "euc-jp" + }, + { + "labels": [ + "csiso2022jp", + "iso-2022-jp" + ], + "name": "iso-2022-jp" + }, + { + "labels": [ + "csshiftjis", + "ms932", + "ms_kanji", + "shift-jis", + "shift_jis", + "sjis", + "windows-31j", + "x-sjis" + ], + "name": "shift_jis" + } + ], + "heading": "Legacy multi-byte Japanese encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseuckr", + "csksc56011987", + "euc-kr", + "iso-ir-149", + "korean", + "ks_c_5601-1987", + "ks_c_5601-1989", + "ksc5601", + "ksc_5601", + "windows-949" + ], + "name": "euc-kr" + } + ], + "heading": "Legacy multi-byte Korean encodings" + }, + { + "encodings": [ + { + "labels": [ + "csiso2022kr", + "hz-gb-2312", + "iso-2022-cn", + "iso-2022-cn-ext", + "iso-2022-kr" + ], + "name": "replacement" + }, + { + "labels": [ + "utf-16be" + ], + "name": "utf-16be" + }, + { + "labels": [ + "utf-16", + "utf-16le" + ], + "name": "utf-16le" + }, + { + "labels": [ + "x-user-defined" + ], + "name": "x-user-defined" + } + ], + "heading": "Legacy miscellaneous encodings" + } +] +; diff --git a/testing/web-platform/tests/encoding/resources/single-byte-raw.py b/testing/web-platform/tests/encoding/resources/single-byte-raw.py new file mode 100644 index 000000000..b4a6c9040 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/single-byte-raw.py @@ -0,0 +1,3 @@ +def main(request, response): + response.headers.set("Content-Type", "text/plain;charset=" + request.GET.first("label")) + response.content = "".join(chr(byte) for byte in xrange(255)) diff --git a/testing/web-platform/tests/encoding/resources/text-plain-charset.py b/testing/web-platform/tests/encoding/resources/text-plain-charset.py new file mode 100644 index 000000000..a1c07e701 --- /dev/null +++ b/testing/web-platform/tests/encoding/resources/text-plain-charset.py @@ -0,0 +1,3 @@ +def main(request, response): + response.headers.set("Content-Type", "text/plain;charset=" + request.GET.first("label")) + response.content = "hello encoding" diff --git a/testing/web-platform/tests/encoding/single-byte-decoder.html b/testing/web-platform/tests/encoding/single-byte-decoder.html new file mode 100644 index 000000000..6462cd1f7 --- /dev/null +++ b/testing/web-platform/tests/encoding/single-byte-decoder.html @@ -0,0 +1,99 @@ +<!doctype html> +<meta name=timeout content=long> +<script src=/resources/testharness.js></script> +<script src=/resources/testharnessreport.js></script> +<script src=resources/encodings.js></script> +<div id=log></div> +<script> + + var singleByteEncodings = encodings_table.filter(function(group) { + return group.heading === "Legacy single-byte encodings"; + })[0].encodings, + // https://encoding.spec.whatwg.org/indexes.json + singleByteIndexes = { + "ibm866":[1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,9617,9618,9619,9474,9508,9569,9570,9558,9557,9571,9553,9559,9565,9564,9563,9488,9492,9524,9516,9500,9472,9532,9566,9567,9562,9556,9577,9574,9568,9552,9580,9575,9576,9572,9573,9561,9560,9554,9555,9579,9578,9496,9484,9608,9604,9612,9616,9600,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1025,1105,1028,1108,1031,1111,1038,1118,176,8729,183,8730,8470,164,9632,160], + "iso-8859-2":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,728,321,164,317,346,167,168,352,350,356,377,173,381,379,176,261,731,322,180,318,347,711,184,353,351,357,378,733,382,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729], + "iso-8859-3":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,294,728,163,164,null,292,167,168,304,350,286,308,173,null,379,176,295,178,179,180,181,293,183,184,305,351,287,309,189,null,380,192,193,194,null,196,266,264,199,200,201,202,203,204,205,206,207,null,209,210,211,212,288,214,215,284,217,218,219,220,364,348,223,224,225,226,null,228,267,265,231,232,233,234,235,236,237,238,239,null,241,242,243,244,289,246,247,285,249,250,251,252,365,349,729], + "iso-8859-4":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,312,342,164,296,315,167,168,352,274,290,358,173,381,175,176,261,731,343,180,297,316,711,184,353,275,291,359,330,382,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,298,272,325,332,310,212,213,214,215,216,370,218,219,220,360,362,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,299,273,326,333,311,244,245,246,247,248,371,250,251,252,361,363,729], + "iso-8859-5":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,173,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,8470,1105,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,167,1118,1119], + "iso-8859-6":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,null,null,null,164,null,null,null,null,null,null,null,1548,173,null,null,null,null,null,null,null,null,null,null,null,null,null,1563,null,null,null,1567,null,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,null,null,null,null,null,1600,1601,1602,1603,1604,1605,1606,1607,1608,1609,1610,1611,1612,1613,1614,1615,1616,1617,1618,null,null,null,null,null,null,null,null,null,null,null,null,null], + "iso-8859-7":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8216,8217,163,8364,8367,166,167,168,169,890,171,172,173,null,8213,176,177,178,179,900,901,902,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,null,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,null], + "iso-8859-8":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,null,162,163,164,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,8215,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,null,null,8206,8207,null], + "iso-8859-10":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,274,290,298,296,310,167,315,272,352,358,381,173,362,330,176,261,275,291,299,297,311,183,316,273,353,359,382,8213,363,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,207,208,325,332,211,212,213,214,360,216,370,218,219,220,221,222,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,239,240,326,333,243,244,245,246,361,248,371,250,251,252,253,254,312], + "iso-8859-13":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8221,162,163,164,8222,166,167,216,169,342,171,172,173,174,198,176,177,178,179,8220,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,8217], + "iso-8859-14":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,7682,7683,163,266,267,7690,167,7808,169,7810,7691,7922,173,174,376,7710,7711,288,289,7744,7745,182,7766,7809,7767,7811,7776,7923,7812,7813,7777,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,372,209,210,211,212,213,214,7786,216,217,218,219,220,221,374,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,373,241,242,243,244,245,246,7787,248,249,250,251,252,253,375,255], + "iso-8859-15":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,8364,165,352,167,353,169,170,171,172,173,174,175,176,177,178,179,381,181,182,183,382,185,186,187,338,339,376,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255], + "iso-8859-16":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,261,321,8364,8222,352,167,353,169,536,171,377,173,378,379,176,177,268,322,381,8221,182,183,382,269,537,187,338,339,376,380,192,193,194,258,196,262,198,199,200,201,202,203,204,205,206,207,272,323,210,211,212,336,214,346,368,217,218,219,220,280,538,223,224,225,226,259,228,263,230,231,232,233,234,235,236,237,238,239,273,324,242,243,244,337,246,347,369,249,250,251,252,281,539,255], + "koi8-r":[9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568,9569,1025,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066], + "koi8-u":[9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,1108,9556,1110,1111,9559,9560,9561,9562,9563,1169,9565,9566,9567,9568,9569,1025,1028,9571,1030,1031,9574,9575,9576,9577,9578,1168,9580,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066], + "macintosh":[196,197,199,201,209,214,220,225,224,226,228,227,229,231,233,232,234,235,237,236,238,239,241,243,242,244,246,245,250,249,251,252,8224,176,162,163,167,8226,182,223,174,169,8482,180,168,8800,198,216,8734,177,8804,8805,165,181,8706,8721,8719,960,8747,170,186,937,230,248,191,161,172,8730,402,8776,8710,171,187,8230,160,192,195,213,338,339,8211,8212,8220,8221,8216,8217,247,9674,255,376,8260,8364,8249,8250,64257,64258,8225,183,8218,8222,8240,194,202,193,203,200,205,206,207,204,211,212,63743,210,218,219,217,305,710,732,175,728,729,730,184,733,731,711], + "windows-874":[8364,129,130,131,132,8230,134,135,136,137,138,139,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,153,154,155,156,157,158,159,160,3585,3586,3587,3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603,3604,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618,3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,3630,3631,3632,3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,null,null,null,null,3647,3648,3649,3650,3651,3652,3653,3654,3655,3656,3657,3658,3659,3660,3661,3662,3663,3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,null,null,null,null], + "windows-1250":[8364,129,8218,131,8222,8230,8224,8225,136,8240,352,8249,346,356,381,377,144,8216,8217,8220,8221,8226,8211,8212,152,8482,353,8250,347,357,382,378,160,711,728,321,164,260,166,167,168,169,350,171,172,173,174,379,176,177,731,322,180,181,182,183,184,261,351,187,317,733,318,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729], + "windows-1251":[1026,1027,8218,1107,8222,8230,8224,8225,8364,8240,1033,8249,1034,1036,1035,1039,1106,8216,8217,8220,8221,8226,8211,8212,152,8482,1113,8250,1114,1116,1115,1119,160,1038,1118,1032,164,1168,166,167,1025,169,1028,171,172,173,174,1031,176,177,1030,1110,1169,181,182,183,1105,8470,1108,187,1112,1029,1109,1111,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103], + "windows-1252":[8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255], + "windows-1253":[8364,129,8218,402,8222,8230,8224,8225,136,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,157,158,159,160,901,902,163,164,165,166,167,168,169,null,171,172,173,174,8213,176,177,178,179,900,181,182,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,null,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,null], + "windows-1254":[8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,286,209,210,211,212,213,214,215,216,217,218,219,220,304,350,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,287,241,242,243,244,245,246,247,248,249,250,251,252,305,351,255], + "windows-1255":[8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,156,157,158,159,160,161,162,163,8362,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,191,1456,1457,1458,1459,1460,1461,1462,1463,1464,1465,1466,1467,1468,1469,1470,1471,1472,1473,1474,1475,1520,1521,1522,1523,1524,null,null,null,null,null,null,null,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,null,null,8206,8207,null], + "windows-1256":[8364,1662,8218,402,8222,8230,8224,8225,710,8240,1657,8249,338,1670,1688,1672,1711,8216,8217,8220,8221,8226,8211,8212,1705,8482,1681,8250,339,8204,8205,1722,160,1548,162,163,164,165,166,167,168,169,1726,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,1563,187,188,189,190,1567,1729,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,215,1591,1592,1593,1594,1600,1601,1602,1603,224,1604,226,1605,1606,1607,1608,231,232,233,234,235,1609,1610,238,239,1611,1612,1613,1614,244,1615,1616,247,1617,249,1618,251,252,8206,8207,1746], + "windows-1257":[8364,129,8218,131,8222,8230,8224,8225,136,8240,138,8249,140,168,711,184,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,175,731,159,160,null,162,163,164,null,166,167,216,169,342,171,172,173,174,198,176,177,178,179,180,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,729], + "windows-1258":[8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,258,196,197,198,199,200,201,202,203,768,205,206,207,272,209,777,211,212,416,214,215,216,217,218,219,220,431,771,223,224,225,226,259,228,229,230,231,232,233,234,235,769,237,238,239,273,241,803,243,244,417,246,247,248,249,250,251,252,432,8363,255], + "x-mac-cyrillic":[1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,8224,176,1168,163,167,8226,182,1030,174,169,8482,1026,1106,8800,1027,1107,8734,177,8804,8805,1110,181,1169,1032,1028,1108,1031,1111,1033,1113,1034,1114,1112,1029,172,8730,402,8776,8710,171,187,8230,160,1035,1115,1036,1116,1109,8211,8212,8220,8221,8216,8217,247,8222,1038,1118,1039,1119,8470,1025,1105,1103,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,8364] + } + + // For TextDecoder tests + var buffer = new ArrayBuffer(255), + view = new Uint8Array(buffer) + for(var i = 0, l = view.byteLength; i < l; i++) { + view[i] = i + } + + // For XMLHttpRequest and TextDecoder tests + function assert_decode(data, encoding) { + if(encoding == "iso-8859-8-i") { + encoding = "iso-8859-8" + } + for(var i = 0, l = data.length; i < l; i++) { + var cp = data.charCodeAt(i), + expectedCp = (i < 0x80) ? i : singleByteIndexes[encoding][i-0x80] + if(expectedCp == null) { + expectedCp = 0xFFFD + } + assert_equals(cp, expectedCp, encoding + ":" + i) + } + } + + // Setting up all the tests + for(var i = 0, l = singleByteEncodings.length; i < l; i++) { + var encoding = singleByteEncodings[i] + for(var ii = 0, ll = encoding.labels.length; ii < ll; ii++) { + var label = encoding.labels[ii] + + async_test(function(t) { + var xhr = new XMLHttpRequest, + name = encoding.name // need scoped variable + xhr.open("GET", "resources/single-byte-raw.py?label=" + label) + xhr.send(null) + xhr.onload = t.step_func_done(function() { assert_decode(xhr.responseText, name) }) + }, encoding.name + ": " + label + " (XMLHttpRequest)") + + test(function() { + var d = new TextDecoder(label), + data = d.decode(view) + assert_equals(d.encoding, encoding.name) + assert_decode(data, encoding.name) + }, encoding.name + ": " + label + " (TextDecoder)") + + async_test(function(t) { + var frame = document.createElement("iframe"), + name = compatibility_names[encoding.name] || encoding.name; + frame.src = "resources/text-plain-charset.py?label=" + label + frame.onload = t.step_func_done(function() { + assert_equals(frame.contentDocument.characterSet, name) + assert_equals(frame.contentDocument.inputEncoding, name) + }) + t.add_cleanup(function() { document.body.removeChild(frame) }) + document.body.appendChild(frame) + }, encoding.name + ": " + label + " (document.characterSet and document.inputEncoding)") + } + } +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-byte-order-marks.html b/testing/web-platform/tests/encoding/textdecoder-byte-order-marks.html new file mode 100644 index 000000000..8ef167943 --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-byte-order-marks.html @@ -0,0 +1,48 @@ +<!DOCTYPE html> +<title>Encoding API: Byte-order marks</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +var testCases = [ + { + encoding: 'utf-8', + bom: [0xEF, 0xBB, 0xBF], + bytes: [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD] + }, + { + encoding: 'utf-16le', + bom: [0xff, 0xfe], + bytes: [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF] + }, + { + encoding: 'utf-16be', + bom: [0xfe, 0xff], + bytes: [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD] + } +]; + +var string = 'z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD'; // z, cent, CJK water, G-Clef, Private-use character + +testCases.forEach(function(t) { + test(function() { + + var decoder = new TextDecoder(t.encoding); + assert_equals(decoder.decode(new Uint8Array(t.bytes)), string, + 'Sequence without BOM should decode successfully'); + + assert_equals(decoder.decode(new Uint8Array(t.bom.concat(t.bytes))), string, + 'Sequence with BOM should decode successfully (with no BOM present in output)'); + + testCases.forEach(function(o) { + if (o === t) + return; + + assert_not_equals(decoder.decode(new Uint8Array(o.bom.concat(t.bytes))), string, + 'Mismatching BOM should not be ignored - treated as garbage bytes.'); + }); + + }, 'Byte-order marks: ' + t.encoding); +}); + +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-fatal-single-byte.html b/testing/web-platform/tests/encoding/textdecoder-fatal-single-byte.html new file mode 100644 index 000000000..d9bf41282 --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-fatal-single-byte.html @@ -0,0 +1,55 @@ +<!DOCTYPE html> +<title>Encoding API: Fatal flag for single byte encodings</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +var singleByteEncodings = [ + {encoding: 'IBM866', bad: []}, + {encoding: 'ISO-8859-2', bad: []}, + {encoding: 'ISO-8859-3', bad: [0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3, 0xF0]}, + {encoding: 'ISO-8859-4', bad: []}, + {encoding: 'ISO-8859-5', bad: []}, + {encoding: 'ISO-8859-6', bad: [0xA1, 0xA2, 0xA3, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBC, 0xBD, 0xBE, 0xC0, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF]}, + {encoding: 'ISO-8859-7', bad: [0xAE, 0xD2, 0xFF]}, + {encoding: 'ISO-8859-8', bad: [0xA1, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFB, 0xFC, 0xFF]}, + {encoding: 'ISO-8859-8-I', bad: [0xA1, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFB, 0xFC, 0xFF]}, + {encoding: 'ISO-8859-10', bad: []}, + {encoding: 'ISO-8859-13', bad: []}, + {encoding: 'ISO-8859-14', bad: []}, + {encoding: 'ISO-8859-15', bad: []}, + {encoding: 'ISO-8859-16', bad: []}, + {encoding: 'KOI8-R', bad: []}, + {encoding: 'KOI8-U', bad: []}, + {encoding: 'macintosh', bad: []}, + {encoding: 'windows-874', bad: [0xDB, 0xDC, 0xDD, 0xDE, 0xFC, 0xFD, 0xFE, 0xFF]}, + {encoding: 'windows-1250', bad: []}, + {encoding: 'windows-1251', bad: []}, + {encoding: 'windows-1252', bad: []}, + {encoding: 'windows-1253', bad: [0xAA, 0xD2, 0xFF]}, + {encoding: 'windows-1254', bad: []}, + {encoding: 'windows-1255', bad: [0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xFB, 0xFC, 0xFF]}, + {encoding: 'windows-1256', bad: []}, + {encoding: 'windows-1257', bad: [0xA1, 0xA5]}, + {encoding: 'windows-1258', bad: []}, + {encoding: 'x-mac-cyrillic', bad: []}, +]; + +singleByteEncodings.forEach(function(t) { + for (var i = 0; i < 256; ++i) { + if (t.bad.indexOf(i) != -1) { + test(function() { + assert_throws(new TypeError(), function() { + new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array([i])); + }); + }, 'Throw due to fatal flag: ' + t.encoding + ' doesn\'t have a pointer ' + i); + } + else { + test(function() { + assert_equals(typeof new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array([i])), "string"); + }, 'Not throw: ' + t.encoding + ' has a pointer ' + i); + } + } +}); + +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-fatal-streaming.html b/testing/web-platform/tests/encoding/textdecoder-fatal-streaming.html new file mode 100644 index 000000000..2ccac9f23 --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-fatal-streaming.html @@ -0,0 +1,50 @@ +<!DOCTYPE html> +<title>Encoding API: End-of-file</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +test(function() { + [ + {encoding: 'utf-8', sequence: [0xC0]}, + {encoding: 'utf-16le', sequence: [0x00]}, + {encoding: 'utf-16be', sequence: [0x00]} + ].forEach(function(testCase) { + + assert_throws(new TypeError(), function() { + var decoder = new TextDecoder(testCase.encoding, {fatal: true}); + decoder.decode(new Uint8Array(testCase.sequence)); + }, 'Unterminated ' + testCase.encoding + ' sequence should throw if fatal flag is set'); + + assert_equals( + new TextDecoder(testCase.encoding).decode(new Uint8Array([testCase.sequence])), + '\uFFFD', + 'Unterminated UTF-8 sequence should emit replacement character if fatal flag is unset'); + }); +}, 'Fatal flag, non-streaming cases'); + +test(function() { + + var decoder = new TextDecoder('utf-16le', {fatal: true}); + var odd = new Uint8Array([0x00]); + var even = new Uint8Array([0x00, 0x00]); + + assert_equals(decoder.decode(odd, {stream: true}), ''); + assert_equals(decoder.decode(odd), '\u0000'); + + assert_throws(new TypeError(), function() { + decoder.decode(even, {stream: true}); + decoder.decode(odd) + }); + + assert_throws(new TypeError(), function() { + decoder.decode(odd, {stream: true}); + decoder.decode(even); + }); + + assert_equals(decoder.decode(even, {stream: true}), '\u0000'); + assert_equals(decoder.decode(even), '\u0000'); + +}, 'Fatal flag, streaming cases'); + +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-fatal.html b/testing/web-platform/tests/encoding/textdecoder-fatal.html new file mode 100644 index 000000000..e8cc9f64c --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-fatal.html @@ -0,0 +1,72 @@ +<!DOCTYPE html> +<title>Encoding API: Fatal flag</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +var bad = [ + { encoding: 'utf-8', input: [0xFF], name: 'invalid code' }, + { encoding: 'utf-8', input: [0xC0], name: 'ends early' }, + { encoding: 'utf-8', input: [0xE0], name: 'ends early 2' }, + { encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' }, + { encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' }, + { encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' }, + { encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' }, + { encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' }, + { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10FFFF' }, + { encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' }, + + // Overlong encodings + { encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' }, + { encoding: 'utf-8', input: [0xE0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' }, + { encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' }, + + { encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' }, + { encoding: 'utf-8', input: [0xE0, 0x81, 0xBF], name: 'overlong U+007F - 3 bytes' }, + { encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 6 bytes' }, + + { encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF], name: 'overlong U+07FF - 3 bytes' }, + { encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 6 bytes' }, + + { encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 6 bytes' }, + + { encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 6 bytes' }, + + // UTF-16 surrogates encoded as code points in UTF-8 + { encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' }, + { encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' }, + { encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], name: 'surrogate pair' }, + + { encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' }, + // Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html + + // FIXME: Add legacy encoding cases +]; + +bad.forEach(function(t) { + test(function() { + assert_throws(new TypeError(), function() { + new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) + }); + }, 'Fatal flag: ' + t.encoding + ' - ' + t.name); +}); + +test(function() { + assert_true('fatal' in new TextDecoder(), 'The fatal attribute should exist on TextDecoder.'); + assert_equals(typeof new TextDecoder().fatal, 'boolean', 'The type of the fatal attribute should be boolean.'); + assert_false(new TextDecoder().fatal, 'The fatal attribute should default to false.'); + assert_true(new TextDecoder('utf-8', {fatal: true}).fatal, 'The fatal attribute can be set using an option.'); + +}, 'The fatal attribute of TextDecoder'); + +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-ignorebom.html b/testing/web-platform/tests/encoding/textdecoder-ignorebom.html new file mode 100644 index 000000000..26381568f --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-ignorebom.html @@ -0,0 +1,45 @@ +<!DOCTYPE html> +<title>Encoding API: TextDecoder ignoreBOM option</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +var cases = [ + {encoding: 'utf-8', bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63]}, + {encoding: 'utf-16le', bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00]}, + {encoding: 'utf-16be', bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63]} +]; + +cases.forEach(function(testCase) { + test(function() { + var BOM = '\uFEFF'; + var decoder = new TextDecoder(testCase.encoding, {ignoreBOM: true}); + var bytes = new Uint8Array(testCase.bytes); + assert_equals( + decoder.decode(bytes), + BOM + 'abc', + testCase.encoding + ': BOM should be present in decoded string if ignored'); + + decoder = new TextDecoder(testCase.encoding, {ignoreBOM: false}); + assert_equals( + decoder.decode(bytes), + 'abc', + testCase.encoding + ': BOM should be absent from decoded string if not ignored'); + + decoder = new TextDecoder(testCase.encoding); + assert_equals( + decoder.decode(bytes), + 'abc', + testCase.encoding + ': BOM should be absent from decoded string by default'); + }, 'BOM is ignored if ignoreBOM option is specified: ' + testCase.encoding); +}); + +test(function() { + assert_true('ignoreBOM' in new TextDecoder(), 'The ignoreBOM attribute should exist on TextDecoder.'); + assert_equals(typeof new TextDecoder().ignoreBOM, 'boolean', 'The type of the ignoreBOM attribute should be boolean.'); + assert_false(new TextDecoder().ignoreBOM, 'The ignoreBOM attribute should default to false.'); + assert_true(new TextDecoder('utf-8', {ignoreBOM: true}).ignoreBOM, 'The ignoreBOM attribute can be set using an option.'); + +}, 'The ignoreBOM attribute of TextDecoder'); + +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-labels.html b/testing/web-platform/tests/encoding/textdecoder-labels.html new file mode 100644 index 000000000..1f3202c96 --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-labels.html @@ -0,0 +1,36 @@ +<!DOCTYPE html> +<title>Encoding API: Encoding labels</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script src="resources/encodings.js"></script> +<script> +var tests = []; +setup(function() { + var whitespace = [' ', '\t', '\n', '\f', '\r']; + encodings_table.forEach(function(section) { + section.encodings.filter(function(encoding) { + return encoding.name !== 'replacement'; + }).forEach(function(encoding) { + var name = encoding.name; + encoding.labels.forEach(function(label) { + tests.push([label, encoding.name]); + whitespace.forEach(function(ws) { + tests.push([ws + label, encoding.name]); + tests.push([label + ws, encoding.name]); + tests.push([ws + label + ws, encoding.name]); + }); + }); + }); + }); +}); + +tests.forEach(function(t) { + var input = t[0], output = t[1]; + test(function() { + assert_equals(new TextDecoder(input).encoding, output, + 'label for encoding should match'); + assert_equals(new TextDecoder(input.toUpperCase()).encoding, output, + 'label matching should be case-insensitive'); + }, format_value(input) + " => " + format_value(output)); +}); +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-streaming.html b/testing/web-platform/tests/encoding/textdecoder-streaming.html new file mode 100644 index 000000000..2b567aaa1 --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-streaming.html @@ -0,0 +1,42 @@ +<!DOCTYPE html> +<title>Encoding API: Streaming decode</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script src="resources/encodings.js"></script> +<script> + +var string = '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF'; +var octets = { + 'utf-8': [0x00,0x31,0x32,0x33,0x41,0x42,0x43,0x61,0x62,0x63,0xc2,0x80, + 0xc3,0xbf,0xc4,0x80,0xe1,0x80,0x80,0xef,0xbf,0xbd,0xf0,0x90, + 0x80,0x80,0xf4,0x8f,0xbf,0xbf], + 'utf-16le': [0x00,0x00,0x31,0x00,0x32,0x00,0x33,0x00,0x41,0x00,0x42,0x00, + 0x43,0x00,0x61,0x00,0x62,0x00,0x63,0x00,0x80,0x00,0xFF,0x00, + 0x00,0x01,0x00,0x10,0xFD,0xFF,0x00,0xD8,0x00,0xDC,0xFF,0xDB, + 0xFF,0xDF], + 'utf-16be': [0x00,0x00,0x00,0x31,0x00,0x32,0x00,0x33,0x00,0x41,0x00,0x42, + 0x00,0x43,0x00,0x61,0x00,0x62,0x00,0x63,0x00,0x80,0x00,0xFF, + 0x01,0x00,0x10,0x00,0xFF,0xFD,0xD8,0x00,0xDC,0x00,0xDB,0xFF, + 0xDF,0xFF] +}; + +Object.keys(octets).forEach(function(encoding) { + for (var len = 1; len <= 5; ++len) { + test(function() { + var encoded = octets[encoding]; + + var out = ''; + var decoder = new TextDecoder(encoding); + for (var i = 0; i < encoded.length; i += len) { + var sub = []; + for (var j = i; j < encoded.length && j < i + len; ++j) + sub.push(encoded[j]); + out += decoder.decode(new Uint8Array(sub), {stream: true}); + } + out += decoder.decode(); + assert_equals(out, string); + }, 'Streaming decode: ' + encoding + ', ' + len + ' byte window'); + } +}); + +</script> diff --git a/testing/web-platform/tests/encoding/textdecoder-utf16-surrogates.html b/testing/web-platform/tests/encoding/textdecoder-utf16-surrogates.html new file mode 100644 index 000000000..207cf372b --- /dev/null +++ b/testing/web-platform/tests/encoding/textdecoder-utf16-surrogates.html @@ -0,0 +1,51 @@ +<!DOCTYPE html> +<title>Encoding API: UTF-16 surrogate handling</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +var bad = [ + { + encoding: 'utf-16le', + input: [0x00, 0xd8], + expected: '\uFFFD', + name: 'lone surrogate lead' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xdc], + expected: '\uFFFD', + name: 'lone surrogate trail' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xd8, 0x00, 0x00], + expected: '\uFFFD\u0000', + name: 'unmatched surrogate lead' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xdc, 0x00, 0x00], + expected: '\uFFFD\u0000', + name: 'unmatched surrogate trail' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xdc, 0x00, 0xd8], + expected: '\uFFFD\uFFFD', + name: 'swapped surrogate pair' + } +]; + +bad.forEach(function(t) { + test(function() { + assert_equals(new TextDecoder(t.encoding).decode(new Uint8Array(t.input)), t.expected); + }, t.encoding + ' - ' + t.name); + test(function() { + assert_throws(new TypeError(), function() { + new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input)) + }); + }, t.encoding + ' - ' + t.name + ' (fatal flag set)'); +}); + +</script> diff --git a/testing/web-platform/tests/encoding/textencoder-constructor-non-utf.html b/testing/web-platform/tests/encoding/textencoder-constructor-non-utf.html new file mode 100644 index 000000000..f5c2ea545 --- /dev/null +++ b/testing/web-platform/tests/encoding/textencoder-constructor-non-utf.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<title>Encoding API: Legacy encodings</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script src="resources/encodings.js"></script> +<script> + +encodings_table.forEach(function(section) { + section.encodings.forEach(function(encoding) { + if (encoding.name !== 'replacement') { + test(function() { + assert_equals(new TextDecoder(encoding.name).encoding, encoding.name); + }, 'Encoding argument supported for decode: ' + encoding.name); + } + + test(function() { + assert_equals(new TextEncoder(encoding.name).encoding, 'utf-8'); + }, 'Encoding argument not considered for encode: ' + encoding.name); + }); +}); + +</script> diff --git a/testing/web-platform/tests/encoding/textencoder-utf16-surrogates.html b/testing/web-platform/tests/encoding/textencoder-utf16-surrogates.html new file mode 100644 index 000000000..46407528f --- /dev/null +++ b/testing/web-platform/tests/encoding/textencoder-utf16-surrogates.html @@ -0,0 +1,52 @@ +<!DOCTYPE html> +<title>Encoding API: USVString surrogate handling when encoding</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> + +var bad = [ + { + input: '\uD800', + expected: '\uFFFD', + name: 'lone surrogate lead' + }, + { + input: '\uDC00', + expected: '\uFFFD', + name: 'lone surrogate trail' + }, + { + input: '\uD800\u0000', + expected: '\uFFFD\u0000', + name: 'unmatched surrogate lead' + }, + { + input: '\uDC00\u0000', + expected: '\uFFFD\u0000', + name: 'unmatched surrogate trail' + }, + { + input: '\uDC00\uD800', + expected: '\uFFFD\uFFFD', + name: 'swapped surrogate pair' + }, + { + input: '\uD834\uDD1E', + expected: '\uD834\uDD1E', + name: 'properly encoded MUSICAL SYMBOL G CLEF (U+1D11E)' + } +]; + +bad.forEach(function(t) { + test(function() { + var encoded = new TextEncoder().encode(t.input); + var decoded = new TextDecoder().decode(encoded); + assert_equals(decoded, t.expected); + }, 'USVString handling: ' + t.name); +}); + +test(function() { + assert_equals(new TextEncoder().encode().length, 0, 'Should default to empty string'); +}, 'USVString default'); + +</script> |