# -*- coding: utf-8 -*-
"""URL fixture tables used to generate conformance-checker test documents.

Each table maps a short test description (later used as part of a generated
file name) to the URL string under test.  The generator functions further
down in this file consume these tables together with
``element_attribute_pairs``.

NOTE(review): the ``u"...".encode('utf-8')`` values are UTF-8 byte strings,
while the other values are plain string literals.  Under Python 2 both are
byte strings; this script appears to target Python 2 -- confirm before
porting.
"""
import os

# Parent of the directory that contains this script.
ccdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# based on https://github.com/w3c/web-platform-tests/blob/275544eab54a0d0c7f74ccc2baae9711293d8908/url/urltestdata.txt

# URLs written out as "-novalid" test cases by the generator.
invalid = {
    "scheme-trailing-tab": "a:\tfoo.com",
    "scheme-trailing-newline": "a:\nfoo.com",
    "scheme-trailing-cr": "a:\rfoo.com",
    "scheme-trailing-space": "a: foo.com",
    "scheme-http-no-slash": "http:foo.com",
    "scheme-http-no-slash-colon": "http::@c:29",
    "scheme-http-no-slash-square-bracket": "http:[61:27]/:foo",
    "scheme-http-backslash": "http:\\\\foo.com\\",
    "scheme-http-single-slash": "http:/example.com/",
    "scheme-ftp-single-slash": "ftp:/example.com/",
    "scheme-https-single-slash": "https:/example.com/",
    "scheme-data-single-slash": "data:/example.com/",
    "scheme-ftp-no-slash": "ftp:example.com/",
    "scheme-https-no-slash": "https:example.com/",
    "scheme-javascript-no-slash-malformed": "javascript:example.com/",
    "userinfo-password-bad-chars": "http://&a:foo(b]c@d:2/",
    "userinfo-username-contains-at-sign": "http://::@c@d:2",
    "userinfo-backslash": "http://a\\b:c\\d@foo.com",
    "host-space": "http://example .org",
    "host-tab": "http://example\t.org",
    "host-newline": "http://example.\norg",
    "host-cr": "http://example.\rorg",
    "host-square-brackets-port-contains-colon": "http://[1::2]:3:4",
    "port-999999": "http://f:999999/c",
    "port-single-letter": "http://f:b/c",
    "port-multiple-letters": "http://f:fifty-two/c",
    "port-leading-colon": "http://2001::1",
    "port-leading-colon-bracket-colon": "http://2001::1]:80",
    "path-leading-backslash-at-sign": "http://foo.com/\\@",
    "path-leading-colon-backslash": ":\\",
    "path-leading-colon-chars-backslash": ":foo.com\\",
    "path-relative-square-brackets": "[61:24:74]:98",
    "fragment-contains-hash": "http://foo/path#f#g",
    "path-percent-encoded-malformed": "http://example.com/foo/%2e%2",
    "path-bare-percent-sign": "http://example.com/foo%",
    "path-u0091": u"http://example.com/foo\u0091".encode('utf-8'),
    "userinfo-username-contains-pile-of-poo": "http://💩:foo@example.com",
    "userinfo-password-contains-pile-of-poo": "http://foo:💩@example.com",
    "host-hostname-in-brackets": "http://[www.google.com]/",
    "host-empty": "http://",
    "host-empty-with-userinfo": "http://user:pass@/",
    "port-leading-dash": "http://foo:-80/",
    "host-empty-userinfo-empty": "http://@/www.example.com",
    "host-invalid-unicode": u"http://\ufdd0zyx.com".encode('utf-8'),
    "host-invalid-unicode-percent-encoded": "http://%ef%b7%90zyx.com",
    "host-double-percent-encoded": u"http://\uff05\uff14\uff11.com".encode('utf-8'),
    "host-double-percent-encoded-percent-encoded": "http://%ef%bc%85%ef%bc%94%ef%bc%91.com",
    "host-u0000-percent-encoded": u"http://\uff05\uff10\uff10.com".encode('utf-8'),
    "host-u0000-percent-encoded-percent-encoded": "http://%ef%bc%85%ef%bc%90%ef%bc%90.com",
}

# Snapshot taken before the file-scheme cases are merged into `invalid`;
# it later also receives the relative URLs from `valid_relative`.
# NOTE(review): presumably for contexts that require an absolute URL -- confirm.
invalid_absolute = invalid.copy()

# Added to both `invalid` and `invalid_absolute` below.
invalid_url_code_points = {
    "fragment-backslash": "#\\",
    "fragment-leading-space": "http://f:21/b# e",
    "path-contains-space": "/a/ /c",
    "path-leading-space": "http://f:21/ b",
    "path-tab": "http://example.com/foo\tbar",
    "path-trailing-space": "http://f:21/b ?",
    "port-cr": "http://f:\r/c",
    "port-newline": "http://f:\n/c",
    "port-space": "http://f: /c",
    "port-tab": "http://f:\t/c",
    "query-leading-space": "http://f:21/b? d",
    "query-trailing-space": "http://f:21/b?d #",
}
invalid.update(invalid_url_code_points)
invalid_absolute.update(invalid_url_code_points)

valid_absolute = {
    "scheme-private": "a:foo.com",
    "scheme-private-slash": "foo:/",
    "scheme-private-slash-slash": "foo://",
    "scheme-private-path": "foo:/bar.com/",
    "scheme-private-path-leading-slashes-only": "foo://///////",
    "scheme-private-path-leading-slashes-chars": "foo://///////bar.com/",
    "scheme-private-path-leading-slashes-colon-slashes": "foo:////://///",
    "scheme-private-single-letter": "c:/foo",
    "scheme-private-single-slash": "madeupscheme:/example.com/",
    "scheme-file-single-slash": "file:/example.com/",
    "scheme-ftps-single-slash": "ftps:/example.com/",
    "scheme-gopher-single-slash": "gopher:/example.com/",
    "scheme-ws-single-slash": "ws:/example.com/",
    "scheme-wss-single-slash": "wss:/example.com/",
    "scheme-javascript-single-slash": "javascript:/example.com/",
    "scheme-mailto-single-slash": "mailto:/example.com/",
    "scheme-private-no-slash": "madeupscheme:example.com/",
    "scheme-ftps-no-slash": "ftps:example.com/",
    "scheme-gopher-no-slash": "gopher:example.com/",
    "scheme-wss-no-slash": "wss:example.com/",
    "scheme-mailto-no-slash": "mailto:example.com/",
    "scheme-data-no-slash": "data:text/plain,foo",
    "userinfo": "http://user:pass@foo:21/bar;par?b#c",
    "host-ipv6": "http://[2001::1]",
    "host-ipv6-port": "http://[2001::1]:80",
    "port-none-but-colon": "http://f:/c",
    "port-0": "http://f:0/c",
    "port-00000000000000": "http://f:00000000000000/c",
    "port-00000000000000000000080": "http://f:00000000000000000000080/c",
    "userinfo-host-port-path": "http://a:b@c:29/d",
    "userinfo-username-non-alpha": "http://foo.com:b@d/",
    "query-contains-question-mark": "http://foo/abcd?efgh?ijkl",
    "fragment-contains-question-mark": "http://foo/abcd#foo?bar",
    "path-percent-encoded-dot": "http://example.com/foo/%2e",
    "path-percent-encoded-space": "http://example.com/%20foo",
    "path-non-ascii": u"http://example.com/\u00C2\u00A9zbar".encode('utf-8'),
    "path-percent-encoded-multiple": "http://example.com/foo%41%7a",
    "path-percent-encoded-u0091": "http://example.com/foo%91",
    "path-percent-encoded-u0000": "http://example.com/foo%00",
    "path-percent-encoded-mixed-case": "http://example.com/%3A%3a%3C%3c",
    "path-unicode-han": u"http://example.com/\u4F60\u597D\u4F60\u597D".encode('utf-8'),
    "path-uFEFF": u"http://example.com/\uFEFF/foo".encode('utf-8'),
    "path-u202E-u202D": u"http://example.com/\u202E/foo/\u202D/bar".encode('utf-8'),
    "host-is-pile-of-poo": "http://💩",
    "path-contains-pile-of-poo": "http://example.com/foo/💩",
    "query-contains-pile-of-poo": "http://example.com/foo?💩",
    "fragment-contains-pile-of-poo": "http://example.com/foo#💩",
    "host-192.0x00A80001": "http://192.0x00A80001",
    "userinfo-username-contains-percent-encoded": "http://%25DOMAIN:foobar@foodomain.com",
    "userinfo-empty": "http://@www.example.com",
    "userinfo-user-empty": "http://:b@www.example.com",
    "userinfo-password-empty": "http://a:@www.example.com",
    "host-exotic-whitespace": u"http://GOO\u200b\u2060\ufeffgoo.com".encode('utf-8'),
    "host-exotic-dot": u"http://www.foo\u3002bar.com".encode('utf-8'),
    "host-fullwidth": u"http://\uff27\uff4f.com".encode('utf-8'),
    "host-idn-unicode-han": u"http://\u4f60\u597d\u4f60\u597d".encode('utf-8'),
    "host-IP-address-broken": "http://192.168.0.257/",
}

# `valid` starts as the absolute cases and then accumulates the relative,
# colon/dot-relative and file-scheme cases below.
valid = valid_absolute.copy()

valid_relative = {
    "scheme-schemeless-relative": "//foo/bar",
    "path-slash-only-relative": "/",
    "path-simple-relative": "/a/b/c",
    "path-percent-encoded-slash-relative": "/a%2fc",
    "path-percent-encoded-slash-plus-slashes-relative": "/a/%2f/c",
    "query-empty-no-path-relative": "?",
    "fragment-empty-hash-only-no-path-relative": "#",
    "fragment-slash-relative": "#/",
    "fragment-semicolon-question-mark-relative": "#;?",
    "fragment-non-ascii-relative": u"#\u03B2".encode('utf-8'),
}
valid.update(valid_relative)
# Relative URLs are merged into the "invalid absolute" table as well.
invalid_absolute.update(valid_relative)

valid_relative_colon_dot = {
    "scheme-none-relative": "foo.com",
    "path-colon-relative": ":",
    "path-leading-colon-letter-relative": ":a",
    "path-leading-colon-chars-relative": ":foo.com",
    "path-leading-colon-slash-relative": ":/",
    "path-leading-colon-hash-relative": ":#",
    "path-leading-colon-number-relative": ":23",
    "path-slash-colon-number-relative": "/:23",
    "path-leading-colon-colon-relative": "::",
    "path-colon-colon-number-relative": "::23",
    "path-starts-with-pile-of-poo": "💩http://foo",
    # This key must differ from "path-contains-pile-of-poo" in
    # `valid_absolute`: sharing the name made the update() below overwrite
    # the absolute-URL case in `valid`, silently dropping it.
    "path-contains-pile-of-poo-relative": "http💩//:foo",
}
valid.update(valid_relative_colon_dot)

invalid_file = {
    "scheme-file-backslash": "file:c:\\foo\\bar.html",
    "scheme-file-single-slash-c-bar": "file:/C|/foo/bar",
    "scheme-file-triple-slash-c-bar": "file:///C|/foo/bar",
}
# Merged into `invalid` only; `invalid_absolute` was copied earlier and
# does not receive these.
invalid.update(invalid_file)

valid_file = {
    "scheme-file-uppercase": "File://foo/bar.html",
    "scheme-file-slash-slash-c-bar": "file://C|/foo/bar",
    "scheme-file-slash-slash-abc-bar": "file://abc|/foo/bar",
    "scheme-file-host-included": "file://server/foo/bar",
    "scheme-file-host-empty": "file:///foo/bar.txt",
    "scheme-file-scheme-only": "file:",
    "scheme-file-slash-only": "file:/",
    "scheme-file-slash-slash-only": "file://",
    "scheme-file-slash-slash-slash-only": "file:///",
    "scheme-file-no-slash": "file:test",
}
valid.update(valid_file)
valid_absolute.update(valid_file)

# NOTE(review): presumably URLs expected to produce a warning rather than
# an error -- confirm against the code that consumes this table.
warnings = {
    "scheme-data-contains-fragment": "data:text/html,test#test",
}

# "<element> <attribute>" pairs; each entry is split on whitespace by the
# generator to obtain the element name and the URL-valued attribute.
element_attribute_pairs = [
    "a href",
    # "a ping", space-separated list of URLs; tested elsewhere
    "area href",
    # "area ping", space-separated list of URLs; tested elsewhere
    "audio src",
    "base href",
    "blockquote cite",
    "button formaction",
    "del cite",
    "embed src",
    "form action",
    "html manifest",
    "iframe src",
    "img src",  # srcset is tested elsewhere
    "input formaction",  # type=submit, type=image
    "input src",  # type=image
    "input value",  # type=url
    "ins cite",
    "link href",
    # "menuitem icon", # skip until parser is updated
    "object data",
    "q cite",
    "script src",
    "source src",
    "track src",
    "video poster",
    "video src",
]
template = "\n\n" def write_novalid_files(): for el, attr in (pair.split() for pair in element_attribute_pairs): for desc, url in invalid.items(): if ("area" == el): f = open(os.path.join(ccdir, "html/elements/area/href/%s-novalid.html" % desc), 'wb') f.write(template + '