diff options
Diffstat (limited to 'dom/encoding/domainsfallbacks.properties')
-rw-r--r-- | dom/encoding/domainsfallbacks.properties | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/dom/encoding/domainsfallbacks.properties b/dom/encoding/domainsfallbacks.properties new file mode 100644 index 000000000..b4911955d --- /dev/null +++ b/dom/encoding/domainsfallbacks.properties @@ -0,0 +1,167 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This file contains educated guesses about which top-level domains are +# likely to host legacy content that assumes a non-windows-1252 encoding. +# Punycode TLDs are included on the theory that legacy content might appear +# behind those relatively new TLDs if DNS just points to a legacy server. +# +# Encodings for which a confident-enough educated guess is missing are +# listed in nonparticipatingdomains.properties. Domains that are listed +# neither there nor here get windows-1252 as the associated fallback. +# +# The list below includes Arabic-script TLDs not on IANA list but on the +# ICANN list: +# http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion +# Otherwise, the list includes non-windows-1252-affilited country TLDs from +# https://data.iana.org/TLD/tlds-alpha-by-domain.txt +# +# The guesses are assigned as follows: +# * If the country has a dominant country-affiliated language and that language +# is part of the languages to fallbacks mapping, use the encoding for that +# language from that mapping. +# * Use windows-1256 for countries that have a dominant Arabic-script +# language or whose all languages are Arabic-script languages. +# * Use windows-1251 likewise but for Cyrillic script. + +ae=windows-1256 +xn--mgbaam7a8h=windows-1256 + +af=windows-1256 + +bg=windows-1251 + +bh=windows-1256 + +by=windows-1251 + +cn=gbk +xn--fiqs8s=gbk +# Assume that Traditional Chinese TLD is meant to work if URL input happens to +# be in the traditional mode. Expect content to be simplified anyway. +xn--fiqz9s=gbk + +cz=windows-1250 + +dz=windows-1256 +xn--lgbbat1ad8j=windows-1256 + +ee=windows-1257 + +eg=windows-1256 +xn--wgbh1c=windows-1256 + +gr=ISO-8859-7 + +hk=Big5 +xn--j6w193g=Big5 + +hr=windows-1250 + +hu=ISO-8859-2 + +iq=windows-1256 + +ir=windows-1256 +xn--mgba3a4f16a=windows-1256 + +jo=windows-1256 +xn--mgbayh7gpa=windows-1256 + +jp=Shift_JIS + +kg=windows-1251 + +kp=EUC-KR + +kr=EUC-KR +xn--3e0b707e=EUC-KR + +kw=windows-1256 + +kz=windows-1251 +xn--80ao21a=windows-1251 + +lb=windows-1256 + +lt=windows-1257 + +lv=windows-1257 + +ma=windows-1256 +xn--mgbc0a9azcg=windows-1256 + +mk=windows-1251 + +mn=windows-1251 +xn--l1acc=windows-1251 + +mo=Big5 + +# my +xn--mgbx4cd0ab=windows-1256 + +om=windows-1256 +xn--mgb9awbf=windows-1256 + +#pk +xn--mgbai9azgqp6j=windows-1256 + +pl=ISO-8859-2 + +ps=windows-1256 +xn--ygbi2ammx=windows-1256 + +qa=windows-1256 +xn--wgbl6a=windows-1256 + +rs=windows-1251 +xn--90a3ac=windows-1251 + +ru=windows-1251 +xn--p1ai=windows-1251 + +sa=windows-1256 +xn--mgberp4a5d4ar=windows-1256 + +sd=windows-1256 +xn--mgbpl2fh=windows-1256 + +sg=gbk +xn--yfro4i67o=gbk + +si=ISO-8859-2 + +sk=windows-1250 + +su=windows-1251 + +sy=windows-1256 +xn--mgbtf8fl=windows-1256 + +th=windows-874 +xn--o3cw4h=windows-874 + +tj=windows-1251 + +tn=windows-1256 +xn--pgbs0dh=windows-1256 + +tr=windows-1254 + +tw=Big5 +# Assume that the Simplified Chinese TLD is meant to work when URL input +# happens in the simplified mode. Assume content is tradition anyway. +xn--kprw13d=Big5 +xn--kpry57d=Big5 + +ua=windows-1251 +xn--j1amh=windows-1251 + +uz=windows-1251 + +vn=windows-1258 + +ye=windows-1256 +xn--mgb2ddes=windows-1256 |