summaryrefslogtreecommitdiffstats
path: root/dom/encoding/domainsfallbacks.properties
diff options
context:
space:
mode:
Diffstat (limited to 'dom/encoding/domainsfallbacks.properties')
-rw-r--r--dom/encoding/domainsfallbacks.properties167
1 files changed, 167 insertions, 0 deletions
diff --git a/dom/encoding/domainsfallbacks.properties b/dom/encoding/domainsfallbacks.properties
new file mode 100644
index 000000000..b4911955d
--- /dev/null
+++ b/dom/encoding/domainsfallbacks.properties
@@ -0,0 +1,167 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file contains educated guesses about which top-level domains are
+# likely to host legacy content that assumes a non-windows-1252 encoding.
+# Punycode TLDs are included on the theory that legacy content might appear
+# behind those relatively new TLDs if DNS just points to a legacy server.
+#
+# Domains for which a confident-enough educated guess is missing are
+# listed in nonparticipatingdomains.properties. Domains that are listed
+# neither there nor here get windows-1252 as the associated fallback.
+#
+# The list below includes Arabic-script TLDs not on IANA list but on the
+# ICANN list:
+# http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion
+# Otherwise, the list includes non-windows-1252-affiliated country TLDs from
+# https://data.iana.org/TLD/tlds-alpha-by-domain.txt
+#
+# The guesses are assigned as follows:
+# * If the country has a dominant country-affiliated language and that language
+# is part of the languages to fallbacks mapping, use the encoding for that
+# language from that mapping.
+# * Use windows-1256 for countries that have a dominant Arabic-script
+# language or all of whose languages are Arabic-script languages.
+# * Use windows-1251 likewise but for Cyrillic script.
+
+ae=windows-1256
+xn--mgbaam7a8h=windows-1256
+
+af=windows-1256
+
+bg=windows-1251
+
+bh=windows-1256
+
+by=windows-1251
+
+cn=gbk
+xn--fiqs8s=gbk
+# Assume that Traditional Chinese TLD is meant to work if URL input happens to
+# be in the traditional mode. Expect content to be simplified anyway.
+xn--fiqz9s=gbk
+
+cz=windows-1250
+
+dz=windows-1256
+xn--lgbbat1ad8j=windows-1256
+
+ee=windows-1257
+
+eg=windows-1256
+xn--wgbh1c=windows-1256
+
+gr=ISO-8859-7
+
+hk=Big5
+xn--j6w193g=Big5
+
+hr=windows-1250
+
+hu=ISO-8859-2
+
+iq=windows-1256
+
+ir=windows-1256
+xn--mgba3a4f16a=windows-1256
+
+jo=windows-1256
+xn--mgbayh7gpa=windows-1256
+
+jp=Shift_JIS
+
+kg=windows-1251
+
+kp=EUC-KR
+
+kr=EUC-KR
+xn--3e0b707e=EUC-KR
+
+kw=windows-1256
+
+kz=windows-1251
+xn--80ao21a=windows-1251
+
+lb=windows-1256
+
+lt=windows-1257
+
+lv=windows-1257
+
+ma=windows-1256
+xn--mgbc0a9azcg=windows-1256
+
+mk=windows-1251
+
+mn=windows-1251
+xn--l1acc=windows-1251
+
+mo=Big5
+
+# my: no fallback is listed here for the ASCII TLD; only the IDN TLD below.
+xn--mgbx4cd0ab=windows-1256
+
+om=windows-1256
+xn--mgb9awbf=windows-1256
+
+# pk: no fallback is listed here for the ASCII TLD; only the IDN TLD below.
+xn--mgbai9azgqp6j=windows-1256
+
+pl=ISO-8859-2
+
+ps=windows-1256
+xn--ygbi2ammx=windows-1256
+
+qa=windows-1256
+xn--wgbl6a=windows-1256
+
+rs=windows-1251
+xn--90a3ac=windows-1251
+
+ru=windows-1251
+xn--p1ai=windows-1251
+
+sa=windows-1256
+xn--mgberp4a5d4ar=windows-1256
+
+sd=windows-1256
+xn--mgbpl2fh=windows-1256
+
+sg=gbk
+xn--yfro4i67o=gbk
+
+si=ISO-8859-2
+
+sk=windows-1250
+
+su=windows-1251
+
+sy=windows-1256
+xn--mgbtf8fl=windows-1256
+
+th=windows-874
+xn--o3cw4h=windows-874
+
+tj=windows-1251
+
+tn=windows-1256
+xn--pgbs0dh=windows-1256
+
+tr=windows-1254
+
+tw=Big5
+# Assume that the Simplified Chinese TLD is meant to work when URL input
+# happens in the simplified mode. Assume content is traditional anyway.
+xn--kprw13d=Big5
+xn--kpry57d=Big5
+
+ua=windows-1251
+xn--j1amh=windows-1251
+
+uz=windows-1251
+
+vn=windows-1258
+
+ye=windows-1256
+xn--mgb2ddes=windows-1256