summaryrefslogtreecommitdiffstats
path: root/dom/encoding/domainsfallbacks.properties
blob: b4911955d04bdfc055ef5fa9058106a385dc9579 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# This file contains educated guesses about which top-level domains are
# likely to host legacy content that assumes a non-windows-1252 encoding.
# Punycode TLDs are included on the theory that legacy content might appear
# behind those relatively new TLDs if DNS just points to a legacy server.
#
# Domains for which a confident-enough educated guess is missing are
# listed in nonparticipatingdomains.properties. Domains that are listed
# neither there nor here get windows-1252 as the associated fallback.
#
# The list below includes Arabic-script TLDs that are not on the IANA list
# but are on the ICANN list:
# http://www.icann.org/en/resources/idn/fast-track/string-evaluation-completion
# Otherwise, the list includes non-windows-1252-affiliated country TLDs from
# https://data.iana.org/TLD/tlds-alpha-by-domain.txt
#
# The guesses are assigned as follows:
# * If the country has a dominant country-affiliated language and that language
#   is part of the languages to fallbacks mapping, use the encoding for that
#   language from that mapping.
# * Use windows-1256 for countries that have a dominant Arabic-script
#   language or all of whose languages are Arabic-script languages.
# * Use windows-1251 likewise but for Cyrillic script.

ae=windows-1256
xn--mgbaam7a8h=windows-1256

af=windows-1256

bg=windows-1251

bh=windows-1256

by=windows-1251

cn=gbk
xn--fiqs8s=gbk
# Assume that the Traditional Chinese TLD is meant to work if URL input happens
# to be in the traditional mode. Expect content to be simplified anyway.
xn--fiqz9s=gbk

cz=windows-1250

dz=windows-1256
xn--lgbbat1ad8j=windows-1256

ee=windows-1257

eg=windows-1256
xn--wgbh1c=windows-1256

gr=ISO-8859-7

hk=Big5
xn--j6w193g=Big5

hr=windows-1250

hu=ISO-8859-2

iq=windows-1256

ir=windows-1256
xn--mgba3a4f16a=windows-1256

jo=windows-1256
xn--mgbayh7gpa=windows-1256

jp=Shift_JIS

kg=windows-1251

kp=EUC-KR

kr=EUC-KR
xn--3e0b707e=EUC-KR

kw=windows-1256

kz=windows-1251
xn--80ao21a=windows-1251

lb=windows-1256

lt=windows-1257

lv=windows-1257

ma=windows-1256
xn--mgbc0a9azcg=windows-1256

mk=windows-1251

mn=windows-1251
xn--l1acc=windows-1251

mo=Big5

# my
xn--mgbx4cd0ab=windows-1256

om=windows-1256
xn--mgb9awbf=windows-1256

# pk
xn--mgbai9azgqp6j=windows-1256

pl=ISO-8859-2

ps=windows-1256
xn--ygbi2ammx=windows-1256

qa=windows-1256
xn--wgbl6a=windows-1256

rs=windows-1251
xn--90a3ac=windows-1251

ru=windows-1251
xn--p1ai=windows-1251

sa=windows-1256
xn--mgberp4a5d4ar=windows-1256

sd=windows-1256
xn--mgbpl2fh=windows-1256

sg=gbk
xn--yfro4i67o=gbk

si=ISO-8859-2

sk=windows-1250

su=windows-1251

sy=windows-1256
xn--mgbtf8fl=windows-1256

th=windows-874
xn--o3cw4h=windows-874

tj=windows-1251

tn=windows-1256
xn--pgbs0dh=windows-1256

tr=windows-1254

tw=Big5
# Assume that the Simplified Chinese TLD is meant to work when URL input
# happens in the simplified mode. Assume content is traditional anyway.
xn--kprw13d=Big5
xn--kpry57d=Big5

ua=windows-1251
xn--j1amh=windows-1251

uz=windows-1251

vn=windows-1258

ye=windows-1256
xn--mgb2ddes=windows-1256