1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
import codecs
class MalformedLocaleFileError(Exception):
pass
def parse_file(path):
return parse(read_file(path), path)
def read_file(path):
try:
return codecs.open( path, "r", "utf-8" ).readlines()
except UnicodeDecodeError, e:
raise MalformedLocaleFileError(
'Following locale file is not a valid ' +
'UTF-8 file: %s\n%s"' % (path, str(e)))
COMMENT = re.compile(r'\s*#')
EMPTY = re.compile(r'^\s+$')
KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")
def parse(lines, path=None):
lines = iter(lines)
lineNo = 1
pairs = dict()
for line in lines:
if COMMENT.match(line) or EMPTY.match(line) or len(line) == 0:
continue
m = KEYVALUE.match(line)
if not m:
raise MalformedLocaleFileError(
'Following locale file is not a valid .properties file: %s\n'
'Line %d is incorrect:\n%s' % (path, lineNo, line))
# All spaces are strip. Spaces at the beginning are stripped
# by the regular expression. We have to strip spaces at the end.
key = m.group(1).rstrip()
val = m.group(3).rstrip()
val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')
# `key` can be empty when key is only made of spaces
if not key:
raise MalformedLocaleFileError(
'Following locale file is not a valid .properties file: %s\n'
'Key is invalid on line %d is incorrect:\n%s' %
(path, lineNo, line))
# Multiline value: keep reading lines, while lines end with backslash
# and strip spaces at the beginning of lines except the last line
# that doesn't end up with backslash, we strip all spaces for this one.
if val.endswith("\\"):
val = val[:-1]
try:
# remove spaces before/after and especially the \n at EOL
line = lines.next().strip()
while line.endswith("\\"):
val += line[:-1].lstrip()
line = lines.next()
lineNo += 1
val += line.strip()
except StopIteration:
raise MalformedLocaleFileError(
'Following locale file is not a valid .properties file: %s\n'
'Unexpected EOF in multiline sequence at line %d:\n%s' %
(path, lineNo, line))
# Save this new pair
pairs[key] = val
lineNo += 1
normalize_plural(path, pairs)
return pairs
# Plural forms in properties files are defined like this:
# key = other form
# key[one] = one form
# key[...] = ...
# Parse them and merge each key into one object containing all forms:
# key: {
# other: "other form",
# one: "one form",
# ...: ...
# }
PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')
def normalize_plural(path, pairs):
for key in list(pairs.keys()):
m = PLURAL_FORM.match(key)
if not m:
continue
main_key = m.group(1)
plural_form = m.group(2)
# Allows not specifying a generic key (i.e a key without [form])
if not main_key in pairs:
pairs[main_key] = {}
# Ensure that we always have the [other] form
if not main_key + "[other]" in pairs:
raise MalformedLocaleFileError(
'Following locale file is not a valid UTF-8 file: %s\n'
'This plural form doesn\'t have a matching `%s[other]` form:\n'
'%s\n'
'You have to defined following key:\n%s'
% (path, main_key, key, main_key))
# convert generic form into an object if it is still a string
if isinstance(pairs[main_key], unicode):
pairs[main_key] = {"other": pairs[main_key]}
# then, add this new plural form
pairs[main_key][plural_form] = pairs[key]
del pairs[key]
|