# path: root/addon-sdk/source/python-lib/cuddlefish/property_parser.py
# blob: 20068a2646517ea25f7bdf56f7d2e3585097a8a3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import re
import codecs

class MalformedLocaleFileError(Exception):
    """Raised when a locale .properties file cannot be decoded or parsed."""

def parse_file(path):
    """Read the locale file at `path` and return its parsed key/value pairs.

    The file is loaded with `read_file` and the resulting lines are handed
    to `parse`, together with `path` so that errors can name the file.
    """
    file_lines = read_file(path)
    return parse(file_lines, path)

def read_file(path):
    """Return the lines of the UTF-8 encoded file at `path`.

    Each returned line is a decoded string that still carries its line
    ending, as produced by `readlines()`.

    Raises:
        MalformedLocaleFileError: if the file content is not valid UTF-8.
    """
    try:
        # The context manager closes the file even when decoding fails.
        with codecs.open(path, "r", "utf-8") as locale_file:
            return locale_file.readlines()
    except UnicodeDecodeError as e:
        # `%` binds tighter than `+`, so both placeholders live in the
        # second literal on purpose.
        raise MalformedLocaleFileError(
          'Following locale file is not a valid ' +
          'UTF-8 file: %s\n%s' % (path, str(e)))

# Comment line: optional leading whitespace followed by `#`.
COMMENT = re.compile(r'\s*#')
# Line made only of whitespace (at least one whitespace character).
EMPTY = re.compile(r'^\s+$')
# `key = value` or `key: value` line. Group 1 is the key (leading whitespace
# skipped, trailing whitespace still present), group 2 is the separator and
# group 3 is the rest of the line after the whitespace following the separator.
KEYVALUE = re.compile(r"\s*([^=:]+)(=|:)\s*(.*)")

def parse(lines, path=None):
    """Parse the lines of a .properties file into a dict.

    Comment lines and blank lines are skipped. Every other line must be of
    the form `key = value` or `key: value`. A value ending with a backslash
    continues on the following line(s).

    Args:
        lines: iterable of strings, one per physical line of the file.
        path: name of the file being parsed; only used in error messages.

    Returns:
        The dict of parsed pairs, after `normalize_plural` has been applied
        to it.

    Raises:
        MalformedLocaleFileError: on a line that is not a key/value pair, on
            an empty key, or when the input ends in the middle of a
            multiline value.
    """
    # Number every physical line as it is consumed, so that the line number
    # reported in errors stays right after comments, blank lines and
    # continuation lines.
    numbered = enumerate(lines, 1)
    pairs = dict()
    for lineNo, line in numbered:
        if not line or COMMENT.match(line) or EMPTY.match(line):
            continue
        m = KEYVALUE.match(line)
        if not m:
            raise MalformedLocaleFileError(
                  'Following locale file is not a valid .properties file: %s\n'
                  'Line %d is incorrect:\n%s' % (path, lineNo, line))

        # Leading spaces are skipped by the regular expression; trailing
        # spaces have to be stripped here.
        key = m.group(1).rstrip()
        val = m.group(3).rstrip()
        # Round trip through raw-unicode-escape to decode `\uXXXX` sequences.
        # NOTE(review): this only covers the first physical line of a
        # multiline value; continuation lines are appended as-is below.
        val = val.encode('raw-unicode-escape').decode('raw-unicode-escape')

        # `key` can be empty when key is only made of spaces
        if not key:
            raise MalformedLocaleFileError(
                  'Following locale file is not a valid .properties file: %s\n'
                  'Key is invalid on line %d is incorrect:\n%s' %
                  (path, lineNo, line))

        # Multiline value: keep reading lines while they end with a
        # backslash. Every continuation line is stripped first (which also
        # removes the newline), so the trailing backslash is detected on all
        # of them, not only on the first one.
        if val.endswith("\\"):
            val = val[:-1]
            try:
                lineNo, line = next(numbered)
                line = line.strip()
                while line.endswith("\\"):
                    val += line[:-1]
                    lineNo, line = next(numbered)
                    line = line.strip()
                val += line
            except StopIteration:
                # `lineNo` and `line` still describe the last line read.
                raise MalformedLocaleFileError(
                  'Following locale file is not a valid .properties file: %s\n'
                  'Unexpected EOF in multiline sequence at line %d:\n%s' %
                  (path, lineNo, line))
        # Save this new pair
        pairs[key] = val

    normalize_plural(path, pairs)
    return pairs

# Plural forms in properties files are defined like this:
#   key = other form
#   key[one] = one form
#   key[...] = ...
# Parse them and merge each key into one object containing all forms:
#   key: {
#     other: "other form",
#     one: "one form",
#     ...: ...
#   }
PLURAL_FORM = re.compile(r'^(.*)\[(zero|one|two|few|many|other)\]$')

def normalize_plural(path, pairs):
    """Merge `key[form]` entries of `pairs` into one dict per key, in place.

    For every key of the form `name[form]`, the value is moved to
    `pairs[name][form]` and the original `name[form]` entry is removed. When
    `pairs[name]` already holds a plain value, that value becomes the
    `other` form.

    Args:
        path: name of the locale file; only used in the error message.
        pairs: dict of parsed key/value pairs, modified in place.

    Raises:
        MalformedLocaleFileError: if a plural form exists for a key that has
            neither a generic `name` entry nor a `name[other]` entry.
    """
    # Iterate over a snapshot of the keys: `pairs` is mutated in the loop.
    for key in list(pairs.keys()):
        m = PLURAL_FORM.match(key)
        if not m:
            continue
        main_key = m.group(1)
        plural_form = m.group(2)
        if main_key not in pairs:
            # Allows not specifying a generic key (i.e a key without [form]),
            # but then the [other] form is mandatory. Validate before
            # inserting so that a failing call leaves `pairs` untouched.
            if main_key + "[other]" not in pairs:
                raise MalformedLocaleFileError(
                      'Following locale file is not a valid .properties file: %s\n'
                      'This plural form doesn\'t have a matching `%s[other]` form:\n'
                      '%s\n'
                      'You have to defined following key:\n%s'
                      % (path, main_key, key, main_key))
            pairs[main_key] = {}
        elif not isinstance(pairs[main_key], dict):
            # Convert the generic form into an object if it is still a plain
            # value, whatever its string type is.
            pairs[main_key] = {"other": pairs[main_key]}
        # then, add this new plural form
        pairs[main_key][plural_form] = pairs[key]
        del pairs[key]