summaryrefslogtreecommitdiffstats
path: root/addon-sdk/source/python-lib/plural-rules-generator.py
blob: 02cdee1351fae9f49881755bcf2ffd732cf163ed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Program used to generate /packages/api-utils/lib/l10n/plural-rules.js
# It fetches unicode.org data in order to build, for each language, a function
# that returns the plural form name for a given integer.
# Plural form names are: zero, one, two, few, many, other.
#
# More information here:
#   http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
#   http://cldr.unicode.org/index/cldr-spec/plural-rules

# Usage:
# $ python plural-rules-generator.py > ../packages/api-utils/lib/l10n/plural-rules.js

import urllib2
import xml.dom.minidom
import json
import re

PRINT_CONDITIONS_IN_COMMENTS = False

UNICODE_ORG_XML_URL = "http://unicode.org/repos/cldr/trunk/common/supplemental/plurals.xml"

CONDITION_RE = r'n( mod \d+)? (is|in|within|(not in))( not)? ([^\s]+)'


def parseCondition(g):
    """
    For a given regexp.MatchObject `g` for `CONDITION_RE`,
    returns the equivalent JS piece of code
    i.e. maps pseudo conditional language from unicode.org XML to JS code

    Groups read from `g`:
      1: optional " mod N" applied to the left operand `n`
      2: operator ("is", "in", "within" or "not in")
      4: optional " not" following the operator (as in "is not")
      5: right operand, either a single number or a comma separated list
         of numbers and `a..b` ranges

    Returns a string holding a JS boolean expression. List and range tests
    are emitted as calls to `isIn(n, [...])` and `isBetween(n, a, b)`.

    Raises ValueError if an item of a list containing ranges is not an
    integer.
    """
    lvalue = "n"
    if g.group(1):
        # group(1) looks like " mod 10"; int() tolerates the leading space
        lvalue = "(n %% %d)" % int(g.group(1).replace("mod ", ""))

    operator = g.group(2)
    if g.group(4):
        operator += " not"

    rvalue = g.group(5)

    if operator == "is":
        return "%s == %s" % (lvalue, rvalue)
    if operator == "is not":
        return "%s != %s" % (lvalue, rvalue)

    # "in", "within" or "not in" case:
    notPrefix = "!" if operator == "not in" else ""

    # `rvalue` is a comma separated list of either:
    #  - numbers: 42
    #  - ranges: 42..72
    sections = rvalue.split(',')

    if ".." not in rvalue:
        # If we don't have range, but only a list of integer,
        # we can simplify the generated code by using `isIn`
        # n in 1,3,6,42
        return "%sisIn(%s, [%s])" % (notPrefix, lvalue, ", ".join(sections))

    # n in 1..42
    # n in 1..3,42
    subCondition = []
    integers = []
    for sub in sections:
        if ".." in sub:
            left, right = sub.split("..")
            subCondition.append("isBetween(%s, %d, %d)" % (
                lvalue, int(left), int(right)))
        else:
            integers.append(int(sub))

    if len(integers) > 1:
        # str.join() only accepts strings, so the integers have to be
        # converted back before building the JS array literal
        jsList = ", ".join([str(i) for i in integers])
        subCondition.append("isIn(%s, [%s])" % (lvalue, jsList))
    elif len(integers) == 1:
        subCondition.append("(%s == %s)" % (lvalue, integers[0]))

    return "%s(%s)" % (notPrefix, " || ".join(subCondition))

def computeRules():
    """
    Fetch plural rules data directly from unicode.org website
    (`UNICODE_ORG_XML_URL`) and convert every rule to JS code.

    Returns a `(localesMapping, rules)` tuple:
      - localesMapping: dict of locale name -> index of the <pluralRules>
        element that lists this locale
      - rules: dict of that index -> JS source made of one
        `if (<condition>) return "<plural form>";` statement per
        <pluralRule> child, joined with newlines
    """
    f = urllib2.urlopen(UNICODE_ORG_XML_URL)
    try:
        doc = xml.dom.minidom.parse(f)
    finally:
        # Release the HTTP connection even if the XML fails to parse
        f.close()

    # Read XML document and extract locale to rules mapping
    localesMapping = {}
    algorithms = {}
    for index, pluralRules in enumerate(doc.getElementsByTagName("pluralRules")):
        algorithms[index] = {}
        for locale in pluralRules.getAttribute("locales").split():
            localesMapping[locale] = index
        for rule in pluralRules.childNodes:
            # Skip whitespace text nodes, comments and unrelated elements
            if rule.nodeType != rule.ELEMENT_NODE or rule.tagName != "pluralRule":
                continue
            pluralForm = rule.getAttribute("count")
            algorithm = rule.firstChild.nodeValue
            algorithms[index][pluralForm] = algorithm

    # Go through all rules and compute a Javascript code for each of them
    rules = {}
    for index, rule in algorithms.items():
        lines = []
        for pluralForm in rule:
            originalCondition = rule[pluralForm]

            # Convert pseudo language to JS code
            condition = originalCondition.lower()
            condition = re.sub(CONDITION_RE, parseCondition, condition)
            # Only replace the standalone keywords: a plain substring
            # replacement would also rewrite any token containing "or"/"and"
            condition = re.sub(r'\bor\b', "||", condition)
            condition = re.sub(r'\band\b', "&&", condition)

            # Prints original condition in unicode.org pseudo language.
            # The text is deliberately not passed through str(): the XML
            # text is unicode and str() raises on non-ASCII characters
            # under Python 2.
            if PRINT_CONDITIONS_IN_COMMENTS:
                lines.append('// %s' % originalCondition)

            lines.append('if (%s)' % condition)
            lines.append('  return "%s";' % pluralForm)

        rules[index] = "\n    ".join(lines)
    return localesMapping, rules


# Download the unicode.org data and translate it into JS snippets
localesMapping, rules = computeRules()

# Wrap each generated snippet in a JS function stored under its rule index.
# Every function falls back to "other" when none of its conditions match.
rulesLines = []
for index, body in rules.items():
    rulesLines.extend([
        '"%d": function (n) {' % index,
        '  %s' % body,
        '  return "other"',
        '},',
    ])

# Write the generated JS module to stdout (see the usage line at the top of
# this file for how it is redirected into plural-rules.js).
# The template holds three `%s` placeholders, filled in this order:
#   1. the URL the data was fetched from (UNICODE_ORG_XML_URL)
#   2. the locale -> rule index mapping, dumped as sorted, indented JSON
#   3. the generated rule functions (`rulesLines`), one entry per line
print """/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// This file is automatically generated with /python-lib/plural-rules-generator.py
// Fetching data from: %s

// Mapping of short locale name == to == > rule index in following list
const LOCALES_TO_RULES = %s;

// Utility functions for plural rules methods
function isIn(n, list) {
  return list.indexOf(n) !== -1;
}
function isBetween(n, start, end) {
  return start <= n && n <= end;
}

// List of all plural rules methods, that maps an integer to the plural form name to use
const RULES = {
  %s
};

/**
  * Return a function that gives the plural form name for a given integer
  * for the specified `locale`
  *   let fun = getRulesForLocale('en');
  *   fun(1)    -> 'one'
  *   fun(0)    -> 'other'
  *   fun(1000) -> 'other'
  */
exports.getRulesForLocale = function getRulesForLocale(locale) {
  let index = LOCALES_TO_RULES[locale];
  if (!(index in RULES)) {
    console.warn('Plural form unknown for locale "' + locale + '"');
    return function () { return "other"; };
  }
  return RULES[index];
}
""" % (UNICODE_ORG_XML_URL,
        json.dumps(localesMapping, sort_keys=True, indent=2),
        "\n  ".join(rulesLines))