summaryrefslogtreecommitdiffstats
path: root/python/devtools/migrate-l10n/migrate/main.py
blob: 0a1d468a80a9b0042b1ee6a24895b3b66e2e78fd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
import argparse
import glob
import HTMLParser
import logging
import os
import re
import sys
import urllib2


# Import the compare-locales parser. The compare-locales folder is resolved
# relative to this script (three directories up) and inserted at the front of
# sys.path so that the import below can find the compare_locales package.
script_path = os.path.dirname(os.path.realpath(__file__))
compare_locales_path = os.path.join(script_path, '../../../compare-locales')
sys.path.insert(0, compare_locales_path)
from compare_locales import parser


# Configure logging format and level: every message is prefixed with its
# level name, and messages below INFO are dropped.
logging.basicConfig(format='  [%(levelname)s] %(message)s', level=logging.INFO)


# License header (MPL 2.0 notice, written as properties-file comments) to use
# when creating new properties files and no header could be read from central.
DEFAULT_HEADER = ('# This Source Code Form is subject to the terms of the '
                  'Mozilla Public\n# License, v. 2.0. If a copy of the MPL '
                  'was not distributed with this\n# file, You can obtain '
                  'one at http://mozilla.org/MPL/2.0/.\n')


# Base url used to retrieve the en-US properties files from mozilla-central.
# The properties filename is appended to it; the downloaded files are parsed
# for file headers and localization notes.
CENTRAL_BASE_URL = ('https://hg.mozilla.org/'
                    'mozilla-central/raw-file/tip/'
                    'devtools/client/locales/en-US/')


# HTML parser used to translate HTML entities found in dtd values.
HTML_PARSER = HTMLParser.HTMLParser()

# Cache of the properties files retrieved over the network. Keys are properties
# filenames, values are lists of lines (an empty list if the download failed).
central_prop_cache = {}

# Cache of the parsed entities from the existing DTD files. Keys are dtd paths,
# values are the results returned by the compare-locales parser.
dtd_entities_cache = {}


# Retrieve the content of the current version of a properties file from
# devtools/client on mozilla central.
#
# prop_filename: name of the properties file; it is appended to
#                CENTRAL_BASE_URL to build the download url.
#
# Returns the content of the file as a list of lines. Returns an empty list if
# the file can't be retrieved or read. The result (including the empty list
# stored after a failure) is cached in central_prop_cache, so a given filename
# is requested at most once.
def get_central_prop_content(prop_filename):
    if prop_filename in central_prop_cache:
        return central_prop_cache[prop_filename]

    url = CENTRAL_BASE_URL + prop_filename
    logging.info('loading localization file from central: {%s}' % url)

    lines = []
    try:
        response = urllib2.urlopen(url)
        try:
            lines = response.readlines()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    except Exception:
        # Best effort: a missing or unreachable file is not fatal for the
        # migration. Exception is caught instead of using a bare except so
        # that KeyboardInterrupt and SystemExit still propagate.
        logging.error('failed to load properties file from central: {%s}'
                      % url)

    central_prop_cache[prop_filename] = lines
    return lines


# Retrieve the current en-US localization note for the provided prop_name.
#
# prop_name: key of the property to look up.
# prop_filename: name of the properties file on central to search in.
#
# The localization note is the uninterrupted run of comment lines (lines
# starting with '#') located right above the line defining the property.
#
# Returns the note as a single string with lines joined by '\n'. Returns an
# empty string if the property has no comment above it, or if the property is
# not defined in the file at all.
def get_localization_note(prop_name, prop_filename):
    prop_content = get_central_prop_content(prop_filename)

    # Matches a line defining prop_name; compiled once for the whole file.
    prop_pattern = re.compile('(^|\n)' + re.escape(prop_name) + r'\s*=')

    comment_buffer = []
    for line in prop_content:
        # Remove line breaks.
        line = line.strip('\n').strip('\r')

        if line.startswith('#'):
            # Comment line, add to the current comment buffer.
            comment_buffer.append(line)
        elif prop_pattern.search(line):
            # Property found, the current comment buffer is the localization
            # note.
            return '\n'.join(comment_buffer)
        else:
            # No match, not a comment, reinitialize the comment buffer.
            comment_buffer = []

    # Property not found: comments left in the buffer (e.g. a comment block at
    # the very end of the file) do not belong to this property.
    return ''


# Return the parsed DTD entities for the file located at dtd_path.
#
# On the first request for a path, the file is read and parsed with the
# compare-locales '.dtd' parser and the result is stored in
# dtd_entities_cache. Later requests for the same path return the stored
# result without reading the file again.
def get_dtd_entities(dtd_path):
    if dtd_path not in dtd_entities_cache:
        reader = parser.getParser('.dtd')
        reader.readFile(dtd_path)
        dtd_entities_cache[dtd_path] = reader.parse()

    return dtd_entities_cache[dtd_path]


# Look up the value of a single entity in a dtd file.
#
# dtd_path: path of the dtd file to read (parsed through get_dtd_entities).
# entity_name: name of the entity to extract.
#
# Returns the entity value with HTML entities unescaped, encoded as utf-8.
# Returns None if the dtd file does not define entity_name.
def get_translation_from_dtd(dtd_path, entity_name):
    entities, index_by_name = get_dtd_entities(dtd_path)

    if entity_name in index_by_name:
        entity = entities[index_by_name[entity_name]]
        return HTML_PARSER.unescape(entity.val).encode('utf-8')

    # Translation is missing, nothing to return.
    return None


# Build the header (license and file wide comments) for the properties file
# named prop_filename, based on the version of that file found on central.
#
# The header is made of every line preceding the first property definition or
# the first "LOCALIZATION NOTE (...)" comment, whichever comes first. The lines
# are returned joined by '\n'. If no content is available for the file,
# DEFAULT_HEADER is returned instead.
def get_properties_header(prop_filename):
    central_lines = get_central_prop_content(prop_filename)

    # No content available, fall back to the default license header.
    if not central_lines:
        return DEFAULT_HEADER

    # Regexp matching keys, extracted from parser.py.
    entity_re = re.compile('^(\s*)'
                           '((?:[#!].*?\n\s*)*)'
                           '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*')
    # Regexp matching a localization note attached to a specific key.
    loc_note_re = re.compile('^(\s*)'
                             '\#\s*LOCALIZATION NOTE\s*\([^)]+\)')

    header_lines = []
    for raw_line in central_lines:
        # Drop the line breaks before testing and storing the line.
        text = raw_line.strip('\n').strip('\r')

        if entity_re.search(text) or loc_note_re.search(text):
            # First entity or localization note reached: the header is over.
            break

        header_lines.append(text)

    return '\n'.join(header_lines)


# Create a new properties file at the provided path.
#
# prop_path: path of the file to create. Its basename is used to look up the
#            header to write, see get_properties_header.
#
# The file is opened in 'w+' mode, so an existing file at prop_path is
# truncated. The new file contains only the header.
def create_properties_file(prop_path):
    logging.info('creating new *.properties file: {%s}' % prop_path)

    prop_filename = os.path.basename(prop_path)
    header = get_properties_header(prop_filename)

    # The with statement closes the file even if writing the header fails.
    with open(prop_path, 'w+') as prop_file:
        prop_file.write(header)


# Migrate a single string entry from a dtd file to a properties file.
#
# dtd_path: path of the dtd file holding the existing translation.
# prop_path: path of the destination properties file; created if missing.
# dtd_name: name of the entity to read in the dtd file.
# prop_name: key to use for the string in the properties file.
#
# The string is appended at the end of the properties file, preceded by its
# localization note when one is found on central. Nothing is written, and a
# message is logged, when:
# - the dtd file or the translation can not be found,
# - the properties file can not be created,
# - the properties file already defines prop_name (with the same or with a
#   different value).
#
# Always returns None.
def migrate_string(dtd_path, prop_path, dtd_name, prop_name):
    if not os.path.isfile(dtd_path):
        logging.error('dtd file can not be found at: {%s}' % dtd_path)
        return

    translation = get_translation_from_dtd(dtd_path, dtd_name)
    if not translation:
        logging.error('translation could not be found for: {%s} in {%s}'
                      % (dtd_name, dtd_path))
        return

    # Create properties file if missing.
    if not os.path.isfile(prop_path):
        create_properties_file(prop_path)

    if not os.path.isfile(prop_path):
        logging.error('could not create new properties file at: {%s}'
                      % prop_path)
        return

    prop_line = prop_name + '=' + translation + '\n'

    with open(prop_path, 'r') as prop_file:
        prop_file_content = prop_file.read()

    # Skip the string if it already exists in the destination file. Both sides
    # are prefixed with a line break so that the entry only matches at the
    # start of a line: 'label=x' must not match inside 'other.label=x'.
    if ('\n' + prop_line) in ('\n' + prop_file_content):
        logging.warning('string already migrated, skipping: {%s}' % prop_name)
        return

    # Skip the string and log an error if an existing entry is found, but with
    # a different value.
    if re.search('(^|\n)' + re.escape(prop_name) + r'\s*=', prop_file_content):
        logging.error('existing string found, skipping: {%s}' % prop_name)
        return

    prop_filename = os.path.basename(prop_path)
    logging.info('migrating {%s} in {%s}' % (prop_name, prop_filename))

    # Retrieve the note before opening the file, to avoid holding the file
    # open while the properties file is loaded from central.
    localization_note = get_localization_note(prop_name, prop_filename)
    if not localization_note:
        logging.warning('localization notes could not be found for: {%s}'
                        % prop_name)

    with open(prop_path, 'a') as prop_file:
        if localization_note:
            prop_file.write('\n' + localization_note)
        prop_file.write('\n' + prop_line)


# Apply the migration instructions in the provided configuration file.
#
# conf_path: path of the configuration file to read.
# l10n_path: base folder against which the properties and dtd paths found in
#            the configuration file are resolved.
#
# Each instruction is expected on its own line, with the syntax:
#   ${prop_path}:${prop_name} = ${dtd_path}:${dtd_name}
# Lines without a ' = ' separator are silently ignored. Lines containing the
# separator but not matching the expected syntax are logged as errors and
# skipped, so that a single bad line does not abort the whole migration.
def migrate_conf(conf_path, l10n_path):
    with open(conf_path, 'r') as conf_file:
        lines = conf_file.readlines()

    for line_number, line in enumerate(lines, 1):
        # Remove line breaks.
        line = line.strip('\n').strip('\r')

        # Skip lines that are not migration instructions.
        if ' = ' not in line:
            continue

        # Expected syntax: ${prop_path}:${prop_name} = ${dtd_path}:${dtd_name}.
        try:
            prop_info, dtd_info = line.split(' = ')
            prop_path, prop_name = prop_info.split(':')
            dtd_path, dtd_name = dtd_info.split(':')
        except ValueError:
            # Wrong number of ' = ' or ':' separators on the line.
            logging.error('invalid instruction at line %d in {%s}, skipping: '
                          '{%s}' % (line_number, conf_path, line))
            continue

        dtd_path = os.path.join(l10n_path, dtd_path)
        prop_path = os.path.join(l10n_path, prop_path)

        migrate_string(dtd_path, prop_path, dtd_name, prop_name)


# Entry point of the script: read the command line arguments and apply every
# migration configuration file to the provided l10n repository.
#
# Command line arguments:
#   path           path to the l10n repository; it must contain a
#                  devtools/client/ folder.
#   -c / --config  path to a configuration file, or to a folder in which every
#                  file is used as a configuration file.
#
# Exits with status 1 if the l10n path or the config path is invalid.
def main():
    # Read command line arguments.
    arg_parser = argparse.ArgumentParser(
            description='Migrate devtools localized strings.')
    arg_parser.add_argument('path', type=str, help='path to l10n repository')
    # The config path is mandatory: nothing can be migrated without it.
    arg_parser.add_argument('-c', '--config', type=str, required=True,
                            help='path to configuration file or folder')
    args = arg_parser.parse_args()

    # Retrieve path to devtools localization files in l10n repository.
    devtools_l10n_path = os.path.join(args.path, 'devtools/client/')
    if not os.path.exists(devtools_l10n_path):
        logging.error('l10n path is invalid: {%s}' % devtools_l10n_path)
        sys.exit(1)
    logging.info('l10n path is valid: {%s}' % devtools_l10n_path)

    # Retrieve configuration files to apply.
    if os.path.isdir(args.config):
        # Join instead of concatenating, so that the folder content is listed
        # whether or not the provided path ends with a separator. Sub-folders
        # are ignored, they can not be read as configuration files.
        candidates = glob.glob(os.path.join(args.config, '*'))
        conf_files = [path for path in candidates if os.path.isfile(path)]
    elif os.path.isfile(args.config):
        conf_files = [args.config]
    else:
        logging.error('config path is invalid: {%s}' % args.config)
        sys.exit(1)

    # Perform migration for each configuration file.
    for conf_file in conf_files:
        logging.info('performing migration for config file: {%s}' % conf_file)
        migrate_conf(conf_file, devtools_l10n_path)


# Run the migration only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()