Diffstat (limited to 'python/compare-locales')
-rw-r--r--  python/compare-locales/compare_locales/__init__.py | 1
-rw-r--r--  python/compare-locales/compare_locales/checks.py | 438
-rw-r--r--  python/compare-locales/compare_locales/commands.py | 154
-rw-r--r--  python/compare-locales/compare_locales/compare.py | 638
-rw-r--r--  python/compare-locales/compare_locales/parser.py | 521
-rw-r--r--  python/compare-locales/compare_locales/paths.py | 398
-rw-r--r--  python/compare-locales/compare_locales/tests/__init__.py | 49
-rw-r--r--  python/compare-locales/compare_locales/tests/data/bug121341.properties | 68
-rw-r--r--  python/compare-locales/compare_locales/tests/data/test.properties | 14
-rw-r--r--  python/compare-locales/compare_locales/tests/data/triple-license.dtd | 38
-rw-r--r--  python/compare-locales/compare_locales/tests/test_checks.py | 403
-rw-r--r--  python/compare-locales/compare_locales/tests/test_compare.py | 90
-rw-r--r--  python/compare-locales/compare_locales/tests/test_dtd.py | 86
-rw-r--r--  python/compare-locales/compare_locales/tests/test_ini.py | 115
-rw-r--r--  python/compare-locales/compare_locales/tests/test_merge.py | 265
-rw-r--r--  python/compare-locales/compare_locales/tests/test_properties.py | 95
-rw-r--r--  python/compare-locales/compare_locales/tests/test_util.py | 29
-rw-r--r--  python/compare-locales/compare_locales/tests/test_webapps.py | 41
-rw-r--r--  python/compare-locales/compare_locales/util.py | 11
-rw-r--r--  python/compare-locales/compare_locales/webapps.py | 235
-rw-r--r--  python/compare-locales/docs/glossary.rst | 26
-rw-r--r--  python/compare-locales/docs/index.rst | 191
-rw-r--r--  python/compare-locales/mach_commands.py | 81
-rw-r--r--  python/compare-locales/moz.build | 16
24 files changed, 4003 insertions, 0 deletions
diff --git a/python/compare-locales/compare_locales/__init__.py b/python/compare-locales/compare_locales/__init__.py
new file mode 100644
index 000000000..bad265e4f
--- /dev/null
+++ b/python/compare-locales/compare_locales/__init__.py
@@ -0,0 +1 @@
+version = "1.1"
diff --git a/python/compare-locales/compare_locales/checks.py b/python/compare-locales/compare_locales/checks.py
new file mode 100644
index 000000000..ee3bef03d
--- /dev/null
+++ b/python/compare-locales/compare_locales/checks.py
@@ -0,0 +1,438 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+from difflib import SequenceMatcher
+from xml import sax
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
+from compare_locales.parser import DTDParser, PropertiesParser
+
+
+class Checker(object):
+ '''Abstract class to implement checks per file type.
+ '''
+ pattern = None
+
+ @classmethod
+ def use(cls, file):
+ return cls.pattern.match(file.file)
+
+ def check(self, refEnt, l10nEnt):
+ '''Given the reference and localized Entities, performs checks.
+
+        This is a generator yielding 4-tuples of
+        - "warning" or "error", depending on what should be reported,
+        - tuple of line, column info for the error within the string,
+        - description string for the report, and the check category.
+ '''
+ if True:
+ raise NotImplementedError("Need to subclass")
+ yield ("error", (0, 0), "This is an example error", "example")
+
+
+class PrintfException(Exception):
+ def __init__(self, msg, pos):
+ self.pos = pos
+ self.msg = msg
+
+
+class PropertiesChecker(Checker):
+ '''Tests to run on .properties files.
+ '''
+ pattern = re.compile('.*\.properties$')
+ printf = re.compile(r'%(?P<good>%|'
+ r'(?:(?P<number>[1-9][0-9]*)\$)?'
+ r'(?P<width>\*|[0-9]+)?'
+ r'(?P<prec>\.(?:\*|[0-9]+)?)?'
+ r'(?P<spec>[duxXosScpfg]))?')
+
+ def check(self, refEnt, l10nEnt):
+ '''Test for the different variable formats.
+ '''
+ refValue, l10nValue = refEnt.val, l10nEnt.val
+ refSpecs = None
+ # check for PluralForm.jsm stuff, should have the docs in the
+ # comment
+ if 'Localization_and_Plurals' in refEnt.pre_comment:
+ # For plurals, common variable pattern is #1. Try that.
+ pats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
+ refValue))
+ if len(pats) == 0:
+ return
+ lpats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)',
+ l10nValue))
+ if pats - lpats:
+ yield ('warning', 0, 'not all variables used in l10n',
+ 'plural')
+ return
+ if lpats - pats:
+ yield ('error', 0, 'unreplaced variables in l10n',
+ 'plural')
+ return
+ return
+ # check for lost escapes
+ raw_val = l10nEnt.raw_val
+ for m in PropertiesParser.escape.finditer(raw_val):
+ if m.group('single') and \
+ m.group('single') not in PropertiesParser.known_escapes:
+ yield ('warning', m.start(),
+ 'unknown escape sequence, \\' + m.group('single'),
+ 'escape')
+ try:
+ refSpecs = self.getPrintfSpecs(refValue)
+ except PrintfException:
+ refSpecs = []
+ if refSpecs:
+ for t in self.checkPrintf(refSpecs, l10nValue):
+ yield t
+ return
+
+ def checkPrintf(self, refSpecs, l10nValue):
+ try:
+ l10nSpecs = self.getPrintfSpecs(l10nValue)
+ except PrintfException, e:
+ yield ('error', e.pos, e.msg, 'printf')
+ return
+ if refSpecs != l10nSpecs:
+ sm = SequenceMatcher()
+ sm.set_seqs(refSpecs, l10nSpecs)
+ msgs = []
+ warn = None
+ for action, i1, i2, j1, j2 in sm.get_opcodes():
+ if action == 'equal':
+ continue
+ if action == 'delete':
+ # missing argument in l10n
+ if i2 == len(refSpecs):
+ # trailing specs missing, that's just a warning
+ warn = ', '.join('trailing argument %d `%s` missing' %
+ (i+1, refSpecs[i])
+ for i in xrange(i1, i2))
+ else:
+ for i in xrange(i1, i2):
+ msgs.append('argument %d `%s` missing' %
+ (i+1, refSpecs[i]))
+ continue
+ if action == 'insert':
+ # obsolete argument in l10n
+ for i in xrange(j1, j2):
+ msgs.append('argument %d `%s` obsolete' %
+ (i+1, l10nSpecs[i]))
+ continue
+ if action == 'replace':
+ for i, j in zip(xrange(i1, i2), xrange(j1, j2)):
+ msgs.append('argument %d `%s` should be `%s`' %
+ (j+1, l10nSpecs[j], refSpecs[i]))
+ if msgs:
+ yield ('error', 0, ', '.join(msgs), 'printf')
+ if warn is not None:
+ yield ('warning', 0, warn, 'printf')
+
+ def getPrintfSpecs(self, val):
+ hasNumber = False
+ specs = []
+ for m in self.printf.finditer(val):
+ if m.group("good") is None:
+ # found just a '%', signal an error
+ raise PrintfException('Found single %', m.start())
+ if m.group("good") == '%':
+ # escaped %
+ continue
+ if ((hasNumber and m.group('number') is None) or
+ (not hasNumber and specs and
+ m.group('number') is not None)):
+ # mixed style, numbered and not
+ raise PrintfException('Mixed ordered and non-ordered args',
+ m.start())
+ hasNumber = m.group('number') is not None
+ if hasNumber:
+ pos = int(m.group('number')) - 1
+ ls = len(specs)
+ if pos >= ls:
+ # pad specs
+ nones = pos - ls
+ specs[ls:pos] = nones*[None]
+ specs.append(m.group('spec'))
+ else:
+ if specs[pos] is not None:
+ raise PrintfException('Double ordered argument %d' %
+ (pos+1),
+ m.start())
+ specs[pos] = m.group('spec')
+ else:
+ specs.append(m.group('spec'))
+ # check for missing args
+ if hasNumber and not all(specs):
+ raise PrintfException('Ordered argument missing', 0)
+ return specs
+
+
+class DTDChecker(Checker):
+ """Tests to run on DTD files.
+
+ Uses xml.sax for the heavy lifting of xml parsing.
+
+ The code tries to parse until it doesn't find any unresolved entities
+ anymore. If it finds one, it tries to grab the key, and adds an empty
+ <!ENTITY key ""> definition to the header.
+
+ Also checks for some CSS and number heuristics in the values.
+ """
+ pattern = re.compile('.*\.dtd$')
+
+ eref = re.compile('&(%s);' % DTDParser.Name)
+ tmpl = '''<!DOCTYPE elem [%s]>
+<elem>%s</elem>
+'''
+ xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot'))
+
+ def __init__(self, reference):
+ self.reference = reference
+ self.__known_entities = None
+
+ def known_entities(self, refValue):
+ if self.__known_entities is None and self.reference is not None:
+ self.__known_entities = set()
+ for ent in self.reference:
+ self.__known_entities.update(self.entities_for_value(ent.val))
+ return self.__known_entities if self.__known_entities is not None \
+ else self.entities_for_value(refValue)
+
+ def entities_for_value(self, value):
+ reflist = set(m.group(1).encode('utf-8')
+ for m in self.eref.finditer(value))
+ reflist -= self.xmllist
+ return reflist
+
+ # Setup for XML parser, with default and text-only content handler
+ class TextContent(sax.handler.ContentHandler):
+ textcontent = ''
+
+ def characters(self, content):
+ self.textcontent += content
+
+ defaulthandler = sax.handler.ContentHandler()
+ texthandler = TextContent()
+
+ numPattern = r'([0-9]+|[0-9]*\.[0-9]+)'
+ num = re.compile('^%s$' % numPattern)
+ lengthPattern = '%s(em|px|ch|cm|in)' % numPattern
+ length = re.compile('^%s$' % lengthPattern)
+ spec = re.compile(r'((?:min\-)?(?:width|height))\s*:\s*%s' %
+ lengthPattern)
+ style = re.compile(r'^%(spec)s\s*(;\s*%(spec)s\s*)*;?$' %
+ {'spec': spec.pattern})
+
+ processContent = None
+
+ def check(self, refEnt, l10nEnt):
+ """Try to parse the refvalue inside a dummy element, and keep
+ track of entities that we need to define to make that work.
+
+        Yields warnings and errors as 4-tuples, like Checker.check.
+ """
+ refValue, l10nValue = refEnt.val, l10nEnt.val
+ # find entities the refValue references,
+ # reusing markup from DTDParser.
+ reflist = self.known_entities(refValue)
+ inContext = self.entities_for_value(refValue)
+ entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist))
+ parser = sax.make_parser()
+ parser.setFeature(sax.handler.feature_external_ges, False)
+
+ parser.setContentHandler(self.defaulthandler)
+ try:
+ parser.parse(StringIO(self.tmpl %
+ (entities, refValue.encode('utf-8'))))
+ # also catch stray %
+ parser.parse(StringIO(self.tmpl %
+ (refEnt.all.encode('utf-8') + entities,
+ '&%s;' % refEnt.key.encode('utf-8'))))
+ except sax.SAXParseException, e:
+ yield ('warning',
+ (0, 0),
+ "can't parse en-US value", 'xmlparse')
+
+ # find entities the l10nValue references,
+ # reusing markup from DTDParser.
+ l10nlist = self.entities_for_value(l10nValue)
+ missing = sorted(l10nlist - reflist)
+ _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing)
+ if self.processContent is not None:
+ self.texthandler.textcontent = ''
+ parser.setContentHandler(self.texthandler)
+ try:
+ parser.parse(StringIO(self.tmpl % (_entities,
+ l10nValue.encode('utf-8'))))
+ # also catch stray %
+            # if this fails, we need to subtract the entity definition
+ parser.setContentHandler(self.defaulthandler)
+ parser.parse(StringIO(self.tmpl % (
+ l10nEnt.all.encode('utf-8') + _entities,
+ '&%s;' % l10nEnt.key.encode('utf-8'))))
+ except sax.SAXParseException, e:
+ # xml parse error, yield error
+ # sometimes, the error is reported on our fake closing
+ # element, make that the end of the last line
+ lnr = e.getLineNumber() - 1
+ lines = l10nValue.splitlines()
+ if lnr > len(lines):
+ lnr = len(lines)
+ col = len(lines[lnr-1])
+ else:
+ col = e.getColumnNumber()
+ if lnr == 1:
+                    # first line starts with <elem>, subtract
+ col -= len("<elem>")
+ elif lnr == 0:
+ col -= len("<!DOCTYPE elem [") # first line is DOCTYPE
+ yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse')
+
+ warntmpl = u'Referencing unknown entity `%s`'
+ if reflist:
+ if inContext:
+ elsewhere = reflist - inContext
+ warntmpl += ' (%s used in context' % \
+ ', '.join(sorted(inContext))
+ if elsewhere:
+ warntmpl += ', %s known)' % ', '.join(sorted(elsewhere))
+ else:
+ warntmpl += ')'
+ else:
+ warntmpl += ' (%s known)' % ', '.join(sorted(reflist))
+ for key in missing:
+ yield ('warning', (0, 0), warntmpl % key.decode('utf-8'),
+ 'xmlparse')
+ if inContext and l10nlist and l10nlist - inContext - set(missing):
+ mismatch = sorted(l10nlist - inContext - set(missing))
+ for key in mismatch:
+ yield ('warning', (0, 0),
+ 'Entity %s referenced, but %s used in context' % (
+ key.decode('utf-8'),
+ ', '.join(sorted(inContext))
+ ), 'xmlparse')
+
+ # Number check
+ if self.num.match(refValue) and not self.num.match(l10nValue):
+ yield ('warning', 0, 'reference is a number', 'number')
+ # CSS checks
+ # just a length, width="100em"
+ if self.length.match(refValue) and not self.length.match(l10nValue):
+ yield ('error', 0, 'reference is a CSS length', 'css')
+ # real CSS spec, style="width:100px;"
+ if self.style.match(refValue):
+ if not self.style.match(l10nValue):
+ yield ('error', 0, 'reference is a CSS spec', 'css')
+ else:
+ # warn if different properties or units
+ refMap = dict((s, u) for s, _, u in
+ self.spec.findall(refValue))
+ msgs = []
+ for s, _, u in self.spec.findall(l10nValue):
+ if s not in refMap:
+ msgs.insert(0, '%s only in l10n' % s)
+ continue
+ else:
+ ru = refMap.pop(s)
+ if u != ru:
+ msgs.append("units for %s don't match "
+ "(%s != %s)" % (s, u, ru))
+ for s in refMap.iterkeys():
+ msgs.insert(0, '%s only in reference' % s)
+ if msgs:
+ yield ('warning', 0, ', '.join(msgs), 'css')
+
+ if self.processContent is not None:
+ for t in self.processContent(self.texthandler.textcontent):
+ yield t
+
+
+class PrincessAndroid(DTDChecker):
+ """Checker for the string values that Android puts into an XML container.
+
+ http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling # noqa
+ has more info. Check for unescaped apostrophes and bad unicode escapes.
+ """
+ quoted = re.compile("(?P<q>[\"']).*(?P=q)$")
+
+ def unicode_escape(self, str):
+ """Helper method to try to decode all unicode escapes in a string.
+
+ This code uses the standard python decode for unicode-escape, but
+ that's somewhat tricky, as its input needs to be ascii. To get to
+ ascii, the unicode string gets converted to ascii with
+ backslashreplace, i.e., all non-ascii unicode chars get unicode
+ escaped. And then we try to roll all of that back.
+ Now, when that hits an error, that's from the original string, and we
+ need to search for the actual error position in the original string,
+ as the backslashreplace code changes string positions quite badly.
+ See also the last check in TestAndroid.test_android_dtd, with a
+        lengthy Chinese string.
+ """
+ val = str.encode('ascii', 'backslashreplace')
+ try:
+ val.decode('unicode-escape')
+ except UnicodeDecodeError, e:
+ args = list(e.args)
+ badstring = args[1][args[2]:args[3]]
+ i = len(args[1][:args[2]].decode('unicode-escape'))
+ args[2] = i
+ args[3] = i + len(badstring)
+ raise UnicodeDecodeError(*args)
+
+ @classmethod
+ def use(cls, file):
+ """Use this Checker only for DTD files in embedding/android."""
+ return (file.module in ("embedding/android",
+ "mobile/android/base") and
+ cls.pattern.match(file.file))
+
+ def processContent(self, val):
+ """Actual check code.
+ Check for unicode escapes and unescaped quotes and apostrophes,
+ if string's not quoted.
+ """
+ # first, try to decode unicode escapes
+ try:
+ self.unicode_escape(val)
+ except UnicodeDecodeError, e:
+ yield ('error', e.args[2], e.args[4], 'android')
+ # check for unescaped single or double quotes.
+ # first, see if the complete string is single or double quoted,
+ # that changes the rules
+ m = self.quoted.match(val)
+ if m:
+ q = m.group('q')
+ offset = 0
+ val = val[1:-1] # strip quotes
+ else:
+ q = "[\"']"
+ offset = -1
+ stray_quot = re.compile(r"[\\\\]*(%s)" % q)
+
+ for m in stray_quot.finditer(val):
+ if len(m.group(0)) % 2:
+ # found an unescaped single or double quote, which message?
+ if m.group(1) == '"':
+ msg = u"Quotes in Android DTDs need escaping with \\\" "\
+ u"or \\u0022, or put string in apostrophes."
+ else:
+ msg = u"Apostrophes in Android DTDs need escaping with "\
+ u"\\' or \\u0027, or use \u2019, or put string in "\
+ u"quotes."
+ yield ('error', m.end(0)+offset, msg, 'android')
+
+
+def getChecker(file, reference=None):
+ if PropertiesChecker.use(file):
+ return PropertiesChecker()
+ if PrincessAndroid.use(file):
+ return PrincessAndroid(reference)
+ if DTDChecker.use(file):
+ return DTDChecker(reference)
+ return None
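
A usage sketch for the checkers above, not part of the patch itself: a checker
is obtained per file via getChecker() and fed pairs of reference and localized
entities, yielding the 4-tuples documented on Checker.check. The path and the
two .properties strings here are made up.

    from compare_locales.paths import File
    from compare_locales.parser import getParser
    from compare_locales.checks import getChecker

    # hypothetical reference and translated .properties content
    ref_file = File('/tmp/en-US/app.properties', 'app.properties')
    p = getParser(ref_file.file)
    p.readContents('msg = %d files in %s\n')
    ref_entities, ref_map = p.parse()
    p.readContents('msg = %s files\n')
    l10n_entities, l10n_map = p.parse()

    checker = getChecker(ref_file)  # picks PropertiesChecker by extension
    for severity, pos, message, category in checker.check(
            ref_entities[ref_map['msg']], l10n_entities[l10n_map['msg']]):
        print severity, pos, message, category
    # prints: error 0 argument 1 `d` missing printf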
diff --git a/python/compare-locales/compare_locales/commands.py b/python/compare-locales/compare_locales/commands.py
new file mode 100644
index 000000000..61b58ec4b
--- /dev/null
+++ b/python/compare-locales/compare_locales/commands.py
@@ -0,0 +1,154 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Commands exposed to command lines'
+
+import logging
+from optparse import OptionParser, make_option
+
+from compare_locales.paths import EnumerateApp
+from compare_locales.compare import compareApp, compareDirs
+from compare_locales.webapps import compare_web_app
+
+
+class BaseCommand(object):
+ """Base class for compare-locales commands.
+ This handles command line parsing, and general sugar for setuptools
+ entry_points.
+ """
+ options = [
+ make_option('-v', '--verbose', action='count', dest='v', default=0,
+ help='Make more noise'),
+ make_option('-q', '--quiet', action='count', dest='q', default=0,
+ help='Make less noise'),
+ make_option('-m', '--merge',
+ help='''Use this directory to stage merged files,
+use {ab_CD} to specify a different directory for each locale'''),
+ ]
+ data_option = make_option('--data', choices=['text', 'exhibit', 'json'],
+ default='text',
+ help='''Choose data and format (one of text,
+exhibit, json); text: (default) Show which files miss which strings, together
+with warnings and errors. Also prints a summary; json: Serialize the internal
+tree, useful for tools. Also always succeeds; exhibit: Serialize the summary
+data in a json useful for Exhibit
+''')
+
+ def __init__(self):
+ self.parser = None
+
+ def get_parser(self):
+ """Get an OptionParser, with class docstring as usage, and
+ self.options.
+ """
+ parser = OptionParser()
+ parser.set_usage(self.__doc__)
+ for option in self.options:
+ parser.add_option(option)
+ return parser
+
+ @classmethod
+ def call(cls):
+ """Entry_point for setuptools.
+ The actual command handling is done in the handle() method of the
+ subclasses.
+ """
+ cmd = cls()
+ cmd.handle_()
+
+ def handle_(self):
+ """The instance part of the classmethod call."""
+ self.parser = self.get_parser()
+ (options, args) = self.parser.parse_args()
+ # log as verbose or quiet as we want, warn by default
+ logging.basicConfig()
+ logging.getLogger().setLevel(logging.WARNING -
+ (options.v - options.q)*10)
+ observer = self.handle(args, options)
+ print observer.serialize(type=options.data).encode('utf-8', 'replace')
+
+ def handle(self, args, options):
+ """Subclasses need to implement this method for the actual
+ command handling.
+ """
+ raise NotImplementedError
+
+
+class CompareLocales(BaseCommand):
+ """usage: %prog [options] l10n.ini l10n_base_dir [locale ...]
+
+Check the localization status of a gecko application.
+The first argument is a path to the l10n.ini file for the application,
+followed by the base directory of the localization repositories.
+Then you pass in the list of locale codes you want to compare. If no locales
+are given, the list of locales will be taken from the all-locales file
+of the application's l10n.ini."""
+
+ options = BaseCommand.options + [
+ make_option('--clobber-merge', action="store_true", default=False,
+ dest='clobber',
+ help="""WARNING: DATALOSS.
+Use this option with care. If specified, the merge directory will
+be clobbered for each module. That means the subdirectory will
+be completely removed; any files that were there are lost.
+Be careful to specify the right merge directory when using this option."""),
+ make_option('-r', '--reference', default='en-US', dest='reference',
+ help='Explicitly set the reference '
+ 'localization. [default: en-US]'),
+ BaseCommand.data_option
+ ]
+
+ def handle(self, args, options):
+ if len(args) < 2:
+            self.parser.error('Need l10n.ini path and l10n base directory')
+ inipath, l10nbase = args[:2]
+ locales = args[2:]
+ app = EnumerateApp(inipath, l10nbase, locales)
+ app.reference = options.reference
+ try:
+ observer = compareApp(app, merge_stage=options.merge,
+ clobber=options.clobber)
+ except (OSError, IOError), exc:
+ print "FAIL: " + str(exc)
+ self.parser.exit(2)
+ return observer
+
+
+class CompareDirs(BaseCommand):
+ """usage: %prog [options] reference localization
+
+Check the localization status of a directory tree.
+The first argument is a path to the reference data, the second is the
+localization to be tested."""
+
+ options = BaseCommand.options + [
+ BaseCommand.data_option
+ ]
+
+ def handle(self, args, options):
+ if len(args) != 2:
+            self.parser.error('Reference and localization required')
+ reference, locale = args
+ observer = compareDirs(reference, locale, merge_stage=options.merge)
+ return observer
+
+
+class CompareWebApp(BaseCommand):
+ """usage: %prog [options] webapp [locale locale]
+
+Check the localization status of a gaia-style web app.
+The first argument is the directory of the web app.
+Following arguments explicitly state the locales to test.
+If none are given, test all locales in manifest.webapp or files."""
+
+ options = BaseCommand.options[:-1] + [
+ BaseCommand.data_option]
+
+ def handle(self, args, options):
+ if len(args) < 1:
+ self.parser.error('Webapp directory required')
+ basedir = args[0]
+ locales = args[1:]
+ observer = compare_web_app(basedir, locales)
+ return observer
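
The call() classmethods above are the intended setuptools hooks: each
instantiates its command, parses sys.argv in handle_(), and prints the
serialized observer. A hypothetical setup.py fragment wiring them up as
console scripts might look like this; the package metadata shown is
illustrative, not taken from this patch.

    from setuptools import setup

    setup(
        name='compare-locales',
        version='1.1',
        packages=['compare_locales'],
        entry_points={
            'console_scripts': [
                'compare-locales = compare_locales.commands:CompareLocales.call',
                'compare-dirs = compare_locales.commands:CompareDirs.call',
                'compare-web-app = compare_locales.commands:CompareWebApp.call',
            ],
        },
    )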
diff --git a/python/compare-locales/compare_locales/compare.py b/python/compare-locales/compare_locales/compare.py
new file mode 100644
index 000000000..4f71c46f8
--- /dev/null
+++ b/python/compare-locales/compare_locales/compare.py
@@ -0,0 +1,638 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Mozilla l10n compare locales tool'
+
+import codecs
+import os
+import os.path
+import shutil
+import re
+from difflib import SequenceMatcher
+from collections import defaultdict
+
+try:
+ from json import dumps
+except ImportError:
+ from simplejson import dumps
+
+from compare_locales import parser
+from compare_locales import paths
+from compare_locales.checks import getChecker
+
+
+class Tree(object):
+ def __init__(self, valuetype):
+ self.branches = dict()
+ self.valuetype = valuetype
+ self.value = None
+
+ def __getitem__(self, leaf):
+ parts = []
+ if isinstance(leaf, paths.File):
+ parts = [p for p in [leaf.locale, leaf.module] if p] + \
+ leaf.file.split('/')
+ else:
+ parts = leaf.split('/')
+ return self.__get(parts)
+
+ def __get(self, parts):
+ common = None
+ old = None
+ new = tuple(parts)
+ t = self
+ for k, v in self.branches.iteritems():
+ for i, part in enumerate(zip(k, parts)):
+ if part[0] != part[1]:
+ i -= 1
+ break
+ if i < 0:
+ continue
+ i += 1
+ common = tuple(k[:i])
+ old = tuple(k[i:])
+ new = tuple(parts[i:])
+ break
+ if old:
+ self.branches.pop(k)
+ t = Tree(self.valuetype)
+ t.branches[old] = v
+ self.branches[common] = t
+ elif common:
+ t = self.branches[common]
+ if new:
+ if common:
+ return t.__get(new)
+ t2 = t
+ t = Tree(self.valuetype)
+ t2.branches[new] = t
+ if t.value is None:
+ t.value = t.valuetype()
+ return t.value
+
+ indent = ' '
+
+ def getContent(self, depth=0):
+ '''
+ Returns iterator of (depth, flag, key_or_value) tuples.
+ If flag is 'value', key_or_value is a value object, otherwise
+ (flag is 'key') it's a key string.
+ '''
+ keys = self.branches.keys()
+ keys.sort()
+ if self.value is not None:
+ yield (depth, 'value', self.value)
+ for key in keys:
+ yield (depth, 'key', key)
+ for child in self.branches[key].getContent(depth + 1):
+ yield child
+
+ def toJSON(self):
+ '''
+ Returns this Tree as a JSON-able tree of hashes.
+ Only the values need to take care that they're JSON-able.
+ '''
+ json = {}
+ keys = self.branches.keys()
+ keys.sort()
+ if self.value is not None:
+ json['value'] = self.value
+ children = [('/'.join(key), self.branches[key].toJSON())
+ for key in keys]
+ if children:
+ json['children'] = children
+ return json
+
+ def getStrRows(self):
+ def tostr(t):
+ if t[1] == 'key':
+ return self.indent * t[0] + '/'.join(t[2])
+ return self.indent * (t[0] + 1) + str(t[2])
+
+ return map(tostr, self.getContent())
+
+ def __str__(self):
+ return '\n'.join(self.getStrRows())
+
+
+class AddRemove(SequenceMatcher):
+ def __init__(self):
+ SequenceMatcher.__init__(self, None, None, None)
+
+ def set_left(self, left):
+ if not isinstance(left, list):
+ left = [l for l in left]
+ self.set_seq1(left)
+
+ def set_right(self, right):
+ if not isinstance(right, list):
+ right = [l for l in right]
+ self.set_seq2(right)
+
+ def __iter__(self):
+ for tag, i1, i2, j1, j2 in self.get_opcodes():
+ if tag == 'equal':
+ for pair in zip(self.a[i1:i2], self.b[j1:j2]):
+ yield ('equal', pair)
+ elif tag == 'delete':
+ for item in self.a[i1:i2]:
+ yield ('delete', item)
+ elif tag == 'insert':
+ for item in self.b[j1:j2]:
+ yield ('add', item)
+ else:
+ # tag == 'replace'
+ for item in self.a[i1:i2]:
+ yield ('delete', item)
+ for item in self.b[j1:j2]:
+ yield ('add', item)
+
+
+class DirectoryCompare(SequenceMatcher):
+ def __init__(self, reference):
+ SequenceMatcher.__init__(self, None, [i for i in reference],
+ [])
+ self.watcher = None
+
+ def setWatcher(self, watcher):
+ self.watcher = watcher
+
+ def compareWith(self, other):
+ if not self.watcher:
+ return
+ self.set_seq2([i for i in other])
+ for tag, i1, i2, j1, j2 in self.get_opcodes():
+ if tag == 'equal':
+ for i, j in zip(xrange(i1, i2), xrange(j1, j2)):
+ self.watcher.compare(self.a[i], self.b[j])
+ elif tag == 'delete':
+ for i in xrange(i1, i2):
+ self.watcher.add(self.a[i], other.cloneFile(self.a[i]))
+ elif tag == 'insert':
+ for j in xrange(j1, j2):
+ self.watcher.remove(self.b[j])
+ else:
+ for j in xrange(j1, j2):
+ self.watcher.remove(self.b[j])
+ for i in xrange(i1, i2):
+ self.watcher.add(self.a[i], other.cloneFile(self.a[i]))
+
+
+class Observer(object):
+ stat_cats = ['missing', 'obsolete', 'missingInFiles', 'report',
+ 'changed', 'unchanged', 'keys']
+
+ def __init__(self):
+ class intdict(defaultdict):
+ def __init__(self):
+ defaultdict.__init__(self, int)
+
+ self.summary = defaultdict(intdict)
+ self.details = Tree(dict)
+ self.filter = None
+
+ # support pickling
+ def __getstate__(self):
+ return dict(summary=self.getSummary(), details=self.details)
+
+ def __setstate__(self, state):
+ class intdict(defaultdict):
+ def __init__(self):
+ defaultdict.__init__(self, int)
+
+ self.summary = defaultdict(intdict)
+ if 'summary' in state:
+ for loc, stats in state['summary'].iteritems():
+ self.summary[loc].update(stats)
+ self.details = state['details']
+ self.filter = None
+
+ def getSummary(self):
+ plaindict = {}
+ for k, v in self.summary.iteritems():
+ plaindict[k] = dict(v)
+ return plaindict
+
+ def toJSON(self):
+ return dict(summary=self.getSummary(), details=self.details.toJSON())
+
+ def notify(self, category, file, data):
+ rv = "error"
+ if category in self.stat_cats:
+ # these get called post reporting just for stats
+            # return "error" to forward them to the other_observers
+ self.summary[file.locale][category] += data
+ # keep track of how many strings are in a missing file
+ # we got the {'missingFile': 'error'} from the first pass
+ if category == 'missingInFiles':
+ self.details[file]['strings'] = data
+ return "error"
+ if category in ['missingFile', 'obsoleteFile']:
+ if self.filter is not None:
+ rv = self.filter(file)
+ if rv != "ignore":
+ self.details[file][category] = rv
+ return rv
+ if category in ['missingEntity', 'obsoleteEntity']:
+ if self.filter is not None:
+ rv = self.filter(file, data)
+ if rv == "ignore":
+ return rv
+ v = self.details[file]
+ try:
+ v[category].append(data)
+ except KeyError:
+ v[category] = [data]
+ return rv
+ if category == 'error':
+ try:
+ self.details[file][category].append(data)
+ except KeyError:
+ self.details[file][category] = [data]
+ self.summary[file.locale]['errors'] += 1
+ elif category == 'warning':
+ try:
+ self.details[file][category].append(data)
+ except KeyError:
+ self.details[file][category] = [data]
+ self.summary[file.locale]['warnings'] += 1
+ return rv
+
+ def toExhibit(self):
+ items = []
+ for locale in sorted(self.summary.iterkeys()):
+ summary = self.summary[locale]
+ if locale is not None:
+ item = {'id': 'xxx/' + locale,
+ 'label': locale,
+ 'locale': locale}
+ else:
+ item = {'id': 'xxx',
+ 'label': 'xxx',
+ 'locale': 'xxx'}
+ item['type'] = 'Build'
+ total = sum([summary[k]
+ for k in ('changed', 'unchanged', 'report', 'missing',
+ 'missingInFiles')
+ if k in summary])
+ rate = (('changed' in summary and summary['changed'] * 100) or
+ 0) / total
+ item.update((k, summary.get(k, 0))
+ for k in ('changed', 'unchanged'))
+ item.update((k, summary[k])
+ for k in ('report', 'errors', 'warnings')
+ if k in summary)
+ item['missing'] = summary.get('missing', 0) + \
+ summary.get('missingInFiles', 0)
+ item['completion'] = rate
+ item['total'] = total
+ result = 'success'
+ if item.get('warnings', 0):
+ result = 'warning'
+ if item.get('errors', 0) or item.get('missing', 0):
+ result = 'failure'
+ item['result'] = result
+ items.append(item)
+ data = {
+ "properties": dict.fromkeys(
+ ("completion", "errors", "warnings", "missing", "report",
+ "unchanged", "changed", "obsolete"),
+ {"valueType": "number"}),
+ "types": {
+ "Build": {"pluralLabel": "Builds"}
+ }}
+ data['items'] = items
+ return dumps(data, indent=2)
+
+ def serialize(self, type="text"):
+ if type == "exhibit":
+ return self.toExhibit()
+ if type == "json":
+ return dumps(self.toJSON())
+
+ def tostr(t):
+ if t[1] == 'key':
+ return ' ' * t[0] + '/'.join(t[2])
+ o = []
+ indent = ' ' * (t[0] + 1)
+ if 'error' in t[2]:
+ o += [indent + 'ERROR: ' + e for e in t[2]['error']]
+ if 'warning' in t[2]:
+ o += [indent + 'WARNING: ' + e for e in t[2]['warning']]
+ if 'missingEntity' in t[2] or 'obsoleteEntity' in t[2]:
+ missingEntities = ('missingEntity' in t[2] and
+ t[2]['missingEntity']) or []
+ obsoleteEntities = ('obsoleteEntity' in t[2] and
+ t[2]['obsoleteEntity']) or []
+ entities = missingEntities + obsoleteEntities
+ entities.sort()
+ for entity in entities:
+ op = '+'
+ if entity in obsoleteEntities:
+ op = '-'
+ o.append(indent + op + entity)
+ elif 'missingFile' in t[2]:
+ o.append(indent + '// add and localize this file')
+ elif 'obsoleteFile' in t[2]:
+ o.append(indent + '// remove this file')
+ return '\n'.join(o)
+
+ out = []
+ for locale, summary in sorted(self.summary.iteritems()):
+ if locale is not None:
+ out.append(locale + ':')
+ out += [k + ': ' + str(v) for k, v in sorted(summary.iteritems())]
+ total = sum([summary[k]
+ for k in ['changed', 'unchanged', 'report', 'missing',
+ 'missingInFiles']
+ if k in summary])
+ rate = 0
+ if total:
+ rate = (('changed' in summary and summary['changed'] * 100) or
+ 0) / total
+ out.append('%d%% of entries changed' % rate)
+ return '\n'.join(map(tostr, self.details.getContent()) + out)
+
+ def __str__(self):
+ return 'observer'
+
+
+class ContentComparer:
+ keyRE = re.compile('[kK]ey')
+ nl = re.compile('\n', re.M)
+
+ def __init__(self):
+        '''Create a ContentComparer.
+        self.observer is an instance of Observer. The return values
+        of its notify method are used to control the handling of
+        missing entities.
+        '''
+ self.reference = dict()
+ self.observer = Observer()
+ self.other_observers = []
+ self.merge_stage = None
+
+ def add_observer(self, obs):
+ '''Add a non-filtering observer.
+ Results from the notify calls are ignored.
+ '''
+ self.other_observers.append(obs)
+
+ def set_merge_stage(self, merge_stage):
+ self.merge_stage = merge_stage
+
+ def merge(self, ref_entities, ref_map, ref_file, l10n_file, missing,
+ skips, p):
+ outfile = os.path.join(self.merge_stage, l10n_file.module,
+ l10n_file.file)
+ outdir = os.path.dirname(outfile)
+ if not os.path.isdir(outdir):
+ os.makedirs(outdir)
+ if not p.canMerge:
+ shutil.copyfile(ref_file.fullpath, outfile)
+ print "copied reference to " + outfile
+ return
+ if skips:
+ # skips come in ordered by key name, we need them in file order
+ skips.sort(key=lambda s: s.span[0])
+ trailing = (['\n'] +
+ [ref_entities[ref_map[key]].all for key in missing] +
+ [ref_entities[ref_map[skip.key]].all for skip in skips
+ if not isinstance(skip, parser.Junk)])
+ if skips:
+            # we need to skip a few erroneous blocks in the input, copy by hand
+ f = codecs.open(outfile, 'wb', p.encoding)
+ offset = 0
+ for skip in skips:
+ chunk = skip.span
+ f.write(p.contents[offset:chunk[0]])
+ offset = chunk[1]
+ f.write(p.contents[offset:])
+ else:
+ shutil.copyfile(l10n_file.fullpath, outfile)
+ f = codecs.open(outfile, 'ab', p.encoding)
+ print "adding to " + outfile
+
+ def ensureNewline(s):
+ if not s.endswith('\n'):
+ return s + '\n'
+ return s
+
+ f.write(''.join(map(ensureNewline, trailing)))
+ f.close()
+
+ def notify(self, category, file, data):
+ """Check observer for the found data, and if it's
+ not to ignore, notify other_observers.
+ """
+ rv = self.observer.notify(category, file, data)
+ if rv == 'ignore':
+ return rv
+ for obs in self.other_observers:
+ # non-filtering other_observers, ignore results
+ obs.notify(category, file, data)
+ return rv
+
+ def remove(self, obsolete):
+ self.notify('obsoleteFile', obsolete, None)
+ pass
+
+ def compare(self, ref_file, l10n):
+ try:
+ p = parser.getParser(ref_file.file)
+ except UserWarning:
+ # no comparison, XXX report?
+ return
+ if ref_file not in self.reference:
+ # we didn't parse this before
+ try:
+ p.readContents(ref_file.getContents())
+ except Exception, e:
+ self.notify('error', ref_file, str(e))
+ return
+ self.reference[ref_file] = p.parse()
+ ref = self.reference[ref_file]
+ ref_list = ref[1].keys()
+ ref_list.sort()
+ try:
+ p.readContents(l10n.getContents())
+ l10n_entities, l10n_map = p.parse()
+ except Exception, e:
+ self.notify('error', l10n, str(e))
+ return
+ lines = []
+
+ def _getLine(offset):
+ if not lines:
+ lines.append(0)
+ for m in self.nl.finditer(p.contents):
+ lines.append(m.end())
+ for i in xrange(len(lines), 0, -1):
+ if offset >= lines[i - 1]:
+ return (i, offset - lines[i - 1])
+ return (1, offset)
+
+ l10n_list = l10n_map.keys()
+ l10n_list.sort()
+ ar = AddRemove()
+ ar.set_left(ref_list)
+ ar.set_right(l10n_list)
+ report = missing = obsolete = changed = unchanged = keys = 0
+ missings = []
+ skips = []
+ checker = getChecker(l10n, reference=ref[0])
+ for action, item_or_pair in ar:
+ if action == 'delete':
+ # missing entity
+ _rv = self.notify('missingEntity', l10n, item_or_pair)
+ if _rv == "ignore":
+ continue
+ if _rv == "error":
+ # only add to missing entities for l10n-merge on error,
+ # not report
+ missings.append(item_or_pair)
+ missing += 1
+ else:
+ # just report
+ report += 1
+ elif action == 'add':
+ # obsolete entity or junk
+ if isinstance(l10n_entities[l10n_map[item_or_pair]],
+ parser.Junk):
+ junk = l10n_entities[l10n_map[item_or_pair]]
+ params = (junk.val,) + junk.span
+ self.notify('error', l10n,
+ 'Unparsed content "%s" at %d-%d' % params)
+ if self.merge_stage is not None:
+ skips.append(junk)
+ elif self.notify('obsoleteEntity', l10n,
+ item_or_pair) != 'ignore':
+ obsolete += 1
+ else:
+ # entity found in both ref and l10n, check for changed
+ entity = item_or_pair[0]
+ refent = ref[0][ref[1][entity]]
+ l10nent = l10n_entities[l10n_map[entity]]
+ if self.keyRE.search(entity):
+ keys += 1
+ else:
+ if refent.val == l10nent.val:
+ self.doUnchanged(l10nent)
+ unchanged += 1
+ else:
+ self.doChanged(ref_file, refent, l10nent)
+ changed += 1
+ # run checks:
+ if checker:
+ for tp, pos, msg, cat in checker.check(refent, l10nent):
+ # compute real src position, if first line,
+ # col needs adjustment
+ _l, _offset = _getLine(l10nent.val_span[0])
+ if isinstance(pos, tuple):
+ # line, column
+ if pos[0] == 1:
+ col = pos[1] + _offset
+ else:
+ col = pos[1]
+ _l += pos[0] - 1
+ else:
+ _l, col = _getLine(l10nent.val_span[0] + pos)
+ # skip error entities when merging
+ if tp == 'error' and self.merge_stage is not None:
+ skips.append(l10nent)
+ self.notify(tp, l10n,
+ u"%s at line %d, column %d for %s" %
+ (msg, _l, col, refent.key))
+ pass
+ if missing:
+ self.notify('missing', l10n, missing)
+ if self.merge_stage is not None and (missings or skips):
+ self.merge(ref[0], ref[1], ref_file, l10n, missings, skips, p)
+ if report:
+ self.notify('report', l10n, report)
+ if obsolete:
+ self.notify('obsolete', l10n, obsolete)
+ if changed:
+ self.notify('changed', l10n, changed)
+ if unchanged:
+ self.notify('unchanged', l10n, unchanged)
+ if keys:
+ self.notify('keys', l10n, keys)
+ pass
+
+ def add(self, orig, missing):
+ if self.notify('missingFile', missing, None) == "ignore":
+ # filter said that we don't need this file, don't count it
+ return
+ f = orig
+ try:
+ p = parser.getParser(f.file)
+ except UserWarning:
+ return
+ try:
+ p.readContents(f.getContents())
+ entities, map = p.parse()
+ except Exception, e:
+ self.notify('error', f, str(e))
+ return
+ self.notify('missingInFiles', missing, len(map))
+
+ def doUnchanged(self, entity):
+ # overload this if needed
+ pass
+
+ def doChanged(self, file, ref_entity, l10n_entity):
+ # overload this if needed
+ pass
+
+
+def compareApp(app, other_observer=None, merge_stage=None, clobber=False):
+ '''Compare locales set in app.
+
+ Optional arguments are:
+    - other_observer. An object implementing
+ notify(category, _file, data)
+ The return values of that callback are ignored.
+ - merge_stage. A directory to be used for staging the output of
+ l10n-merge.
+ - clobber. Clobber the module subdirectories of the merge dir as we go.
+ Use wisely, as it might cause data loss.
+ '''
+ comparer = ContentComparer()
+ if other_observer is not None:
+ comparer.add_observer(other_observer)
+ comparer.observer.filter = app.filter
+ for module, reference, locales in app:
+ dir_comp = DirectoryCompare(reference)
+ dir_comp.setWatcher(comparer)
+ for _, localization in locales:
+ if merge_stage is not None:
+ locale_merge = merge_stage.format(ab_CD=localization.locale)
+ comparer.set_merge_stage(locale_merge)
+ if clobber:
+ # if clobber, remove the stage for the module if it exists
+ clobberdir = os.path.join(locale_merge, module)
+ if os.path.exists(clobberdir):
+ shutil.rmtree(clobberdir)
+ print "clobbered " + clobberdir
+ dir_comp.compareWith(localization)
+ return comparer.observer
+
+
+def compareDirs(reference, locale, other_observer=None, merge_stage=None):
+ '''Compare reference and locale dir.
+
+ Optional arguments are:
+    - other_observer. An object implementing
+ notify(category, _file, data)
+ The return values of that callback are ignored.
+ '''
+ comparer = ContentComparer()
+ if other_observer is not None:
+ comparer.add_observer(other_observer)
+ comparer.set_merge_stage(merge_stage)
+ dir_comp = DirectoryCompare(paths.EnumerateDir(reference))
+ dir_comp.setWatcher(comparer)
+ dir_comp.compareWith(paths.EnumerateDir(locale))
+ return comparer.observer
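
A minimal driver for the comparison machinery above, assuming two hypothetical
directory trees for the reference and the localization. compareDirs() returns
the Observer, whose serialize() accepts the same 'text', 'json' and 'exhibit'
values exposed by the --data option in commands.py.

    from compare_locales.compare import compareDirs

    # hypothetical reference and localization checkouts
    observer = compareDirs('/tmp/l10n/en-US', '/tmp/l10n/de')
    print observer.serialize(type='text').encode('utf-8', 'replace')
    print observer.serialize(type='json')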
diff --git a/python/compare-locales/compare_locales/parser.py b/python/compare-locales/compare_locales/parser.py
new file mode 100644
index 000000000..a97cf201b
--- /dev/null
+++ b/python/compare-locales/compare_locales/parser.py
@@ -0,0 +1,521 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+import codecs
+import logging
+from HTMLParser import HTMLParser
+
+__constructors = []
+
+
+class Entity(object):
+ '''
+ Abstraction layer for a localizable entity.
+ Currently supported are grammars of the form:
+
+ 1: pre white space
+ 2: pre comments
+ 3: entity definition
+ 4: entity key (name)
+ 5: entity value
+ 6: post comment (and white space) in the same line (dtd only)
+ <--[1]
+ <!-- pre comments --> <--[2]
+ <!ENTITY key "value"> <!-- comment -->
+
+ <-------[3]---------><------[6]------>
+ '''
+ def __init__(self, contents, pp,
+ span, pre_ws_span, pre_comment_span, def_span,
+ key_span, val_span, post_span):
+ self.contents = contents
+ self.span = span
+ self.pre_ws_span = pre_ws_span
+ self.pre_comment_span = pre_comment_span
+ self.def_span = def_span
+ self.key_span = key_span
+ self.val_span = val_span
+ self.post_span = post_span
+ self.pp = pp
+ pass
+
+ # getter helpers
+
+ def get_all(self):
+ return self.contents[self.span[0]:self.span[1]]
+
+ def get_pre_ws(self):
+ return self.contents[self.pre_ws_span[0]:self.pre_ws_span[1]]
+
+ def get_pre_comment(self):
+ return self.contents[self.pre_comment_span[0]:
+ self.pre_comment_span[1]]
+
+ def get_def(self):
+ return self.contents[self.def_span[0]:self.def_span[1]]
+
+ def get_key(self):
+ return self.contents[self.key_span[0]:self.key_span[1]]
+
+ def get_val(self):
+ return self.pp(self.contents[self.val_span[0]:self.val_span[1]])
+
+ def get_raw_val(self):
+ return self.contents[self.val_span[0]:self.val_span[1]]
+
+ def get_post(self):
+ return self.contents[self.post_span[0]:self.post_span[1]]
+
+ # getters
+
+ all = property(get_all)
+ pre_ws = property(get_pre_ws)
+ pre_comment = property(get_pre_comment)
+ definition = property(get_def)
+ key = property(get_key)
+ val = property(get_val)
+ raw_val = property(get_raw_val)
+ post = property(get_post)
+
+ def __repr__(self):
+ return self.key
+
+
+class Junk(object):
+ '''
+ An almost-Entity, representing junk data that we didn't parse.
+ This way, we can signal bad content as stuff we don't understand.
+    And then either fix that, or report real bugs in localizations.
+ '''
+ junkid = 0
+
+ def __init__(self, contents, span):
+ self.contents = contents
+ self.span = span
+ self.pre_ws = self.pre_comment = self.definition = self.post = ''
+ self.__class__.junkid += 1
+ self.key = '_junk_%d_%d-%d' % (self.__class__.junkid, span[0], span[1])
+
+ # getter helpers
+ def get_all(self):
+ return self.contents[self.span[0]:self.span[1]]
+
+ # getters
+ all = property(get_all)
+ val = property(get_all)
+
+ def __repr__(self):
+ return self.key
+
+
+class Parser:
+ canMerge = True
+
+ def __init__(self):
+ if not hasattr(self, 'encoding'):
+ self.encoding = 'utf-8'
+ pass
+
+ def readFile(self, file):
+ f = codecs.open(file, 'r', self.encoding)
+ try:
+ self.contents = f.read()
+ except UnicodeDecodeError, e:
+ (logging.getLogger('locales')
+ .error("Can't read file: " + file + '; ' + str(e)))
+ self.contents = u''
+ f.close()
+
+ def readContents(self, contents):
+ (self.contents, length) = codecs.getdecoder(self.encoding)(contents)
+
+ def parse(self):
+ l = []
+ m = {}
+ for e in self:
+ m[e.key] = len(l)
+ l.append(e)
+ return (l, m)
+
+ def postProcessValue(self, val):
+ return val
+
+ def __iter__(self):
+ contents = self.contents
+ offset = 0
+ self.header, offset = self.getHeader(contents, offset)
+ self.footer = ''
+ entity, offset = self.getEntity(contents, offset)
+ while entity:
+ yield entity
+ entity, offset = self.getEntity(contents, offset)
+ f = self.reFooter.match(contents, offset)
+ if f:
+ self.footer = f.group()
+ offset = f.end()
+ if len(contents) > offset:
+ yield Junk(contents, (offset, len(contents)))
+ pass
+
+ def getHeader(self, contents, offset):
+ header = ''
+ h = self.reHeader.match(contents)
+ if h:
+ header = h.group()
+ offset = h.end()
+ return (header, offset)
+
+ def getEntity(self, contents, offset):
+ m = self.reKey.match(contents, offset)
+ if m:
+ offset = m.end()
+ entity = self.createEntity(contents, m)
+ return (entity, offset)
+ # first check if footer has a non-empty match,
+ # 'cause then we don't find junk
+ m = self.reFooter.match(contents, offset)
+ if m and m.end() > offset:
+ return (None, offset)
+ m = self.reKey.search(contents, offset)
+ if m:
+        # match() failed, but search() hit, so there's junk between
+        # offset and the match start. We'll match() on the next turn
+ junkend = m.start()
+ return (Junk(contents, (offset, junkend)), junkend)
+ return (None, offset)
+
+ def createEntity(self, contents, m):
+ return Entity(contents, self.postProcessValue,
+ *[m.span(i) for i in xrange(7)])
+
+
+def getParser(path):
+ for item in __constructors:
+ if re.search(item[0], path):
+ return item[1]
+ raise UserWarning("Cannot find Parser")
+
+
+# Subgroups of the match are:
+# 1: pre white space
+# 2: pre comments
+# 3: entity definition
+# 4: entity key (name)
+# 5: entity value
+# 6: post comment (and white space) in the same line (dtd only)
+# <--[1]
+# <!-- pre comments --> <--[2]
+# <!ENTITY key "value"> <!-- comment -->
+#
+# <-------[3]---------><------[6]------>
+
+
+class DTDParser(Parser):
+ # http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
+ # ":" | [A-Z] | "_" | [a-z] |
+ # [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
+ # | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
+ # [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
+ # [#x10000-#xEFFFF]
+ CharMinusDash = u'\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
+ XmlComment = '<!--(?:-?[%s])*?-->' % CharMinusDash
+ NameStartChar = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
+ u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
+ u'\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
+ # + \U00010000-\U000EFFFF seems to be unsupported in python
+
+ # NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
+ # [#x0300-#x036F] | [#x203F-#x2040]
+ NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040'
+ Name = '[' + NameStartChar + '][' + NameChar + ']*'
+ reKey = re.compile('(?:(?P<pre>\s*)(?P<precomment>(?:' + XmlComment +
+ '\s*)*)(?P<entity><!ENTITY\s+(?P<key>' + Name +
+ ')\s+(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>)'
+ '(?P<post>[ \t]*(?:' + XmlComment + '\s*)*\n?)?)',
+ re.DOTALL)
+ # add BOM to DTDs, details in bug 435002
+ reHeader = re.compile(u'^\ufeff?'
+ u'(\s*<!--.*(http://mozilla.org/MPL/2.0/|'
+ u'LICENSE BLOCK)([^-]+-)*[^-]+-->)?', re.S)
+ reFooter = re.compile('\s*(<!--([^-]+-)*[^-]+-->\s*)*$')
+ rePE = re.compile('(?:(\s*)((?:' + XmlComment + '\s*)*)'
+ '(<!ENTITY\s+%\s+(' + Name +
+ ')\s+SYSTEM\s+(\"[^\"]*\"|\'[^\']*\')\s*>\s*%' + Name +
+ ';)([ \t]*(?:' + XmlComment + '\s*)*\n?)?)')
+
+ def getEntity(self, contents, offset):
+ '''
+ Overload Parser.getEntity to special-case ParsedEntities.
+ Just check for a parsed entity if that method claims junk.
+
+ <!ENTITY % foo SYSTEM "url">
+ %foo;
+ '''
+ entity, inneroffset = Parser.getEntity(self, contents, offset)
+ if (entity and isinstance(entity, Junk)) or entity is None:
+ m = self.rePE.match(contents, offset)
+ if m:
+ inneroffset = m.end()
+ entity = Entity(contents, self.postProcessValue,
+ *[m.span(i) for i in xrange(7)])
+ return (entity, inneroffset)
+
+ def createEntity(self, contents, m):
+ valspan = m.span('val')
+ valspan = (valspan[0]+1, valspan[1]-1)
+ return Entity(contents, self.postProcessValue, m.span(),
+ m.span('pre'), m.span('precomment'),
+ m.span('entity'), m.span('key'), valspan,
+ m.span('post'))
+
+
+class PropertiesParser(Parser):
+ escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
+ '(?P<nl>\n\s*)|(?P<single>.))', re.M)
+ known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}
+
+ def __init__(self):
+ self.reKey = re.compile('^(\s*)'
+ '((?:[#!].*?\n\s*)*)'
+ '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M)
+ self.reHeader = re.compile('^\s*([#!].*\s*)+')
+ self.reFooter = re.compile('\s*([#!].*\s*)*$')
+ self._escapedEnd = re.compile(r'\\+$')
+ self._trailingWS = re.compile(r'[ \t]*$')
+ Parser.__init__(self)
+
+ def getHeader(self, contents, offset):
+ header = ''
+ h = self.reHeader.match(contents, offset)
+ if h:
+ candidate = h.group()
+ if 'http://mozilla.org/MPL/2.0/' in candidate or \
+ 'LICENSE BLOCK' in candidate:
+ header = candidate
+ offset = h.end()
+ return (header, offset)
+
+ def getEntity(self, contents, offset):
+ # overwritten to parse values line by line
+ m = self.reKey.match(contents, offset)
+ if m:
+ offset = m.end()
+ while True:
+ endval = nextline = contents.find('\n', offset)
+ if nextline == -1:
+ endval = offset = len(contents)
+ break
+ # is newline escaped?
+ _e = self._escapedEnd.search(contents, offset, nextline)
+ offset = nextline + 1
+ if _e is None:
+ break
+ # backslashes at end of line, if 2*n, not escaped
+ if len(_e.group()) % 2 == 0:
+ break
+ # strip trailing whitespace
+ ws = self._trailingWS.search(contents, m.end(), offset)
+ if ws:
+ endval -= ws.end() - ws.start()
+ entity = Entity(contents, self.postProcessValue,
+ (m.start(), offset), # full span
+ m.span(1), # leading whitespan
+ m.span(2), # leading comment span
+ (m.start(3), offset), # entity def span
+ m.span(3), # key span
+ (m.end(), endval), # value span
+ (offset, offset)) # post comment span, empty
+ return (entity, offset)
+ m = self.reKey.search(contents, offset)
+ if m:
+            # match() failed, but search() hit, so there's junk between
+            # offset and the match start. We'll match() on the next turn
+ junkend = m.start()
+ return (Junk(contents, (offset, junkend)), junkend)
+ return (None, offset)
+
+ def postProcessValue(self, val):
+
+ def unescape(m):
+ found = m.groupdict()
+ if found['uni']:
+ return unichr(int(found['uni'][1:], 16))
+ if found['nl']:
+ return ''
+ return self.known_escapes.get(found['single'], found['single'])
+ val = self.escape.sub(unescape, val)
+ return val
+
+
+class DefinesParser(Parser):
+ # can't merge, #unfilter needs to be the last item, which we don't support
+ canMerge = False
+
+ def __init__(self):
+ self.reKey = re.compile('^(\s*)((?:^#(?!define\s).*\s*)*)'
+ '(#define[ \t]+(\w+)[ \t]+(.*?))([ \t]*$\n?)',
+ re.M)
+ self.reHeader = re.compile('^\s*(#(?!define\s).*\s*)*')
+ self.reFooter = re.compile('\s*(#(?!define\s).*\s*)*$', re.M)
+ Parser.__init__(self)
+
+
+class IniParser(Parser):
+ '''
+ Parse files of the form:
+ # initial comment
+ [cat]
+ whitespace*
+ #comment
+ string=value
+ ...
+ '''
+ def __init__(self):
+ self.reHeader = re.compile('^((?:\s*|[;#].*)\n)*\[.+?\]\n', re.M)
+ self.reKey = re.compile('(\s*)((?:[;#].*\n\s*)*)((.+?)=(.*))(\n?)')
+ self.reFooter = re.compile('\s*([;#].*\s*)*$')
+ Parser.__init__(self)
+
+
+DECL, COMMENT, START, END, CONTENT = range(5)
+
+
+class BookmarksParserInner(HTMLParser):
+
+ class Token(object):
+ _type = None
+ content = ''
+
+ def __str__(self):
+ return self.content
+
+ class DeclToken(Token):
+ _type = DECL
+
+ def __init__(self, decl):
+ self.content = decl
+ pass
+
+ def __str__(self):
+ return '<!%s>' % self.content
+ pass
+
+ class CommentToken(Token):
+ _type = COMMENT
+
+ def __init__(self, comment):
+ self.content = comment
+ pass
+
+ def __str__(self):
+ return '<!--%s-->' % self.content
+ pass
+
+ class StartToken(Token):
+ _type = START
+
+ def __init__(self, tag, attrs, content):
+ self.tag = tag
+ self.attrs = dict(attrs)
+ self.content = content
+ pass
+ pass
+
+ class EndToken(Token):
+ _type = END
+
+ def __init__(self, tag):
+ self.tag = tag
+ pass
+
+ def __str__(self):
+ return '</%s>' % self.tag.upper()
+ pass
+
+ class ContentToken(Token):
+ _type = CONTENT
+
+ def __init__(self, content):
+ self.content = content
+ pass
+ pass
+
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.tokens = []
+
+ def parse(self, contents):
+ self.tokens = []
+ self.feed(contents)
+ self.close()
+ return self.tokens
+
+    # Called for declarations, e.g. <!DOCTYPE ...>
+ def handle_decl(self, decl):
+ self.tokens.append(self.DeclToken(decl))
+
+    # Called for HTML comments
+ def handle_comment(self, comment):
+ self.tokens.append(self.CommentToken(comment))
+
+ def handle_starttag(self, tag, attrs):
+ self.tokens.append(self.StartToken(tag, attrs,
+ self.get_starttag_text()))
+
+ # Called when text data is encountered
+ def handle_data(self, data):
+ if self.tokens[-1]._type == CONTENT:
+ self.tokens[-1].content += data
+ else:
+ self.tokens.append(self.ContentToken(data))
+
+ def handle_charref(self, data):
+ self.handle_data('&#%s;' % data)
+
+ def handle_entityref(self, data):
+ self.handle_data('&%s;' % data)
+
+    # Called for end tags, e.g. </DL>
+ def handle_endtag(self, tag):
+ self.tokens.append(self.EndToken(tag))
+
+
+class BookmarksParser(Parser):
+ canMerge = False
+
+ class BMEntity(object):
+ def __init__(self, key, val):
+ self.key = key
+ self.val = val
+
+ def __iter__(self):
+ p = BookmarksParserInner()
+ tks = p.parse(self.contents)
+ i = 0
+ k = []
+ for i in xrange(len(tks)):
+ t = tks[i]
+ if t._type == START:
+ k.append(t.tag)
+ keys = t.attrs.keys()
+ keys.sort()
+ for attrname in keys:
+ yield self.BMEntity('.'.join(k) + '.@' + attrname,
+ t.attrs[attrname])
+ if i + 1 < len(tks) and tks[i+1]._type == CONTENT:
+ i += 1
+ t = tks[i]
+ v = t.content.strip()
+ if v:
+ yield self.BMEntity('.'.join(k), v)
+ elif t._type == END:
+ k.pop()
+
+
+__constructors = [('\\.dtd$', DTDParser()),
+ ('\\.properties$', PropertiesParser()),
+ ('\\.ini$', IniParser()),
+ ('\\.inc$', DefinesParser()),
+ ('bookmarks\\.html$', BookmarksParser())]
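
A short sketch of the parser registry in action; the file name and the DTD
snippet are invented. getParser() walks __constructors and returns the first
parser whose pattern matches the path, so the same few lines work for
.properties, .ini, .inc and bookmarks.html files as well.

    from compare_locales.parser import getParser

    p = getParser('aboutDialog.dtd')  # matched by the '\.dtd$' entry
    p.readContents('<!-- intro -->\n<!ENTITY brandName "Nightly">\n')
    entities, keymap = p.parse()
    for entity in entities:
        print entity.key, '->', entity.val
    # prints: brandName -> Nightly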
diff --git a/python/compare-locales/compare_locales/paths.py b/python/compare-locales/compare_locales/paths.py
new file mode 100644
index 000000000..f72b3a2e7
--- /dev/null
+++ b/python/compare-locales/compare_locales/paths.py
@@ -0,0 +1,398 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os.path
+import os
+from ConfigParser import ConfigParser, NoSectionError, NoOptionError
+from urlparse import urlparse, urljoin
+from urllib import pathname2url, url2pathname
+from urllib2 import urlopen
+from collections import defaultdict
+from compare_locales import util
+
+
+class L10nConfigParser(object):
+ '''Helper class to gather application information from ini files.
+
+ This class is working on synchronous open to read files or web data.
+ Subclass this and overwrite loadConfigs and addChild if you need async.
+ '''
+ def __init__(self, inipath, **kwargs):
+ """Constructor for L10nConfigParsers
+
+ inipath -- l10n.ini path
+    Optional keyword arguments are forwarded to the inner ConfigParser as
+ defaults.
+ """
+ if os.path.isabs(inipath):
+ self.inipath = 'file:%s' % pathname2url(inipath)
+ else:
+ pwdurl = 'file:%s/' % pathname2url(os.getcwd())
+ self.inipath = urljoin(pwdurl, inipath)
+ # l10n.ini files can import other l10n.ini files, store the
+ # corresponding L10nConfigParsers
+ self.children = []
+ # we really only care about the l10n directories described in l10n.ini
+ self.dirs = []
+ # optional defaults to be passed to the inner ConfigParser (unused?)
+ self.defaults = kwargs
+
+ def getDepth(self, cp):
+ '''Get the depth for the comparison from the parsed l10n.ini.
+
+ Overloadable to get the source depth for fennec and friends.
+ '''
+ try:
+ depth = cp.get('general', 'depth')
+        except (NoSectionError, NoOptionError):
+ depth = '.'
+ return depth
+
+ def getFilters(self):
+ '''Get the test functions from this ConfigParser and all children.
+
+ Only works with synchronous loads, used by compare-locales, which
+ is local anyway.
+ '''
+ filterurl = urljoin(self.inipath, 'filter.py')
+ try:
+ l = {}
+ execfile(url2pathname(urlparse(filterurl).path), {}, l)
+ if 'test' in l and callable(l['test']):
+ filters = [l['test']]
+ else:
+ filters = []
+ except:
+ filters = []
+
+ for c in self.children:
+ filters += c.getFilters()
+
+ return filters
+
+ def loadConfigs(self):
+ """Entry point to load the l10n.ini file this Parser refers to.
+
+ This implementation uses synchronous loads, subclasses might overload
+ this behaviour. If you do, make sure to pass a file-like object
+ to onLoadConfig.
+ """
+ self.onLoadConfig(urlopen(self.inipath))
+
+ def onLoadConfig(self, inifile):
+ """Parse a file-like object for the loaded l10n.ini file."""
+ cp = ConfigParser(self.defaults)
+ cp.readfp(inifile)
+ depth = self.getDepth(cp)
+ self.baseurl = urljoin(self.inipath, depth)
+ # create child loaders for any other l10n.ini files to be included
+ try:
+ for title, path in cp.items('includes'):
+ # skip default items
+ if title in self.defaults:
+ continue
+ # add child config parser
+ self.addChild(title, path, cp)
+ except NoSectionError:
+ pass
+ # try to load the "dirs" defined in the "compare" section
+ try:
+ self.dirs.extend(cp.get('compare', 'dirs').split())
+ except (NoOptionError, NoSectionError):
+ pass
+ # try getting a top level compare dir, as used for fennec
+ try:
+ self.tld = cp.get('compare', 'tld')
+ # remove tld from comparison dirs
+ if self.tld in self.dirs:
+ self.dirs.remove(self.tld)
+ except (NoOptionError, NoSectionError):
+ self.tld = None
+ # try to set "all_path" and "all_url"
+ try:
+ self.all_path = cp.get('general', 'all')
+ self.all_url = urljoin(self.baseurl, self.all_path)
+ except (NoOptionError, NoSectionError):
+ self.all_path = None
+ self.all_url = None
+ return cp
+
+ def addChild(self, title, path, orig_cp):
+ """Create a child L10nConfigParser and load it.
+
+ title -- indicates the module's name
+ path -- indicates the path to the module's l10n.ini file
+ orig_cp -- the configuration parser of this l10n.ini
+ """
+ cp = L10nConfigParser(urljoin(self.baseurl, path), **self.defaults)
+ cp.loadConfigs()
+ self.children.append(cp)
+
+ def getTLDPathsTuple(self, basepath):
+ """Given the basepath, return the path fragments to be used for
+ self.tld. For build runs, this is (basepath, self.tld), for
+ source runs, just (basepath,).
+
+        @see the overridden method in SourceTreeConfigParser.
+ """
+ return (basepath, self.tld)
+
+ def dirsIter(self):
+ """Iterate over all dirs and our base path for this l10n.ini"""
+ url = urlparse(self.baseurl)
+ basepath = url2pathname(url.path)
+ if self.tld is not None:
+ yield self.tld, self.getTLDPathsTuple(basepath)
+ for dir in self.dirs:
+ yield dir, (basepath, dir)
+
+ def directories(self):
+ """Iterate over all dirs and base paths for this l10n.ini as well
+ as the included ones.
+ """
+ for t in self.dirsIter():
+ yield t
+ for child in self.children:
+ for t in child.directories():
+ yield t
+
+ def allLocales(self):
+ """Return a list of all the locales of this project"""
+ return util.parseLocales(urlopen(self.all_url).read())
+
+
+class SourceTreeConfigParser(L10nConfigParser):
+ '''Subclassing L10nConfigParser to work with just the repos
+ checked out next to each other instead of intermingled like
+ we do for real builds.
+ '''
+
+ def __init__(self, inipath, basepath):
+        '''Add the additional argument basepath.
+
+        basepath is used to resolve local paths via branch names.
+ '''
+ L10nConfigParser.__init__(self, inipath)
+ self.basepath = basepath
+ self.tld = None
+
+ def getDepth(self, cp):
+ '''Get the depth for the comparison from the parsed l10n.ini.
+
+        Overridden to get the source depth for fennec and friends.
+ '''
+        try:
+            depth = cp.get('general', 'source-depth')
+        except (NoOptionError, NoSectionError):
+            try:
+                depth = cp.get('general', 'depth')
+            except (NoOptionError, NoSectionError):
+                depth = '.'
+ return depth
+
+ def addChild(self, title, path, orig_cp):
+ # check if there's a section with details for this include
+ # we might have to check a different repo, or even VCS
+ # for example, projects like "mail" indicate in
+ # an "include_" section where to find the l10n.ini for "toolkit"
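+        # a sketch of such a section (hypothetical values):
+        #   [include_toolkit]
+        #   mozilla = mozilla-central
+        #   l10n.ini = toolkit/locales/l10n.ini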
+ details = 'include_' + title
+ if orig_cp.has_section(details):
+ branch = orig_cp.get(details, 'mozilla')
+ inipath = orig_cp.get(details, 'l10n.ini')
+ path = self.basepath + '/' + branch + '/' + inipath
+ else:
+ path = urljoin(self.baseurl, path)
+ cp = SourceTreeConfigParser(path, self.basepath, **self.defaults)
+ cp.loadConfigs()
+ self.children.append(cp)
+
+ def getTLDPathsTuple(self, basepath):
+        """Override L10nConfigParser's getTLDPathsTuple to just return
+ the basepath.
+ """
+ return (basepath, )
+
+
+class File(object):
+
+ def __init__(self, fullpath, file, module=None, locale=None):
+ self.fullpath = fullpath
+ self.file = file
+ self.module = module
+ self.locale = locale
+
+ def getContents(self):
+ # open with universal line ending support and read
+        with open(self.fullpath, 'rU') as f:
+            return f.read()
+
+ def __hash__(self):
+ f = self.file
+ if self.module:
+ f = self.module + '/' + f
+ return hash(f)
+
+ def __str__(self):
+ return self.fullpath
+
+ def __cmp__(self, other):
+ if not isinstance(other, File):
+ raise NotImplementedError
+ rv = cmp(self.module, other.module)
+ if rv != 0:
+ return rv
+ return cmp(self.file, other.file)
+
+
+class EnumerateDir(object):
+ ignore_dirs = ['CVS', '.svn', '.hg', '.git']
+
+ def __init__(self, basepath, module='', locale=None, ignore_subdirs=[]):
+ self.basepath = basepath
+ self.module = module
+ self.locale = locale
+ self.ignore_subdirs = ignore_subdirs
+
+ def cloneFile(self, other):
+ '''
+ Return a File object that this enumerator would return, if it had it.
+ '''
+ return File(os.path.join(self.basepath, other.file), other.file,
+ self.module, self.locale)
+
+ def __iter__(self):
+ # our local dirs are given as a tuple of path segments, starting off
+ # with an empty sequence for the basepath.
+ dirs = [()]
+ while dirs:
+ dir = dirs.pop(0)
+ fulldir = os.path.join(self.basepath, *dir)
+ try:
+ entries = os.listdir(fulldir)
+ except OSError:
+ # we probably just started off in a non-existing dir, ignore
+ continue
+ entries.sort()
+ for entry in entries:
+ leaf = os.path.join(fulldir, entry)
+ if os.path.isdir(leaf):
+ if entry not in self.ignore_dirs and \
+ leaf not in [os.path.join(self.basepath, d)
+ for d in self.ignore_subdirs]:
+ dirs.append(dir + (entry,))
+ continue
+ yield File(leaf, '/'.join(dir + (entry,)),
+ self.module, self.locale)
+
+
+class LocalesWrap(object):
+
+ def __init__(self, base, module, locales, ignore_subdirs=[]):
+ self.base = base
+ self.module = module
+ self.locales = locales
+ self.ignore_subdirs = ignore_subdirs
+
+ def __iter__(self):
+ for locale in self.locales:
+ path = os.path.join(self.base, locale, self.module)
+ yield (locale, EnumerateDir(path, self.module, locale,
+ self.ignore_subdirs))
+
+
+class EnumerateApp(object):
+ reference = 'en-US'
+
+ def __init__(self, inipath, l10nbase, locales=None):
+ self.setupConfigParser(inipath)
+ self.modules = defaultdict(dict)
+ self.l10nbase = os.path.abspath(l10nbase)
+ self.filters = []
+ self.addFilters(*self.config.getFilters())
+ self.locales = locales or self.config.allLocales()
+ self.locales.sort()
+
+ def setupConfigParser(self, inipath):
+ self.config = L10nConfigParser(inipath)
+ self.config.loadConfigs()
+
+ def addFilters(self, *args):
+ self.filters += args
+
+ value_map = {None: None, 'error': 0, 'ignore': 1, 'report': 2}
+
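+    # Filters come from filter.py files next to an l10n.ini, loaded in
+    # L10nConfigParser.getFilters. A sketch of such a test function, with
+    # hypothetical module and path names:
+    #
+    #   def test(mod, path, entity=None):
+    #       if mod == 'browser' and path.startswith('searchplugins/'):
+    #           return 'ignore'
+    #       return 'error'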
+ def filter(self, l10n_file, entity=None):
+ '''Go through all added filters, and,
+ - map "error" -> 0, "ignore" -> 1, "report" -> 2
+ - if filter.test returns a bool, map that to
+ False -> "ignore" (1), True -> "error" (0)
+ - take the max of all reported
+ '''
+ rv = 0
+ for f in reversed(self.filters):
+ try:
+ _r = f(l10n_file.module, l10n_file.file, entity)
+ except:
+ # XXX error handling
+ continue
+ if isinstance(_r, bool):
+ _r = [1, 0][_r]
+ else:
+ # map string return value to int, default to 'error',
+ # None is None
+ _r = self.value_map.get(_r, 0)
+ if _r is not None:
+ rv = max(rv, _r)
+ return ['error', 'ignore', 'report'][rv]
+
+ def __iter__(self):
+ '''
+        Iterate over all modules; each iteration yields the module name, an
+        en-US directory enumerator, and an iterator over all locales. Per
+        locale, the locale code and a directory enumerator are given.
+ '''
+ dirmap = dict(self.config.directories())
+ mods = dirmap.keys()
+ mods.sort()
+ for mod in mods:
+ if self.reference == 'en-US':
+ base = os.path.join(*(dirmap[mod] + ('locales', 'en-US')))
+ else:
+ base = os.path.join(self.l10nbase, self.reference, mod)
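+            # pass nested modules (e.g. 'mod/sub') as ignore_subdirs, so
+            # that their files aren't enumerated twice; nested modules are
+            # yielded on their own in this loop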
+ yield (mod, EnumerateDir(base, mod, self.reference),
+ LocalesWrap(self.l10nbase, mod, self.locales,
+ [m[len(mod)+1:] for m in mods if m.startswith(mod+'/')]))
+
+
+class EnumerateSourceTreeApp(EnumerateApp):
+    '''Subclass of EnumerateApp to work on side-by-side checked out
+    repos, paying no attention to how the source would actually
+    be checked out for building.
+
+    It also supports applications like Fennec, which have
+    'locales/en-US/...' in their root dir, but claim to be 'mobile'.
+ '''
+
+ def __init__(self, inipath, basepath, l10nbase, locales=None):
+ self.basepath = basepath
+ EnumerateApp.__init__(self, inipath, l10nbase, locales)
+
+ def setupConfigParser(self, inipath):
+ self.config = SourceTreeConfigParser(inipath, self.basepath)
+ self.config.loadConfigs()
+
+
+def get_base_path(mod, loc):
+ 'statics for path patterns and conversion'
+ __l10n = 'l10n/%(loc)s/%(mod)s'
+ __en_US = 'mozilla/%(mod)s/locales/en-US'
+ if loc == 'en-US':
+ return __en_US % {'mod': mod}
+ return __l10n % {'mod': mod, 'loc': loc}
+
+
+def get_path(mod, loc, leaf):
+ return get_base_path(mod, loc) + '/' + leaf
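+# For illustration: get_path('browser', 'de', 'menu.dtd') returns
+# 'l10n/de/browser/menu.dtd', and get_path('browser', 'en-US', 'menu.dtd')
+# returns 'mozilla/browser/locales/en-US/menu.dtd' (module and leaf names
+# are made up).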
diff --git a/python/compare-locales/compare_locales/tests/__init__.py b/python/compare-locales/compare_locales/tests/__init__.py
new file mode 100644
index 000000000..8808d78f4
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/__init__.py
@@ -0,0 +1,49 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''Mixins for parser tests.
+'''
+
+from itertools import izip_longest
+from pkg_resources import resource_string
+import re
+
+from compare_locales.parser import getParser
+
+
+class ParserTestMixin():
+ '''Utility methods used by the parser tests.
+ '''
+ filename = None
+
+ def setUp(self):
+ '''Create a parser for this test.
+ '''
+ self.parser = getParser(self.filename)
+
+ def tearDown(self):
+ 'tear down this test'
+ del self.parser
+
+ def resource(self, name):
+ testcontent = resource_string(__name__, 'data/' + name)
+ # fake universal line endings
+        testcontent = re.sub('\r\n?', '\n', testcontent)
+ return testcontent
+
+ def _test(self, content, refs):
+ '''Helper to test the parser.
+ Compares the result of parsing content with the given list
+ of reference keys and values.
+ '''
+ self.parser.readContents(content)
+ entities = [entity for entity in self.parser]
+ for entity, ref in izip_longest(entities, refs):
+ self.assertTrue(entity, 'excess reference entity')
+ self.assertTrue(ref, 'excess parsed entity')
+ self.assertEqual(entity.val, ref[1])
+ if ref[0].startswith('_junk'):
+ self.assertTrue(re.match(ref[0], entity.key))
+ else:
+ self.assertEqual(entity.key, ref[0])
diff --git a/python/compare-locales/compare_locales/tests/data/bug121341.properties b/python/compare-locales/compare_locales/tests/data/bug121341.properties
new file mode 100644
index 000000000..b45fc9698
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/data/bug121341.properties
@@ -0,0 +1,68 @@
+# simple check
+1=abc
+# test whitespace trimming in key and value
+ 2 = xy
+# test parsing of escaped values
+3 = \u1234\t\r\n\uAB\
+\u1\n
+# test multiline properties
+4 = this is \
+multiline property
+5 = this is \
+ another multiline property
+# property with DOS EOL
+6 = test\u0036
+# test multiline property with with DOS EOL
+7 = yet another multi\
+ line propery
+# trimming should not trim escaped whitespaces
+8 = \ttest5\u0020
+# another variant of #8
+9 = \ test6\t
+# test UTF-8 encoded property/value
+10aሴb = c췯d
+# next property should test unicode escaping at the boundary of parsing buffer
+# buffer size is expected to be 4096 so add comments to get to this offset
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+###############################################################################
+11 = \uABCD
diff --git a/python/compare-locales/compare_locales/tests/data/test.properties b/python/compare-locales/compare_locales/tests/data/test.properties
new file mode 100644
index 000000000..19cae9702
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/data/test.properties
@@ -0,0 +1,14 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+1=1
+ 2=2
+3 =3
+ 4 =4
+5=5
+6= 6
+7=7
+8= 8
+# this is a comment
+9=this is the first part of a continued line \
+ and here is the 2nd part
diff --git a/python/compare-locales/compare_locales/tests/data/triple-license.dtd b/python/compare-locales/compare_locales/tests/data/triple-license.dtd
new file mode 100644
index 000000000..4a28b17a6
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/data/triple-license.dtd
@@ -0,0 +1,38 @@
+<!-- ***** BEGIN LICENSE BLOCK *****
+#if 0
+ - Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ -
+ - The contents of this file are subject to the Mozilla Public License Version
+ - 1.1 (the "License"); you may not use this file except in compliance with
+ - the License. You may obtain a copy of the License at
+ - http://www.mozilla.org/MPL/
+ -
+ - Software distributed under the License is distributed on an "AS IS" basis,
+ - WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ - for the specific language governing rights and limitations under the
+ - License.
+ -
+ - The Original Code is mozilla.org Code.
+ -
+ - The Initial Developer of the Original Code is dummy.
+ - Portions created by the Initial Developer are Copyright (C) 2005
+ - the Initial Developer. All Rights Reserved.
+ -
+ - Contributor(s):
+ -
+ - Alternatively, the contents of this file may be used under the terms of
+ - either the GNU General Public License Version 2 or later (the "GPL"), or
+ - the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ - in which case the provisions of the GPL or the LGPL are applicable instead
+ - of those above. If you wish to allow use of your version of this file only
+ - under the terms of either the GPL or the LGPL, and not to allow others to
+ - use your version of this file under the terms of the MPL, indicate your
+ - decision by deleting the provisions above and replace them with the notice
+ - and other provisions required by the LGPL or the GPL. If you do not delete
+ - the provisions above, a recipient may use your version of this file under
+ - the terms of any one of the MPL, the GPL or the LGPL.
+ -
+#endif
+ - ***** END LICENSE BLOCK ***** -->
+
+<!ENTITY foo "value">
diff --git a/python/compare-locales/compare_locales/tests/test_checks.py b/python/compare-locales/compare_locales/tests/test_checks.py
new file mode 100644
index 000000000..b995d43f9
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_checks.py
@@ -0,0 +1,403 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales.checks import getChecker
+from compare_locales.parser import getParser, Entity
+from compare_locales.paths import File
+
+
+class BaseHelper(unittest.TestCase):
+ file = None
+ refContent = None
+
+ def setUp(self):
+ p = getParser(self.file.file)
+ p.readContents(self.refContent)
+ self.refList, self.refMap = p.parse()
+
+ def _test(self, content, refWarnOrErrors, with_ref_file=False):
+ p = getParser(self.file.file)
+ p.readContents(content)
+ l10n = [e for e in p]
+ assert len(l10n) == 1
+ l10n = l10n[0]
+ if with_ref_file:
+ kwargs = {
+ 'reference': self.refList
+ }
+ else:
+ kwargs = {}
+ checker = getChecker(self.file, **kwargs)
+ ref = self.refList[self.refMap[l10n.key]]
+ found = tuple(checker.check(ref, l10n))
+ self.assertEqual(found, refWarnOrErrors)
+
+
+class TestProperties(BaseHelper):
+ file = File('foo.properties', 'foo.properties')
+ refContent = '''some = value
+'''
+
+ def testGood(self):
+ self._test('''some = localized''',
+ tuple())
+
+ def testMissedEscape(self):
+ self._test(r'''some = \u67ood escape, bad \escape''',
+ (('warning', 20, r'unknown escape sequence, \e',
+ 'escape'),))
+
+
+class TestPlurals(BaseHelper):
+ file = File('foo.properties', 'foo.properties')
+ refContent = '''\
+# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
+# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
+# #1 number of files
+# example: 111 files - Downloads
+downloadsTitleFiles=#1 file - Downloads;#1 files - #2
+'''
+
+ def testGood(self):
+ self._test('''\
+# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
+# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
+# #1 number of files
+# example: 111 files - Downloads
+downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 filers
+''',
+ tuple())
+
+ def testNotUsed(self):
+ self._test('''\
+# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
+# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
+# #1 number of files
+# example: 111 files - Downloads
+downloadsTitleFiles=#1 file - Downloads;#1 files - Downloads;#1 filers
+''',
+ (('warning', 0, 'not all variables used in l10n',
+ 'plural'),))
+
+ def testNotDefined(self):
+ self._test('''\
+# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms.
+# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals
+# #1 number of files
+# example: 111 files - Downloads
+downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 #3
+''',
+ (('error', 0, 'unreplaced variables in l10n', 'plural'),))
+
+
+class TestDTDs(BaseHelper):
+ file = File('foo.dtd', 'foo.dtd')
+ refContent = '''<!ENTITY foo "This is &apos;good&apos;">
+<!ENTITY width "10ch">
+<!ENTITY style "width: 20ch; height: 280px;">
+<!ENTITY minStyle "min-height: 50em;">
+<!ENTITY ftd "0">
+<!ENTITY formatPercent "This is 100&#037; correct">
+<!ENTITY some.key "K">
+'''
+
+ def testWarning(self):
+ self._test('''<!ENTITY foo "This is &not; good">
+''',
+ (('warning', (0, 0), 'Referencing unknown entity `not`',
+ 'xmlparse'),))
+ # make sure we only handle translated entity references
+ self._test(u'''<!ENTITY foo "This is &ƞǿŧ; good">
+'''.encode('utf-8'),
+ (('warning', (0, 0), u'Referencing unknown entity `ƞǿŧ`',
+ 'xmlparse'),))
+
+ def testErrorFirstLine(self):
+ self._test('''<!ENTITY foo "This is </bad> stuff">
+''',
+ (('error', (1, 10), 'mismatched tag', 'xmlparse'),))
+
+ def testErrorSecondLine(self):
+ self._test('''<!ENTITY foo "This is
+ </bad>
+stuff">
+''',
+ (('error', (2, 4), 'mismatched tag', 'xmlparse'),))
+
+ def testKeyErrorSingleAmpersand(self):
+ self._test('''<!ENTITY some.key "&">
+''',
+ (('error', (1, 1), 'not well-formed (invalid token)',
+ 'xmlparse'),))
+
+ def testXMLEntity(self):
+ self._test('''<!ENTITY foo "This is &quot;good&quot;">
+''',
+ tuple())
+
+ def testPercentEntity(self):
+ self._test('''<!ENTITY formatPercent "Another 100&#037;">
+''',
+ tuple())
+ self._test('''<!ENTITY formatPercent "Bad 100% should fail">
+''',
+ (('error', (0, 32), 'not well-formed (invalid token)',
+ 'xmlparse'),))
+
+ def testNoNumber(self):
+ self._test('''<!ENTITY ftd "foo">''',
+ (('warning', 0, 'reference is a number', 'number'),))
+
+ def testNoLength(self):
+ self._test('''<!ENTITY width "15miles">''',
+ (('error', 0, 'reference is a CSS length', 'css'),))
+
+ def testNoStyle(self):
+ self._test('''<!ENTITY style "15ch">''',
+ (('error', 0, 'reference is a CSS spec', 'css'),))
+ self._test('''<!ENTITY style "junk">''',
+ (('error', 0, 'reference is a CSS spec', 'css'),))
+
+ def testStyleWarnings(self):
+ self._test('''<!ENTITY style "width:15ch">''',
+ (('warning', 0, 'height only in reference', 'css'),))
+ self._test('''<!ENTITY style "width:15em;height:200px;">''',
+ (('warning', 0, "units for width don't match (em != ch)",
+ 'css'),))
+
+ def testNoWarning(self):
+ self._test('''<!ENTITY width "12em">''', tuple())
+ self._test('''<!ENTITY style "width:12ch;height:200px;">''', tuple())
+ self._test('''<!ENTITY ftd "0">''', tuple())
+
+
+class TestEntitiesInDTDs(BaseHelper):
+ file = File('foo.dtd', 'foo.dtd')
+ refContent = '''<!ENTITY short "This is &brandShortName;">
+<!ENTITY shorter "This is &brandShorterName;">
+<!ENTITY ent.start "Using &brandShorterName; start to">
+<!ENTITY ent.end " end">
+'''
+
+ def testOK(self):
+ self._test('''<!ENTITY ent.start "Mit &brandShorterName;">''', tuple(),
+ with_ref_file=True)
+
+ def testMismatch(self):
+ self._test('''<!ENTITY ent.start "Mit &brandShortName;">''',
+ (('warning', (0, 0),
+ 'Entity brandShortName referenced, '
+ 'but brandShorterName used in context',
+ 'xmlparse'),),
+ with_ref_file=True)
+
+ def testAcross(self):
+ self._test('''<!ENTITY ent.end "Mit &brandShorterName;">''',
+ tuple(),
+ with_ref_file=True)
+
+ def testAcrossWithMismatch(self):
+ '''If we could tell that ent.start and ent.end are one string,
+ we should warn. Sadly, we can't, so this goes without warning.'''
+ self._test('''<!ENTITY ent.end "Mit &brandShortName;">''',
+ tuple(),
+ with_ref_file=True)
+
+ def testUnknownWithRef(self):
+ self._test('''<!ENTITY ent.start "Mit &foopy;">''',
+ (('warning',
+ (0, 0),
+ 'Referencing unknown entity `foopy` '
+ '(brandShorterName used in context, '
+ 'brandShortName known)',
+ 'xmlparse'),),
+ with_ref_file=True)
+
+ def testUnknown(self):
+ self._test('''<!ENTITY ent.end "Mit &foopy;">''',
+ (('warning',
+ (0, 0),
+ 'Referencing unknown entity `foopy`'
+ ' (brandShortName, brandShorterName known)',
+ 'xmlparse'),),
+ with_ref_file=True)
+
+
+class TestAndroid(unittest.TestCase):
+ """Test Android checker
+
+ Make sure we're hitting our extra rules only if
+ we're passing in a DTD file in the embedding/android module.
+ """
+ apos_msg = u"Apostrophes in Android DTDs need escaping with \\' or " + \
+ u"\\u0027, or use \u2019, or put string in quotes."
+ quot_msg = u"Quotes in Android DTDs need escaping with \\\" or " + \
+ u"\\u0022, or put string in apostrophes."
+
+ def getEntity(self, v):
+ return Entity(v, lambda s: s, (0, len(v)), (), (0, 0), (), (),
+ (0, len(v)), ())
+
+ def getDTDEntity(self, v):
+ v = v.replace('"', '&quot;')
+ return Entity('<!ENTITY foo "%s">' % v,
+ lambda s: s,
+ (0, len(v) + 16), (), (0, 0), (), (9, 12),
+ (14, len(v) + 14), ())
+
+ def test_android_dtd(self):
+ """Testing the actual android checks. The logic is involved,
+ so this is a lot of nitty gritty detail tests.
+ """
+ f = File("embedding/android/strings.dtd", "strings.dtd",
+ "embedding/android")
+ checker = getChecker(f)
+ # good string
+ ref = self.getDTDEntity("plain string")
+ l10n = self.getDTDEntity("plain localized string")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # dtd warning
+ l10n = self.getDTDEntity("plain localized string &ref;")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('warning', (0, 0),
+ 'Referencing unknown entity `ref`', 'xmlparse'),))
+ # no report on stray ampersand or quote, if not completely quoted
+ for i in xrange(3):
+ # make sure we're catching unescaped apostrophes,
+            # try 0..5 backslashes
+ l10n = self.getDTDEntity("\\"*(2*i) + "'")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 2*i, self.apos_msg, 'android'),))
+ l10n = self.getDTDEntity("\\"*(2*i + 1) + "'")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # make sure we don't report if apos string is quoted
+ l10n = self.getDTDEntity('"' + "\\"*(2*i) + "'\"")
+ tpl = tuple(checker.check(ref, l10n))
+ self.assertEqual(tpl, (),
+ "`%s` shouldn't fail but got %s"
+ % (l10n.val, str(tpl)))
+ l10n = self.getDTDEntity('"' + "\\"*(2*i+1) + "'\"")
+ tpl = tuple(checker.check(ref, l10n))
+ self.assertEqual(tpl, (),
+ "`%s` shouldn't fail but got %s"
+ % (l10n.val, str(tpl)))
+            # make sure we're catching unescaped quotes, try 0..5 backslashes
+ l10n = self.getDTDEntity("\\"*(2*i) + "\"")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 2*i, self.quot_msg, 'android'),))
+ l10n = self.getDTDEntity("\\"*(2*i + 1) + "'")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # make sure we don't report if quote string is single quoted
+ l10n = self.getDTDEntity("'" + "\\"*(2*i) + "\"'")
+ tpl = tuple(checker.check(ref, l10n))
+ self.assertEqual(tpl, (),
+ "`%s` shouldn't fail but got %s" %
+ (l10n.val, str(tpl)))
+ l10n = self.getDTDEntity('"' + "\\"*(2*i+1) + "'\"")
+ tpl = tuple(checker.check(ref, l10n))
+ self.assertEqual(tpl, (),
+ "`%s` shouldn't fail but got %s" %
+ (l10n.val, str(tpl)))
+ # check for mixed quotes and ampersands
+ l10n = self.getDTDEntity("'\"")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 0, self.apos_msg, 'android'),
+ ('error', 1, self.quot_msg, 'android')))
+ l10n = self.getDTDEntity("''\"'")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 1, self.apos_msg, 'android'),))
+ l10n = self.getDTDEntity('"\'""')
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 2, self.quot_msg, 'android'),))
+
+ # broken unicode escape
+ l10n = self.getDTDEntity("Some broken \u098 unicode")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 12, 'truncated \\uXXXX escape',
+ 'android'),))
+ # broken unicode escape, try to set the error off
+ l10n = self.getDTDEntity(u"\u9690"*14+"\u006"+" "+"\u0064")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 14, 'truncated \\uXXXX escape',
+ 'android'),))
+
+ def test_android_prop(self):
+ f = File("embedding/android/strings.properties", "strings.properties",
+ "embedding/android")
+ checker = getChecker(f)
+ # good plain string
+ ref = self.getEntity("plain string")
+ l10n = self.getEntity("plain localized string")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # no dtd warning
+ ref = self.getEntity("plain string")
+ l10n = self.getEntity("plain localized string &ref;")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # no report on stray ampersand
+ ref = self.getEntity("plain string")
+ l10n = self.getEntity("plain localized string with apos: '")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # report on bad printf
+ ref = self.getEntity("string with %s")
+ l10n = self.getEntity("string with %S")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('error', 0, 'argument 1 `S` should be `s`',
+ 'printf'),))
+
+ def test_non_android_dtd(self):
+ f = File("browser/strings.dtd", "strings.dtd", "browser")
+ checker = getChecker(f)
+ # good string
+ ref = self.getDTDEntity("plain string")
+ l10n = self.getDTDEntity("plain localized string")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # dtd warning
+ ref = self.getDTDEntity("plain string")
+ l10n = self.getDTDEntity("plain localized string &ref;")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('warning', (0, 0),
+ 'Referencing unknown entity `ref`', 'xmlparse'),))
+ # no report on stray ampersand
+ ref = self.getDTDEntity("plain string")
+ l10n = self.getDTDEntity("plain localized string with apos: '")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+
+ def test_entities_across_dtd(self):
+ f = File("browser/strings.dtd", "strings.dtd", "browser")
+ p = getParser(f.file)
+ p.readContents('<!ENTITY other "some &good.ref;">')
+ ref = p.parse()
+ checker = getChecker(f, reference=ref[0])
+ # good string
+ ref = self.getDTDEntity("plain string")
+ l10n = self.getDTDEntity("plain localized string")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+ # dtd warning
+ ref = self.getDTDEntity("plain string")
+ l10n = self.getDTDEntity("plain localized string &ref;")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ (('warning', (0, 0),
+ 'Referencing unknown entity `ref` (good.ref known)',
+ 'xmlparse'),))
+ # no report on stray ampersand
+ ref = self.getDTDEntity("plain string")
+ l10n = self.getDTDEntity("plain localized string with &good.ref;")
+ self.assertEqual(tuple(checker.check(ref, l10n)),
+ ())
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/compare-locales/compare_locales/tests/test_compare.py b/python/compare-locales/compare_locales/tests/test_compare.py
new file mode 100644
index 000000000..51ba7cd8c
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_compare.py
@@ -0,0 +1,90 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales import compare
+
+
+class TestTree(unittest.TestCase):
+ '''Test the Tree utility class
+
+ Tree value classes need to be in-place editable
+ '''
+
+ def test_empty_dict(self):
+ tree = compare.Tree(dict)
+ self.assertEqual(list(tree.getContent()), [])
+ self.assertDictEqual(
+ tree.toJSON(),
+ {}
+ )
+
+ def test_disjoint_dict(self):
+ tree = compare.Tree(dict)
+ tree['one/entry']['leaf'] = 1
+ tree['two/other']['leaf'] = 2
+ self.assertEqual(
+ list(tree.getContent()),
+ [
+ (0, 'key', ('one', 'entry')),
+ (1, 'value', {'leaf': 1}),
+ (0, 'key', ('two', 'other')),
+ (1, 'value', {'leaf': 2})
+ ]
+ )
+ self.assertDictEqual(
+ tree.toJSON(),
+ {
+ 'children': [
+ ('one/entry',
+ {'value': {'leaf': 1}}
+ ),
+ ('two/other',
+ {'value': {'leaf': 2}}
+ )
+ ]
+ }
+ )
+ self.assertMultiLineEqual(
+ str(tree),
+ '''\
+one/entry
+ {'leaf': 1}
+two/other
+ {'leaf': 2}\
+'''
+ )
+
+ def test_overlapping_dict(self):
+ tree = compare.Tree(dict)
+ tree['one/entry']['leaf'] = 1
+ tree['one/other']['leaf'] = 2
+ self.assertEqual(
+ list(tree.getContent()),
+ [
+ (0, 'key', ('one',)),
+ (1, 'key', ('entry',)),
+ (2, 'value', {'leaf': 1}),
+ (1, 'key', ('other',)),
+ (2, 'value', {'leaf': 2})
+ ]
+ )
+ self.assertDictEqual(
+ tree.toJSON(),
+ {
+ 'children': [
+ ('one', {
+ 'children': [
+ ('entry',
+ {'value': {'leaf': 1}}
+ ),
+ ('other',
+ {'value': {'leaf': 2}}
+ )
+ ]
+ })
+ ]
+ }
+ )
diff --git a/python/compare-locales/compare_locales/tests/test_dtd.py b/python/compare-locales/compare_locales/tests/test_dtd.py
new file mode 100644
index 000000000..87ddcde30
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_dtd.py
@@ -0,0 +1,86 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''Tests for the DTD parser.
+'''
+
+import unittest
+import re
+
+from compare_locales.parser import getParser
+from compare_locales.tests import ParserTestMixin
+
+
+class TestDTD(ParserTestMixin, unittest.TestCase):
+ '''Tests for the DTD Parser.'''
+ filename = 'foo.dtd'
+
+ def test_one_entity(self):
+ self._test('''<!ENTITY foo.label "stuff">''',
+ (('foo.label', 'stuff'),))
+
+ quoteContent = '''<!ENTITY good.one "one">
+<!ENTITY bad.one "bad " quote">
+<!ENTITY good.two "two">
+<!ENTITY bad.two "bad "quoted" word">
+<!ENTITY good.three "three">
+<!ENTITY good.four "good ' quote">
+<!ENTITY good.five "good 'quoted' word">
+'''
+ quoteRef = (
+ ('good.one', 'one'),
+ ('_junk_\\d_25-56$', '<!ENTITY bad.one "bad " quote">'),
+ ('good.two', 'two'),
+ ('_junk_\\d_82-119$', '<!ENTITY bad.two "bad "quoted" word">'),
+ ('good.three', 'three'),
+ ('good.four', 'good \' quote'),
+ ('good.five', 'good \'quoted\' word'),)
+
+ def test_quotes(self):
+ self._test(self.quoteContent, self.quoteRef)
+
+ def test_apos(self):
+ qr = re.compile('[\'"]', re.M)
+
+ def quot2apos(s):
+ return qr.sub(lambda m: m.group(0) == '"' and "'" or '"', s)
+
+ self._test(quot2apos(self.quoteContent),
+                   [(t[0], quot2apos(t[1])) for t in self.quoteRef])
+
+ def test_parsed_ref(self):
+ self._test('''<!ENTITY % fooDTD SYSTEM "chrome://brand.dtd">
+ %fooDTD;
+''',
+ (('fooDTD', '"chrome://brand.dtd"'),))
+
+ def test_trailing_comment(self):
+ self._test('''<!ENTITY first "string">
+<!ENTITY second "string">
+<!--
+<!ENTITY commented "out">
+-->
+''',
+ (('first', 'string'), ('second', 'string')))
+
+ def test_license_header(self):
+ p = getParser('foo.dtd')
+ p.readContents(self.resource('triple-license.dtd'))
+ for e in p:
+ self.assertEqual(e.key, 'foo')
+ self.assertEqual(e.val, 'value')
+ self.assert_('MPL' in p.header)
+ p.readContents('''\
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ - You can obtain one at http://mozilla.org/MPL/2.0/. -->
+<!ENTITY foo "value">
+''')
+ for e in p:
+ self.assertEqual(e.key, 'foo')
+ self.assertEqual(e.val, 'value')
+ self.assert_('MPL' in p.header)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/compare-locales/compare_locales/tests/test_ini.py b/python/compare-locales/compare_locales/tests/test_ini.py
new file mode 100644
index 000000000..4c8cc03e1
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_ini.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales.tests import ParserTestMixin
+
+
+mpl2 = '''\
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this file,
+; You can obtain one at http://mozilla.org/MPL/2.0/.
+'''
+
+
+class TestIniParser(ParserTestMixin, unittest.TestCase):
+
+ filename = 'foo.ini'
+
+ def testSimpleHeader(self):
+ self._test('''; This file is in the UTF-8 encoding
+[Strings]
+TitleText=Some Title
+''', (('TitleText', 'Some Title'),))
+ self.assert_('UTF-8' in self.parser.header)
+
+ def testMPL2_Space_UTF(self):
+ self._test(mpl2 + '''
+; This file is in the UTF-8 encoding
+[Strings]
+TitleText=Some Title
+''', (('TitleText', 'Some Title'),))
+ self.assert_('MPL' in self.parser.header)
+
+ def testMPL2_Space(self):
+ self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+''', (('TitleText', 'Some Title'),))
+ self.assert_('MPL' in self.parser.header)
+
+ def testMPL2_MultiSpace(self):
+ self._test(mpl2 + '''\
+
+; more comments
+
+[Strings]
+TitleText=Some Title
+''', (('TitleText', 'Some Title'),))
+ self.assert_('MPL' in self.parser.header)
+
+ def testMPL2_JunkBeforeCategory(self):
+ self._test(mpl2 + '''\
+Junk
+[Strings]
+TitleText=Some Title
+''', (('_junk_\\d+_0-213$', mpl2 + '''\
+Junk
+[Strings]'''), ('TitleText', 'Some Title')))
+ self.assert_('MPL' not in self.parser.header)
+
+ def test_TrailingComment(self):
+ self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+;Stray trailing comment
+''', (('TitleText', 'Some Title'),))
+ self.assert_('MPL' in self.parser.header)
+
+ def test_SpacedTrailingComments(self):
+ self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+
+;Stray trailing comment
+;Second stray comment
+
+''', (('TitleText', 'Some Title'),))
+ self.assert_('MPL' in self.parser.header)
+
+ def test_TrailingCommentsAndJunk(self):
+ self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+
+;Stray trailing comment
+Junk
+;Second stray comment
+
+''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-284$', '''\
+
+;Stray trailing comment
+Junk
+;Second stray comment
+
+''')))
+ self.assert_('MPL' in self.parser.header)
+
+ def test_JunkInbetweenEntries(self):
+ self._test(mpl2 + '''
+[Strings]
+TitleText=Some Title
+
+Junk
+
+Good=other string
+''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-236$', '''\
+
+Junk'''), ('Good', 'other string')))
+ self.assert_('MPL' in self.parser.header)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/compare-locales/compare_locales/tests/test_merge.py b/python/compare-locales/compare_locales/tests/test_merge.py
new file mode 100644
index 000000000..c006edbb5
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_merge.py
@@ -0,0 +1,265 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+import os
+from tempfile import mkdtemp
+import shutil
+
+from compare_locales.parser import getParser
+from compare_locales.paths import File
+from compare_locales.compare import ContentComparer
+
+
+class ContentMixin(object):
+    maxDiff = None  # we have big dictionaries to compare
+ extension = None # OVERLOAD
+
+ def reference(self, content):
+ self.ref = os.path.join(self.tmp, "en-reference" + self.extension)
+        with open(self.ref, "w") as f:
+            f.write(content)
+
+ def localized(self, content):
+ self.l10n = os.path.join(self.tmp, "l10n" + self.extension)
+        with open(self.l10n, "w") as f:
+            f.write(content)
+
+
+class TestProperties(unittest.TestCase, ContentMixin):
+ extension = '.properties'
+
+ def setUp(self):
+ self.tmp = mkdtemp()
+ os.mkdir(os.path.join(self.tmp, "merge"))
+
+ def tearDown(self):
+ shutil.rmtree(self.tmp)
+ del self.tmp
+
+ def testGood(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""foo = fooVal
+bar = barVal
+eff = effVal""")
+ self.localized("""foo = lFoo
+bar = lBar
+eff = lEff
+""")
+ cc = ContentComparer()
+ cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+ cc.compare(File(self.ref, "en-reference.properties", ""),
+ File(self.l10n, "l10n.properties", ""))
+ self.assertDictEqual(
+ cc.observer.toJSON(),
+ {'summary':
+ {None: {
+ 'changed': 3
+ }},
+ 'details': {}
+ }
+ )
+ self.assert_(not os.path.exists(os.path.join(cc.merge_stage,
+ 'l10n.properties')))
+
+ def testMissing(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""foo = fooVal
+bar = barVal
+eff = effVal""")
+ self.localized("""bar = lBar
+""")
+ cc = ContentComparer()
+ cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+ cc.compare(File(self.ref, "en-reference.properties", ""),
+ File(self.l10n, "l10n.properties", ""))
+ self.assertDictEqual(
+ cc.observer.toJSON(),
+ {'summary':
+ {None: {
+ 'changed': 1, 'missing': 2
+ }},
+ 'details': {
+ 'children': [
+ ('l10n.properties',
+ {'value': {'missingEntity': [u'eff', u'foo']}}
+ )
+ ]}
+ }
+ )
+ mergefile = os.path.join(self.tmp, "merge", "l10n.properties")
+ self.assertTrue(os.path.isfile(mergefile))
+ p = getParser(mergefile)
+ p.readFile(mergefile)
+ [m, n] = p.parse()
+        self.assertEqual([e.key for e in m], ["bar", "eff", "foo"])
+
+ def testError(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""foo = fooVal
+bar = %d barVal
+eff = effVal""")
+ self.localized("""bar = %S lBar
+eff = leffVal
+""")
+ cc = ContentComparer()
+ cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+ cc.compare(File(self.ref, "en-reference.properties", ""),
+ File(self.l10n, "l10n.properties", ""))
+ self.assertDictEqual(
+ cc.observer.toJSON(),
+ {'summary':
+ {None: {
+ 'changed': 2, 'errors': 1, 'missing': 1
+ }},
+ 'details': {
+ 'children': [
+ ('l10n.properties',
+ {'value': {
+ 'error': [u'argument 1 `S` should be `d` '
+ u'at line 1, column 6 for bar'],
+ 'missingEntity': [u'foo']}}
+ )
+ ]}
+ }
+ )
+ mergefile = os.path.join(self.tmp, "merge", "l10n.properties")
+ self.assertTrue(os.path.isfile(mergefile))
+ p = getParser(mergefile)
+ p.readFile(mergefile)
+ [m, n] = p.parse()
+ self.assertEqual([e.key for e in m], ["eff", "foo", "bar"])
+ self.assertEqual(m[n['bar']].val, '%d barVal')
+
+ def testObsolete(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""foo = fooVal
+eff = effVal""")
+ self.localized("""foo = fooVal
+other = obsolete
+eff = leffVal
+""")
+ cc = ContentComparer()
+ cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+ cc.compare(File(self.ref, "en-reference.properties", ""),
+ File(self.l10n, "l10n.properties", ""))
+ self.assertDictEqual(
+ cc.observer.toJSON(),
+ {'summary':
+ {None: {
+ 'changed': 1, 'obsolete': 1, 'unchanged': 1
+ }},
+ 'details': {
+ 'children': [
+ ('l10n.properties',
+ {'value': {'obsoleteEntity': [u'other']}})]},
+ }
+ )
+
+
+class TestDTD(unittest.TestCase, ContentMixin):
+ extension = '.dtd'
+
+ def setUp(self):
+ self.tmp = mkdtemp()
+ os.mkdir(os.path.join(self.tmp, "merge"))
+
+ def tearDown(self):
+ shutil.rmtree(self.tmp)
+ del self.tmp
+
+ def testGood(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'barVal'>
+<!ENTITY eff 'effVal'>""")
+ self.localized("""<!ENTITY foo 'lFoo'>
+<!ENTITY bar 'lBar'>
+<!ENTITY eff 'lEff'>
+""")
+ cc = ContentComparer()
+ cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+ cc.compare(File(self.ref, "en-reference.dtd", ""),
+ File(self.l10n, "l10n.dtd", ""))
+ self.assertDictEqual(
+ cc.observer.toJSON(),
+ {'summary':
+ {None: {
+ 'changed': 3
+ }},
+ 'details': {}
+ }
+ )
+ self.assert_(
+ not os.path.exists(os.path.join(cc.merge_stage, 'l10n.dtd')))
+
+ def testMissing(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'barVal'>
+<!ENTITY eff 'effVal'>""")
+ self.localized("""<!ENTITY bar 'lBar'>
+""")
+ cc = ContentComparer()
+ cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+ cc.compare(File(self.ref, "en-reference.dtd", ""),
+ File(self.l10n, "l10n.dtd", ""))
+ self.assertDictEqual(
+ cc.observer.toJSON(),
+ {'summary':
+ {None: {
+ 'changed': 1, 'missing': 2
+ }},
+ 'details': {
+ 'children': [
+ ('l10n.dtd',
+ {'value': {'missingEntity': [u'eff', u'foo']}}
+ )
+ ]}
+ }
+ )
+ mergefile = os.path.join(self.tmp, "merge", "l10n.dtd")
+ self.assertTrue(os.path.isfile(mergefile))
+ p = getParser(mergefile)
+ p.readFile(mergefile)
+ [m, n] = p.parse()
+        self.assertEqual([e.key for e in m], ["bar", "eff", "foo"])
+
+ def testJunk(self):
+ self.assertTrue(os.path.isdir(self.tmp))
+ self.reference("""<!ENTITY foo 'fooVal'>
+<!ENTITY bar 'barVal'>
+<!ENTITY eff 'effVal'>""")
+ self.localized("""<!ENTITY foo 'fooVal'>
+<!ENTY bar 'gimmick'>
+<!ENTITY eff 'effVal'>
+""")
+ cc = ContentComparer()
+ cc.set_merge_stage(os.path.join(self.tmp, "merge"))
+ cc.compare(File(self.ref, "en-reference.dtd", ""),
+ File(self.l10n, "l10n.dtd", ""))
+ self.assertDictEqual(
+ cc.observer.toJSON(),
+ {'summary':
+ {None: {
+ 'errors': 1, 'missing': 1, 'unchanged': 2
+ }},
+ 'details': {
+ 'children': [
+ ('l10n.dtd',
+ {'value': {
+ 'error': [u'Unparsed content "<!ENTY bar '
+ u'\'gimmick\'>" at 23-44'],
+ 'missingEntity': [u'bar']}}
+ )
+ ]}
+ }
+ )
+ mergefile = os.path.join(self.tmp, "merge", "l10n.dtd")
+ self.assertTrue(os.path.isfile(mergefile))
+ p = getParser(mergefile)
+ p.readFile(mergefile)
+ [m, n] = p.parse()
+        self.assertEqual([e.key for e in m], ["foo", "eff", "bar"])
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/compare-locales/compare_locales/tests/test_properties.py b/python/compare-locales/compare_locales/tests/test_properties.py
new file mode 100644
index 000000000..331a1a57c
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_properties.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales.tests import ParserTestMixin
+
+
+class TestPropertiesParser(ParserTestMixin, unittest.TestCase):
+
+ filename = 'foo.properties'
+
+ def testBackslashes(self):
+ self._test(r'''one_line = This is one line
+two_line = This is the first \
+of two lines
+one_line_trailing = This line ends in \\
+and has junk
+two_lines_triple = This line is one of two and ends in \\\
+and still has another line coming
+''', (
+ ('one_line', 'This is one line'),
+ ('two_line', u'This is the first of two lines'),
+ ('one_line_trailing', u'This line ends in \\'),
+ ('_junk_\\d+_113-126$', 'and has junk\n'),
+ ('two_lines_triple', 'This line is one of two and ends in \\'
+ 'and still has another line coming')))
+
+ def testProperties(self):
+ # port of netwerk/test/PropertiesTest.cpp
+ self.parser.readContents(self.resource('test.properties'))
+ ref = ['1', '2', '3', '4', '5', '6', '7', '8',
+ 'this is the first part of a continued line '
+ 'and here is the 2nd part']
+ i = iter(self.parser)
+ for r, e in zip(ref, i):
+ self.assertEqual(e.val, r)
+
+ def test_bug121341(self):
+ # port of xpcom/tests/unit/test_bug121341.js
+ self.parser.readContents(self.resource('bug121341.properties'))
+ ref = ['abc', 'xy', u"\u1234\t\r\n\u00AB\u0001\n",
+ "this is multiline property",
+ "this is another multiline property", u"test\u0036",
+ "yet another multiline propery", u"\ttest5\u0020", " test6\t",
+ u"c\uCDEFd", u"\uABCD"]
+ i = iter(self.parser)
+ for r, e in zip(ref, i):
+ self.assertEqual(e.val, r)
+
+ def test_comment_in_multi(self):
+ self._test(r'''bar=one line with a \
+# part that looks like a comment \
+and an end''', (('bar', 'one line with a # part that looks like a comment '
+ 'and an end'),))
+
+ def test_license_header(self):
+ self._test('''\
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+foo=value
+''', (('foo', 'value'),))
+ self.assert_('MPL' in self.parser.header)
+
+ def test_escapes(self):
+ self.parser.readContents(r'''
+# unicode escapes
+zero = some \unicode
+one = \u0
+two = \u41
+three = \u042
+four = \u0043
+five = \u0044a
+six = \a
+seven = \n\r\t\\
+''')
+ ref = ['some unicode', chr(0), 'A', 'B', 'C', 'Da', 'a', '\n\r\t\\']
+ for r, e in zip(ref, self.parser):
+ self.assertEqual(e.val, r)
+
+ def test_trailing_comment(self):
+ self._test('''first = string
+second = string
+
+#
+#commented out
+''', (('first', 'string'), ('second', 'string')))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/compare-locales/compare_locales/tests/test_util.py b/python/compare-locales/compare_locales/tests/test_util.py
new file mode 100644
index 000000000..fd2d2c92b
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_util.py
@@ -0,0 +1,29 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales import util
+
+
+class ParseLocalesTest(unittest.TestCase):
+ def test_empty(self):
+ self.assertEquals(util.parseLocales(''), [])
+
+ def test_all(self):
+ self.assertEquals(util.parseLocales('''af
+de'''), ['af', 'de'])
+
+ def test_shipped(self):
+ self.assertEquals(util.parseLocales('''af
+ja win mac
+de'''), ['af', 'de', 'ja'])
+
+ def test_sparse(self):
+ self.assertEquals(util.parseLocales('''
+af
+
+de
+
+'''), ['af', 'de'])
diff --git a/python/compare-locales/compare_locales/tests/test_webapps.py b/python/compare-locales/compare_locales/tests/test_webapps.py
new file mode 100644
index 000000000..2f1223649
--- /dev/null
+++ b/python/compare-locales/compare_locales/tests/test_webapps.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import unittest
+
+from compare_locales import webapps
+
+
+class TestFileComparison(unittest.TestCase):
+
+ def mock_FileComparison(self, mock_listdir):
+ class Target(webapps.FileComparison):
+ def _listdir(self):
+ return mock_listdir()
+ return Target('.', 'en-US')
+
+ def test_just_reference(self):
+ def _listdir():
+ return ['my_app.en-US.properties']
+ filecomp = self.mock_FileComparison(_listdir)
+ filecomp.files()
+ self.assertEqual(filecomp.locales(), [])
+ self.assertEqual(filecomp._reference.keys(), ['my_app'])
+ file_ = filecomp._reference['my_app']
+ self.assertEqual(file_.file, 'locales/my_app.en-US.properties')
+
+ def test_just_locales(self):
+ def _listdir():
+ return ['my_app.ar.properties',
+ 'my_app.sr-Latn.properties',
+ 'my_app.sv-SE.properties',
+ 'my_app.po_SI.properties']
+ filecomp = self.mock_FileComparison(_listdir)
+ filecomp.files()
+ self.assertEqual(filecomp.locales(),
+ ['ar', 'sr-Latn', 'sv-SE'])
+ self.assertEqual(filecomp._files['ar'].keys(), ['my_app'])
+ file_ = filecomp._files['ar']['my_app']
+ self.assertEqual(file_.file, 'locales/my_app.ar.properties')
diff --git a/python/compare-locales/compare_locales/util.py b/python/compare-locales/compare_locales/util.py
new file mode 100644
index 000000000..71eadd874
--- /dev/null
+++ b/python/compare-locales/compare_locales/util.py
@@ -0,0 +1,11 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file is shared between compare-locales and locale-inspector.
+# test_util is in compare-locales only, for the sake of easy
+# development.
+
+
+def parseLocales(content):
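+    '''Parse an all-locales file into a sorted list of locale codes.
+
+    Each non-empty line is expected to start with a locale code,
+    optionally followed by platform filters, e.g. "ja win mac" yields
+    "ja" (see the cases in test_util.py).
+    '''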
+ return sorted(l.split()[0] for l in content.splitlines() if l)
diff --git a/python/compare-locales/compare_locales/webapps.py b/python/compare-locales/compare_locales/webapps.py
new file mode 100644
index 000000000..42f5b5657
--- /dev/null
+++ b/python/compare-locales/compare_locales/webapps.py
@@ -0,0 +1,235 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''gaia-style web apps support
+
+This variant supports manifest.webapp localization as well as
+.properties files with a naming scheme of locales/foo.*.properties.
+'''
+
+from collections import defaultdict
+import json
+import os
+import os.path
+import re
+
+from compare_locales.paths import File, EnumerateDir
+from compare_locales.compare import AddRemove, ContentComparer
+
+
+class WebAppCompare(object):
+ '''For a given directory, analyze
+ /manifest.webapp
+ /locales/*.*.properties
+
+ Deduce the present locale codes.
+ '''
+ ignore_dirs = EnumerateDir.ignore_dirs
+ reference_locale = 'en-US'
+
+ def __init__(self, basedir):
+ '''Constructor
+ :param basedir: Directory of the web app to inspect
+ '''
+ self.basedir = basedir
+ self.manifest = Manifest(basedir, self.reference_locale)
+ self.files = FileComparison(basedir, self.reference_locale)
+ self.watcher = None
+
+ def compare(self, locales):
+ '''Compare the manifest.webapp and the locales/*.*.properties
+ '''
+ if not locales:
+ locales = self.locales()
+ self.manifest.compare(locales)
+ self.files.compare(locales)
+
+ def setWatcher(self, watcher):
+ self.watcher = watcher
+ self.manifest.watcher = watcher
+ self.files.watcher = watcher
+
+ def locales(self):
+ '''Inspect files on disk to find present languages.
+ :rtype: List of locales, sorted, including reference.
+ '''
+ locales = set(self.manifest.strings.keys())
+ locales.update(self.files.locales())
+        locales = sorted(locales)
+ return locales
+
+
+class Manifest(object):
+ '''Class that helps with parsing and inspection of manifest.webapp.
+ '''
+
+ def __init__(self, basedir, reference_locale):
+ self.file = File(os.path.join(basedir, 'manifest.webapp'),
+ 'manifest.webapp')
+ self.reference_locale = reference_locale
+ self._strings = None
+ self.watcher = None
+
+ @property
+ def strings(self):
+ if self._strings is None:
+ self._strings = self.load_and_parse()
+ return self._strings
+
+ def load_and_parse(self):
+ try:
+ manifest = json.load(open(self.file.fullpath))
+ except (ValueError, IOError), e:
+ if self.watcher:
+ self.watcher.notify('error', self.file, str(e))
+ return False
+ return self.extract_manifest_strings(manifest)
+
+ def extract_manifest_strings(self, manifest_fragment):
+ '''Extract localizable strings from a manifest dict.
+ This method is recursive, and returns a two-level dict,
+ first level being locale codes, second level being generated
+ key and localized value. Keys are generated by concatenating
+ each level in the json with a ".".
+ '''
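+        # For example, a fragment like (made-up manifest content)
+        #   {'name': 'App', 'locales': {'de': {'name': 'Anwendung'}}}
+        # yields
+        #   {'de': {'locales.AB_CD.name': 'Anwendung'}}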
+ rv = defaultdict(dict)
+ localizable = manifest_fragment.pop('locales', {})
+ if localizable:
+ for locale, keyvalue in localizable.iteritems():
+ for key, value in keyvalue.iteritems():
+ key = '.'.join(['locales', 'AB_CD', key])
+ rv[locale][key] = value
+ for key, sub_manifest in manifest_fragment.iteritems():
+ if not isinstance(sub_manifest, dict):
+ continue
+ subdict = self.extract_manifest_strings(sub_manifest)
+ if subdict:
+                for locale, keyvalue in subdict.iteritems():
+ rv[locale].update((key + '.' + subkey, value)
+ for subkey, value
+ in keyvalue.iteritems())
+ return rv
+
+ def compare(self, locales):
+ strings = self.strings
+ if not strings:
+ return
+        # create a copy so that we can muck around with it
+ strings = strings.copy()
+ reference = strings.pop(self.reference_locale)
+ for locale in locales:
+ if locale == self.reference_locale:
+ continue
+ self.compare_strings(reference,
+ strings.get(locale, {}),
+ locale)
+
+ def compare_strings(self, reference, l10n, locale):
+ add_remove = AddRemove()
+ add_remove.set_left(sorted(reference.keys()))
+ add_remove.set_right(sorted(l10n.keys()))
+ missing = obsolete = changed = unchanged = 0
+ for op, item_or_pair in add_remove:
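+            # AddRemove (defined in compare.py) yields ('equal',
+            # (left_item, right_item)) for items present on both sides,
+            # and otherwise a single item whose op is 'add' for entries
+            # only on the right (here: obsolete in l10n)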
+ if op == 'equal':
+ if reference[item_or_pair[0]] == l10n[item_or_pair[1]]:
+ unchanged += 1
+ else:
+ changed += 1
+ else:
+ key = item_or_pair.replace('.AB_CD.',
+ '.%s.' % locale)
+ if op == 'add':
+ # obsolete entry
+ obsolete += 1
+ self.watcher.notify('obsoleteEntity', self.file, key)
+ else:
+ # missing entry
+ missing += 1
+ self.watcher.notify('missingEntity', self.file, key)
+
+
+class FileComparison(object):
+ '''Compare the locales/*.*.properties files inside a webapp.
+ '''
+ prop = re.compile('(?P<base>.*)\\.'
+ '(?P<locale>[a-zA-Z]+(?:-[a-zA-Z]+)*)'
+ '\\.properties$')
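+    # e.g. 'my_app.sr-Latn.properties' matches with base 'my_app' and
+    # locale 'sr-Latn', while 'my_app.po_SI.properties' doesn't match
+    # (cf. test_webapps.py)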
+
+ def __init__(self, basedir, reference_locale):
+ self.basedir = basedir
+ self.reference_locale = reference_locale
+ self.watcher = None
+ self._reference = self._files = None
+
+ def locales(self):
+ '''Get the locales present in the webapp
+ '''
+ self.files()
+ locales = self._files.keys()
+ locales.sort()
+ return locales
+
+ def compare(self, locales):
+ self.files()
+ for locale in locales:
+ l10n = self._files[locale]
+ filecmp = AddRemove()
+ filecmp.set_left(sorted(self._reference.keys()))
+ filecmp.set_right(sorted(l10n.keys()))
+ for op, item_or_pair in filecmp:
+ if op == 'equal':
+ self.watcher.compare(self._reference[item_or_pair[0]],
+ l10n[item_or_pair[1]])
+ elif op == 'add':
+ # obsolete file
+ self.watcher.remove(l10n[item_or_pair])
+ else:
+ # missing file
+ _path = '.'.join([item_or_pair, locale, 'properties'])
+ missingFile = File(
+ os.path.join(self.basedir, 'locales', _path),
+ 'locales/' + _path)
+ self.watcher.add(self._reference[item_or_pair],
+ missingFile)
+
+ def files(self):
+ '''Read the list of locales from disk.
+ '''
+ if self._reference:
+ return
+ self._reference = {}
+ self._files = defaultdict(dict)
+ path_list = self._listdir()
+ for path in path_list:
+ match = self.prop.match(path)
+ if match is None:
+ continue
+ locale = match.group('locale')
+ if locale == self.reference_locale:
+ target = self._reference
+ else:
+ target = self._files[locale]
+ fullpath = os.path.join(self.basedir, 'locales', path)
+ target[match.group('base')] = File(fullpath, 'locales/' + path)
+
+ def _listdir(self):
+ 'Monkey-patch this for testing.'
+ return os.listdir(os.path.join(self.basedir, 'locales'))
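+ # Tests can stub this out to avoid hitting the disk, e.g.
+ # (hypothetical): cmp._listdir = lambda: ['app.en-US.properties']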
+
+
+def compare_web_app(basedir, locales, other_observer=None):
+ '''Compare gaia-style web app.
+
+ Optional arguments are:
+ - other_observer. An object implementing
+ notify(category, _file, data)
+ The return values of that callback are ignored.
+ '''
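+ # Typical usage (hypothetical path):
+ #   observer = compare_web_app('/path/to/webapp', ['de', 'fr'])
+ #   print observer.serialize()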
+ comparer = ContentComparer()
+ if other_observer is not None:
+ comparer.add_observer(other_observer)
+ webapp_comp = WebAppCompare(basedir)
+ webapp_comp.setWatcher(comparer)
+ webapp_comp.compare(locales)
+ return comparer.observer
diff --git a/python/compare-locales/docs/glossary.rst b/python/compare-locales/docs/glossary.rst
new file mode 100644
index 000000000..e89839b16
--- /dev/null
+++ b/python/compare-locales/docs/glossary.rst
@@ -0,0 +1,26 @@
+========
+Glossary
+========
+
+.. glossary::
+ :sorted:
+
+ Localization
+ The process of creating content in a native language, including
+ translation, but also customizations like Search.
+
+ Localizability
+ Enabling a piece of software to be localized. This is mostly
+ externalizing English strings, and writing build support to
+ pick up localized search engines etc.
+
+ L10n
+ *Numeronym* for Localization: *L*, followed by 10 characters, followed by *n*
+
+ L12y
+ Numeronym for Localizability
+
+ l10n-merge
+ Nickname for the process of merging ``en-US`` and a particular
+ localization into one joint artifact without any missing strings and,
+ as far as possible, without technical errors.
diff --git a/python/compare-locales/docs/index.rst b/python/compare-locales/docs/index.rst
new file mode 100644
index 000000000..925ca0f88
--- /dev/null
+++ b/python/compare-locales/docs/index.rst
@@ -0,0 +1,191 @@
+============
+Localization
+============
+
+.. toctree::
+ :maxdepth: 1
+
+ glossary
+
+The documentation here is targeted at developers writing localizable code
+for Firefox and Firefox for Android, as well as Thunderbird and SeaMonkey.
+
+If you haven't dealt with localization in gecko code before, it's a good
+idea to check the :doc:`./glossary` to learn what localization is, and
+which terms we use for which concepts.
+
+Exposing strings
+----------------
+
+Localizers only handle a few file formats in well-known locations in the
+source tree.
+
+The locations are in directories like
+
+ :file:`browser/`\ ``locales/en-US/``\ :file:`subdir/file.ext`
+
+The first thing to note is that only files beneath :file:`locales/en-US` are
+exposed to localizers. The second thing to note is that only a few directories
+are exposed. Which directories are exposed is defined in files called
+``l10n.ini``, which are at a
+`few places <https://dxr.mozilla.org/mozilla-central/search?q=path%3Al10n.ini&redirect=true>`_
+in the source code.
+
+An example looks like this:
+
+.. code-block:: ini
+
+ [general]
+ depth = ../..
+
+ [compare]
+ dirs = browser
+ browser/branding/official
+
+ [includes]
+ toolkit = toolkit/locales/l10n.ini
+
+This tells the l10n infrastructure three things: Resolve the paths against the
+directory two levels up, include files in :file:`browser/locales/en-US` and
+:file:`browser/branding/official/locales/en-US`, and load more data from
+:file:`toolkit/locales/l10n.ini`.
+
+For projects like Thunderbird and SeaMonkey in ``comm-central``, additional
+data needs to be provided when including an ``l10n.ini`` from a different
+repository:
+
+.. code-block:: ini
+
+ [include_toolkit]
+ type = hg
+ mozilla = mozilla-central
+ repo = http://hg.mozilla.org/
+ l10n.ini = toolkit/locales/l10n.ini
+
+This tells the l10n pieces where to find the repository, and where inside
+that repository the ``l10n.ini`` file is. This is needed because for local
+builds, :file:`mail/locales/l10n.ini` references
+:file:`mozilla/toolkit/locales/l10n.ini`, which is where the comm-central
+build setup expects toolkit to be.
+
+Now that the directories exposed to l10n are known, we can talk about the
+supported file formats.
+
+File formats
+------------
+
+This is just a quick overview, please check the
+`XUL Tutorial <https://developer.mozilla.org/docs/Mozilla/Tech/XUL/Tutorial/Localization>`_
+for an in-depth tour.
+
+The following file formats are known to the l10n tool chains:
+
+DTD
+ Used in XUL and XHTML. Also for Android native strings.
+Properties
+ Used from JavaScript and C++. When used from JavaScript, it also comes with
+ `plural support <https://developer.mozilla.org/docs/Mozilla/Localization/Localization_and_Plurals>`_.
+ini
+ Used by the crashreporter and updater, avoid if possible.
+foo.defines
+ Used during builds, for example to create :file:`install.rdf` for
+ language packs.
+
+Adding new formats involves changing a variety of tools, and is strongly
+discouraged.
+
+Exceptions
+----------
+Generally, anything that exists in ``en-US`` needs a one-to-one mapping in
+all localizations. There are a few cases where that's not wanted, notably
+around search settings and spell-checking dictionaries.
+
+To enable tools to adjust to those exceptions, there's a Python-coded
+:py:mod:`filter.py`, implementing :py:func:`test`, with the following
+signature
+
+.. code-block:: python
+
+ def test(mod, path, entity=None):
+ if does_not_matter:
+ return "ignore"
+ if show_but_do_not_merge:
+ return "report"
+ # default behavior, localizer or build need to do something
+ return "error"
+
+For any missing file, this function is called with ``mod`` being
+the *module*, and ``path`` being the relative path inside
+:file:`locales/en-US`. The module is the top-level dir as referenced in
+:file:`l10n.ini`.
+
+For missing strings, the :py:data:`entity` parameter is the key of the string
+in the en-US file.
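+
+A filter for a real application might look like the following sketch; the
+paths are hypothetical, but search plugins and region-specific preferences
+are typical candidates for such exceptions:
+
+.. code-block:: python
+
+ def test(mod, path, entity=None):
+ # don't require a one-to-one copy of the en-US search plugins
+ if mod == 'browser' and path.startswith('searchplugins/'):
+ return "ignore"
+ # show region.properties in reports, but don't merge changes
+ if path.endswith('region.properties'):
+ return "report"
+ return "error"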
+
+l10n-merge
+----------
+
+Gecko doesn't support fallback from a localization to ``en-US`` at runtime.
+Thus, the build needs to ensure that the localization as it's built into
+the package has all required strings, and that the strings don't contain
+errors. To ensure that, we're *merging* the localization and ``en-US``
+at build time, nick-named :term:`l10n-merge`.
+
+The process is usually triggered via
+
+.. code-block:: bash
+
+ $obj-dir/browser/locales> make merge-de LOCALE_MERGEDIR=$PWD/merge-de
+
+It creates another directory in the object dir, :file:`merge-ab-CD`, in
+which the modified files are stored. The actual repackaging process looks for
+a localized file in the merge dir first, then in the localization, and finally
+in ``en-US``. Thus, for the ``de`` localization of
+:file:`browser/locales/en-US/chrome/browser/browser.dtd`, it checks
+
+1. :file:`$objdir/browser/locales/merge-de/browser/chrome/browser/browser.dtd`
+2. :file:`$(LOCALE_BASEDIR)/de/browser/chrome/browser/browser.dtd`
+3. :file:`browser/locales/en-US/chrome/browser/browser.dtd`
+
+and will include the first of those files it finds.
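+
+Conceptually, the lookup is just "first hit wins"; a minimal sketch in
+Python (not the actual build logic):
+
+.. code-block:: python
+
+ import os
+
+ def pick_localized(candidates):
+ '''Return the first candidate path that exists on disk.'''
+ for path in candidates:
+ if os.path.exists(path):
+ return path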
+
+l10n-merge modifies a file if it supports the particular file type, and there
+are missing strings which are not filtered out, or if an existing string
+shows an error. See the Checks section below for details.
+
+Checks
+------
+
+As part of the build and other localization tool chains, we run a variety
+of source-based checks. Think of them as linters.
+
+The suite of checks is usually determined by file type, i.e., there's one
+suite of checks for DTD files and another for properties files, etc. The
+Android-specific checks are an exception.
+
+Android
+^^^^^^^
+
+For Android, we need to localize :file:`strings.xml`. We do so via DTD
+files, which mostly works. But the strings inside the XML file have to
+satisfy additional constraints about quotes etc. that are not part of XML;
+for example, an apostrophe in a value is valid XML, but needs to be escaped
+as ``\'`` for Android. There's probably some historic background on why
+things are the way they are.
+
+The Android-specific checks are enabled for DTD files that are in
+:file:`mobile/android/base/locales/en-US/`.
+
+Localizations
+-------------
+
+Now that we've talked in depth about how to expose content to localizers,
+where are the localizations?
+
+We host a mercurial repository per locale and per branch. Most of our
+localizations only work starting with aurora, so the bulk of the localizations
+is found on https://hg.mozilla.org/releases/l10n/mozilla-aurora/. We have
+several localizations continuously working with mozilla-central, those
+repositories are on https://hg.mozilla.org/l10n-central/.
+
+You can search inside our localized files on
+`Transvision <https://transvision.mozfr.org/>`_ and
+http://dxr.mozilla.org/l10n-mozilla-aurora/.
diff --git a/python/compare-locales/mach_commands.py b/python/compare-locales/mach_commands.py
new file mode 100644
index 000000000..7be6a50e7
--- /dev/null
+++ b/python/compare-locales/mach_commands.py
@@ -0,0 +1,81 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+from mach.decorators import (
+ CommandArgument,
+ CommandProvider,
+ Command,
+)
+
+from mozbuild.base import (
+ MachCommandBase,
+)
+
+import mozpack.path as mozpath
+
+
+MERGE_HELP = '''Directory to merge to. Will be removed before running
+the comparison. Default: $(OBJDIR)/$(MOZ_BUILD_APP)/locales/merge-$(AB_CD)
+'''.lstrip()
+
+
+@CommandProvider
+class CompareLocales(MachCommandBase):
+ """Run compare-locales."""
+
+ @Command('compare-locales', category='testing',
+ description='Run source checks on a localization.')
+ @CommandArgument('--l10n-ini',
+ help='l10n.ini describing the app. ' +
+ 'Default: $(MOZ_BUILD_APP)/locales/l10n.ini')
+ @CommandArgument('--l10n-base',
+ help='Directory with the localizations. ' +
+ 'Default: $(L10NBASEDIR)')
+ @CommandArgument('--merge-dir',
+ help=MERGE_HELP)
+ @CommandArgument('locales', nargs='+', metavar='ab_CD',
+ help='Locale codes to compare')
+ def compare(self, l10n_ini=None, l10n_base=None, merge_dir=None,
+ locales=None):
+ from compare_locales.paths import EnumerateApp
+ from compare_locales.compare import compareApp
+
+ # check if we're configured and use defaults from there
+ # otherwise, error early
+ try:
+ self.substs # explicitly check
+ if not l10n_ini:
+ l10n_ini = mozpath.join(
+ self.topsrcdir,
+ self.substs['MOZ_BUILD_APP'],
+ 'locales', 'l10n.ini'
+ )
+ if not l10n_base:
+ l10n_base = mozpath.join(
+ self.topsrcdir,
+ self.substs['L10NBASEDIR']
+ )
+ except Exception:
+ if not l10n_ini or not l10n_base:
+ print('Specify --l10n-ini and --l10n-base or run configure.')
+ return 1
+
+ if not merge_dir:
+ try:
+ # self.substs raises an Exception if we're not configured
+ # don't merge if we're not
+ merge_dir = mozpath.join(
+ self.topobjdir,
+ self.substs['MOZ_BUILD_APP'],
+ 'locales', 'merge-dir-{ab_CD}'
+ )
+ except Exception:
+ pass
+
+ app = EnumerateApp(l10n_ini, l10n_base, locales)
+ observer = compareApp(app, merge_stage=merge_dir,
+ clobber=True)
+ print(observer.serialize())
diff --git a/python/compare-locales/moz.build b/python/compare-locales/moz.build
new file mode 100644
index 000000000..f772ab620
--- /dev/null
+++ b/python/compare-locales/moz.build
@@ -0,0 +1,16 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+with Files('compare_locales/**'):
+ BUG_COMPONENT = ('Localization Infrastructure and Tools', 'compare-locales')
+with Files('docs/**'):
+ BUG_COMPONENT = ('Mozilla Localizations', 'Documentation')
+
+# SPHINX_PYTHON_PACKAGE_DIRS += [
+# 'compare_locales',
+# ]
+
+SPHINX_TREES['.'] = 'docs'