diff options
Diffstat (limited to 'python/compare-locales/compare_locales')
20 files changed, 3689 insertions, 0 deletions
diff --git a/python/compare-locales/compare_locales/__init__.py b/python/compare-locales/compare_locales/__init__.py new file mode 100644 index 000000000..bad265e4f --- /dev/null +++ b/python/compare-locales/compare_locales/__init__.py @@ -0,0 +1 @@ +version = "1.1" diff --git a/python/compare-locales/compare_locales/checks.py b/python/compare-locales/compare_locales/checks.py new file mode 100644 index 000000000..ee3bef03d --- /dev/null +++ b/python/compare-locales/compare_locales/checks.py @@ -0,0 +1,438 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import re +from difflib import SequenceMatcher +from xml import sax +try: + from cStringIO import StringIO +except ImportError: + from StringIO import StringIO + +from compare_locales.parser import DTDParser, PropertiesParser + + +class Checker(object): + '''Abstract class to implement checks per file type. + ''' + pattern = None + + @classmethod + def use(cls, file): + return cls.pattern.match(file.file) + + def check(self, refEnt, l10nEnt): + '''Given the reference and localized Entities, performs checks. + + This is a generator yielding tuples of + - "warning" or "error", depending on what should be reported, + - tuple of line, column info for the error within the string + - description string to be shown in the report + ''' + if True: + raise NotImplementedError("Need to subclass") + yield ("error", (0, 0), "This is an example error", "example") + + +class PrintfException(Exception): + def __init__(self, msg, pos): + self.pos = pos + self.msg = msg + + +class PropertiesChecker(Checker): + '''Tests to run on .properties files. + ''' + pattern = re.compile('.*\.properties$') + printf = re.compile(r'%(?P<good>%|' + r'(?:(?P<number>[1-9][0-9]*)\$)?' + r'(?P<width>\*|[0-9]+)?' + r'(?P<prec>\.(?:\*|[0-9]+)?)?' 
+ r'(?P<spec>[duxXosScpfg]))?') + + def check(self, refEnt, l10nEnt): + '''Test for the different variable formats. + ''' + refValue, l10nValue = refEnt.val, l10nEnt.val + refSpecs = None + # check for PluralForm.jsm stuff, should have the docs in the + # comment + if 'Localization_and_Plurals' in refEnt.pre_comment: + # For plurals, common variable pattern is #1. Try that. + pats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)', + refValue)) + if len(pats) == 0: + return + lpats = set(int(m.group(1)) for m in re.finditer('#([0-9]+)', + l10nValue)) + if pats - lpats: + yield ('warning', 0, 'not all variables used in l10n', + 'plural') + return + if lpats - pats: + yield ('error', 0, 'unreplaced variables in l10n', + 'plural') + return + return + # check for lost escapes + raw_val = l10nEnt.raw_val + for m in PropertiesParser.escape.finditer(raw_val): + if m.group('single') and \ + m.group('single') not in PropertiesParser.known_escapes: + yield ('warning', m.start(), + 'unknown escape sequence, \\' + m.group('single'), + 'escape') + try: + refSpecs = self.getPrintfSpecs(refValue) + except PrintfException: + refSpecs = [] + if refSpecs: + for t in self.checkPrintf(refSpecs, l10nValue): + yield t + return + + def checkPrintf(self, refSpecs, l10nValue): + try: + l10nSpecs = self.getPrintfSpecs(l10nValue) + except PrintfException, e: + yield ('error', e.pos, e.msg, 'printf') + return + if refSpecs != l10nSpecs: + sm = SequenceMatcher() + sm.set_seqs(refSpecs, l10nSpecs) + msgs = [] + warn = None + for action, i1, i2, j1, j2 in sm.get_opcodes(): + if action == 'equal': + continue + if action == 'delete': + # missing argument in l10n + if i2 == len(refSpecs): + # trailing specs missing, that's just a warning + warn = ', '.join('trailing argument %d `%s` missing' % + (i+1, refSpecs[i]) + for i in xrange(i1, i2)) + else: + for i in xrange(i1, i2): + msgs.append('argument %d `%s` missing' % + (i+1, refSpecs[i])) + continue + if action == 'insert': + # obsolete 
argument in l10n + for i in xrange(j1, j2): + msgs.append('argument %d `%s` obsolete' % + (i+1, l10nSpecs[i])) + continue + if action == 'replace': + for i, j in zip(xrange(i1, i2), xrange(j1, j2)): + msgs.append('argument %d `%s` should be `%s`' % + (j+1, l10nSpecs[j], refSpecs[i])) + if msgs: + yield ('error', 0, ', '.join(msgs), 'printf') + if warn is not None: + yield ('warning', 0, warn, 'printf') + + def getPrintfSpecs(self, val): + hasNumber = False + specs = [] + for m in self.printf.finditer(val): + if m.group("good") is None: + # found just a '%', signal an error + raise PrintfException('Found single %', m.start()) + if m.group("good") == '%': + # escaped % + continue + if ((hasNumber and m.group('number') is None) or + (not hasNumber and specs and + m.group('number') is not None)): + # mixed style, numbered and not + raise PrintfException('Mixed ordered and non-ordered args', + m.start()) + hasNumber = m.group('number') is not None + if hasNumber: + pos = int(m.group('number')) - 1 + ls = len(specs) + if pos >= ls: + # pad specs + nones = pos - ls + specs[ls:pos] = nones*[None] + specs.append(m.group('spec')) + else: + if specs[pos] is not None: + raise PrintfException('Double ordered argument %d' % + (pos+1), + m.start()) + specs[pos] = m.group('spec') + else: + specs.append(m.group('spec')) + # check for missing args + if hasNumber and not all(specs): + raise PrintfException('Ordered argument missing', 0) + return specs + + +class DTDChecker(Checker): + """Tests to run on DTD files. + + Uses xml.sax for the heavy lifting of xml parsing. + + The code tries to parse until it doesn't find any unresolved entities + anymore. If it finds one, it tries to grab the key, and adds an empty + <!ENTITY key ""> definition to the header. + + Also checks for some CSS and number heuristics in the values. 
+ """ + pattern = re.compile('.*\.dtd$') + + eref = re.compile('&(%s);' % DTDParser.Name) + tmpl = '''<!DOCTYPE elem [%s]> +<elem>%s</elem> +''' + xmllist = set(('amp', 'lt', 'gt', 'apos', 'quot')) + + def __init__(self, reference): + self.reference = reference + self.__known_entities = None + + def known_entities(self, refValue): + if self.__known_entities is None and self.reference is not None: + self.__known_entities = set() + for ent in self.reference: + self.__known_entities.update(self.entities_for_value(ent.val)) + return self.__known_entities if self.__known_entities is not None \ + else self.entities_for_value(refValue) + + def entities_for_value(self, value): + reflist = set(m.group(1).encode('utf-8') + for m in self.eref.finditer(value)) + reflist -= self.xmllist + return reflist + + # Setup for XML parser, with default and text-only content handler + class TextContent(sax.handler.ContentHandler): + textcontent = '' + + def characters(self, content): + self.textcontent += content + + defaulthandler = sax.handler.ContentHandler() + texthandler = TextContent() + + numPattern = r'([0-9]+|[0-9]*\.[0-9]+)' + num = re.compile('^%s$' % numPattern) + lengthPattern = '%s(em|px|ch|cm|in)' % numPattern + length = re.compile('^%s$' % lengthPattern) + spec = re.compile(r'((?:min\-)?(?:width|height))\s*:\s*%s' % + lengthPattern) + style = re.compile(r'^%(spec)s\s*(;\s*%(spec)s\s*)*;?$' % + {'spec': spec.pattern}) + + processContent = None + + def check(self, refEnt, l10nEnt): + """Try to parse the refvalue inside a dummy element, and keep + track of entities that we need to define to make that work. + + Return a checker that offers just those entities. + """ + refValue, l10nValue = refEnt.val, l10nEnt.val + # find entities the refValue references, + # reusing markup from DTDParser. 
+ reflist = self.known_entities(refValue) + inContext = self.entities_for_value(refValue) + entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist)) + parser = sax.make_parser() + parser.setFeature(sax.handler.feature_external_ges, False) + + parser.setContentHandler(self.defaulthandler) + try: + parser.parse(StringIO(self.tmpl % + (entities, refValue.encode('utf-8')))) + # also catch stray % + parser.parse(StringIO(self.tmpl % + (refEnt.all.encode('utf-8') + entities, + '&%s;' % refEnt.key.encode('utf-8')))) + except sax.SAXParseException, e: + yield ('warning', + (0, 0), + "can't parse en-US value", 'xmlparse') + + # find entities the l10nValue references, + # reusing markup from DTDParser. + l10nlist = self.entities_for_value(l10nValue) + missing = sorted(l10nlist - reflist) + _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing) + if self.processContent is not None: + self.texthandler.textcontent = '' + parser.setContentHandler(self.texthandler) + try: + parser.parse(StringIO(self.tmpl % (_entities, + l10nValue.encode('utf-8')))) + # also catch stray % + # if this fails, we need to substract the entity definition + parser.setContentHandler(self.defaulthandler) + parser.parse(StringIO(self.tmpl % ( + l10nEnt.all.encode('utf-8') + _entities, + '&%s;' % l10nEnt.key.encode('utf-8')))) + except sax.SAXParseException, e: + # xml parse error, yield error + # sometimes, the error is reported on our fake closing + # element, make that the end of the last line + lnr = e.getLineNumber() - 1 + lines = l10nValue.splitlines() + if lnr > len(lines): + lnr = len(lines) + col = len(lines[lnr-1]) + else: + col = e.getColumnNumber() + if lnr == 1: + # first line starts with <elem>, substract + col -= len("<elem>") + elif lnr == 0: + col -= len("<!DOCTYPE elem [") # first line is DOCTYPE + yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse') + + warntmpl = u'Referencing unknown entity `%s`' + if reflist: + if inContext: + elsewhere = reflist - 
inContext + warntmpl += ' (%s used in context' % \ + ', '.join(sorted(inContext)) + if elsewhere: + warntmpl += ', %s known)' % ', '.join(sorted(elsewhere)) + else: + warntmpl += ')' + else: + warntmpl += ' (%s known)' % ', '.join(sorted(reflist)) + for key in missing: + yield ('warning', (0, 0), warntmpl % key.decode('utf-8'), + 'xmlparse') + if inContext and l10nlist and l10nlist - inContext - set(missing): + mismatch = sorted(l10nlist - inContext - set(missing)) + for key in mismatch: + yield ('warning', (0, 0), + 'Entity %s referenced, but %s used in context' % ( + key.decode('utf-8'), + ', '.join(sorted(inContext)) + ), 'xmlparse') + + # Number check + if self.num.match(refValue) and not self.num.match(l10nValue): + yield ('warning', 0, 'reference is a number', 'number') + # CSS checks + # just a length, width="100em" + if self.length.match(refValue) and not self.length.match(l10nValue): + yield ('error', 0, 'reference is a CSS length', 'css') + # real CSS spec, style="width:100px;" + if self.style.match(refValue): + if not self.style.match(l10nValue): + yield ('error', 0, 'reference is a CSS spec', 'css') + else: + # warn if different properties or units + refMap = dict((s, u) for s, _, u in + self.spec.findall(refValue)) + msgs = [] + for s, _, u in self.spec.findall(l10nValue): + if s not in refMap: + msgs.insert(0, '%s only in l10n' % s) + continue + else: + ru = refMap.pop(s) + if u != ru: + msgs.append("units for %s don't match " + "(%s != %s)" % (s, u, ru)) + for s in refMap.iterkeys(): + msgs.insert(0, '%s only in reference' % s) + if msgs: + yield ('warning', 0, ', '.join(msgs), 'css') + + if self.processContent is not None: + for t in self.processContent(self.texthandler.textcontent): + yield t + + +class PrincessAndroid(DTDChecker): + """Checker for the string values that Android puts into an XML container. + + http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling # noqa + has more info. 
Check for unescaped apostrophes and bad unicode escapes. + """ + quoted = re.compile("(?P<q>[\"']).*(?P=q)$") + + def unicode_escape(self, str): + """Helper method to try to decode all unicode escapes in a string. + + This code uses the standard python decode for unicode-escape, but + that's somewhat tricky, as its input needs to be ascii. To get to + ascii, the unicode string gets converted to ascii with + backslashreplace, i.e., all non-ascii unicode chars get unicode + escaped. And then we try to roll all of that back. + Now, when that hits an error, that's from the original string, and we + need to search for the actual error position in the original string, + as the backslashreplace code changes string positions quite badly. + See also the last check in TestAndroid.test_android_dtd, with a + lengthy chinese string. + """ + val = str.encode('ascii', 'backslashreplace') + try: + val.decode('unicode-escape') + except UnicodeDecodeError, e: + args = list(e.args) + badstring = args[1][args[2]:args[3]] + i = len(args[1][:args[2]].decode('unicode-escape')) + args[2] = i + args[3] = i + len(badstring) + raise UnicodeDecodeError(*args) + + @classmethod + def use(cls, file): + """Use this Checker only for DTD files in embedding/android.""" + return (file.module in ("embedding/android", + "mobile/android/base") and + cls.pattern.match(file.file)) + + def processContent(self, val): + """Actual check code. + Check for unicode escapes and unescaped quotes and apostrophes, + if string's not quoted. + """ + # first, try to decode unicode escapes + try: + self.unicode_escape(val) + except UnicodeDecodeError, e: + yield ('error', e.args[2], e.args[4], 'android') + # check for unescaped single or double quotes. 
+ # first, see if the complete string is single or double quoted, + # that changes the rules + m = self.quoted.match(val) + if m: + q = m.group('q') + offset = 0 + val = val[1:-1] # strip quotes + else: + q = "[\"']" + offset = -1 + stray_quot = re.compile(r"[\\\\]*(%s)" % q) + + for m in stray_quot.finditer(val): + if len(m.group(0)) % 2: + # found an unescaped single or double quote, which message? + if m.group(1) == '"': + msg = u"Quotes in Android DTDs need escaping with \\\" "\ + u"or \\u0022, or put string in apostrophes." + else: + msg = u"Apostrophes in Android DTDs need escaping with "\ + u"\\' or \\u0027, or use \u2019, or put string in "\ + u"quotes." + yield ('error', m.end(0)+offset, msg, 'android') + + +def getChecker(file, reference=None): + if PropertiesChecker.use(file): + return PropertiesChecker() + if PrincessAndroid.use(file): + return PrincessAndroid(reference) + if DTDChecker.use(file): + return DTDChecker(reference) + return None diff --git a/python/compare-locales/compare_locales/commands.py b/python/compare-locales/compare_locales/commands.py new file mode 100644 index 000000000..61b58ec4b --- /dev/null +++ b/python/compare-locales/compare_locales/commands.py @@ -0,0 +1,154 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'Commands exposed to commandlines' + +import logging +from optparse import OptionParser, make_option + +from compare_locales.paths import EnumerateApp +from compare_locales.compare import compareApp, compareDirs +from compare_locales.webapps import compare_web_app + + +class BaseCommand(object): + """Base class for compare-locales commands. + This handles command line parsing, and general sugar for setuptools + entry_points. 
+ """ + options = [ + make_option('-v', '--verbose', action='count', dest='v', default=0, + help='Make more noise'), + make_option('-q', '--quiet', action='count', dest='q', default=0, + help='Make less noise'), + make_option('-m', '--merge', + help='''Use this directory to stage merged files, +use {ab_CD} to specify a different directory for each locale'''), + ] + data_option = make_option('--data', choices=['text', 'exhibit', 'json'], + default='text', + help='''Choose data and format (one of text, +exhibit, json); text: (default) Show which files miss which strings, together +with warnings and errors. Also prints a summary; json: Serialize the internal +tree, useful for tools. Also always succeeds; exhibit: Serialize the summary +data in a json useful for Exhibit +''') + + def __init__(self): + self.parser = None + + def get_parser(self): + """Get an OptionParser, with class docstring as usage, and + self.options. + """ + parser = OptionParser() + parser.set_usage(self.__doc__) + for option in self.options: + parser.add_option(option) + return parser + + @classmethod + def call(cls): + """Entry_point for setuptools. + The actual command handling is done in the handle() method of the + subclasses. + """ + cmd = cls() + cmd.handle_() + + def handle_(self): + """The instance part of the classmethod call.""" + self.parser = self.get_parser() + (options, args) = self.parser.parse_args() + # log as verbose or quiet as we want, warn by default + logging.basicConfig() + logging.getLogger().setLevel(logging.WARNING - + (options.v - options.q)*10) + observer = self.handle(args, options) + print observer.serialize(type=options.data).encode('utf-8', 'replace') + + def handle(self, args, options): + """Subclasses need to implement this method for the actual + command handling. + """ + raise NotImplementedError + + +class CompareLocales(BaseCommand): + """usage: %prog [options] l10n.ini l10n_base_dir [locale ...] + +Check the localization status of a gecko application. 
+The first argument is a path to the l10n.ini file for the application, +followed by the base directory of the localization repositories. +Then you pass in the list of locale codes you want to compare. If there are +not locales given, the list of locales will be taken from the all-locales file +of the application\'s l10n.ini.""" + + options = BaseCommand.options + [ + make_option('--clobber-merge', action="store_true", default=False, + dest='clobber', + help="""WARNING: DATALOSS. +Use this option with care. If specified, the merge directory will +be clobbered for each module. That means, the subdirectory will +be completely removed, any files that were there are lost. +Be careful to specify the right merge directory when using this option."""), + make_option('-r', '--reference', default='en-US', dest='reference', + help='Explicitly set the reference ' + 'localization. [default: en-US]'), + BaseCommand.data_option + ] + + def handle(self, args, options): + if len(args) < 2: + self.parser.error('Need to pass in list of languages') + inipath, l10nbase = args[:2] + locales = args[2:] + app = EnumerateApp(inipath, l10nbase, locales) + app.reference = options.reference + try: + observer = compareApp(app, merge_stage=options.merge, + clobber=options.clobber) + except (OSError, IOError), exc: + print "FAIL: " + str(exc) + self.parser.exit(2) + return observer + + +class CompareDirs(BaseCommand): + """usage: %prog [options] reference localization + +Check the localization status of a directory tree. 
+The first argument is a path to the reference data,the second is the +localization to be tested.""" + + options = BaseCommand.options + [ + BaseCommand.data_option + ] + + def handle(self, args, options): + if len(args) != 2: + self.parser.error('Reference and localizatino required') + reference, locale = args + observer = compareDirs(reference, locale, merge_stage=options.merge) + return observer + + +class CompareWebApp(BaseCommand): + """usage: %prog [options] webapp [locale locale] + +Check the localization status of a gaia-style web app. +The first argument is the directory of the web app. +Following arguments explicitly state the locales to test. +If none are given, test all locales in manifest.webapp or files.""" + + options = BaseCommand.options[:-1] + [ + BaseCommand.data_option] + + def handle(self, args, options): + if len(args) < 1: + self.parser.error('Webapp directory required') + basedir = args[0] + locales = args[1:] + observer = compare_web_app(basedir, locales) + return observer diff --git a/python/compare-locales/compare_locales/compare.py b/python/compare-locales/compare_locales/compare.py new file mode 100644 index 000000000..4f71c46f8 --- /dev/null +++ b/python/compare-locales/compare_locales/compare.py @@ -0,0 +1,638 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +'Mozilla l10n compare locales tool' + +import codecs +import os +import os.path +import shutil +import re +from difflib import SequenceMatcher +from collections import defaultdict + +try: + from json import dumps +except: + from simplejson import dumps + +from compare_locales import parser +from compare_locales import paths +from compare_locales.checks import getChecker + + +class Tree(object): + def __init__(self, valuetype): + self.branches = dict() + self.valuetype = valuetype + self.value = None + + def __getitem__(self, leaf): + parts = [] + if isinstance(leaf, paths.File): + parts = [p for p in [leaf.locale, leaf.module] if p] + \ + leaf.file.split('/') + else: + parts = leaf.split('/') + return self.__get(parts) + + def __get(self, parts): + common = None + old = None + new = tuple(parts) + t = self + for k, v in self.branches.iteritems(): + for i, part in enumerate(zip(k, parts)): + if part[0] != part[1]: + i -= 1 + break + if i < 0: + continue + i += 1 + common = tuple(k[:i]) + old = tuple(k[i:]) + new = tuple(parts[i:]) + break + if old: + self.branches.pop(k) + t = Tree(self.valuetype) + t.branches[old] = v + self.branches[common] = t + elif common: + t = self.branches[common] + if new: + if common: + return t.__get(new) + t2 = t + t = Tree(self.valuetype) + t2.branches[new] = t + if t.value is None: + t.value = t.valuetype() + return t.value + + indent = ' ' + + def getContent(self, depth=0): + ''' + Returns iterator of (depth, flag, key_or_value) tuples. + If flag is 'value', key_or_value is a value object, otherwise + (flag is 'key') it's a key string. + ''' + keys = self.branches.keys() + keys.sort() + if self.value is not None: + yield (depth, 'value', self.value) + for key in keys: + yield (depth, 'key', key) + for child in self.branches[key].getContent(depth + 1): + yield child + + def toJSON(self): + ''' + Returns this Tree as a JSON-able tree of hashes. + Only the values need to take care that they're JSON-able. 
+ ''' + json = {} + keys = self.branches.keys() + keys.sort() + if self.value is not None: + json['value'] = self.value + children = [('/'.join(key), self.branches[key].toJSON()) + for key in keys] + if children: + json['children'] = children + return json + + def getStrRows(self): + def tostr(t): + if t[1] == 'key': + return self.indent * t[0] + '/'.join(t[2]) + return self.indent * (t[0] + 1) + str(t[2]) + + return map(tostr, self.getContent()) + + def __str__(self): + return '\n'.join(self.getStrRows()) + + +class AddRemove(SequenceMatcher): + def __init__(self): + SequenceMatcher.__init__(self, None, None, None) + + def set_left(self, left): + if not isinstance(left, list): + left = [l for l in left] + self.set_seq1(left) + + def set_right(self, right): + if not isinstance(right, list): + right = [l for l in right] + self.set_seq2(right) + + def __iter__(self): + for tag, i1, i2, j1, j2 in self.get_opcodes(): + if tag == 'equal': + for pair in zip(self.a[i1:i2], self.b[j1:j2]): + yield ('equal', pair) + elif tag == 'delete': + for item in self.a[i1:i2]: + yield ('delete', item) + elif tag == 'insert': + for item in self.b[j1:j2]: + yield ('add', item) + else: + # tag == 'replace' + for item in self.a[i1:i2]: + yield ('delete', item) + for item in self.b[j1:j2]: + yield ('add', item) + + +class DirectoryCompare(SequenceMatcher): + def __init__(self, reference): + SequenceMatcher.__init__(self, None, [i for i in reference], + []) + self.watcher = None + + def setWatcher(self, watcher): + self.watcher = watcher + + def compareWith(self, other): + if not self.watcher: + return + self.set_seq2([i for i in other]) + for tag, i1, i2, j1, j2 in self.get_opcodes(): + if tag == 'equal': + for i, j in zip(xrange(i1, i2), xrange(j1, j2)): + self.watcher.compare(self.a[i], self.b[j]) + elif tag == 'delete': + for i in xrange(i1, i2): + self.watcher.add(self.a[i], other.cloneFile(self.a[i])) + elif tag == 'insert': + for j in xrange(j1, j2): + self.watcher.remove(self.b[j]) 
+ else: + for j in xrange(j1, j2): + self.watcher.remove(self.b[j]) + for i in xrange(i1, i2): + self.watcher.add(self.a[i], other.cloneFile(self.a[i])) + + +class Observer(object): + stat_cats = ['missing', 'obsolete', 'missingInFiles', 'report', + 'changed', 'unchanged', 'keys'] + + def __init__(self): + class intdict(defaultdict): + def __init__(self): + defaultdict.__init__(self, int) + + self.summary = defaultdict(intdict) + self.details = Tree(dict) + self.filter = None + + # support pickling + def __getstate__(self): + return dict(summary=self.getSummary(), details=self.details) + + def __setstate__(self, state): + class intdict(defaultdict): + def __init__(self): + defaultdict.__init__(self, int) + + self.summary = defaultdict(intdict) + if 'summary' in state: + for loc, stats in state['summary'].iteritems(): + self.summary[loc].update(stats) + self.details = state['details'] + self.filter = None + + def getSummary(self): + plaindict = {} + for k, v in self.summary.iteritems(): + plaindict[k] = dict(v) + return plaindict + + def toJSON(self): + return dict(summary=self.getSummary(), details=self.details.toJSON()) + + def notify(self, category, file, data): + rv = "error" + if category in self.stat_cats: + # these get called post reporting just for stats + # return "error" to forward them to other other_observers + self.summary[file.locale][category] += data + # keep track of how many strings are in a missing file + # we got the {'missingFile': 'error'} from the first pass + if category == 'missingInFiles': + self.details[file]['strings'] = data + return "error" + if category in ['missingFile', 'obsoleteFile']: + if self.filter is not None: + rv = self.filter(file) + if rv != "ignore": + self.details[file][category] = rv + return rv + if category in ['missingEntity', 'obsoleteEntity']: + if self.filter is not None: + rv = self.filter(file, data) + if rv == "ignore": + return rv + v = self.details[file] + try: + v[category].append(data) + except KeyError: + 
v[category] = [data] + return rv + if category == 'error': + try: + self.details[file][category].append(data) + except KeyError: + self.details[file][category] = [data] + self.summary[file.locale]['errors'] += 1 + elif category == 'warning': + try: + self.details[file][category].append(data) + except KeyError: + self.details[file][category] = [data] + self.summary[file.locale]['warnings'] += 1 + return rv + + def toExhibit(self): + items = [] + for locale in sorted(self.summary.iterkeys()): + summary = self.summary[locale] + if locale is not None: + item = {'id': 'xxx/' + locale, + 'label': locale, + 'locale': locale} + else: + item = {'id': 'xxx', + 'label': 'xxx', + 'locale': 'xxx'} + item['type'] = 'Build' + total = sum([summary[k] + for k in ('changed', 'unchanged', 'report', 'missing', + 'missingInFiles') + if k in summary]) + rate = (('changed' in summary and summary['changed'] * 100) or + 0) / total + item.update((k, summary.get(k, 0)) + for k in ('changed', 'unchanged')) + item.update((k, summary[k]) + for k in ('report', 'errors', 'warnings') + if k in summary) + item['missing'] = summary.get('missing', 0) + \ + summary.get('missingInFiles', 0) + item['completion'] = rate + item['total'] = total + result = 'success' + if item.get('warnings', 0): + result = 'warning' + if item.get('errors', 0) or item.get('missing', 0): + result = 'failure' + item['result'] = result + items.append(item) + data = { + "properties": dict.fromkeys( + ("completion", "errors", "warnings", "missing", "report", + "unchanged", "changed", "obsolete"), + {"valueType": "number"}), + "types": { + "Build": {"pluralLabel": "Builds"} + }} + data['items'] = items + return dumps(data, indent=2) + + def serialize(self, type="text"): + if type == "exhibit": + return self.toExhibit() + if type == "json": + return dumps(self.toJSON()) + + def tostr(t): + if t[1] == 'key': + return ' ' * t[0] + '/'.join(t[2]) + o = [] + indent = ' ' * (t[0] + 1) + if 'error' in t[2]: + o += [indent + 'ERROR: ' + 
e for e in t[2]['error']] + if 'warning' in t[2]: + o += [indent + 'WARNING: ' + e for e in t[2]['warning']] + if 'missingEntity' in t[2] or 'obsoleteEntity' in t[2]: + missingEntities = ('missingEntity' in t[2] and + t[2]['missingEntity']) or [] + obsoleteEntities = ('obsoleteEntity' in t[2] and + t[2]['obsoleteEntity']) or [] + entities = missingEntities + obsoleteEntities + entities.sort() + for entity in entities: + op = '+' + if entity in obsoleteEntities: + op = '-' + o.append(indent + op + entity) + elif 'missingFile' in t[2]: + o.append(indent + '// add and localize this file') + elif 'obsoleteFile' in t[2]: + o.append(indent + '// remove this file') + return '\n'.join(o) + + out = [] + for locale, summary in sorted(self.summary.iteritems()): + if locale is not None: + out.append(locale + ':') + out += [k + ': ' + str(v) for k, v in sorted(summary.iteritems())] + total = sum([summary[k] + for k in ['changed', 'unchanged', 'report', 'missing', + 'missingInFiles'] + if k in summary]) + rate = 0 + if total: + rate = (('changed' in summary and summary['changed'] * 100) or + 0) / total + out.append('%d%% of entries changed' % rate) + return '\n'.join(map(tostr, self.details.getContent()) + out) + + def __str__(self): + return 'observer' + + +class ContentComparer: + keyRE = re.compile('[kK]ey') + nl = re.compile('\n', re.M) + + def __init__(self): + '''Create a ContentComparer. + observer is usually a instance of Observer. The return values + of the notify method are used to control the handling of missing + entities. + ''' + self.reference = dict() + self.observer = Observer() + self.other_observers = [] + self.merge_stage = None + + def add_observer(self, obs): + '''Add a non-filtering observer. + Results from the notify calls are ignored. 
+ ''' + self.other_observers.append(obs) + + def set_merge_stage(self, merge_stage): + self.merge_stage = merge_stage + + def merge(self, ref_entities, ref_map, ref_file, l10n_file, missing, + skips, p): + outfile = os.path.join(self.merge_stage, l10n_file.module, + l10n_file.file) + outdir = os.path.dirname(outfile) + if not os.path.isdir(outdir): + os.makedirs(outdir) + if not p.canMerge: + shutil.copyfile(ref_file.fullpath, outfile) + print "copied reference to " + outfile + return + if skips: + # skips come in ordered by key name, we need them in file order + skips.sort(key=lambda s: s.span[0]) + trailing = (['\n'] + + [ref_entities[ref_map[key]].all for key in missing] + + [ref_entities[ref_map[skip.key]].all for skip in skips + if not isinstance(skip, parser.Junk)]) + if skips: + # we need to skip a few errornous blocks in the input, copy by hand + f = codecs.open(outfile, 'wb', p.encoding) + offset = 0 + for skip in skips: + chunk = skip.span + f.write(p.contents[offset:chunk[0]]) + offset = chunk[1] + f.write(p.contents[offset:]) + else: + shutil.copyfile(l10n_file.fullpath, outfile) + f = codecs.open(outfile, 'ab', p.encoding) + print "adding to " + outfile + + def ensureNewline(s): + if not s.endswith('\n'): + return s + '\n' + return s + + f.write(''.join(map(ensureNewline, trailing))) + f.close() + + def notify(self, category, file, data): + """Check observer for the found data, and if it's + not to ignore, notify other_observers. + """ + rv = self.observer.notify(category, file, data) + if rv == 'ignore': + return rv + for obs in self.other_observers: + # non-filtering other_observers, ignore results + obs.notify(category, file, data) + return rv + + def remove(self, obsolete): + self.notify('obsoleteFile', obsolete, None) + pass + + def compare(self, ref_file, l10n): + try: + p = parser.getParser(ref_file.file) + except UserWarning: + # no comparison, XXX report? 
+ return + if ref_file not in self.reference: + # we didn't parse this before + try: + p.readContents(ref_file.getContents()) + except Exception, e: + self.notify('error', ref_file, str(e)) + return + self.reference[ref_file] = p.parse() + ref = self.reference[ref_file] + ref_list = ref[1].keys() + ref_list.sort() + try: + p.readContents(l10n.getContents()) + l10n_entities, l10n_map = p.parse() + except Exception, e: + self.notify('error', l10n, str(e)) + return + lines = [] + + def _getLine(offset): + if not lines: + lines.append(0) + for m in self.nl.finditer(p.contents): + lines.append(m.end()) + for i in xrange(len(lines), 0, -1): + if offset >= lines[i - 1]: + return (i, offset - lines[i - 1]) + return (1, offset) + + l10n_list = l10n_map.keys() + l10n_list.sort() + ar = AddRemove() + ar.set_left(ref_list) + ar.set_right(l10n_list) + report = missing = obsolete = changed = unchanged = keys = 0 + missings = [] + skips = [] + checker = getChecker(l10n, reference=ref[0]) + for action, item_or_pair in ar: + if action == 'delete': + # missing entity + _rv = self.notify('missingEntity', l10n, item_or_pair) + if _rv == "ignore": + continue + if _rv == "error": + # only add to missing entities for l10n-merge on error, + # not report + missings.append(item_or_pair) + missing += 1 + else: + # just report + report += 1 + elif action == 'add': + # obsolete entity or junk + if isinstance(l10n_entities[l10n_map[item_or_pair]], + parser.Junk): + junk = l10n_entities[l10n_map[item_or_pair]] + params = (junk.val,) + junk.span + self.notify('error', l10n, + 'Unparsed content "%s" at %d-%d' % params) + if self.merge_stage is not None: + skips.append(junk) + elif self.notify('obsoleteEntity', l10n, + item_or_pair) != 'ignore': + obsolete += 1 + else: + # entity found in both ref and l10n, check for changed + entity = item_or_pair[0] + refent = ref[0][ref[1][entity]] + l10nent = l10n_entities[l10n_map[entity]] + if self.keyRE.search(entity): + keys += 1 + else: + if refent.val == 
l10nent.val: + self.doUnchanged(l10nent) + unchanged += 1 + else: + self.doChanged(ref_file, refent, l10nent) + changed += 1 + # run checks: + if checker: + for tp, pos, msg, cat in checker.check(refent, l10nent): + # compute real src position, if first line, + # col needs adjustment + _l, _offset = _getLine(l10nent.val_span[0]) + if isinstance(pos, tuple): + # line, column + if pos[0] == 1: + col = pos[1] + _offset + else: + col = pos[1] + _l += pos[0] - 1 + else: + _l, col = _getLine(l10nent.val_span[0] + pos) + # skip error entities when merging + if tp == 'error' and self.merge_stage is not None: + skips.append(l10nent) + self.notify(tp, l10n, + u"%s at line %d, column %d for %s" % + (msg, _l, col, refent.key)) + pass + if missing: + self.notify('missing', l10n, missing) + if self.merge_stage is not None and (missings or skips): + self.merge(ref[0], ref[1], ref_file, l10n, missings, skips, p) + if report: + self.notify('report', l10n, report) + if obsolete: + self.notify('obsolete', l10n, obsolete) + if changed: + self.notify('changed', l10n, changed) + if unchanged: + self.notify('unchanged', l10n, unchanged) + if keys: + self.notify('keys', l10n, keys) + pass + + def add(self, orig, missing): + if self.notify('missingFile', missing, None) == "ignore": + # filter said that we don't need this file, don't count it + return + f = orig + try: + p = parser.getParser(f.file) + except UserWarning: + return + try: + p.readContents(f.getContents()) + entities, map = p.parse() + except Exception, e: + self.notify('error', f, str(e)) + return + self.notify('missingInFiles', missing, len(map)) + + def doUnchanged(self, entity): + # overload this if needed + pass + + def doChanged(self, file, ref_entity, l10n_entity): + # overload this if needed + pass + + +def compareApp(app, other_observer=None, merge_stage=None, clobber=False): + '''Compare locales set in app. + + Optional arguments are: + - other_observer. 
A object implementing + notify(category, _file, data) + The return values of that callback are ignored. + - merge_stage. A directory to be used for staging the output of + l10n-merge. + - clobber. Clobber the module subdirectories of the merge dir as we go. + Use wisely, as it might cause data loss. + ''' + comparer = ContentComparer() + if other_observer is not None: + comparer.add_observer(other_observer) + comparer.observer.filter = app.filter + for module, reference, locales in app: + dir_comp = DirectoryCompare(reference) + dir_comp.setWatcher(comparer) + for _, localization in locales: + if merge_stage is not None: + locale_merge = merge_stage.format(ab_CD=localization.locale) + comparer.set_merge_stage(locale_merge) + if clobber: + # if clobber, remove the stage for the module if it exists + clobberdir = os.path.join(locale_merge, module) + if os.path.exists(clobberdir): + shutil.rmtree(clobberdir) + print "clobbered " + clobberdir + dir_comp.compareWith(localization) + return comparer.observer + + +def compareDirs(reference, locale, other_observer=None, merge_stage=None): + '''Compare reference and locale dir. + + Optional arguments are: + - other_observer. A object implementing + notify(category, _file, data) + The return values of that callback are ignored. + ''' + comparer = ContentComparer() + if other_observer is not None: + comparer.add_observer(other_observer) + comparer.set_merge_stage(merge_stage) + dir_comp = DirectoryCompare(paths.EnumerateDir(reference)) + dir_comp.setWatcher(comparer) + dir_comp.compareWith(paths.EnumerateDir(locale)) + return comparer.observer diff --git a/python/compare-locales/compare_locales/parser.py b/python/compare-locales/compare_locales/parser.py new file mode 100644 index 000000000..a97cf201b --- /dev/null +++ b/python/compare-locales/compare_locales/parser.py @@ -0,0 +1,521 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import re +import codecs +import logging +from HTMLParser import HTMLParser + +__constructors = [] + + +class Entity(object): + ''' + Abstraction layer for a localizable entity. + Currently supported are grammars of the form: + + 1: pre white space + 2: pre comments + 3: entity definition + 4: entity key (name) + 5: entity value + 6: post comment (and white space) in the same line (dtd only) + <--[1] + <!-- pre comments --> <--[2] + <!ENTITY key "value"> <!-- comment --> + + <-------[3]---------><------[6]------> + ''' + def __init__(self, contents, pp, + span, pre_ws_span, pre_comment_span, def_span, + key_span, val_span, post_span): + self.contents = contents + self.span = span + self.pre_ws_span = pre_ws_span + self.pre_comment_span = pre_comment_span + self.def_span = def_span + self.key_span = key_span + self.val_span = val_span + self.post_span = post_span + self.pp = pp + pass + + # getter helpers + + def get_all(self): + return self.contents[self.span[0]:self.span[1]] + + def get_pre_ws(self): + return self.contents[self.pre_ws_span[0]:self.pre_ws_span[1]] + + def get_pre_comment(self): + return self.contents[self.pre_comment_span[0]: + self.pre_comment_span[1]] + + def get_def(self): + return self.contents[self.def_span[0]:self.def_span[1]] + + def get_key(self): + return self.contents[self.key_span[0]:self.key_span[1]] + + def get_val(self): + return self.pp(self.contents[self.val_span[0]:self.val_span[1]]) + + def get_raw_val(self): + return self.contents[self.val_span[0]:self.val_span[1]] + + def get_post(self): + return self.contents[self.post_span[0]:self.post_span[1]] + + # getters + + all = property(get_all) + pre_ws = property(get_pre_ws) + pre_comment = property(get_pre_comment) + definition = property(get_def) + key = property(get_key) + val = property(get_val) + raw_val = property(get_raw_val) + post = property(get_post) + + def 
__repr__(self): + return self.key + + +class Junk(object): + ''' + An almost-Entity, representing junk data that we didn't parse. + This way, we can signal bad content as stuff we don't understand. + And the either fix that, or report real bugs in localizations. + ''' + junkid = 0 + + def __init__(self, contents, span): + self.contents = contents + self.span = span + self.pre_ws = self.pre_comment = self.definition = self.post = '' + self.__class__.junkid += 1 + self.key = '_junk_%d_%d-%d' % (self.__class__.junkid, span[0], span[1]) + + # getter helpers + def get_all(self): + return self.contents[self.span[0]:self.span[1]] + + # getters + all = property(get_all) + val = property(get_all) + + def __repr__(self): + return self.key + + +class Parser: + canMerge = True + + def __init__(self): + if not hasattr(self, 'encoding'): + self.encoding = 'utf-8' + pass + + def readFile(self, file): + f = codecs.open(file, 'r', self.encoding) + try: + self.contents = f.read() + except UnicodeDecodeError, e: + (logging.getLogger('locales') + .error("Can't read file: " + file + '; ' + str(e))) + self.contents = u'' + f.close() + + def readContents(self, contents): + (self.contents, length) = codecs.getdecoder(self.encoding)(contents) + + def parse(self): + l = [] + m = {} + for e in self: + m[e.key] = len(l) + l.append(e) + return (l, m) + + def postProcessValue(self, val): + return val + + def __iter__(self): + contents = self.contents + offset = 0 + self.header, offset = self.getHeader(contents, offset) + self.footer = '' + entity, offset = self.getEntity(contents, offset) + while entity: + yield entity + entity, offset = self.getEntity(contents, offset) + f = self.reFooter.match(contents, offset) + if f: + self.footer = f.group() + offset = f.end() + if len(contents) > offset: + yield Junk(contents, (offset, len(contents))) + pass + + def getHeader(self, contents, offset): + header = '' + h = self.reHeader.match(contents) + if h: + header = h.group() + offset = h.end() + return 
(header, offset) + + def getEntity(self, contents, offset): + m = self.reKey.match(contents, offset) + if m: + offset = m.end() + entity = self.createEntity(contents, m) + return (entity, offset) + # first check if footer has a non-empty match, + # 'cause then we don't find junk + m = self.reFooter.match(contents, offset) + if m and m.end() > offset: + return (None, offset) + m = self.reKey.search(contents, offset) + if m: + # we didn't match, but search, so there's junk between offset + # and start. We'll match() on the next turn + junkend = m.start() + return (Junk(contents, (offset, junkend)), junkend) + return (None, offset) + + def createEntity(self, contents, m): + return Entity(contents, self.postProcessValue, + *[m.span(i) for i in xrange(7)]) + + +def getParser(path): + for item in __constructors: + if re.search(item[0], path): + return item[1] + raise UserWarning("Cannot find Parser") + + +# Subgroups of the match will: +# 1: pre white space +# 2: pre comments +# 3: entity definition +# 4: entity key (name) +# 5: entity value +# 6: post comment (and white space) in the same line (dtd only) +# <--[1] +# <!-- pre comments --> <--[2] +# <!ENTITY key "value"> <!-- comment --> +# +# <-------[3]---------><------[6]------> + + +class DTDParser(Parser): + # http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar + # ":" | [A-Z] | "_" | [a-z] | + # [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] + # | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | + # [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | + # [#x10000-#xEFFFF] + CharMinusDash = u'\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD' + XmlComment = '<!--(?:-?[%s])*?-->' % CharMinusDash + NameStartChar = u':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \ + u'\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \ + u'\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD' + # + \U00010000-\U000EFFFF seems to be unsupported in python + + # NameChar ::= NameStartChar | 
"-" | "." | [0-9] | #xB7 | + # [#x0300-#x036F] | [#x203F-#x2040] + NameChar = NameStartChar + ur'\-\.0-9' + u'\xB7\u0300-\u036F\u203F-\u2040' + Name = '[' + NameStartChar + '][' + NameChar + ']*' + reKey = re.compile('(?:(?P<pre>\s*)(?P<precomment>(?:' + XmlComment + + '\s*)*)(?P<entity><!ENTITY\s+(?P<key>' + Name + + ')\s+(?P<val>\"[^\"]*\"|\'[^\']*\'?)\s*>)' + '(?P<post>[ \t]*(?:' + XmlComment + '\s*)*\n?)?)', + re.DOTALL) + # add BOM to DTDs, details in bug 435002 + reHeader = re.compile(u'^\ufeff?' + u'(\s*<!--.*(http://mozilla.org/MPL/2.0/|' + u'LICENSE BLOCK)([^-]+-)*[^-]+-->)?', re.S) + reFooter = re.compile('\s*(<!--([^-]+-)*[^-]+-->\s*)*$') + rePE = re.compile('(?:(\s*)((?:' + XmlComment + '\s*)*)' + '(<!ENTITY\s+%\s+(' + Name + + ')\s+SYSTEM\s+(\"[^\"]*\"|\'[^\']*\')\s*>\s*%' + Name + + ';)([ \t]*(?:' + XmlComment + '\s*)*\n?)?)') + + def getEntity(self, contents, offset): + ''' + Overload Parser.getEntity to special-case ParsedEntities. + Just check for a parsed entity if that method claims junk. 
+ + <!ENTITY % foo SYSTEM "url"> + %foo; + ''' + entity, inneroffset = Parser.getEntity(self, contents, offset) + if (entity and isinstance(entity, Junk)) or entity is None: + m = self.rePE.match(contents, offset) + if m: + inneroffset = m.end() + entity = Entity(contents, self.postProcessValue, + *[m.span(i) for i in xrange(7)]) + return (entity, inneroffset) + + def createEntity(self, contents, m): + valspan = m.span('val') + valspan = (valspan[0]+1, valspan[1]-1) + return Entity(contents, self.postProcessValue, m.span(), + m.span('pre'), m.span('precomment'), + m.span('entity'), m.span('key'), valspan, + m.span('post')) + + +class PropertiesParser(Parser): + escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|' + '(?P<nl>\n\s*)|(?P<single>.))', re.M) + known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'} + + def __init__(self): + self.reKey = re.compile('^(\s*)' + '((?:[#!].*?\n\s*)*)' + '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', re.M) + self.reHeader = re.compile('^\s*([#!].*\s*)+') + self.reFooter = re.compile('\s*([#!].*\s*)*$') + self._escapedEnd = re.compile(r'\\+$') + self._trailingWS = re.compile(r'[ \t]*$') + Parser.__init__(self) + + def getHeader(self, contents, offset): + header = '' + h = self.reHeader.match(contents, offset) + if h: + candidate = h.group() + if 'http://mozilla.org/MPL/2.0/' in candidate or \ + 'LICENSE BLOCK' in candidate: + header = candidate + offset = h.end() + return (header, offset) + + def getEntity(self, contents, offset): + # overwritten to parse values line by line + m = self.reKey.match(contents, offset) + if m: + offset = m.end() + while True: + endval = nextline = contents.find('\n', offset) + if nextline == -1: + endval = offset = len(contents) + break + # is newline escaped? 
+ _e = self._escapedEnd.search(contents, offset, nextline) + offset = nextline + 1 + if _e is None: + break + # backslashes at end of line, if 2*n, not escaped + if len(_e.group()) % 2 == 0: + break + # strip trailing whitespace + ws = self._trailingWS.search(contents, m.end(), offset) + if ws: + endval -= ws.end() - ws.start() + entity = Entity(contents, self.postProcessValue, + (m.start(), offset), # full span + m.span(1), # leading whitespan + m.span(2), # leading comment span + (m.start(3), offset), # entity def span + m.span(3), # key span + (m.end(), endval), # value span + (offset, offset)) # post comment span, empty + return (entity, offset) + m = self.reKey.search(contents, offset) + if m: + # we didn't match, but search, so there's junk between offset + # and start. We'll match() on the next turn + junkend = m.start() + return (Junk(contents, (offset, junkend)), junkend) + return (None, offset) + + def postProcessValue(self, val): + + def unescape(m): + found = m.groupdict() + if found['uni']: + return unichr(int(found['uni'][1:], 16)) + if found['nl']: + return '' + return self.known_escapes.get(found['single'], found['single']) + val = self.escape.sub(unescape, val) + return val + + +class DefinesParser(Parser): + # can't merge, #unfilter needs to be the last item, which we don't support + canMerge = False + + def __init__(self): + self.reKey = re.compile('^(\s*)((?:^#(?!define\s).*\s*)*)' + '(#define[ \t]+(\w+)[ \t]+(.*?))([ \t]*$\n?)', + re.M) + self.reHeader = re.compile('^\s*(#(?!define\s).*\s*)*') + self.reFooter = re.compile('\s*(#(?!define\s).*\s*)*$', re.M) + Parser.__init__(self) + + +class IniParser(Parser): + ''' + Parse files of the form: + # initial comment + [cat] + whitespace* + #comment + string=value + ... 
+ ''' + def __init__(self): + self.reHeader = re.compile('^((?:\s*|[;#].*)\n)*\[.+?\]\n', re.M) + self.reKey = re.compile('(\s*)((?:[;#].*\n\s*)*)((.+?)=(.*))(\n?)') + self.reFooter = re.compile('\s*([;#].*\s*)*$') + Parser.__init__(self) + + +DECL, COMMENT, START, END, CONTENT = range(5) + + +class BookmarksParserInner(HTMLParser): + + class Token(object): + _type = None + content = '' + + def __str__(self): + return self.content + + class DeclToken(Token): + _type = DECL + + def __init__(self, decl): + self.content = decl + pass + + def __str__(self): + return '<!%s>' % self.content + pass + + class CommentToken(Token): + _type = COMMENT + + def __init__(self, comment): + self.content = comment + pass + + def __str__(self): + return '<!--%s-->' % self.content + pass + + class StartToken(Token): + _type = START + + def __init__(self, tag, attrs, content): + self.tag = tag + self.attrs = dict(attrs) + self.content = content + pass + pass + + class EndToken(Token): + _type = END + + def __init__(self, tag): + self.tag = tag + pass + + def __str__(self): + return '</%s>' % self.tag.upper() + pass + + class ContentToken(Token): + _type = CONTENT + + def __init__(self, content): + self.content = content + pass + pass + + def __init__(self): + HTMLParser.__init__(self) + self.tokens = [] + + def parse(self, contents): + self.tokens = [] + self.feed(contents) + self.close() + return self.tokens + + # Called when we hit an end DL tag to reset the folder selections + def handle_decl(self, decl): + self.tokens.append(self.DeclToken(decl)) + + # Called when we hit an end DL tag to reset the folder selections + def handle_comment(self, comment): + self.tokens.append(self.CommentToken(comment)) + + def handle_starttag(self, tag, attrs): + self.tokens.append(self.StartToken(tag, attrs, + self.get_starttag_text())) + + # Called when text data is encountered + def handle_data(self, data): + if self.tokens[-1]._type == CONTENT: + self.tokens[-1].content += data + else: + 
self.tokens.append(self.ContentToken(data)) + + def handle_charref(self, data): + self.handle_data('&#%s;' % data) + + def handle_entityref(self, data): + self.handle_data('&%s;' % data) + + # Called when we hit an end DL tag to reset the folder selections + def handle_endtag(self, tag): + self.tokens.append(self.EndToken(tag)) + + +class BookmarksParser(Parser): + canMerge = False + + class BMEntity(object): + def __init__(self, key, val): + self.key = key + self.val = val + + def __iter__(self): + p = BookmarksParserInner() + tks = p.parse(self.contents) + i = 0 + k = [] + for i in xrange(len(tks)): + t = tks[i] + if t._type == START: + k.append(t.tag) + keys = t.attrs.keys() + keys.sort() + for attrname in keys: + yield self.BMEntity('.'.join(k) + '.@' + attrname, + t.attrs[attrname]) + if i + 1 < len(tks) and tks[i+1]._type == CONTENT: + i += 1 + t = tks[i] + v = t.content.strip() + if v: + yield self.BMEntity('.'.join(k), v) + elif t._type == END: + k.pop() + + +__constructors = [('\\.dtd$', DTDParser()), + ('\\.properties$', PropertiesParser()), + ('\\.ini$', IniParser()), + ('\\.inc$', DefinesParser()), + ('bookmarks\\.html$', BookmarksParser())] diff --git a/python/compare-locales/compare_locales/paths.py b/python/compare-locales/compare_locales/paths.py new file mode 100644 index 000000000..f72b3a2e7 --- /dev/null +++ b/python/compare-locales/compare_locales/paths.py @@ -0,0 +1,398 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import os.path +import os +from ConfigParser import ConfigParser, NoSectionError, NoOptionError +from urlparse import urlparse, urljoin +from urllib import pathname2url, url2pathname +from urllib2 import urlopen +from collections import defaultdict +from compare_locales import util + + +class L10nConfigParser(object): + '''Helper class to gather application information from ini files. + + This class is working on synchronous open to read files or web data. + Subclass this and overwrite loadConfigs and addChild if you need async. + ''' + def __init__(self, inipath, **kwargs): + """Constructor for L10nConfigParsers + + inipath -- l10n.ini path + Optional keyword arguments are fowarded to the inner ConfigParser as + defaults. + """ + if os.path.isabs(inipath): + self.inipath = 'file:%s' % pathname2url(inipath) + else: + pwdurl = 'file:%s/' % pathname2url(os.getcwd()) + self.inipath = urljoin(pwdurl, inipath) + # l10n.ini files can import other l10n.ini files, store the + # corresponding L10nConfigParsers + self.children = [] + # we really only care about the l10n directories described in l10n.ini + self.dirs = [] + # optional defaults to be passed to the inner ConfigParser (unused?) + self.defaults = kwargs + + def getDepth(self, cp): + '''Get the depth for the comparison from the parsed l10n.ini. + + Overloadable to get the source depth for fennec and friends. + ''' + try: + depth = cp.get('general', 'depth') + except: + depth = '.' + return depth + + def getFilters(self): + '''Get the test functions from this ConfigParser and all children. + + Only works with synchronous loads, used by compare-locales, which + is local anyway. 
+ ''' + filterurl = urljoin(self.inipath, 'filter.py') + try: + l = {} + execfile(url2pathname(urlparse(filterurl).path), {}, l) + if 'test' in l and callable(l['test']): + filters = [l['test']] + else: + filters = [] + except: + filters = [] + + for c in self.children: + filters += c.getFilters() + + return filters + + def loadConfigs(self): + """Entry point to load the l10n.ini file this Parser refers to. + + This implementation uses synchronous loads, subclasses might overload + this behaviour. If you do, make sure to pass a file-like object + to onLoadConfig. + """ + self.onLoadConfig(urlopen(self.inipath)) + + def onLoadConfig(self, inifile): + """Parse a file-like object for the loaded l10n.ini file.""" + cp = ConfigParser(self.defaults) + cp.readfp(inifile) + depth = self.getDepth(cp) + self.baseurl = urljoin(self.inipath, depth) + # create child loaders for any other l10n.ini files to be included + try: + for title, path in cp.items('includes'): + # skip default items + if title in self.defaults: + continue + # add child config parser + self.addChild(title, path, cp) + except NoSectionError: + pass + # try to load the "dirs" defined in the "compare" section + try: + self.dirs.extend(cp.get('compare', 'dirs').split()) + except (NoOptionError, NoSectionError): + pass + # try getting a top level compare dir, as used for fennec + try: + self.tld = cp.get('compare', 'tld') + # remove tld from comparison dirs + if self.tld in self.dirs: + self.dirs.remove(self.tld) + except (NoOptionError, NoSectionError): + self.tld = None + # try to set "all_path" and "all_url" + try: + self.all_path = cp.get('general', 'all') + self.all_url = urljoin(self.baseurl, self.all_path) + except (NoOptionError, NoSectionError): + self.all_path = None + self.all_url = None + return cp + + def addChild(self, title, path, orig_cp): + """Create a child L10nConfigParser and load it. 
+ + title -- indicates the module's name + path -- indicates the path to the module's l10n.ini file + orig_cp -- the configuration parser of this l10n.ini + """ + cp = L10nConfigParser(urljoin(self.baseurl, path), **self.defaults) + cp.loadConfigs() + self.children.append(cp) + + def getTLDPathsTuple(self, basepath): + """Given the basepath, return the path fragments to be used for + self.tld. For build runs, this is (basepath, self.tld), for + source runs, just (basepath,). + + @see overwritten method in SourceTreeConfigParser. + """ + return (basepath, self.tld) + + def dirsIter(self): + """Iterate over all dirs and our base path for this l10n.ini""" + url = urlparse(self.baseurl) + basepath = url2pathname(url.path) + if self.tld is not None: + yield self.tld, self.getTLDPathsTuple(basepath) + for dir in self.dirs: + yield dir, (basepath, dir) + + def directories(self): + """Iterate over all dirs and base paths for this l10n.ini as well + as the included ones. + """ + for t in self.dirsIter(): + yield t + for child in self.children: + for t in child.directories(): + yield t + + def allLocales(self): + """Return a list of all the locales of this project""" + return util.parseLocales(urlopen(self.all_url).read()) + + +class SourceTreeConfigParser(L10nConfigParser): + '''Subclassing L10nConfigParser to work with just the repos + checked out next to each other instead of intermingled like + we do for real builds. + ''' + + def __init__(self, inipath, basepath): + '''Add additional arguments basepath. + + basepath is used to resolve local paths via branchnames. + ''' + L10nConfigParser.__init__(self, inipath) + self.basepath = basepath + self.tld = None + + def getDepth(self, cp): + '''Get the depth for the comparison from the parsed l10n.ini. + + Overloaded to get the source depth for fennec and friends. + ''' + try: + depth = cp.get('general', 'source-depth') + except: + try: + depth = cp.get('general', 'depth') + except: + depth = '.' 
+ return depth + + def addChild(self, title, path, orig_cp): + # check if there's a section with details for this include + # we might have to check a different repo, or even VCS + # for example, projects like "mail" indicate in + # an "include_" section where to find the l10n.ini for "toolkit" + details = 'include_' + title + if orig_cp.has_section(details): + branch = orig_cp.get(details, 'mozilla') + inipath = orig_cp.get(details, 'l10n.ini') + path = self.basepath + '/' + branch + '/' + inipath + else: + path = urljoin(self.baseurl, path) + cp = SourceTreeConfigParser(path, self.basepath, **self.defaults) + cp.loadConfigs() + self.children.append(cp) + + def getTLDPathsTuple(self, basepath): + """Overwrite L10nConfigParser's getTLDPathsTuple to just return + the basepath. + """ + return (basepath, ) + + +class File(object): + + def __init__(self, fullpath, file, module=None, locale=None): + self.fullpath = fullpath + self.file = file + self.module = module + self.locale = locale + pass + + def getContents(self): + # open with universal line ending support and read + return open(self.fullpath, 'rU').read() + + def __hash__(self): + f = self.file + if self.module: + f = self.module + '/' + f + return hash(f) + + def __str__(self): + return self.fullpath + + def __cmp__(self, other): + if not isinstance(other, File): + raise NotImplementedError + rv = cmp(self.module, other.module) + if rv != 0: + return rv + return cmp(self.file, other.file) + + +class EnumerateDir(object): + ignore_dirs = ['CVS', '.svn', '.hg', '.git'] + + def __init__(self, basepath, module='', locale=None, ignore_subdirs=[]): + self.basepath = basepath + self.module = module + self.locale = locale + self.ignore_subdirs = ignore_subdirs + pass + + def cloneFile(self, other): + ''' + Return a File object that this enumerator would return, if it had it. 
+ ''' + return File(os.path.join(self.basepath, other.file), other.file, + self.module, self.locale) + + def __iter__(self): + # our local dirs are given as a tuple of path segments, starting off + # with an empty sequence for the basepath. + dirs = [()] + while dirs: + dir = dirs.pop(0) + fulldir = os.path.join(self.basepath, *dir) + try: + entries = os.listdir(fulldir) + except OSError: + # we probably just started off in a non-existing dir, ignore + continue + entries.sort() + for entry in entries: + leaf = os.path.join(fulldir, entry) + if os.path.isdir(leaf): + if entry not in self.ignore_dirs and \ + leaf not in [os.path.join(self.basepath, d) + for d in self.ignore_subdirs]: + dirs.append(dir + (entry,)) + continue + yield File(leaf, '/'.join(dir + (entry,)), + self.module, self.locale) + + +class LocalesWrap(object): + + def __init__(self, base, module, locales, ignore_subdirs=[]): + self.base = base + self.module = module + self.locales = locales + self.ignore_subdirs = ignore_subdirs + + def __iter__(self): + for locale in self.locales: + path = os.path.join(self.base, locale, self.module) + yield (locale, EnumerateDir(path, self.module, locale, + self.ignore_subdirs)) + + +class EnumerateApp(object): + reference = 'en-US' + + def __init__(self, inipath, l10nbase, locales=None): + self.setupConfigParser(inipath) + self.modules = defaultdict(dict) + self.l10nbase = os.path.abspath(l10nbase) + self.filters = [] + drive, tail = os.path.splitdrive(inipath) + self.addFilters(*self.config.getFilters()) + self.locales = locales or self.config.allLocales() + self.locales.sort() + + def setupConfigParser(self, inipath): + self.config = L10nConfigParser(inipath) + self.config.loadConfigs() + + def addFilters(self, *args): + self.filters += args + + value_map = {None: None, 'error': 0, 'ignore': 1, 'report': 2} + + def filter(self, l10n_file, entity=None): + '''Go through all added filters, and, + - map "error" -> 0, "ignore" -> 1, "report" -> 2 + - if filter.test 
returns a bool, map that to + False -> "ignore" (1), True -> "error" (0) + - take the max of all reported + ''' + rv = 0 + for f in reversed(self.filters): + try: + _r = f(l10n_file.module, l10n_file.file, entity) + except: + # XXX error handling + continue + if isinstance(_r, bool): + _r = [1, 0][_r] + else: + # map string return value to int, default to 'error', + # None is None + _r = self.value_map.get(_r, 0) + if _r is not None: + rv = max(rv, _r) + return ['error', 'ignore', 'report'][rv] + + def __iter__(self): + ''' + Iterate over all modules, return en-US directory enumerator, and an + iterator over all locales in each iteration. Per locale, the locale + code and an directory enumerator will be given. + ''' + dirmap = dict(self.config.directories()) + mods = dirmap.keys() + mods.sort() + for mod in mods: + if self.reference == 'en-US': + base = os.path.join(*(dirmap[mod] + ('locales', 'en-US'))) + else: + base = os.path.join(self.l10nbase, self.reference, mod) + yield (mod, EnumerateDir(base, mod, self.reference), + LocalesWrap(self.l10nbase, mod, self.locales, + [m[len(mod)+1:] for m in mods if m.startswith(mod+'/')])) + + +class EnumerateSourceTreeApp(EnumerateApp): + '''Subclass EnumerateApp to work on side-by-side checked out + repos, and to no pay attention to how the source would actually + be checked out for building. + + It's supporting applications like Fennec, too, which have + 'locales/en-US/...' in their root dir, but claim to be 'mobile'. 
+ ''' + + def __init__(self, inipath, basepath, l10nbase, locales=None): + self.basepath = basepath + EnumerateApp.__init__(self, inipath, l10nbase, locales) + + def setupConfigParser(self, inipath): + self.config = SourceTreeConfigParser(inipath, self.basepath) + self.config.loadConfigs() + + +def get_base_path(mod, loc): + 'statics for path patterns and conversion' + __l10n = 'l10n/%(loc)s/%(mod)s' + __en_US = 'mozilla/%(mod)s/locales/en-US' + if loc == 'en-US': + return __en_US % {'mod': mod} + return __l10n % {'mod': mod, 'loc': loc} + + +def get_path(mod, loc, leaf): + return get_base_path(mod, loc) + '/' + leaf diff --git a/python/compare-locales/compare_locales/tests/__init__.py b/python/compare-locales/compare_locales/tests/__init__.py new file mode 100644 index 000000000..8808d78f4 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/__init__.py @@ -0,0 +1,49 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''Mixins for parser tests. +''' + +from itertools import izip_longest +from pkg_resources import resource_string +import re + +from compare_locales.parser import getParser + + +class ParserTestMixin(): + '''Utility methods used by the parser tests. + ''' + filename = None + + def setUp(self): + '''Create a parser for this test. + ''' + self.parser = getParser(self.filename) + + def tearDown(self): + 'tear down this test' + del self.parser + + def resource(self, name): + testcontent = resource_string(__name__, 'data/' + name) + # fake universal line endings + testcontent = re.sub('\r\n?', lambda m: '\n', testcontent) + return testcontent + + def _test(self, content, refs): + '''Helper to test the parser. + Compares the result of parsing content with the given list + of reference keys and values. 
+ ''' + self.parser.readContents(content) + entities = [entity for entity in self.parser] + for entity, ref in izip_longest(entities, refs): + self.assertTrue(entity, 'excess reference entity') + self.assertTrue(ref, 'excess parsed entity') + self.assertEqual(entity.val, ref[1]) + if ref[0].startswith('_junk'): + self.assertTrue(re.match(ref[0], entity.key)) + else: + self.assertEqual(entity.key, ref[0]) diff --git a/python/compare-locales/compare_locales/tests/data/bug121341.properties b/python/compare-locales/compare_locales/tests/data/bug121341.properties new file mode 100644 index 000000000..b45fc9698 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/data/bug121341.properties @@ -0,0 +1,68 @@ +# simple check +1=abc +# test whitespace trimming in key and value + 2 = xy +# test parsing of escaped values +3 = \u1234\t\r\n\uAB\ +\u1\n +# test multiline properties +4 = this is \ +multiline property +5 = this is \ + another multiline property +# property with DOS EOL
+6 = test\u0036
+# test multiline property with with DOS EOL +7 = yet another multi\
+ line propery
+# trimming should not trim escaped whitespaces +8 = \ttest5\u0020 +# another variant of #8 +9 = \ test6\t +# test UTF-8 encoded property/value +10aሴb = cì·¯d +# next property should test unicode escaping at the boundary of parsing buffer +# buffer size is expected to be 4096 so add comments to get to this offset +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ 
+################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ +############################################################################### +11 = \uABCD diff --git a/python/compare-locales/compare_locales/tests/data/test.properties b/python/compare-locales/compare_locales/tests/data/test.properties new file mode 100644 index 
000000000..19cae9702 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/data/test.properties @@ -0,0 +1,14 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +1=1 + 2=2 +3 =3 + 4 =4 +5=5 +6= 6 +7=7 +8= 8 +# this is a comment +9=this is the first part of a continued line \ + and here is the 2nd part diff --git a/python/compare-locales/compare_locales/tests/data/triple-license.dtd b/python/compare-locales/compare_locales/tests/data/triple-license.dtd new file mode 100644 index 000000000..4a28b17a6 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/data/triple-license.dtd @@ -0,0 +1,38 @@ +<!-- ***** BEGIN LICENSE BLOCK ***** +#if 0 + - Version: MPL 1.1/GPL 2.0/LGPL 2.1 + - + - The contents of this file are subject to the Mozilla Public License Version + - 1.1 (the "License"); you may not use this file except in compliance with + - the License. You may obtain a copy of the License at + - http://www.mozilla.org/MPL/ + - + - Software distributed under the License is distributed on an "AS IS" basis, + - WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + - for the specific language governing rights and limitations under the + - License. + - + - The Original Code is mozilla.org Code. + - + - The Initial Developer of the Original Code is dummy. + - Portions created by the Initial Developer are Copyright (C) 2005 + - the Initial Developer. All Rights Reserved. + - + - Contributor(s): + - + - Alternatively, the contents of this file may be used under the terms of + - either the GNU General Public License Version 2 or later (the "GPL"), or + - the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + - in which case the provisions of the GPL or the LGPL are applicable instead + - of those above. 
If you wish to allow use of your version of this file only + - under the terms of either the GPL or the LGPL, and not to allow others to + - use your version of this file under the terms of the MPL, indicate your + - decision by deleting the provisions above and replace them with the notice + - and other provisions required by the LGPL or the GPL. If you do not delete + - the provisions above, a recipient may use your version of this file under + - the terms of any one of the MPL, the GPL or the LGPL. + - +#endif + - ***** END LICENSE BLOCK ***** --> + +<!ENTITY foo "value"> diff --git a/python/compare-locales/compare_locales/tests/test_checks.py b/python/compare-locales/compare_locales/tests/test_checks.py new file mode 100644 index 000000000..b995d43f9 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/test_checks.py @@ -0,0 +1,403 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import unittest + +from compare_locales.checks import getChecker +from compare_locales.parser import getParser, Entity +from compare_locales.paths import File + + +class BaseHelper(unittest.TestCase): + file = None + refContent = None + + def setUp(self): + p = getParser(self.file.file) + p.readContents(self.refContent) + self.refList, self.refMap = p.parse() + + def _test(self, content, refWarnOrErrors, with_ref_file=False): + p = getParser(self.file.file) + p.readContents(content) + l10n = [e for e in p] + assert len(l10n) == 1 + l10n = l10n[0] + if with_ref_file: + kwargs = { + 'reference': self.refList + } + else: + kwargs = {} + checker = getChecker(self.file, **kwargs) + ref = self.refList[self.refMap[l10n.key]] + found = tuple(checker.check(ref, l10n)) + self.assertEqual(found, refWarnOrErrors) + + +class TestProperties(BaseHelper): + file = File('foo.properties', 'foo.properties') + refContent = '''some = value +''' + + def testGood(self): + self._test('''some = localized''', + tuple()) + + def testMissedEscape(self): + self._test(r'''some = \u67ood escape, bad \escape''', + (('warning', 20, r'unknown escape sequence, \e', + 'escape'),)) + + +class TestPlurals(BaseHelper): + file = File('foo.properties', 'foo.properties') + refContent = '''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. +# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - #2 +''' + + def testGood(self): + self._test('''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. +# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 filers +''', + tuple()) + + def testNotUsed(self): + self._test('''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. 
+# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - Downloads;#1 filers +''', + (('warning', 0, 'not all variables used in l10n', + 'plural'),)) + + def testNotDefined(self): + self._test('''\ +# LOCALIZATION NOTE (downloadsTitleFiles): Semi-colon list of plural forms. +# See: http://developer.mozilla.org/en/docs/Localization_and_Plurals +# #1 number of files +# example: 111 files - Downloads +downloadsTitleFiles=#1 file - Downloads;#1 files - #2;#1 #3 +''', + (('error', 0, 'unreplaced variables in l10n', 'plural'),)) + + +class TestDTDs(BaseHelper): + file = File('foo.dtd', 'foo.dtd') + refContent = '''<!ENTITY foo "This is 'good'"> +<!ENTITY width "10ch"> +<!ENTITY style "width: 20ch; height: 280px;"> +<!ENTITY minStyle "min-height: 50em;"> +<!ENTITY ftd "0"> +<!ENTITY formatPercent "This is 100% correct"> +<!ENTITY some.key "K"> +''' + + def testWarning(self): + self._test('''<!ENTITY foo "This is ¬ good"> +''', + (('warning', (0, 0), 'Referencing unknown entity `not`', + 'xmlparse'),)) + # make sure we only handle translated entity references + self._test(u'''<!ENTITY foo "This is &ƞǿŧ; good"> +'''.encode('utf-8'), + (('warning', (0, 0), u'Referencing unknown entity `ƞǿŧ`', + 'xmlparse'),)) + + def testErrorFirstLine(self): + self._test('''<!ENTITY foo "This is </bad> stuff"> +''', + (('error', (1, 10), 'mismatched tag', 'xmlparse'),)) + + def testErrorSecondLine(self): + self._test('''<!ENTITY foo "This is + </bad> +stuff"> +''', + (('error', (2, 4), 'mismatched tag', 'xmlparse'),)) + + def testKeyErrorSingleAmpersand(self): + self._test('''<!ENTITY some.key "&"> +''', + (('error', (1, 1), 'not well-formed (invalid token)', + 'xmlparse'),)) + + def testXMLEntity(self): + self._test('''<!ENTITY foo "This is "good""> +''', + tuple()) + + def testPercentEntity(self): + self._test('''<!ENTITY formatPercent "Another 100%"> +''', + tuple()) + 
self._test('''<!ENTITY formatPercent "Bad 100% should fail"> +''', + (('error', (0, 32), 'not well-formed (invalid token)', + 'xmlparse'),)) + + def testNoNumber(self): + self._test('''<!ENTITY ftd "foo">''', + (('warning', 0, 'reference is a number', 'number'),)) + + def testNoLength(self): + self._test('''<!ENTITY width "15miles">''', + (('error', 0, 'reference is a CSS length', 'css'),)) + + def testNoStyle(self): + self._test('''<!ENTITY style "15ch">''', + (('error', 0, 'reference is a CSS spec', 'css'),)) + self._test('''<!ENTITY style "junk">''', + (('error', 0, 'reference is a CSS spec', 'css'),)) + + def testStyleWarnings(self): + self._test('''<!ENTITY style "width:15ch">''', + (('warning', 0, 'height only in reference', 'css'),)) + self._test('''<!ENTITY style "width:15em;height:200px;">''', + (('warning', 0, "units for width don't match (em != ch)", + 'css'),)) + + def testNoWarning(self): + self._test('''<!ENTITY width "12em">''', tuple()) + self._test('''<!ENTITY style "width:12ch;height:200px;">''', tuple()) + self._test('''<!ENTITY ftd "0">''', tuple()) + + +class TestEntitiesInDTDs(BaseHelper): + file = File('foo.dtd', 'foo.dtd') + refContent = '''<!ENTITY short "This is &brandShortName;"> +<!ENTITY shorter "This is &brandShorterName;"> +<!ENTITY ent.start "Using &brandShorterName; start to"> +<!ENTITY ent.end " end"> +''' + + def testOK(self): + self._test('''<!ENTITY ent.start "Mit &brandShorterName;">''', tuple(), + with_ref_file=True) + + def testMismatch(self): + self._test('''<!ENTITY ent.start "Mit &brandShortName;">''', + (('warning', (0, 0), + 'Entity brandShortName referenced, ' + 'but brandShorterName used in context', + 'xmlparse'),), + with_ref_file=True) + + def testAcross(self): + self._test('''<!ENTITY ent.end "Mit &brandShorterName;">''', + tuple(), + with_ref_file=True) + + def testAcrossWithMismatch(self): + '''If we could tell that ent.start and ent.end are one string, + we should warn. 
Sadly, we can't, so this goes without warning.''' + self._test('''<!ENTITY ent.end "Mit &brandShortName;">''', + tuple(), + with_ref_file=True) + + def testUnknownWithRef(self): + self._test('''<!ENTITY ent.start "Mit &foopy;">''', + (('warning', + (0, 0), + 'Referencing unknown entity `foopy` ' + '(brandShorterName used in context, ' + 'brandShortName known)', + 'xmlparse'),), + with_ref_file=True) + + def testUnknown(self): + self._test('''<!ENTITY ent.end "Mit &foopy;">''', + (('warning', + (0, 0), + 'Referencing unknown entity `foopy`' + ' (brandShortName, brandShorterName known)', + 'xmlparse'),), + with_ref_file=True) + + +class TestAndroid(unittest.TestCase): + """Test Android checker + + Make sure we're hitting our extra rules only if + we're passing in a DTD file in the embedding/android module. + """ + apos_msg = u"Apostrophes in Android DTDs need escaping with \\' or " + \ + u"\\u0027, or use \u2019, or put string in quotes." + quot_msg = u"Quotes in Android DTDs need escaping with \\\" or " + \ + u"\\u0022, or put string in apostrophes." + + def getEntity(self, v): + return Entity(v, lambda s: s, (0, len(v)), (), (0, 0), (), (), + (0, len(v)), ()) + + def getDTDEntity(self, v): + v = v.replace('"', '"') + return Entity('<!ENTITY foo "%s">' % v, + lambda s: s, + (0, len(v) + 16), (), (0, 0), (), (9, 12), + (14, len(v) + 14), ()) + + def test_android_dtd(self): + """Testing the actual android checks. The logic is involved, + so this is a lot of nitty gritty detail tests. 
+ """ + f = File("embedding/android/strings.dtd", "strings.dtd", + "embedding/android") + checker = getChecker(f) + # good string + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # dtd warning + l10n = self.getDTDEntity("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('warning', (0, 0), + 'Referencing unknown entity `ref`', 'xmlparse'),)) + # no report on stray ampersand or quote, if not completely quoted + for i in xrange(3): + # make sure we're catching unescaped apostrophes, + # try 0..5 backticks + l10n = self.getDTDEntity("\\"*(2*i) + "'") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 2*i, self.apos_msg, 'android'),)) + l10n = self.getDTDEntity("\\"*(2*i + 1) + "'") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # make sure we don't report if apos string is quoted + l10n = self.getDTDEntity('"' + "\\"*(2*i) + "'\"") + tpl = tuple(checker.check(ref, l10n)) + self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" + % (l10n.val, str(tpl))) + l10n = self.getDTDEntity('"' + "\\"*(2*i+1) + "'\"") + tpl = tuple(checker.check(ref, l10n)) + self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" + % (l10n.val, str(tpl))) + # make sure we're catching unescaped quotes, try 0..5 backticks + l10n = self.getDTDEntity("\\"*(2*i) + "\"") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 2*i, self.quot_msg, 'android'),)) + l10n = self.getDTDEntity("\\"*(2*i + 1) + "'") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # make sure we don't report if quote string is single quoted + l10n = self.getDTDEntity("'" + "\\"*(2*i) + "\"'") + tpl = tuple(checker.check(ref, l10n)) + self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" % + (l10n.val, str(tpl))) + l10n = self.getDTDEntity('"' + "\\"*(2*i+1) + "'\"") + tpl = tuple(checker.check(ref, l10n)) + 
self.assertEqual(tpl, (), + "`%s` shouldn't fail but got %s" % + (l10n.val, str(tpl))) + # check for mixed quotes and ampersands + l10n = self.getDTDEntity("'\"") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 0, self.apos_msg, 'android'), + ('error', 1, self.quot_msg, 'android'))) + l10n = self.getDTDEntity("''\"'") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 1, self.apos_msg, 'android'),)) + l10n = self.getDTDEntity('"\'""') + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 2, self.quot_msg, 'android'),)) + + # broken unicode escape + l10n = self.getDTDEntity("Some broken \u098 unicode") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 12, 'truncated \\uXXXX escape', + 'android'),)) + # broken unicode escape, try to set the error off + l10n = self.getDTDEntity(u"\u9690"*14+"\u006"+" "+"\u0064") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 14, 'truncated \\uXXXX escape', + 'android'),)) + + def test_android_prop(self): + f = File("embedding/android/strings.properties", "strings.properties", + "embedding/android") + checker = getChecker(f) + # good plain string + ref = self.getEntity("plain string") + l10n = self.getEntity("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # no dtd warning + ref = self.getEntity("plain string") + l10n = self.getEntity("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # no report on stray ampersand + ref = self.getEntity("plain string") + l10n = self.getEntity("plain localized string with apos: '") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # report on bad printf + ref = self.getEntity("string with %s") + l10n = self.getEntity("string with %S") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('error', 0, 'argument 1 `S` should be `s`', + 'printf'),)) + + def test_non_android_dtd(self): + f = File("browser/strings.dtd", "strings.dtd", "browser") 
+ checker = getChecker(f) + # good string + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # dtd warning + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('warning', (0, 0), + 'Referencing unknown entity `ref`', 'xmlparse'),)) + # no report on stray ampersand + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string with apos: '") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + + def test_entities_across_dtd(self): + f = File("browser/strings.dtd", "strings.dtd", "browser") + p = getParser(f.file) + p.readContents('<!ENTITY other "some &good.ref;">') + ref = p.parse() + checker = getChecker(f, reference=ref[0]) + # good string + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + # dtd warning + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string &ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + (('warning', (0, 0), + 'Referencing unknown entity `ref` (good.ref known)', + 'xmlparse'),)) + # no report on stray ampersand + ref = self.getDTDEntity("plain string") + l10n = self.getDTDEntity("plain localized string with &good.ref;") + self.assertEqual(tuple(checker.check(ref, l10n)), + ()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/compare-locales/compare_locales/tests/test_compare.py b/python/compare-locales/compare_locales/tests/test_compare.py new file mode 100644 index 000000000..51ba7cd8c --- /dev/null +++ b/python/compare-locales/compare_locales/tests/test_compare.py @@ -0,0 +1,90 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest + +from compare_locales import compare + + +class TestTree(unittest.TestCase): + '''Test the Tree utility class + + Tree value classes need to be in-place editable + ''' + + def test_empty_dict(self): + tree = compare.Tree(dict) + self.assertEqual(list(tree.getContent()), []) + self.assertDictEqual( + tree.toJSON(), + {} + ) + + def test_disjoint_dict(self): + tree = compare.Tree(dict) + tree['one/entry']['leaf'] = 1 + tree['two/other']['leaf'] = 2 + self.assertEqual( + list(tree.getContent()), + [ + (0, 'key', ('one', 'entry')), + (1, 'value', {'leaf': 1}), + (0, 'key', ('two', 'other')), + (1, 'value', {'leaf': 2}) + ] + ) + self.assertDictEqual( + tree.toJSON(), + { + 'children': [ + ('one/entry', + {'value': {'leaf': 1}} + ), + ('two/other', + {'value': {'leaf': 2}} + ) + ] + } + ) + self.assertMultiLineEqual( + str(tree), + '''\ +one/entry + {'leaf': 1} +two/other + {'leaf': 2}\ +''' + ) + + def test_overlapping_dict(self): + tree = compare.Tree(dict) + tree['one/entry']['leaf'] = 1 + tree['one/other']['leaf'] = 2 + self.assertEqual( + list(tree.getContent()), + [ + (0, 'key', ('one',)), + (1, 'key', ('entry',)), + (2, 'value', {'leaf': 1}), + (1, 'key', ('other',)), + (2, 'value', {'leaf': 2}) + ] + ) + self.assertDictEqual( + tree.toJSON(), + { + 'children': [ + ('one', { + 'children': [ + ('entry', + {'value': {'leaf': 1}} + ), + ('other', + {'value': {'leaf': 2}} + ) + ] + }) + ] + } + ) diff --git a/python/compare-locales/compare_locales/tests/test_dtd.py b/python/compare-locales/compare_locales/tests/test_dtd.py new file mode 100644 index 000000000..87ddcde30 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/test_dtd.py @@ -0,0 +1,86 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''Tests for the DTD parser. +''' + +import unittest +import re + +from compare_locales.parser import getParser +from compare_locales.tests import ParserTestMixin + + +class TestDTD(ParserTestMixin, unittest.TestCase): + '''Tests for the DTD Parser.''' + filename = 'foo.dtd' + + def test_one_entity(self): + self._test('''<!ENTITY foo.label "stuff">''', + (('foo.label', 'stuff'),)) + + quoteContent = '''<!ENTITY good.one "one"> +<!ENTITY bad.one "bad " quote"> +<!ENTITY good.two "two"> +<!ENTITY bad.two "bad "quoted" word"> +<!ENTITY good.three "three"> +<!ENTITY good.four "good ' quote"> +<!ENTITY good.five "good 'quoted' word"> +''' + quoteRef = ( + ('good.one', 'one'), + ('_junk_\\d_25-56$', '<!ENTITY bad.one "bad " quote">'), + ('good.two', 'two'), + ('_junk_\\d_82-119$', '<!ENTITY bad.two "bad "quoted" word">'), + ('good.three', 'three'), + ('good.four', 'good \' quote'), + ('good.five', 'good \'quoted\' word'),) + + def test_quotes(self): + self._test(self.quoteContent, self.quoteRef) + + def test_apos(self): + qr = re.compile('[\'"]', re.M) + + def quot2apos(s): + return qr.sub(lambda m: m.group(0) == '"' and "'" or '"', s) + + self._test(quot2apos(self.quoteContent), + map(lambda t: (t[0], quot2apos(t[1])), self.quoteRef)) + + def test_parsed_ref(self): + self._test('''<!ENTITY % fooDTD SYSTEM "chrome://brand.dtd"> + %fooDTD; +''', + (('fooDTD', '"chrome://brand.dtd"'),)) + + def test_trailing_comment(self): + self._test('''<!ENTITY first "string"> +<!ENTITY second "string"> +<!-- +<!ENTITY commented "out"> +--> +''', + (('first', 'string'), ('second', 'string'))) + + def test_license_header(self): + p = getParser('foo.dtd') + p.readContents(self.resource('triple-license.dtd')) + for e in p: + self.assertEqual(e.key, 'foo') + self.assertEqual(e.val, 'value') + self.assert_('MPL' in p.header) + p.readContents('''\ +<!-- This Source Code Form is 
subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this file, + - You can obtain one at http://mozilla.org/MPL/2.0/. --> +<!ENTITY foo "value"> +''') + for e in p: + self.assertEqual(e.key, 'foo') + self.assertEqual(e.val, 'value') + self.assert_('MPL' in p.header) + +if __name__ == '__main__': + unittest.main() diff --git a/python/compare-locales/compare_locales/tests/test_ini.py b/python/compare-locales/compare_locales/tests/test_ini.py new file mode 100644 index 000000000..4c8cc03e1 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/test_ini.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest + +from compare_locales.tests import ParserTestMixin + + +mpl2 = '''\ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this file, +; You can obtain one at http://mozilla.org/MPL/2.0/. 
+''' + + +class TestIniParser(ParserTestMixin, unittest.TestCase): + + filename = 'foo.ini' + + def testSimpleHeader(self): + self._test('''; This file is in the UTF-8 encoding +[Strings] +TitleText=Some Title +''', (('TitleText', 'Some Title'),)) + self.assert_('UTF-8' in self.parser.header) + + def testMPL2_Space_UTF(self): + self._test(mpl2 + ''' +; This file is in the UTF-8 encoding +[Strings] +TitleText=Some Title +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) + + def testMPL2_Space(self): + self._test(mpl2 + ''' +[Strings] +TitleText=Some Title +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) + + def testMPL2_MultiSpace(self): + self._test(mpl2 + '''\ + +; more comments + +[Strings] +TitleText=Some Title +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) + + def testMPL2_JunkBeforeCategory(self): + self._test(mpl2 + '''\ +Junk +[Strings] +TitleText=Some Title +''', (('_junk_\\d+_0-213$', mpl2 + '''\ +Junk +[Strings]'''), ('TitleText', 'Some Title'))) + self.assert_('MPL' not in self.parser.header) + + def test_TrailingComment(self): + self._test(mpl2 + ''' +[Strings] +TitleText=Some Title +;Stray trailing comment +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) + + def test_SpacedTrailingComments(self): + self._test(mpl2 + ''' +[Strings] +TitleText=Some Title + +;Stray trailing comment +;Second stray comment + +''', (('TitleText', 'Some Title'),)) + self.assert_('MPL' in self.parser.header) + + def test_TrailingCommentsAndJunk(self): + self._test(mpl2 + ''' +[Strings] +TitleText=Some Title + +;Stray trailing comment +Junk +;Second stray comment + +''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-284$', '''\ + +;Stray trailing comment +Junk +;Second stray comment + +'''))) + self.assert_('MPL' in self.parser.header) + + def test_JunkInbetweenEntries(self): + self._test(mpl2 + ''' +[Strings] +TitleText=Some Title + +Junk + 
+Good=other string +''', (('TitleText', 'Some Title'), ('_junk_\\d+_231-236$', '''\ + +Junk'''), ('Good', 'other string'))) + self.assert_('MPL' in self.parser.header) + +if __name__ == '__main__': + unittest.main() diff --git a/python/compare-locales/compare_locales/tests/test_merge.py b/python/compare-locales/compare_locales/tests/test_merge.py new file mode 100644 index 000000000..c006edbb5 --- /dev/null +++ b/python/compare-locales/compare_locales/tests/test_merge.py @@ -0,0 +1,265 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest +import os +from tempfile import mkdtemp +import shutil + +from compare_locales.parser import getParser +from compare_locales.paths import File +from compare_locales.compare import ContentComparer + + +class ContentMixin(object): + maxDiff = None # we got big dictionaries to compare + extension = None # OVERLOAD + + def reference(self, content): + self.ref = os.path.join(self.tmp, "en-reference" + self.extension) + open(self.ref, "w").write(content) + + def localized(self, content): + self.l10n = os.path.join(self.tmp, "l10n" + self.extension) + open(self.l10n, "w").write(content) + + +class TestProperties(unittest.TestCase, ContentMixin): + extension = '.properties' + + def setUp(self): + self.tmp = mkdtemp() + os.mkdir(os.path.join(self.tmp, "merge")) + + def tearDown(self): + shutil.rmtree(self.tmp) + del self.tmp + + def testGood(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = barVal +eff = effVal""") + self.localized("""foo = lFoo +bar = lBar +eff = lEff +""") + cc = ContentComparer() + cc.set_merge_stage(os.path.join(self.tmp, "merge")) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", "")) + self.assertDictEqual( + cc.observer.toJSON(), + {'summary': + {None: { + 
'changed': 3 + }}, + 'details': {} + } + ) + self.assert_(not os.path.exists(os.path.join(cc.merge_stage, + 'l10n.properties'))) + + def testMissing(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = barVal +eff = effVal""") + self.localized("""bar = lBar +""") + cc = ContentComparer() + cc.set_merge_stage(os.path.join(self.tmp, "merge")) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", "")) + self.assertDictEqual( + cc.observer.toJSON(), + {'summary': + {None: { + 'changed': 1, 'missing': 2 + }}, + 'details': { + 'children': [ + ('l10n.properties', + {'value': {'missingEntity': [u'eff', u'foo']}} + ) + ]} + } + ) + mergefile = os.path.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + [m, n] = p.parse() + self.assertEqual(map(lambda e: e.key, m), ["bar", "eff", "foo"]) + + def testError(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +bar = %d barVal +eff = effVal""") + self.localized("""bar = %S lBar +eff = leffVal +""") + cc = ContentComparer() + cc.set_merge_stage(os.path.join(self.tmp, "merge")) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", "")) + self.assertDictEqual( + cc.observer.toJSON(), + {'summary': + {None: { + 'changed': 2, 'errors': 1, 'missing': 1 + }}, + 'details': { + 'children': [ + ('l10n.properties', + {'value': { + 'error': [u'argument 1 `S` should be `d` ' + u'at line 1, column 6 for bar'], + 'missingEntity': [u'foo']}} + ) + ]} + } + ) + mergefile = os.path.join(self.tmp, "merge", "l10n.properties") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + [m, n] = p.parse() + self.assertEqual([e.key for e in m], ["eff", "foo", "bar"]) + self.assertEqual(m[n['bar']].val, '%d barVal') + + def testObsolete(self): + 
self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""foo = fooVal +eff = effVal""") + self.localized("""foo = fooVal +other = obsolete +eff = leffVal +""") + cc = ContentComparer() + cc.set_merge_stage(os.path.join(self.tmp, "merge")) + cc.compare(File(self.ref, "en-reference.properties", ""), + File(self.l10n, "l10n.properties", "")) + self.assertDictEqual( + cc.observer.toJSON(), + {'summary': + {None: { + 'changed': 1, 'obsolete': 1, 'unchanged': 1 + }}, + 'details': { + 'children': [ + ('l10n.properties', + {'value': {'obsoleteEntity': [u'other']}})]}, + } + ) + + +class TestDTD(unittest.TestCase, ContentMixin): + extension = '.dtd' + + def setUp(self): + self.tmp = mkdtemp() + os.mkdir(os.path.join(self.tmp, "merge")) + + def tearDown(self): + shutil.rmtree(self.tmp) + del self.tmp + + def testGood(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'barVal'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY foo 'lFoo'> +<!ENTITY bar 'lBar'> +<!ENTITY eff 'lEff'> +""") + cc = ContentComparer() + cc.set_merge_stage(os.path.join(self.tmp, "merge")) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", "")) + self.assertDictEqual( + cc.observer.toJSON(), + {'summary': + {None: { + 'changed': 3 + }}, + 'details': {} + } + ) + self.assert_( + not os.path.exists(os.path.join(cc.merge_stage, 'l10n.dtd'))) + + def testMissing(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'barVal'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY bar 'lBar'> +""") + cc = ContentComparer() + cc.set_merge_stage(os.path.join(self.tmp, "merge")) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", "")) + self.assertDictEqual( + cc.observer.toJSON(), + {'summary': + {None: { + 'changed': 1, 'missing': 2 + }}, + 'details': { + 'children': [ + ('l10n.dtd', + {'value': {'missingEntity': [u'eff', 
u'foo']}} + ) + ]} + } + ) + mergefile = os.path.join(self.tmp, "merge", "l10n.dtd") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + [m, n] = p.parse() + self.assertEqual(map(lambda e: e.key, m), ["bar", "eff", "foo"]) + + def testJunk(self): + self.assertTrue(os.path.isdir(self.tmp)) + self.reference("""<!ENTITY foo 'fooVal'> +<!ENTITY bar 'barVal'> +<!ENTITY eff 'effVal'>""") + self.localized("""<!ENTITY foo 'fooVal'> +<!ENTY bar 'gimmick'> +<!ENTITY eff 'effVal'> +""") + cc = ContentComparer() + cc.set_merge_stage(os.path.join(self.tmp, "merge")) + cc.compare(File(self.ref, "en-reference.dtd", ""), + File(self.l10n, "l10n.dtd", "")) + self.assertDictEqual( + cc.observer.toJSON(), + {'summary': + {None: { + 'errors': 1, 'missing': 1, 'unchanged': 2 + }}, + 'details': { + 'children': [ + ('l10n.dtd', + {'value': { + 'error': [u'Unparsed content "<!ENTY bar ' + u'\'gimmick\'>" at 23-44'], + 'missingEntity': [u'bar']}} + ) + ]} + } + ) + mergefile = os.path.join(self.tmp, "merge", "l10n.dtd") + self.assertTrue(os.path.isfile(mergefile)) + p = getParser(mergefile) + p.readFile(mergefile) + [m, n] = p.parse() + self.assertEqual(map(lambda e: e.key, m), ["foo", "eff", "bar"]) + +if __name__ == '__main__': + unittest.main() diff --git a/python/compare-locales/compare_locales/tests/test_properties.py b/python/compare-locales/compare_locales/tests/test_properties.py new file mode 100644 index 000000000..331a1a57c --- /dev/null +++ b/python/compare-locales/compare_locales/tests/test_properties.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
class TestPropertiesParser(ParserTestMixin, unittest.TestCase):
    '''Parser tests for .properties content.

    The _test(), resource() helpers and the parser attribute are used
    as provided by ParserTestMixin.
    '''

    filename = 'foo.properties'

    def testBackslashes(self):
        # Raw string: the backslashes below are literal file content.
        # An odd number at the end of a line continues the value, an
        # even number does not, per the expected entities.
        self._test(r'''one_line = This is one line
two_line = This is the first \
of two lines
one_line_trailing = This line ends in \\
and has junk
two_lines_triple = This line is one of two and ends in \\\
and still has another line coming
''', (
            ('one_line', 'This is one line'),
            ('two_line', u'This is the first of two lines'),
            ('one_line_trailing', u'This line ends in \\'),
            ('_junk_\\d+_113-126$', 'and has junk\n'),
            ('two_lines_triple', 'This line is one of two and ends in \\'
             'and still has another line coming')))

    def testProperties(self):
        # port of netwerk/test/PropertiesTest.cpp
        self.parser.readContents(self.resource('test.properties'))
        ref = ['1', '2', '3', '4', '5', '6', '7', '8',
               'this is the first part of a continued line '
               'and here is the 2nd part']
        i = iter(self.parser)
        for r, e in zip(ref, i):
            self.assertEqual(e.val, r)

    def test_bug121341(self):
        # port of xpcom/tests/unit/test_bug121341.js
        self.parser.readContents(self.resource('bug121341.properties'))
        ref = ['abc', 'xy', u"\u1234\t\r\n\u00AB\u0001\n",
               "this is multiline property",
               "this is another multiline property", u"test\u0036",
               "yet another multiline propery", u"\ttest5\u0020", " test6\t",
               u"c\uCDEFd", u"\uABCD"]
        i = iter(self.parser)
        for r, e in zip(ref, i):
            self.assertEqual(e.val, r)

    def test_comment_in_multi(self):
        # A '#' on a continuation line is part of the value.
        self._test(r'''bar=one line with a \
# part that looks like a comment \
and an end''', (('bar', 'one line with a # part that looks like a comment '
                 'and an end'),))

    def test_license_header(self):
        self._test('''\
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

foo=value
''', (('foo', 'value'),))
        # assertIn reports both operands on failure; assert_ is a
        # deprecated alias.
        self.assertIn('MPL', self.parser.header)

    def test_escapes(self):
        self.parser.readContents(r'''
# unicode escapes
zero = some \unicode
one = \u0
two = \u41
three = \u042
four = \u0043
five = \u0044a
six = \a
seven = \n\r\t\\
''')
        ref = ['some unicode', chr(0), 'A', 'B', 'C', 'Da', 'a', '\n\r\t\\']
        for r, e in zip(ref, self.parser):
            self.assertEqual(e.val, r)

    def test_trailing_comment(self):
        self._test('''first = string
second = string

#
#commented out
''', (('first', 'string'), ('second', 'string')))
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import unittest + +from compare_locales import webapps + + +class TestFileComparison(unittest.TestCase): + + def mock_FileComparison(self, mock_listdir): + class Target(webapps.FileComparison): + def _listdir(self): + return mock_listdir() + return Target('.', 'en-US') + + def test_just_reference(self): + def _listdir(): + return ['my_app.en-US.properties'] + filecomp = self.mock_FileComparison(_listdir) + filecomp.files() + self.assertEqual(filecomp.locales(), []) + self.assertEqual(filecomp._reference.keys(), ['my_app']) + file_ = filecomp._reference['my_app'] + self.assertEqual(file_.file, 'locales/my_app.en-US.properties') + + def test_just_locales(self): + def _listdir(): + return ['my_app.ar.properties', + 'my_app.sr-Latn.properties', + 'my_app.sv-SE.properties', + 'my_app.po_SI.properties'] + filecomp = self.mock_FileComparison(_listdir) + filecomp.files() + self.assertEqual(filecomp.locales(), + ['ar', 'sr-Latn', 'sv-SE']) + self.assertEqual(filecomp._files['ar'].keys(), ['my_app']) + file_ = filecomp._files['ar']['my_app'] + self.assertEqual(file_.file, 'locales/my_app.ar.properties') diff --git a/python/compare-locales/compare_locales/util.py b/python/compare-locales/compare_locales/util.py new file mode 100644 index 000000000..71eadd874 --- /dev/null +++ b/python/compare-locales/compare_locales/util.py @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This file is shared between compare-locales and locale-inspector +# test_util is in compare-locales only, for the sake of easy +# development. 
+ + +def parseLocales(content): + return sorted(l.split()[0] for l in content.splitlines() if l) diff --git a/python/compare-locales/compare_locales/webapps.py b/python/compare-locales/compare_locales/webapps.py new file mode 100644 index 000000000..42f5b5657 --- /dev/null +++ b/python/compare-locales/compare_locales/webapps.py @@ -0,0 +1,235 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''gaia-style web apps support + +This variant supports manifest.webapp localization as well as +.properties files with a naming scheme of locales/foo.*.properties. +''' + +from collections import defaultdict +import json +import os +import os.path +import re + +from compare_locales.paths import File, EnumerateDir +from compare_locales.compare import AddRemove, ContentComparer + + +class WebAppCompare(object): + '''For a given directory, analyze + /manifest.webapp + /locales/*.*.properties + + Deduce the present locale codes. + ''' + ignore_dirs = EnumerateDir.ignore_dirs + reference_locale = 'en-US' + + def __init__(self, basedir): + '''Constructor + :param basedir: Directory of the web app to inspect + ''' + self.basedir = basedir + self.manifest = Manifest(basedir, self.reference_locale) + self.files = FileComparison(basedir, self.reference_locale) + self.watcher = None + + def compare(self, locales): + '''Compare the manifest.webapp and the locales/*.*.properties + ''' + if not locales: + locales = self.locales() + self.manifest.compare(locales) + self.files.compare(locales) + + def setWatcher(self, watcher): + self.watcher = watcher + self.manifest.watcher = watcher + self.files.watcher = watcher + + def locales(self): + '''Inspect files on disk to find present languages. + :rtype: List of locales, sorted, including reference. 
class Manifest(object):
    '''Class that helps with parsing and inspection of manifest.webapp.
    '''

    def __init__(self, basedir, reference_locale):
        self.file = File(os.path.join(basedir, 'manifest.webapp'),
                         'manifest.webapp')
        self.reference_locale = reference_locale
        # cache for the strings property; None means "not loaded yet"
        self._strings = None
        self.watcher = None

    @property
    def strings(self):
        '''Localizable strings per locale, loaded on first access.'''
        if self._strings is None:
            self._strings = self.load_and_parse()
        return self._strings

    def load_and_parse(self):
        '''Read manifest.webapp and extract its localizable strings.

        On unreadable or invalid JSON, the watcher (if any) is notified
        with an 'error' and an empty dict is returned. The empty dict
        is falsy like the previously returned False, but still
        supports dict methods such as keys().
        '''
        try:
            with open(self.file.fullpath) as manifest_file:
                manifest = json.load(manifest_file)
        except (ValueError, IOError) as e:
            if self.watcher:
                self.watcher.notify('error', self.file, str(e))
            return {}
        return self.extract_manifest_strings(manifest)

    def extract_manifest_strings(self, manifest_fragment):
        '''Extract localizable strings from a manifest dict.
        This method is recursive, and returns a two-level dict,
        first level being locale codes, second level being generated
        key and localized value. Keys are generated by concatenating
        each level in the json with a ".".

        Note that the 'locales' entries are popped from
        manifest_fragment and its nested dicts.
        '''
        rv = defaultdict(dict)
        localizable = manifest_fragment.pop('locales', {})
        if localizable:
            for locale, keyvalue in localizable.items():
                for key, value in keyvalue.items():
                    # AB_CD is a placeholder for the locale code, it's
                    # replaced when reporting in compare_strings.
                    key = '.'.join(['locales', 'AB_CD', key])
                    rv[locale][key] = value
        for key, sub_manifest in manifest_fragment.items():
            if not isinstance(sub_manifest, dict):
                continue
            subdict = self.extract_manifest_strings(sub_manifest)
            # Iterate the (locale, strings) pairs. Iterating the dict
            # itself only yields the locale codes.
            for locale, keyvalue in subdict.items():
                rv[locale].update((key + '.' + subkey, value)
                                  for subkey, value
                                  in keyvalue.items())
        return rv

    def compare(self, locales):
        '''Compare the manifest strings of locales with the reference.

        The reference locale itself is skipped. Does nothing if the
        manifest has no localizable strings.
        '''
        strings = self.strings
        if not strings:
            return
        # create a copy so that we can mock around with it
        strings = strings.copy()
        # A manifest may have strings for other locales only; treat
        # the reference as empty then instead of raising KeyError.
        reference = strings.pop(self.reference_locale, {})
        for locale in locales:
            if locale == self.reference_locale:
                continue
            self.compare_strings(reference,
                                 strings.get(locale, {}),
                                 locale)

    def compare_strings(self, reference, l10n, locale):
        '''Report keys that are only in reference or only in l10n.

        Keys only in l10n are notified as 'obsoleteEntity', keys only
        in reference as 'missingEntity'. Nothing is reported for keys
        present on both sides, or if there's no watcher.
        '''
        add_remove = AddRemove()
        add_remove.set_left(sorted(reference.keys()))
        add_remove.set_right(sorted(l10n.keys()))
        for op, item_or_pair in add_remove:
            if op == 'equal':
                continue
            key = item_or_pair.replace('.AB_CD.',
                                       '.%s.' % locale)
            if op == 'add':
                # obsolete entry
                category = 'obsoleteEntity'
            else:
                # missing entry
                category = 'missingEntity'
            if self.watcher:
                self.watcher.notify(category, self.file, key)


class FileComparison(object):
    '''Compare the locales/*.*.properties files inside a webapp.
    '''
    # <base>.<locale>.properties, with the locale made of letters
    # and dashes only
    prop = re.compile('(?P<base>.*)\\.'
                      '(?P<locale>[a-zA-Z]+(?:-[a-zA-Z]+)*)'
                      '\\.properties$')

    def __init__(self, basedir, reference_locale):
        self.basedir = basedir
        self.reference_locale = reference_locale
        self.watcher = None
        # both are filled by files(); None means "not read yet"
        self._reference = self._files = None

    def locales(self):
        '''Get the locales present in the webapp

        :rtype: Sorted list of locale codes, without the reference.
        '''
        self.files()
        return sorted(self._files)

    def compare(self, locales):
        '''Compare the files of each locale with the reference files.

        Matching files are handed to watcher.compare, files only in
        the locale to watcher.remove, and files only in the reference
        to watcher.add. The reference locale itself is skipped.
        '''
        self.files()
        for locale in locales:
            if locale == self.reference_locale:
                # Reference files are kept in self._reference, so
                # they'd all be reported as missing for this locale.
                continue
            # .get() instead of indexing, so that asking for a locale
            # without files doesn't add it to self._files.
            l10n = self._files.get(locale, {})
            filecmp = AddRemove()
            filecmp.set_left(sorted(self._reference.keys()))
            filecmp.set_right(sorted(l10n.keys()))
            for op, item_or_pair in filecmp:
                if op == 'equal':
                    self.watcher.compare(self._reference[item_or_pair[0]],
                                         l10n[item_or_pair[1]])
                elif op == 'add':
                    # obsolete file
                    self.watcher.remove(l10n[item_or_pair])
                else:
                    # missing file
                    _path = '.'.join([item_or_pair, locale, 'properties'])
                    missingFile = File(
                        os.path.join(self.basedir, 'locales', _path),
                        'locales/' + _path)
                    self.watcher.add(self._reference[item_or_pair],
                                     missingFile)

    def files(self):
        '''Read the list of locales from disk.

        The listing is read once; later calls return immediately, also
        if no reference files were found.
        '''
        if self._reference is not None:
            return
        self._reference = {}
        self._files = defaultdict(dict)
        for path in self._listdir():
            match = self.prop.match(path)
            if match is None:
                continue
            locale = match.group('locale')
            if locale == self.reference_locale:
                target = self._reference
            else:
                target = self._files[locale]
            fullpath = os.path.join(self.basedir, 'locales', path)
            target[match.group('base')] = File(fullpath, 'locales/' + path)

    def _listdir(self):
        'Monkey-patch this for testing.'
        return os.listdir(os.path.join(self.basedir, 'locales'))


def compare_web_app(basedir, locales, other_observer=None):
    '''Compare gaia-style web app.

    Optional arguments are:
    - other_observer. An object implementing
        notify(category, _file, data)
      The return values of that callback are ignored.

    Returns the observer of the ContentComparer used for the run.
    '''
    comparer = ContentComparer()
    if other_observer is not None:
        comparer.add_observer(other_observer)
    webapp_comp = WebAppCompare(basedir)
    webapp_comp.setWatcher(comparer)
    webapp_comp.compare(locales)
    return comparer.observer