summaryrefslogtreecommitdiffstats
path: root/toolkit/mozapps/installer/find-dupes.py
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/mozapps/installer/find-dupes.py')
-rw-r--r--toolkit/mozapps/installer/find-dupes.py115
1 files changed, 14 insertions, 101 deletions
diff --git a/toolkit/mozapps/installer/find-dupes.py b/toolkit/mozapps/installer/find-dupes.py
index bd0561c97..34ef675f4 100644
--- a/toolkit/mozapps/installer/find-dupes.py
+++ b/toolkit/mozapps/installer/find-dupes.py
@@ -4,15 +4,8 @@
import sys
import hashlib
-import re
-from mozbuild.preprocessor import Preprocessor
-from mozbuild.util import DefinesAction
from mozpack.packager.unpack import UnpackFinder
-from mozpack.files import DeflatedFile
from collections import OrderedDict
-from StringIO import StringIO
-import argparse
-import buildconfig
'''
Find files duplicated in a given packaged directory, independently of its
@@ -20,116 +13,36 @@ package format.
'''
-def normalize_osx_path(p):
- '''
- Strips the first 3 elements of an OSX app path
-
- >>> normalize_osx_path('Nightly.app/foo/bar/baz')
- 'baz'
- '''
- bits = p.split('/')
- if len(bits) > 3 and bits[0].endswith('.app'):
- return '/'.join(bits[3:])
- return p
-
-
-def normalize_l10n_path(p):
- '''
- Normalizes localized paths to en-US
-
- >>> normalize_l10n_path('chrome/es-ES/locale/branding/brand.properties')
- 'chrome/en-US/locale/branding/brand.properties'
- >>> normalize_l10n_path('chrome/fr/locale/fr/browser/aboutHome.dtd')
- 'chrome/en-US/locale/en-US/browser/aboutHome.dtd'
- '''
- # Keep a trailing slash here! e.g. locales like 'br' can transform
- # 'chrome/br/locale/branding/' into 'chrome/en-US/locale/en-USanding/'
- p = re.sub(r'chrome/(\S+)/locale/\1/',
- 'chrome/en-US/locale/en-US/',
- p)
- p = re.sub(r'chrome/(\S+)/locale/',
- 'chrome/en-US/locale/',
- p)
- return p
-
-
-def normalize_path(p):
- return normalize_osx_path(normalize_l10n_path(p))
-
-
-def find_dupes(source, allowed_dupes, bail=True):
- allowed_dupes = set(allowed_dupes)
+def find_dupes(source):
md5s = OrderedDict()
for p, f in UnpackFinder(source):
content = f.open().read()
m = hashlib.md5(content).digest()
- if m not in md5s:
- if isinstance(f, DeflatedFile):
- compressed = f.file.compressed_size
- else:
- compressed = len(content)
- md5s[m] = (len(content), compressed, [])
- md5s[m][2].append(p)
+ if not m in md5s:
+ md5s[m] = (len(content), [])
+ md5s[m][1].append(p)
total = 0
- total_compressed = 0
num_dupes = 0
- unexpected_dupes = []
- for m, (size, compressed, paths) in sorted(md5s.iteritems(),
- key=lambda x: x[1][1]):
+ for m, (size, paths) in md5s.iteritems():
if len(paths) > 1:
- print 'Duplicates %d bytes%s%s:' % (size,
- ' (%d compressed)' % compressed if compressed != size else '',
+ print 'Duplicates %d bytes%s:' % (size,
' (%d times)' % (len(paths) - 1) if len(paths) > 2 else '')
print ''.join(' %s\n' % p for p in paths)
total += (len(paths) - 1) * size
- total_compressed += (len(paths) - 1) * compressed
num_dupes += 1
-
- unexpected_dupes.extend([p for p in paths if normalize_path(p) not in allowed_dupes])
-
if num_dupes:
- print "WARNING: Found %d duplicated files taking %d bytes (%s)" % \
- (num_dupes, total,
- '%d compressed' % total_compressed if total_compressed != total
- else 'uncompressed')
-
- if unexpected_dupes:
- errortype = "ERROR" if bail else "WARNING"
- print "%s: The following duplicated files are not allowed:" % errortype
- print "\n".join(unexpected_dupes)
- if bail:
- sys.exit(1)
+ print "WARNING: Found %d duplicated files taking %d bytes" % \
+ (num_dupes, total) + " (uncompressed)"
def main():
- parser = argparse.ArgumentParser(description='Find duplicate files in directory.')
- parser.add_argument('--warning', '-w', action='store_true',
- help='Only warn about duplicates, do not exit with an error')
- parser.add_argument('--file', '-f', action='append', dest='dupes_files', default=[],
- help='Add exceptions to the duplicate list from this file')
- parser.add_argument('-D', action=DefinesAction)
- parser.add_argument('-U', action='append', default=[])
- parser.add_argument('directory',
- help='The directory to check for duplicates in')
-
- args = parser.parse_args()
-
- allowed_dupes = []
- for filename in args.dupes_files:
- pp = Preprocessor()
- pp.context.update(buildconfig.defines)
- if args.D:
- pp.context.update(args.D)
- for undefine in args.U:
- if undefine in pp.context:
- del pp.context[undefine]
- pp.out = StringIO()
- pp.do_filter('substitution')
- pp.do_include(filename)
- allowed_dupes.extend([line.partition('#')[0].rstrip()
- for line in pp.out.getvalue().splitlines()])
+ if len(sys.argv) != 2:
+ import os
+ print >>sys.stderr, "Usage: %s directory" % \
+ os.path.basename(sys.argv[0])
+ sys.exit(1)
- find_dupes(args.directory, bail=not args.warning, allowed_dupes=allowed_dupes)
+ find_dupes(sys.argv[1])
if __name__ == "__main__":
main()