Diffstat (limited to 'python/mozbuild/mozpack/files.py')
-rw-r--r-- | python/mozbuild/mozpack/files.py | 1106
1 file changed, 1106 insertions, 0 deletions
diff --git a/python/mozbuild/mozpack/files.py b/python/mozbuild/mozpack/files.py new file mode 100644 index 000000000..64902e195 --- /dev/null +++ b/python/mozbuild/mozpack/files.py @@ -0,0 +1,1106 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import + +import errno +import os +import platform +import shutil +import stat +import subprocess +import uuid +import mozbuild.makeutil as makeutil +from mozbuild.preprocessor import Preprocessor +from mozbuild.util import FileAvoidWrite +from mozpack.executables import ( + is_executable, + may_strip, + strip, + may_elfhack, + elfhack, +) +from mozpack.chrome.manifest import ManifestEntry +from io import BytesIO +from mozpack.errors import ( + ErrorMessage, + errors, +) +from mozpack.mozjar import JarReader +import mozpack.path as mozpath +from collections import OrderedDict +from jsmin import JavascriptMinify +from tempfile import ( + mkstemp, + NamedTemporaryFile, +) +from tarfile import ( + TarFile, + TarInfo, +) +try: + import hglib +except ImportError: + hglib = None + + +# For clean builds, copying files on win32 using CopyFile through ctypes is +# ~2x as fast as using shutil.copyfile. +if platform.system() != 'Windows': + _copyfile = shutil.copyfile +else: + import ctypes + _kernel32 = ctypes.windll.kernel32 + _CopyFileA = _kernel32.CopyFileA + _CopyFileW = _kernel32.CopyFileW + + def _copyfile(src, dest): + # False indicates `dest` should be overwritten if it exists already. + if isinstance(src, unicode) and isinstance(dest, unicode): + _CopyFileW(src, dest, False) + elif isinstance(src, str) and isinstance(dest, str): + _CopyFileA(src, dest, False) + else: + raise TypeError('mismatched path types!') + +class Dest(object): + ''' + Helper interface for BaseFile.copy. The interface works as follows: + - read() and write() can be used to sequentially read/write from the + underlying file. + - a call to read() after a write() will re-open the underlying file and + read from it. + - a call to write() after a read() will re-open the underlying file, + emptying it, and write to it. + ''' + def __init__(self, path): + self.path = path + self.mode = None + + @property + def name(self): + return self.path + + def read(self, length=-1): + if self.mode != 'r': + self.file = open(self.path, 'rb') + self.mode = 'r' + return self.file.read(length) + + def write(self, data): + if self.mode != 'w': + self.file = open(self.path, 'wb') + self.mode = 'w' + return self.file.write(data) + + def exists(self): + return os.path.exists(self.path) + + def close(self): + if self.mode: + self.mode = None + self.file.close() + + +class BaseFile(object): + ''' + Base interface and helper for file copying. Derived class may implement + their own copy function, or rely on BaseFile.copy using the open() member + function and/or the path property. + ''' + @staticmethod + def is_older(first, second): + ''' + Compares the modification time of two files, and returns whether the + ``first`` file is older than the ``second`` file. + ''' + # os.path.getmtime returns a result in seconds with precision up to + # the microsecond. But microsecond is too precise because + # shutil.copystat only copies milliseconds, and seconds is not + # enough precision. 
+ return int(os.path.getmtime(first) * 1000) \ + <= int(os.path.getmtime(second) * 1000) + + @staticmethod + def any_newer(dest, inputs): + ''' + Compares the modification time of ``dest`` to multiple input files, and + returns whether any of the ``inputs`` is newer (has a later mtime) than + ``dest``. + ''' + # os.path.getmtime returns a result in seconds with precision up to + # the microsecond. But microsecond is too precise because + # shutil.copystat only copies milliseconds, and seconds is not + # enough precision. + dest_mtime = int(os.path.getmtime(dest) * 1000) + for input in inputs: + if dest_mtime < int(os.path.getmtime(input) * 1000): + return True + return False + + @staticmethod + def normalize_mode(mode): + # Normalize file mode: + # - keep file type (e.g. S_IFREG) + ret = stat.S_IFMT(mode) + # - expand user read and execute permissions to everyone + if mode & 0400: + ret |= 0444 + if mode & 0100: + ret |= 0111 + # - keep user write permissions + if mode & 0200: + ret |= 0200 + # - leave away sticky bit, setuid, setgid + return ret + + def copy(self, dest, skip_if_older=True): + ''' + Copy the BaseFile content to the destination given as a string or a + Dest instance. Avoids replacing existing files if the BaseFile content + matches that of the destination, or in case of plain files, if the + destination is newer than the original file. This latter behaviour is + disabled when skip_if_older is False. + Returns whether a copy was actually performed (True) or not (False). + ''' + if isinstance(dest, basestring): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + can_skip_content_check = False + if not dest.exists(): + can_skip_content_check = True + elif getattr(self, 'path', None) and getattr(dest, 'path', None): + if skip_if_older and BaseFile.is_older(self.path, dest.path): + return False + elif os.path.getsize(self.path) != os.path.getsize(dest.path): + can_skip_content_check = True + + if can_skip_content_check: + if getattr(self, 'path', None) and getattr(dest, 'path', None): + _copyfile(self.path, dest.path) + shutil.copystat(self.path, dest.path) + else: + # Ensure the file is always created + if not dest.exists(): + dest.write('') + shutil.copyfileobj(self.open(), dest) + return True + + src = self.open() + copy_content = '' + while True: + dest_content = dest.read(32768) + src_content = src.read(32768) + copy_content += src_content + if len(dest_content) == len(src_content) == 0: + break + # If the read content differs between origin and destination, + # write what was read up to now, and copy the remainder. + if dest_content != src_content: + dest.write(copy_content) + shutil.copyfileobj(src, dest) + break + if hasattr(self, 'path') and hasattr(dest, 'path'): + shutil.copystat(self.path, dest.path) + return True + + def open(self): + ''' + Return a file-like object allowing to read() the content of the + associated file. This is meant to be overloaded in subclasses to return + a custom file-like object. + ''' + assert self.path is not None + return open(self.path, 'rb') + + def read(self): + raise NotImplementedError('BaseFile.read() not implemented. Bug 1170329.') + + @property + def mode(self): + ''' + Return the file's unix mode, or None if it has no meaning. + ''' + return None + + +class File(BaseFile): + ''' + File class for plain files. + ''' + def __init__(self, path): + self.path = path + + @property + def mode(self): + ''' + Return the file's unix mode, as returned by os.stat().st_mode. 
+ ''' + if platform.system() == 'Windows': + return None + assert self.path is not None + mode = os.stat(self.path).st_mode + return self.normalize_mode(mode) + + def read(self): + '''Return the contents of the file.''' + with open(self.path, 'rb') as fh: + return fh.read() + + +class ExecutableFile(File): + ''' + File class for executable and library files on OS/2, OS/X and ELF systems. + (see mozpack.executables.is_executable documentation). + ''' + def copy(self, dest, skip_if_older=True): + real_dest = dest + if not isinstance(dest, basestring): + fd, dest = mkstemp() + os.close(fd) + os.remove(dest) + assert isinstance(dest, basestring) + # If File.copy didn't actually copy because dest is newer, check the + # file sizes. If dest is smaller, it means it is already stripped and + # elfhacked, so we can skip. + if not File.copy(self, dest, skip_if_older) and \ + os.path.getsize(self.path) > os.path.getsize(dest): + return False + try: + if may_strip(dest): + strip(dest) + if may_elfhack(dest): + elfhack(dest) + except ErrorMessage: + os.remove(dest) + raise + + if real_dest != dest: + f = File(dest) + ret = f.copy(real_dest, skip_if_older) + os.remove(dest) + return ret + return True + + +class AbsoluteSymlinkFile(File): + '''File class that is copied by symlinking (if available). + + This class only works if the target path is absolute. + ''' + + def __init__(self, path): + if not os.path.isabs(path): + raise ValueError('Symlink target not absolute: %s' % path) + + File.__init__(self, path) + + def copy(self, dest, skip_if_older=True): + assert isinstance(dest, basestring) + + # The logic in this function is complicated by the fact that symlinks + # aren't universally supported. So, where symlinks aren't supported, we + # fall back to file copying. Keep in mind that symlink support is + # per-filesystem, not per-OS. + + # Handle the simple case where symlinks are definitely not supported by + # falling back to file copy. + if not hasattr(os, 'symlink'): + return File.copy(self, dest, skip_if_older=skip_if_older) + + # Always verify the symlink target path exists. + if not os.path.exists(self.path): + raise ErrorMessage('Symlink target path does not exist: %s' % self.path) + + st = None + + try: + st = os.lstat(dest) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + # If the dest is a symlink pointing to us, we have nothing to do. + # If it's the wrong symlink, the filesystem must support symlinks, + # so we replace with a proper symlink. + if st and stat.S_ISLNK(st.st_mode): + link = os.readlink(dest) + if link == self.path: + return False + + os.remove(dest) + os.symlink(self.path, dest) + return True + + # If the destination doesn't exist, we try to create a symlink. If that + # fails, we fall back to copy code. + if not st: + try: + os.symlink(self.path, dest) + return True + except OSError: + return File.copy(self, dest, skip_if_older=skip_if_older) + + # Now the complicated part. If the destination exists, we could be + # replacing a file with a symlink. Or, the filesystem may not support + # symlinks. We want to minimize I/O overhead for performance reasons, + # so we keep the existing destination file around as long as possible. + # A lot of the system calls would be eliminated if we cached whether + # symlinks are supported. 
However, even if we performed a single + # up-front test of whether the root of the destination directory + # supports symlinks, there's no guarantee that all operations for that + # dest (or source) would be on the same filesystem and would support + # symlinks. + # + # Our strategy is to attempt to create a new symlink with a random + # name. If that fails, we fall back to copy mode. If that works, we + # remove the old destination and move the newly-created symlink into + # its place. + + temp_dest = os.path.join(os.path.dirname(dest), str(uuid.uuid4())) + try: + os.symlink(self.path, temp_dest) + # TODO Figure out exactly how symlink creation fails and only trap + # that. + except EnvironmentError: + return File.copy(self, dest, skip_if_older=skip_if_older) + + # If removing the original file fails, don't forget to clean up the + # temporary symlink. + try: + os.remove(dest) + except EnvironmentError: + os.remove(temp_dest) + raise + + os.rename(temp_dest, dest) + return True + + +class ExistingFile(BaseFile): + ''' + File class that represents a file that may exist but whose content comes + from elsewhere. + + This purpose of this class is to account for files that are installed via + external means. It is typically only used in manifests or in registries to + account for files. + + When asked to copy, this class does nothing because nothing is known about + the source file/data. + + Instances of this class come in two flavors: required and optional. If an + existing file is required, it must exist during copy() or an error is + raised. + ''' + def __init__(self, required): + self.required = required + + def copy(self, dest, skip_if_older=True): + if isinstance(dest, basestring): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + if not self.required: + return + + if not dest.exists(): + errors.fatal("Required existing file doesn't exist: %s" % + dest.path) + + +class PreprocessedFile(BaseFile): + ''' + File class for a file that is preprocessed. PreprocessedFile.copy() runs + the preprocessor on the file to create the output. + ''' + def __init__(self, path, depfile_path, marker, defines, extra_depends=None, + silence_missing_directive_warnings=False): + self.path = path + self.depfile = depfile_path + self.marker = marker + self.defines = defines + self.extra_depends = list(extra_depends or []) + self.silence_missing_directive_warnings = \ + silence_missing_directive_warnings + + def copy(self, dest, skip_if_older=True): + ''' + Invokes the preprocessor to create the destination file. + ''' + if isinstance(dest, basestring): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + # We have to account for the case where the destination exists and is a + # symlink to something. Since we know the preprocessor is certainly not + # going to create a symlink, we can just remove the existing one. If the + # destination is not a symlink, we leave it alone, since we're going to + # overwrite its contents anyway. + # If symlinks aren't supported at all, we can skip this step. + if hasattr(os, 'symlink'): + if os.path.islink(dest.path): + os.remove(dest.path) + + pp_deps = set(self.extra_depends) + + # If a dependency file was specified, and it exists, add any + # dependencies from that file to our list. 
+ if self.depfile and os.path.exists(self.depfile): + target = mozpath.normpath(dest.name) + with open(self.depfile, 'rb') as fileobj: + for rule in makeutil.read_dep_makefile(fileobj): + if target in rule.targets(): + pp_deps.update(rule.dependencies()) + + skip = False + if dest.exists() and skip_if_older: + # If a dependency file was specified, and it doesn't exist, + # assume that the preprocessor needs to be rerun. That will + # regenerate the dependency file. + if self.depfile and not os.path.exists(self.depfile): + skip = False + else: + skip = not BaseFile.any_newer(dest.path, pp_deps) + + if skip: + return False + + deps_out = None + if self.depfile: + deps_out = FileAvoidWrite(self.depfile) + pp = Preprocessor(defines=self.defines, marker=self.marker) + pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings) + + with open(self.path, 'rU') as input: + pp.processFile(input=input, output=dest, depfile=deps_out) + + dest.close() + if self.depfile: + deps_out.close() + + return True + + +class GeneratedFile(BaseFile): + ''' + File class for content with no previous existence on the filesystem. + ''' + def __init__(self, content): + self.content = content + + def open(self): + return BytesIO(self.content) + + +class DeflatedFile(BaseFile): + ''' + File class for members of a jar archive. DeflatedFile.copy() effectively + extracts the file from the jar archive. + ''' + def __init__(self, file): + from mozpack.mozjar import JarFileReader + assert isinstance(file, JarFileReader) + self.file = file + + def open(self): + self.file.seek(0) + return self.file + +class ExtractedTarFile(GeneratedFile): + ''' + File class for members of a tar archive. Contents of the underlying file + are extracted immediately and stored in memory. + ''' + def __init__(self, tar, info): + assert isinstance(info, TarInfo) + assert isinstance(tar, TarFile) + GeneratedFile.__init__(self, tar.extractfile(info).read()) + self._mode = self.normalize_mode(info.mode) + + @property + def mode(self): + return self._mode + + def read(self): + return self.content + +class XPTFile(GeneratedFile): + ''' + File class for a linked XPT file. It takes several XPT files as input + (using the add() and remove() member functions), and links them at copy() + time. + ''' + def __init__(self): + self._files = set() + + def add(self, xpt): + ''' + Add the given XPT file (as a BaseFile instance) to the list of XPTs + to link. + ''' + assert isinstance(xpt, BaseFile) + self._files.add(xpt) + + def remove(self, xpt): + ''' + Remove the given XPT file (as a BaseFile instance) from the list of + XPTs to link. + ''' + assert isinstance(xpt, BaseFile) + self._files.remove(xpt) + + def copy(self, dest, skip_if_older=True): + ''' + Link the registered XPTs and place the resulting linked XPT at the + destination given as a string or a Dest instance. Avoids an expensive + XPT linking if the interfaces in an existing destination match those of + the individual XPTs to link. + skip_if_older is ignored. + ''' + if isinstance(dest, basestring): + dest = Dest(dest) + assert isinstance(dest, Dest) + + from xpt import xpt_link, Typelib, Interface + all_typelibs = [Typelib.read(f.open()) for f in self._files] + if dest.exists(): + # Typelib.read() needs to seek(), so use a BytesIO for dest + # content. 
+ dest_interfaces = \ + dict((i.name, i) + for i in Typelib.read(BytesIO(dest.read())).interfaces + if i.iid != Interface.UNRESOLVED_IID) + identical = True + for f in self._files: + typelib = Typelib.read(f.open()) + for i in typelib.interfaces: + if i.iid != Interface.UNRESOLVED_IID and \ + not (i.name in dest_interfaces and + i == dest_interfaces[i.name]): + identical = False + break + if identical: + return False + s = BytesIO() + xpt_link(all_typelibs).write(s) + dest.write(s.getvalue()) + return True + + def open(self): + raise RuntimeError("Unsupported") + + def isempty(self): + ''' + Return whether there are XPT files to link. + ''' + return len(self._files) == 0 + + +class ManifestFile(BaseFile): + ''' + File class for a manifest file. It takes individual manifest entries (using + the add() and remove() member functions), and adjusts them to be relative + to the base path for the manifest, given at creation. + Example: + There is a manifest entry "content foobar foobar/content/" relative + to "foobar/chrome". When packaging, the entry will be stored in + jar:foobar/omni.ja!/chrome/chrome.manifest, which means the entry + will have to be relative to "chrome" instead of "foobar/chrome". This + doesn't really matter when serializing the entry, since this base path + is not written out, but it matters when moving the entry at the same + time, e.g. to jar:foobar/omni.ja!/chrome.manifest, which we don't do + currently but could in the future. + ''' + def __init__(self, base, entries=None): + self._entries = entries if entries else [] + self._base = base + + def add(self, entry): + ''' + Add the given entry to the manifest. Entries are rebased at open() time + instead of add() time so that they can be more easily remove()d. + ''' + assert isinstance(entry, ManifestEntry) + self._entries.append(entry) + + def remove(self, entry): + ''' + Remove the given entry from the manifest. + ''' + assert isinstance(entry, ManifestEntry) + self._entries.remove(entry) + + def open(self): + ''' + Return a file-like object allowing to read() the serialized content of + the manifest. + ''' + return BytesIO(''.join('%s\n' % e.rebase(self._base) + for e in self._entries)) + + def __iter__(self): + ''' + Iterate over entries in the manifest file. + ''' + return iter(self._entries) + + def isempty(self): + ''' + Return whether there are manifest entries to write + ''' + return len(self._entries) == 0 + + +class MinifiedProperties(BaseFile): + ''' + File class for minified properties. This wraps around a BaseFile instance, + and removes lines starting with a # from its content. + ''' + def __init__(self, file): + assert isinstance(file, BaseFile) + self._file = file + + def open(self): + ''' + Return a file-like object allowing to read() the minified content of + the properties file. + ''' + return BytesIO(''.join(l for l in self._file.open().readlines() + if not l.startswith('#'))) + + +class MinifiedJavaScript(BaseFile): + ''' + File class for minifying JavaScript files. 
+ ''' + def __init__(self, file, verify_command=None): + assert isinstance(file, BaseFile) + self._file = file + self._verify_command = verify_command + + def open(self): + output = BytesIO() + minify = JavascriptMinify(self._file.open(), output, quote_chars="'\"`") + minify.minify() + output.seek(0) + + if not self._verify_command: + return output + + input_source = self._file.open().read() + output_source = output.getvalue() + + with NamedTemporaryFile() as fh1, NamedTemporaryFile() as fh2: + fh1.write(input_source) + fh2.write(output_source) + fh1.flush() + fh2.flush() + + try: + args = list(self._verify_command) + args.extend([fh1.name, fh2.name]) + subprocess.check_output(args, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + errors.warn('JS minification verification failed for %s:' % + (getattr(self._file, 'path', '<unknown>'))) + # Prefix each line with "Warning:" so mozharness doesn't + # think these error messages are real errors. + for line in e.output.splitlines(): + errors.warn(line) + + return self._file.open() + + return output + + +class BaseFinder(object): + def __init__(self, base, minify=False, minify_js=False, + minify_js_verify_command=None): + ''' + Initializes the instance with a reference base directory. + + The optional minify argument specifies whether minification of code + should occur. minify_js is an additional option to control minification + of JavaScript. It requires minify to be True. + + minify_js_verify_command can be used to optionally verify the results + of JavaScript minification. If defined, it is expected to be an iterable + that will constitute the first arguments to a called process which will + receive the filenames of the original and minified JavaScript files. + The invoked process can then verify the results. If minification is + rejected, the process exits with a non-0 exit code and the original + JavaScript source is used. An example value for this argument is + ('/path/to/js', '/path/to/verify/script.js'). + ''' + if minify_js and not minify: + raise ValueError('minify_js requires minify.') + + self.base = base + self._minify = minify + self._minify_js = minify_js + self._minify_js_verify_command = minify_js_verify_command + + def find(self, pattern): + ''' + Yield path, BaseFile_instance pairs for all files under the base + directory and its subdirectories that match the given pattern. See the + mozpack.path.match documentation for a description of the handled + patterns. + ''' + while pattern.startswith('/'): + pattern = pattern[1:] + for p, f in self._find(pattern): + yield p, self._minify_file(p, f) + + def get(self, path): + """Obtain a single file. + + Where ``find`` is tailored towards matching multiple files, this method + is used for retrieving a single file. Use this method when performance + is critical. + + Returns a ``BaseFile`` if at most one file exists or ``None`` otherwise. + """ + files = list(self.find(path)) + if len(files) != 1: + return None + return files[0][1] + + def __iter__(self): + ''' + Iterates over all files under the base directory (excluding files + starting with a '.' and files at any level under a directory starting + with a '.'). + for path, file in finder: + ... + ''' + return self.find('') + + def __contains__(self, pattern): + raise RuntimeError("'in' operator forbidden for %s. Use contains()." % + self.__class__.__name__) + + def contains(self, pattern): + ''' + Return whether some files under the base directory match the given + pattern. 
See the mozpack.path.match documentation for a description of + the handled patterns. + ''' + return any(self.find(pattern)) + + def _minify_file(self, path, file): + ''' + Return an appropriate MinifiedSomething wrapper for the given BaseFile + instance (file), according to the file type (determined by the given + path), if the FileFinder was created with minification enabled. + Otherwise, just return the given BaseFile instance. + ''' + if not self._minify or isinstance(file, ExecutableFile): + return file + + if path.endswith('.properties'): + return MinifiedProperties(file) + + if self._minify_js and path.endswith(('.js', '.jsm')): + return MinifiedJavaScript(file, self._minify_js_verify_command) + + return file + + def _find_helper(self, pattern, files, file_getter): + """Generic implementation of _find. + + A few *Finder implementations share logic for returning results. + This function implements the custom logic. + + The ``file_getter`` argument is a callable that receives a path + that is known to exist. The callable should return a ``BaseFile`` + instance. + """ + if '*' in pattern: + for p in files: + if mozpath.match(p, pattern): + yield p, file_getter(p) + elif pattern == '': + for p in files: + yield p, file_getter(p) + elif pattern in files: + yield pattern, file_getter(pattern) + else: + for p in files: + if mozpath.basedir(p, [pattern]) == pattern: + yield p, file_getter(p) + + +class FileFinder(BaseFinder): + ''' + Helper to get appropriate BaseFile instances from the file system. + ''' + def __init__(self, base, find_executables=True, ignore=(), + find_dotfiles=False, **kargs): + ''' + Create a FileFinder for files under the given base directory. + + The find_executables argument determines whether the finder needs to + try to guess whether files are executables. Disabling this guessing + when not necessary can speed up the finder significantly. + + ``ignore`` accepts an iterable of patterns to ignore. Entries are + strings that match paths relative to ``base`` using + ``mozpath.match()``. This means if an entry corresponds + to a directory, all files under that directory will be ignored. If + an entry corresponds to a file, that particular file will be ignored. + ''' + BaseFinder.__init__(self, base, **kargs) + self.find_dotfiles = find_dotfiles + self.find_executables = find_executables + self.ignore = ignore + + def _find(self, pattern): + ''' + Actual implementation of FileFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + Note all files with a name starting with a '.' are ignored when + scanning directories, but are not ignored when explicitely requested. + ''' + if '*' in pattern: + return self._find_glob('', mozpath.split(pattern)) + elif os.path.isdir(os.path.join(self.base, pattern)): + return self._find_dir(pattern) + else: + f = self.get(pattern) + return ((pattern, f),) if f else () + + def _find_dir(self, path): + ''' + Actual implementation of FileFinder.find() when the given pattern + corresponds to an existing directory under the base directory. + Ignores file names starting with a '.' under the given path. If the + path itself has leafs starting with a '.', they are not ignored. + ''' + for p in self.ignore: + if mozpath.match(path, p): + return + + # The sorted makes the output idempotent. Otherwise, we are + # likely dependent on filesystem implementation details, such as + # inode ordering. 
+ for p in sorted(os.listdir(os.path.join(self.base, path))): + if p.startswith('.'): + if p in ('.', '..'): + continue + if not self.find_dotfiles: + continue + for p_, f in self._find(mozpath.join(path, p)): + yield p_, f + + def get(self, path): + srcpath = os.path.join(self.base, path) + if not os.path.exists(srcpath): + return None + + for p in self.ignore: + if mozpath.match(path, p): + return None + + if self.find_executables and is_executable(srcpath): + return ExecutableFile(srcpath) + else: + return File(srcpath) + + def _find_glob(self, base, pattern): + ''' + Actual implementation of FileFinder.find() when the given pattern + contains globbing patterns ('*' or '**'). This is meant to be an + equivalent of: + for p, f in self: + if mozpath.match(p, pattern): + yield p, f + but avoids scanning the entire tree. + ''' + if not pattern: + for p, f in self._find(base): + yield p, f + elif pattern[0] == '**': + for p, f in self._find(base): + if mozpath.match(p, mozpath.join(*pattern)): + yield p, f + elif '*' in pattern[0]: + if not os.path.exists(os.path.join(self.base, base)): + return + + for p in self.ignore: + if mozpath.match(base, p): + return + + # See above comment w.r.t. sorted() and idempotent behavior. + for p in sorted(os.listdir(os.path.join(self.base, base))): + if p.startswith('.') and not pattern[0].startswith('.'): + continue + if mozpath.match(p, pattern[0]): + for p_, f in self._find_glob(mozpath.join(base, p), + pattern[1:]): + yield p_, f + else: + for p, f in self._find_glob(mozpath.join(base, pattern[0]), + pattern[1:]): + yield p, f + + +class JarFinder(BaseFinder): + ''' + Helper to get appropriate DeflatedFile instances from a JarReader. + ''' + def __init__(self, base, reader, **kargs): + ''' + Create a JarFinder for files in the given JarReader. The base argument + is used as an indication of the Jar file location. + ''' + assert isinstance(reader, JarReader) + BaseFinder.__init__(self, base, **kargs) + self._files = OrderedDict((f.filename, f) for f in reader) + + def _find(self, pattern): + ''' + Actual implementation of JarFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + ''' + return self._find_helper(pattern, self._files, + lambda x: DeflatedFile(self._files[x])) + + +class TarFinder(BaseFinder): + ''' + Helper to get files from a TarFile. + ''' + def __init__(self, base, tar, **kargs): + ''' + Create a TarFinder for files in the given TarFile. The base argument + is used as an indication of the Tar file location. + ''' + assert isinstance(tar, TarFile) + self._tar = tar + BaseFinder.__init__(self, base, **kargs) + self._files = OrderedDict((f.name, f) for f in tar if f.isfile()) + + def _find(self, pattern): + ''' + Actual implementation of TarFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + ''' + return self._find_helper(pattern, self._files, + lambda x: ExtractedTarFile(self._tar, + self._files[x])) + + +class ComposedFinder(BaseFinder): + ''' + Composes multiple File Finders in some sort of virtual file system. + + A ComposedFinder is initialized from a dictionary associating paths to + *Finder instances. + + Note this could be optimized to be smarter than getting all the files + in advance. + ''' + def __init__(self, finders): + # Can't import globally, because of the dependency of mozpack.copier + # on this module. 
+ from mozpack.copier import FileRegistry + self.files = FileRegistry() + + for base, finder in sorted(finders.iteritems()): + if self.files.contains(base): + self.files.remove(base) + for p, f in finder.find(''): + self.files.add(mozpath.join(base, p), f) + + def find(self, pattern): + for p in self.files.match(pattern): + yield p, self.files[p] + + +class MercurialFile(BaseFile): + """File class for holding data from Mercurial.""" + def __init__(self, client, rev, path): + self._content = client.cat([path], rev=rev) + + def read(self): + return self._content + + +class MercurialRevisionFinder(BaseFinder): + """A finder that operates on a specific Mercurial revision.""" + + def __init__(self, repo, rev='.', recognize_repo_paths=False, **kwargs): + """Create a finder attached to a specific revision in a repository. + + If no revision is given, open the parent of the working directory. + + ``recognize_repo_paths`` will enable a mode where ``.get()`` will + recognize full paths that include the repo's path. Typically Finder + instances are "bound" to a base directory and paths are relative to + that directory. This mode changes that. When this mode is activated, + ``.find()`` will not work! This mode exists to support the moz.build + reader, which uses absolute paths instead of relative paths. The reader + should eventually be rewritten to use relative paths and this hack + should be removed (TODO bug 1171069). + """ + if not hglib: + raise Exception('hglib package not found') + + super(MercurialRevisionFinder, self).__init__(base=repo, **kwargs) + + self._root = mozpath.normpath(repo).rstrip('/') + self._recognize_repo_paths = recognize_repo_paths + + # We change directories here otherwise we have to deal with relative + # paths. + oldcwd = os.getcwd() + os.chdir(self._root) + try: + self._client = hglib.open(path=repo, encoding=b'utf-8') + finally: + os.chdir(oldcwd) + self._rev = rev if rev is not None else b'.' + self._files = OrderedDict() + + # Immediately populate the list of files in the repo since nearly every + # operation requires this list. + out = self._client.rawcommand([b'files', b'--rev', str(self._rev)]) + for relpath in out.splitlines(): + self._files[relpath] = None + + def _find(self, pattern): + if self._recognize_repo_paths: + raise NotImplementedError('cannot use find with recognize_repo_path') + + return self._find_helper(pattern, self._files, self._get) + + def get(self, path): + if self._recognize_repo_paths: + if not path.startswith(self._root): + raise ValueError('lookups in recognize_repo_paths mode must be ' + 'prefixed with repo path: %s' % path) + path = path[len(self._root) + 1:] + + try: + return self._get(path) + except KeyError: + return None + + def _get(self, path): + # We lazy populate self._files because potentially creating tens of + # thousands of MercurialFile instances for every file in the repo is + # inefficient. + f = self._files[path] + if not f: + f = MercurialFile(self._client, self._rev, path) + self._files[path] = f + + return f |
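The classes added here are easiest to see from the consumer side. Below is a minimal usage sketch, not part of the patch: the source and destination paths are hypothetical, and the destination directories are assumed to exist already (in the build system, mozpack.copier normally creates them).

    import mozpack.path as mozpath
    from mozpack.files import FileFinder

    # Hypothetical source/destination directories, for illustration only.
    finder = FileFinder('/path/to/srcdir')
    # find() yields (relative path, BaseFile) pairs for every file matching a
    # mozpath.match() pattern such as '*' or '**'.
    for path, f in finder.find('**/*.manifest'):
        # BaseFile.copy() skips the write when the destination is already up
        # to date and returns whether a copy was actually performed.
        f.copy(mozpath.join('/path/to/objdir', path))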
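JarFinder and DeflatedFile expose the same interface over a jar archive. A hedged sketch, assuming an omni.ja exists at the path shown:

    from mozpack.mozjar import JarReader
    from mozpack.files import JarFinder

    # Hypothetical archive; the base argument only records where files come from.
    jar_path = '/path/to/omni.ja'
    finder = JarFinder(jar_path, JarReader(jar_path))
    for path, f in finder.find('chrome/**'):
        data = f.open().read()  # decompresses the member from the archive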
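MercurialRevisionFinder requires the optional hglib package and a real Mercurial checkout. Assuming both, reading a file as of a given revision looks roughly like this:

    from mozpack.files import MercurialRevisionFinder

    # Hypothetical repository path and revision.
    finder = MercurialRevisionFinder('/path/to/mozilla-central', rev='tip')
    f = finder.get('python/mozbuild/mozpack/files.py')
    if f is not None:
        source = f.read()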