Diffstat (limited to 'python/mozbuild/mozpack/files.py')
-rw-r--r-- | python/mozbuild/mozpack/files.py | 1106
1 file changed, 1106 insertions, 0 deletions
diff --git a/python/mozbuild/mozpack/files.py b/python/mozbuild/mozpack/files.py new file mode 100644 index 000000000..64902e195 --- /dev/null +++ b/python/mozbuild/mozpack/files.py @@ -0,0 +1,1106 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import + +import errno +import os +import platform +import shutil +import stat +import subprocess +import uuid +import mozbuild.makeutil as makeutil +from mozbuild.preprocessor import Preprocessor +from mozbuild.util import FileAvoidWrite +from mozpack.executables import ( + is_executable, + may_strip, + strip, + may_elfhack, + elfhack, +) +from mozpack.chrome.manifest import ManifestEntry +from io import BytesIO +from mozpack.errors import ( + ErrorMessage, + errors, +) +from mozpack.mozjar import JarReader +import mozpack.path as mozpath +from collections import OrderedDict +from jsmin import JavascriptMinify +from tempfile import ( + mkstemp, + NamedTemporaryFile, +) +from tarfile import ( + TarFile, + TarInfo, +) +try: + import hglib +except ImportError: + hglib = None + + +# For clean builds, copying files on win32 using CopyFile through ctypes is +# ~2x as fast as using shutil.copyfile. +if platform.system() != 'Windows': + _copyfile = shutil.copyfile +else: + import ctypes + _kernel32 = ctypes.windll.kernel32 + _CopyFileA = _kernel32.CopyFileA + _CopyFileW = _kernel32.CopyFileW + + def _copyfile(src, dest): + # False indicates `dest` should be overwritten if it exists already. + if isinstance(src, unicode) and isinstance(dest, unicode): + _CopyFileW(src, dest, False) + elif isinstance(src, str) and isinstance(dest, str): + _CopyFileA(src, dest, False) + else: + raise TypeError('mismatched path types!') + +class Dest(object): + ''' + Helper interface for BaseFile.copy. The interface works as follows: + - read() and write() can be used to sequentially read/write from the + underlying file. + - a call to read() after a write() will re-open the underlying file and + read from it. + - a call to write() after a read() will re-open the underlying file, + emptying it, and write to it. + ''' + def __init__(self, path): + self.path = path + self.mode = None + + @property + def name(self): + return self.path + + def read(self, length=-1): + if self.mode != 'r': + self.file = open(self.path, 'rb') + self.mode = 'r' + return self.file.read(length) + + def write(self, data): + if self.mode != 'w': + self.file = open(self.path, 'wb') + self.mode = 'w' + return self.file.write(data) + + def exists(self): + return os.path.exists(self.path) + + def close(self): + if self.mode: + self.mode = None + self.file.close() + + +class BaseFile(object): + ''' + Base interface and helper for file copying. Derived class may implement + their own copy function, or rely on BaseFile.copy using the open() member + function and/or the path property. + ''' + @staticmethod + def is_older(first, second): + ''' + Compares the modification time of two files, and returns whether the + ``first`` file is older than the ``second`` file. + ''' + # os.path.getmtime returns a result in seconds with precision up to + # the microsecond. But microsecond is too precise because + # shutil.copystat only copies milliseconds, and seconds is not + # enough precision. 
+ return int(os.path.getmtime(first) * 1000) \ + <= int(os.path.getmtime(second) * 1000) + + @staticmethod + def any_newer(dest, inputs): + ''' + Compares the modification time of ``dest`` to multiple input files, and + returns whether any of the ``inputs`` is newer (has a later mtime) than + ``dest``. + ''' + # os.path.getmtime returns a result in seconds with precision up to + # the microsecond. But microsecond is too precise because + # shutil.copystat only copies milliseconds, and seconds is not + # enough precision. + dest_mtime = int(os.path.getmtime(dest) * 1000) + for input in inputs: + if dest_mtime < int(os.path.getmtime(input) * 1000): + return True + return False + + @staticmethod + def normalize_mode(mode): + # Normalize file mode: + # - keep file type (e.g. S_IFREG) + ret = stat.S_IFMT(mode) + # - expand user read and execute permissions to everyone + if mode & 0400: + ret |= 0444 + if mode & 0100: + ret |= 0111 + # - keep user write permissions + if mode & 0200: + ret |= 0200 + # - leave away sticky bit, setuid, setgid + return ret + + def copy(self, dest, skip_if_older=True): + ''' + Copy the BaseFile content to the destination given as a string or a + Dest instance. Avoids replacing existing files if the BaseFile content + matches that of the destination, or in case of plain files, if the + destination is newer than the original file. This latter behaviour is + disabled when skip_if_older is False. + Returns whether a copy was actually performed (True) or not (False). + ''' + if isinstance(dest, basestring): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + can_skip_content_check = False + if not dest.exists(): + can_skip_content_check = True + elif getattr(self, 'path', None) and getattr(dest, 'path', None): + if skip_if_older and BaseFile.is_older(self.path, dest.path): + return False + elif os.path.getsize(self.path) != os.path.getsize(dest.path): + can_skip_content_check = True + + if can_skip_content_check: + if getattr(self, 'path', None) and getattr(dest, 'path', None): + _copyfile(self.path, dest.path) + shutil.copystat(self.path, dest.path) + else: + # Ensure the file is always created + if not dest.exists(): + dest.write('') + shutil.copyfileobj(self.open(), dest) + return True + + src = self.open() + copy_content = '' + while True: + dest_content = dest.read(32768) + src_content = src.read(32768) + copy_content += src_content + if len(dest_content) == len(src_content) == 0: + break + # If the read content differs between origin and destination, + # write what was read up to now, and copy the remainder. + if dest_content != src_content: + dest.write(copy_content) + shutil.copyfileobj(src, dest) + break + if hasattr(self, 'path') and hasattr(dest, 'path'): + shutil.copystat(self.path, dest.path) + return True + + def open(self): + ''' + Return a file-like object allowing to read() the content of the + associated file. This is meant to be overloaded in subclasses to return + a custom file-like object. + ''' + assert self.path is not None + return open(self.path, 'rb') + + def read(self): + raise NotImplementedError('BaseFile.read() not implemented. Bug 1170329.') + + @property + def mode(self): + ''' + Return the file's unix mode, or None if it has no meaning. + ''' + return None + + +class File(BaseFile): + ''' + File class for plain files. + ''' + def __init__(self, path): + self.path = path + + @property + def mode(self): + ''' + Return the file's unix mode, as returned by os.stat().st_mode. 
+ ''' + if platform.system() == 'Windows': + return None + assert self.path is not None + mode = os.stat(self.path).st_mode + return self.normalize_mode(mode) + + def read(self): + '''Return the contents of the file.''' + with open(self.path, 'rb') as fh: + return fh.read() + + +class ExecutableFile(File): + ''' + File class for executable and library files on OS/2, OS/X and ELF systems. + (see mozpack.executables.is_executable documentation). + ''' + def copy(self, dest, skip_if_older=True): + real_dest = dest + if not isinstance(dest, basestring): + fd, dest = mkstemp() + os.close(fd) + os.remove(dest) + assert isinstance(dest, basestring) + # If File.copy didn't actually copy because dest is newer, check the + # file sizes. If dest is smaller, it means it is already stripped and + # elfhacked, so we can skip. + if not File.copy(self, dest, skip_if_older) and \ + os.path.getsize(self.path) > os.path.getsize(dest): + return False + try: + if may_strip(dest): + strip(dest) + if may_elfhack(dest): + elfhack(dest) + except ErrorMessage: + os.remove(dest) + raise + + if real_dest != dest: + f = File(dest) + ret = f.copy(real_dest, skip_if_older) + os.remove(dest) + return ret + return True + + +class AbsoluteSymlinkFile(File): + '''File class that is copied by symlinking (if available). + + This class only works if the target path is absolute. + ''' + + def __init__(self, path): + if not os.path.isabs(path): + raise ValueError('Symlink target not absolute: %s' % path) + + File.__init__(self, path) + + def copy(self, dest, skip_if_older=True): + assert isinstance(dest, basestring) + + # The logic in this function is complicated by the fact that symlinks + # aren't universally supported. So, where symlinks aren't supported, we + # fall back to file copying. Keep in mind that symlink support is + # per-filesystem, not per-OS. + + # Handle the simple case where symlinks are definitely not supported by + # falling back to file copy. + if not hasattr(os, 'symlink'): + return File.copy(self, dest, skip_if_older=skip_if_older) + + # Always verify the symlink target path exists. + if not os.path.exists(self.path): + raise ErrorMessage('Symlink target path does not exist: %s' % self.path) + + st = None + + try: + st = os.lstat(dest) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + # If the dest is a symlink pointing to us, we have nothing to do. + # If it's the wrong symlink, the filesystem must support symlinks, + # so we replace with a proper symlink. + if st and stat.S_ISLNK(st.st_mode): + link = os.readlink(dest) + if link == self.path: + return False + + os.remove(dest) + os.symlink(self.path, dest) + return True + + # If the destination doesn't exist, we try to create a symlink. If that + # fails, we fall back to copy code. + if not st: + try: + os.symlink(self.path, dest) + return True + except OSError: + return File.copy(self, dest, skip_if_older=skip_if_older) + + # Now the complicated part. If the destination exists, we could be + # replacing a file with a symlink. Or, the filesystem may not support + # symlinks. We want to minimize I/O overhead for performance reasons, + # so we keep the existing destination file around as long as possible. + # A lot of the system calls would be eliminated if we cached whether + # symlinks are supported. 
However, even if we performed a single + # up-front test of whether the root of the destination directory + # supports symlinks, there's no guarantee that all operations for that + # dest (or source) would be on the same filesystem and would support + # symlinks. + # + # Our strategy is to attempt to create a new symlink with a random + # name. If that fails, we fall back to copy mode. If that works, we + # remove the old destination and move the newly-created symlink into + # its place. + + temp_dest = os.path.join(os.path.dirname(dest), str(uuid.uuid4())) + try: + os.symlink(self.path, temp_dest) + # TODO Figure out exactly how symlink creation fails and only trap + # that. + except EnvironmentError: + return File.copy(self, dest, skip_if_older=skip_if_older) + + # If removing the original file fails, don't forget to clean up the + # temporary symlink. + try: + os.remove(dest) + except EnvironmentError: + os.remove(temp_dest) + raise + + os.rename(temp_dest, dest) + return True + + +class ExistingFile(BaseFile): + ''' + File class that represents a file that may exist but whose content comes + from elsewhere. + + This purpose of this class is to account for files that are installed via + external means. It is typically only used in manifests or in registries to + account for files. + + When asked to copy, this class does nothing because nothing is known about + the source file/data. + + Instances of this class come in two flavors: required and optional. If an + existing file is required, it must exist during copy() or an error is + raised. + ''' + def __init__(self, required): + self.required = required + + def copy(self, dest, skip_if_older=True): + if isinstance(dest, basestring): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + if not self.required: + return + + if not dest.exists(): + errors.fatal("Required existing file doesn't exist: %s" % + dest.path) + + +class PreprocessedFile(BaseFile): + ''' + File class for a file that is preprocessed. PreprocessedFile.copy() runs + the preprocessor on the file to create the output. + ''' + def __init__(self, path, depfile_path, marker, defines, extra_depends=None, + silence_missing_directive_warnings=False): + self.path = path + self.depfile = depfile_path + self.marker = marker + self.defines = defines + self.extra_depends = list(extra_depends or []) + self.silence_missing_directive_warnings = \ + silence_missing_directive_warnings + + def copy(self, dest, skip_if_older=True): + ''' + Invokes the preprocessor to create the destination file. + ''' + if isinstance(dest, basestring): + dest = Dest(dest) + else: + assert isinstance(dest, Dest) + + # We have to account for the case where the destination exists and is a + # symlink to something. Since we know the preprocessor is certainly not + # going to create a symlink, we can just remove the existing one. If the + # destination is not a symlink, we leave it alone, since we're going to + # overwrite its contents anyway. + # If symlinks aren't supported at all, we can skip this step. + if hasattr(os, 'symlink'): + if os.path.islink(dest.path): + os.remove(dest.path) + + pp_deps = set(self.extra_depends) + + # If a dependency file was specified, and it exists, add any + # dependencies from that file to our list. 
+ if self.depfile and os.path.exists(self.depfile): + target = mozpath.normpath(dest.name) + with open(self.depfile, 'rb') as fileobj: + for rule in makeutil.read_dep_makefile(fileobj): + if target in rule.targets(): + pp_deps.update(rule.dependencies()) + + skip = False + if dest.exists() and skip_if_older: + # If a dependency file was specified, and it doesn't exist, + # assume that the preprocessor needs to be rerun. That will + # regenerate the dependency file. + if self.depfile and not os.path.exists(self.depfile): + skip = False + else: + skip = not BaseFile.any_newer(dest.path, pp_deps) + + if skip: + return False + + deps_out = None + if self.depfile: + deps_out = FileAvoidWrite(self.depfile) + pp = Preprocessor(defines=self.defines, marker=self.marker) + pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings) + + with open(self.path, 'rU') as input: + pp.processFile(input=input, output=dest, depfile=deps_out) + + dest.close() + if self.depfile: + deps_out.close() + + return True + + +class GeneratedFile(BaseFile): + ''' + File class for content with no previous existence on the filesystem. + ''' + def __init__(self, content): + self.content = content + + def open(self): + return BytesIO(self.content) + + +class DeflatedFile(BaseFile): + ''' + File class for members of a jar archive. DeflatedFile.copy() effectively + extracts the file from the jar archive. + ''' + def __init__(self, file): + from mozpack.mozjar import JarFileReader + assert isinstance(file, JarFileReader) + self.file = file + + def open(self): + self.file.seek(0) + return self.file + +class ExtractedTarFile(GeneratedFile): + ''' + File class for members of a tar archive. Contents of the underlying file + are extracted immediately and stored in memory. + ''' + def __init__(self, tar, info): + assert isinstance(info, TarInfo) + assert isinstance(tar, TarFile) + GeneratedFile.__init__(self, tar.extractfile(info).read()) + self._mode = self.normalize_mode(info.mode) + + @property + def mode(self): + return self._mode + + def read(self): + return self.content + +class XPTFile(GeneratedFile): + ''' + File class for a linked XPT file. It takes several XPT files as input + (using the add() and remove() member functions), and links them at copy() + time. + ''' + def __init__(self): + self._files = set() + + def add(self, xpt): + ''' + Add the given XPT file (as a BaseFile instance) to the list of XPTs + to link. + ''' + assert isinstance(xpt, BaseFile) + self._files.add(xpt) + + def remove(self, xpt): + ''' + Remove the given XPT file (as a BaseFile instance) from the list of + XPTs to link. + ''' + assert isinstance(xpt, BaseFile) + self._files.remove(xpt) + + def copy(self, dest, skip_if_older=True): + ''' + Link the registered XPTs and place the resulting linked XPT at the + destination given as a string or a Dest instance. Avoids an expensive + XPT linking if the interfaces in an existing destination match those of + the individual XPTs to link. + skip_if_older is ignored. + ''' + if isinstance(dest, basestring): + dest = Dest(dest) + assert isinstance(dest, Dest) + + from xpt import xpt_link, Typelib, Interface + all_typelibs = [Typelib.read(f.open()) for f in self._files] + if dest.exists(): + # Typelib.read() needs to seek(), so use a BytesIO for dest + # content. 
+ dest_interfaces = \ + dict((i.name, i) + for i in Typelib.read(BytesIO(dest.read())).interfaces + if i.iid != Interface.UNRESOLVED_IID) + identical = True + for f in self._files: + typelib = Typelib.read(f.open()) + for i in typelib.interfaces: + if i.iid != Interface.UNRESOLVED_IID and \ + not (i.name in dest_interfaces and + i == dest_interfaces[i.name]): + identical = False + break + if identical: + return False + s = BytesIO() + xpt_link(all_typelibs).write(s) + dest.write(s.getvalue()) + return True + + def open(self): + raise RuntimeError("Unsupported") + + def isempty(self): + ''' + Return whether there are XPT files to link. + ''' + return len(self._files) == 0 + + +class ManifestFile(BaseFile): + ''' + File class for a manifest file. It takes individual manifest entries (using + the add() and remove() member functions), and adjusts them to be relative + to the base path for the manifest, given at creation. + Example: + There is a manifest entry "content foobar foobar/content/" relative + to "foobar/chrome". When packaging, the entry will be stored in + jar:foobar/omni.ja!/chrome/chrome.manifest, which means the entry + will have to be relative to "chrome" instead of "foobar/chrome". This + doesn't really matter when serializing the entry, since this base path + is not written out, but it matters when moving the entry at the same + time, e.g. to jar:foobar/omni.ja!/chrome.manifest, which we don't do + currently but could in the future. + ''' + def __init__(self, base, entries=None): + self._entries = entries if entries else [] + self._base = base + + def add(self, entry): + ''' + Add the given entry to the manifest. Entries are rebased at open() time + instead of add() time so that they can be more easily remove()d. + ''' + assert isinstance(entry, ManifestEntry) + self._entries.append(entry) + + def remove(self, entry): + ''' + Remove the given entry from the manifest. + ''' + assert isinstance(entry, ManifestEntry) + self._entries.remove(entry) + + def open(self): + ''' + Return a file-like object allowing to read() the serialized content of + the manifest. + ''' + return BytesIO(''.join('%s\n' % e.rebase(self._base) + for e in self._entries)) + + def __iter__(self): + ''' + Iterate over entries in the manifest file. + ''' + return iter(self._entries) + + def isempty(self): + ''' + Return whether there are manifest entries to write + ''' + return len(self._entries) == 0 + + +class MinifiedProperties(BaseFile): + ''' + File class for minified properties. This wraps around a BaseFile instance, + and removes lines starting with a # from its content. + ''' + def __init__(self, file): + assert isinstance(file, BaseFile) + self._file = file + + def open(self): + ''' + Return a file-like object allowing to read() the minified content of + the properties file. + ''' + return BytesIO(''.join(l for l in self._file.open().readlines() + if not l.startswith('#'))) + + +class MinifiedJavaScript(BaseFile): + ''' + File class for minifying JavaScript files. 
+ ''' + def __init__(self, file, verify_command=None): + assert isinstance(file, BaseFile) + self._file = file + self._verify_command = verify_command + + def open(self): + output = BytesIO() + minify = JavascriptMinify(self._file.open(), output, quote_chars="'\"`") + minify.minify() + output.seek(0) + + if not self._verify_command: + return output + + input_source = self._file.open().read() + output_source = output.getvalue() + + with NamedTemporaryFile() as fh1, NamedTemporaryFile() as fh2: + fh1.write(input_source) + fh2.write(output_source) + fh1.flush() + fh2.flush() + + try: + args = list(self._verify_command) + args.extend([fh1.name, fh2.name]) + subprocess.check_output(args, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + errors.warn('JS minification verification failed for %s:' % + (getattr(self._file, 'path', '<unknown>'))) + # Prefix each line with "Warning:" so mozharness doesn't + # think these error messages are real errors. + for line in e.output.splitlines(): + errors.warn(line) + + return self._file.open() + + return output + + +class BaseFinder(object): + def __init__(self, base, minify=False, minify_js=False, + minify_js_verify_command=None): + ''' + Initializes the instance with a reference base directory. + + The optional minify argument specifies whether minification of code + should occur. minify_js is an additional option to control minification + of JavaScript. It requires minify to be True. + + minify_js_verify_command can be used to optionally verify the results + of JavaScript minification. If defined, it is expected to be an iterable + that will constitute the first arguments to a called process which will + receive the filenames of the original and minified JavaScript files. + The invoked process can then verify the results. If minification is + rejected, the process exits with a non-0 exit code and the original + JavaScript source is used. An example value for this argument is + ('/path/to/js', '/path/to/verify/script.js'). + ''' + if minify_js and not minify: + raise ValueError('minify_js requires minify.') + + self.base = base + self._minify = minify + self._minify_js = minify_js + self._minify_js_verify_command = minify_js_verify_command + + def find(self, pattern): + ''' + Yield path, BaseFile_instance pairs for all files under the base + directory and its subdirectories that match the given pattern. See the + mozpack.path.match documentation for a description of the handled + patterns. + ''' + while pattern.startswith('/'): + pattern = pattern[1:] + for p, f in self._find(pattern): + yield p, self._minify_file(p, f) + + def get(self, path): + """Obtain a single file. + + Where ``find`` is tailored towards matching multiple files, this method + is used for retrieving a single file. Use this method when performance + is critical. + + Returns a ``BaseFile`` if at most one file exists or ``None`` otherwise. + """ + files = list(self.find(path)) + if len(files) != 1: + return None + return files[0][1] + + def __iter__(self): + ''' + Iterates over all files under the base directory (excluding files + starting with a '.' and files at any level under a directory starting + with a '.'). + for path, file in finder: + ... + ''' + return self.find('') + + def __contains__(self, pattern): + raise RuntimeError("'in' operator forbidden for %s. Use contains()." % + self.__class__.__name__) + + def contains(self, pattern): + ''' + Return whether some files under the base directory match the given + pattern. 
See the mozpack.path.match documentation for a description of + the handled patterns. + ''' + return any(self.find(pattern)) + + def _minify_file(self, path, file): + ''' + Return an appropriate MinifiedSomething wrapper for the given BaseFile + instance (file), according to the file type (determined by the given + path), if the FileFinder was created with minification enabled. + Otherwise, just return the given BaseFile instance. + ''' + if not self._minify or isinstance(file, ExecutableFile): + return file + + if path.endswith('.properties'): + return MinifiedProperties(file) + + if self._minify_js and path.endswith(('.js', '.jsm')): + return MinifiedJavaScript(file, self._minify_js_verify_command) + + return file + + def _find_helper(self, pattern, files, file_getter): + """Generic implementation of _find. + + A few *Finder implementations share logic for returning results. + This function implements the custom logic. + + The ``file_getter`` argument is a callable that receives a path + that is known to exist. The callable should return a ``BaseFile`` + instance. + """ + if '*' in pattern: + for p in files: + if mozpath.match(p, pattern): + yield p, file_getter(p) + elif pattern == '': + for p in files: + yield p, file_getter(p) + elif pattern in files: + yield pattern, file_getter(pattern) + else: + for p in files: + if mozpath.basedir(p, [pattern]) == pattern: + yield p, file_getter(p) + + +class FileFinder(BaseFinder): + ''' + Helper to get appropriate BaseFile instances from the file system. + ''' + def __init__(self, base, find_executables=True, ignore=(), + find_dotfiles=False, **kargs): + ''' + Create a FileFinder for files under the given base directory. + + The find_executables argument determines whether the finder needs to + try to guess whether files are executables. Disabling this guessing + when not necessary can speed up the finder significantly. + + ``ignore`` accepts an iterable of patterns to ignore. Entries are + strings that match paths relative to ``base`` using + ``mozpath.match()``. This means if an entry corresponds + to a directory, all files under that directory will be ignored. If + an entry corresponds to a file, that particular file will be ignored. + ''' + BaseFinder.__init__(self, base, **kargs) + self.find_dotfiles = find_dotfiles + self.find_executables = find_executables + self.ignore = ignore + + def _find(self, pattern): + ''' + Actual implementation of FileFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + Note all files with a name starting with a '.' are ignored when + scanning directories, but are not ignored when explicitely requested. + ''' + if '*' in pattern: + return self._find_glob('', mozpath.split(pattern)) + elif os.path.isdir(os.path.join(self.base, pattern)): + return self._find_dir(pattern) + else: + f = self.get(pattern) + return ((pattern, f),) if f else () + + def _find_dir(self, path): + ''' + Actual implementation of FileFinder.find() when the given pattern + corresponds to an existing directory under the base directory. + Ignores file names starting with a '.' under the given path. If the + path itself has leafs starting with a '.', they are not ignored. + ''' + for p in self.ignore: + if mozpath.match(path, p): + return + + # The sorted makes the output idempotent. Otherwise, we are + # likely dependent on filesystem implementation details, such as + # inode ordering. 
+ for p in sorted(os.listdir(os.path.join(self.base, path))): + if p.startswith('.'): + if p in ('.', '..'): + continue + if not self.find_dotfiles: + continue + for p_, f in self._find(mozpath.join(path, p)): + yield p_, f + + def get(self, path): + srcpath = os.path.join(self.base, path) + if not os.path.exists(srcpath): + return None + + for p in self.ignore: + if mozpath.match(path, p): + return None + + if self.find_executables and is_executable(srcpath): + return ExecutableFile(srcpath) + else: + return File(srcpath) + + def _find_glob(self, base, pattern): + ''' + Actual implementation of FileFinder.find() when the given pattern + contains globbing patterns ('*' or '**'). This is meant to be an + equivalent of: + for p, f in self: + if mozpath.match(p, pattern): + yield p, f + but avoids scanning the entire tree. + ''' + if not pattern: + for p, f in self._find(base): + yield p, f + elif pattern[0] == '**': + for p, f in self._find(base): + if mozpath.match(p, mozpath.join(*pattern)): + yield p, f + elif '*' in pattern[0]: + if not os.path.exists(os.path.join(self.base, base)): + return + + for p in self.ignore: + if mozpath.match(base, p): + return + + # See above comment w.r.t. sorted() and idempotent behavior. + for p in sorted(os.listdir(os.path.join(self.base, base))): + if p.startswith('.') and not pattern[0].startswith('.'): + continue + if mozpath.match(p, pattern[0]): + for p_, f in self._find_glob(mozpath.join(base, p), + pattern[1:]): + yield p_, f + else: + for p, f in self._find_glob(mozpath.join(base, pattern[0]), + pattern[1:]): + yield p, f + + +class JarFinder(BaseFinder): + ''' + Helper to get appropriate DeflatedFile instances from a JarReader. + ''' + def __init__(self, base, reader, **kargs): + ''' + Create a JarFinder for files in the given JarReader. The base argument + is used as an indication of the Jar file location. + ''' + assert isinstance(reader, JarReader) + BaseFinder.__init__(self, base, **kargs) + self._files = OrderedDict((f.filename, f) for f in reader) + + def _find(self, pattern): + ''' + Actual implementation of JarFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + ''' + return self._find_helper(pattern, self._files, + lambda x: DeflatedFile(self._files[x])) + + +class TarFinder(BaseFinder): + ''' + Helper to get files from a TarFile. + ''' + def __init__(self, base, tar, **kargs): + ''' + Create a TarFinder for files in the given TarFile. The base argument + is used as an indication of the Tar file location. + ''' + assert isinstance(tar, TarFile) + self._tar = tar + BaseFinder.__init__(self, base, **kargs) + self._files = OrderedDict((f.name, f) for f in tar if f.isfile()) + + def _find(self, pattern): + ''' + Actual implementation of TarFinder.find(), dispatching to specialized + member functions depending on what kind of pattern was given. + ''' + return self._find_helper(pattern, self._files, + lambda x: ExtractedTarFile(self._tar, + self._files[x])) + + +class ComposedFinder(BaseFinder): + ''' + Composes multiple File Finders in some sort of virtual file system. + + A ComposedFinder is initialized from a dictionary associating paths to + *Finder instances. + + Note this could be optimized to be smarter than getting all the files + in advance. + ''' + def __init__(self, finders): + # Can't import globally, because of the dependency of mozpack.copier + # on this module. 
+ from mozpack.copier import FileRegistry + self.files = FileRegistry() + + for base, finder in sorted(finders.iteritems()): + if self.files.contains(base): + self.files.remove(base) + for p, f in finder.find(''): + self.files.add(mozpath.join(base, p), f) + + def find(self, pattern): + for p in self.files.match(pattern): + yield p, self.files[p] + + +class MercurialFile(BaseFile): + """File class for holding data from Mercurial.""" + def __init__(self, client, rev, path): + self._content = client.cat([path], rev=rev) + + def read(self): + return self._content + + +class MercurialRevisionFinder(BaseFinder): + """A finder that operates on a specific Mercurial revision.""" + + def __init__(self, repo, rev='.', recognize_repo_paths=False, **kwargs): + """Create a finder attached to a specific revision in a repository. + + If no revision is given, open the parent of the working directory. + + ``recognize_repo_paths`` will enable a mode where ``.get()`` will + recognize full paths that include the repo's path. Typically Finder + instances are "bound" to a base directory and paths are relative to + that directory. This mode changes that. When this mode is activated, + ``.find()`` will not work! This mode exists to support the moz.build + reader, which uses absolute paths instead of relative paths. The reader + should eventually be rewritten to use relative paths and this hack + should be removed (TODO bug 1171069). + """ + if not hglib: + raise Exception('hglib package not found') + + super(MercurialRevisionFinder, self).__init__(base=repo, **kwargs) + + self._root = mozpath.normpath(repo).rstrip('/') + self._recognize_repo_paths = recognize_repo_paths + + # We change directories here otherwise we have to deal with relative + # paths. + oldcwd = os.getcwd() + os.chdir(self._root) + try: + self._client = hglib.open(path=repo, encoding=b'utf-8') + finally: + os.chdir(oldcwd) + self._rev = rev if rev is not None else b'.' + self._files = OrderedDict() + + # Immediately populate the list of files in the repo since nearly every + # operation requires this list. + out = self._client.rawcommand([b'files', b'--rev', str(self._rev)]) + for relpath in out.splitlines(): + self._files[relpath] = None + + def _find(self, pattern): + if self._recognize_repo_paths: + raise NotImplementedError('cannot use find with recognize_repo_path') + + return self._find_helper(pattern, self._files, self._get) + + def get(self, path): + if self._recognize_repo_paths: + if not path.startswith(self._root): + raise ValueError('lookups in recognize_repo_paths mode must be ' + 'prefixed with repo path: %s' % path) + path = path[len(self._root) + 1:] + + try: + return self._get(path) + except KeyError: + return None + + def _get(self, path): + # We lazy populate self._files because potentially creating tens of + # thousands of MercurialFile instances for every file in the repo is + # inefficient. + f = self._files[path] + if not f: + f = MercurialFile(self._client, self._rev, path) + self._files[path] = f + + return f |
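The classes added here are easiest to see from the consumer side. Below is a minimal usage sketch, not part of the patch: the source and destination paths are hypothetical, and the destination directories are assumed to exist already (in the build system, mozpack.copier normally creates them).

    import mozpack.path as mozpath
    from mozpack.files import FileFinder

    # Hypothetical source/destination directories, for illustration only.
    finder = FileFinder('/path/to/srcdir')
    # find() yields (relative path, BaseFile) pairs for every file matching a
    # mozpath.match() pattern such as '*' or '**'.
    for path, f in finder.find('**/*.manifest'):
        # BaseFile.copy() skips the write when the destination is already up
        # to date and returns whether a copy was actually performed.
        f.copy(mozpath.join('/path/to/objdir', path))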
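JarFinder and DeflatedFile expose the same interface over a jar archive. A hedged sketch, assuming an omni.ja exists at the path shown:

    from mozpack.mozjar import JarReader
    from mozpack.files import JarFinder

    # Hypothetical archive; the base argument only records where files come from.
    jar_path = '/path/to/omni.ja'
    finder = JarFinder(jar_path, JarReader(jar_path))
    for path, f in finder.find('chrome/**'):
        data = f.open().read()  # decompresses the member from the archive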
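MercurialRevisionFinder requires the optional hglib package and a real Mercurial checkout. Assuming both, reading a file as of a given revision looks roughly like this:

    from mozpack.files import MercurialRevisionFinder

    # Hypothetical repository path and revision.
    finder = MercurialRevisionFinder('/path/to/mozilla-central', rev='tip')
    f = finder.get('python/mozbuild/mozpack/files.py')
    if f is not None:
        source = f.read()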