#!/usr/bin/env python
"""\
Usage: extract_and_run_command.py [-j N] [command to run] -- [files and/or directories]
    -j is the number of workers to start, defaulting to 1.
    [command to run] must be a command that can accept one or many files
    to process as arguments.

WARNING: This script does NOT respond to SIGINT. You must use SIGQUIT or SIGKILL to
         terminate it early.
"""

### The canonical location for this file is
###   https://hg.mozilla.org/build/tools/file/default/stage/extract_and_run_command.py
###
### Please update the copy in puppet to deploy new changes to
### stage.mozilla.org, see
# https://wiki.mozilla.org/ReleaseEngineering/How_To/Modify_scripts_on_stage

import logging
import os
from os import path
import sys
import shutil
import subprocess
import tempfile
from threading import Thread
import time

try:
    # Python 2 name of the module
    from Queue import Queue
except ImportError:
    # Python 3 renamed it to lowercase
    from queue import Queue

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO, format="%(message)s")
log = logging.getLogger(__name__)

try:
    # the future - https://github.com/mozilla/build-mar via a venv
    from mardor.marfile import BZ2MarFile
except Exception:
    # the past - http://hg.mozilla.org/build/tools/file/default/buildfarm/utils/mar.py
    #
    # Any import-time failure of the new module (not only ImportError) falls
    # back to the old one, but SystemExit and KeyboardInterrupt are no longer
    # swallowed the way a bare "except:" would on current Pythons.
    sys.path.append(
        path.join(path.dirname(path.realpath(__file__)), "../buildfarm/utils"))
    from mar import BZ2MarFile

# Name of the 7-zip executable used to unpack .exe files; looked up on PATH.
SEVENZIP = "7za"


def extractMar(filename, tempdir):
    """Extract the MAR archive `filename' into the directory `tempdir'."""
    m = BZ2MarFile(filename)
    m.extractall(path=tempdir)


def extractExe(filename, tempdir):
    """Try to extract `filename' into `tempdir' using 7-zip.

    Extraction is best effort: not all EXEs are 7-zip archives, so the exit
    status of 7-zip is deliberately ignored and nothing is returned. Failing
    to start 7-zip at all (for example because it is not installed) still
    raises the OSError from subprocess.Popen.
    """
    # We don't actually care about the output, but we redirect it to a
    # tempfile rather than a pipe: nothing reads the pipe, so wait() could
    # deadlock once the pipe buffer fills up.
    fd = tempfile.TemporaryFile()
    try:
        proc = subprocess.Popen([SEVENZIP, 'x', '-o%s' % tempdir, filename],
                                stdout=fd, stderr=subprocess.STDOUT)
        # The return code is intentionally not checked (see docstring).
        # Popen.wait() never raises CalledProcessError, so there is nothing
        # to catch here.
        proc.wait()
    finally:
        fd.close()


# The keys here are matched against the extension of the filename as returned
# by path.splitext(), i.e. including the leading dot.
# The values are callables that accept two string arguments: the file to
# extract and the directory to extract it into.
EXTRACTORS = {
    '.mar': extractMar,
    '.exe': extractExe,
}

# rwxr-xr-x. Built with int() because the octal literal is spelled "0755" on
# Python 2 and "0o755" on Python 3, and neither spelling parses on both.
DEFAULT_MODE = int("755", 8)


def find_files(d):
    """Yield the absolute path of every file found below directory `d'."""
    for root, dirs, files in os.walk(d):
        for f in files:
            yield path.abspath(path.join(root, f))


def rchmod(d, mode=DEFAULT_MODE):
    """chmods everything in `d' to `mode', including `d' itself"""
    os.chmod(d, mode)
    # os.walk() is top-down, so each directory is chmod'ed while its parent
    # is being visited, i.e. before the walk descends into it.
    for root, dirs, files in os.walk(d):
        for item in dirs + files:
            os.chmod(path.join(root, item), mode)


def maybe_extract(filename):
    """If an extractor is found for `filename', extracts it to a temporary
    directory and chmods it.

    Returns the root of the temporary directory, or None when no extractor
    is registered for the extension of `filename'. The consumer is
    responsible for removing the extracted files, if desired. If extraction
    itself fails, the temporary directory is removed here and the original
    exception is re-raised.
    """
    extractor = EXTRACTORS.get(path.splitext(filename)[1])
    if extractor is None:
        return None
    tempdir_root = tempfile.mkdtemp()
    try:
        # Append the full filepath to the tempdir. abspath() matters for
        # relative names: something like "../foo.mar" joined as-is would
        # resolve to a directory outside of tempdir_root, which would then
        # be neither processed nor cleaned up.
        tempdir = path.join(tempdir_root, path.abspath(filename).lstrip('/'))
        os.makedirs(tempdir)
        extractor(filename, tempdir)
        rchmod(tempdir_root)
    except:
        # Catch-all is intentional: whatever went wrong, don't leave the
        # temporary directory behind, then let the error propagate untouched.
        shutil.rmtree(tempdir_root, ignore_errors=True)
        raise
    return tempdir_root


def process(item, command):
    """Run `command' on `item' and, if `item' can be extracted, on all of the
    files contained in it as well.

    `command' is a list of arguments; the file names are appended to it. The
    command's stdout is captured and emitted, together with START/END
    markers, as a single log message once the command has finished.

    Raises an Exception if the command exits with a non-zero status. Errors
    from extracting `item' or from starting the command propagate as raised.
    """
    def format_time(t):
        return time.strftime("%H:%M:%S", time.localtime(t))

    # Buffer output to avoid interleaving of multiple workers'
    logs = []
    args = [item]
    start = time.time()
    logs.append("START %s: %s" % (format_time(start), item))

    # If the file was extracted, we need to process all of its files, too.
    tempdir = maybe_extract(item)
    # Stays None if creating the temp file fails, so that the cleanup below
    # doesn't trip over an unbound name and hide the real error.
    fd = None
    try:
        if tempdir:
            args.extend(find_files(tempdir))
        fd = tempfile.TemporaryFile()
        proc = subprocess.Popen(command + args, stdout=fd)
        proc.wait()
        if proc.returncode != 0:
            raise Exception("returned %s" % proc.returncode)
    finally:
        if tempdir:
            shutil.rmtree(tempdir)
        if fd is not None:
            try:
                fd.seek(0)
                output = fd.read()
            finally:
                fd.close()
            # On Python 3 the temp file yields bytes, which can't be joined
            # with the text lines below. On Python 2 str already is the byte
            # string type, so this is a no-op there.
            if not isinstance(output, str):
                output = output.decode("utf-8", "replace")
            # rstrip() here to avoid an unnecessary newline, if it exists.
            logs.append(output.rstrip())
        end = time.time()
        elapsed = end - start
        logs.append("END %s (%d seconds elapsed): %s\n" % (
            format_time(end), elapsed, item))
        # Now that we've got all of our output, print it. It's important that
        # the logging module is used for this, because "print" is not
        # thread-safe.
        log.info("\n".join(logs))


def worker(command, errors):
    """Process items from the global queue `q' until a None item is received.

    Items for which process() raises are put on the `errors' queue; the
    worker then carries on with the next item.
    """
    item = q.get()
    while item is not None:
        try:
            process(item, command)
        except Exception:
            errors.put(item)
        item = q.get()


if __name__ == '__main__':
    # getopt is used in favour of optparse to enable "--" as a separator
    # between the command and list of files. optparse doesn't allow that.
    from getopt import getopt, GetoptError

    try:
        options, args = getopt(sys.argv[1:], 'j:h', ['help'])
    except GetoptError:
        log.error(__doc__)
        sys.exit(1)

    concurrency = 1
    for o, a in options:
        if o == '-j':
            try:
                concurrency = int(a)
            except ValueError:
                log.error("Invalid value for -j: %s" % a)
                sys.exit(1)
        elif o in ('-h', '--help'):
            log.info(__doc__)
            sys.exit(0)

    # Without at least one worker nothing would ever be taken off the queue,
    # and the script would exit successfully without having run anything.
    if concurrency < 1:
        log.error("-j must be at least 1")
        sys.exit(1)

    if len(args) < 3 or '--' not in args:
        log.error(__doc__)
        sys.exit(1)

    # Everything before the first "--" is the command, the rest is the list
    # of files and directories to run it on.
    separator = args.index('--')
    command = args[:separator]
    targets = args[separator + 1:]
    if not command:
        log.error(__doc__)
        sys.exit(1)

    q = Queue()
    errors = Queue()
    threads = []
    for i in range(concurrency):
        t = Thread(target=worker, args=(command, errors))
        t.start()
        threads.append(t)

    # find_files is a generator, so work will begin prior to it finding
    # all of the files
    for arg in targets:
        if path.isfile(arg):
            q.put(arg)
        else:
            for f in find_files(arg):
                q.put(f)
    # Because the workers are started before we start populating the q
    # they can't use .empty() to determine whether or not they're done.
    # We also can't use q.join() or q.task_done(), because we need to
    # support Python 2.4. We know that find_files won't yield None,
    # so we can detect doneness by having workers die when they get None
    # as an item.
    for i in range(concurrency):
        q.put(None)

    for t in threads:
        t.join()

    if not errors.empty():
        log.error("Command failed for the following files:")
        while not errors.empty():
            log.error("  %s" % errors.get())
        sys.exit(1)