summaryrefslogtreecommitdiffstats
path: root/testing/mozharness/scripts/release/antivirus.py
diff options
context:
space:
mode:
Diffstat (limited to 'testing/mozharness/scripts/release/antivirus.py')
-rw-r--r--testing/mozharness/scripts/release/antivirus.py193
1 files changed, 193 insertions, 0 deletions
diff --git a/testing/mozharness/scripts/release/antivirus.py b/testing/mozharness/scripts/release/antivirus.py
new file mode 100644
index 000000000..b40dc5cc0
--- /dev/null
+++ b/testing/mozharness/scripts/release/antivirus.py
@@ -0,0 +1,193 @@
+from multiprocessing.pool import ThreadPool
+import os
+import re
+import sys
+import shutil
+
+sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))
+
+from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
+from mozharness.base.script import BaseScript
+
+
+class AntivirusScan(BaseScript, VirtualenvMixin):
+ config_options = [
+ [["--product"], {
+ "dest": "product",
+ "help": "Product being released, eg: firefox, thunderbird",
+ }],
+ [["--version"], {
+ "dest": "version",
+ "help": "Version of release, eg: 39.0b5",
+ }],
+ [["--build-number"], {
+ "dest": "build_number",
+ "help": "Build number of release, eg: 2",
+ }],
+ [["--bucket-name"], {
+ "dest": "bucket_name",
+ "help": "S3 Bucket to retrieve files from",
+ }],
+ [["--exclude"], {
+ "dest": "excludes",
+ "action": "append",
+ "help": "List of filename patterns to exclude. See script source for default",
+ }],
+ [["-d", "--download-parallelization"], {
+ "dest": "download_parallelization",
+ "default": 6,
+ "type": "int",
+ "help": "Number of concurrent file downloads",
+ }],
+ [["-s", "--scan-parallelization"], {
+ "dest": "scan_parallelization",
+ "default": 4,
+ "type": "int",
+ "help": "Number of concurrent file scans",
+ }],
+ [["--tools-repo"], {
+ "dest": "tools_repo",
+ "default": "https://hg.mozilla.org/build/tools",
+ }],
+ [["--tools-revision"], {
+ "dest": "tools_revision",
+ "help": "Revision of tools repo to use when downloading extract_and_run_command.py",
+ }],
+ ] + virtualenv_config_options
+
+ DEFAULT_EXCLUDES = [
+ r"^.*tests.*$",
+ r"^.*crashreporter.*$",
+ r"^.*\.zip(\.asc)?$",
+ r"^.*\.log$",
+ r"^.*\.txt$",
+ r"^.*\.asc$",
+ r"^.*/partner-repacks.*$",
+ r"^.*.checksums(\.asc)?$",
+ r"^.*/logs/.*$",
+ r"^.*/jsshell.*$",
+ r"^.*json$",
+ r"^.*/host.*$",
+ r"^.*/mar-tools/.*$",
+ r"^.*robocop.apk$",
+ r"^.*contrib.*"
+ ]
+ CACHE_DIR = 'cache'
+
+ def __init__(self):
+ BaseScript.__init__(self,
+ config_options=self.config_options,
+ require_config_file=False,
+ config={
+ "virtualenv_modules": [
+ "boto",
+ "redo",
+ "mar",
+ ],
+ "virtualenv_path": "venv",
+ },
+ all_actions=[
+ "create-virtualenv",
+ "activate-virtualenv",
+ "get-extract-script",
+ "get-files",
+ "scan-files",
+ "cleanup-cache",
+ ],
+ default_actions=[
+ "create-virtualenv",
+ "activate-virtualenv",
+ "get-extract-script",
+ "get-files",
+ "scan-files",
+ "cleanup-cache",
+ ],
+ )
+ self.excludes = self.config.get('excludes', self.DEFAULT_EXCLUDES)
+ self.dest_dir = self.CACHE_DIR
+
+ def _get_candidates_prefix(self):
+ return "pub/{}/candidates/{}-candidates/build{}/".format(
+ self.config['product'],
+ self.config["version"],
+ self.config["build_number"]
+ )
+
+ def _matches_exclude(self, keyname):
+ for exclude in self.excludes:
+ if re.search(exclude, keyname):
+ return True
+ return False
+
+ def get_extract_script(self):
+ """Gets a copy of extract_and_run_command.py from tools, and the supporting mar.py,
+ so that we can unpack various files for clam to scan them."""
+ remote_file = "{}/raw-file/{}/stage/extract_and_run_command.py".format(self.config["tools_repo"],
+ self.config["tools_revision"])
+ self.download_file(remote_file, file_name="extract_and_run_command.py")
+
+ def get_files(self):
+ """Pull the candidate files down from S3 for scanning, using parallel requests"""
+ from boto.s3.connection import S3Connection
+ from boto.exception import S3CopyError, S3ResponseError
+ from redo import retry
+ from httplib import HTTPException
+
+ # suppress boto debug logging, it's too verbose with --loglevel=debug
+ import logging
+ logging.getLogger('boto').setLevel(logging.INFO)
+
+ self.info("Connecting to S3")
+ conn = S3Connection(anon=True)
+ self.info("Getting bucket {}".format(self.config["bucket_name"]))
+ bucket = conn.get_bucket(self.config["bucket_name"])
+
+ if os.path.exists(self.dest_dir):
+ self.info('Emptying {}'.format(self.dest_dir))
+ shutil.rmtree(self.dest_dir)
+ os.makedirs(self.dest_dir)
+
+ def worker(item):
+ source, destination = item
+
+ self.info("Downloading {} to {}".format(source, destination))
+ key = bucket.get_key(source)
+ return retry(key.get_contents_to_filename,
+ args=(destination, ),
+ sleeptime=30, max_sleeptime=150,
+ retry_exceptions=(S3CopyError, S3ResponseError,
+ IOError, HTTPException))
+
+ def find_release_files():
+ candidates_prefix = self._get_candidates_prefix()
+ self.info("Getting key names from candidates")
+ for key in bucket.list(prefix=candidates_prefix):
+ keyname = key.name
+ if self._matches_exclude(keyname):
+ self.debug("Excluding {}".format(keyname))
+ else:
+ destination = os.path.join(self.dest_dir, keyname.replace(candidates_prefix, ''))
+ dest_dir = os.path.dirname(destination)
+ if not os.path.isdir(dest_dir):
+ os.makedirs(dest_dir)
+ yield (keyname, destination)
+
+ pool = ThreadPool(self.config["download_parallelization"])
+ pool.map(worker, find_release_files())
+
+ def scan_files(self):
+ """Scan the files we've collected. We do the download and scan concurrently to make
+ it easier to have a coherent log afterwards. Uses the venv python."""
+ self.run_command([self.query_python_path(), 'extract_and_run_command.py',
+ '-j{}'.format(self.config['scan_parallelization']),
+ 'clamdscan', '-m', '--no-summary', '--', self.dest_dir])
+
+ def cleanup_cache(self):
+ """If we have simultaneous releases in flight an av slave may end up doing another
+ av job before being recycled, and we need to make sure the full disk is available."""
+ shutil.rmtree(self.dest_dir)
+
+
+if __name__ == "__main__":
+ myScript = AntivirusScan()
+ myScript.run_and_exit()