from multiprocessing.pool import ThreadPool
import os
import re
import sys


# Put the directory two levels above sys.path[0] on the import path (at
# index 1, i.e. right after the first entry).
# NOTE(review): presumably this is what makes the mozharness imports below
# resolvable when the script is run from inside the source tree -- confirm.
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))

from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
from mozharness.base.script import BaseScript
from mozharness.mozilla.aws import pop_aws_auth_from_env


class ReleasePusher(BaseScript, VirtualenvMixin):
    """Copy a release's files from candidates/ to releases/ in an S3 bucket.

    The source and destination prefixes are derived from the ``product``,
    ``version`` and ``build_number`` config values. Keys whose names match
    any of the ``excludes`` patterns are skipped; the remaining keys are
    copied concurrently using a thread pool.
    """

    config_options = [
        [["--product"], {
            "dest": "product",
            "help": "Product being released, eg: firefox, thunderbird",
        }],
        [["--version"], {
            "dest": "version",
            "help": "Version of release, eg: 39.0b5",
        }],
        [["--build-number"], {
            "dest": "build_number",
            "help": "Build number of release, eg: 2",
        }],
        [["--bucket-name"], {
            "dest": "bucket_name",
            "help": "Bucket to copy files from candidates/ to releases/",
        }],
        [["--credentials"], {
            "dest": "credentials",
            "help": "File containing access key and secret access key",
        }],
        [["--exclude"], {
            "dest": "excludes",
            # Patterns are applied with re.search() against the full key name.
            # NOTE(review): the "." in ".checksums", "robocop.apk" and
            # "bouncer.apk" is unescaped, so it matches any character --
            # confirm that is intended before tightening these.
            "default": [
                r"^.*tests.*$",
                r"^.*crashreporter.*$",
                r"^.*[^k]\.zip(\.asc)?$",
                r"^.*\.log$",
                r"^.*\.txt$",
                r"^.*/partner-repacks.*$",
                r"^.*.checksums(\.asc)?$",
                r"^.*/logs/.*$",
                r"^.*/jsshell.*$",
                r"^.*json$",
                r"^.*/host.*$",
                r"^.*/mar-tools/.*$",
                r"^.*robocop.apk$",
                r"^.*bouncer.apk$",
                r"^.*contrib.*",
                r"^.*/beetmover-checksums/.*$",
            ],
            "action": "append",
            "help": "List of patterns to exclude from copy. The list can be "
                    "extended by passing multiple --exclude arguments.",
        }],
        [["-j", "--parallelization"], {
            "dest": "parallelization",
            "default": 20,
            "type": "int",
            "help": "Number of copy requests to run concurrently",
        }],
    ] + virtualenv_config_options

    def __init__(self, aws_creds):
        """Set up the script and decide where the AWS credentials come from.

        aws_creds is a pair ``(access key id, secret access key)``. Either
        both of its items must be set, or the ``credentials`` config value
        must name a credentials file; supplying both sources is reported
        through ``self.fatal``.
        """
        BaseScript.__init__(self,
            config_options=self.config_options,
            require_config_file=False,
            config={
                "virtualenv_modules": [
                    "boto",
                    "redo",
                ],
                "virtualenv_path": "venv",
            },
            all_actions=[
                "create-virtualenv",
                "activate-virtualenv",
                "push-to-releases",
            ],
            default_actions=[
                "create-virtualenv",
                "activate-virtualenv",
                "push-to-releases",
            ],
        )

        # validate aws credentials
        if not (all(aws_creds) or self.config.get('credentials')):
            self.fatal("aws creds not defined. please add them to your config or env.")
        if any(aws_creds) and self.config.get('credentials'):
            self.fatal("aws creds found in env and self.config. please declare in one place only.")

        # set aws credentials
        if all(aws_creds):
            self.aws_key_id, self.aws_secret_key = aws_creds
        else:  # use the credentials file named in the config
            self.aws_key_id, self.aws_secret_key = None, None
            # set the env var for boto to read our special config file
            # rather than anything else we have at ~/.boto
            os.environ["BOTO_CONFIG"] = os.path.abspath(self.config["credentials"])

    def _get_candidates_prefix(self):
        """Return the key prefix of the candidates directory to copy from."""
        return "pub/{}/candidates/{}-candidates/build{}/".format(
            self.config['product'],
            self.config["version"],
            self.config["build_number"]
        )

    def _get_releases_prefix(self):
        """Return the key prefix of the releases directory to copy to."""
        return "pub/{}/releases/{}/".format(
            self.config["product"],
            self.config["version"]
        )

    def _matches_exclude(self, keyname):
        """Return True if keyname matches any of the ``excludes`` patterns."""
        return any(re.search(exclude, keyname)
                   for exclude in self.config["excludes"])

    def push_to_releases(self):
        """This step grabs the list of files in the candidates dir,
        filters out any unwanted files from within them, and copies
        the remainder.

        Keys that already exist at the destination with the same ETag
        checksum are skipped with a warning; keys that exist with a
        different checksum are reported through ``self.fatal``.
        """
        # NOTE(review): boto and redo are listed in "virtualenv_modules", so
        # they are presumably only importable once the virtualenv actions
        # have run -- hence the function-level imports.
        from boto.s3.connection import S3Connection
        from boto.exception import S3CopyError, S3ResponseError
        from redo import retry

        # suppress boto debug logging, it's too verbose with --loglevel=debug
        import logging
        logging.getLogger('boto').setLevel(logging.INFO)

        self.info("Connecting to S3")
        conn = S3Connection(aws_access_key_id=self.aws_key_id,
                            aws_secret_access_key=self.aws_secret_key)
        self.info("Getting bucket {}".format(self.config["bucket_name"]))
        bucket = conn.get_bucket(self.config["bucket_name"])

        # A non-empty destination is only warned about; per-key conflicts
        # are resolved in copy_key() below.
        self.info("Checking destination {} is empty".format(self._get_releases_prefix()))
        keys = list(bucket.list(prefix=self._get_releases_prefix()))
        if keys:
            self.warning("Destination already exists with %s keys" % len(keys))

        def worker(item):
            """Copy one (source, destination) key pair, retrying S3 errors."""
            source, destination = item

            def copy_key():
                source_key = bucket.get_key(source)
                dest_key = bucket.get_key(destination)
                # According to http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
                # S3 key MD5 is represented as ETag, except when objects are
                # uploaded using multipart method. In this case the object's
                # ETag is constructed using its MD5, minus symbol, and number
                # of parts. See http://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb#answer-19896823
                source_md5 = source_key.etag.split("-")[0]
                if dest_key:
                    dest_md5 = dest_key.etag.split("-")[0]
                else:
                    dest_md5 = None

                if not dest_key:
                    self.info("Copying {} to {}".format(source, destination))
                    bucket.copy_key(destination, self.config["bucket_name"],
                                    source)
                elif source_md5 == dest_md5:
                    self.warning(
                        "{} already exists with the same content ({}), skipping copy".format(
                            destination, dest_md5))
                else:
                    # NOTE(review): this runs on a pool thread -- confirm that
                    # self.fatal aborts the whole run from here rather than
                    # only this worker.
                    self.fatal(
                        "{} already exists with the different content (src ETag: {}, dest ETag: {}), aborting".format(
                            destination, source_key.etag, dest_key.etag))

            return retry(copy_key, sleeptime=5, max_sleeptime=60,
                         retry_exceptions=(S3CopyError, S3ResponseError))

        def find_release_files():
            """Yield (source, destination) key names for every key to copy."""
            candidates_prefix = self._get_candidates_prefix()
            release_prefix = self._get_releases_prefix()
            self.info("Getting key names from candidates")
            for key in bucket.list(prefix=candidates_prefix):
                keyname = key.name
                if self._matches_exclude(keyname):
                    self.debug("Excluding {}".format(keyname))
                else:
                    # Keys are listed by prefix, so the first occurrence is
                    # the leading one; count=1 keeps any later occurrence of
                    # the same text in the key name untouched.
                    destination = keyname.replace(candidates_prefix,
                                                  release_prefix, 1)
                    yield (keyname, destination)

        pool = ThreadPool(self.config["parallelization"])
        try:
            pool.map(worker, find_release_files())
        finally:
            # Always release the pool's threads, whether or not map() raised;
            # this is what the pool's context manager does on Python 3.
            pool.terminate()

if __name__ == "__main__":
    # Build the script from whatever pop_aws_auth_from_env() returns (it is
    # passed to the constructor as aws_creds), then run it and exit.
    ReleasePusher(pop_aws_auth_from_env()).run_and_exit()