from multiprocessing.pool import ThreadPool
import os
import re
import sys


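# make the mozharness package importable when running from a source checkout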
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))

from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
from mozharness.base.script import BaseScript
from mozharness.mozilla.aws import pop_aws_auth_from_env


class ReleasePusher(BaseScript, VirtualenvMixin):
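    """Copies a release candidate from the candidates/ directory of an S3
    bucket to the corresponding releases/ directory, skipping any files
    that match the configured exclude patterns."""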
    config_options = [
        [["--product"], {
            "dest": "product",
            "help": "Product being released, eg: firefox, thunderbird",
        }],
        [["--version"], {
            "dest": "version",
            "help": "Version of release, eg: 39.0b5",
        }],
        [["--build-number"], {
            "dest": "build_number",
            "help": "Build number of release, eg: 2",
        }],
        [["--bucket-name"], {
            "dest": "bucket_name",
            "help": "Bucket to copy files from candidates/ to releases/",
        }],
        [["--credentials"], {
            "dest": "credentials",
            "help": "File containing access key and secret access key",
        }],
        [["--exclude"], {
            "dest": "excludes",
            "default": [
                r"^.*tests.*$",
                r"^.*crashreporter.*$",
                r"^.*[^k]\.zip(\.asc)?$",
                r"^.*\.log$",
                r"^.*\.txt$",
                r"^.*/partner-repacks.*$",
                r"^.*.checksums(\.asc)?$",
                r"^.*/logs/.*$",
                r"^.*/jsshell.*$",
                r"^.*json$",
                r"^.*/host.*$",
                r"^.*/mar-tools/.*$",
                r"^.*robocop.apk$",
                r"^.*bouncer.apk$",
                r"^.*contrib.*",
                r"^.*/beetmover-checksums/.*$",
            ],
            "action": "append",
            "help": "List of patterns to exclude from copy. The list can be "
                    "extended by passing multiple --exclude arguments.",
        }],
        [["-j", "--parallelization"], {
            "dest": "parallelization",
            "default": 20,
            "type": "int",
            "help": "Number of copy requests to run concurrently",
        }],
    ] + virtualenv_config_options

    def __init__(self, aws_creds):
        BaseScript.__init__(self,
            config_options=self.config_options,
            require_config_file=False,
            config={
                "virtualenv_modules": [
                    "boto",
                    "redo",
                ],
                "virtualenv_path": "venv",
            },
            all_actions=[
                "create-virtualenv",
                "activate-virtualenv",
                "push-to-releases",
            ],
            default_actions=[
                "create-virtualenv",
                "activate-virtualenv",
                "push-to-releases",
            ],
        )

        # validate aws credentials
        if not (all(aws_creds) or self.config.get("credentials")):
            self.fatal("AWS credentials not defined. Please add them to your "
                       "config or environment.")
        if any(aws_creds) and self.config.get("credentials"):
            self.fatal("AWS credentials found in both the environment and "
                       "the config. Please declare them in one place only.")

        # set aws credentials
        if all(aws_creds):
            self.aws_key_id, self.aws_secret_key = aws_creds
        else:  # use the credentials file instead
            self.aws_key_id, self.aws_secret_key = None, None
            # set the env var for boto to read our special config file
            # rather than anything else we have at ~/.boto
            os.environ["BOTO_CONFIG"] = os.path.abspath(self.config["credentials"])

    def _get_candidates_prefix(self):
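        """Returns the S3 prefix of the candidates directory for this
        release, e.g. "pub/firefox/candidates/39.0b5-candidates/build2/"."""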
        return "pub/{}/candidates/{}-candidates/build{}/".format(
            self.config["product"],
            self.config["version"],
            self.config["build_number"]
        )

    def _get_releases_prefix(self):
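        """Returns the S3 prefix of the releases directory for this
        release, e.g. "pub/firefox/releases/39.0b5/"."""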
        return "pub/{}/releases/{}/".format(
            self.config["product"],
            self.config["version"]
        )

    def _matches_exclude(self, keyname):
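        """Returns True if keyname matches any of the configured exclude
        patterns."""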
        for exclude in self.config["excludes"]:
            if re.search(exclude, keyname):
                return True
        return False

    def push_to_releases(self):
        """This step grabs the list of files in the candidates dir,
        filters out any unwanted files from within them, and copies
        the remainder."""
        from boto.s3.connection import S3Connection
        from boto.exception import S3CopyError, S3ResponseError
        from redo import retry

        # suppress boto debug logging, it's too verbose with --loglevel=debug
        import logging
        logging.getLogger('boto').setLevel(logging.INFO)

        self.info("Connecting to S3")
        conn = S3Connection(aws_access_key_id=self.aws_key_id,
                            aws_secret_access_key=self.aws_secret_key)
        self.info("Getting bucket {}".format(self.config["bucket_name"]))
        bucket = conn.get_bucket(self.config["bucket_name"])

        # ensure the destination is empty
        self.info("Checking destination {} is empty".format(self._get_releases_prefix()))
        keys = list(bucket.list(prefix=self._get_releases_prefix()))
        if keys:
            self.warning("Destination already exists with {} keys".format(
                len(keys)))

        def worker(item):
            source, destination = item

            def copy_key():
                source_key = bucket.get_key(source)
                dest_key = bucket.get_key(destination)
                # According to http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
                # the S3 key MD5 is represented as the ETag, except when
                # objects are uploaded using the multipart method. In that
                # case the object's ETag is constructed from its MD5, a
                # dash, and the number of parts. See
                # http://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb#answer-19896823
                source_md5 = source_key.etag.split("-")[0]
                if dest_key:
                    dest_md5 = dest_key.etag.split("-")[0]
                else:
                    dest_md5 = None

                if not dest_key:
                    self.info("Copying {} to {}".format(source, destination))
                    bucket.copy_key(destination, self.config["bucket_name"],
                                    source)
                elif source_md5 == dest_md5:
                    self.warning(
                        "{} already exists with the same content ({}), skipping copy".format(
                            destination, dest_md5))
                else:
                    self.fatal(
                        "{} already exists with different content "
                        "(src ETag: {}, dest ETag: {}), aborting".format(
                            destination, source_key.etag, dest_key.etag))

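            # retry the copy on transient S3 errors, backing off between
            # attempts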
            return retry(copy_key, sleeptime=5, max_sleeptime=60,
                         retry_exceptions=(S3CopyError, S3ResponseError))

        def find_release_files():
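            """Yields (source, destination) key name pairs for every
            candidate file that doesn't match an exclude pattern."""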
            candidates_prefix = self._get_candidates_prefix()
            release_prefix = self._get_releases_prefix()
            self.info("Getting key names from candidates")
            for key in bucket.list(prefix=candidates_prefix):
                keyname = key.name
                if self._matches_exclude(keyname):
                    self.debug("Excluding {}".format(keyname))
                else:
                    destination = keyname.replace(candidates_prefix,
                                                  release_prefix)
                    yield (keyname, destination)

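        # run the copy requests concurrently, bounded by --parallelization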
        pool = ThreadPool(self.config["parallelization"])
        pool.map(worker, find_release_files())


if __name__ == "__main__":
    myScript = ReleasePusher(pop_aws_auth_from_env())
    myScript.run_and_exit()
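
# Example invocation (hypothetical values; AWS credentials can alternatively
# come from the environment, which pop_aws_auth_from_env() consumes):
#
#   python push-candidate-to-releases.py \
#       --product firefox --version 39.0b5 --build-number 2 \
#       --bucket-name <bucket> --credentials aws_creds.ini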