summaryrefslogtreecommitdiffstats
path: root/testing/docker/recipes/run-task
blob: 978683cb536d9d15f611c9f6a4fea1a94bcc96b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#!/usr/bin/python2.7 -u
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Run a task after performing common actions.

This script is meant to be the "driver" for TaskCluster based tasks.
It receives some common arguments to control the run-time environment.

It performs actions as requested from the arguments. Then it executes
the requested process and prints its output, prefixing it with the
current time to improve log usefulness.
"""

from __future__ import absolute_import, print_function, unicode_literals

import argparse
import datetime
import errno
import grp
import json
import os
import pwd
import re
import socket
import stat
import subprocess
import sys
import urllib2


# URL queried by vcs_checkout() to obtain the current hg.mozilla.org
# certificate fingerprint.
FINGERPRINT_URL = (
    'http://taskcluster/secrets/v1/secret/project/taskcluster/gecko/'
    'hgfingerprint')

# Fingerprint substituted by vcs_checkout() when the URL above cannot be
# fetched. It mirrors the shape of the 'secret' member of the fetched JSON
# (a mapping with a 'fingerprints' key).
FALLBACK_FINGERPRINT = {
    'fingerprints': (
        "sha256:8e:ad:f7:6a:eb:44:06:15:ed:f3:e4:69:a6:64:60:37:2d:ff:98:88:37"
        ":bf:d7:b8:40:84:01:48:9c:26:ce:d9"),
}


def print_line(prefix, m):
    """Write ``m`` to stdout tagged with ``prefix`` and the current UTC time.

    The emitted text has the form ``[<prefix> <ISO timestamp>Z] <m>``. No
    line terminator is appended, so callers include any trailing newline in
    ``m`` themselves.
    """
    timestamp = datetime.datetime.utcnow().isoformat()
    line = b'[%s %sZ] %s' % (prefix, timestamp, m)
    print(line, end=b'')


def run_and_prefix_output(prefix, args, extra_env=None):
    """Execute a command and echo its output line by line with timestamps.

    ``args`` is the argument list handed to ``subprocess.Popen``. When
    ``extra_env`` is given, its entries are layered on top of a copy of the
    current environment. The child's stderr is merged into its stdout and
    every line read is emitted through ``print_line`` under ``prefix``.

    Returns the process exit code.
    """
    print_line(prefix, b'executing %s\n' % args)

    child_env = os.environ.copy()
    if extra_env:
        child_env.update(extra_env)

    # TaskCluster gives us a TTY on stdin, but that property does not survive
    # handing sys.stdin to the child. It could be preserved if a task ever
    # needs it; nobody has asked so far.
    proc = subprocess.Popen(
        args,
        # Unbuffered, so output arrives as soon as it is produced and the
        # timestamps we attach are accurate.
        bufsize=0,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        stdin=sys.stdin.fileno(),
        cwd='/',
        env=child_env,
        # Makes \r in progress bars show up as separate lines, which keeps
        # the progress indicators visible in the log.
        universal_newlines=True)

    # readline() yields the empty string only at end of stream.
    for line in iter(proc.stdout.readline, b''):
        print_line(prefix, line)

    return proc.wait()


def vcs_checkout(source_repo, dest, store_path,
                 base_repo=None, revision=None, branch=None):
    """Check out ``source_repo`` into ``dest`` using ``hg robustcheckout``.

    Arguments:
        source_repo: URL of the repository to check out.
        dest: directory receiving the working copy.
        store_path: directory passed to robustcheckout as ``--sharebase``.
        base_repo: optional repository passed as ``--upstream``.
        revision: revision to update to; takes priority over ``branch``.
        branch: symbolic revision (e.g. ``tip``) used when ``revision`` is
            not given.

    Returns the 40 character hex node of the checked-out revision, as
    reported by ``hg log`` in ``dest``.

    The process exits with status 1 if neither ``revision`` nor ``branch``
    is given or if the fingerprint secret is not valid JSON, and with hg's
    exit status if the checkout itself fails.
    """
    # Specify method to checkout a revision. This defaults to revisions as
    # SHA-1 strings, but also supports symbolic revisions like `tip` via the
    # branch flag.
    if revision:
        revision_flag = b'--revision'
        revision_value = revision
    elif branch:
        revision_flag = b'--branch'
        revision_value = branch
    else:
        print('revision is not specified for checkout')
        sys.exit(1)

    # Obtain certificate fingerprints.
    try:
        print_line(b'vcs', 'fetching hg.mozilla.org fingerprint from %s\n' %
                   FINGERPRINT_URL)
        res = urllib2.urlopen(FINGERPRINT_URL, timeout=10)
        try:
            secret = res.read()
        finally:
            # Release the connection whether or not the read succeeded.
            res.close()
        try:
            secret = json.loads(secret, encoding='utf-8')
        except ValueError:
            # print_line() does not terminate the line for us.
            print_line(b'vcs', 'invalid JSON in hg fingerprint secret\n')
            sys.exit(1)
    except (urllib2.URLError, socket.timeout):
        # The two literals are concatenated, so the first one needs its own
        # trailing space; the message also needs its own newline.
        print_line(b'vcs', 'Unable to retrieve current hg.mozilla.org fingerprint '
                           'using the secret service, using fallback instead.\n')
        # XXX This fingerprint will not be accurate if running on an old
        #     revision after the server fingerprint has changed.
        secret = {'secret': FALLBACK_FINGERPRINT}

    hgmo_fingerprint = secret['secret']['fingerprints'].encode('ascii')

    args = [
        b'/usr/bin/hg',
        b'--config', b'hostsecurity.hg.mozilla.org:fingerprints=%s' % hgmo_fingerprint,
        b'robustcheckout',
        b'--sharebase', store_path,
        b'--purge',
    ]

    if base_repo:
        args.extend([b'--upstream', base_repo])

    args.extend([
        revision_flag, revision_value,
        source_repo, dest,
    ])

    res = run_and_prefix_output(b'vcs', args,
                                extra_env={b'PYTHONUNBUFFERED': b'1'})
    if res:
        sys.exit(res)

    # Update the current revision hash and ensure that it is well formed.
    revision = subprocess.check_output(
        [b'/usr/bin/hg', b'log',
         b'--rev', b'.',
         b'--template', b'{node}'],
        cwd=dest)

    assert re.match('^[a-f0-9]{40}$', revision)
    return revision


def main(args):
    """Prepare the environment, drop privileges and run the requested task.

    ``args`` is the command line without the program name. Everything before
    the first ``--`` is parsed as options for this script; everything after
    it is the command to execute.

    Steps, in order: verify we run as root, resolve ``--user``/``--group``,
    chown the requested paths, create and chown the checkout parent and the
    ``HG_STORE_PATH`` directories, switch to the target user/group, perform
    the requested checkouts, then run the task.

    Returns the exit code for the process: 1 on a setup error, otherwise the
    exit code of the task.
    """
    print_line(b'setup', b'run-task started\n')

    if os.getuid() != 0:
        print('assertion failed: not running as root')
        return 1

    # Arguments up to '--' are ours. After are for the main task
    # to be executed.
    try:
        i = args.index('--')
        our_args = args[0:i]
        task_args = args[i + 1:]
    except ValueError:
        our_args = args
        task_args = []

    parser = argparse.ArgumentParser()
    parser.add_argument('--user', default='worker', help='user to run as')
    parser.add_argument('--group', default='worker', help='group to run as')
    # We allow paths to be chowned by the --user:--group before permissions are
    # dropped. This is often necessary for caches/volumes, since they default
    # to root:root ownership.
    parser.add_argument('--chown', action='append',
                        help='Directory to chown to --user:--group')
    parser.add_argument('--chown-recursive', action='append',
                        help='Directory to recursively chown to --user:--group')
    parser.add_argument('--vcs-checkout',
                        help='Directory where Gecko checkout should be created')
    parser.add_argument('--tools-checkout',
                        help='Directory where build/tools checkout should be created')

    args = parser.parse_args(our_args)

    try:
        user = pwd.getpwnam(args.user)
    except KeyError:
        print('could not find user %s; specify --user to a known user' %
              args.user)
        return 1
    try:
        group = grp.getgrnam(args.group)
    except KeyError:
        print('could not find group %s; specify --group to a known group' %
              args.group)
        return 1

    uid = user.pw_uid
    gid = group.gr_gid

    # Find all groups to which this user is a member. gr_mem holds the names
    # of member *users*, so the lookup must be by user name, not group name.
    gids = [g.gr_gid for g in grp.getgrall() if user.pw_name in g.gr_mem]

    wanted_dir_mode = stat.S_IXUSR | stat.S_IRUSR | stat.S_IWUSR

    def set_dir_permissions(path, uid, gid):
        """Give ``path`` to uid:gid and ensure the owner has rwx on it."""
        st = os.lstat(path)

        if st.st_uid != uid or st.st_gid != gid:
            os.chown(path, uid, gid)

        # Also make sure dirs are writable in case we need to delete
        # them.
        if st.st_mode & wanted_dir_mode != wanted_dir_mode:
            os.chmod(path, st.st_mode | wanted_dir_mode)

    # Change ownership of requested paths.
    # FUTURE: parse argument values for user/group if we don't want to
    # use --user/--group.
    for path in args.chown or []:
        print_line(b'chown', b'changing ownership of %s to %s:%s\n' % (
                   path, user.pw_name, group.gr_name))
        set_dir_permissions(path, uid, gid)

    for path in args.chown_recursive or []:
        print_line(b'chown', b'recursively changing ownership of %s to %s:%s\n' %
                   (path, user.pw_name, group.gr_name))

        set_dir_permissions(path, uid, gid)

        for root, dirs, files in os.walk(path):
            for d in dirs:
                set_dir_permissions(os.path.join(root, d), uid, gid)

            for f in files:
                # File may be a symlink that points to nowhere. In which case
                # os.chown() would fail because it attempts to follow the
                # symlink. We only care about directory entries, not what
                # they point to. So setting the owner of the symlink should
                # be sufficient.
                os.lchown(os.path.join(root, f), uid, gid)

    def prepare_checkout_dir(checkout):
        """Create and chown the parent of ``checkout`` and the hg store."""
        if not checkout:
            return

        # Ensure the directory for the source checkout exists.
        try:
            os.makedirs(os.path.dirname(checkout))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        # And that it is owned by the appropriate user/group.
        os.chown(os.path.dirname(checkout), uid, gid)

        # And ensure the shared store path exists and has proper permissions.
        if 'HG_STORE_PATH' not in os.environ:
            print('error: HG_STORE_PATH environment variable not set')
            sys.exit(1)

        store_path = os.environ['HG_STORE_PATH']
        try:
            os.makedirs(store_path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        os.chown(store_path, uid, gid)

    prepare_checkout_dir(args.vcs_checkout)
    prepare_checkout_dir(args.tools_checkout)

    # Drop permissions to requested user.
    # This code is modeled after what `sudo` was observed to do in a Docker
    # container. We do not bother calling setrlimit() because containers have
    # their own limits.
    print_line(b'setup', b'running as %s:%s\n' % (args.user, args.group))
    os.setgroups(gids)
    # 0o022 is the same value as the legacy literal 022, spelled in the form
    # that newer Python versions also accept.
    os.umask(0o022)
    # Group first: once the uid is dropped we may no longer change gids.
    os.setresgid(gid, gid, gid)
    os.setresuid(uid, uid, uid)

    # Checkout the repository, setting the GECKO_HEAD_REV to the current
    # revision hash. Revision hashes have priority over symbolic revisions. We
    # disallow running tasks with symbolic revisions unless they have been
    # resolved by a checkout.
    if args.vcs_checkout:
        base_repo = os.environ.get('GECKO_BASE_REPOSITORY')
        # Some callers set the base repository to mozilla-central for historical
        # reasons. Switch to mozilla-unified because robustcheckout works best
        # with it.
        if base_repo == 'https://hg.mozilla.org/mozilla-central':
            base_repo = b'https://hg.mozilla.org/mozilla-unified'

        os.environ['GECKO_HEAD_REV'] = vcs_checkout(
            os.environ['GECKO_HEAD_REPOSITORY'],
            args.vcs_checkout,
            os.environ['HG_STORE_PATH'],
            base_repo=base_repo,
            revision=os.environ.get('GECKO_HEAD_REV'),
            branch=os.environ.get('GECKO_HEAD_REF'))

    elif not os.environ.get('GECKO_HEAD_REV') and \
            os.environ.get('GECKO_HEAD_REF'):
        print('task should be defined in terms of non-symbolic revision')
        return 1

    if args.tools_checkout:
        vcs_checkout(b'https://hg.mozilla.org/build/tools',
                     args.tools_checkout,
                     os.environ['HG_STORE_PATH'],
                     # Always check out the latest commit on default branch.
                     # This is non-deterministic!
                     branch=b'default')

    # An empty command would otherwise make subprocess.Popen fail with an
    # unhelpful traceback; report the problem explicitly instead.
    if not task_args:
        print('error: no task command specified after --')
        return 1

    return run_and_prefix_output(b'task', task_args)


if __name__ == '__main__':
    # Replace stdout and stderr (in that order) with unbuffered file objects
    # opened on the same descriptors.
    for stream_name in ('stdout', 'stderr'):
        descriptor = getattr(sys, stream_name).fileno()
        setattr(sys, stream_name, os.fdopen(descriptor, 'w', 0))

    exit_code = main(sys.argv[1:])
    sys.exit(exit_code)