summaryrefslogtreecommitdiffstats
path: root/taskcluster/taskgraph/transforms/task.py
blob: 6e371e4baaf5fca1567923ca179f095a57cc7a04 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
These transformations take a task description and turn it into a TaskCluster
task definition (along with attributes, label, etc.).  The input to these
transformations is generic to any kind of task, but abstracts away some of the
complexities of worker implementations, scopes, and treeherder annotations.
"""

from __future__ import absolute_import, print_function, unicode_literals

import json
import time

from taskgraph.util.treeherder import split_symbol
from taskgraph.transforms.base import (
    validate_schema,
    TransformSequence
)
from voluptuous import Schema, Any, Required, Optional, Extra

from .gecko_v2_whitelist import JOB_NAME_WHITELIST, JOB_NAME_WHITELIST_ERROR

# Shortcut for a value where task references are allowed: either a plain
# string, or a one-key mapping {'task-reference': <string>}.  The schema below
# uses it for commands and environment variable values.
taskref_or_string = Any(
    basestring,
    {Required('task-reference'): basestring})

# A task description is a general description of a TaskCluster task.  It is
# the input format of the transforms in this module: `validate` checks each
# task against this schema, and `build_task` turns it into a task definition.
task_description_schema = Schema({
    # the label for this task
    Required('label'): basestring,

    # description of the task (for metadata)
    Required('description'): basestring,

    # attributes for this task
    Optional('attributes'): {basestring: object},

    # dependencies of this task, keyed by name; these are passed through
    # verbatim and subject to the interpretation of the Task's get_dependencies
    # method.
    Optional('dependencies'): {basestring: object},

    # expiration and deadline times, relative to task creation, with units
    # (e.g., "14 days").  Defaults are set based on the project.
    Optional('expires-after'): basestring,
    Optional('deadline-after'): basestring,

    # custom routes for this task; the default treeherder routes will be added
    # automatically
    Optional('routes'): [basestring],

    # custom scopes for this task; any scopes required for the worker will be
    # added automatically
    Optional('scopes'): [basestring],

    # custom "task.extra" content
    Optional('extra'): {basestring: object},

    # treeherder-related information; see
    # https://schemas.taskcluster.net/taskcluster-treeherder/v1/task-treeherder-config.json
    # If not specified, no treeherder extra information or routes will be
    # added to the task
    Optional('treeherder'): {
        # either a bare symbol, or "grp(sym)".
        'symbol': basestring,

        # the job kind
        'kind': Any('build', 'test', 'other'),

        # tier for this task
        'tier': int,

        # task platform, in the form platform/collection, used to set
        # treeherder.machine.platform and treeherder.collection or
        # treeherder.labels
        'platform': basestring,

        # treeherder environments (defaults to both staging and production)
        Required('environments', default=['production', 'staging']): ['production', 'staging'],
    },

    # information for indexing this build so its artifacts can be discovered;
    # if omitted, the build will not be indexed.
    Optional('index'): {
        # the name of the product this build produces
        'product': Any('firefox', 'mobile'),

        # the names to use for this job in the TaskCluster index
        'job-name': Any(
            # Assuming the job is named "normally" (i.e. "<base>-<type>"),
            # this is the v2 job name; the buildbot name is derived from it
            # by dropping the trailing "-<type>" component.
            basestring,

            # otherwise, give separate names for each of the index routes;
            # if the buildbot name is omitted, no buildbot routes will be
            # created.
            {
                # the name as it appears in buildbot routes
                Optional('buildbot'): basestring,
                # the name as it appears in gecko.v2 routes
                Required('gecko-v2'): basestring,
            }
        ),

        # The rank that the task will receive in the TaskCluster
        # index.  A newly completed task supersedes the currently
        # indexed task iff it has a higher rank.  If unspecified,
        # 'by-tier' behavior will be used.
        'rank': Any(
            # Rank is equal to the timestamp of the build_date for tier-1
            # tasks, and zero for non-tier-1.  This sorts tier-{2,3}
            # builds below tier-1 in the index.
            'by-tier',

            # Rank is given as an integer constant (e.g. zero to make
            # sure a task is last in the index).
            int,

            # Rank is equal to the timestamp of the build_date.  This
            # option can be used to override the 'by-tier' behavior
            # for non-tier-1 tasks.
            'build_date',
        ),
    },

    # The `run_on_projects` attribute, defaulting to "all".  This dictates the
    # projects on which this task should be included in the target task set.
    # See the attributes documentation for details.
    Optional('run-on-projects'): [basestring],

    # If the task can be coalesced, this is the name used in the coalesce key;
    # the project, etc. will be added automatically.  Note that try (level 1)
    # tasks are never coalesced.
    Optional('coalesce-name'): basestring,

    # the provisioner-id/worker-type for the task.  The following parameters will
    # be substituted in this string:
    #  {level} -- the scm level of this push
    'worker-type': basestring,

    # information specific to the worker implementation that will run this
    # task; exactly one of the alternatives below applies, selected by its
    # 'implementation' value
    'worker': Any({
        # NOTE(review): only 'docker-worker' has a payload builder registered
        # in this module; 'docker-engine' looks like it would fail the
        # payload_builders lookup in build_task -- confirm.
        Required('implementation'): Any('docker-worker', 'docker-engine'),

        # For tasks that will run in docker-worker or docker-engine, this is the
        # name of the docker image or in-tree docker image to run the task in.  If
        # in-tree, then a dependency will be created automatically.  This is
        # generally `desktop-test`, or an image that acts an awful lot like it.
        Required('docker-image'): Any(
            # a raw Docker image path (repo/image:tag)
            basestring,
            # an in-tree generated docker image (from `testing/docker/<name>`)
            {'in-tree': basestring}
        ),

        # worker features that should be enabled
        Required('relengapi-proxy', default=False): bool,
        Required('chain-of-trust', default=False): bool,
        Required('taskcluster-proxy', default=False): bool,
        Required('allow-ptrace', default=False): bool,
        Required('loopback-video', default=False): bool,
        Required('loopback-audio', default=False): bool,

        # caches to set up for the task
        Optional('caches'): [{
            # only one type is supported by any of the workers right now
            'type': 'persistent',

            # name of the cache, allowing re-use by subsequent tasks naming the
            # same cache
            'name': basestring,

            # location in the task image where the cache will be mounted
            'mount-point': basestring,
        }],

        # artifacts to extract from the task image after completion
        Optional('artifacts'): [{
            # type of artifact -- simple file, or recursive directory
            'type': Any('file', 'directory'),

            # task image path from which to read artifact
            'path': basestring,

            # name of the produced artifact (root of the names for
            # type=directory)
            'name': basestring,
        }],

        # environment variables
        Required('env', default={}): {basestring: taskref_or_string},

        # the command to run
        'command': [taskref_or_string],

        # the maximum time to run, in seconds
        'max-run-time': int,

        # the exit status code that indicates the task should be retried
        Optional('retry-exit-status'): int,

    }, {
        Required('implementation'): 'generic-worker',

        # command is a list of commands to run, sequentially
        'command': [taskref_or_string],

        # artifacts to extract from the task image after completion; note that artifacts
        # for the generic worker cannot have names
        Optional('artifacts'): [{
            # type of artifact -- simple file, or recursive directory
            'type': Any('file', 'directory'),

            # task image path from which to read artifact
            'path': basestring,
        }],

        # directories and/or files to be mounted
        Optional('mounts'): [{
            # a unique name for the cache volume
            'cache-name': basestring,

            # task image path for the cache
            'path': basestring,
        }],

        # environment variables
        Required('env', default={}): {basestring: taskref_or_string},

        # the maximum time to run, in seconds
        'max-run-time': int,

        # os user groups for test task workers
        Optional('os-groups', default=[]): [basestring],

        # (there is deliberately no 'retry-exit-status' here: the
        # generic-worker payload builder rejects it as unsupported)
    }, {
        # NOTE(review): no payload builder for 'buildbot-bridge' is registered
        # in this module -- confirm where (or whether) it is handled.
        Required('implementation'): 'buildbot-bridge',

        # see
        # https://github.com/mozilla/buildbot-bridge/blob/master/bbb/schemas/payload.yml
        'buildername': basestring,
        'sourcestamp': {
            'branch': basestring,
            Optional('revision'): basestring,
            Optional('repository'): basestring,
            Optional('project'): basestring,
        },
        'properties': {
            'product': basestring,
            Extra: basestring,  # additional properties are allowed
        },
    }, {
        'implementation': 'macosx-engine',

        # A link for an executable to download
        Optional('link'): basestring,

        # the command to run
        Required('command'): [taskref_or_string],

        # environment variables
        Optional('env'): {basestring: taskref_or_string},

        # artifacts to extract from the task image after completion
        Optional('artifacts'): [{
            # type of artifact -- simple file, or recursive directory
            Required('type'): Any('file', 'directory'),

            # task image path from which to read artifact
            Required('path'): basestring,

            # name of the produced artifact (root of the names for
            # type=directory)
            Required('name'): basestring,
        }],
    }),

    # The "when" section contains descriptions of the circumstances
    # under which this task can be "optimized", that is, left out of the
    # task graph because it is unnecessary.
    Optional('when'): Any({
        # This task only needs to be run if a file matching one of the given
        # patterns has changed in the push.  The patterns use the mozpack
        # match function (python/mozbuild/mozpack/path.py).
        Optional('files-changed'): [basestring],
    }),
})

# Human-readable treeherder group names, keyed by group symbol (the "grp" part
# of a "grp(sym)" treeherder symbol).  build_task copies the matching name into
# the task's treeherder extra data and raises if the group symbol is not
# listed here.
GROUP_NAMES = {
    'tc': 'Executed by TaskCluster',
    'tc-e10s': 'Executed by TaskCluster with e10s',
    'tc-Fxfn-l': 'Firefox functional tests (local) executed by TaskCluster',
    'tc-Fxfn-l-e10s': 'Firefox functional tests (local) executed by TaskCluster with e10s',
    'tc-Fxfn-r': 'Firefox functional tests (remote) executed by TaskCluster',
    'tc-Fxfn-r-e10s': 'Firefox functional tests (remote) executed by TaskCluster with e10s',
    'tc-M': 'Mochitests executed by TaskCluster',
    'tc-M-e10s': 'Mochitests executed by TaskCluster with e10s',
    'tc-R': 'Reftests executed by TaskCluster',
    'tc-R-e10s': 'Reftests executed by TaskCluster with e10s',
    'tc-VP': 'VideoPuppeteer tests executed by TaskCluster',
    'tc-W': 'Web platform tests executed by TaskCluster',
    'tc-W-e10s': 'Web platform tests executed by TaskCluster with e10s',
    'tc-X': 'Xpcshell tests executed by TaskCluster',
    'tc-X-e10s': 'Xpcshell tests executed by TaskCluster with e10s',
    'Aries': 'Aries Device Image',
    'Nexus 5-L': 'Nexus 5-L Device Image',
    'Cc': 'Toolchain builds',
    'SM-tc': 'Spidermonkey builds',
}
# error message for a group symbol missing from GROUP_NAMES; the {} placeholder
# is filled with the offending group symbol, and the message points at this
# file as the place to add it
UNKNOWN_GROUP_NAME = "Treeherder group {} has no name; add it to " + __file__

# Index route templates.  add_index_routes formats these with a copy of the
# push parameters extended with `job-name-<kind>`, `build_date_long` and
# `product`.

# routes added when the task has a buildbot job name
BUILDBOT_ROUTE_TEMPLATES = [
    "index.buildbot.branches.{project}.{job-name-buildbot}",
    "index.buildbot.revisions.{head_rev}.{project}.{job-name-buildbot}",
]

# routes added for the gecko-v2 job name; check_v2_routes verifies that these
# match the routes listed in Mozharness's routes.json
V2_ROUTE_TEMPLATES = [
    "index.gecko.v2.{project}.latest.{product}.{job-name-gecko-v2}",
    "index.gecko.v2.{project}.pushdate.{build_date_long}.{product}.{job-name-gecko-v2}",
    "index.gecko.v2.{project}.revision.{head_rev}.{product}.{job-name-gecko-v2}",
]

# the roots of the treeherder routes, keyed by treeherder environment
TREEHERDER_ROUTE_ROOTS = {
    'production': 'tc-treeherder',
    'staging': 'tc-treeherder-stage',
}

# coalescing key, used both for the `coalesce.v1.` route added in build_task
# and for the docker-worker supersederUrl; {name} is the task's coalesce-name
COALESCE_KEY = 'builds.{project}.{name}'

# Registry of payload builders, keyed by worker implementation name.  A
# builder is called as builder(config, task, task_def) and is expected to fill
# in task_def['payload'].
payload_builders = {}


def payload_builder(name):
    """Return a decorator that registers the decorated function in
    `payload_builders` under `name` and hands it back unchanged."""
    def register(builder):
        payload_builders[name] = builder
        return builder
    return register


@payload_builder('docker-worker')
def build_docker_worker_payload(config, task, task_def):
    """Fill in task_def['payload'] for a task run by docker-worker.

    The task's `worker` section is translated into the payload's image,
    command, env, features, capabilities, artifacts and caches.  Scopes needed
    for ptrace, loopback devices and caches are appended to
    task_def['scopes'], and an in-tree docker image adds a `docker-image`
    dependency to `task`.  `config` is only consulted for coalescing.
    """
    worker = task['worker']

    image = worker['docker-image']
    if isinstance(image, dict):
        # in-tree image: depend on the task that builds it and point the
        # payload at the image artifact of that task
        image_task_label = 'build-docker-image-' + image['in-tree']
        task.setdefault('dependencies', {})['docker-image'] = image_task_label
        image = {
            "path": "public/image.tar.zst",
            "taskId": {"task-reference": "<docker-image>"},
            "type": "task-image",
        }

    # worker option -> name of the docker-worker feature it enables
    feature_options = (
        ('relengapi-proxy', 'relengAPIProxy'),
        ('taskcluster-proxy', 'taskclusterProxy'),
        ('allow-ptrace', 'allowPtrace'),
        ('chain-of-trust', 'chainOfTrust'),
    )
    features = {}
    for option, feature in feature_options:
        if worker.get(option):
            features[feature] = True
    if 'allowPtrace' in features:
        # ptrace is the only feature that needs its own scope
        task_def['scopes'].append('docker-worker:feature:allowPtrace')

    # loopback devices, each of which requires a matching scope
    devices = {}
    for kind in ('audio', 'video'):
        if not worker.get('loopback-' + kind):
            continue
        device = 'loopback' + kind.capitalize()
        devices[device] = True
        task_def['scopes'].append('docker-worker:capability:device:' + device)
    capabilities = {'devices': devices} if devices else {}

    payload = {
        'command': worker['command'],
        'image': image,
        'env': worker['env'],
    }
    task_def['payload'] = payload

    if 'max-run-time' in worker:
        payload['maxRunTime'] = worker['max-run-time']

    if 'retry-exit-status' in worker:
        payload['onExitStatus'] = {'retry': [worker['retry-exit-status']]}

    if 'artifacts' in worker:
        # artifacts are keyed by name and always expire with the task
        payload['artifacts'] = {
            artifact['name']: {
                'path': artifact['path'],
                'type': artifact['type'],
                'expires': task_def['expires'],
            }
            for artifact in worker['artifacts']
        }

    if 'caches' in worker:
        mount_points = {}
        for cache in worker['caches']:
            mount_points[cache['name']] = cache['mount-point']
            task_def['scopes'].append('docker-worker:cache:' + cache['name'])
        payload['cache'] = mount_points

    # only include these sections when something was requested
    if features:
        payload['features'] = features
    if capabilities:
        payload['capabilities'] = capabilities

    # coalesce / superseding (never applied at level 1)
    if 'coalesce-name' in task and int(config.params['level']) > 1:
        key = COALESCE_KEY.format(
            project=config.params['project'],
            name=task['coalesce-name'])
        payload['supersederUrl'] = "https://coalesce.mozilla-releng.net/v1/list/" + key


@payload_builder('generic-worker')
def build_generic_worker_payload(config, task, task_def):
    """Fill in task_def['payload'] for a task run by generic-worker.

    The task's `worker` section supplies the command, environment, artifacts,
    cache mounts, maximum run time and OS groups.  `artifacts`, `mounts`,
    `env` and `os-groups` are optional and default to empty.  `config` is not
    used.

    Raises Exception if the worker section asks for `retry-exit-status`,
    which generic-worker does not support.
    """
    worker = task['worker']

    # reject unsupported options before touching task_def
    if 'retry-exit-status' in worker:
        raise Exception("retry-exit-status not supported in generic-worker")

    # 'artifacts' is optional in the task description schema, so it must not
    # be indexed directly; generic-worker artifacts have no name, and always
    # expire with the task
    artifacts = [
        {
            'path': artifact['path'],
            'type': artifact['type'],
            'expires': task_def['expires'],
        }
        for artifact in worker.get('artifacts', [])
    ]

    mounts = [
        {
            'cacheName': mount['cache-name'],
            'directory': mount['path'],
        }
        for mount in worker.get('mounts', [])
    ]

    task_def['payload'] = {
        'command': worker['command'],
        'artifacts': artifacts,
        'env': worker.get('env', {}),
        'mounts': mounts,
        'maxRunTime': worker['max-run-time'],
        'osGroups': worker.get('os-groups', []),
    }


@payload_builder('macosx-engine')
def build_macosx_engine_payload(config, task, task_def):
    """Fill in task_def['payload'] for a task run by macosx-engine.

    The task's `worker` section supplies the command and, optionally, a
    download link, environment variables and artifacts.  The optional
    settings may be omitted from the task description: `env` then defaults to
    an empty mapping, `artifacts` to an empty list, and `link` is left out of
    the payload.  `config` is not used.
    """
    worker = task['worker']

    # built as a real list (not a lazy map object) so the payload is plain
    # data; artifacts always expire with the task
    artifacts = [
        {
            'name': artifact['name'],
            'path': artifact['path'],
            'type': artifact['type'],
            'expires': task_def['expires'],
        }
        for artifact in worker.get('artifacts', [])
    ]

    payload = {
        'command': worker['command'],
        'env': worker.get('env', {}),
        'artifacts': artifacts,
    }
    # 'link' is optional in the task description schema
    if 'link' in worker:
        payload['link'] = worker['link']

    task_def['payload'] = payload

# The transforms exported by this module; the functions below register
# themselves with @transforms.add.  Presumably they are applied in
# registration order (validate, add_index_routes, build_task) -- confirm
# against TransformSequence.
transforms = TransformSequence()


@transforms.add
def validate(config, tasks):
    """Check every task description against task_description_schema.

    Yields the result of validate_schema for each task; the task's label (or
    a placeholder when it has none) is used as the error message prefix.
    `config` is not used.
    """
    for task in tasks:
        label = task.get('label', '?no-label?')
        error_prefix = "In task {!r}:".format(label)
        yield validate_schema(task_description_schema, task, error_prefix)


@transforms.add
def add_index_routes(config, tasks):
    """Expand each task's `index` section into index routes and a rank.

    Tasks without an `index` section are passed through, gaining only an
    empty `routes` list if they had none.  For the others, the buildbot and
    gecko-v2 index routes are appended to `routes`, the index rank is stored
    in task['extra']['index']['rank'], and the `index` section is removed.

    Raises Exception if the gecko-v2 job name is not in JOB_NAME_WHITELIST.
    """
    for task in tasks:
        index = task.get('index')
        routes = task.setdefault('routes', [])

        if index:
            names = index['job-name']
            if isinstance(names, basestring):
                # a bare string is the v2 name, "<base>-<type>"; the buildbot
                # name is the same without the trailing "-<type>"
                prefix, suffix = names.rsplit('-', 1)
                names = {
                    'buildbot': prefix,
                    'gecko-v2': '{}-{}'.format(prefix, suffix),
                }

            v2_name = names['gecko-v2']
            if v2_name not in JOB_NAME_WHITELIST:
                raise Exception(JOB_NAME_WHITELIST_ERROR.format(v2_name))

            # substitutions for the route templates: the push parameters plus
            # the job names, a dotted build date, and the product
            subs = config.params.copy()
            for kind, name in names.items():
                subs['job-name-' + kind] = name
            build_date = config.params['build_date']
            subs['build_date_long'] = time.strftime(
                "%Y.%m.%d.%Y%m%d%H%M%S", time.gmtime(build_date))
            subs['product'] = index['product']

            templates = []
            if 'buildbot' in names:
                templates.extend(BUILDBOT_ROUTE_TEMPLATES)
            if 'gecko-v2' in names:
                templates.extend(V2_ROUTE_TEMPLATES)
            for template in templates:
                routes.append(template.format(**subs))

            # The default behavior is to rank tasks according to their tier
            extra_index = task.setdefault('extra', {}).setdefault('index', {})
            rank = index.get('rank', 'by-tier')

            if rank == 'by-tier':
                # non-tier-1 tasks get rank zero, tier-1 tasks are ranked by
                # build date; this sorts tier-{2,3} builds below tier-1 in
                # the index.  Tasks without treeherder info count as tier 3.
                tier = task.get('treeherder', {}).get('tier', 3)
                if tier > 1:
                    rank = 0
                else:
                    rank = 'build_date'
            if rank == 'build_date':
                rank = int(config.params['build_date'])
            extra_index['rank'] = rank

            del task['index']

        yield task


@transforms.add
def build_task(config, tasks):
    """Turn each task description into a TaskCluster task definition.

    For every task this yields a dict holding the task's `label`, the task
    definition (`task`), and its `dependencies`, `attributes` and `when`
    sections.  Treeherder information, default expiry and deadline, and the
    coalescing route are filled in here; the payload comes from the payload
    builder registered for the task's worker implementation, which may also
    adjust the rest of the definition (e.g., scopes).

    Raises Exception if the treeherder group symbol has no entry in
    GROUP_NAMES.
    """
    for task in tasks:
        params = config.params

        # worker-type is "<provisioner-id>/<worker-type>" with {level}
        # substituted
        qualified_worker_type = task['worker-type'].format(level=str(params['level']))
        provisioner_id, worker_type = qualified_worker_type.split('/', 1)

        routes = task.get('routes', [])
        scopes = task.get('scopes', [])
        extra = task.get('extra', {})

        th_config = task.get('treeherder')
        if th_config:
            environments = th_config['environments']
            extra['treeherderEnv'] = environments

            th_extra = extra.setdefault('treeherder', {})

            # platform is given as "<machine platform>/<collection>"
            machine_platform, collection = th_config['platform'].split('/', 1)
            th_extra['machine'] = {'platform': machine_platform}
            th_extra['collection'] = {collection: True}

            # a group symbol of '?' is treated as "no group"
            group_symbol, job_symbol = split_symbol(th_config['symbol'])
            if group_symbol != '?':
                th_extra['groupSymbol'] = group_symbol
                if group_symbol not in GROUP_NAMES:
                    raise Exception(UNKNOWN_GROUP_NAME.format(group_symbol))
                th_extra['groupName'] = GROUP_NAMES[group_symbol]
            th_extra['symbol'] = job_symbol
            th_extra['jobKind'] = th_config['kind']
            th_extra['tier'] = th_config['tier']

            # one treeherder route per requested environment
            for env in environments:
                routes.append('{}.v2.{}.{}.{}'.format(
                    TREEHERDER_ROUTE_ROOTS[env],
                    params['project'],
                    params['head_rev'],
                    params['pushlog_id']))

        # defaults: try tasks expire sooner than those on other projects
        if 'expires-after' not in task:
            on_try = params['project'] == 'try'
            task['expires-after'] = '28 days' if on_try else '1 year'
        task.setdefault('deadline-after', '1 day')

        # coalescing is never applied at level 1
        if 'coalesce-name' in task and int(params['level']) > 1:
            coalesce_key = COALESCE_KEY.format(
                project=params['project'],
                name=task['coalesce-name'])
            routes.append('coalesce.v1.' + coalesce_key)

        metadata = {
            'description': task['description'],
            'name': task['label'],
            'owner': params['owner'],
            'source': '{}/file/{}/{}'.format(
                params['head_repository'],
                params['head_rev'],
                config.path),
        }
        task_def = {
            'provisionerId': provisioner_id,
            'workerType': worker_type,
            'routes': routes,
            'created': {'relative-datestamp': '0 seconds'},
            'deadline': {'relative-datestamp': task['deadline-after']},
            'expires': {'relative-datestamp': task['expires-after']},
            'scopes': scopes,
            'metadata': metadata,
            'extra': extra,
            'tags': {'createdForUser': params['owner']},
        }

        # add the payload and adjust anything else as required (e.g., scopes)
        build_payload = payload_builders[task['worker']['implementation']]
        build_payload(config, task, task_def)

        attributes = task.get('attributes', {})
        attributes['run_on_projects'] = task.get('run-on-projects', ['all'])

        yield {
            'label': task['label'],
            'task': task_def,
            'dependencies': task.get('dependencies', {}),
            'attributes': attributes,
            'when': task.get('when', {}),
        }


# This check can go away once Mozharness builds are no longer performed in
# Buildbot, and the Mozharness code referencing routes.json is deleted.
def check_v2_routes():
    """Check that V2_ROUTE_TEMPLATES matches the routes used by Mozharness.

    Reads testing/mozharness/configs/routes.json (relative to the current
    directory), rewrites the Mozharness placeholders into the names used in
    this module, and raises Exception if the resulting routes differ from
    V2_ROUTE_TEMPLATES (order is ignored).
    """
    with open("testing/mozharness/configs/routes.json", "rb") as f:
        routes_json = json.load(f)

    # we use different variables than mozharness: (mozharness, taskgraph)
    renames = [
        ('{index}', 'index'),
        ('{build_product}', '{product}'),
        ('{build_name}-{build_type}', '{job-name-gecko-v2}'),
        ('{year}.{month}.{day}.{pushdate}', '{build_date_long}'),
    ]

    def translate(route):
        for mozharness_name, taskgraph_name in renames:
            route = route.replace(mozharness_name, taskgraph_name)
        return route

    # we only deal with the 'routes' key here
    routes = [translate(route) for route in routes_json['routes']]

    if sorted(routes) == sorted(V2_ROUTE_TEMPLATES):
        return
    raise Exception("V2_ROUTE_TEMPLATES does not match Mozharness's routes.json: "
                    "%s vs %s" % (V2_ROUTE_TEMPLATES, routes))

# Run the consistency check when this module is imported, so a mismatch with
# Mozharness's routes.json raises immediately.  Note that this reads a file
# relative to the current working directory at import time.
check_v2_routes()