1 files changed, 648 insertions, 0 deletions
diff --git a/taskcluster/taskgraph/transforms/task.py b/taskcluster/taskgraph/transforms/task.py
new file mode 100644
index 000000000..6e371e4ba
--- /dev/null
+++ b/taskcluster/taskgraph/transforms/task.py
@@ -0,0 +1,648 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+"""
+These transformations take a task description and turn it into a TaskCluster
+task definition (along with attributes, label, etc.).  The input to these
+transformations is generic to any kind of task, but abstracts away some of the
+complexities of worker implementations, scopes, and treeherder annotations.
+"""
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import json
+import time
+
+from taskgraph.util.treeherder import split_symbol
+from taskgraph.transforms.base import (
+    validate_schema,
+    TransformSequence
+)
+from voluptuous import Schema, Any, Required, Optional, Extra
+
+from .gecko_v2_whitelist import JOB_NAME_WHITELIST, JOB_NAME_WHITELIST_ERROR
+
+# shortcut for a string where task references are allowed
+taskref_or_string = Any(
+    basestring,
+    {Required('task-reference'): basestring})
+
+# A task description is a general description of a TaskCluster task
+task_description_schema = Schema({
+    # the label for this task
+    Required('label'): basestring,
+
+    # description of the task (for metadata)
+    Required('description'): basestring,
+
+    # attributes for this task
+    Optional('attributes'): {basestring: object},
+
+    # dependencies of this task, keyed by name; these are passed through
+    # verbatim and subject to the interpretation of the Task's get_dependencies
+    # method.
+    Optional('dependencies'): {basestring: object},
+
+    # expiration and deadline times, relative to task creation, with units
+    # (e.g., "14 days").  Defaults are set based on the project.
+    Optional('expires-after'): basestring,
+    Optional('deadline-after'): basestring,
+
+    # custom routes for this task; the default treeherder routes will be added
+    # automatically
+    Optional('routes'): [basestring],
+
+    # custom scopes for this task; any scopes required for the worker will be
+    # added automatically
+    Optional('scopes'): [basestring],
+
+    # custom "task.extra" content
+    Optional('extra'): {basestring: object},
+
+    # treeherder-related information; see
+    # https://schemas.taskcluster.net/taskcluster-treeherder/v1/task-treeherder-config.json
+    # If not specified, no treeherder extra information or routes will be
+    # added to the task
+    Optional('treeherder'): {
+        # either a bare symbol, or "grp(sym)".
+        'symbol': basestring,
+
+        # the job kind
+        'kind': Any('build', 'test', 'other'),
+
+        # tier for this task
+        'tier': int,
+
+        # task platform, in the form platform/collection, used to set
+        # treeherder.machine.platform and treeherder.collection or
+        # treeherder.labels
+        'platform': basestring,
+
+        # treeherder environments (defaults to both staging and production)
+        Required('environments', default=['production', 'staging']): ['production', 'staging'],
+    },
+
+    # information for indexing this build so its artifacts can be discovered;
+    # if omitted, the build will not be indexed.
+    Optional('index'): {
+        # the name of the product this build produces
+        'product': Any('firefox', 'mobile'),
+
+        # the names to use for this job in the TaskCluster index
+        'job-name': Any(
+            # Assuming the job is named "normally", this is the v2 job name,
+            # and the v1 and buildbot routes will be determined appropriately.
+            basestring,
+
+            # otherwise, give separate names for each of the legacy index
+            # routes; if a name is omitted, no corresponding route will be
+            # created.
+            {
+                # the name as it appears in buildbot routes
+                Optional('buildbot'): basestring,
+                Required('gecko-v2'): basestring,
+            }
+        ),
+
+        # The rank that the task will receive in the TaskCluster
+        # index.  A newly completed task supercedes the currently
+        # indexed task iff it has a higher rank.  If unspecified,
+        # 'by-tier' behavior will be used.
+        'rank': Any(
+            # Rank is equal the timestamp of the build_date for tier-1
+            # tasks, and zero for non-tier-1.  This sorts tier-{2,3}
+            # builds below tier-1 in the index.
+            'by-tier',
+
+            # Rank is given as an integer constant (e.g. zero to make
+            # sure a task is last in the index).
+            int,
+
+            # Rank is equal to the timestamp of the build_date.  This
+            # option can be used to override the 'by-tier' behavior
+            # for non-tier-1 tasks.
+            'build_date',
+        ),
+    },
+
+    # The `run_on_projects` attribute, defaulting to "all".  This dictates the
+    # projects on which this task should be included in the target task set.
+    # See the attributes documentation for details.
+    Optional('run-on-projects'): [basestring],
+
+    # If the task can be coalesced, this is the name used in the coalesce key
+    # the project, etc. will be added automatically.  Note that try (level 1)
+    # tasks are never coalesced
+    Optional('coalesce-name'): basestring,
+
+    # the provisioner-id/worker-type for the task.  The following parameters will
+    # be substituted in this string:
+    #  {level} -- the scm level of this push
+    'worker-type': basestring,
+
+    # information specific to the worker implementation that will run this task
+    'worker': Any({
+        Required('implementation'): Any('docker-worker', 'docker-engine'),
+
+        # For tasks that will run in docker-worker or docker-engine, this is the
+        # name of the docker image or in-tree docker image to run the task in.  If
+        # in-tree, then a dependency will be created automatically.  This is
+        # generally `desktop-test`, or an image that acts an awful lot like it.
+        Required('docker-image'): Any(
+            # a raw Docker image path (repo/image:tag)
+            basestring,
+            # an in-tree generated docker image (from `testing/docker/<name>`)
+            {'in-tree': basestring}
+        ),
+
+        # worker features that should be enabled
+        Required('relengapi-proxy', default=False): bool,
+        Required('chain-of-trust', default=False): bool,
+        Required('taskcluster-proxy', default=False): bool,
+        Required('allow-ptrace', default=False): bool,
+        Required('loopback-video', default=False): bool,
+        Required('loopback-audio', default=False): bool,
+
+        # caches to set up for the task
+        Optional('caches'): [{
+            # only one type is supported by any of the workers right now
+            'type': 'persistent',
+
+            # name of the cache, allowing re-use by subsequent tasks naming the
+            # same cache
+            'name': basestring,
+
+            # location in the task image where the cache will be mounted
+            'mount-point': basestring,
+        }],
+
+        # artifacts to extract from the task image after completion
+        Optional('artifacts'): [{
+            # type of artifact -- simple file, or recursive directory
+            'type': Any('file', 'directory'),
+
+            # task image path from which to read artifact
+            'path': basestring,
+
+            # name of the produced artifact (root of the names for
+            # type=directory)
+            'name': basestring,
+        }],
+
+        # environment variables
+        Required('env', default={}): {basestring: taskref_or_string},
+
+        # the command to run
+        'command': [taskref_or_string],
+
+        # the maximum time to run, in seconds
+        'max-run-time': int,
+
+        # the exit status code that indicates the task should be retried
+        Optional('retry-exit-status'): int,
+
+    }, {
+        Required('implementation'): 'generic-worker',
+
+        # command is a list of commands to run, sequentially
+        'command': [taskref_or_string],
+
+        # artifacts to extract from the task image after completion; note that artifacts
+        # for the generic worker cannot have names
+        Optional('artifacts'): [{
+            # type of artifact -- simple file, or recursive directory
+            'type': Any('file', 'directory'),
+
+            # task image path from which to read artifact
+            'path': basestring,
+        }],
+
+        # directories and/or files to be mounted
+        Optional('mounts'): [{
+            # a unique name for the cache volume
+            'cache-name': basestring,
+
+            # task image path for the cache
+            'path': basestring,
+        }],
+
+        # environment variables
+        Required('env', default={}): {basestring: taskref_or_string},
+
+        # the maximum time to run, in seconds
+        'max-run-time': int,
+
+        # os user groups for test task workers
+        Optional('os-groups', default=[]): [basestring],
+    }, {
+        Required('implementation'): 'buildbot-bridge',
+
+        # see
+        # https://github.com/mozilla/buildbot-bridge/blob/master/bbb/schemas/payload.yml
+        'buildername': basestring,
+        'sourcestamp': {
+            'branch': basestring,
+            Optional('revision'): basestring,
+            Optional('repository'): basestring,
+            Optional('project'): basestring,
+        },
+        'properties': {
+            'product': basestring,
+            Extra: basestring,  # additional properties are allowed
+        },
+    }, {
+        'implementation': 'macosx-engine',
+
+        # A link for an executable to download
+        Optional('link'): basestring,
+
+        # the command to run
+        Required('command'): [taskref_or_string],
+
+        # environment variables
+        Optional('env'): {basestring: taskref_or_string},
+
+        # artifacts to extract from the task image after completion
+        Optional('artifacts'): [{
+            # type of artifact -- simple file, or recursive directory
+            Required('type'): Any('file', 'directory'),
+
+            # task image path from which to read artifact
+            Required('path'): basestring,
+
+            # name of the produced artifact (root of the names for
+            # type=directory)
+            Required('name'): basestring,
+        }],
+    }),
+
+    # The "when" section contains descriptions of the circumstances
+    # under which this task can be "optimized", that is, left out of the
+    # task graph because it is unnecessary.
+    Optional('when'): Any({
+        # This task only needs to be run if a file matching one of the given
+        # patterns has changed in the push.  The patterns use the mozpack
+        # match function (python/mozbuild/mozpack/path.py).
+        Optional('files-changed'): [basestring],
+    }),
+})
+
+GROUP_NAMES = {
+    'tc': 'Executed by TaskCluster',
+    'tc-e10s': 'Executed by TaskCluster with e10s',
+    'tc-Fxfn-l': 'Firefox functional tests (local) executed by TaskCluster',
+    'tc-Fxfn-l-e10s': 'Firefox functional tests (local) executed by TaskCluster with e10s',
+    'tc-Fxfn-r': 'Firefox functional tests (remote) executed by TaskCluster',
+    'tc-Fxfn-r-e10s': 'Firefox functional tests (remote) executed by TaskCluster with e10s',
+    'tc-M': 'Mochitests executed by TaskCluster',
+    'tc-M-e10s': 'Mochitests executed by TaskCluster with e10s',
+    'tc-R': 'Reftests executed by TaskCluster',
+    'tc-R-e10s': 'Reftests executed by TaskCluster with e10s',
+    'tc-VP': 'VideoPuppeteer tests executed by TaskCluster',
+    'tc-W': 'Web platform tests executed by TaskCluster',
+    'tc-W-e10s': 'Web platform tests executed by TaskCluster with e10s',
+    'tc-X': 'Xpcshell tests executed by TaskCluster',
+    'tc-X-e10s': 'Xpcshell tests executed by TaskCluster with e10s',
+    'Aries': 'Aries Device Image',
+    'Nexus 5-L': 'Nexus 5-L Device Image',
+    'Cc': 'Toolchain builds',
+    'SM-tc': 'Spidermonkey builds',
+}
+UNKNOWN_GROUP_NAME = "Treeherder group {} has no name; add it to " + __file__
+
+BUILDBOT_ROUTE_TEMPLATES = [
+    "index.buildbot.branches.{project}.{job-name-buildbot}",
+    "index.buildbot.revisions.{head_rev}.{project}.{job-name-buildbot}",
+]
+
+V2_ROUTE_TEMPLATES = [
+    "index.gecko.v2.{project}.latest.{product}.{job-name-gecko-v2}",
+    "index.gecko.v2.{project}.pushdate.{build_date_long}.{product}.{job-name-gecko-v2}",
+    "index.gecko.v2.{project}.revision.{head_rev}.{product}.{job-name-gecko-v2}",
+]
+
+# the roots of the treeherder routes, keyed by treeherder environment
+TREEHERDER_ROUTE_ROOTS = {
+    'production': 'tc-treeherder',
+    'staging': 'tc-treeherder-stage',
+}
+
+COALESCE_KEY = 'builds.{project}.{name}'
+
+# define a collection of payload builders, depending on the worker implementation
+payload_builders = {}
+
+
+def payload_builder(name):
+    def wrap(func):
+        payload_builders[name] = func
+        return func
+    return wrap
+
+
+@payload_builder('docker-worker')
+def build_docker_worker_payload(config, task, task_def):
+    worker = task['worker']
+
+    image = worker['docker-image']
+    if isinstance(image, dict):
+        docker_image_task = 'build-docker-image-' + image['in-tree']
+        task.setdefault('dependencies', {})['docker-image'] = docker_image_task
+        image = {
+            "path": "public/image.tar.zst",
+            "taskId": {"task-reference": "<docker-image>"},
+            "type": "task-image",
+        }
+
+    features = {}
+
+    if worker.get('relengapi-proxy'):
+        features['relengAPIProxy'] = True
+
+    if worker.get('taskcluster-proxy'):
+        features['taskclusterProxy'] = True
+
+    if worker.get('allow-ptrace'):
+        features['allowPtrace'] = True
+        task_def['scopes'].append('docker-worker:feature:allowPtrace')
+
+    if worker.get('chain-of-trust'):
+        features['chainOfTrust'] = True
+
+    capabilities = {}
+
+    for lo in 'audio', 'video':
+        if worker.get('loopback-' + lo):
+            capitalized = 'loopback' + lo.capitalize()
+            devices = capabilities.setdefault('devices', {})
+            devices[capitalized] = True
+            task_def['scopes'].append('docker-worker:capability:device:' + capitalized)
+
+    task_def['payload'] = payload = {
+        'command': worker['command'],
+        'image': image,
+        'env': worker['env'],
+    }
+
+    if 'max-run-time' in worker:
+        payload['maxRunTime'] = worker['max-run-time']
+
+    if 'retry-exit-status' in worker:
+        payload['onExitStatus'] = {'retry': [worker['retry-exit-status']]}
+
+    if 'artifacts' in worker:
+        artifacts = {}
+        for artifact in worker['artifacts']:
+            artifacts[artifact['name']] = {
+                'path': artifact['path'],
+                'type': artifact['type'],
+                'expires': task_def['expires'],  # always expire with the task
+            }
+        payload['artifacts'] = artifacts
+
+    if 'caches' in worker:
+        caches = {}
+        for cache in worker['caches']:
+            caches[cache['name']] = cache['mount-point']
+            task_def['scopes'].append('docker-worker:cache:' + cache['name'])
+        payload['cache'] = caches
+
+    if features:
+        payload['features'] = features
+    if capabilities:
+        payload['capabilities'] = capabilities
+
+    # coalesce / superseding
+    if 'coalesce-name' in task and int(config.params['level']) > 1:
+        key = COALESCE_KEY.format(
+            project=config.params['project'],
+            name=task['coalesce-name'])
+        payload['supersederUrl'] = "https://coalesce.mozilla-releng.net/v1/list/" + key
+
+
+@payload_builder('generic-worker')
+def build_generic_worker_payload(config, task, task_def):
+    worker = task['worker']
+
+    artifacts = []
+
+    for artifact in worker['artifacts']:
+        artifacts.append({
+            'path': artifact['path'],
+            'type': artifact['type'],
+            'expires': task_def['expires'],  # always expire with the task
+        })
+
+    mounts = []
+
+    for mount in worker.get('mounts', []):
+        mounts.append({
+            'cacheName': mount['cache-name'],
+            'directory': mount['path']
+        })
+
+    task_def['payload'] = {
+        'command': worker['command'],
+        'artifacts': artifacts,
+        'env': worker.get('env', {}),
+        'mounts': mounts,
+        'maxRunTime': worker['max-run-time'],
+        'osGroups': worker.get('os-groups', []),
+    }
+
+    if 'retry-exit-status' in worker:
+        raise Exception("retry-exit-status not supported in generic-worker")
+
+
+@payload_builder('macosx-engine')
+def build_macosx_engine_payload(config, task, task_def):
+    worker = task['worker']
+    artifacts = map(lambda artifact: {
+        'name': artifact['name'],
+        'path': artifact['path'],
+        'type': artifact['type'],
+        'expires': task_def['expires'],
+    }, worker['artifacts'])
+
+    task_def['payload'] = {
+        'link': worker['link'],
+        'command': worker['command'],
+        'env': worker['env'],
+        'artifacts': artifacts,
+    }
+
+transforms = TransformSequence()
+
+
+@transforms.add
+def validate(config, tasks):
+    for task in tasks:
+        yield validate_schema(
+            task_description_schema, task,
+            "In task {!r}:".format(task.get('label', '?no-label?')))
+
+
+@transforms.add
+def add_index_routes(config, tasks):
+    for task in tasks:
+        index = task.get('index')
+        routes = task.setdefault('routes', [])
+
+        if not index:
+            yield task
+            continue
+
+        job_name = index['job-name']
+        # unpack the v2 name to v1 and buildbot names
+        if isinstance(job_name, basestring):
+            base_name, type_name = job_name.rsplit('-', 1)
+            job_name = {
+                'buildbot': base_name,
+                'gecko-v2': '{}-{}'.format(base_name, type_name),
+            }
+
+        if job_name['gecko-v2'] not in JOB_NAME_WHITELIST:
+            raise Exception(JOB_NAME_WHITELIST_ERROR.format(job_name['gecko-v2']))
+
+        subs = config.params.copy()
+        for n in job_name:
+            subs['job-name-' + n] = job_name[n]
+        subs['build_date_long'] = time.strftime("%Y.%m.%d.%Y%m%d%H%M%S",
+                                                time.gmtime(config.params['build_date']))
+        subs['product'] = index['product']
+
+        if 'buildbot' in job_name:
+            for tpl in BUILDBOT_ROUTE_TEMPLATES:
+                routes.append(tpl.format(**subs))
+        if 'gecko-v2' in job_name:
+            for tpl in V2_ROUTE_TEMPLATES:
+                routes.append(tpl.format(**subs))
+
+        # The default behavior is to rank tasks according to their tier
+        extra_index = task.setdefault('extra', {}).setdefault('index', {})
+        rank = index.get('rank', 'by-tier')
+
+        if rank == 'by-tier':
+            # rank is zero for non-tier-1 tasks and based on pushid for others;
+            # this sorts tier-{2,3} builds below tier-1 in the index
+            tier = task.get('treeherder', {}).get('tier', 3)
+            extra_index['rank'] = 0 if tier > 1 else int(config.params['build_date'])
+        elif rank == 'build_date':
+            extra_index['rank'] = int(config.params['build_date'])
+        else:
+            extra_index['rank'] = rank
+
+        del task['index']
+        yield task
+
+
+@transforms.add
+def build_task(config, tasks):
+    for task in tasks:
+        worker_type = task['worker-type'].format(level=str(config.params['level']))
+        provisioner_id, worker_type = worker_type.split('/', 1)
+
+        routes = task.get('routes', [])
+        scopes = task.get('scopes', [])
+
+        # set up extra
+        extra = task.get('extra', {})
+        task_th = task.get('treeherder')
+        if task_th:
+            extra['treeherderEnv'] = task_th['environments']
+
+            treeherder = extra.setdefault('treeherder', {})
+
+            machine_platform, collection = task_th['platform'].split('/', 1)
+            treeherder['machine'] = {'platform': machine_platform}
+            treeherder['collection'] = {collection: True}
+
+            groupSymbol, symbol = split_symbol(task_th['symbol'])
+            if groupSymbol != '?':
+                treeherder['groupSymbol'] = groupSymbol
+                if groupSymbol not in GROUP_NAMES:
+                    raise Exception(UNKNOWN_GROUP_NAME.format(groupSymbol))
+                treeherder['groupName'] = GROUP_NAMES[groupSymbol]
+            treeherder['symbol'] = symbol
+            treeherder['jobKind'] = task_th['kind']
+            treeherder['tier'] = task_th['tier']
+
+            routes.extend([
+                '{}.v2.{}.{}.{}'.format(TREEHERDER_ROUTE_ROOTS[env],
+                                        config.params['project'],
+                                        config.params['head_rev'],
+                                        config.params['pushlog_id'])
+                for env in task_th['environments']
+            ])
+
+        if 'expires-after' not in task:
+            task['expires-after'] = '28 days' if config.params['project'] == 'try' else '1 year'
+
+        if 'deadline-after' not in task:
+            task['deadline-after'] = '1 day'
+
+        if 'coalesce-name' in task and int(config.params['level']) > 1:
+            key = COALESCE_KEY.format(
+                project=config.params['project'],
+                name=task['coalesce-name'])
+            routes.append('coalesce.v1.' + key)
+
+        task_def = {
+            'provisionerId': provisioner_id,
+            'workerType': worker_type,
+            'routes': routes,
+            'created': {'relative-datestamp': '0 seconds'},
+            'deadline': {'relative-datestamp': task['deadline-after']},
+            'expires': {'relative-datestamp': task['expires-after']},
+            'scopes': scopes,
+            'metadata': {
+                'description': task['description'],
+                'name': task['label'],
+                'owner': config.params['owner'],
+                'source': '{}/file/{}/{}'.format(
+                    config.params['head_repository'],
+                    config.params['head_rev'],
+                    config.path),
+            },
+            'extra': extra,
+            'tags': {'createdForUser': config.params['owner']},
+        }
+
+        # add the payload and adjust anything else as required (e.g., scopes)
+        payload_builders[task['worker']['implementation']](config, task, task_def)
+
+        attributes = task.get('attributes', {})
+        attributes['run_on_projects'] = task.get('run-on-projects', ['all'])
+
+        yield {
+            'label': task['label'],
+            'task': task_def,
+            'dependencies': task.get('dependencies', {}),
+            'attributes': attributes,
+            'when': task.get('when', {}),
+        }
+
+
+# Check that the v2 route templates match those used by Mozharness.  This can
+# go away once Mozharness builds are no longer performed in Buildbot, and the
+# Mozharness code referencing routes.json is deleted.
+def check_v2_routes():
+    with open("testing/mozharness/configs/routes.json", "rb") as f:
+        routes_json = json.load(f)
+
+    # we only deal with the 'routes' key here
+    routes = routes_json['routes']
+
+    # we use different variables than mozharness
+    for mh, tg in [
+            ('{index}', 'index'),
+            ('{build_product}', '{product}'),
+            ('{build_name}-{build_type}', '{job-name-gecko-v2}'),
+            ('{year}.{month}.{day}.{pushdate}', '{build_date_long}')]:
+        routes = [r.replace(mh, tg) for r in routes]
+
+    if sorted(routes) != sorted(V2_ROUTE_TEMPLATES):
+        raise Exception("V2_ROUTE_TEMPLATES does not match Mozharness's routes.json: "
+                        "%s vs %s" % (V2_ROUTE_TEMPLATES, routes))
+
+check_v2_routes()