taskcluster/taskgraph/task/transform.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals

import logging
import itertools

from . import base
from .. import files_changed
from ..util.python_path import find_object
from ..util.templates import merge
from ..util.yaml import load_yaml
from ..util.seta import is_low_value_task

from ..transforms.base import TransformSequence, TransformConfig

logger = logging.getLogger(__name__)


class TransformTask(base.Task):
    """
    Tasks of this class are generated by applying transformations to a sequence
    of input entities.  By default, it gets those inputs from YAML data in the
    kind directory, but subclasses may override `get_inputs` to produce them in
    some other way.
    """

    @classmethod
    def get_inputs(cls, kind, path, config, params, loaded_tasks):
        """
        Get the input elements that will be transformed into tasks.  The
        elements themselves are free-form, and become the input to the first
        transform.

        By default, this reads jobs from the `jobs` key, or from yaml files
        named by `jobs-from`.  The entities are read from mappings, and the
        keys to those mappings are added in the `name` key of each entity.

        If there is a `job-defaults` config, then every job is merged with it.
        This provides a simple way to set default values for all jobs of a
        kind.  More complex defaults should be implemented with custom
        transforms.

        This method can be overridden in subclasses that need to perform more
        complex calculations to generate the list of inputs.
        """
        def jobs():
            defaults = config.get('job-defaults')
            jobs = config.get('jobs', {}).iteritems()
            jobs_from = itertools.chain.from_iterable(
                load_yaml(path, filename).iteritems()
                for filename in config.get('jobs-from', {}))
            for name, job in itertools.chain(jobs, jobs_from):
                if defaults:
                    job = merge(defaults, job)
                yield name, job

        for name, job in jobs():
            job['name'] = name
            logger.debug("Generating tasks for {} {}".format(kind, name))
            yield job

    @classmethod
    def load_tasks(cls, kind, path, config, params, loaded_tasks):
        inputs = cls.get_inputs(kind, path, config, params, loaded_tasks)

        transforms = TransformSequence()
        for xform_path in config['transforms']:
            transform = find_object(xform_path)
            transforms.add(transform)

        # perform the transformations
        trans_config = TransformConfig(kind, path, config, params)
        tasks = [cls(kind, t) for t in transforms(trans_config, inputs)]
        return tasks

    def __init__(self, kind, task):
        self.dependencies = task['dependencies']
        self.when = task['when']
        super(TransformTask, self).__init__(kind, task['label'],
                                            task['attributes'], task['task'])

    def get_dependencies(self, taskgraph):
        return [(label, name) for name, label in self.dependencies.items()]

    def optimize(self, params):
        if 'files-changed' in self.when:
            changed = files_changed.check(
                params, self.when['files-changed'])
            if not changed:
                logger.debug('no files found matching a pattern in `when.files-changed` for ' +
                             self.label)
                return True, None

        # we would like to return 'False, None' while it's high_value_task
        # and we wouldn't optimize it. Otherwise, it will return 'True, None'
        if is_low_value_task(self.label, params.get('project')):
            # Always optimize away low-value tasks
            return True, None
        else:
            return False, None

    @classmethod
    def from_json(cls, task_dict):
        # when reading back from JSON, we lose the "when" information
        task_dict['when'] = {}
        return cls(task_dict['attributes']['kind'], task_dict)