diff options
Diffstat (limited to 'taskcluster/taskgraph/optimize.py')
-rw-r--r-- | taskcluster/taskgraph/optimize.py | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/taskcluster/taskgraph/optimize.py b/taskcluster/taskgraph/optimize.py new file mode 100644 index 000000000..120e6807b --- /dev/null +++ b/taskcluster/taskgraph/optimize.py @@ -0,0 +1,156 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, print_function, unicode_literals +import logging +import re + +from .graph import Graph +from .taskgraph import TaskGraph +from slugid import nice as slugid + +logger = logging.getLogger(__name__) +TASK_REFERENCE_PATTERN = re.compile('<([^>]+)>') + + +def optimize_task_graph(target_task_graph, params, do_not_optimize, existing_tasks=None): + """ + Perform task optimization, without optimizing tasks named in + do_not_optimize. + """ + named_links_dict = target_task_graph.graph.named_links_dict() + label_to_taskid = {} + + # This proceeds in two phases. First, mark all optimized tasks (those + # which will be removed from the graph) as such, including a replacement + # taskId where applicable. Second, generate a new task graph containing + # only the non-optimized tasks, with all task labels resolved to taskIds + # and with task['dependencies'] populated. + annotate_task_graph(target_task_graph=target_task_graph, + params=params, + do_not_optimize=do_not_optimize, + named_links_dict=named_links_dict, + label_to_taskid=label_to_taskid, + existing_tasks=existing_tasks) + return get_subgraph(target_task_graph, named_links_dict, label_to_taskid), label_to_taskid + + +def resolve_task_references(label, task_def, taskid_for_edge_name): + def repl(match): + key = match.group(1) + try: + return taskid_for_edge_name[key] + except KeyError: + # handle escaping '<' + if key == '<': + return key + raise KeyError("task '{}' has no dependency named '{}'".format(label, key)) + + def recurse(val): + if isinstance(val, list): + return [recurse(v) for v in val] + elif isinstance(val, dict): + if val.keys() == ['task-reference']: + return TASK_REFERENCE_PATTERN.sub(repl, val['task-reference']) + else: + return {k: recurse(v) for k, v in val.iteritems()} + else: + return val + return recurse(task_def) + + +def annotate_task_graph(target_task_graph, params, do_not_optimize, + named_links_dict, label_to_taskid, existing_tasks): + """ + Annotate each task in the graph with .optimized (boolean) and .task_id + (possibly None), following the rules for optimization and calling the task + kinds' `optimize_task` method. + + As a side effect, label_to_taskid is updated with labels for all optimized + tasks that are replaced with existing tasks. + """ + + # set .optimized for all tasks, and .task_id for optimized tasks + # with replacements + for label in target_task_graph.graph.visit_postorder(): + task = target_task_graph.tasks[label] + named_task_dependencies = named_links_dict.get(label, {}) + + # check whether any dependencies have been optimized away + dependencies = [target_task_graph.tasks[l] for l in named_task_dependencies.itervalues()] + for t in dependencies: + if t.optimized and not t.task_id: + raise Exception( + "task {} was optimized away, but {} depends on it".format( + t.label, label)) + + # if this task is blacklisted, don't even consider optimizing + replacement_task_id = None + if label in do_not_optimize: + optimized = False + # Let's check whether this task has been created before + elif existing_tasks is not None and label in existing_tasks: + optimized = True + replacement_task_id = existing_tasks[label] + # otherwise, examine the task itself (which may be an expensive operation) + else: + optimized, replacement_task_id = task.optimize(params) + + task.optimized = optimized + task.task_id = replacement_task_id + if replacement_task_id: + label_to_taskid[label] = replacement_task_id + + if optimized: + if replacement_task_id: + logger.debug("optimizing `{}`, replacing with task `{}`" + .format(label, replacement_task_id)) + else: + logger.debug("optimizing `{}` away".format(label)) + # note: any dependent tasks will fail when they see this + else: + if replacement_task_id: + raise Exception("{}: optimize_task returned False with a taskId".format(label)) + + +def get_subgraph(annotated_task_graph, named_links_dict, label_to_taskid): + """ + Return the subgraph of annotated_task_graph consisting only of + non-optimized tasks and edges between them. + + To avoid losing track of taskIds for tasks optimized away, this method + simultaneously substitutes real taskIds for task labels in the graph, and + populates each task definition's `dependencies` key with the appropriate + taskIds. Task references are resolved in the process. + """ + + # resolve labels to taskIds and populate task['dependencies'] + tasks_by_taskid = {} + for label in annotated_task_graph.graph.visit_postorder(): + task = annotated_task_graph.tasks[label] + if task.optimized: + continue + task.task_id = label_to_taskid[label] = slugid() + named_task_dependencies = { + name: label_to_taskid[label] + for name, label in named_links_dict.get(label, {}).iteritems()} + task.task = resolve_task_references(task.label, task.task, named_task_dependencies) + task.task.setdefault('dependencies', []).extend(named_task_dependencies.itervalues()) + tasks_by_taskid[task.task_id] = task + + # resolve edges to taskIds + edges_by_taskid = ( + (label_to_taskid.get(left), label_to_taskid.get(right), name) + for (left, right, name) in annotated_task_graph.graph.edges + ) + # ..and drop edges that are no longer in the task graph + edges_by_taskid = set( + (left, right, name) + for (left, right, name) in edges_by_taskid + if left in tasks_by_taskid and right in tasks_by_taskid + ) + + return TaskGraph( + tasks_by_taskid, + Graph(set(tasks_by_taskid), edges_by_taskid)) |