summaryrefslogtreecommitdiffstats
path: root/taskcluster/taskgraph/optimize.py
diff options
context:
space:
mode:
Diffstat (limited to 'taskcluster/taskgraph/optimize.py')
-rw-r--r--taskcluster/taskgraph/optimize.py156
1 files changed, 156 insertions, 0 deletions
diff --git a/taskcluster/taskgraph/optimize.py b/taskcluster/taskgraph/optimize.py
new file mode 100644
index 000000000..120e6807b
--- /dev/null
+++ b/taskcluster/taskgraph/optimize.py
@@ -0,0 +1,156 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+import logging
+import re
+
+from .graph import Graph
+from .taskgraph import TaskGraph
+from slugid import nice as slugid
+
+logger = logging.getLogger(__name__)
+TASK_REFERENCE_PATTERN = re.compile('<([^>]+)>')
+
+
+def optimize_task_graph(target_task_graph, params, do_not_optimize, existing_tasks=None):
+ """
+ Perform task optimization, without optimizing tasks named in
+ do_not_optimize.
+ """
+ named_links_dict = target_task_graph.graph.named_links_dict()
+ label_to_taskid = {}
+
+ # This proceeds in two phases. First, mark all optimized tasks (those
+ # which will be removed from the graph) as such, including a replacement
+ # taskId where applicable. Second, generate a new task graph containing
+ # only the non-optimized tasks, with all task labels resolved to taskIds
+ # and with task['dependencies'] populated.
+ annotate_task_graph(target_task_graph=target_task_graph,
+ params=params,
+ do_not_optimize=do_not_optimize,
+ named_links_dict=named_links_dict,
+ label_to_taskid=label_to_taskid,
+ existing_tasks=existing_tasks)
+ return get_subgraph(target_task_graph, named_links_dict, label_to_taskid), label_to_taskid
+
+
+def resolve_task_references(label, task_def, taskid_for_edge_name):
+ def repl(match):
+ key = match.group(1)
+ try:
+ return taskid_for_edge_name[key]
+ except KeyError:
+ # handle escaping '<'
+ if key == '<':
+ return key
+ raise KeyError("task '{}' has no dependency named '{}'".format(label, key))
+
+ def recurse(val):
+ if isinstance(val, list):
+ return [recurse(v) for v in val]
+ elif isinstance(val, dict):
+ if val.keys() == ['task-reference']:
+ return TASK_REFERENCE_PATTERN.sub(repl, val['task-reference'])
+ else:
+ return {k: recurse(v) for k, v in val.iteritems()}
+ else:
+ return val
+ return recurse(task_def)
+
+
+def annotate_task_graph(target_task_graph, params, do_not_optimize,
+ named_links_dict, label_to_taskid, existing_tasks):
+ """
+ Annotate each task in the graph with .optimized (boolean) and .task_id
+ (possibly None), following the rules for optimization and calling the task
+ kinds' `optimize_task` method.
+
+ As a side effect, label_to_taskid is updated with labels for all optimized
+ tasks that are replaced with existing tasks.
+ """
+
+ # set .optimized for all tasks, and .task_id for optimized tasks
+ # with replacements
+ for label in target_task_graph.graph.visit_postorder():
+ task = target_task_graph.tasks[label]
+ named_task_dependencies = named_links_dict.get(label, {})
+
+ # check whether any dependencies have been optimized away
+ dependencies = [target_task_graph.tasks[l] for l in named_task_dependencies.itervalues()]
+ for t in dependencies:
+ if t.optimized and not t.task_id:
+ raise Exception(
+ "task {} was optimized away, but {} depends on it".format(
+ t.label, label))
+
+ # if this task is blacklisted, don't even consider optimizing
+ replacement_task_id = None
+ if label in do_not_optimize:
+ optimized = False
+ # Let's check whether this task has been created before
+ elif existing_tasks is not None and label in existing_tasks:
+ optimized = True
+ replacement_task_id = existing_tasks[label]
+ # otherwise, examine the task itself (which may be an expensive operation)
+ else:
+ optimized, replacement_task_id = task.optimize(params)
+
+ task.optimized = optimized
+ task.task_id = replacement_task_id
+ if replacement_task_id:
+ label_to_taskid[label] = replacement_task_id
+
+ if optimized:
+ if replacement_task_id:
+ logger.debug("optimizing `{}`, replacing with task `{}`"
+ .format(label, replacement_task_id))
+ else:
+ logger.debug("optimizing `{}` away".format(label))
+ # note: any dependent tasks will fail when they see this
+ else:
+ if replacement_task_id:
+ raise Exception("{}: optimize_task returned False with a taskId".format(label))
+
+
+def get_subgraph(annotated_task_graph, named_links_dict, label_to_taskid):
+ """
+ Return the subgraph of annotated_task_graph consisting only of
+ non-optimized tasks and edges between them.
+
+ To avoid losing track of taskIds for tasks optimized away, this method
+ simultaneously substitutes real taskIds for task labels in the graph, and
+ populates each task definition's `dependencies` key with the appropriate
+ taskIds. Task references are resolved in the process.
+ """
+
+ # resolve labels to taskIds and populate task['dependencies']
+ tasks_by_taskid = {}
+ for label in annotated_task_graph.graph.visit_postorder():
+ task = annotated_task_graph.tasks[label]
+ if task.optimized:
+ continue
+ task.task_id = label_to_taskid[label] = slugid()
+ named_task_dependencies = {
+ name: label_to_taskid[label]
+ for name, label in named_links_dict.get(label, {}).iteritems()}
+ task.task = resolve_task_references(task.label, task.task, named_task_dependencies)
+ task.task.setdefault('dependencies', []).extend(named_task_dependencies.itervalues())
+ tasks_by_taskid[task.task_id] = task
+
+ # resolve edges to taskIds
+ edges_by_taskid = (
+ (label_to_taskid.get(left), label_to_taskid.get(right), name)
+ for (left, right, name) in annotated_task_graph.graph.edges
+ )
+ # ..and drop edges that are no longer in the task graph
+ edges_by_taskid = set(
+ (left, right, name)
+ for (left, right, name) in edges_by_taskid
+ if left in tasks_by_taskid and right in tasks_by_taskid
+ )
+
+ return TaskGraph(
+ tasks_by_taskid,
+ Graph(set(tasks_by_taskid), edges_by_taskid))