summaryrefslogtreecommitdiffstats
path: root/taskcluster/taskgraph/util
diff options
context:
space:
mode:
Diffstat (limited to 'taskcluster/taskgraph/util')
-rw-r--r--taskcluster/taskgraph/util/__init__.py0
-rw-r--r--taskcluster/taskgraph/util/attributes.py26
-rw-r--r--taskcluster/taskgraph/util/docker.py160
-rw-r--r--taskcluster/taskgraph/util/python_path.py27
-rw-r--r--taskcluster/taskgraph/util/seta.py85
-rw-r--r--taskcluster/taskgraph/util/templates.py155
-rw-r--r--taskcluster/taskgraph/util/time.py114
-rw-r--r--taskcluster/taskgraph/util/treeherder.py24
-rw-r--r--taskcluster/taskgraph/util/yaml.py16
9 files changed, 607 insertions, 0 deletions
diff --git a/taskcluster/taskgraph/util/__init__.py b/taskcluster/taskgraph/util/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/taskcluster/taskgraph/util/__init__.py
diff --git a/taskcluster/taskgraph/util/attributes.py b/taskcluster/taskgraph/util/attributes.py
new file mode 100644
index 000000000..b44a3364f
--- /dev/null
+++ b/taskcluster/taskgraph/util/attributes.py
@@ -0,0 +1,26 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
def attrmatch(attributes, **kwargs):
    """Determine whether the given set of task attributes matches.

    The conditions are given as keyword arguments, where each keyword names
    an attribute.  The keyword value can be a literal, a set, or a callable:

    * a literal must equal the attribute value exactly;
    * given a set, the attribute value must be a member of the set;
    * a callable is called with the attribute value and must return a
      true value.

    If an attribute is named by a keyword argument but is not present in
    ``attributes``, the result is False.  With no conditions the result is
    True.
    """
    # ``items()`` instead of the Python-2-only ``iteritems()`` so this helper
    # behaves the same on Python 2 and Python 3.
    for kwkey, kwval in kwargs.items():
        if kwkey not in attributes:
            return False
        attval = attributes[kwkey]
        if isinstance(kwval, set):
            if attval not in kwval:
                return False
        elif callable(kwval):
            if not kwval(attval):
                return False
        elif kwval != attval:
            return False
    return True
diff --git a/taskcluster/taskgraph/util/docker.py b/taskcluster/taskgraph/util/docker.py
new file mode 100644
index 000000000..df97e57bc
--- /dev/null
+++ b/taskcluster/taskgraph/util/docker.py
@@ -0,0 +1,160 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import hashlib
+import os
+import shutil
+import subprocess
+import tarfile
+import tempfile
+
+from mozpack.archive import (
+ create_tar_gz_from_files,
+)
+
+
# Root of the source checkout: four path components above this file.
GECKO = os.path.realpath(os.path.join(__file__, *(['..'] * 4)))
# Directory holding the in-tree docker image definitions.
DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
INDEX_PREFIX = 'docker.images.v2'
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+
+
def docker_image(name, default_version=None):
    '''Return the ``registry/name:version`` image name for an in-tree docker
    image.

    The registry is read from the image's own ``REGISTRY`` file, falling back
    to the ``REGISTRY`` file directly under ``DOCKER_ROOT``.  The version is
    read from the image's ``VERSION`` file; when that file cannot be read,
    ``default_version`` is used if it is set, otherwise the IOError
    propagates to the caller.
    '''
    def read_stripped(*parts):
        # Read a small metadata file below DOCKER_ROOT, without surrounding
        # whitespace.
        with open(os.path.join(DOCKER_ROOT, *parts)) as fh:
            return fh.read().strip()

    try:
        registry = read_stripped(name, 'REGISTRY')
    except IOError:
        registry = read_stripped('REGISTRY')

    try:
        version = read_stripped(name, 'VERSION')
    except IOError:
        if not default_version:
            raise
        version = default_version

    return '{}/{}:{}'.format(registry, name, version)
+
+
def generate_context_hash(topsrcdir, image_path, image_name):
    """Return the SHA-256 hex digest of the context archive for an image.

    The archive is written to a temporary file by ``create_context_tar`` and
    the file is removed again before returning.
    """
    # It is a bit unfortunate we have to create a temp file here - it would
    # be nicer to use an in-memory buffer.
    handle, scratch_path = tempfile.mkstemp()
    # Only the path is needed; the archive writer reopens the file itself.
    os.close(handle)
    try:
        digest = create_context_tar(topsrcdir, image_path, scratch_path,
                                    image_name)
    finally:
        os.unlink(scratch_path)
    return digest
+
+
def create_context_tar(topsrcdir, context_dir, out_path, prefix):
    """Create a context tarball.

    A directory ``context_dir`` containing a Dockerfile will be assembled into
    a gzipped tar file at ``out_path``. Files inside the archive will be
    prefixed by directory ``prefix``.

    We also scan the source Dockerfile for special syntax that influences
    context generation.

    If a line in the Dockerfile has the form ``# %include <path>``,
    the relative path specified on that line will be matched against
    files in the source repository and added to the context under the
    path ``topsrcdir/``. If an entry is a directory, we add all files
    under that directory, keeping their layout relative to that directory.

    :param str topsrcdir: root of the source repository; include paths are
        resolved relative to it and may not escape it.
    :param str context_dir: directory holding the Dockerfile.
    :param str out_path: where the gzipped tar file is written.
    :param str prefix: directory name prepended to every archive member.
    :raises Exception: if an include path is absolute, resolves outside
        ``topsrcdir``, or does not exist.

    Returns the SHA-256 hex digest of the created archive.
    """
    archive_files = {}

    for root, _dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            # relpath rather than slicing by len(context_dir) so a trailing
            # separator on context_dir cannot chop a character off the name.
            rel = os.path.relpath(source_path, context_dir)
            archive_files[os.path.join(prefix, rel)] = source_path

    # Parse Dockerfile for special syntax of extra files to include.  The
    # file is read as bytes and compared against bytes markers, so lines with
    # non-ASCII content are skipped cleanly on both Python 2 and Python 3.
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            line = line.rstrip()
            if not line.startswith(b'# %include'):
                continue

            p = line[len(b'# %include '):].strip()
            if not isinstance(p, str):
                # Python 3: turn the bytes path into text for os.path.
                p = p.decode('utf-8')
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.  A plain string-prefix
            # test would accept sibling directories such as "<topsrcdir>-x",
            # so compare path components instead.
            rel_to_top = os.path.relpath(fs_path, topsrcdir)
            if (rel_to_top == os.pardir or
                    rel_to_top.startswith(os.pardir + os.sep)):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, _dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        # Keep the path below the included directory so files
                        # in nested subdirectories are neither flattened nor
                        # overwrite each other.
                        rel = os.path.relpath(source_path, fs_path)
                        archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    with open(out_path, 'wb') as fh:
        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)

    # Hash the archive in chunks to keep memory use bounded.
    h = hashlib.sha256()
    with open(out_path, 'rb') as fh:
        for data in iter(lambda: fh.read(32768), b''):
            h.update(data)
    return h.hexdigest()
+
+
def build_from_context(docker_bin, context_path, prefix, tag=None):
    """Build a Docker image from a context archive.

    ``docker_bin`` is the path to a `docker` binary, ``context_path`` an
    image build tar.gz (produced with ``create_context_tar()``), ``prefix``
    the directory inside that archive containing the files, and ``tag`` an
    optional tag for the produced image.

    The archive is unpacked into a temporary directory, which is removed
    again whether or not the build succeeds.  Raises ``Exception`` when the
    docker command exits with a non-zero status.
    """
    workdir = tempfile.mkdtemp()
    try:
        with tarfile.open(context_path, 'r:gz') as archive:
            archive.extractall(workdir)

        # If we wanted to do post-processing of the Dockerfile, this is
        # where we'd do it.

        # Use --no-cache so we always get the latest package updates.
        command = [docker_bin, 'build', '--no-cache']
        if tag:
            command += ['-t', tag]
        command.append('.')

        if subprocess.call(command, cwd=os.path.join(workdir, prefix)):
            raise Exception('error building image')
    finally:
        shutil.rmtree(workdir)
diff --git a/taskcluster/taskgraph/util/python_path.py b/taskcluster/taskgraph/util/python_path.py
new file mode 100644
index 000000000..b14223ca6
--- /dev/null
+++ b/taskcluster/taskgraph/util/python_path.py
@@ -0,0 +1,27 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+
def find_object(path):
    """
    Look up a Python object from a string of the form
    <modulepath>:<objectpath>.  Conceptually equivalent to

        def find_object(modulepath, objectpath):
            import <modulepath> as mod
            return mod.<objectpath>

    Raises ValueError unless ``path`` contains exactly one ':'.
    """
    if path.count(':') != 1:
        raise ValueError(
            'python path {!r} does not have the form "module:object"'.format(path))

    modulepath, objectpath = path.split(':')
    # __import__ returns the top-level package, so walk down through the
    # remaining module components and then through the object path.
    target = __import__(modulepath)
    for attr in modulepath.split('.')[1:] + objectpath.split('.'):
        target = getattr(target, attr)
    return target
diff --git a/taskcluster/taskgraph/util/seta.py b/taskcluster/taskgraph/util/seta.py
new file mode 100644
index 000000000..a0cd30675
--- /dev/null
+++ b/taskcluster/taskgraph/util/seta.py
@@ -0,0 +1,85 @@
+import json
+import logging
+import requests
+from redo import retry
+from requests import exceptions
+
logger = logging.getLogger(__name__)
# HTTP headers sent with the request to the SETA service.
headers = {'User-Agent': 'TaskCluster'}

# Projects for which SETA data is queried.
SETA_PROJECTS = ['mozilla-inbound', 'autoland']
# URL template of the SETA query; ``%s`` is replaced by the project name.
SETA_ENDPOINT = "https://seta.herokuapp.com/data/setadetails/?branch=%s"
+
+
class SETA(object):
    """
    Interface to the SETA service, which defines low-value tasks that can be optimized out
    of the taskgraph.
    """
    def __init__(self):
        # cached low value tasks, by project
        self.low_value_tasks = {}

    def query_low_value_tasks(self, project):
        """Request the list of low value tasks for ``project`` from the SETA service.

        Low value tasks will be optimized out of the task graph.  Returns an empty list
        when SETA is not enabled for the project, or when the service cannot be reached
        or returns an unusable response, so every task is then treated as high value.
        """
        if project not in SETA_PROJECTS:
            logger.debug("SETA is not enabled for project `{}`".format(project))
            return []

        logger.debug("Querying SETA service for low-value tasks on {}".format(project))
        low_value_tasks = []

        url = SETA_ENDPOINT % project
        # Try to fetch the SETA data twice, falling back to an empty list of low value tasks.
        # There are 10 seconds between each try.
        try:
            logger.debug("Retrieving low-value jobs list from SETA")
            response = retry(requests.get, attempts=2, sleeptime=10,
                             args=(url, ),
                             kwargs={'timeout': 5, 'headers': headers})
            task_list = json.loads(response.content).get('jobtypes', '')
            if len(task_list) > 0:
                # Take the first value of the mapping.  ``values()`` is a list on
                # Python 2 but a non-indexable view on Python 3, so go through an
                # iterator, which works on both.
                low_value_tasks = next(iter(task_list.values()))

            # Bug 1315145, disable SETA for tier-1 platforms until backfill is implemented.
            low_value_tasks = [x for x in low_value_tasks
                               if x.find('debug') == -1 and x.find('asan') == -1]

        # In the event a request times out, requests will raise a Timeout.
        except exceptions.Timeout:
            logger.warning("SETA server is timeout, we will treat all test tasks as high value.")

        # In the event of a network problem (e.g. DNS failure, refused connection, etc),
        # requests will raise a ConnectionError.
        except exceptions.ConnectionError:
            logger.warning("SETA server connection failed,"
                           " we will treat all test tasks as high value.")

        # In the event of the rare invalid HTTP response(e.g 404, 401),
        # requests will raise an HTTPError exception
        except exceptions.HTTPError:
            logger.warning("We got bad Http response from ouija,"
                           " we will treat all test tasks as high value.")

        # Any other requests failure not handled above is only logged as a warning.
        except exceptions.RequestException as error:
            logger.warning(error)

        # When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426)
        except ValueError as error:
            logger.warning("Invalid JSON, possible server error: {}".format(error))

        return low_value_tasks

    def is_low_value_task(self, label, project):
        """Return whether the task ``label`` is a low value task on ``project``."""
        # cache the low value tasks per project to avoid repeated SETA server queries
        if project not in self.low_value_tasks:
            self.low_value_tasks[project] = self.query_low_value_tasks(project)
        return label in self.low_value_tasks[project]

# create a single instance of this class, and expose its `is_low_value_task`
# bound method as a module-level function
is_low_value_task = SETA().is_low_value_task
diff --git a/taskcluster/taskgraph/util/templates.py b/taskcluster/taskgraph/util/templates.py
new file mode 100644
index 000000000..97620fa75
--- /dev/null
+++ b/taskcluster/taskgraph/util/templates.py
@@ -0,0 +1,155 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import os
+
+import pystache
+import yaml
+import copy
+
# Dict key that names the parent template (and its variables) a rendered
# yaml document inherits from.
INHERITS_KEY = '$inherits'
+
+
def merge_to(source, dest):
    '''
    Merge dict and arrays (override scalar values)

    Keys from source override keys from dest, and elements from lists in source
    are appended to lists in dest.  Values taken over from source are stored
    by reference, not copied.

    :param dict source: to copy from
    :param dict dest: to copy to (modified in place)
    :returns: ``dest``
    '''
    for key, value in source.items():
        current = dest.get(key)

        # Override mismatching or empty types
        if type(value) != type(current):  # noqa
            dest[key] = value
        elif isinstance(value, dict):
            # Merge dict
            merge_to(value, current)
        elif isinstance(value, list):
            # Build a new list so neither input list is mutated.
            dest[key] = current + value
        else:
            dest[key] = value

    return dest


def merge(*objects):
    '''
    Merge the given objects, using the semantics described for merge_to, with
    objects later in the list taking precedence. From an inheritance
    perspective, "parents" should be listed before "children".

    Returns the result without modifying any arguments.  At least one object
    is required (IndexError otherwise).
    '''
    result = copy.deepcopy(objects[0])
    for obj in objects[1:]:
        # merge_to stores values from its source by reference, so merging the
        # caller's object directly would let a later merge write into it
        # through the shared nested dict.  Merge a private copy instead.
        merge_to(copy.deepcopy(obj), result)
    return result
+
+
class TemplatesException(Exception):
    '''Raised when a template cannot be located, loaded or expanded.'''
+
+
class Templates(object):
    '''
    The taskcluster integration makes heavy use of yaml to describe tasks;
    this class handles the loading/rendering.

    Each file is rendered with mustache (pystache) and parsed as yaml.  When
    the parsed document is a dict containing the ``$inherits`` key, it is
    merged on top of the parent template named there.
    '''

    def __init__(self, root):
        '''
        Initialize the template render.

        :param str root: Root path where to load yaml files.
        :raises TemplatesException: if root is empty or not a directory.
        '''
        if not root:
            raise TemplatesException('Root is required')

        if not os.path.isdir(root):
            raise TemplatesException('Root must be a directory')

        self.root = root

    def _inherits(self, path, obj, properties, seen):
        '''
        Expand the ``$inherits`` entry of a parsed template.

        :param str path: resolved path of the template being expanded.
        :param dict obj: parsed template; its ``$inherits`` entry is removed.
        :param dict properties: parameters passed by the caller (may be
            None); they override the variables given in the template.
        :param set seen: resolved paths already visited in this chain.
        :returns: the parent template with ``obj`` merged on top.
        :raises TemplatesException: if the parent is missing, circular, or
            fails to load.
        '''
        blueprint = obj.pop(INHERITS_KEY)
        seen.add(path)

        # Check for the parent before resolving it: joining the root with a
        # missing value would fail with an unrelated error.
        parent = blueprint.get('from')
        if not parent:
            msg = '"{}" inheritance template missing'.format(path)
            raise TemplatesException(msg)

        # Resolve the path here so we can detect circular references.
        template = self.resolve_path(parent)
        variables = blueprint.get('variables') or {}

        # Passed parameters override anything in the task itself.
        variables.update(properties or {})

        if template in seen:
            msg = 'Error while handling "{}" in "{}" circular template ' + \
                  'inheritance seen \n {}'
            raise TemplatesException(msg.format(path, template, seen))

        try:
            # load() resolves the path against the root itself, so hand it
            # the unresolved name; passing the resolved path would apply a
            # relative root twice.
            out = self.load(parent, variables, seen)
        except TemplatesException as e:
            msg = 'Error expanding parent ("{}") of "{}" original error {}'
            raise TemplatesException(msg.format(template, path, str(e)))

        # Anything left in obj is merged into final results (and overrides)
        return merge_to(obj, out)

    def render(self, path, content, parameters, seen):
        '''
        Renders a given yaml string.

        :param str path: used to prevent infinite recursion in inheritance.
        :param str content: Of yaml file.
        :param dict parameters: For mustache templates.
        :param set seen: Seen files (used for inheritance)
        :returns: the parsed (and, if applicable, inheritance-expanded) yaml.
        '''
        content = pystache.render(content, parameters)
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects; acceptable only while templates are
        # trusted in-tree files.  Consider yaml.safe_load - TODO confirm no
        # template relies on python-specific yaml tags.
        result = yaml.load(content)

        # In addition to the usual template logic done by mustache we also
        # handle special '$inherits' dict keys.
        if isinstance(result, dict) and INHERITS_KEY in result:
            return self._inherits(path, result, parameters, seen)

        return result

    def resolve_path(self, path):
        '''Return ``path`` joined onto the template root.'''
        return os.path.join(self.root, path)

    def load(self, path, parameters=None, seen=None):
        '''
        Load and render the given yaml path.

        :param str path: Location of yaml file to load (relative to root).
        :param dict parameters: To template yaml file with.
        :param set seen: Seen files (used for inheritance)
        :raises TemplatesException: if path is empty or is not a file.
        '''
        seen = seen or set()

        if not path:
            raise TemplatesException('path is required')

        path = self.resolve_path(path)

        if not os.path.isfile(path):
            raise TemplatesException('"{}" is not a file'.format(path))

        # Close the file promptly instead of leaving it to garbage collection.
        with open(path) as fh:
            content = fh.read()
        return self.render(path, content, parameters, seen)
diff --git a/taskcluster/taskgraph/util/time.py b/taskcluster/taskgraph/util/time.py
new file mode 100644
index 000000000..160aaa70c
--- /dev/null
+++ b/taskcluster/taskgraph/util/time.py
@@ -0,0 +1,114 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Python port of the ms.js node module this is not a direct port some things are
+# more complicated or less precise and we lean on time delta here.
+
+import re
+import datetime
+
# A number (optionally with a decimal part), optional spaces, then a
# lower-case unit name.  Raw string so the regex escapes are not treated as
# (invalid) string escapes.
PATTERN = re.compile(
    r'((?:\d+)?\.?\d+) *([a-z]+)'
)
+
+
def seconds(value):
    """Return a timedelta of ``value`` seconds.

    ``value`` may be a number or a numeric string; fractional values such as
    '1.5' are accepted.
    """
    return datetime.timedelta(seconds=float(value))
+
+
def minutes(value):
    """Return a timedelta of ``value`` minutes.

    ``value`` may be a number or a numeric string; fractional values such as
    '1.5' are accepted.
    """
    return datetime.timedelta(minutes=float(value))
+
+
def hours(value):
    """Return a timedelta of ``value`` hours.

    ``value`` may be a number or a numeric string; fractional values such as
    '1.5' are accepted.
    """
    return datetime.timedelta(hours=float(value))
+
+
def days(value):
    """Return a timedelta of ``value`` days.

    ``value`` may be a number or a numeric string; fractional values such as
    '1.5' are accepted.
    """
    return datetime.timedelta(days=float(value))
+
+
def months(value):
    """Return a timedelta of ``value`` months, counting a month as 30 days.

    This is an approximate amount of time, not a calendar computation.
    ``value`` may be a number or a numeric string; fractional values such as
    '1.5' are accepted.
    """
    return datetime.timedelta(days=float(value) * 30)
+
+
def years(value):
    """Return a timedelta of ``value`` years, counting a year as 365 days.

    ``value`` may be a number or a numeric string; fractional values such as
    '1.5' are accepted.
    """
    # Warning: "years" are vague here.  Don't use this for really sensitive
    # date computation; the idea is to give you an absolute amount of time in
    # the future, which is not the same thing as "precisely on this date next
    # year".
    return datetime.timedelta(days=float(value) * 365)
+
# Maps every accepted unit spelling to the function that builds the
# corresponding timedelta.
ALIASES = {
    'seconds': seconds, 'second': seconds, 's': seconds,
    'minutes': minutes, 'minute': minutes, 'min': minutes,
    'hours': hours, 'hour': hours, 'h': hours,
    'days': days, 'day': days, 'd': days,
    'months': months, 'month': months, 'mo': months,
    'years': years, 'year': years, 'y': years,
}
+
+
class InvalidString(Exception):
    """Raised when a time string contains no "<number><unit>" expression."""
+
+
class UnknownTimeMeasurement(Exception):
    """Raised when the unit of a time string is not a known alias."""
+
+
def value_of(input_str):
    '''
    Convert a string describing an amount of time into a timedelta.

    The first "<number><unit>" expression found in the string is used.

    :param str input_str: (ex: 1d, 2d, 6years, 2 seconds)
    :returns: `datetime.timedelta` for the given amount of time
    :raises InvalidString: if no "<number><unit>" expression is found
    :raises UnknownTimeMeasurement: if the unit is not a key of ALIASES
    '''

    matches = PATTERN.search(input_str)

    # A successful match always carries both groups, so a None check is all
    # that is needed.
    if matches is None:
        raise InvalidString("'{}' is invalid string".format(input_str))

    value, unit = matches.groups()

    if unit not in ALIASES:
        raise UnknownTimeMeasurement(
            '{} is not a valid time measure use one of {}'.format(
                unit,
                sorted(ALIASES.keys())
            )
        )

    return ALIASES[unit](value)
+
+
def json_time_from_now(input_str, now=None, datetime_format=False):
    '''
    Compute the point in time that lies ``input_str`` after ``now``.

    :param str input_str: Input string (see value_of)
    :param datetime now: Optionally set the definition of `now`; defaults to
        the current UTC time
    :param boolean datetime_format: Set `True` to get a `datetime` output
    :returns: JSON string representation of time in future, or the
        `datetime` itself when `datetime_format` is `True`.
    '''
    start = datetime.datetime.utcnow() if now is None else now
    future = start + value_of(input_str)

    if datetime_format is True:
        return future

    # Sorta a big hack but the json schema validator for date does not like the
    # ISO dates until 'Z' (for timezone) is added...
    return future.isoformat() + 'Z'
+
+
def current_json_time(datetime_format=False):
    '''
    Return the current UTC time.

    :param boolean datetime_format: Set `True` to get a `datetime` output
    :returns: JSON string representation of the current time, or the
        `datetime` itself when `datetime_format` is `True`.
    '''
    now = datetime.datetime.utcnow()
    if datetime_format is True:
        return now
    return now.isoformat() + 'Z'
diff --git a/taskcluster/taskgraph/util/treeherder.py b/taskcluster/taskgraph/util/treeherder.py
new file mode 100644
index 000000000..e66db582f
--- /dev/null
+++ b/taskcluster/taskgraph/util/treeherder.py
@@ -0,0 +1,24 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+import re
+
+
def split_symbol(treeherder_symbol):
    """Split a symbol expressed as grp(sym) into its two parts. If no group is
    given, the returned group is '?'.

    Returns a ``(group, symbol)`` tuple.  Raises ValueError if the symbol
    contains '(' but is not of the form grp(sym), e.g. when the closing
    parenthesis is missing."""
    groupSymbol = '?'
    symbol = treeherder_symbol
    if '(' in symbol:
        match = re.match(r'([^(]*)\(([^)]*)\)', symbol)
        if match is None:
            # Report the malformed input rather than failing with an
            # AttributeError on the missing match object.
            raise ValueError(
                'malformed treeherder symbol {!r}, expected "group(symbol)"'
                .format(treeherder_symbol))
        groupSymbol, symbol = match.groups()
    return groupSymbol, symbol
+
+
def join_symbol(group, symbol):
    """Combine a group and a symbol into the form grp(sym), the reverse of
    split_symbol. A group of '?' is omitted, leaving just the symbol."""
    return symbol if group == '?' else '{}({})'.format(group, symbol)
diff --git a/taskcluster/taskgraph/util/yaml.py b/taskcluster/taskgraph/util/yaml.py
new file mode 100644
index 000000000..4e541b775
--- /dev/null
+++ b/taskcluster/taskgraph/util/yaml.py
@@ -0,0 +1,16 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import os
+import yaml
+
+
def load_yaml(path, name):
    """Load and return the parsed contents of the YAML file ``name`` located
    in directory ``path``. This is useful for loading kind configuration
    files from the kind path."""
    with open(os.path.join(path, name), "rb") as stream:
        return yaml.load(stream)