diff options
Diffstat (limited to 'taskcluster/taskgraph/util')
-rw-r--r-- | taskcluster/taskgraph/util/__init__.py | 0 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/attributes.py | 26 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/docker.py | 160 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/python_path.py | 27 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/seta.py | 85 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/templates.py | 155 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/time.py | 114 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/treeherder.py | 24 | ||||
-rw-r--r-- | taskcluster/taskgraph/util/yaml.py | 16 |
9 files changed, 607 insertions, 0 deletions
# diff --git a/taskcluster/taskgraph/util/__init__.py b/taskcluster/taskgraph/util/__init__.py
# new file mode 100644
# index 000000000..e69de29bb
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/__init__.py
#
# diff --git a/taskcluster/taskgraph/util/attributes.py b/taskcluster/taskgraph/util/attributes.py
# new file mode 100644
# index 000000000..b44a3364f
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/attributes.py
# @@ -0,0 +1,26 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


def attrmatch(attributes, **kwargs):
    """Determine whether the given set of task attributes matches.

    The conditions are given as keyword arguments, where each keyword names
    an attribute.  The keyword value can be a literal, a set, or a callable:

    * a literal must equal the attribute value exactly;
    * given a set, the attribute value must be a member of the set;
    * a callable is called with the attribute value and must return a
      truthy result.

    If an attribute is specified as a keyword argument but is not present in
    ``attributes``, the result is False.  With no keyword arguments the
    result is True.

    :param dict attributes: the task attributes to test.
    :returns: True if every condition matches, False otherwise.
    """
    # dict.items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    for kwkey, kwval in kwargs.items():
        if kwkey not in attributes:
            return False
        attval = attributes[kwkey]
        if isinstance(kwval, set):
            if attval not in kwval:
                return False
        elif callable(kwval):
            if not kwval(attval):
                return False
        elif kwval != attval:
            return False
    return True


# diff --git a/taskcluster/taskgraph/util/docker.py b/taskcluster/taskgraph/util/docker.py
# new file mode 100644
# index 000000000..df97e57bc
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/docker.py
# @@ -0,0 +1,160 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import absolute_import, print_function, unicode_literals

import hashlib
import os
import shutil
import subprocess
import tarfile
import tempfile

from mozpack.archive import (
    create_tar_gz_from_files,
)


GECKO = os.path.realpath(os.path.join(__file__, '..', '..', '..', '..'))
DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
INDEX_PREFIX = 'docker.images.v2'
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'


def docker_image(name, default_version=None):
    '''Determine the docker image name, including repository and tag, from an
    in-tree docker file.

    The registry is read from ``<DOCKER_ROOT>/<name>/REGISTRY``, falling back
    to ``<DOCKER_ROOT>/REGISTRY``.  The version is read from
    ``<DOCKER_ROOT>/<name>/VERSION``, falling back to ``default_version``.

    :param name: name of the image directory under DOCKER_ROOT.
    :param default_version: version to use when no VERSION file exists.
    :returns: a string of the form ``registry/name:version``.
    :raises IOError: if no REGISTRY file can be read, or if the VERSION file
        is missing and no ``default_version`` was given.
    '''
    try:
        with open(os.path.join(DOCKER_ROOT, name, 'REGISTRY')) as f:
            registry = f.read().strip()
    except IOError:
        with open(os.path.join(DOCKER_ROOT, 'REGISTRY')) as f:
            registry = f.read().strip()

    try:
        with open(os.path.join(DOCKER_ROOT, name, 'VERSION')) as f:
            version = f.read().strip()
    except IOError:
        if not default_version:
            raise

        version = default_version

    return '{}/{}:{}'.format(registry, name, version)


def generate_context_hash(topsrcdir, image_path, image_name):
    """Generates a sha256 hash for context directory used to build an image.

    The context tarball is written to a temporary file, which is removed
    again before returning.

    :returns: the SHA-256 hex digest reported by ``create_context_tar()``.
    """

    # It is a bit unfortunate we have to create a temp file here - it would
    # be nicer to use an in-memory buffer.
    fd, p = tempfile.mkstemp()
    os.close(fd)
    try:
        return create_context_tar(topsrcdir, image_path, p, image_name)
    finally:
        os.unlink(p)


def create_context_tar(topsrcdir, context_dir, out_path, prefix):
    """Create a context tarball.

    A directory ``context_dir`` containing a Dockerfile will be assembled into
    a gzipped tar file at ``out_path``. Files inside the archive will be
    prefixed by directory ``prefix``.

    We also scan the source Dockerfile for special syntax that influences
    context generation.

    If a line in the Dockerfile has the form ``# %include <path>``,
    the relative path specified on that line will be matched against
    files in the source repository and added to the context under the
    path ``topsrcdir/``. If an entry is a directory, we add all files
    under that directory, preserving their layout relative to it.

    Returns the SHA-256 hex digest of the created archive.

    Raises ``Exception`` if an include path is absolute, resolves outside
    ``topsrcdir``, or does not exist.
    """
    archive_files = {}

    for root, dirs, files in os.walk(context_dir):
        for f in files:
            source_path = os.path.join(root, f)
            # relpath (rather than slicing by len(context_dir)) stays correct
            # when context_dir carries a trailing path separator.
            rel = os.path.relpath(source_path, context_dir)
            archive_path = os.path.join(prefix, rel)
            archive_files[archive_path] = source_path

    # Normalised root used for the containment check below.  The trailing
    # separator makes sure a sibling such as "<topsrcdir>-evil" is not
    # mistaken for a path inside topsrcdir.
    top = os.path.normpath(topsrcdir)
    top_prefix = top if top.endswith(os.sep) else top + os.sep

    # Parse Dockerfile for special syntax of extra files to include.
    # NOTE(review): the file is opened in binary mode and compared against
    # text literals; this relies on Python 2 string semantics.
    with open(os.path.join(context_dir, 'Dockerfile'), 'rb') as fh:
        for line in fh:
            line = line.rstrip()
            if not line.startswith('# %include'):
                continue

            p = line[len('# %include '):].strip()
            if os.path.isabs(p):
                raise Exception('extra include path cannot be absolute: %s' % p)

            fs_path = os.path.normpath(os.path.join(topsrcdir, p))
            # Check for filesystem traversal exploits.
            if fs_path != top and not fs_path.startswith(top_prefix):
                raise Exception('extra include path outside topsrcdir: %s' % p)

            if not os.path.exists(fs_path):
                raise Exception('extra include path does not exist: %s' % p)

            if os.path.isdir(fs_path):
                for root, dirs, files in os.walk(fs_path):
                    for f in files:
                        source_path = os.path.join(root, f)
                        # Keep the sub-directory structure below the included
                        # directory; joining only the basename would flatten
                        # nested files and let same-named files overwrite
                        # each other.
                        rel = os.path.relpath(source_path, fs_path)
                        archive_path = os.path.join(prefix, 'topsrcdir', p, rel)
                        archive_files[archive_path] = source_path
            else:
                archive_path = os.path.join(prefix, 'topsrcdir', p)
                archive_files[archive_path] = fs_path

    with open(out_path, 'wb') as fh:
        create_tar_gz_from_files(fh, archive_files, '%s.tar.gz' % prefix)

    # Hash the archive in fixed-size chunks to avoid loading it into memory.
    h = hashlib.sha256()
    with open(out_path, 'rb') as fh:
        while True:
            data = fh.read(32768)
            if not data:
                break
            h.update(data)
    return h.hexdigest()


def build_from_context(docker_bin, context_path, prefix, tag=None):
    """Build a Docker image from a context archive.

    Given the path to a `docker` binary, a image build tar.gz (produced with
    ``create_context_tar()``), a prefix in that context containing files, and
    an optional ``tag`` for the produced image, build that Docker image.

    The archive is unpacked into a temporary directory that is removed
    afterwards.  Raises ``Exception`` if ``docker build`` exits non-zero.
    """
    d = tempfile.mkdtemp()
    try:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # only pass archives produced by create_context_tar().
        with tarfile.open(context_path, 'r:gz') as tf:
            tf.extractall(d)

        # If we wanted to do post-processing of the Dockerfile, this is
        # where we'd do it.

        args = [
            docker_bin,
            'build',
            # Use --no-cache so we always get the latest package updates.
            '--no-cache',
        ]

        if tag:
            args.extend(['-t', tag])

        args.append('.')

        res = subprocess.call(args, cwd=os.path.join(d, prefix))
        if res:
            raise Exception('error building image')
    finally:
        shutil.rmtree(d)


# diff --git a/taskcluster/taskgraph/util/python_path.py b/taskcluster/taskgraph/util/python_path.py
# new file mode 100644
# index 000000000..b14223ca6
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/python_path.py
# @@ -0,0 +1,27 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals


def find_object(path):
    """
    Find a Python object given a path of the form <modulepath>:<objectpath>.
    Conceptually equivalent to

        def find_object(modulepath, objectpath):
            import <modulepath> as mod
            return mod.<objectpath>

    Raises ValueError if ``path`` does not contain exactly one colon.
    """
    if path.count(':') != 1:
        raise ValueError(
            'python path {!r} does not have the form "module:object"'.format(path))

    modulepath, objectpath = path.split(':')
    # __import__ returns the top-level package, so walk down the remaining
    # dotted components to reach the requested submodule.
    obj = __import__(modulepath)
    for a in modulepath.split('.')[1:]:
        obj = getattr(obj, a)
    for a in objectpath.split('.'):
        obj = getattr(obj, a)
    return obj


# diff --git a/taskcluster/taskgraph/util/seta.py b/taskcluster/taskgraph/util/seta.py
# new file mode 100644
# index 000000000..a0cd30675
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/seta.py
# @@ -0,0 +1,85 @@

import json
import logging
import requests
from redo import retry
from requests import exceptions

logger = logging.getLogger(__name__)
headers = {
    'User-Agent': 'TaskCluster'
}

# It's a list of project name which SETA is useful on
SETA_PROJECTS = ['mozilla-inbound', 'autoland']
SETA_ENDPOINT = "https://seta.herokuapp.com/data/setadetails/?branch=%s"


class SETA(object):
    """
    Interface to the SETA service, which defines low-value tasks that can be optimized out
    of the taskgraph.
    """
    def __init__(self):
        # cached low value tasks, by project
        self.low_value_tasks = {}

    def query_low_value_tasks(self, project):
        """Request the list of low value tasks for ``project`` from SETA.

        Low value tasks will be optimized out of the task graph.  Returns an
        empty list when SETA is not enabled for the project or when the
        service cannot be reached or returns unusable data.
        """
        if project not in SETA_PROJECTS:
            logger.debug("SETA is not enabled for project `{}`".format(project))
            return []

        logger.debug("Querying SETA service for low-value tasks on {}".format(project))
        low_value_tasks = []

        url = SETA_ENDPOINT % project
        # Try to fetch the SETA data twice, falling back to an empty list of low value tasks.
        # There are 10 seconds between each try.
        try:
            logger.debug("Retrieving low-value jobs list from SETA")
            response = retry(requests.get, attempts=2, sleeptime=10,
                             args=(url, ),
                             kwargs={'timeout': 5, 'headers': headers})
            task_list = json.loads(response.content).get('jobtypes', '')
            if len(task_list) > 0:
                # list(...) keeps this working on Python 3, where
                # dict.values() is a view and cannot be indexed.
                low_value_tasks = list(task_list.values())[0]

            # Bug 1315145, disable SETA for tier-1 platforms until backfill is implemented.
            low_value_tasks = [x for x in low_value_tasks
                               if 'debug' not in x and 'asan' not in x]

        # In the event of request times out, requests will raise a TimeoutError.
        except exceptions.Timeout:
            logger.warning("SETA server is timeout, we will treat all test tasks as high value.")

        # In the event of a network problem (e.g. DNS failure, refused connection, etc),
        # requests will raise a ConnectionError.
        except exceptions.ConnectionError:
            logger.warning("SETA server connection error,"
                           " we will treat all test tasks as high value.")

        # In the event of the rare invalid HTTP response(e.g 404, 401),
        # requests will raise an HTTPError exception
        except exceptions.HTTPError:
            logger.warning("We got bad Http response from ouija,"
                           " we will treat all test tasks as high value.")

        # Any other requests failure not handled above is only logged.
        except exceptions.RequestException as error:
            logger.warning(error)

        # When we get invalid JSON (i.e. 500 error), it results in a ValueError (bug 1313426)
        except ValueError as error:
            logger.warning("Invalid JSON, possible server error: {}".format(error))

        return low_value_tasks

    def is_low_value_task(self, label, project):
        """Return True if ``label`` is a low value task for ``project``."""
        # cache the low value tasks per project to avoid repeated SETA server queries
        if project not in self.low_value_tasks:
            self.low_value_tasks[project] = self.query_low_value_tasks(project)
        return label in self.low_value_tasks[project]


# create a single instance of this class, and expose its `is_low_value_task`
# bound method as a module-level function
is_low_value_task = SETA().is_low_value_task


# diff --git a/taskcluster/taskgraph/util/templates.py b/taskcluster/taskgraph/util/templates.py
# new file mode 100644
# index 000000000..97620fa75
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/templates.py
# @@ -0,0 +1,155 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# from __future__ import absolute_import, print_function, unicode_literals
# (kept as a comment: a __future__ import is only valid at the very top of a
# listing, where it is already declared above)

import os

import pystache
import yaml
import copy

# Key used in template inheritance...
INHERITS_KEY = '$inherits'


def merge_to(source, dest):
    '''
    Merge dict and arrays (override scalar values)

    Keys from source override keys from dest, and elements from lists in source
    are appended to lists in dest.

    Values taken from ``source`` are stored in ``dest`` by reference.

    :param dict source: to copy from
    :param dict dest: to copy to (modified in place)
    :returns: ``dest``
    '''

    for key, value in source.items():
        # Override mismatching or empty types
        if type(value) != type(dest.get(key)):  # noqa
            dest[key] = source[key]
            continue

        # Merge dict
        if isinstance(value, dict):
            merge_to(value, dest[key])
            continue

        if isinstance(value, list):
            dest[key] = dest[key] + source[key]
            continue

        dest[key] = source[key]

    return dest


def merge(*objects):
    '''
    Merge the given objects, using the semantics described for merge_to, with
    objects later in the list taking precedence.  From an inheritance
    perspective, "parents" should be listed before "children".

    Returns the result without modifying any arguments.
    '''
    result = copy.deepcopy(objects[0])
    for obj in objects[1:]:
        # merge_to stores values from its source by reference.  Merging a
        # private copy guarantees that a later merge step can never write
        # into a nested dict that still belongs to one of the arguments.
        merge_to(copy.deepcopy(obj), result)
    return result


class TemplatesException(Exception):
    pass


class Templates(object):
    '''
    The taskcluster integration makes heavy use of yaml to describe tasks;
    this class handles the loading/rendering.
    '''

    def __init__(self, root):
        '''
        Initialize the template render.

        :param str root: Root path where to load yaml files.
        :raises TemplatesException: if root is empty or not a directory.
        '''
        if not root:
            raise TemplatesException('Root is required')

        if not os.path.isdir(root):
            raise TemplatesException('Root must be a directory')

        self.root = root

    def _inherits(self, path, obj, properties, seen):
        '''
        Expand the '$inherits' entry of ``obj``: load the parent template
        named by its 'from' key and merge the remaining keys of ``obj`` over
        the result.

        :param str path: resolved path of the file ``obj`` was loaded from.
        :param dict obj: parsed yaml content (its '$inherits' key is removed).
        :param dict properties: parameters passed to the child; may be None.
        :param set seen: resolved paths already visited in this chain.
        :raises TemplatesException: on a missing parent, circular
            inheritance, or an error while loading the parent.
        '''
        blueprint = obj.pop(INHERITS_KEY)
        seen.add(path)

        # Validate before resolving: joining the root with a missing name
        # would fail with an unrelated error.
        source = blueprint.get('from')
        if not source:
            msg = '"{}" inheritance template missing'.format(path)
            raise TemplatesException(msg)

        # Resolve the path here so we can detect circular references.
        template = self.resolve_path(source)
        variables = blueprint.get('variables') or {}

        # Passed parameters override anything in the task itself.
        for key in (properties or {}):
            variables[key] = properties[key]

        if template in seen:
            msg = 'Error while handling "{}" in "{}" circular template ' + \
                  'inheritance seen \n {}'
            raise TemplatesException(msg.format(path, template, seen))

        try:
            # load() resolves relative to the root itself, so hand it the
            # unresolved name rather than resolving twice.
            out = self.load(source, variables, seen)
        except TemplatesException as e:
            msg = 'Error expanding parent ("{}") of "{}" original error {}'
            raise TemplatesException(msg.format(template, path, str(e)))

        # Anything left in obj is merged into final results (and overrides)
        return merge_to(obj, out)

    def render(self, path, content, parameters, seen):
        '''
        Renders a given yaml string.

        :param str path: used to prevent infinite recursion in inheritance.
        :param str content: Of yaml file.
        :param dict parameters: For mustache templates.
        :param set seen: Seen files (used for inheritance)
        '''
        content = pystache.render(content, parameters)
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # consider yaml.safe_load if templates may come from untrusted input.
        result = yaml.load(content)

        # In addition to the usual template logic done by mustache we also
        # handle special '$inherit' dict keys.
        if isinstance(result, dict) and INHERITS_KEY in result:
            return self._inherits(path, result, parameters, seen)

        return result

    def resolve_path(self, path):
        '''Return ``path`` joined onto the template root.'''
        return os.path.join(self.root, path)

    def load(self, path, parameters=None, seen=None):
        '''
        Load and render the given yaml path.

        :param str path: Location of yaml file to load (relative to root).
        :param dict parameters: To template yaml file with.
        :param set seen: Seen files (used for inheritance).
        :raises TemplatesException: if path is empty or is not a file.
        '''
        seen = seen or set()

        if not path:
            raise TemplatesException('path is required')

        path = self.resolve_path(path)

        if not os.path.isfile(path):
            raise TemplatesException('"{}" is not a file'.format(path))

        # Use a context manager so the file handle is closed promptly.
        with open(path) as f:
            content = f.read()
        return self.render(path, content, parameters, seen)


# diff --git a/taskcluster/taskgraph/util/time.py b/taskcluster/taskgraph/util/time.py
# new file mode 100644
# index 000000000..160aaa70c
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/time.py
# @@ -0,0 +1,114 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Python port of the ms.js node module. This is not a direct port: some things
# are more complicated or less precise, and we lean on timedelta here.

import re
import datetime

# A number (optionally with a decimal part), optional spaces, then a unit.
PATTERN = re.compile(
    r'((?:\d+)?\.?\d+) *([a-z]+)'
)


def _number(value):
    """Convert ``value`` to an int, falling back to float for decimal strings
    such as '1.5' (which PATTERN accepts but int() rejects)."""
    try:
        return int(value)
    except ValueError:
        return float(value)


def seconds(value):
    return datetime.timedelta(seconds=_number(value))


def minutes(value):
    return datetime.timedelta(minutes=_number(value))


def hours(value):
    return datetime.timedelta(hours=_number(value))


def days(value):
    return datetime.timedelta(days=_number(value))


def months(value):
    # See warning in years(), below
    return datetime.timedelta(days=_number(value) * 30)


def years(value):
    # Warning here "years" are vague don't use this for really sensitive date
    # computation the idea is to give you a absolute amount of time in the
    # future which is not the same thing as "precisely on this date next year"
    return datetime.timedelta(days=_number(value) * 365)


# Maps every accepted unit spelling to the function building its timedelta.
ALIASES = {}
ALIASES['seconds'] = ALIASES['second'] = ALIASES['s'] = seconds
ALIASES['minutes'] = ALIASES['minute'] = ALIASES['min'] = minutes
ALIASES['hours'] = ALIASES['hour'] = ALIASES['h'] = hours
ALIASES['days'] = ALIASES['day'] = ALIASES['d'] = days
ALIASES['months'] = ALIASES['month'] = ALIASES['mo'] = months
ALIASES['years'] = ALIASES['year'] = ALIASES['y'] = years


class InvalidString(Exception):
    pass


class UnknownTimeMeasurement(Exception):
    pass


def value_of(input_str):
    '''
    Convert a duration string to a ``datetime.timedelta``.

    :param str input_str: (ex: 1d, 2d, 6years, 2 seconds)
    :returns: the duration as a ``datetime.timedelta``
    :raises InvalidString: if no "<number><unit>" pair is found
    :raises UnknownTimeMeasurement: if the unit is not a key of ALIASES
    '''

    matches = PATTERN.search(input_str)

    if matches is None:
        raise InvalidString("'{}' is invalid string".format(input_str))

    value, unit = matches.groups()

    if unit not in ALIASES:
        raise UnknownTimeMeasurement(
            '{} is not a valid time measure use one of {}'.format(
                unit,
                sorted(ALIASES.keys())
            )
        )

    return ALIASES[unit](value)


def json_time_from_now(input_str, now=None, datetime_format=False):
    '''
    :param str input_str: Input string (see value of)
    :param datetime now: Optionally set the definition of `now`
    :param boolean datetime_format: Set `True` to get a `datetime` output
    :returns: JSON string representation of time in future.
    '''

    if now is None:
        now = datetime.datetime.utcnow()

    time = now + value_of(input_str)

    if datetime_format is True:
        return time
    else:
        # Sorta a big hack but the json schema validator for date does not like the
        # ISO dates until 'Z' (for timezone) is added...
        return time.isoformat() + 'Z'


def current_json_time(datetime_format=False):
    '''
    :param boolean datetime_format: Set `True` to get a `datetime` output
    :returns: JSON string representation of the current time.
    '''
    if datetime_format is True:
        return datetime.datetime.utcnow()
    else:
        return datetime.datetime.utcnow().isoformat() + 'Z'


# diff --git a/taskcluster/taskgraph/util/treeherder.py b/taskcluster/taskgraph/util/treeherder.py
# new file mode 100644
# index 000000000..e66db582f
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/treeherder.py
# @@ -0,0 +1,24 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals
import re


def split_symbol(treeherder_symbol):
    """Split a symbol expressed as grp(sym) into its two parts. If no group is
    given, the returned group is '?'.

    :returns: a ``(group, symbol)`` tuple.
    :raises ValueError: if the symbol contains '(' but does not have the
        form ``grp(sym)``.
    """
    groupSymbol = '?'
    symbol = treeherder_symbol
    if '(' in symbol:
        match = re.match(r'([^(]*)\(([^)]*)\)', symbol)
        if match is None:
            # e.g. an unterminated "grp(sym": report it clearly rather than
            # failing with an AttributeError on None.
            raise ValueError(
                'malformed treeherder symbol {!r}'.format(treeherder_symbol))
        groupSymbol, symbol = match.groups()
    return groupSymbol, symbol


def join_symbol(group, symbol):
    """Perform the reverse of split_symbol, combining the given group and
    symbol. If the group is '?', then it is omitted."""
    if group == '?':
        return symbol
    return '{}({})'.format(group, symbol)


# diff --git a/taskcluster/taskgraph/util/yaml.py b/taskcluster/taskgraph/util/yaml.py
# new file mode 100644
# index 000000000..4e541b775
# --- /dev/null
# +++ b/taskcluster/taskgraph/util/yaml.py
# @@ -0,0 +1,16 @@

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# from __future__ import absolute_import, print_function, unicode_literals
# (kept as a comment: a __future__ import is only valid at the very top of a
# listing, where it is already declared above)

import os
import yaml


def load_yaml(path, name):
    """Convenience function to load a YAML file in the given path. This is
    useful for loading kind configuration files from the kind path.

    :param path: directory containing the file.
    :param name: file name within ``path``.
    :returns: the parsed YAML document.
    """
    filename = os.path.join(path, name)
    with open(filename, "rb") as f:
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # consider yaml.safe_load if these files may be untrusted.
        return yaml.load(f)