"""Fetch average per-test durations from ActiveData and write runtime files.

For a given suite, the script queries ActiveData for the average duration of
every test, then writes ``<suite>.runtimes.json`` containing the
durations of the slowest tests plus a single average for all the faster tests
that were left out.
"""

from argparse import ArgumentParser
from collections import defaultdict
import json
import os
import sys

try:
    import requests
except ImportError:
    # Only query_activedata() needs the HTTP client; keep write_runtimes()
    # and the argument parsing importable without it.
    requests = None

here = os.path.abspath(os.path.dirname(__file__))

ACTIVE_DATA_URL = "http://activedata.allizom.org/query"
PERCENTILE = 0.5  # ignore the bottom PERCENTILE*100% of numbers


def _build_query(suite, e10s, platforms=None):
    """Return the ActiveData query for *suite* as a JSON string.

    The query is assembled as a data structure and serialised with
    ``json.dumps`` so that suite and platform names are always escaped
    correctly.
    """
    suite_filter = {"run.suite": suite}
    if platforms:
        suite_filter["build.platform"] = platforms

    e10s_filter = {"eq": {"run.type": "e10s"}}
    if not e10s:
        e10s_filter = {"not": e10s_filter}

    return json.dumps({
        "from": "unittest",
        "limit": 200000,
        "groupby": ["result.test"],
        "select": {"value": "result.duration", "aggregate": "average"},
        "where": {"and": [
            {"eq": suite_filter},
            e10s_filter,
            # "{{today-week}}" is passed through verbatim for the server to
            # interpret.
            {"gt": {"run.timestamp": "{{today-week}}"}},
        ]},
    })


def query_activedata(suite, e10s, platforms=None):
    """Query ActiveData for the average duration of each test in *suite*.

    :param suite: name of the suite to match against ``run.suite``.
    :param e10s: if true, only e10s runs are considered; otherwise only
        non-e10s runs.
    :param platforms: optional list of values to match against
        ``build.platform``.
    :returns: the ``"data"`` member of the JSON response. write_runtimes()
        passes it to ``dict()``, so it is presumably a sequence of
        ``[test, duration]`` pairs -- confirm against the service.
    :raises ImportError: if the ``requests`` package is not installed.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    if requests is None:
        raise ImportError(
            "the 'requests' package is required to query ActiveData")

    query = _build_query(suite, e10s, platforms)
    response = requests.post(ACTIVE_DATA_URL, data=query, stream=True)
    response.raise_for_status()
    return response.json()["data"]


def _scale_duration(duration):
    """Return *duration* multiplied by 1000 as an int; 0 for missing values.

    NOTE(review): presumably converts seconds to milliseconds -- confirm
    against the units ActiveData reports.
    """
    return int(duration * 1000) if duration else 0


def write_runtimes(data, suite, indir=here, outdir=here):
    """Write ``<suite>.runtimes.json`` into *outdir*.

    Durations below a threshold (the PERCENTILE-th position of the sorted
    non-zero durations) are not written individually; only their average is
    stored as ``excluded_test_average``. Durations at or above the threshold
    are stored per test under ``runtimes``.

    :param data: anything ``dict()`` accepts, mapping test name to average
        duration (``None``/0 meaning "no duration available").
    :param suite: suite name, used to build the file name.
    :param indir: directory holding a previous ``<suite>.runtimes.json``
        whose ``runtimes`` entries are used as the starting point, if the
        file exists.
    :param outdir: directory to write the new file to; created if missing.
    """
    data = dict(data)

    outfilename = os.path.join(outdir, "%s.runtimes.json" % suite)
    infilename = os.path.join(indir, "%s.runtimes.json" % suite)
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # read in existing data, if any
    specified = {}
    if os.path.exists(infilename):
        with open(infilename, 'r') as f:
            specified = json.load(f).get('runtimes') or {}

    durations = dict((test, _scale_duration(value))
                     for test, value in data.items())

    # identify a threshold of durations, below which we ignore
    runtimes = sorted(d for d in durations.values() if d)
    if runtimes:
        # Clamp so that a PERCENTILE of 1.0 cannot index past the end.
        cutoff = min(int(len(runtimes) * PERCENTILE), len(runtimes) - 1)
        threshold = runtimes[cutoff]
    else:
        # No test has a usable duration; nothing below is classified.
        threshold = 0

    # split the durations into two groups; omitted and specified
    omitted = []
    for test, duration in durations.items():
        if not duration:
            # No data for this test: leave any previously stored value alone.
            continue
        if 0 < duration < threshold:
            omitted.append(duration)
            specified.pop(test, None)
        elif duration >= threshold and test != "automation.py":
            original = specified.get(test, 0)
            # Only write new data if it differs from the stored value by
            # more than 1/20th (5%) of that value; this avoids churn in the
            # output file. Written as a multiplication so the result does
            # not depend on Python 2 vs 3 division semantics.
            if not original or abs(original - duration) * 20 > original:
                specified[test] = duration

    # delete any test references no longer needed
    for test in [t for t in specified if t not in durations]:
        del specified[test]

    # An empty "omitted" group is possible (e.g. a single test, or all
    # durations equal); report 0 instead of dividing by zero.
    avg = sum(omitted) // len(omitted) if omitted else 0

    results = {'excluded_test_average': avg,
               'runtimes': specified}

    with open(outfilename, 'w') as f:
        f.write(json.dumps(results, indent=2, sort_keys=True))


def cli(args=None):
    """Command-line entry point.

    :param args: list of argument strings; defaults to ``sys.argv[1:]``
        as evaluated at call time.
    :raises ValueError: if no suite is given or several suites are passed.
    """
    parser = ArgumentParser()
    parser.add_argument('-o', '--output-directory', dest='outdir',
                        default=here,
                        help="Directory to save runtime data.")
    parser.add_argument('-i', '--input-directory', dest='indir',
                        default=here,
                        help="Directory from which to read current runtime "
                             "data.")
    parser.add_argument('-p', '--platforms', default=None,
                        help="Comma separated list of platforms from which "
                             "to generate data.")
    parser.add_argument('-s', '--suite', dest='suite', default=None,
                        help="Suite for which to generate data.")
    parser.add_argument('--disable-e10s', dest='e10s', default=True,
                        action='store_false',
                        help="Generate runtimes for non-e10s tests.")
    args = parser.parse_args(args)

    if not args.suite:
        raise ValueError("Must specify suite with the -s argument")
    if ',' in args.suite:
        raise ValueError("Passing multiple suites is not supported")

    if args.platforms:
        args.platforms = args.platforms.split(',')

    data = query_activedata(args.suite, args.e10s, args.platforms)

    # e10s results are stored under a separate "<suite>-e10s" file name.
    suite = args.suite
    if args.e10s:
        suite = '%s-e10s' % suite
    write_runtimes(data, suite, indir=args.indir, outdir=args.outdir)


if __name__ == "__main__":
    sys.exit(cli())