diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /js/src/devtools/rootAnalysis | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'js/src/devtools/rootAnalysis')
34 files changed, 4447 insertions, 0 deletions
diff --git a/js/src/devtools/rootAnalysis/CFG.js b/js/src/devtools/rootAnalysis/CFG.js new file mode 100644 index 000000000..6e9facaa1 --- /dev/null +++ b/js/src/devtools/rootAnalysis/CFG.js @@ -0,0 +1,159 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +var functionBodies; + +function findAllPoints(bodies, blockId) +{ + var points = []; + var body; + + for (var xbody of bodies) { + if (sameBlockId(xbody.BlockId, blockId)) { + assert(!body); + body = xbody; + } + } + assert(body); + + if (!("PEdge" in body)) + return; + for (var edge of body.PEdge) { + points.push([body, edge.Index[0]]); + if (edge.Kind == "Loop") + Array.prototype.push.apply(points, findAllPoints(bodies, edge.BlockId)); + } + + return points; +} + +function isMatchingDestructor(constructor, edge) +{ + if (edge.Kind != "Call") + return false; + var callee = edge.Exp[0]; + if (callee.Kind != "Var") + return false; + var variable = callee.Variable; + assert(variable.Kind == "Func"); + if (variable.Name[1].charAt(0) != '~') + return false; + + var constructExp = constructor.PEdgeCallInstance.Exp; + assert(constructExp.Kind == "Var"); + + var destructExp = edge.PEdgeCallInstance.Exp; + if (destructExp.Kind != "Var") + return false; + + return sameVariable(constructExp.Variable, destructExp.Variable); +} + +// Return all calls within the RAII scope of any constructor matched by +// isConstructor(). (Note that this would be insufficient if you needed to +// treat each instance separately, such as when different regions of a function +// body were guarded by these constructors and you needed to do something +// different with each.) +function allRAIIGuardedCallPoints(bodies, body, isConstructor) +{ + if (!("PEdge" in body)) + return []; + + var points = []; + + for (var edge of body.PEdge) { + if (edge.Kind != "Call") + continue; + var callee = edge.Exp[0]; + if (callee.Kind != "Var") + continue; + var variable = callee.Variable; + assert(variable.Kind == "Func"); + if (!isConstructor(edge.Type, variable.Name)) + continue; + if (!("PEdgeCallInstance" in edge)) + continue; + if (edge.PEdgeCallInstance.Exp.Kind != "Var") + continue; + + Array.prototype.push.apply(points, pointsInRAIIScope(bodies, body, edge)); + } + + return points; +} + +// Test whether the given edge is the constructor corresponding to the given +// destructor edge +function isMatchingConstructor(destructor, edge) +{ + if (edge.Kind != "Call") + return false; + var callee = edge.Exp[0]; + if (callee.Kind != "Var") + return false; + var variable = callee.Variable; + if (variable.Kind != "Func") + return false; + var name = readable(variable.Name[0]); + var destructorName = readable(destructor.Exp[0].Variable.Name[0]); + var match = destructorName.match(/^(.*?::)~(\w+)\(/); + if (!match) { + printErr("Unhandled destructor syntax: " + destructorName); + return false; + } + var constructorSubstring = match[1] + match[2]; + if (name.indexOf(constructorSubstring) == -1) + return false; + + var destructExp = destructor.PEdgeCallInstance.Exp; + assert(destructExp.Kind == "Var"); + + var constructExp = edge.PEdgeCallInstance.Exp; + if (constructExp.Kind != "Var") + return false; + + return sameVariable(constructExp.Variable, destructExp.Variable); +} + +function findMatchingConstructor(destructorEdge, body) +{ + var worklist = [destructorEdge]; + var predecessors = getPredecessors(body); + while(worklist.length > 0) { + var edge = worklist.pop(); + if (isMatchingConstructor(destructorEdge, edge)) + return edge; + if (edge.Index[0] in predecessors) { + for (var e of predecessors[edge.Index[0]]) + worklist.push(e); + } + } + printErr("Could not find matching constructor!"); + debugger; +} + +function pointsInRAIIScope(bodies, body, constructorEdge) { + var seen = {}; + var worklist = [constructorEdge.Index[1]]; + var points = []; + while (worklist.length) { + var point = worklist.pop(); + if (point in seen) + continue; + seen[point] = true; + points.push([body, point]); + var successors = getSuccessors(body); + if (!(point in successors)) + continue; + for (var nedge of successors[point]) { + if (isMatchingDestructor(constructorEdge, nedge)) + continue; + if (nedge.Kind == "Loop") + Array.prototype.push.apply(points, findAllPoints(bodies, nedge.BlockId)); + worklist.push(nedge.Index[1]); + } + } + + return points; +} diff --git a/js/src/devtools/rootAnalysis/Makefile.in b/js/src/devtools/rootAnalysis/Makefile.in new file mode 100644 index 000000000..896e03e65 --- /dev/null +++ b/js/src/devtools/rootAnalysis/Makefile.in @@ -0,0 +1,79 @@ +# -*- Mode: makefile -*- +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This Makefile is used to kick off a static rooting analysis. This Makefile is +# NOT intended for use as part of the standard Mozilla build. Instead, this +# Makefile will use $PATH to subvert compiler invocations to add in the sixgill +# plugin, and then do a regular build of whatever portion of the tree you are +# analyzing. The plugins will dump out several xdb database files. Various +# analysis scripts, written in JS, will run over those database files to +# produce the final analysis output. + +include $(topsrcdir)/config/config.mk + +# Tree to build and analyze, defaulting to the current tree +TARGET_JSOBJDIR ?= $(MOZ_BUILD_ROOT) + +# Path to a JS binary to use to run the analysis. You really want this to be an +# optimized build. +JS ?= $(MOZ_BUILD_ROOT)/shell/js + +# Path to an xgill checkout containing the GCC plugin, xdb-processing binaries, +# and compiler wrapper scripts used to automatically integrate into an existing +# build system. +SIXGILL ?= @SIXGILL_PATH@ + +# Path to the JS scripts that will perform the analysis, defaulting to the same +# place as this Makefile.in, which is probably what you want. +ANALYSIS_SCRIPT_DIR ?= $(srcdir) + +# Number of simultanous analyzeRoots.js scripts to run. +JOBS ?= 6 + +all : rootingHazards.txt allFunctions.txt + +CALL_JS := time env PATH=$$PATH:$(SIXGILL)/bin XDB=$(SIXGILL)/bin/xdb.so $(JS) + +src_body.xdb src_comp.xdb: run_complete + @echo Started compilation at $$(date) + $(ANALYSIS_SCRIPT_DIR)/run_complete --foreground --build-root=$(TARGET_JSOBJDIR) --work-dir=work -b $(SIXGILL)/bin $(CURDIR) + @echo Finished compilation at $$(date) + +callgraph.txt: src_body.xdb src_comp.xdb computeCallgraph.js + @echo Started computation of $@ at $$(date) + $(CALL_JS) $(ANALYSIS_SCRIPT_DIR)/computeCallgraph.js > $@.tmp + mv $@.tmp $@ + @echo Finished computation of $@ at $$(date) + +gcFunctions.txt: callgraph.txt computeGCFunctions.js annotations.js + @echo Started computation of $@ at $$(date) + $(CALL_JS) $(ANALYSIS_SCRIPT_DIR)/computeGCFunctions.js ./callgraph.txt > $@.tmp + mv $@.tmp $@ + @echo Finished computation of $@ at $$(date) + +gcFunctions.lst: gcFunctions.txt + perl -lne 'print $$1 if /^GC Function: (.*)/' gcFunctions.txt > $@ + +suppressedFunctions.lst: gcFunctions.txt + perl -lne 'print $$1 if /^Suppressed Function: (.*)/' gcFunctions.txt > $@ + +gcTypes.txt: src_comp.xdb computeGCTypes.js annotations.js + @echo Started computation of $@ at $$(date) + $(CALL_JS) $(ANALYSIS_SCRIPT_DIR)/computeGCTypes.js > $@.tmp + mv $@.tmp $@ + @echo Finished computation of $@ at $$(date) + +allFunctions.txt: src_body.xdb + @echo Started computation of $@ at $$(date) + time $(SIXGILL)/bin/xdbkeys $^ > $@.tmp + mv $@.tmp $@ + @echo Finished computation of $@ at $$(date) + +rootingHazards.txt: gcFunctions.lst suppressedFunctions.lst gcTypes.txt analyzeRoots.js annotations.js gen-hazards.sh + @echo Started computation of $@ at $$(date) + time env JS=$(JS) ANALYZE='$(ANALYSIS_SCRIPT_DIR)/analyzeRoots.js' SIXGILL='$(SIXGILL)' '$(ANALYSIS_SCRIPT_DIR)/gen-hazards.sh' $(JOBS) > $@.tmp + mv $@.tmp $@ + @echo Finished computation of $@ at $$(date) diff --git a/js/src/devtools/rootAnalysis/README.md b/js/src/devtools/rootAnalysis/README.md new file mode 100644 index 000000000..0588cae66 --- /dev/null +++ b/js/src/devtools/rootAnalysis/README.md @@ -0,0 +1,64 @@ +# Spidermonkey JSAPI rooting analysis + +This directory contains scripts for running Brian Hackett's static GC rooting +analysis on a JS source directory. + +To use it on SpiderMonkey: + +1. Be on Fedora/CentOS/RedHat Linux x86_64, or a Docker image of one of those. + + Specifically, the prebuilt GCC **won't work on Ubuntu** + without the `CFLAGS` and `CXXFLAGS` settings from + <http://trac.wildfiregames.com/wiki/StaticRootingAnalysis>. + +2. Have the Gecko build prerequisites installed. + +3. Install taskcluster-vcs, eg by doing + + npm install taskcluster-vcs + export PATH="$PATH:$(pwd)/node_modules/.bin" + +4. In some directory, using $SRCDIR as the top of your Gecko source checkout, + run these commands: + + mkdir work + cd work + ( export GECKO_DIR=$SRCDIR; $GECKO_DIR/taskcluster/scripts/builder/build-haz-linux.sh $(pwd) --dep ) + +The `--dep` is optional, and will avoid rebuilding the JS shell used to run the +analysis later. + +If you see the error ``/lib/../lib64/crti.o: unrecognized relocation (0x2a) in section .init`` then have a version mismatch between the precompiled gcc used in automation and your installed glibc. The easiest way to fix this is to delete the ld provided with the precompiled gcc (it will be in two places, one given in the first part of the error message), which will cause gcc to fall back to your system ld. But you will need to additionally pass ``--no-tooltool`` to build-haz-linux.sh. With the current package, you could do the deletion with + + rm gcc/bin/ld + rm gcc/x86_64-unknown-linux-gnu/bin/ld + +Output goes to `analysis/hazards.txt`. This will run the +analysis on the js/src tree only; if you wish to analyze the full browser, use + + ( export GECKO_DIR=$SRCDIR; $GECKO_DIR/taskcluster/scripts/builder/build-haz-linux.sh --project browser $(pwd) ) + +After running the analysis once, you can reuse the `*.xdb` database files +generated, using modified analysis scripts, by running +`analysis/run-analysis.sh` (or pass `--list` to see ways to select even more +restrictive parts of the overall analysis; the default is `gcTypes` which will +do everything but regenerate the xdb files). + +Also, you can pass `-v` to get exact command lines to cut & paste for running the +various stages, which is helpful for running under a debugger. + + +## Overview of what is going on here + +So what does this actually do? + +1. It downloads a GCC compiler and plugin ("sixgill") from Mozilla servers, using + "tooltool" (a binary archive tool). + +2. It runs `run_complete`, a script that builds the target codebase with the + downloaded GCC, generating a few database files containing control flow + graphs of the full compile, along with type information etc. + +3. Then it runs `analyze.py`, a Python script, which runs all the scripts + which actually perform the analysis -- the tricky parts. + (Those scripts are written in JS.) diff --git a/js/src/devtools/rootAnalysis/analyze.py b/js/src/devtools/rootAnalysis/analyze.py new file mode 100755 index 000000000..69482dab7 --- /dev/null +++ b/js/src/devtools/rootAnalysis/analyze.py @@ -0,0 +1,298 @@ +#!/usr/bin/python + +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Runs the static rooting analysis +""" + +from subprocess import Popen +import subprocess +import os +import argparse +import sys +import re + +def env(config): + e = dict(os.environ) + e['PATH'] = ':'.join(p for p in (config.get('gcc_bin'), config.get('sixgill_bin'), e['PATH']) if p) + e['XDB'] = '%(sixgill_bin)s/xdb.so' % config + e['SOURCE'] = config['source'] + e['ANALYZED_OBJDIR'] = config['objdir'] + return e + +def fill(command, config): + try: + return tuple(s % config for s in command) + except: + print("Substitution failed:") + problems = [] + for fragment in command: + try: + fragment % config + except: + problems.append(fragment) + raise Exception("\n".join(["Substitution failed:"] + [ " %s" % s for s in problems ])) + +def print_command(command, outfile=None, env=None): + output = ' '.join(command) + if outfile: + output += ' > ' + outfile + if env: + changed = {} + e = os.environ + for key,value in env.items(): + if (key not in e) or (e[key] != value): + changed[key] = value + if changed: + outputs = [] + for key, value in changed.items(): + if key in e and e[key] in value: + start = value.index(e[key]) + end = start + len(e[key]) + outputs.append('%s="%s${%s}%s"' % (key, + value[:start], + key, + value[end:])) + else: + outputs.append("%s='%s'" % (key, value)) + output = ' '.join(outputs) + " " + output + + print output + +def generate_hazards(config, outfilename): + jobs = [] + for i in range(int(config['jobs'])): + command = fill(('%(js)s', + '%(analysis_scriptdir)s/analyzeRoots.js', + '%(gcFunctions_list)s', + '%(gcEdges)s', + '%(suppressedFunctions_list)s', + '%(gcTypes)s', + '%(typeInfo)s', + str(i+1), '%(jobs)s', + 'tmp.%s' % (i+1,)), + config) + outfile = 'rootingHazards.%s' % (i+1,) + output = open(outfile, 'w') + if config['verbose']: + print_command(command, outfile=outfile, env=env(config)) + jobs.append((command, Popen(command, stdout=output, env=env(config)))) + + final_status = 0 + while jobs: + pid, status = os.wait() + jobs = [ job for job in jobs if job[1].pid != pid ] + final_status = final_status or status + + if final_status: + raise subprocess.CalledProcessError(final_status, 'analyzeRoots.js') + + with open(outfilename, 'w') as output: + command = ['cat'] + [ 'rootingHazards.%s' % (i+1,) for i in range(int(config['jobs'])) ] + if config['verbose']: + print_command(command, outfile=outfilename) + subprocess.call(command, stdout=output) + +JOBS = { 'dbs': + (('%(ANALYSIS_SCRIPTDIR)s/run_complete', + '--foreground', + '--no-logs', + '--build-root=%(objdir)s', + '--wrap-dir=%(sixgill)s/scripts/wrap_gcc', + '--work-dir=work', + '-b', '%(sixgill_bin)s', + '--buildcommand=%(buildcommand)s', + '.'), + ()), + + 'list-dbs': + (('ls', '-l'), + ()), + + 'callgraph': + (('%(js)s', '%(analysis_scriptdir)s/computeCallgraph.js', '%(typeInfo)s'), + 'callgraph.txt'), + + 'gcFunctions': + (('%(js)s', '%(analysis_scriptdir)s/computeGCFunctions.js', '%(callgraph)s', + '[gcFunctions]', '[gcFunctions_list]', '[gcEdges]', '[suppressedFunctions_list]'), + ('gcFunctions.txt', 'gcFunctions.lst', 'gcEdges.txt', 'suppressedFunctions.lst')), + + 'gcTypes': + (('%(js)s', '%(analysis_scriptdir)s/computeGCTypes.js', + '[gcTypes]', '[typeInfo]'), + ('gcTypes.txt', 'typeInfo.txt')), + + 'allFunctions': + (('%(sixgill_bin)s/xdbkeys', 'src_body.xdb',), + 'allFunctions.txt'), + + 'hazards': + (generate_hazards, 'rootingHazards.txt'), + + 'explain': + ((os.environ.get('PYTHON', 'python2.7'), + '%(analysis_scriptdir)s/explain.py', + '%(hazards)s', '%(gcFunctions)s', + '[explained_hazards]', '[unnecessary]', '[refs]'), + ('hazards.txt', 'unnecessary.txt', 'refs.txt')) + } + +def out_indexes(command): + for i in range(len(command)): + m = re.match(r'^\[(.*)\]$', command[i]) + if m: + yield (i, m.group(1)) + +def run_job(name, config): + cmdspec, outfiles = JOBS[name] + print("Running " + name + " to generate " + str(outfiles)) + if hasattr(cmdspec, '__call__'): + cmdspec(config, outfiles) + else: + temp_map = {} + cmdspec = fill(cmdspec, config) + if isinstance(outfiles, basestring): + stdout_filename = '%s.tmp' % name + temp_map[stdout_filename] = outfiles + if config['verbose']: + print_command(cmdspec, outfile=outfiles, env=env(config)) + else: + stdout_filename = None + pc = list(cmdspec) + outfile = 0 + for (i, name) in out_indexes(cmdspec): + pc[i] = outfiles[outfile] + outfile += 1 + if config['verbose']: + print_command(pc, env=env(config)) + + command = list(cmdspec) + outfile = 0 + for (i, name) in out_indexes(cmdspec): + command[i] = '%s.tmp' % name + temp_map[command[i]] = outfiles[outfile] + outfile += 1 + + sys.stdout.flush() + if stdout_filename is None: + subprocess.check_call(command, env=env(config)) + else: + with open(stdout_filename, 'w') as output: + subprocess.check_call(command, stdout=output, env=env(config)) + for (temp, final) in temp_map.items(): + try: + os.rename(temp, final) + except OSError: + print("Error renaming %s -> %s" % (temp, final)) + raise + +config = { 'ANALYSIS_SCRIPTDIR': os.path.dirname(__file__) } + +defaults = [ '%s/defaults.py' % config['ANALYSIS_SCRIPTDIR'], + '%s/defaults.py' % os.getcwd() ] + +parser = argparse.ArgumentParser(description='Statically analyze build tree for rooting hazards.') +parser.add_argument('step', metavar='STEP', type=str, nargs='?', + help='run starting from this step') +parser.add_argument('--source', metavar='SOURCE', type=str, nargs='?', + help='source code to analyze') +parser.add_argument('--objdir', metavar='DIR', type=str, nargs='?', + help='object directory of compiled files') +parser.add_argument('--js', metavar='JSSHELL', type=str, nargs='?', + help='full path to ctypes-capable JS shell') +parser.add_argument('--upto', metavar='UPTO', type=str, nargs='?', + help='last step to execute') +parser.add_argument('--jobs', '-j', default=None, metavar='JOBS', type=int, + help='number of simultaneous analyzeRoots.js jobs') +parser.add_argument('--list', const=True, nargs='?', type=bool, + help='display available steps') +parser.add_argument('--buildcommand', '--build', '-b', type=str, nargs='?', + help='command to build the tree being analyzed') +parser.add_argument('--tag', '-t', type=str, nargs='?', + help='name of job, also sets build command to "build.<tag>"') +parser.add_argument('--expect-file', type=str, nargs='?', + help='deprecated option, temporarily still present for backwards compatibility') +parser.add_argument('--verbose', '-v', action='store_true', + help='Display cut & paste commands to run individual steps') + +args = parser.parse_args() + +for default in defaults: + try: + execfile(default, config) + if args.verbose: + print("Loaded %s" % default) + except: + pass + +data = config.copy() + +for k,v in vars(args).items(): + if v is not None: + data[k] = v + +if args.tag and not args.buildcommand: + args.buildcommand="build.%s" % args.tag + +if args.jobs is not None: + data['jobs'] = args.jobs +if not data.get('jobs'): + data['jobs'] = subprocess.check_output(['nproc', '--ignore=1']).strip() + +if args.buildcommand: + data['buildcommand'] = args.buildcommand +elif 'BUILD' in os.environ: + data['buildcommand'] = os.environ['BUILD'] +else: + data['buildcommand'] = 'make -j4 -s' + +if 'ANALYZED_OBJDIR' in os.environ: + data['objdir'] = os.environ['ANALYZED_OBJDIR'] + +if 'SOURCE' in os.environ: + data['source'] = os.environ['SOURCE'] +if not data.get('source') and data.get('sixgill_bin'): + path = subprocess.check_output(['sh', '-c', data['sixgill_bin'] + '/xdbkeys file_source.xdb | grep jsapi.cpp']) + data['source'] = path.replace("/js/src/jsapi.cpp", "") + +steps = [ 'dbs', + 'gcTypes', + 'callgraph', + 'gcFunctions', + 'allFunctions', + 'hazards', + 'explain' ] + +if args.list: + for step in steps: + command, outfilename = JOBS[step] + if outfilename: + print("%s -> %s" % (step, outfilename)) + else: + print(step) + sys.exit(0) + +for step in steps: + command, outfiles = JOBS[step] + if isinstance(outfiles, basestring): + data[step] = outfiles + else: + outfile = 0 + for (i, name) in out_indexes(command): + data[name] = outfiles[outfile] + outfile += 1 + assert len(outfiles) == outfile, 'step \'%s\': mismatched number of output files (%d) and params (%d)' % (step, outfile, len(outfiles)) + +if args.step: + steps = steps[steps.index(args.step):] + +if args.upto: + steps = steps[:steps.index(args.upto)+1] + +for step in steps: + run_job(step, data) diff --git a/js/src/devtools/rootAnalysis/analyzeRoots.js b/js/src/devtools/rootAnalysis/analyzeRoots.js new file mode 100644 index 000000000..61b46e387 --- /dev/null +++ b/js/src/devtools/rootAnalysis/analyzeRoots.js @@ -0,0 +1,871 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('CFG.js'); + +var sourceRoot = (os.getenv('SOURCE') || '') + '/' + +var functionName; +var functionBodies; + +if (typeof scriptArgs[0] != 'string' || typeof scriptArgs[1] != 'string') + throw "Usage: analyzeRoots.js [-f function_name] <gcFunctions.lst> <gcEdges.txt> <suppressedFunctions.lst> <gcTypes.txt> <typeInfo.txt> [start end [tmpfile]]"; + +var theFunctionNameToFind; +if (scriptArgs[0] == '--function' || scriptArgs[0] == '-f') { + theFunctionNameToFind = scriptArgs[1]; + scriptArgs = scriptArgs.slice(2); +} + +var gcFunctionsFile = scriptArgs[0] || "gcFunctions.lst"; +var gcEdgesFile = scriptArgs[1] || "gcEdges.txt"; +var suppressedFunctionsFile = scriptArgs[2] || "suppressedFunctions.lst"; +var gcTypesFile = scriptArgs[3] || "gcTypes.txt"; +var typeInfoFile = scriptArgs[4] || "typeInfo.txt"; +var batch = (scriptArgs[5]|0) || 1; +var numBatches = (scriptArgs[6]|0) || 1; +var tmpfile = scriptArgs[7] || "tmp.txt"; + +GCSuppressionTypes = loadTypeInfo(typeInfoFile)["Suppress GC"] || []; + +var gcFunctions = {}; +var text = snarf("gcFunctions.lst").split("\n"); +assert(text.pop().length == 0); +for (var line of text) + gcFunctions[mangled(line)] = true; + +var suppressedFunctions = {}; +var text = snarf(suppressedFunctionsFile).split("\n"); +assert(text.pop().length == 0); +for (var line of text) { + suppressedFunctions[line] = true; +} +text = null; + +var gcEdges = {}; +text = snarf(gcEdgesFile).split('\n'); +assert(text.pop().length == 0); +for (var line of text) { + var [ block, edge, func ] = line.split(" || "); + if (!(block in gcEdges)) + gcEdges[block] = {} + gcEdges[block][edge] = func; +} +text = null; + +var match; +var gcThings = {}; +var gcPointers = {}; + +text = snarf(gcTypesFile).split("\n"); +for (var line of text) { + if (match = /^GCThing: (.*)/.exec(line)) + gcThings[match[1]] = true; + if (match = /^GCPointer: (.*)/.exec(line)) + gcPointers[match[1]] = true; +} +text = null; + +function isGCType(type) +{ + if (type.Kind == "CSU") + return type.Name in gcThings; + else if (type.Kind == "Array") + return isGCType(type.Type); + return false; +} + +function isUnrootedType(type) +{ + if (type.Kind == "Pointer") + return isGCType(type.Type); + else if (type.Kind == "Array") + return isUnrootedType(type.Type); + else if (type.Kind == "CSU") + return type.Name in gcPointers; + else + return false; +} + +function expressionUsesVariable(exp, variable) +{ + if (exp.Kind == "Var" && sameVariable(exp.Variable, variable)) + return true; + if (!("Exp" in exp)) + return false; + for (var childExp of exp.Exp) { + if (expressionUsesVariable(childExp, variable)) + return true; + } + return false; +} + +function expressionUsesVariableContents(exp, variable) +{ + if (!("Exp" in exp)) + return false; + for (var childExp of exp.Exp) { + if (childExp.Kind == 'Drf') { + if (expressionUsesVariable(childExp, variable)) + return true; + } else if (expressionUsesVariableContents(childExp, variable)) { + return true; + } + } + return false; +} + +// Detect simple |return nullptr;| statements. +function isReturningImmobileValue(edge, variable) +{ + if (variable.Kind == "Return") { + if (edge.Exp[0].Kind == "Var" && sameVariable(edge.Exp[0].Variable, variable)) { + if (edge.Exp[1].Kind == "Int" && edge.Exp[1].String == "0") { + return true; + } + } + } + return false; +} + +// If the edge uses the given variable's value, return the earliest point at +// which the use is definite. Usually, that means the source of the edge +// (anything that reaches that source point will end up using the variable, but +// there may be other ways to reach the destination of the edge.) +// +// Return values are implicitly used at the very last point in the function. +// This makes a difference: if an RAII class GCs in its destructor, we need to +// start looking at the final point in the function, not one point back from +// that, since that would skip over the GCing call. +// +// Note that this returns true only if the variable's incoming value is used. +// So this would return false for 'obj': +// +// obj = someFunction(); +// +// but these would return true: +// +// obj = someFunction(obj); +// obj->foo = someFunction(); +// +function edgeUsesVariable(edge, variable, body) +{ + if (ignoreEdgeUse(edge, variable, body)) + return 0; + + if (variable.Kind == "Return" && body.Index[1] == edge.Index[1] && body.BlockId.Kind == "Function") + return edge.Index[1]; // Last point in function body uses the return value. + + var src = edge.Index[0]; + + switch (edge.Kind) { + + case "Assign": { + if (isReturningImmobileValue(edge, variable)) + return 0; + const [lhs, rhs] = edge.Exp; + if (expressionUsesVariable(rhs, variable)) + return src; + if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable)) + return src; + return 0; + } + + case "Assume": + return expressionUsesVariableContents(edge.Exp[0], variable) ? src : 0; + + case "Call": { + const callee = edge.Exp[0]; + if (expressionUsesVariable(callee, variable)) + return src; + if ("PEdgeCallInstance" in edge) { + if (expressionUsesVariable(edge.PEdgeCallInstance.Exp, variable)) { + if (edgeKillsVariable(edge, variable)) { + // If the variable is being constructed, then the incoming + // value is not used here; it didn't exist before + // construction. (The analysis doesn't get told where + // variables are defined, so must infer it from + // construction. If the variable does not have a + // constructor, its live range may be larger than it really + // ought to be if it is defined within a loop body, but + // that is conservative.) + } else { + return src; + } + } + } + if ("PEdgeCallArguments" in edge) { + for (var exp of edge.PEdgeCallArguments.Exp) { + if (expressionUsesVariable(exp, variable)) + return src; + } + } + if (edge.Exp.length == 1) + return 0; + + // Assigning call result to a variable. + const lhs = edge.Exp[1]; + if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable)) + return src; + return 0; + } + + case "Loop": + return 0; + + default: + assert(false); + } +} + +function expressionIsVariableAddress(exp, variable) +{ + while (exp.Kind == "Fld") + exp = exp.Exp[0]; + return exp.Kind == "Var" && sameVariable(exp.Variable, variable); +} + +function edgeTakesVariableAddress(edge, variable, body) +{ + if (ignoreEdgeUse(edge, variable, body)) + return false; + if (ignoreEdgeAddressTaken(edge)) + return false; + switch (edge.Kind) { + case "Assign": + return expressionIsVariableAddress(edge.Exp[1], variable); + case "Call": + if ("PEdgeCallArguments" in edge) { + for (var exp of edge.PEdgeCallArguments.Exp) { + if (expressionIsVariableAddress(exp, variable)) + return true; + } + } + return false; + default: + return false; + } +} + +function expressionIsVariable(exp, variable) +{ + return exp.Kind == "Var" && sameVariable(exp.Variable, variable); +} + +// Return whether the edge kills (overwrites) the variable's incoming value. +// Examples of killing 'obj': +// +// obj = foo; +// obj = foo(); +// obj = foo(obj); // uses previous value but then kills it +// SomeClass obj(true, 1); // constructor +// +function edgeKillsVariable(edge, variable) +{ + // Direct assignments kill their lhs: var = value + if (edge.Kind == "Assign") { + const [lhs] = edge.Exp; + return (expressionIsVariable(lhs, variable) && + !isReturningImmobileValue(edge, variable)); + } + + if (edge.Kind != "Call") + return false; + + // Assignments of call results kill their lhs. + if (1 in edge.Exp) { + var lhs = edge.Exp[1]; + if (expressionIsVariable(lhs, variable)) + return true; + } + + // Constructor calls kill their 'this' value. + if ("PEdgeCallInstance" in edge) { + var instance = edge.PEdgeCallInstance.Exp; + + // Kludge around incorrect dereference on some constructor calls. + if (instance.Kind == "Drf") + instance = instance.Exp[0]; + + if (!expressionIsVariable(instance, variable)) + return false; + + var callee = edge.Exp[0]; + if (callee.Kind != "Var") + return false; + + assert(callee.Variable.Kind == "Func"); + var calleeName = readable(callee.Variable.Name[0]); + + // Constructor calls include the text 'Name::Name(' or 'Name<...>::Name('. + var openParen = calleeName.indexOf('('); + if (openParen < 0) + return false; + calleeName = calleeName.substring(0, openParen); + + var lastColon = calleeName.lastIndexOf('::'); + if (lastColon < 0) + return false; + var constructorName = calleeName.substr(lastColon + 2); + calleeName = calleeName.substr(0, lastColon); + + var lastTemplateOpen = calleeName.lastIndexOf('<'); + if (lastTemplateOpen >= 0) + calleeName = calleeName.substr(0, lastTemplateOpen); + + if (calleeName.endsWith(constructorName)) + return true; + } + + return false; +} + +function edgeCanGC(edge) +{ + if (edge.Kind != "Call") + return false; + + var callee = edge.Exp[0]; + + while (callee.Kind == "Drf") + callee = callee.Exp[0]; + + if (callee.Kind == "Var") { + var variable = callee.Variable; + + if (variable.Kind == "Func") { + var callee = mangled(variable.Name[0]); + if ((callee in gcFunctions) || ((callee + internalMarker) in gcFunctions)) + return "'" + variable.Name[0] + "'"; + return null; + } + + var varName = variable.Name[0]; + return indirectCallCannotGC(functionName, varName) ? null : "*" + varName; + } + + if (callee.Kind == "Fld") { + var field = callee.Field; + var csuName = field.FieldCSU.Type.Name; + var fullFieldName = csuName + "." + field.Name[0]; + if (fieldCallCannotGC(csuName, fullFieldName)) + return null; + return (fullFieldName in suppressedFunctions) ? null : fullFieldName; + } +} + +// Search recursively through predecessors from the use of a variable's value, +// returning whether a GC call is reachable (in the reverse direction; this +// means that the variable use is reachable from the GC call, and therefore the +// variable is live after the GC call), along with some additional information. +// What info we want depends on whether the variable turns out to be live +// across a GC call. We are looking for both hazards (unrooted variables live +// across GC calls) and unnecessary roots (rooted variables that have no GC +// calls in their live ranges.) +// +// If not: +// +// - 'minimumUse': the earliest point in each body that uses the variable, for +// reporting on unnecessary roots. +// +// If so: +// +// - 'why': a path from the GC call to a use of the variable after the GC +// call, chained through a 'why' field in the returned edge descriptor +// +// - 'gcInfo': a direct pointer to the GC call edge +// +function findGCBeforeValueUse(start_body, start_point, suppressed, variable) +{ + // Scan through all edges preceding an unrooted variable use, using an + // explicit worklist, looking for a GC call. A worklist contains an + // incoming edge together with a description of where it or one of its + // successors GC'd (if any). + + var bodies_visited = new Map(); + + let worklist = [{body: start_body, ppoint: start_point, preGCLive: false, gcInfo: null, why: null}]; + while (worklist.length) { + // Grab an entry off of the worklist, representing a point within the + // CFG identified by <body,ppoint>. If this point has a descendant + // later in the CFG that can GC, gcInfo will be set to the information + // about that GC call. + + var entry = worklist.pop(); + var { body, ppoint, gcInfo, preGCLive } = entry; + + // Handle the case where there are multiple ways to reach this point + // (traversing backwards). + var visited = bodies_visited.get(body); + if (!visited) + bodies_visited.set(body, visited = new Map()); + if (visited.has(ppoint)) { + var seenEntry = visited.get(ppoint); + + // This point already knows how to GC through some other path, so + // we have nothing new to learn. (The other path will consider the + // predecessors.) + if (seenEntry.gcInfo) + continue; + + // If this worklist's entry doesn't know of any way to GC, then + // there's no point in continuing the traversal through it. Perhaps + // another edge will be found that *can* GC; otherwise, the first + // route to the point will traverse through predecessors. + // + // Note that this means we may visit a point more than once, if the + // first time we visit we don't have a known reachable GC call and + // the second time we do. + if (!gcInfo) + continue; + } + visited.set(ppoint, {body: body, gcInfo: gcInfo}); + + // Check for hitting the entry point of the current body (which may be + // the outer function or a loop within it.) + if (ppoint == body.Index[0]) { + if (body.BlockId.Kind == "Loop") { + // Propagate to outer body parents that enter the loop body. + if ("BlockPPoint" in body) { + for (var parent of body.BlockPPoint) { + var found = false; + for (var xbody of functionBodies) { + if (sameBlockId(xbody.BlockId, parent.BlockId)) { + assert(!found); + found = true; + worklist.push({body: xbody, ppoint: parent.Index, + gcInfo: gcInfo, why: entry}); + } + } + assert(found); + } + } + + // Also propagate to the *end* of this loop, for the previous + // iteration. + worklist.push({body: body, ppoint: body.Index[1], + gcInfo: gcInfo, why: entry}); + } else if (variable.Kind == "Arg" && gcInfo) { + // The scope of arguments starts at the beginning of the + // function + return entry; + } else if (entry.preGCLive) { + // We didn't find a "good" explanation beginning of the live + // range, but we do know the variable was live across the GC. + // This can happen if the live range started when a variable is + // used as a retparam. + return entry; + } + } + + var predecessors = getPredecessors(body); + if (!(ppoint in predecessors)) + continue; + + for (var edge of predecessors[ppoint]) { + var source = edge.Index[0]; + + var edge_kills = edgeKillsVariable(edge, variable); + var edge_uses = edgeUsesVariable(edge, variable, body); + + if (edge_kills || edge_uses) { + if (!body.minimumUse || source < body.minimumUse) + body.minimumUse = source; + } + + if (edge_kills) { + // This is a beginning of the variable's live range. If we can + // reach a GC call from here, then we're done -- we have a path + // from the beginning of the live range, through the GC call, + // to a use after the GC call that proves its live range + // extends at least that far. + if (gcInfo) + return {body: body, ppoint: source, gcInfo: gcInfo, why: entry }; + + // Otherwise, keep searching through the graph, but truncate + // this particular branch of the search at this edge. + continue; + } + + var src_gcInfo = gcInfo; + var src_preGCLive = preGCLive; + if (!gcInfo && !(source in body.suppressed) && !suppressed) { + var gcName = edgeCanGC(edge, body); + if (gcName) + src_gcInfo = {name:gcName, body:body, ppoint:source}; + } + + if (edge_uses) { + // The live range starts at least this far back, so we're done + // for the same reason as with edge_kills. The only difference + // is that a GC on this edge indicates a hazard, whereas if + // we're killing a live range in the GC call then it's not live + // *across* the call. + // + // However, we may want to generate a longer usage chain for + // the variable than is minimally necessary. For example, + // consider: + // + // Value v = f(); + // if (v.isUndefined()) + // return false; + // gc(); + // return v; + // + // The call to .isUndefined() is considered to be a use and + // therefore indicates that v must be live at that point. But + // it's more helpful to the user to continue the 'why' path to + // include the ancestor where the value was generated. So we + // will only return here if edge.Kind is Assign; otherwise, + // we'll pass a "preGCLive" value up through the worklist to + // remember that the variable *is* alive before the GC and so + // this function should be returning a true value even if we + // don't find an assignment. + + if (src_gcInfo) { + src_preGCLive = true; + if (edge.Kind == 'Assign') + return {body:body, ppoint:source, gcInfo:src_gcInfo, why:entry}; + } + } + + if (edge.Kind == "Loop") { + // Additionally propagate the search into a loop body, starting + // with the exit point. + var found = false; + for (var xbody of functionBodies) { + if (sameBlockId(xbody.BlockId, edge.BlockId)) { + assert(!found); + found = true; + worklist.push({body:xbody, ppoint:xbody.Index[1], + preGCLive: src_preGCLive, gcInfo:src_gcInfo, + why:entry}); + } + } + assert(found); + // Don't continue to predecessors here without going through + // the loop. (The points in this body that enter the loop will + // be traversed when we reach the entry point of the loop.) + break; + } + + // Propagate the search to the predecessors of this edge. + worklist.push({body:body, ppoint:source, + preGCLive: src_preGCLive, gcInfo:src_gcInfo, + why:entry}); + } + } + + return null; +} + +function variableLiveAcrossGC(suppressed, variable) +{ + // A variable is live across a GC if (1) it is used by an edge (as in, it + // was at least initialized), and (2) it is used after a GC in a successor + // edge. + + for (var body of functionBodies) + body.minimumUse = 0; + + for (var body of functionBodies) { + if (!("PEdge" in body)) + continue; + for (var edge of body.PEdge) { + // Examples: + // + // JSObject* obj = NewObject(); + // cangc(); + // obj = NewObject(); <-- mentions 'obj' but kills previous value + // + // This is not a hazard. Contrast this with: + // + // JSObject* obj = NewObject(); + // cangc(); + // obj = LookAt(obj); <-- uses 'obj' and kills previous value + // + // This is a hazard; the initial value of obj is live across + // cangc(). And a third possibility: + // + // JSObject* obj = NewObject(); + // obj = CopyObject(obj); + // + // This is not a hazard, because even though CopyObject can GC, obj + // is not live across it. (obj is live before CopyObject, and + // probably after, but not across.) There may be a hazard within + // CopyObject, of course. + // + + var usePoint = edgeUsesVariable(edge, variable, body); + if (usePoint) { + var call = findGCBeforeValueUse(body, usePoint, suppressed, variable); + if (!call) + continue; + + call.afterGCUse = usePoint; + return call; + } + } + } + return null; +} + +// An unrooted variable has its address stored in another variable via +// assignment, or passed into a function that can GC. If the address is +// assigned into some other variable, we can't track it to see if it is held +// live across a GC. If it is passed into a function that can GC, then it's +// sort of like a Handle to an unrooted location, and the callee could GC +// before overwriting it or rooting it. +function unsafeVariableAddressTaken(suppressed, variable) +{ + for (var body of functionBodies) { + if (!("PEdge" in body)) + continue; + for (var edge of body.PEdge) { + if (edgeTakesVariableAddress(edge, variable, body)) { + if (edge.Kind == "Assign" || (!suppressed && edgeCanGC(edge))) + return {body:body, ppoint:edge.Index[0]}; + } + } + } + return null; +} + +// Read out the brief (non-JSON, semi-human-readable) CFG description for the +// given function and store it. +function loadPrintedLines(functionName) +{ + assert(!os.system("xdbfind src_body.xdb '" + functionName + "' > " + tmpfile)); + var lines = snarf(tmpfile).split('\n'); + + for (var body of functionBodies) + body.lines = []; + + // Distribute lines of output to the block they originate from. + var currentBody = null; + for (var line of lines) { + if (/^block:/.test(line)) { + if (match = /:(loop#[\d#]+)/.exec(line)) { + var loop = match[1]; + var found = false; + for (var body of functionBodies) { + if (body.BlockId.Kind == "Loop" && body.BlockId.Loop == loop) { + assert(!found); + found = true; + currentBody = body; + } + } + assert(found); + } else { + for (var body of functionBodies) { + if (body.BlockId.Kind == "Function") + currentBody = body; + } + } + } + if (currentBody) + currentBody.lines.push(line); + } +} + +function findLocation(body, ppoint, opts={brief: false}) +{ + var location = body.PPoint[ppoint - 1].Location; + var file = location.CacheString; + + if (file.indexOf(sourceRoot) == 0) + file = file.substring(sourceRoot.length); + + if (opts.brief) { + var m = /.*\/(.*)/.exec(file); + if (m) + file = m[1]; + } + + return file + ":" + location.Line; +} + +function locationLine(text) +{ + if (match = /:(\d+)$/.exec(text)) + return match[1]; + return 0; +} + +function printEntryTrace(functionName, entry) +{ + var gcPoint = entry.gcInfo ? entry.gcInfo.ppoint : 0; + + if (!functionBodies[0].lines) + loadPrintedLines(functionName); + + while (entry) { + var ppoint = entry.ppoint; + var lineText = findLocation(entry.body, ppoint, {"brief": true}); + + var edgeText = ""; + if (entry.why && entry.why.body == entry.body) { + // If the next point in the trace is in the same block, look for an edge between them. + var next = entry.why.ppoint; + + if (!entry.body.edgeTable) { + var table = {}; + entry.body.edgeTable = table; + for (var line of entry.body.lines) { + if (match = /^\w+\((\d+,\d+),/.exec(line)) + table[match[1]] = line; // May be multiple? + } + if (entry.body.BlockId.Kind == 'Loop') { + const [startPoint, endPoint] = entry.body.Index; + table[`${endPoint},${startPoint}`] = '(loop to next iteration)'; + } + } + + edgeText = entry.body.edgeTable[ppoint + "," + next]; + assert(edgeText); + if (ppoint == gcPoint) + edgeText += " [[GC call]]"; + } else { + // Look for any outgoing edge from the chosen point. + for (var line of entry.body.lines) { + if (match = /\((\d+),/.exec(line)) { + if (match[1] == ppoint) { + edgeText = line; + break; + } + } + } + if (ppoint == entry.body.Index[1] && entry.body.BlockId.Kind == "Function") + edgeText += " [[end of function]]"; + } + + print(" " + lineText + (edgeText.length ? ": " + edgeText : "")); + entry = entry.why; + } +} + +function isRootedType(type) +{ + return type.Kind == "CSU" && isRootedTypeName(type.Name); +} + +function typeDesc(type) +{ + if (type.Kind == "CSU") { + return type.Name; + } else if ('Type' in type) { + var inner = typeDesc(type.Type); + if (type.Kind == 'Pointer') + return inner + '*'; + else if (type.Kind == 'Array') + return inner + '[]'; + else + return inner + '?'; + } else { + return '???'; + } +} + +function processBodies(functionName) +{ + if (!("DefineVariable" in functionBodies[0])) + return; + var suppressed = (mangled(functionName) in suppressedFunctions); + for (var variable of functionBodies[0].DefineVariable) { + var name; + if (variable.Variable.Kind == "This") + name = "this"; + else if (variable.Variable.Kind == "Return") + name = "<returnvalue>"; + else + name = variable.Variable.Name[0]; + + if (isRootedType(variable.Type)) { + if (!variableLiveAcrossGC(suppressed, variable.Variable)) { + // The earliest use of the variable should be its constructor. + var lineText; + for (var body of functionBodies) { + if (body.minimumUse) { + var text = findLocation(body, body.minimumUse); + if (!lineText || locationLine(lineText) > locationLine(text)) + lineText = text; + } + } + print("\nFunction '" + functionName + "'" + + " has unnecessary root '" + name + "' at " + lineText); + } + } else if (isUnrootedType(variable.Type)) { + var result = variableLiveAcrossGC(suppressed, variable.Variable); + if (result) { + var lineText = findLocation(result.gcInfo.body, result.gcInfo.ppoint); + print("\nFunction '" + functionName + "'" + + " has unrooted '" + name + "'" + + " of type '" + typeDesc(variable.Type) + "'" + + " live across GC call " + result.gcInfo.name + + " at " + lineText); + printEntryTrace(functionName, result); + } + result = unsafeVariableAddressTaken(suppressed, variable.Variable); + if (result) { + var lineText = findLocation(result.body, result.ppoint); + print("\nFunction '" + functionName + "'" + + " takes unsafe address of unrooted '" + name + "'" + + " at " + lineText); + printEntryTrace(functionName, {body:result.body, ppoint:result.ppoint}); + } + } + } +} + +if (batch == 1) + print("Time: " + new Date); + +var xdb = xdbLibrary(); +xdb.open("src_body.xdb"); + +var minStream = xdb.min_data_stream()|0; +var maxStream = xdb.max_data_stream()|0; + +var N = (maxStream - minStream) + 1; +var start = Math.floor((batch - 1) / numBatches * N) + minStream; +var start_next = Math.floor(batch / numBatches * N) + minStream; +var end = start_next - 1; + +function process(name, json) { + functionName = name; + functionBodies = JSON.parse(json); + + for (var body of functionBodies) + body.suppressed = []; + for (var body of functionBodies) { + for (var [pbody, id] of allRAIIGuardedCallPoints(functionBodies, body, isSuppressConstructor)) + pbody.suppressed[id] = true; + } + processBodies(functionName); +} + +if (theFunctionNameToFind) { + var data = xdb.read_entry(theFunctionNameToFind); + var json = data.readString(); + process(theFunctionNameToFind, json); + xdb.free_string(data); + quit(0); +} + +for (var nameIndex = start; nameIndex <= end; nameIndex++) { + var name = xdb.read_key(nameIndex); + var functionName = name.readString(); + var data = xdb.read_entry(name); + xdb.free_string(name); + var json = data.readString(); + try { + process(functionName, json); + } catch (e) { + printErr("Exception caught while handling " + functionName); + throw(e); + } + xdb.free_string(data); +} diff --git a/js/src/devtools/rootAnalysis/annotations.js b/js/src/devtools/rootAnalysis/annotations.js new file mode 100644 index 000000000..5b798516f --- /dev/null +++ b/js/src/devtools/rootAnalysis/annotations.js @@ -0,0 +1,404 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +// RAII types within which we should assume GC is suppressed, eg +// AutoSuppressGC. +var GCSuppressionTypes = []; + +// Ignore calls made through these function pointers +var ignoreIndirectCalls = { + "mallocSizeOf" : true, + "aMallocSizeOf" : true, + "_malloc_message" : true, + "je_malloc_message" : true, + "chunk_dalloc" : true, + "chunk_alloc" : true, + "__conv" : true, + "__convf" : true, + "prerrortable.c:callback_newtable" : true, + "mozalloc_oom.cpp:void (* gAbortHandler)(size_t)" : true, +}; + +function indirectCallCannotGC(fullCaller, fullVariable) +{ + var caller = readable(fullCaller); + + // This is usually a simple variable name, but sometimes a full name gets + // passed through. And sometimes that name is truncated. Examples: + // _ZL13gAbortHandler|mozalloc_oom.cpp:void (* gAbortHandler)(size_t) + // _ZL14pMutexUnlockFn|umutex.cpp:void (* pMutexUnlockFn)(const void* + var name = readable(fullVariable); + + if (name in ignoreIndirectCalls) + return true; + + if (name == "mapper" && caller == "ptio.c:pt_MapError") + return true; + + if (name == "params" && caller == "PR_ExplodeTime") + return true; + + if (name == "op" && /GetWeakmapKeyDelegate/.test(caller)) + return true; + + // hook called during script finalization which cannot GC. + if (/CallDestroyScriptHook/.test(caller)) + return true; + + // template method called during marking and hence cannot GC + if (name == "op" && caller.indexOf("bool js::WeakMap<Key, Value, HashPolicy>::keyNeedsMark(JSObject*)") != -1) + { + return true; + } + + return false; +} + +// Ignore calls through functions pointers with these types +var ignoreClasses = { + "JSStringFinalizer" : true, + "SprintfState" : true, + "SprintfStateStr" : true, + "JSLocaleCallbacks" : true, + "JSC::ExecutableAllocator" : true, + "PRIOMethods": true, + "XPCOMFunctions" : true, // I'm a little unsure of this one + "_MD_IOVector" : true, + "malloc_table_t": true, // replace_malloc + "malloc_hook_table_t": true, // replace_malloc +}; + +// Ignore calls through TYPE.FIELD, where TYPE is the class or struct name containing +// a function pointer field named FIELD. +var ignoreCallees = { + "js::ClassOps.trace" : true, + "js::ClassOps.finalize" : true, + "JSRuntime.destroyPrincipals" : true, + "icu_50::UObject.__deleting_dtor" : true, // destructors in ICU code can't cause GC + "mozilla::CycleCollectedJSContext.DescribeCustomObjects" : true, // During tracing, cannot GC. + "mozilla::CycleCollectedJSContext.NoteCustomGCThingXPCOMChildren" : true, // During tracing, cannot GC. + "PLDHashTableOps.hashKey" : true, + "z_stream_s.zfree" : true, + "z_stream_s.zalloc" : true, + "GrGLInterface.fCallback" : true, + "std::strstreambuf._M_alloc_fun" : true, + "std::strstreambuf._M_free_fun" : true, + "struct js::gc::Callback<void (*)(JSContext*, void*)>.op" : true, + "mozilla::ThreadSharedFloatArrayBufferList::Storage.mFree" : true, +}; + +function fieldCallCannotGC(csu, fullfield) +{ + if (csu in ignoreClasses) + return true; + if (fullfield in ignoreCallees) + return true; + return false; +} + +function ignoreEdgeUse(edge, variable, body) +{ + // Horrible special case for ignoring a false positive in xptcstubs: there + // is a local variable 'paramBuffer' holding an array of nsXPTCMiniVariant + // on the stack, which appears to be live across a GC call because its + // constructor is called when the array is initialized, even though the + // constructor is a no-op. So we'll do a very narrow exclusion for the use + // that incorrectly started the live range, which was basically "__temp_1 = + // paramBuffer". + // + // By scoping it so narrowly, we can detect most hazards that would be + // caused by modifications in the PrepareAndDispatch code. It just barely + // avoids having a hazard already. + if (('Name' in variable) && (variable.Name[0] == 'paramBuffer')) { + if (body.BlockId.Kind == 'Function' && body.BlockId.Variable.Name[0] == 'PrepareAndDispatch') + if (edge.Kind == 'Assign' && edge.Type.Kind == 'Pointer') + if (edge.Exp[0].Kind == 'Var' && edge.Exp[1].Kind == 'Var') + if (edge.Exp[1].Variable.Kind == 'Local' && edge.Exp[1].Variable.Name[0] == 'paramBuffer') + return true; + } + + // Functions which should not be treated as using variable. + if (edge.Kind == "Call") { + var callee = edge.Exp[0]; + if (callee.Kind == "Var") { + var name = callee.Variable.Name[0]; + if (/~DebugOnly/.test(name)) + return true; + if (/~ScopedThreadSafeStringInspector/.test(name)) + return true; + } + } + + return false; +} + +function ignoreEdgeAddressTaken(edge) +{ + // Functions which may take indirect pointers to unrooted GC things, + // but will copy them into rooted locations before calling anything + // that can GC. These parameters should usually be replaced with + // handles or mutable handles. + if (edge.Kind == "Call") { + var callee = edge.Exp[0]; + if (callee.Kind == "Var") { + var name = callee.Variable.Name[0]; + if (/js::Invoke\(/.test(name)) + return true; + } + } + + return false; +} + +// Return whether csu.method is one that we claim can never GC. +function isSuppressedVirtualMethod(csu, method) +{ + return csu == "nsISupports" && (method == "AddRef" || method == "Release"); +} + +// Ignore calls of these functions (so ignore any stack containing these) +var ignoreFunctions = { + "ptio.c:pt_MapError" : true, + "je_malloc_printf" : true, + "vprintf_stderr" : true, + "PR_ExplodeTime" : true, + "PR_ErrorInstallTable" : true, + "PR_SetThreadPrivate" : true, + "JSObject* js::GetWeakmapKeyDelegate(JSObject*)" : true, // FIXME: mark with AutoSuppressGCAnalysis instead + "uint8 NS_IsMainThread()" : true, + + // Has an indirect call under it by the name "__f", which seemed too + // generic to ignore by itself. + "void* std::_Locale_impl::~_Locale_impl(int32)" : true, + + // Bug 1056410 - devirtualization prevents the standard nsISupports::Release heuristic from working + "uint32 nsXPConnect::Release()" : true, + + // FIXME! + "NS_LogInit": true, + "NS_LogTerm": true, + "NS_LogAddRef": true, + "NS_LogRelease": true, + "NS_LogCtor": true, + "NS_LogDtor": true, + "NS_LogCOMPtrAddRef": true, + "NS_LogCOMPtrRelease": true, + + // FIXME! + "NS_DebugBreak": true, + + // These are a little overzealous -- these destructors *can* GC if they end + // up wrapping a pending exception. See bug 898815 for the heavyweight fix. + "void js::AutoCompartment::~AutoCompartment(int32)" : true, + "void JSAutoCompartment::~JSAutoCompartment(int32)" : true, + + // The nsScriptNameSpaceManager functions can't actually GC. They + // just use a PLDHashTable which has function pointers, which makes the + // analysis think maybe they can. + "nsGlobalNameStruct* nsScriptNameSpaceManager::LookupNavigatorName(nsAString_internal*)": true, + "nsGlobalNameStruct* nsScriptNameSpaceManager::LookupName(nsAString_internal*, uint16**)": true, + + // Similar to heap snapshot mock classes, and GTests below. This posts a + // synchronous runnable when a GTest fails, and we are pretty sure that the + // particular runnable it posts can't even GC, but the analysis isn't + // currently smart enough to determine that. In either case, this is (a) + // only in GTests, and (b) only when the Gtest has already failed. We have + // static and dynamic checks for no GC in the non-test code, and in the test + // code we fall back to only the dynamic checks. + "void test::RingbufferDumper::OnTestPartResult(testing::TestPartResult*)" : true, + + "float64 JS_GetCurrentEmbedderTime()" : true, + + "uint64 js::TenuringTracer::moveObjectToTenured(JSObject*, JSObject*, int32)" : true, + "uint32 js::TenuringTracer::moveObjectToTenured(JSObject*, JSObject*, int32)" : true, + "void js::Nursery::freeMallocedBuffers()" : true, + + // It would be cool to somehow annotate that nsTHashtable<T> will use + // nsTHashtable<T>::s_MatchEntry for its matchEntry function pointer, but + // there is no mechanism for that. So we will just annotate a particularly + // troublesome logging-related usage. + "EntryType* nsTHashtable<EntryType>::PutEntry(nsTHashtable<EntryType>::KeyType, const fallible_t&) [with EntryType = nsBaseHashtableET<nsCharPtrHashKey, nsAutoPtr<mozilla::LogModule> >; nsTHashtable<EntryType>::KeyType = const char*; nsTHashtable<EntryType>::fallible_t = mozilla::fallible_t]" : true, + "EntryType* nsTHashtable<EntryType>::GetEntry(nsTHashtable<EntryType>::KeyType) const [with EntryType = nsBaseHashtableET<nsCharPtrHashKey, nsAutoPtr<mozilla::LogModule> >; nsTHashtable<EntryType>::KeyType = const char*]" : true, + "EntryType* nsTHashtable<EntryType>::PutEntry(nsTHashtable<EntryType>::KeyType) [with EntryType = nsBaseHashtableET<nsPtrHashKey<const mozilla::BlockingResourceBase>, nsAutoPtr<mozilla::DeadlockDetector<mozilla::BlockingResourceBase>::OrderingEntry> >; nsTHashtable<EntryType>::KeyType = const mozilla::BlockingResourceBase*]" : true, + "EntryType* nsTHashtable<EntryType>::GetEntry(nsTHashtable<EntryType>::KeyType) const [with EntryType = nsBaseHashtableET<nsPtrHashKey<const mozilla::BlockingResourceBase>, nsAutoPtr<mozilla::DeadlockDetector<mozilla::BlockingResourceBase>::OrderingEntry> >; nsTHashtable<EntryType>::KeyType = const mozilla::BlockingResourceBase*]" : true, + + // The big hammers. + "PR_GetCurrentThread" : true, + "calloc" : true, +}; + +function extraGCFunctions() { + return ["ffi_call"].filter(f => f in readableNames); +} +function isProtobuf(name) +{ + return name.match(/\bgoogle::protobuf\b/) || + name.match(/\bmozilla::devtools::protobuf\b/); +} + +function isHeapSnapshotMockClass(name) +{ + return name.match(/\bMockWriter\b/) || + name.match(/\bMockDeserializedNode\b/); +} + +function isGTest(name) +{ + return name.match(/\btesting::/); +} + +function ignoreGCFunction(mangled) +{ + assert(mangled in readableNames, mangled + " not in readableNames"); + var fun = readableNames[mangled][0]; + + if (fun in ignoreFunctions) + return true; + + // The protobuf library, and [de]serialization code generated by the + // protobuf compiler, uses a _ton_ of function pointers but they are all + // internal. Easiest to just ignore that mess here. + if (isProtobuf(fun)) + return true; + + // Ignore anything that goes through heap snapshot GTests or mocked classes + // used in heap snapshot GTests. GTest and GMock expose a lot of virtual + // methods and function pointers that could potentially GC after an + // assertion has already failed (depending on user-provided code), but don't + // exhibit that behavior currently. For non-test code, we have dynamic and + // static checks that ensure we don't GC. However, for test code we opt out + // of static checks here, because of the above stated GMock/GTest issues, + // and rely on only the dynamic checks provided by AutoAssertCannotGC. + if (isHeapSnapshotMockClass(fun) || isGTest(fun)) + return true; + + // Templatized function + if (fun.indexOf("void nsCOMPtr<T>::Assert_NoQueryNeeded()") >= 0) + return true; + + // These call through an 'op' function pointer. + if (fun.indexOf("js::WeakMap<Key, Value, HashPolicy>::getDelegate(") >= 0) + return true; + + // XXX modify refillFreeList<NoGC> to not need data flow analysis to understand it cannot GC. + if (/refillFreeList/.test(fun) && /\(js::AllowGC\)0u/.test(fun)) + return true; + return false; +} + +function stripUCSAndNamespace(name) +{ + name = name.replace(/(struct|class|union|const) /g, ""); + name = name.replace(/(js::ctypes::|js::|JS::|mozilla::dom::|mozilla::)/g, ""); + return name; +} + +function isRootedGCTypeName(name) +{ + return (name == "JSAddonId"); +} + +function isRootedGCPointerTypeName(name) +{ + name = stripUCSAndNamespace(name); + + if (name.startsWith('MaybeRooted<')) + return /\(js::AllowGC\)1u>::RootType/.test(name); + + if (name == "ErrorResult" || + name == "JSErrorResult" || + name == "WrappableJSErrorResult" || + name == "binding_detail::FastErrorResult" || + name == "IgnoredErrorResult" || + name == "frontend::TokenStream" || + name == "frontend::TokenStream::Position" || + name == "ModuleValidator") + { + return true; + } + + return name.startsWith('Rooted') || name.startsWith('PersistentRooted'); +} + +function isRootedTypeName(name) +{ + return isRootedGCTypeName(name) || isRootedGCPointerTypeName(name); +} + +function isUnsafeStorage(typeName) +{ + typeName = stripUCSAndNamespace(typeName); + return typeName.startsWith('UniquePtr<'); +} + +function isSuppressConstructor(edgeType, varName) +{ + // Check whether this could be a constructor + if (edgeType.Kind != 'Function') + return false; + if (!('TypeFunctionCSU' in edgeType)) + return false; + if (edgeType.Type.Kind != 'Void') + return false; + + // Check whether the type is a known suppression type. + var type = edgeType.TypeFunctionCSU.Type.Name; + if (GCSuppressionTypes.indexOf(type) == -1) + return false; + + // And now make sure this is the constructor, not some other method on a + // suppression type. varName[0] contains the qualified name. + var [ mangled, unmangled ] = splitFunction(varName[0]); + if (mangled.search(/C\dE/) == -1) + return false; // Mangled names of constructors have C<num>E + var m = unmangled.match(/([~\w]+)(?:<.*>)?\(/); + if (!m) + return false; + var type_stem = type.replace(/\w+::/g, '').replace(/\<.*\>/g, ''); + return m[1] == type_stem; +} + +// nsISupports subclasses' methods may be scriptable (or overridden +// via binary XPCOM), and so may GC. But some fields just aren't going +// to get overridden with something that can GC. +function isOverridableField(initialCSU, csu, field) +{ + if (csu != 'nsISupports') + return false; + + // Now that binary XPCOM is dead, all these annotations should be replaced + // with something based on bug 1347999. + if (field == 'GetCurrentJSContext') + return false; + if (field == 'IsOnCurrentThread') + return false; + if (field == 'GetNativeContext') + return false; + if (field == "GetGlobalJSObject") + return false; + if (field == "GetIsMainThread") + return false; + if (initialCSU == 'nsIXPConnectJSObjectHolder' && field == 'GetJSObject') + return false; + if (initialCSU == 'nsIXPConnect' && field == 'GetSafeJSContext') + return false; + + // nsIScriptSecurityManager is not [builtinclass], but smaug says "the + // interface definitely should be builtinclass", which is good enough. + if (initialCSU == 'nsIScriptSecurityManager' && field == 'IsSystemPrincipal') + return false; + + if (initialCSU == 'nsIScriptContext') { + if (field == 'GetWindowProxy' || field == 'GetWindowProxyPreserveColor') + return false; + } + return true; +} + +function listNonGCPointers() { + return [ + // Safe only because jsids are currently only made from pinned strings. + 'NPIdentifier', + ]; +} diff --git a/js/src/devtools/rootAnalysis/build.js b/js/src/devtools/rootAnalysis/build.js new file mode 100755 index 000000000..d934c5663 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build.js @@ -0,0 +1,11 @@ +#!/bin/sh + +set -e + +cd $SOURCE +make -f client.mk configure +make -C $ANALYZED_OBJDIR export +./mach build -X nsprpub mfbt memory memory/mozalloc modules/zlib mozglue js/src xpcom/glue js/ductwork/debugger js/ipc js/xpconnect/loader js/xpconnect/wrappers js/xpconnect/src +status=$? +echo "[[[[ build.js complete, exit code $status ]]]]" +exit $status diff --git a/js/src/devtools/rootAnalysis/build/gcc-b2g.manifest b/js/src/devtools/rootAnalysis/build/gcc-b2g.manifest new file mode 100644 index 000000000..0d5eeb050 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/gcc-b2g.manifest @@ -0,0 +1,11 @@ +[ +{ +"version": "gcc 4.9.3", +"size": 102421980, +"visibility": "public", +"digest": "f25292aa93dc449e0472eee511c0ac15b5f1a4272ab76cf53ce5d20dc57f29e83da49ae1a9d9e994192647f75e13ae60f75ba2ac3cb9d26d5f5d6cabf88de921", +"algorithm": "sha512", +"filename": "gcc.tar.xz", +"unpack": true +} +] diff --git a/js/src/devtools/rootAnalysis/build/gcc.manifest b/js/src/devtools/rootAnalysis/build/gcc.manifest new file mode 100644 index 000000000..21b570f1c --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/gcc.manifest @@ -0,0 +1,19 @@ +[ +{ +"version": "gcc 4.9.3", +"size": 102421980, +"visibility": "public", +"digest": "f25292aa93dc449e0472eee511c0ac15b5f1a4272ab76cf53ce5d20dc57f29e83da49ae1a9d9e994192647f75e13ae60f75ba2ac3cb9d26d5f5d6cabf88de921", +"algorithm": "sha512", +"filename": "gcc.tar.xz", +"unpack": true +}, +{ +"size": 12072532, +"digest": "3915f8ec396c56a8a92e6f9695b70f09ce9d1582359d1258e37e3fd43a143bc974410e4cfc27f500e095f34a8956206e0ebf799b7287f0f38def0d5e34ed71c9", +"algorithm": "sha512", +"filename": "gtk3.tar.xz", +"setup": "setup.sh", +"unpack": true +} +] diff --git a/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest b/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest new file mode 100644 index 000000000..1ecb5d066 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/sixgill-b2g.manifest @@ -0,0 +1,10 @@ +[ +{ +"hg_id" : "ec7b7d2442e8", +"algorithm" : "sha512", +"digest" : "49627d734df52cb9e7319733da5a6be1812b9373355dc300ee5600b431122570e00d380d50c7c5b5003c462c2c2cb022494b42c4ad00f8eba01c2259cbe6e502", +"filename" : "sixgill.tar.xz", +"size" : 2628868, +"unpack" : true +} +] diff --git a/js/src/devtools/rootAnalysis/build/sixgill.manifest b/js/src/devtools/rootAnalysis/build/sixgill.manifest new file mode 100644 index 000000000..d02bb1bf4 --- /dev/null +++ b/js/src/devtools/rootAnalysis/build/sixgill.manifest @@ -0,0 +1,10 @@ +[ +{ +"digest" : "36dc644e24c0aa824975ad8f5c15714445d5cb064d823000c3cb637e885199414d7df551e6b99233f0656dcf5760918192ef04113c486af37f3c489bb93ad029", +"size" : 2631908, +"hg_id" : "8cb9c3fb039a+ tip", +"unpack" : true, +"filename" : "sixgill.tar.xz", +"algorithm" : "sha512" +} +] diff --git a/js/src/devtools/rootAnalysis/computeCallgraph.js b/js/src/devtools/rootAnalysis/computeCallgraph.js new file mode 100644 index 000000000..dab3f7621 --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeCallgraph.js @@ -0,0 +1,435 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('CFG.js'); + +var theFunctionNameToFind; +if (scriptArgs[0] == '--function') { + theFunctionNameToFind = scriptArgs[1]; + scriptArgs = scriptArgs.slice(2); +} + +var typeInfo_filename = scriptArgs[0] || "typeInfo.txt"; + +var subclasses = new Map(); // Map from csu => set of immediate subclasses +var superclasses = new Map(); // Map from csu => set of immediate superclasses +var classFunctions = new Map(); // Map from "csu:name" => set of full method name + +var virtualResolutionsSeen = new Set(); + +function addEntry(map, name, entry) +{ + if (!map.has(name)) + map.set(name, new Set()); + map.get(name).add(entry); +} + +// CSU is "Class/Struct/Union" +function processCSU(csuName, csu) +{ + if (!("FunctionField" in csu)) + return; + for (var field of csu.FunctionField) { + if (1 in field.Field) { + var superclass = field.Field[1].Type.Name; + var subclass = field.Field[1].FieldCSU.Type.Name; + assert(subclass == csuName); + addEntry(subclasses, superclass, subclass); + addEntry(superclasses, subclass, superclass); + } + if ("Variable" in field) { + // Note: not dealing with overloading correctly. + var name = field.Variable.Name[0]; + var key = csuName + ":" + field.Field[0].Name[0]; + addEntry(classFunctions, key, name); + } + } +} + +// Return the nearest ancestor method definition, or all nearest definitions in +// the case of multiple inheritance. +function nearestAncestorMethods(csu, method) +{ + var key = csu + ":" + method; + + if (classFunctions.has(key)) + return new Set(classFunctions.get(key)); + + var functions = new Set(); + if (superclasses.has(csu)) { + for (var parent of superclasses.get(csu)) + functions.update(nearestAncestorMethods(parent, method)); + } + + return functions; +} + +// Return [ instantations, suppressed ], where instantiations is a Set of all +// possible implementations of 'field' given static type 'initialCSU', plus +// null if arbitrary other implementations are possible, and suppressed is true +// if we the method is assumed to be non-GC'ing by annotation. +function findVirtualFunctions(initialCSU, field) +{ + var worklist = [initialCSU]; + var functions = new Set(); + + // Loop through all methods of initialCSU (by looking at all methods of ancestor csus). + // + // If field is nsISupports::AddRef or ::Release, return an empty list and a + // boolean that says we assert that it cannot GC. + // + // If this is a method that is annotated to be dangerous (eg, it could be + // overridden with an implementation that could GC), then use null as a + // signal value that it should be considered to GC, even though we'll also + // collect all of the instantiations for other purposes. + + while (worklist.length) { + var csu = worklist.pop(); + if (isSuppressedVirtualMethod(csu, field)) + return [ new Set(), true ]; + if (isOverridableField(initialCSU, csu, field)) { + // We will still resolve the virtual function call, because it's + // nice to have as complete a callgraph as possible for other uses. + // But push a token saying that we can run arbitrary code. + functions.add(null); + } + + if (superclasses.has(csu)) + worklist.push(...superclasses.get(csu)); + } + + // Now return a list of all the instantiations of the method named 'field' + // that could execute on an instance of initialCSU or a descendant class. + + // Start with the class itself, or if it doesn't define the method, all + // nearest ancestor definitions. + functions.update(nearestAncestorMethods(initialCSU, field)); + + // Then recurse through all descendants to add in their definitions. + var worklist = [initialCSU]; + while (worklist.length) { + var csu = worklist.pop(); + var key = csu + ":" + field; + + if (classFunctions.has(key)) + functions.update(classFunctions.get(key)); + + if (subclasses.has(csu)) + worklist.push(...subclasses.get(csu)); + } + + return [ functions, false ]; +} + +var memoized = new Map(); +var memoizedCount = 0; + +function memo(name) +{ + if (!memoized.has(name)) { + let id = memoized.size + 1; + memoized.set(name, "" + id); + print(`#${id} ${name}`); + } + return memoized.get(name); +} + +// Return a list of all callees that the given edge might be a call to. Each +// one is represented by an object with a 'kind' field that is one of +// ('direct', 'field', 'resolved-field', 'indirect', 'unknown'), though note +// that 'resolved-field' is really a global record of virtual method +// resolutions, indepedent of this particular edge. +function getCallees(edge) +{ + if (edge.Kind != "Call") + return []; + + var callee = edge.Exp[0]; + var callees = []; + if (callee.Kind == "Var") { + assert(callee.Variable.Kind == "Func"); + callees.push({'kind': 'direct', 'name': callee.Variable.Name[0]}); + } else { + assert(callee.Kind == "Drf"); + if (callee.Exp[0].Kind == "Fld") { + var field = callee.Exp[0].Field; + var fieldName = field.Name[0]; + var csuName = field.FieldCSU.Type.Name; + var functions; + if ("FieldInstanceFunction" in field) { + let suppressed; + [ functions, suppressed ] = findVirtualFunctions(csuName, fieldName, suppressed); + if (suppressed) { + // Field call known to not GC; mark it as suppressed so + // direct invocations will be ignored + callees.push({'kind': "field", 'csu': csuName, 'field': fieldName, + 'suppressed': true}); + } + } else { + functions = new Set([null]); // field call + } + + // Known set of virtual call targets. Treat them as direct calls to + // all possible resolved types, but also record edges from this + // field call to each final callee. When the analysis is checking + // whether an edge can GC and it sees an unrooted pointer held live + // across this field call, it will know whether any of the direct + // callees can GC or not. + var targets = []; + var fullyResolved = true; + for (var name of functions) { + if (name === null) { + // Unknown set of call targets, meaning either a function + // pointer call ("field call") or a virtual method that can + // be overridden in extensions. + callees.push({'kind': "field", 'csu': csuName, 'field': fieldName}); + fullyResolved = false; + } else { + callees.push({'kind': "direct", 'name': name}); + targets.push({'kind': "direct", 'name': name}); + } + } + if (fullyResolved) + callees.push({'kind': "resolved-field", 'csu': csuName, 'field': fieldName, 'callees': targets}); + } else if (callee.Exp[0].Kind == "Var") { + // indirect call through a variable. + callees.push({'kind': "indirect", 'variable': callee.Exp[0].Variable.Name[0]}); + } else { + // unknown call target. + callees.push({'kind': "unknown"}); + } + } + + return callees; +} + +var lastline; +function printOnce(line) +{ + if (line != lastline) { + print(line); + lastline = line; + } +} + +// Returns a table mapping function name to lists of [annotation-name, +// annotation-value] pairs: { function-name => [ [annotation-name, annotation-value] ] } +function getAnnotations(body) +{ + var all_annotations = {}; + for (var v of (body.DefineVariable || [])) { + if (v.Variable.Kind != 'Func') + continue; + var name = v.Variable.Name[0]; + var annotations = all_annotations[name] = []; + + for (var ann of (v.Type.Annotation || [])) { + annotations.push(ann.Name); + } + } + + return all_annotations; +} + +function getTags(functionName, body) { + var tags = new Set(); + var annotations = getAnnotations(body); + if (functionName in annotations) { + for (var [ annName, annValue ] of annotations[functionName]) { + if (annName == 'Tag') + tags.add(annValue); + } + } + return tags; +} + +function processBody(functionName, body) +{ + if (!('PEdge' in body)) + return; + + for (var tag of getTags(functionName, body).values()) + print("T " + memo(functionName) + " " + tag); + + // Set of all callees that have been output so far, in order to suppress + // repeated callgraph edges from being recorded. Use a separate set for + // suppressed callees, since we don't want a suppressed edge (within one + // RAII scope) to prevent an unsuppressed edge from being recorded. The + // seen array is indexed by a boolean 'suppressed' variable. + var seen = [ new Set(), new Set() ]; + + lastline = null; + for (var edge of body.PEdge) { + if (edge.Kind != "Call") + continue; + + // Whether this call is within the RAII scope of a GC suppression class + var edgeSuppressed = (edge.Index[0] in body.suppressed); + + for (var callee of getCallees(edge)) { + var suppressed = Boolean(edgeSuppressed || callee.suppressed); + var prologue = suppressed ? "SUPPRESS_GC " : ""; + prologue += memo(functionName) + " "; + if (callee.kind == 'direct') { + if (!seen[+suppressed].has(callee.name)) { + seen[+suppressed].add(callee.name); + printOnce("D " + prologue + memo(callee.name)); + } + } else if (callee.kind == 'field') { + var { csu, field } = callee; + printOnce("F " + prologue + "CLASS " + csu + " FIELD " + field); + } else if (callee.kind == 'resolved-field') { + // Fully-resolved field (virtual method) call. Record the + // callgraph edges. Do not consider suppression, since it is + // local to this callsite and we are writing out a global + // record here. + // + // Any field call that does *not* have an R entry must be + // assumed to call anything. + var { csu, field, callees } = callee; + var fullFieldName = csu + "." + field; + if (!virtualResolutionsSeen.has(fullFieldName)) { + virtualResolutionsSeen.add(fullFieldName); + for (var target of callees) + printOnce("R " + memo(fullFieldName) + " " + memo(target.name)); + } + } else if (callee.kind == 'indirect') { + printOnce("I " + prologue + "VARIABLE " + callee.variable); + } else if (callee.kind == 'unknown') { + printOnce("I " + prologue + "VARIABLE UNKNOWN"); + } else { + printErr("invalid " + callee.kind + " callee"); + debugger; + } + } + } +} + +GCSuppressionTypes = loadTypeInfo(typeInfo_filename)["Suppress GC"] || []; + +var xdb = xdbLibrary(); +xdb.open("src_comp.xdb"); + +var minStream = xdb.min_data_stream(); +var maxStream = xdb.max_data_stream(); + +for (var csuIndex = minStream; csuIndex <= maxStream; csuIndex++) { + var csu = xdb.read_key(csuIndex); + var data = xdb.read_entry(csu); + var json = JSON.parse(data.readString()); + processCSU(csu.readString(), json[0]); + + xdb.free_string(csu); + xdb.free_string(data); +} + +xdb.open("src_body.xdb"); + +printErr("Finished loading data structures"); + +var minStream = xdb.min_data_stream(); +var maxStream = xdb.max_data_stream(); + +if (theFunctionNameToFind) { + var index = xdb.lookup_key(theFunctionNameToFind); + if (!index) { + printErr("Function not found"); + quit(1); + } + minStream = maxStream = index; +} + +function process(functionName, functionBodies) +{ + for (var body of functionBodies) + body.suppressed = []; + + for (var body of functionBodies) { + for (var [pbody, id] of allRAIIGuardedCallPoints(functionBodies, body, isSuppressConstructor)) + pbody.suppressed[id] = true; + } + + for (var body of functionBodies) + processBody(functionName, body); + + // GCC generates multiple constructors and destructors ("in-charge" and + // "not-in-charge") to handle virtual base classes. They are normally + // identical, and it appears that GCC does some magic to alias them to the + // same thing. But this aliasing is not visible to the analysis. So we'll + // add a dummy call edge from "foo" -> "foo *INTERNAL* ", since only "foo" + // will show up as called but only "foo *INTERNAL* " will be emitted in the + // case where the constructors are identical. + // + // This is slightly conservative in the case where they are *not* + // identical, but that should be rare enough that we don't care. + var markerPos = functionName.indexOf(internalMarker); + if (markerPos > 0) { + var inChargeXTor = functionName.replace(internalMarker, ""); + print("D " + memo(inChargeXTor) + " " + memo(functionName)); + + // Bug 1056410: Oh joy. GCC does something even funkier internally, + // where it generates calls to ~Foo() but a body for ~Foo(int32) even + // though it uses the same mangled name for both. So we need to add a + // synthetic edge from ~Foo() -> ~Foo(int32). + // + // inChargeXTor will have the (int32). + if (functionName.indexOf("::~") > 0) { + var calledDestructor = inChargeXTor.replace("(int32)", "()"); + print("D " + memo(calledDestructor) + " " + memo(inChargeXTor)); + } + } + + // Further note: from http://mentorembedded.github.io/cxx-abi/abi.html the + // different kinds of constructors/destructors are: + // C1 # complete object constructor + // C2 # base object constructor + // C3 # complete object allocating constructor + // D0 # deleting destructor + // D1 # complete object destructor + // D2 # base object destructor + // + // In actual practice, I have observed C4 and D4 xtors generated by gcc + // 4.9.3 (but not 4.7.3). The gcc source code says: + // + // /* This is the old-style "[unified]" constructor. + // In some cases, we may emit this function and call + // it from the clones in order to share code and save space. */ + // + // Unfortunately, that "call... from the clones" does not seem to appear in + // the CFG we get from GCC. So if we see a C4 constructor or D4 destructor, + // inject an edge to it from C1, C2, and C3 (or D1, D2, and D3). (Note that + // C3 isn't even used in current GCC, but add the edge anyway just in + // case.) + if (functionName.indexOf("C4E") != -1 || functionName.indexOf("D4Ev") != -1) { + var [ mangled, unmangled ] = splitFunction(functionName); + // E terminates the method name (and precedes the method parameters). + // If eg "C4E" shows up in the mangled name for another reason, this + // will create bogus edges in the callgraph. But will affect little and + // is somewhat difficult to avoid, so we will live with it. + for (let [synthetic, variant] of [['C4E', 'C1E'], + ['C4E', 'C2E'], + ['C4E', 'C3E'], + ['D4Ev', 'D1Ev'], + ['D4Ev', 'D2Ev'], + ['D4Ev', 'D3Ev']]) + { + if (mangled.indexOf(synthetic) == -1) + continue; + + let variant_mangled = mangled.replace(synthetic, variant); + let variant_full = variant_mangled + "$" + unmangled; + print("D " + memo(variant_full) + " " + memo(functionName)); + } + } +} + +for (var nameIndex = minStream; nameIndex <= maxStream; nameIndex++) { + var name = xdb.read_key(nameIndex); + var data = xdb.read_entry(name); + process(name.readString(), JSON.parse(data.readString())); + xdb.free_string(name); + xdb.free_string(data); +} diff --git a/js/src/devtools/rootAnalysis/computeGCFunctions.js b/js/src/devtools/rootAnalysis/computeGCFunctions.js new file mode 100644 index 000000000..97efcb38a --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeGCFunctions.js @@ -0,0 +1,69 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); +loadRelativeToScript('loadCallgraph.js'); + +if (typeof scriptArgs[0] != 'string') + throw "Usage: computeGCFunctions.js <callgraph.txt> <out:gcFunctions.txt> <out:gcFunctions.lst> <out:gcEdges.txt> <out:suppressedFunctions.lst>"; + +var start = "Time: " + new Date; + +var callgraph_filename = scriptArgs[0]; +var gcFunctions_filename = scriptArgs[1] || "gcFunctions.txt"; +var gcFunctionsList_filename = scriptArgs[2] || "gcFunctions.lst"; +var gcEdges_filename = scriptArgs[3] || "gcEdges.txt"; +var suppressedFunctionsList_filename = scriptArgs[4] || "suppressedFunctions.lst"; + +loadCallgraph(callgraph_filename); + +printErr("Writing " + gcFunctions_filename); +redirect(gcFunctions_filename); + +for (var name in gcFunctions) { + for (let readable of readableNames[name]) { + print(""); + print("GC Function: " + name + "$" + readable); + let current = name; + do { + current = gcFunctions[current]; + if (current in readableNames) + print(" " + readableNames[current][0]); + else + print(" " + current); + } while (current in gcFunctions); + } +} + +printErr("Writing " + gcFunctionsList_filename); +redirect(gcFunctionsList_filename); +for (var name in gcFunctions) { + for (var readable of readableNames[name]) + print(name + "$" + readable); +} + +// gcEdges is a list of edges that can GC for more specific reasons than just +// calling a function that is in gcFunctions.txt. +// +// Right now, it is unused. It was meant for ~AutoCompartment when it might +// wrap an exception, but anything held live across ~AC will have to be held +// live across the corresponding constructor (and hence the whole scope of the +// AC), and in that case it'll be held live across whatever could create an +// exception within the AC scope. So ~AC edges are redundant. I will leave the +// stub machinery here for now. +printErr("Writing " + gcEdges_filename); +redirect(gcEdges_filename); +for (var block in gcEdges) { + for (var edge in gcEdges[block]) { + var func = gcEdges[block][edge]; + print([ block, edge, func ].join(" || ")); + } +} + +printErr("Writing " + suppressedFunctionsList_filename); +redirect(suppressedFunctionsList_filename); +for (var name in suppressedFunctions) { + print(name); +} diff --git a/js/src/devtools/rootAnalysis/computeGCTypes.js b/js/src/devtools/rootAnalysis/computeGCTypes.js new file mode 100644 index 000000000..af4d70389 --- /dev/null +++ b/js/src/devtools/rootAnalysis/computeGCTypes.js @@ -0,0 +1,299 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); +loadRelativeToScript('annotations.js'); + +var gcTypes_filename = scriptArgs[0] || "gcTypes.txt"; +var typeInfo_filename = scriptArgs[1] || "typeInfo.txt"; + +var annotations = { + 'GCPointers': [], + 'GCThings': [], + 'NonGCTypes': {}, // unused + 'NonGCPointers': {}, + 'RootedPointers': {}, + 'GCSuppressors': {}, +}; + +var gDescriptors = new Map; // Map from descriptor string => Set of typeName + +var structureParents = {}; // Map from field => list of <parent, fieldName> +var pointerParents = {}; // Map from field => list of <parent, fieldName> +var baseClasses = {}; // Map from struct name => list of base class name strings + +var gcTypes = {}; // map from parent struct => Set of GC typed children +var gcPointers = {}; // map from parent struct => Set of GC typed children +var gcFields = new Map; + +var rootedPointers = {}; + +function processCSU(csu, body) +{ + for (let { 'Base': base } of (body.CSUBaseClass || [])) + addBaseClass(csu, base); + + for (let field of (body.DataField || [])) { + var type = field.Field.Type; + var fieldName = field.Field.Name[0]; + if (type.Kind == "Pointer") { + var target = type.Type; + if (target.Kind == "CSU") + addNestedPointer(csu, target.Name, fieldName); + } + if (type.Kind == "Array") { + var target = type.Type; + if (target.Kind == "CSU") + addNestedStructure(csu, target.Name, fieldName); + } + if (type.Kind == "CSU") { + // Ignore nesting in classes which are AutoGCRooters. We only consider + // types with fields that may not be properly rooted. + if (type.Name == "JS::AutoGCRooter" || type.Name == "JS::CustomAutoRooter") + return; + addNestedStructure(csu, type.Name, fieldName); + } + } + + for (let { 'Name': [ annType, tag ] } of (body.Annotation || [])) { + if (annType != 'Tag') + continue; + + if (tag == 'GC Pointer') + annotations.GCPointers.push(csu); + else if (tag == 'Invalidated by GC') + annotations.GCPointers.push(csu); + else if (tag == 'GC Thing') + annotations.GCThings.push(csu); + else if (tag == 'Suppressed GC Pointer') + annotations.NonGCPointers[csu] = true; + else if (tag == 'Rooted Pointer') + annotations.RootedPointers[csu] = true; + else if (tag == 'Suppress GC') + annotations.GCSuppressors[csu] = true; + } +} + +// csu.field is of type inner +function addNestedStructure(csu, inner, field) +{ + if (!(inner in structureParents)) + structureParents[inner] = []; + + if (field.match(/^field:\d+$/) && (csu in baseClasses) && (baseClasses[csu].indexOf(inner) != -1)) + return; + + structureParents[inner].push([ csu, field ]); +} + +function addBaseClass(csu, base) { + if (!(csu in baseClasses)) + baseClasses[csu] = []; + baseClasses[csu].push(base); + var k = baseClasses[csu].length; + addNestedStructure(csu, base, `<base-${k}>`); +} + +function addNestedPointer(csu, inner, field) +{ + if (!(inner in pointerParents)) + pointerParents[inner] = []; + pointerParents[inner].push([ csu, field ]); +} + +var xdb = xdbLibrary(); +xdb.open("src_comp.xdb"); + +var minStream = xdb.min_data_stream(); +var maxStream = xdb.max_data_stream(); + +for (var csuIndex = minStream; csuIndex <= maxStream; csuIndex++) { + var csu = xdb.read_key(csuIndex); + var data = xdb.read_entry(csu); + var json = JSON.parse(data.readString()); + assert(json.length == 1); + processCSU(csu.readString(), json[0]); + + xdb.free_string(csu); + xdb.free_string(data); +} + +// Now that we have the whole hierarchy set up, add all the types and propagate +// info. +for (let csu of annotations.GCThings) + addGCType(csu); +for (let csu of annotations.GCPointers) + addGCPointer(csu); + +function stars(n) { return n ? '*' + stars(n-1) : '' }; + +// "typeName is a (pointer to a)^'typePtrLevel' GC type because it contains a field +// named 'child' of type 'why' (or pointer to 'why' if fieldPtrLevel == 1), which is +// itself a GCThing or GCPointer." +function markGCType(typeName, child, why, typePtrLevel, fieldPtrLevel, indent) +{ + //printErr(`${indent}${typeName}${stars(typePtrLevel)} may be a gctype/ptr because of its child '${child}' of type ${why}${stars(fieldPtrLevel)}`); + + // Some types, like UniquePtr, do not mark/trace/relocate their contained + // pointers and so should not hold them live across a GC. UniquePtr in + // particular should be the only thing pointing to a structure containing a + // GCPointer, so nothing else can possibly trace it and it'll die when the + // UniquePtr goes out of scope. So we say that memory pointed to by a + // UniquePtr is just as unsafe as the stack for storing GC pointers. + if (!fieldPtrLevel && isUnsafeStorage(typeName)) { + // The UniquePtr itself is on the stack but when you dereference the + // contained pointer, you get to the unsafe memory that we are treating + // as if it were the stack (aka ptrLevel 0). Note that + // UniquePtr<UniquePtr<JSObject*>> is fine, so we don't want to just + // hardcode the ptrLevel. + fieldPtrLevel = -1; + } + + // Example: with: + // struct Pair { JSObject* foo; int bar; }; + // struct { Pair** info }*** + // make a call to: + // child='info' typePtrLevel=3 fieldPtrLevel=2 + // for a final ptrLevel of 5, used to later call: + // child='foo' typePtrLevel=5 fieldPtrLevel=1 + // + var ptrLevel = typePtrLevel + fieldPtrLevel; + + // ...except when > 2 levels of pointers away from an actual GC thing, stop + // searching the graph. (This would just be > 1, except that a UniquePtr + // field might still have a GC pointer.) + if (ptrLevel > 2) + return; + + if (ptrLevel == 0 && isRootedGCTypeName(typeName)) + return; + if (ptrLevel == 1 && isRootedGCPointerTypeName(typeName)) + return; + + if (ptrLevel == 0) { + if (typeName in annotations.NonGCTypes) + return; + if (!(typeName in gcTypes)) + gcTypes[typeName] = new Set(); + gcTypes[typeName].add(why); + } else if (ptrLevel == 1) { + if (typeName in annotations.NonGCPointers) + return; + if (!(typeName in gcPointers)) + gcPointers[typeName] = new Set(); + gcPointers[typeName].add(why); + } + + if (ptrLevel < 2) { + if (!gcFields.has(typeName)) + gcFields.set(typeName, new Map()); + gcFields.get(typeName).set(child, [ why, fieldPtrLevel ]); + } + + if (typeName in structureParents) { + for (var field of structureParents[typeName]) { + var [ holderType, fieldName ] = field; + markGCType(holderType, fieldName, typeName, ptrLevel, 0, indent + " "); + } + } + if (typeName in pointerParents) { + for (var field of pointerParents[typeName]) { + var [ holderType, fieldName ] = field; + markGCType(holderType, fieldName, typeName, ptrLevel, 1, indent + " "); + } + } +} + +function addGCType(typeName, child, why, depth, fieldPtrLevel) +{ + markGCType(typeName, '<annotation>', '(annotation)', 0, 0, ""); +} + +function addGCPointer(typeName) +{ + markGCType(typeName, '<pointer-annotation>', '(annotation)', 1, 0, ""); +} + +// Add an arbitrary descriptor to a type, and apply it recursively to all base +// structs and structs that contain the given typeName as a field. +function addDescriptor(typeName, descriptor) +{ + if (!gDescriptors.has(descriptor)) + gDescriptors.set(descriptor, new Set); + let descriptorTypes = gDescriptors.get(descriptor); + if (!descriptorTypes.has(typeName)) { + descriptorTypes.add(typeName); + if (typeName in structureParents) { + for (let [holder, field] of structureParents[typeName]) + addDescriptor(holder, descriptor); + } + if (typeName in baseClasses) { + for (let base of baseClasses[typeName]) + addDescriptor(base, descriptor); + } + } +} + +for (var type of listNonGCPointers()) + annotations.NonGCPointers[type] = true; + +function explain(csu, indent, seen) { + if (!seen) + seen = new Set(); + seen.add(csu); + if (!gcFields.has(csu)) + return; + var fields = gcFields.get(csu); + + if (fields.has('<annotation>')) { + print(indent + "which is annotated as a GCThing"); + return; + } + if (fields.has('<pointer-annotation>')) { + print(indent + "which is annotated as a GCPointer"); + return; + } + for (var [ field, [ child, ptrdness ] ] of fields) { + var msg = indent; + if (field[0] == '<') + msg += "inherits from "; + else { + msg += "contains field '" + field + "' "; + if (ptrdness == -1) + msg += "(with a pointer to unsafe storage) holding a "; + else if (ptrdness == 0) + msg += "of type "; + else + msg += "pointing to type "; + } + msg += child; + print(msg); + if (!seen.has(child)) + explain(child, indent + " ", seen); + } +} + +var origOut = os.file.redirect(gcTypes_filename); + +for (var csu in gcTypes) { + print("GCThing: " + csu); + explain(csu, " "); +} +for (var csu in gcPointers) { + print("GCPointer: " + csu); + explain(csu, " "); +} + +// Redirect output to the typeInfo file and close the gcTypes file. +os.file.close(os.file.redirect(typeInfo_filename)); + +for (let csu in annotations.GCSuppressors) + addDescriptor(csu, 'Suppress GC'); + +for (let [descriptor, types] of gDescriptors) { + for (let csu of types) + print(descriptor + "$$" + csu); +} + +os.file.close(os.file.redirect(origOut)); diff --git a/js/src/devtools/rootAnalysis/expect.b2g.json b/js/src/devtools/rootAnalysis/expect.b2g.json new file mode 100644 index 000000000..06f2beb36 --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.b2g.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/expect.browser.json b/js/src/devtools/rootAnalysis/expect.browser.json new file mode 100644 index 000000000..06f2beb36 --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.browser.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/expect.shell.json b/js/src/devtools/rootAnalysis/expect.shell.json new file mode 100644 index 000000000..06f2beb36 --- /dev/null +++ b/js/src/devtools/rootAnalysis/expect.shell.json @@ -0,0 +1,3 @@ +{ + "expect-hazards": 0 +} diff --git a/js/src/devtools/rootAnalysis/explain.py b/js/src/devtools/rootAnalysis/explain.py new file mode 100755 index 000000000..dc8b76f5c --- /dev/null +++ b/js/src/devtools/rootAnalysis/explain.py @@ -0,0 +1,103 @@ +#!/usr/bin/python + +import re +import argparse + +from collections import defaultdict + +parser = argparse.ArgumentParser(description='Process some integers.') +parser.add_argument('rootingHazards', nargs='?', default='rootingHazards.txt') +parser.add_argument('gcFunctions', nargs='?', default='gcFunctions.txt') +parser.add_argument('hazards', nargs='?', default='hazards.txt') +parser.add_argument('extra', nargs='?', default='unnecessary.txt') +parser.add_argument('refs', nargs='?', default='refs.txt') +args = parser.parse_args() + +num_hazards = 0 +num_refs = 0 +try: + with open(args.rootingHazards) as rootingHazards, \ + open(args.hazards, 'w') as hazards, \ + open(args.extra, 'w') as extra, \ + open(args.refs, 'w') as refs: + current_gcFunction = None + + # Map from a GC function name to the list of hazards resulting from + # that GC function + hazardousGCFunctions = defaultdict(list) + + # List of tuples (gcFunction, index of hazard) used to maintain the + # ordering of the hazards + hazardOrder = [] + + for line in rootingHazards: + m = re.match(r'^Time: (.*)', line) + mm = re.match(r'^Run on:', line) + if m or mm: + print >>hazards, line + print >>extra, line + print >>refs, line + continue + + m = re.match(r'^Function.*has unnecessary root', line) + if m: + print >>extra, line + continue + + m = re.match(r'^Function.*takes unsafe address of unrooted', line) + if m: + num_refs += 1 + print >>refs, line + continue + + m = re.match(r"^Function.*has unrooted.*of type.*live across GC call ('?)(.*?)('?) at \S+:\d+$", line) + if m: + # Function names are surrounded by single quotes. Field calls + # are unquoted. + current_gcFunction = m.group(2) + hazardousGCFunctions[current_gcFunction].append(line) + hazardOrder.append((current_gcFunction, len(hazardousGCFunctions[current_gcFunction]) - 1)) + num_hazards += 1 + continue + + if current_gcFunction: + if not line.strip(): + # Blank line => end of this hazard + current_gcFunction = None + else: + hazardousGCFunctions[current_gcFunction][-1] += line + + with open(args.gcFunctions) as gcFunctions: + gcExplanations = {} # gcFunction => stack showing why it can GC + + current_func = None + explanation = None + for line in gcFunctions: + m = re.match(r'^GC Function: (.*)', line) + if m: + if current_func: + gcExplanations[current_func] = explanation + current_func = None + if m.group(1) in hazardousGCFunctions: + current_func = m.group(1) + explanation = line + elif current_func: + explanation += line + if current_func: + gcExplanations[current_func] = explanation + + for gcFunction, index in hazardOrder: + gcHazards = hazardousGCFunctions[gcFunction] + + if gcFunction in gcExplanations: + print >>hazards, (gcHazards[index] + gcExplanations[gcFunction]) + else: + print >>hazards, gcHazards[index] + +except IOError as e: + print 'Failed: %s' % str(e) + +print("Wrote %s" % args.hazards) +print("Wrote %s" % args.extra) +print("Wrote %s" % args.refs) +print("Found %d hazards and %d unsafe references" % (num_hazards, num_refs)) diff --git a/js/src/devtools/rootAnalysis/gen-hazards.sh b/js/src/devtools/rootAnalysis/gen-hazards.sh new file mode 100755 index 000000000..7007969a1 --- /dev/null +++ b/js/src/devtools/rootAnalysis/gen-hazards.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +JOBS="$1" + +for j in $(seq $JOBS); do + env PATH=$PATH:$SIXGILL/bin XDB=$SIXGILL/bin/xdb.so $JS $ANALYZE gcFunctions.lst suppressedFunctions.lst gcTypes.txt $j $JOBS tmp.$j > rootingHazards.$j & +done + +wait + +for j in $(seq $JOBS); do + cat rootingHazards.$j +done diff --git a/js/src/devtools/rootAnalysis/loadCallgraph.js b/js/src/devtools/rootAnalysis/loadCallgraph.js new file mode 100644 index 000000000..9ee8d7628 --- /dev/null +++ b/js/src/devtools/rootAnalysis/loadCallgraph.js @@ -0,0 +1,203 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +loadRelativeToScript('utility.js'); + +// Functions come out of sixgill in the form "mangled|readable". The mangled +// name is Truth. One mangled name might correspond to multiple readable names, +// for multiple reasons, including (1) sixgill/gcc doesn't always qualify types +// the same way or de-typedef the same amount; (2) sixgill's output treats +// references and pointers the same, and so doesn't distinguish them, but C++ +// treats them as separate for overloading and linking; (3) (identical) +// destructors sometimes have an int32 parameter, sometimes not. +// +// The readable names are useful because they're far more meaningful to the +// user, and are what should show up in reports and questions to mrgiggles. At +// least in most cases, it's fine to have the extra mangled name tacked onto +// the beginning for these. +// +// The strategy used is to separate out the pieces whenever they are read in, +// create a table mapping mangled names to (one of the) readable names, and +// use the mangled names in all computation. +// +// Note that callgraph.txt uses a compressed representation -- each name is +// mapped to an integer, and those integers are what is recorded in the edges. +// But the integers depend on the full name, whereas the true edge should only +// consider the mangled name. And some of the names encoded in callgraph.txt +// are FieldCalls, not just function names. + +var readableNames = {}; // map from mangled name => list of readable names +var mangledName = {}; // map from demangled names => mangled names. Could be eliminated. +var calleeGraph = {}; // map from mangled => list of tuples of {'callee':mangled, 'suppressed':bool} +var callerGraph = {}; // map from mangled => list of tuples of {'caller':mangled, 'suppressed':bool} +var gcFunctions = {}; // map from mangled callee => reason +var suppressedFunctions = {}; // set of mangled names (map from mangled name => true) +var gcEdges = {}; + +function addGCFunction(caller, reason) +{ + if (caller in suppressedFunctions) + return false; + + if (ignoreGCFunction(caller)) + return false; + + if (!(caller in gcFunctions)) { + gcFunctions[caller] = reason; + return true; + } + + return false; +} + +function addCallEdge(caller, callee, suppressed) +{ + addToKeyedList(calleeGraph, caller, {callee:callee, suppressed:suppressed}); + addToKeyedList(callerGraph, callee, {caller:caller, suppressed:suppressed}); +} + +// Map from identifier to full "mangled|readable" name. Or sometimes to a +// Class.Field name. +var functionNames = [""]; + +// Map from identifier to mangled name (or to a Class.Field) +var idToMangled = [""]; + +function loadCallgraph(file) +{ + var suppressedFieldCalls = {}; + var resolvedFunctions = {}; + + var numGCCalls = 0; + + for (var line of readFileLines_gen(file)) { + line = line.replace(/\n/, ""); + + var match; + if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) { + assert(functionNames.length == match[1]); + functionNames.push(match[2]); + var [ mangled, readable ] = splitFunction(match[2]); + if (mangled in readableNames) + readableNames[mangled].push(readable); + else + readableNames[mangled] = [ readable ]; + mangledName[readable] = mangled; + idToMangled.push(mangled); + continue; + } + var suppressed = false; + if (line.indexOf("SUPPRESS_GC") != -1) { + match = /^(..)SUPPRESS_GC (.*)/.exec(line); + line = match[1] + match[2]; + suppressed = true; + } + var tag = line.charAt(0); + if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) { + var mangledCaller = idToMangled[match[1]]; + var name = match[2]; + if (!indirectCallCannotGC(functionNames[match[1]], name) && !suppressed) + addGCFunction(mangledCaller, "IndirectCall: " + name); + } else if (match = tag == 'F' && /^F (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) { + var caller = idToMangled[match[1]]; + var csu = match[2]; + var fullfield = csu + "." + match[3]; + if (suppressed) + suppressedFieldCalls[fullfield] = true; + else if (!fieldCallCannotGC(csu, fullfield)) + addGCFunction(caller, "FieldCall: " + fullfield); + } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) { + var caller = idToMangled[match[1]]; + var callee = idToMangled[match[2]]; + addCallEdge(caller, callee, suppressed); + } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) { + var callerField = idToMangled[match[1]]; + var callee = idToMangled[match[2]]; + addCallEdge(callerField, callee, false); + resolvedFunctions[callerField] = true; + } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) { + var mangled = idToMangled[match[1]]; + var tag = match[2]; + if (tag == 'GC Call') { + addGCFunction(mangled, "GC"); + numGCCalls++; + } + } + } + + // mess up the id <-> name correspondence. Also, we need to know if the + // functions even exist in the first place.) + for (var func of extraGCFunctions()) { + addGCFunction(func, "annotation"); + } + + // Initialize suppressedFunctions to the set of all functions, and the + // worklist to all toplevel callers. + var worklist = []; + for (var callee in callerGraph) + suppressedFunctions[callee] = true; + for (var caller in calleeGraph) { + if (!(caller in callerGraph)) { + suppressedFunctions[caller] = true; + worklist.push(caller); + } + } + + // Find all functions reachable via an unsuppressed call chain, and remove + // them from the suppressedFunctions set. Everything remaining is only + // reachable when GC is suppressed. + var top = worklist.length; + while (top > 0) { + name = worklist[--top]; + if (!(name in suppressedFunctions)) + continue; + delete suppressedFunctions[name]; + if (!(name in calleeGraph)) + continue; + for (var entry of calleeGraph[name]) { + if (!entry.suppressed) + worklist[top++] = entry.callee; + } + } + + // Such functions are known to not GC. + for (var name in gcFunctions) { + if (name in suppressedFunctions) + delete gcFunctions[name]; + } + + for (var name in suppressedFieldCalls) { + suppressedFunctions[name] = true; + } + + // Sanity check to make sure the callgraph has some functions annotated as + // GC Calls. This is mostly a check to be sure the earlier processing + // succeeded (as opposed to, say, running on empty xdb files because you + // didn't actually compile anything interesting.) + assert(numGCCalls > 0, "No GC functions found!"); + + // Initialize the worklist to all known gcFunctions. + var worklist = []; + for (var name in gcFunctions) + worklist.push(name); + + // Recursively find all callers and add them to the set of gcFunctions. + while (worklist.length) { + name = worklist.shift(); + assert(name in gcFunctions); + if (!(name in callerGraph)) + continue; + for (var entry of callerGraph[name]) { + if (!entry.suppressed && addGCFunction(entry.caller, name)) + worklist.push(entry.caller); + } + } + + // Any field call that has been resolved to all possible callees can be + // trusted to not GC if all of those callees are known to not GC. + for (var name in resolvedFunctions) { + if (!(name in gcFunctions)) + suppressedFunctions[name] = true; + } +} diff --git a/js/src/devtools/rootAnalysis/run-analysis.sh b/js/src/devtools/rootAnalysis/run-analysis.sh new file mode 100755 index 000000000..bdfab6e68 --- /dev/null +++ b/js/src/devtools/rootAnalysis/run-analysis.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +SRCDIR=$(cd $(dirname $0)/../../../..; pwd) +GECKO_DIR=$SRCDIR $SRCDIR/taskcluster/scripts/builder/build-haz-linux.sh $(pwd) "$@" diff --git a/js/src/devtools/rootAnalysis/run-test.py b/js/src/devtools/rootAnalysis/run-test.py new file mode 100644 index 000000000..3bc9085a0 --- /dev/null +++ b/js/src/devtools/rootAnalysis/run-test.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import os +import site +import subprocess +import argparse + +testdir = os.path.abspath(os.path.join(os.path.dirname(__file__), 't')) +site.addsitedir(testdir) +from testlib import Test, equal + +scriptdir = os.path.abspath(os.path.dirname(__file__)) + +parser = argparse.ArgumentParser(description='run hazard analysis tests') +parser.add_argument( + '--js', default=os.environ.get('JS'), + help='JS binary to run the tests with') +parser.add_argument( + '--sixgill', default=os.environ.get('SIXGILL', os.path.join(testdir, "sixgill")), + help='Path to root of sixgill installation') +parser.add_argument( + '--sixgill-bin', default=os.environ.get('SIXGILL_BIN'), + help='Path to sixgill binary dir') +parser.add_argument( + '--sixgill-plugin', default=os.environ.get('SIXGILL_PLUGIN'), + help='Full path to sixgill gcc plugin') +parser.add_argument( + '--gccdir', default=os.environ.get('GCCDIR'), + help='Path to GCC installation dir') +parser.add_argument( + '--cc', default=os.environ.get('CC'), + help='Path to gcc') +parser.add_argument( + '--cxx', default=os.environ.get('CXX'), + help='Path to g++') +parser.add_argument( + '--verbose', '-v', action='store_true', + help='Display verbose output, including commands executed') +parser.add_argument( + 'tests', nargs='*', default=['sixgill-tree', 'suppression', 'hazards', 'exceptions'], + help='tests to run') + +cfg = parser.parse_args() + +if not cfg.js: + exit('Must specify JS binary through environment variable or --js option') +if not cfg.cc: + if cfg.gccdir: + cfg.cc = os.path.join(cfg.gccdir, "bin", "gcc") + else: + cfg.cc = "gcc" +if not cfg.cxx: + if cfg.gccdir: + cfg.cxx = os.path.join(cfg.gccdir, "bin", "g++") + else: + cfg.cxx = "g++" +if not cfg.sixgill_bin: + cfg.sixgill_bin = os.path.join(cfg.sixgill, "usr", "bin") +if not cfg.sixgill_plugin: + cfg.sixgill_plugin = os.path.join(cfg.sixgill, "usr", "libexec", "sixgill", "gcc", "xgill.so") + +subprocess.check_call([cfg.js, '-e', 'if (!getBuildConfiguration()["has-ctypes"]) quit(1)']) + +def binpath(prog): + return os.path.join(cfg.sixgill_bin, prog) + +try: + os.mkdir(os.path.join('t', 'out')) +except OSError: + pass + +for name in cfg.tests: + name = os.path.basename(name) + indir = os.path.join(testdir, name) + outdir = os.path.join(testdir, 'out', name) + try: + os.mkdir(outdir) + except OSError: + pass + + test = Test(indir, outdir, cfg) + + os.chdir(outdir) + subprocess.call(["sh", "-c", "rm *.xdb"]) + execfile(os.path.join(indir, "test.py"), {'test': test, 'equal': equal}) + print("TEST-PASSED: %s" % name) diff --git a/js/src/devtools/rootAnalysis/run_complete b/js/src/devtools/rootAnalysis/run_complete new file mode 100755 index 000000000..b1fbadb81 --- /dev/null +++ b/js/src/devtools/rootAnalysis/run_complete @@ -0,0 +1,380 @@ +#!/usr/bin/perl + +# Sixgill: Static assertion checker for C/C++ programs. +# Copyright (C) 2009-2010 Stanford University +# Author: Brian Hackett +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# do a complete run of the system from raw source to reports. this requires +# various run_monitor processes to be running in the background (maybe on other +# machines) and watching a shared poll_file for jobs. if the output directory +# for this script already exists then an incremental analysis will be performed +# and the reports will only reflect the changes since the earlier run. + +use strict; +use warnings; +use IO::Handle; +use File::Basename qw(dirname); +use Getopt::Long; +use Cwd; + +################################# +# environment specific settings # +################################# + +my $WORKDIR; +my $SIXGILL_BIN; + +# poll file shared with the run_monitor script. +my $poll_file; + +# root directory of the project. +my $build_dir; + +# directory containing gcc wrapper scripts. +my $wrap_dir; + +# optional file with annotations from the web interface. +my $ann_file = ""; + +# optional output directory to do a diff against. +my $old_dir = ""; + +# run in the foreground +my $foreground; + +my $builder = "make -j4"; + +my $suppress_logs; +GetOptions("build-root|b=s" => \$build_dir, + "poll-file=s" => \$poll_file, + "no-logs!" => \$suppress_logs, + "work-dir=s" => \$WORKDIR, + "sixgill-binaries|binaries|b=s" => \$SIXGILL_BIN, + "wrap-dir=s" => \$wrap_dir, + "annotations-file|annotations|a=s" => \$ann_file, + "old-dir|old=s" => \$old_dir, + "foreground!" => \$foreground, + "buildcommand=s" => \$builder, + ) + or die; + +if (not -d $build_dir) { + mkdir($build_dir); +} +if ($old_dir ne "" && not -d $old_dir) { + die "Old directory '$old_dir' does not exist\n"; +} + +$WORKDIR ||= "sixgill-work"; +mkdir($WORKDIR, 0755) if ! -d $WORKDIR; +$poll_file ||= "$WORKDIR/poll.file"; +$build_dir ||= "$WORKDIR/js-inbound-xgill"; + +if (!defined $SIXGILL_BIN) { + chomp(my $path = `which xmanager`); + if ($path) { + use File::Basename qw(dirname); + $SIXGILL_BIN = dirname($path); + } else { + die "Cannot find sixgill binaries. Use the -b option."; + } +} + +$wrap_dir ||= "$WORKDIR/xgill-inbound/wrap_gcc"; +$wrap_dir = "$SIXGILL_BIN/../scripts/wrap_gcc" if not (-e "$wrap_dir/basecc"); +die "Bad wrapper directory: $wrap_dir" if not (-e "$wrap_dir/basecc"); + +# code to clean the project from $build_dir. +sub clean_project { + system("make clean"); +} + +# code to build the project from $build_dir. +sub build_project { + return system($builder) >> 8; +} + +our %kill_on_exit; +END { + for my $pid (keys %kill_on_exit) { + kill($pid); + } +} + +# commands to start the various xgill binaries. timeouts can be specified +# for the backend analyses here, and a memory limit can be specified for +# xmanager if desired (and USE_COUNT_ALLOCATOR is defined in util/alloc.h). +my $xmanager = "$SIXGILL_BIN/xmanager"; +my $xsource = "$SIXGILL_BIN/xsource"; +my $xmemlocal = "$SIXGILL_BIN/xmemlocal -timeout=20"; +my $xinfer = "$SIXGILL_BIN/xinfer -timeout=60"; +my $xcheck = "$SIXGILL_BIN/xcheck -timeout=30"; + +# prefix directory to strip off source files. +my $prefix_dir = $build_dir; + +########################## +# general purpose script # +########################## + +# Prevent ccache from being used. I don't think this does any good. The problem +# I'm struggling with is that if autoconf.mk still has 'ccache gcc' in it, the +# builds fail in a mysterious way. +$ENV{CCACHE_COMPILERCHECK} = 'date +%s.%N'; +delete $ENV{CCACHE_PREFIX}; + +my $usage = "USAGE: run_complete result-dir\n"; +my $result_dir = shift or die $usage; + +if (not $foreground) { + my $pid = fork(); + if ($pid != 0) { + print "Forked, exiting...\n"; + exit(0); + } +} + +# if the result directory does not already exist, mark for a clean build. +my $do_clean = 0; +if (not (-d $result_dir)) { + $do_clean = 1; + mkdir $result_dir; +} + +if (!$suppress_logs) { + my $log_file = "$result_dir/complete.log"; + open(OUT, ">>", $log_file) or die "append to $log_file: $!"; + OUT->autoflush(1); # don't buffer writes to the main log. + + # redirect stdout and stderr to the log. + STDOUT->fdopen(\*OUT, "w"); + STDERR->fdopen(\*OUT, "w"); +} + +# pids to wait on before exiting. these are collating worker output. +my @waitpids; + +chdir $result_dir; + +# to do a partial run, comment out the commands here you don't want to do. + +my $status = run_build(); + +# end of run commands. + +for my $pid (@waitpids) { + waitpid($pid, 0); + $status ||= $? >> 8; +} + +print "Exiting run_complete with status $status\n"; +exit $status; + +# get the IP address which a freshly created manager is listening on. +sub get_manager_address +{ + my $log_file = shift or die; + + # give the manager one second to start, any longer and something's broken. + sleep(1); + + my $log_data = `cat $log_file`; + my ($port) = $log_data =~ /Listening on ([\.\:0-9]*)/ + or die "no manager found"; + print OUT "Connecting to manager on port $port\n" unless $suppress_logs; + print "Connecting to manager on port $port.\n"; + return $1; +} + +sub logging_suffix { + my ($show_logs, $log_file) = @_; + return $show_logs ? "2>&1 | tee $log_file" + : "> $log_file 2>&1"; +} + +sub run_build +{ + print "build started: "; + print scalar(localtime()); + print "\n"; + + # fork off a process to run the build. + defined(my $pid = fork) or die; + + # log file for the manager. + my $manager_log_file = "$result_dir/build_manager.log"; + + if (!$pid) { + # this is the child process, fork another process to run a manager. + defined(my $pid = fork) or die; + my $logging = logging_suffix($suppress_logs, $manager_log_file); + exec("$xmanager -terminate-on-assert $logging") if (!$pid); + $kill_on_exit{$pid} = 1; + + if (!$suppress_logs) { + # open new streams to redirect stdout and stderr. + open(LOGOUT, "> $result_dir/build.log"); + open(LOGERR, "> $result_dir/build_err.log"); + STDOUT->fdopen(\*LOGOUT, "w"); + STDERR->fdopen(\*LOGERR, "w"); + } + + my $address = get_manager_address($manager_log_file); + + # write the configuration file for the wrapper script. + my $config_file = "$WORKDIR/xgill.config"; + open(CONFIG, ">", $config_file) or die "create $config_file: $!"; + print CONFIG "$prefix_dir\n"; + print CONFIG Cwd::abs_path("$result_dir/build_xgill.log")."\n"; + print CONFIG "$address\n"; + my @extra = ("-fplugin-arg-xgill-mangle=1"); + push(@extra, "-fplugin-arg-xgill-annfile=$ann_file") + if ($ann_file ne "" && -e $ann_file); + print CONFIG join(" ", @extra) . "\n"; + close(CONFIG); + + # Tell the wrapper where to find the config + $ENV{"XGILL_CONFIG"} = Cwd::abs_path($config_file); + + # update the PATH so that the build will see the wrappers. + if (exists $ENV{CC}) { + $ENV{PATH} = dirname($ENV{CC}) . ":$ENV{PATH}"; + delete $ENV{CC}; + delete $ENV{CXX}; + } + $ENV{"PATH"} = "$wrap_dir:" . $ENV{"PATH"}; + + # do the build, cleaning if necessary. + chdir $build_dir; + clean_project() if ($do_clean); + my $exit_status = build_project(); + + # signal the manager that it's over. + system("$xsource -remote=$address -end-manager"); + + # wait for the manager to clean up and terminate. + print "Waiting for manager to finish (build status $exit_status)...\n"; + waitpid($pid, 0); + my $manager_status = $?; + delete $kill_on_exit{$pid}; + + # build is finished, the complete run can resume. + # return value only useful if --foreground + print "Exiting with status " . ($manager_status || $exit_status) . "\n"; + exit($manager_status || $exit_status); + } + + # this is the complete process, wait for the build to finish. + waitpid($pid, 0); + my $status = $? >> 8; + print "build finished (status $status): "; + print scalar(localtime()); + print "\n"; + + return $status; +} + +sub run_pass +{ + my ($name, $command) = @_; + my $log_file = "$result_dir/manager.$name.log"; + + # extra commands to pass to the manager. + my $manager_extra = ""; + $manager_extra .= "-modset-wait=10" if ($name eq "xmemlocal"); + + # fork off a manager process for the analysis. + defined(my $pid = fork) or die; + my $logging = logging_suffix($suppress_logs, $log_file); + exec("$xmanager $manager_extra $logging") if (!$pid); + + my $address = get_manager_address($log_file); + + # write the poll file for this pass. + if (! -d dirname($poll_file)) { + system("mkdir", "-p", dirname($poll_file)); + } + open(POLL, "> $poll_file"); + print POLL "$command\n"; + print POLL "$result_dir/$name\n"; + print POLL "$address\n"; + close(POLL); + + print "$name started: "; + print scalar(localtime()); + print "\n"; + + waitpid($pid, 0); + unlink($poll_file); + + print "$name finished: "; + print scalar(localtime()); + print "\n"; + + # collate the worker's output into a single file. make this asynchronous + # so we can wait a bit and make sure we get all worker output. + defined($pid = fork) or die; + + if (!$pid) { + sleep(20); + exec("cat $name.*.log > $name.log"); + } + + push(@waitpids, $pid); +} + +# the names of all directories containing reports to archive. +my $indexes; + +sub run_index +{ + my ($name, $kind) = @_; + + return if (not (-e "report_$kind.xdb")); + + print "$name started: "; + print scalar(localtime()); + print "\n"; + + # make an index for the report diff if applicable. + if ($old_dir ne "") { + system("make_index $kind $old_dir > $name.diff.log"); + system("mv $kind diff_$kind"); + $indexes .= " diff_$kind"; + } + + # make an index for the full set of reports. + system("make_index $kind > $name.log"); + $indexes .= " $kind"; + + print "$name finished: "; + print scalar(localtime()); + print "\n"; +} + +sub archive_indexes +{ + print "archive started: "; + print scalar(localtime()); + print "\n"; + + system("tar -czf reports.tgz $indexes"); + system("rm -rf $indexes"); + + print "archive finished: "; + print scalar(localtime()); + print "\n"; +} diff --git a/js/src/devtools/rootAnalysis/t/exceptions/source.cpp b/js/src/devtools/rootAnalysis/t/exceptions/source.cpp new file mode 100644 index 000000000..14169740e --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/exceptions/source.cpp @@ -0,0 +1,42 @@ +#define ANNOTATE(property) __attribute__((tag(property))) + +struct Cell { int f; } ANNOTATE("GC Thing"); + +extern void GC() ANNOTATE("GC Call"); + +void GC() +{ + // If the implementation is too trivial, the function body won't be emitted at all. + asm(""); +} + +class RAII_GC { + public: + RAII_GC() {} + ~RAII_GC() { GC(); } +}; + +// ~AutoSomething calls GC because of the RAII_GC field. The constructor, +// though, should *not* GC -- unless it throws an exception. Which is not +// possible when compiled with -fno-exceptions. +class AutoSomething { + RAII_GC gc; + public: + AutoSomething() : gc() { + asm(""); // Ooh, scary, this might throw an exception + } + ~AutoSomething() { + asm(""); + } +}; + +extern void usevar(Cell* cell); + +void f() { + Cell* thing = nullptr; // Live range starts here + + { + AutoSomething smth; // Constructor can GC only if exceptions are enabled + usevar(thing); // Live range ends here + } // In particular, 'thing' is dead at the destructor, so no hazard +} diff --git a/js/src/devtools/rootAnalysis/t/exceptions/test.py b/js/src/devtools/rootAnalysis/t/exceptions/test.py new file mode 100644 index 000000000..f6d7f5e35 --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/exceptions/test.py @@ -0,0 +1,19 @@ +test.compile("source.cpp", '-fno-exceptions') +test.run_analysis_script('gcTypes') + +hazards = test.load_hazards() +assert(len(hazards) == 0) + +# If we compile with exceptions, then there *should* be a hazard because +# AutoSomething::AutoSomething might throw an exception, which would cause the +# partially-constructed value to be torn down, which will call ~RAII_GC. + +test.compile("source.cpp", '-fexceptions') +test.run_analysis_script('gcTypes') + +hazards = test.load_hazards() +assert(len(hazards) == 1) +hazard = hazards[0] +assert(hazard.function == 'void f()') +assert(hazard.variable == 'thing') +assert("AutoSomething::AutoSomething" in hazard.GCFunction) diff --git a/js/src/devtools/rootAnalysis/t/hazards/source.cpp b/js/src/devtools/rootAnalysis/t/hazards/source.cpp new file mode 100644 index 000000000..7f84a99db --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/hazards/source.cpp @@ -0,0 +1,186 @@ +#define ANNOTATE(property) __attribute__((tag(property))) + +struct Cell { int f; } ANNOTATE("GC Thing"); + +struct RootedCell { RootedCell(Cell*) {} } ANNOTATE("Rooted Pointer"); + +class AutoSuppressGC_Base { + public: + AutoSuppressGC_Base() {} + ~AutoSuppressGC_Base() {} +} ANNOTATE("Suppress GC"); + +class AutoSuppressGC_Child : public AutoSuppressGC_Base { + public: + AutoSuppressGC_Child() : AutoSuppressGC_Base() {} +}; + +class AutoSuppressGC { + AutoSuppressGC_Child helpImBeingSuppressed; + + public: + AutoSuppressGC() {} +}; + +extern void GC() ANNOTATE("GC Call"); +extern void invisible(); + +void GC() +{ + // If the implementation is too trivial, the function body won't be emitted at all. + asm(""); + invisible(); +} + +extern void usecell(Cell*); + +void suppressedFunction() { + GC(); // Calls GC, but is always called within AutoSuppressGC +} + +void halfSuppressedFunction() { + GC(); // Calls GC, but is sometimes called within AutoSuppressGC +} + +void unsuppressedFunction() { + GC(); // Calls GC, never within AutoSuppressGC +} + +volatile static int x = 3; +volatile static int* xp = &x; +struct GCInDestructor { + ~GCInDestructor() { + invisible(); + asm(""); + *xp = 4; + GC(); + } +}; + +Cell* +f() +{ + GCInDestructor kaboom; + + Cell cell; + Cell* cell1 = &cell; + Cell* cell2 = &cell; + Cell* cell3 = &cell; + Cell* cell4 = &cell; + { + AutoSuppressGC nogc; + suppressedFunction(); + halfSuppressedFunction(); + } + usecell(cell1); + halfSuppressedFunction(); + usecell(cell2); + unsuppressedFunction(); + { + // Old bug: it would look from the first AutoSuppressGC constructor it + // found to the last destructor. This statement *should* have no effect. + AutoSuppressGC nogc; + } + usecell(cell3); + Cell* cell5 = &cell; + usecell(cell5); + + // Hazard in return value due to ~GCInDestructor + Cell* cell6 = &cell; + return cell6; +} + +Cell* copy_and_gc(Cell* src) +{ + GC(); + return reinterpret_cast<Cell*>(88); +} + +void use(Cell* cell) +{ + static int x = 0; + if (cell) + x++; +} + +struct CellContainer { + Cell* cell; + CellContainer() { + asm(""); + } +}; + +void loopy() +{ + Cell cell; + + // No hazard: haz1 is not live during call to copy_and_gc. + Cell* haz1; + for (int i = 0; i < 10; i++) { + haz1 = copy_and_gc(haz1); + } + + // No hazard: haz2 is live up to just before the GC, and starting at the + // next statement after it, but not across the GC. + Cell* haz2 = &cell; + for (int j = 0; j < 10; j++) { + use(haz2); + GC(); + haz2 = &cell; + } + + // Hazard: haz3 is live from the final statement in one iteration, across + // the GC in the next, to the use in the 2nd statement. + Cell* haz3; + for (int k = 0; k < 10; k++) { + GC(); + use(haz3); + haz3 = &cell; + } + + // Hazard: haz4 is live across a GC hidden in a loop. + Cell* haz4 = &cell; + for (int i2 = 0; i2 < 10; i2++) { + GC(); + } + use(haz4); + + // Hazard: haz5 is live from within a loop across a GC. + Cell* haz5; + for (int i3 = 0; i3 < 10; i3++) { + haz5 = &cell; + } + GC(); + use(haz5); + + // No hazard: similar to the haz3 case, but verifying that we do not get + // into an infinite loop. + Cell* haz6; + for (int i4 = 0; i4 < 10; i4++) { + GC(); + haz6 = &cell; + } + + // No hazard: haz7 is constructed within the body, so it can't make a + // hazard across iterations. Note that this requires CellContainer to have + // a constructor, because otherwise the analysis doesn't see where + // variables are declared. (With the constructor, it knows that + // construction of haz7 obliterates any previous value it might have had. + // Not that that's possible given its scope, but the analysis doesn't get + // that information.) + for (int i5 = 0; i5 < 10; i5++) { + GC(); + CellContainer haz7; + use(haz7.cell); + haz7.cell = &cell; + } + + // Hazard: make sure we *can* see hazards across iterations involving + // CellContainer; + CellContainer haz8; + for (int i6 = 0; i6 < 10; i6++) { + GC(); + use(haz8.cell); + haz8.cell = &cell; + } +} diff --git a/js/src/devtools/rootAnalysis/t/hazards/test.py b/js/src/devtools/rootAnalysis/t/hazards/test.py new file mode 100644 index 000000000..3eb08aa09 --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/hazards/test.py @@ -0,0 +1,47 @@ +test.compile("source.cpp") +test.run_analysis_script('gcTypes') + +# gcFunctions should be the inverse, but we get to rely on unmangled names here. +gcFunctions = test.load_gcFunctions() +print(gcFunctions) +assert('void GC()' in gcFunctions) +assert('void suppressedFunction()' not in gcFunctions) +assert('void halfSuppressedFunction()' in gcFunctions) +assert('void unsuppressedFunction()' in gcFunctions) +assert('Cell* f()' in gcFunctions) + +hazards = test.load_hazards() +hazmap = {haz.variable: haz for haz in hazards} +assert('cell1' not in hazmap) +assert('cell2' in hazmap) +assert('cell3' in hazmap) +assert('cell4' not in hazmap) +assert('cell5' not in hazmap) +assert('cell6' not in hazmap) +assert('<returnvalue>' in hazmap) + +# All hazards should be in f() and loopy() +assert(hazmap['cell2'].function == 'Cell* f()') +print(len(set(haz.function for haz in hazards))) +assert(len(set(haz.function for haz in hazards)) == 2) + +# Check that the correct GC call is reported for each hazard. (cell3 has a +# hazard from two different GC calls; it doesn't really matter which is +# reported.) +assert(hazmap['cell2'].GCFunction == 'void halfSuppressedFunction()') +assert(hazmap['cell3'].GCFunction in ('void halfSuppressedFunction()', 'void unsuppressedFunction()')) +assert(hazmap['<returnvalue>'].GCFunction == 'void GCInDestructor::~GCInDestructor()') + +# Type names are handy to have in the report. +assert(hazmap['cell2'].type == 'Cell*') +assert(hazmap['<returnvalue>'].type == 'Cell*') + +# loopy hazards. See comments in source. +assert('haz1' not in hazmap); +assert('haz2' not in hazmap); +assert('haz3' in hazmap); +assert('haz4' in hazmap); +assert('haz5' in hazmap); +assert('haz6' not in hazmap); +assert('haz7' not in hazmap); +assert('haz8' in hazmap); diff --git a/js/src/devtools/rootAnalysis/t/sixgill-tree/source.cpp b/js/src/devtools/rootAnalysis/t/sixgill-tree/source.cpp new file mode 100644 index 000000000..2de9ef4bb --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/sixgill-tree/source.cpp @@ -0,0 +1,70 @@ +#define ANNOTATE(property) __attribute__((tag(property))) + +namespace js { +namespace gc { +struct Cell { int f; } ANNOTATE("GC Thing"); +} +} + +struct Bogon { +}; + +struct JustACell : public js::gc::Cell { + bool iHaveNoDataMembers() { return true; } +}; + +struct JSObject : public js::gc::Cell, public Bogon { + int g; +}; + +struct SpecialObject : public JSObject { + int z; +}; + +struct ErrorResult { + bool hasObj; + JSObject *obj; + void trace() {} +} ANNOTATE("Suppressed GC Pointer"); + +struct OkContainer { + ErrorResult res; + bool happy; +}; + +struct UnrootedPointer { + JSObject *obj; +}; + +template <typename T> +class Rooted { + T data; +} ANNOTATE("Rooted Pointer"); + +extern void js_GC() ANNOTATE("GC Call") ANNOTATE("Slow"); + +void js_GC() {} + +void root_arg(JSObject *obj, JSObject *random) +{ + // Use all these types so they get included in the output. + SpecialObject so; + UnrootedPointer up; + Bogon b; + OkContainer okc; + Rooted<JSObject*> ro; + Rooted<SpecialObject*> rso; + + obj = random; + + JSObject *other1 = obj; + js_GC(); + + float MARKER1 = 0; + JSObject *other2 = obj; + other1->f = 1; + other2->f = -1; + + unsigned int u1 = 1; + unsigned int u2 = -1; +} diff --git a/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py b/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py new file mode 100644 index 000000000..c0c0263cd --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/sixgill-tree/test.py @@ -0,0 +1,60 @@ +import re + +test.compile("source.cpp") +test.computeGCTypes() +body = test.process_body(test.load_db_entry("src_body", re.compile(r'root_arg'))[0]) + +# Rendering positive and negative integers +marker1 = body.assignment_line('MARKER1') +equal(body.edge_from_line(marker1 + 2)['Exp'][1]['String'], '1') +equal(body.edge_from_line(marker1 + 3)['Exp'][1]['String'], '-1') + +equal(body.edge_from_point(body.assignment_point('u1'))['Exp'][1]['String'], '1') +equal(body.edge_from_point(body.assignment_point('u2'))['Exp'][1]['String'], '4294967295') + +assert('obj' in body['Variables']) +assert('random' in body['Variables']) +assert('other1' in body['Variables']) +assert('other2' in body['Variables']) + +# Test function annotations +js_GC = test.process_body(test.load_db_entry("src_body", re.compile(r'js_GC'))[0]) +annotations = js_GC['Variables']['void js_GC()']['Annotation'] +assert(annotations) +found_call_tag = False +for annotation in annotations: + (annType, value) = annotation['Name'] + if annType == 'Tag' and value == 'GC Call': + found_call_tag = True +assert(found_call_tag) + +# Test type annotations + +# js::gc::Cell first +cell = test.load_db_entry("src_comp", 'js::gc::Cell')[0] +assert(cell['Kind'] == 'Struct') +annotations = cell['Annotation'] +assert(len(annotations) == 1) +(tag, value) = annotations[0]['Name'] +assert(tag == 'Tag') +assert(value == 'GC Thing') + +# Check JSObject inheritance. +JSObject = test.load_db_entry("src_comp", 'JSObject')[0] +bases = [ b['Base'] for b in JSObject['CSUBaseClass'] ] +assert('js::gc::Cell' in bases) +assert('Bogon' in bases) +assert(len(bases) == 2) + +# Check type analysis +gctypes = test.load_gcTypes() +assert('js::gc::Cell' in gctypes['GCThings']) +assert('JustACell' in gctypes['GCThings']) +assert('JSObject' in gctypes['GCThings']) +assert('SpecialObject' in gctypes['GCThings']) +assert('UnrootedPointer' in gctypes['GCPointers']) +assert('Bogon' not in gctypes['GCThings']) +assert('Bogon' not in gctypes['GCPointers']) +assert('ErrorResult' not in gctypes['GCPointers']) +assert('OkContainer' not in gctypes['GCPointers']) +assert('class Rooted<JSObject*>' not in gctypes['GCPointers']) diff --git a/js/src/devtools/rootAnalysis/t/sixgill.py b/js/src/devtools/rootAnalysis/t/sixgill.py new file mode 100644 index 000000000..2bdf76a49 --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/sixgill.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from collections import defaultdict + +# Simplified version of the body info. +class Body(dict): + def __init__(self, body): + self['BlockIdKind'] = body['BlockId']['Kind'] + if 'Variable' in body['BlockId']: + self['BlockName'] = body['BlockId']['Variable']['Name'][0].split("$")[-1] + loc = body['Location'] + self['LineRange'] = (loc[0]['Line'], loc[1]['Line']) + self['Filename'] = loc[0]['CacheString'] + self['Edges'] = body.get('PEdge', []) + self['Points'] = { i: p['Location']['Line'] for i, p in enumerate(body['PPoint'], 1) } + self['Index'] = body['Index'] + self['Variables'] = { x['Variable']['Name'][0].split("$")[-1]: x['Type'] for x in body['DefineVariable'] } + + # Indexes + self['Line2Points'] = defaultdict(list) + for point, line in self['Points'].items(): + self['Line2Points'][line].append(point) + self['SrcPoint2Edges'] = defaultdict(list) + for edge in self['Edges']: + src, dst = edge['Index'] + self['SrcPoint2Edges'][src].append(edge) + self['Line2Edges'] = defaultdict(list) + for (src, edges) in self['SrcPoint2Edges'].items(): + line = self['Points'][src] + self['Line2Edges'][line].extend(edges) + + def edges_from_line(self, line): + return self['Line2Edges'][line] + + def edge_from_line(self, line): + edges = self.edges_from_line(line) + assert(len(edges) == 1) + return edges[0] + + def edges_from_point(self, point): + return self['SrcPoint2Edges'][point] + + def edge_from_point(self, point): + edges = self.edges_from_point(point) + assert(len(edges) == 1) + return edges[0] + + def assignment_point(self, varname): + for edge in self['Edges']: + if edge['Kind'] != 'Assign': + continue + dst = edge['Exp'][0] + if dst['Kind'] != 'Var': + continue + if dst['Variable']['Name'][0] == varname: + return edge['Index'][0] + raise Exception("assignment to variable %s not found" % varname) + + def assignment_line(self, varname): + return self['Points'][self.assignment_point(varname)] diff --git a/js/src/devtools/rootAnalysis/t/suppression/source.cpp b/js/src/devtools/rootAnalysis/t/suppression/source.cpp new file mode 100644 index 000000000..e7b41b4cb --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/suppression/source.cpp @@ -0,0 +1,64 @@ +#define ANNOTATE(property) __attribute__((tag(property))) + +struct Cell { int f; } ANNOTATE("GC Thing"); + +class AutoSuppressGC_Base { + public: + AutoSuppressGC_Base() {} + ~AutoSuppressGC_Base() {} +} ANNOTATE("Suppress GC"); + +class AutoSuppressGC_Child : public AutoSuppressGC_Base { + public: + AutoSuppressGC_Child() : AutoSuppressGC_Base() {} +}; + +class AutoSuppressGC { + AutoSuppressGC_Child helpImBeingSuppressed; + + public: + AutoSuppressGC() {} +}; + +extern void GC() ANNOTATE("GC Call"); + +void GC() +{ + // If the implementation is too trivial, the function body won't be emitted at all. + asm(""); +} + +extern void foo(Cell*); + +void suppressedFunction() { + GC(); // Calls GC, but is always called within AutoSuppressGC +} + +void halfSuppressedFunction() { + GC(); // Calls GC, but is sometimes called within AutoSuppressGC +} + +void unsuppressedFunction() { + GC(); // Calls GC, never within AutoSuppressGC +} + +void f() { + Cell* cell1 = nullptr; + Cell* cell2 = nullptr; + Cell* cell3 = nullptr; + { + AutoSuppressGC nogc; + suppressedFunction(); + halfSuppressedFunction(); + } + foo(cell1); + halfSuppressedFunction(); + foo(cell2); + unsuppressedFunction(); + { + // Old bug: it would look from the first AutoSuppressGC constructor it + // found to the last destructor. This statement *should* have no effect. + AutoSuppressGC nogc; + } + foo(cell3); +} diff --git a/js/src/devtools/rootAnalysis/t/suppression/test.py b/js/src/devtools/rootAnalysis/t/suppression/test.py new file mode 100644 index 000000000..65974cc33 --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/suppression/test.py @@ -0,0 +1,23 @@ +test.compile("source.cpp") +test.run_analysis_script('gcTypes', upto='gcFunctions') + +# The suppressions file uses only mangled names since it's for internal use, +# though I may change that soon given (1) the unfortunate non-uniqueness of +# mangled constructor names, and (2) the usefulness of this file for +# mrgiggles's reporting. +suppressed = test.load_suppressed_functions() + +# Only one of these is fully suppressed (ie, *always* called within the scope +# of an AutoSuppressGC). +assert(len(filter(lambda f: 'suppressedFunction' in f, suppressed)) == 1) +assert(len(filter(lambda f: 'halfSuppressedFunction' in f, suppressed)) == 0) +assert(len(filter(lambda f: 'unsuppressedFunction' in f, suppressed)) == 0) + +# gcFunctions should be the inverse, but we get to rely on unmangled names here. +gcFunctions = test.load_gcFunctions() +print(gcFunctions) +assert('void GC()' in gcFunctions) +assert('void suppressedFunction()' not in gcFunctions) +assert('void halfSuppressedFunction()' in gcFunctions) +assert('void unsuppressedFunction()' in gcFunctions) +assert('void f()' in gcFunctions) diff --git a/js/src/devtools/rootAnalysis/t/testlib.py b/js/src/devtools/rootAnalysis/t/testlib.py new file mode 100644 index 000000000..438398f1e --- /dev/null +++ b/js/src/devtools/rootAnalysis/t/testlib.py @@ -0,0 +1,120 @@ +import json +import os +import re +import subprocess + +from sixgill import Body +from collections import defaultdict, namedtuple + +scriptdir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) + +HazardSummary = namedtuple('HazardSummary', ['function', 'variable', 'type', 'GCFunction', 'location']) + + +def equal(got, expected): + if got != expected: + print("Got '%s', expected '%s'" % (got, expected)) + +def extract_unmangled(func): + return func.split('$')[-1] + + +class Test(object): + def __init__(self, indir, outdir, cfg): + self.indir = indir + self.outdir = outdir + self.cfg = cfg + + def infile(self, path): + return os.path.join(self.indir, path) + + def binpath(self, prog): + return os.path.join(self.cfg.sixgill_bin, prog) + + def compile(self, source, options = ''): + cmd = "{CXX} -c {source} -O3 -std=c++11 -fplugin={sixgill} -fplugin-arg-xgill-mangle=1 {options}".format( + source=self.infile(source), + CXX=self.cfg.cxx, sixgill=self.cfg.sixgill_plugin, + options=options) + if self.cfg.verbose: + print("Running %s" % cmd) + subprocess.check_call(["sh", "-c", cmd]) + + def load_db_entry(self, dbname, pattern): + '''Look up an entry from an XDB database file, 'pattern' may be an exact + matching string, or an re pattern object matching a single entry.''' + + if not isinstance(pattern, basestring): + output = subprocess.check_output([self.binpath("xdbkeys"), dbname + ".xdb"]) + matches = filter(lambda _: re.search(pattern, _), output.splitlines()) + if len(matches) == 0: + raise Exception("entry not found") + if len(matches) > 1: + raise Exception("multiple entries found") + pattern = matches[0] + + output = subprocess.check_output([self.binpath("xdbfind"), "-json", dbname + ".xdb", pattern]) + return json.loads(output) + + def run_analysis_script(self, phase, upto=None): + file("defaults.py", "w").write('''\ +analysis_scriptdir = '{scriptdir}' +sixgill_bin = '{bindir}' +'''.format(scriptdir=scriptdir, bindir=self.cfg.sixgill_bin)) + cmd = [os.path.join(scriptdir, "analyze.py"), phase] + if upto: + cmd += ["--upto", upto] + cmd.append("--source=%s" % self.indir) + cmd.append("--objdir=%s" % self.outdir) + cmd.append("--js=%s" % self.cfg.js) + if self.cfg.verbose: + cmd.append("--verbose") + print("Running " + " ".join(cmd)) + subprocess.check_call(cmd) + + def computeGCTypes(self): + self.run_analysis_script("gcTypes", upto="gcTypes") + + def computeHazards(self): + self.run_analysis_script("gcTypes") + + def load_text_file(self, filename, extract=lambda l: l): + fullpath = os.path.join(self.outdir, filename) + values = (extract(line.strip()) for line in file(fullpath)) + return filter(lambda _: _ is not None, values) + + def load_suppressed_functions(self): + return set(self.load_text_file("suppressedFunctions.lst")) + + def load_gcTypes(self): + def grab_type(line): + m = re.match(r'^(GC\w+): (.*)', line) + if m: + return (m.group(1) + 's', m.group(2)) + return None + + gctypes = defaultdict(list) + for collection, typename in self.load_text_file('gcTypes.txt', extract=grab_type): + gctypes[collection].append(typename) + return gctypes + + def load_gcFunctions(self): + return self.load_text_file('gcFunctions.lst', extract=extract_unmangled) + + def load_hazards(self): + def grab_hazard(line): + m = re.match(r"Function '(.*?)' has unrooted '(.*?)' of type '(.*?)' live across GC call '(.*?)' at (.*)", line) + if m: + info = list(m.groups()) + info[0] = info[0].split("$")[-1] + info[3] = info[3].split("$")[-1] + return HazardSummary(*info) + return None + + return self.load_text_file('rootingHazards.txt', extract=grab_hazard) + + def process_body(self, body): + return Body(body) + + def process_bodies(self, bodies): + return [self.process_body(b) for b in bodies] diff --git a/js/src/devtools/rootAnalysis/utility.js b/js/src/devtools/rootAnalysis/utility.js new file mode 100644 index 000000000..06c18804f --- /dev/null +++ b/js/src/devtools/rootAnalysis/utility.js @@ -0,0 +1,211 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ + +"use strict"; + +// gcc appends this to mangled function names for "not in charge" +// constructors/destructors. +var internalMarker = " *INTERNAL* "; + +if (! Set.prototype.hasOwnProperty("update")) { + Object.defineProperty(Set.prototype, "update", { + value: function (collection) { + for (let elt of collection) + this.add(elt); + } + }); +} + +function assert(x, msg) +{ + if (x) + return; + debugger; + if (msg) + throw "assertion failed: " + msg + "\n" + (Error().stack); + else + throw "assertion failed: " + (Error().stack); +} + +function defined(x) { + return x !== undefined; +} + +function xprint(x, padding) +{ + if (!padding) + padding = ""; + if (x instanceof Array) { + print(padding + "["); + for (var elem of x) + xprint(elem, padding + " "); + print(padding + "]"); + } else if (x instanceof Object) { + print(padding + "{"); + for (var prop in x) { + print(padding + " " + prop + ":"); + xprint(x[prop], padding + " "); + } + print(padding + "}"); + } else { + print(padding + x); + } +} + +function sameBlockId(id0, id1) +{ + if (id0.Kind != id1.Kind) + return false; + if (!sameVariable(id0.Variable, id1.Variable)) + return false; + if (id0.Kind == "Loop" && id0.Loop != id1.Loop) + return false; + return true; +} + +function sameVariable(var0, var1) +{ + assert("Name" in var0 || var0.Kind == "This" || var0.Kind == "Return"); + assert("Name" in var1 || var1.Kind == "This" || var1.Kind == "Return"); + if ("Name" in var0) + return "Name" in var1 && var0.Name[0] == var1.Name[0]; + return var0.Kind == var1.Kind; +} + +function blockIdentifier(body) +{ + if (body.BlockId.Kind == "Loop") + return body.BlockId.Loop; + assert(body.BlockId.Kind == "Function", "body.Kind should be Function, not " + body.BlockId.Kind); + return body.BlockId.Variable.Name[0]; +} + +function collectBodyEdges(body) +{ + body.predecessors = []; + body.successors = []; + if (!("PEdge" in body)) + return; + + for (var edge of body.PEdge) { + var [ source, target ] = edge.Index; + if (!(target in body.predecessors)) + body.predecessors[target] = []; + body.predecessors[target].push(edge); + if (!(source in body.successors)) + body.successors[source] = []; + body.successors[source].push(edge); + } +} + +function getPredecessors(body) +{ + try { + if (!('predecessors' in body)) + collectBodyEdges(body); + } catch (e) { + debugger; + printErr("body is " + body); + } + return body.predecessors; +} + +function getSuccessors(body) +{ + if (!('successors' in body)) + collectBodyEdges(body); + return body.successors; +} + +// Split apart a function from sixgill into its mangled and unmangled name. If +// no mangled name was given, use the unmangled name as its mangled name +function splitFunction(func) +{ + var split = func.indexOf("$"); + if (split != -1) + return [ func.substr(0, split), func.substr(split+1) ]; + split = func.indexOf("|"); + if (split != -1) + return [ func.substr(0, split), func.substr(split+1) ]; + return [ func, func ]; +} + +function mangled(fullname) +{ + var [ mangled, unmangled ] = splitFunction(fullname); + return mangled; +} + +function readable(fullname) +{ + var [ mangled, unmangled ] = splitFunction(fullname); + return unmangled; +} + +function xdbLibrary() +{ + var lib = ctypes.open(os.getenv('XDB')); + var api = { + open: lib.declare("xdb_open", ctypes.default_abi, ctypes.void_t, ctypes.char.ptr), + min_data_stream: lib.declare("xdb_min_data_stream", ctypes.default_abi, ctypes.int), + max_data_stream: lib.declare("xdb_max_data_stream", ctypes.default_abi, ctypes.int), + read_key: lib.declare("xdb_read_key", ctypes.default_abi, ctypes.char.ptr, ctypes.int), + read_entry: lib.declare("xdb_read_entry", ctypes.default_abi, ctypes.char.ptr, ctypes.char.ptr), + free_string: lib.declare("xdb_free", ctypes.default_abi, ctypes.void_t, ctypes.char.ptr) + }; + try { + api.lookup_key = lib.declare("xdb_lookup_key", ctypes.default_abi, ctypes.int, ctypes.char.ptr); + } catch (e) { + // lookup_key is for development use only and is not strictly necessary. + } + return api; +} + +function cLibrary() +{ + var lib; + try { + lib = ctypes.open("libc.so.6"); + } catch(e) { + lib = ctypes.open("libc.so"); + } + + return { + fopen: lib.declare("fopen", ctypes.default_abi, ctypes.void_t.ptr, ctypes.char.ptr, ctypes.char.ptr), + getline: lib.declare("getline", ctypes.default_abi, ctypes.ssize_t, ctypes.char.ptr.ptr, ctypes.size_t.ptr, ctypes.void_t.ptr), + fclose: lib.declare("fclose", ctypes.default_abi, ctypes.int, ctypes.void_t.ptr), + free: lib.declare("free", ctypes.default_abi, ctypes.void_t, ctypes.void_t.ptr), + }; +} + +function* readFileLines_gen(filename) +{ + var libc = cLibrary(); + var linebuf = ctypes.char.ptr(); + var bufsize = ctypes.size_t(0); + var fp = libc.fopen(filename, "r"); + if (fp.isNull()) + throw "Unable to open '" + filename + "'" + + while (libc.getline(linebuf.address(), bufsize.address(), fp) > 0) + yield linebuf.readString(); + libc.fclose(fp); + libc.free(ctypes.void_t.ptr(linebuf)); +} + +function addToKeyedList(collection, key, entry) +{ + if (!(key in collection)) + collection[key] = []; + collection[key].push(entry); +} + +function loadTypeInfo(filename) +{ + var info = {}; + for (var line of readFileLines_gen(filename)) { + line = line.replace(/\n/, ""); + let [property, name] = line.split("$$"); + addToKeyedList(info, property, name); + } + return info; +} |