diff options
Diffstat (limited to 'memory/replace/dmd/dmd.py')
-rwxr-xr-x | memory/replace/dmd/dmd.py | 890 |
1 files changed, 890 insertions, 0 deletions
diff --git a/memory/replace/dmd/dmd.py b/memory/replace/dmd/dmd.py new file mode 100755 index 000000000..d6b09f756 --- /dev/null +++ b/memory/replace/dmd/dmd.py @@ -0,0 +1,890 @@ +#! /usr/bin/env python +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''This script analyzes a JSON file emitted by DMD.''' + +from __future__ import print_function, division + +import argparse +import collections +import gzip +import json +import os +import platform +import re +import shutil +import sys +import tempfile +from bisect import bisect_right + +# The DMD output version this script handles. +outputVersion = 5 + +# If --ignore-alloc-fns is specified, stack frames containing functions that +# match these strings will be removed from the *start* of stack traces. (Once +# we hit a non-matching frame, any subsequent frames won't be removed even if +# they do match.) +allocatorFns = [ + # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc, pod_malloc, malloc_zone_*, g_malloc. + 'malloc', + # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc, pod_calloc, malloc_zone_calloc, pod_callocCanGC. + 'calloc', + # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc, pod_realloc, pod_reallocCanGC. + 'realloc', + # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign, moz_xmemalign, moz_xposix_memalign, vpx_memalign, malloc_zone_memalign. + 'memalign', + 'operator new(', + 'operator new[](', + 'g_slice_alloc', + # This one necessary to fully filter some sequences of allocation functions + # that happen in practice. Note that ??? entries that follow non-allocation + # functions won't be stripped, as explained above. + '???', +] + +class Record(object): + '''A record is an aggregation of heap blocks that have identical stack + traces. It can also be used to represent the difference between two + records.''' + + def __init__(self): + self.numBlocks = 0 + self.reqSize = 0 + self.slopSize = 0 + self.usableSize = 0 + self.allocatedAtDesc = None + self.reportedAtDescs = [] + self.usableSizes = collections.defaultdict(int) + + def isZero(self, args): + return self.numBlocks == 0 and \ + self.reqSize == 0 and \ + self.slopSize == 0 and \ + self.usableSize == 0 and \ + len(self.usableSizes) == 0 + + def negate(self): + self.numBlocks = -self.numBlocks + self.reqSize = -self.reqSize + self.slopSize = -self.slopSize + self.usableSize = -self.usableSize + + negatedUsableSizes = collections.defaultdict(int) + for usableSize, count in self.usableSizes.items(): + negatedUsableSizes[-usableSize] = count + self.usableSizes = negatedUsableSizes + + def subtract(self, r): + # We should only be calling this on records with matching stack traces. + # Check this. + assert self.allocatedAtDesc == r.allocatedAtDesc + assert self.reportedAtDescs == r.reportedAtDescs + + self.numBlocks -= r.numBlocks + self.reqSize -= r.reqSize + self.slopSize -= r.slopSize + self.usableSize -= r.usableSize + + usableSizes1 = self.usableSizes + usableSizes2 = r.usableSizes + usableSizes3 = collections.defaultdict(int) + for usableSize in usableSizes1: + counts1 = usableSizes1[usableSize] + if usableSize in usableSizes2: + counts2 = usableSizes2[usableSize] + del usableSizes2[usableSize] + counts3 = counts1 - counts2 + if counts3 != 0: + if counts3 < 0: + usableSize = -usableSize + counts3 = -counts3 + usableSizes3[usableSize] = counts3 + else: + usableSizes3[usableSize] = counts1 + + for usableSize in usableSizes2: + usableSizes3[-usableSize] = usableSizes2[usableSize] + + self.usableSizes = usableSizes3 + + @staticmethod + def cmpByUsableSize(r1, r2): + # Sort by usable size, then by req size. + return cmp(abs(r1.usableSize), abs(r2.usableSize)) or \ + Record.cmpByReqSize(r1, r2) + + @staticmethod + def cmpByReqSize(r1, r2): + # Sort by req size. + return cmp(abs(r1.reqSize), abs(r2.reqSize)) + + @staticmethod + def cmpBySlopSize(r1, r2): + # Sort by slop size. + return cmp(abs(r1.slopSize), abs(r2.slopSize)) + + @staticmethod + def cmpByNumBlocks(r1, r2): + # Sort by block counts, then by usable size. + return cmp(abs(r1.numBlocks), abs(r2.numBlocks)) or \ + Record.cmpByUsableSize(r1, r2) + + +sortByChoices = { + 'usable': Record.cmpByUsableSize, # the default + 'req': Record.cmpByReqSize, + 'slop': Record.cmpBySlopSize, + 'num-blocks': Record.cmpByNumBlocks, +} + + +def parseCommandLine(): + # 24 is the maximum number of frames that DMD will produce. + def range_1_24(string): + value = int(string) + if value < 1 or value > 24: + msg = '{:s} is not in the range 1..24'.format(string) + raise argparse.ArgumentTypeError(msg) + return value + + description = ''' +Analyze heap data produced by DMD. +If one file is specified, analyze it; if two files are specified, analyze the +difference. +Input files can be gzipped. +Write to stdout unless -o/--output is specified. +Stack traces are fixed to show function names, filenames and line numbers +unless --no-fix-stacks is specified; stack fixing modifies the original file +and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment +variable is used to find breakpad symbols for stack fixing. +''' + p = argparse.ArgumentParser(description=description) + + p.add_argument('-o', '--output', type=argparse.FileType('w'), + help='output file; stdout if unspecified') + + p.add_argument('-f', '--max-frames', type=range_1_24, + help='maximum number of frames to consider in each trace') + + p.add_argument('-s', '--sort-by', choices=sortByChoices.keys(), + default='usable', + help='sort the records by a particular metric') + + p.add_argument('-a', '--ignore-alloc-fns', action='store_true', + help='ignore allocation functions at the start of traces') + + p.add_argument('--no-fix-stacks', action='store_true', + help='do not fix stacks') + + p.add_argument('--clamp-contents', action='store_true', + help='for a scan mode log, clamp addresses to the start of live blocks, or zero if not in one') + + p.add_argument('--print-clamp-stats', action='store_true', + help='print information about the results of pointer clamping; mostly useful for debugging clamping') + + p.add_argument('--filter-stacks-for-testing', action='store_true', + help='filter stack traces; only useful for testing purposes') + + p.add_argument('input_file', + help='a file produced by DMD') + + p.add_argument('input_file2', nargs='?', + help='a file produced by DMD; if present, it is diff\'d with input_file') + + return p.parse_args(sys.argv[1:]) + + +# Fix stacks if necessary: first write the output to a tempfile, then replace +# the original file with it. +def fixStackTraces(inputFilename, isZipped, opener): + # This append() call is needed to make the import statements work when this + # script is installed as a symlink. + sys.path.append(os.path.dirname(__file__)) + + bpsyms = os.environ.get('BREAKPAD_SYMBOLS_PATH', None) + sysname = platform.system() + if bpsyms and os.path.exists(bpsyms): + import fix_stack_using_bpsyms as fixModule + fix = lambda line: fixModule.fixSymbols(line, bpsyms) + elif sysname == 'Linux': + import fix_linux_stack as fixModule + fix = lambda line: fixModule.fixSymbols(line) + elif sysname == 'Darwin': + import fix_macosx_stack as fixModule + fix = lambda line: fixModule.fixSymbols(line) + else: + fix = None # there is no fix script for Windows + + if fix: + # Fix stacks, writing output to a temporary file, and then + # overwrite the original file. + tmpFile = tempfile.NamedTemporaryFile(delete=False) + + # If the input is gzipped, then the output (written initially to + # |tmpFile|) should be gzipped as well. + # + # And we want to set its pre-gzipped filename to '' rather than the + # name of the temporary file, so that programs like the Unix 'file' + # utility don't say that it was called 'tmp6ozTxE' (or something like + # that) before it was zipped. So that explains the |filename=''| + # parameter. + # + # But setting the filename like that clobbers |tmpFile.name|, so we + # must get that now in order to move |tmpFile| at the end. + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename='', fileobj=tmpFile) + + with opener(inputFilename, 'rb') as inputFile: + for line in inputFile: + tmpFile.write(fix(line)) + + tmpFile.close() + + shutil.move(tmpFilename, inputFilename) + + +def getDigestFromFile(args, inputFile): + # Handle gzipped input if necessary. + isZipped = inputFile.endswith('.gz') + opener = gzip.open if isZipped else open + + # Fix stack traces unless otherwise instructed. + if not args.no_fix_stacks: + fixStackTraces(inputFile, isZipped, opener) + + if args.clamp_contents: + clampBlockList(args, inputFile, isZipped, opener) + + with opener(inputFile, 'rb') as f: + j = json.load(f) + + if j['version'] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Extract the main parts of the JSON object. + invocation = j['invocation'] + dmdEnvVar = invocation['dmdEnvVar'] + mode = invocation['mode'] + blockList = j['blockList'] + traceTable = j['traceTable'] + frameTable = j['frameTable'] + + # Insert the necessary entries for unrecorded stack traces. Note that 'ut' + # and 'uf' will not overlap with any keys produced by DMD's + # ToIdStringConverter::Base32() function. + unrecordedTraceID = 'ut' + unrecordedFrameID = 'uf' + traceTable[unrecordedTraceID] = [unrecordedFrameID] + frameTable[unrecordedFrameID] = \ + '#00: (no stack trace recorded due to --stacks=partial)' + + # For the purposes of this script, 'scan' behaves like 'live'. + if mode == 'scan': + mode = 'live' + + if not mode in ['live', 'dark-matter', 'cumulative']: + raise Exception("bad 'mode' property: '{:s}'".format(mode)) + + # Remove allocation functions at the start of traces. + if args.ignore_alloc_fns: + # Build a regexp that matches every function in allocatorFns. + escapedAllocatorFns = map(re.escape, allocatorFns) + fn_re = re.compile('|'.join(escapedAllocatorFns)) + + # Remove allocator fns from each stack trace. + for traceKey, frameKeys in traceTable.items(): + numSkippedFrames = 0 + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if re.search(fn_re, frameDesc): + numSkippedFrames += 1 + else: + break + if numSkippedFrames > 0: + traceTable[traceKey] = frameKeys[numSkippedFrames:] + + # Trim the number of frames. + for traceKey, frameKeys in traceTable.items(): + if len(frameKeys) > args.max_frames: + traceTable[traceKey] = frameKeys[:args.max_frames] + + def buildTraceDescription(traceTable, frameTable, traceKey): + frameKeys = traceTable[traceKey] + fmt = ' #{:02d}{:}' + + if args.filter_stacks_for_testing: + # When running SmokeDMD.cpp, every stack trace should contain at + # least one frame that contains 'DMD.cpp', from either |DMD.cpp| or + # |SmokeDMD.cpp|. (Or 'dmd.cpp' on Windows.) If we see such a + # frame, we replace the entire stack trace with a single, + # predictable frame. There is too much variation in the stack + # traces across different machines and platforms to do more precise + # matching, but this level of matching will result in failure if + # stack fixing fails completely. + for frameKey in frameKeys: + frameDesc = frameTable[frameKey] + if 'DMD.cpp' in frameDesc or 'dmd.cpp' in frameDesc: + return [fmt.format(1, ': ... DMD.cpp ...')] + + # The frame number is always '#00' (see DMD.h for why), so we have to + # replace that with the correct frame number. + desc = [] + for n, frameKey in enumerate(traceTable[traceKey], start=1): + desc.append(fmt.format(n, frameTable[frameKey][3:])) + return desc + + # Aggregate blocks into records. All sufficiently similar blocks go into a + # single record. + + if mode in ['live', 'cumulative']: + liveOrCumulativeRecords = collections.defaultdict(Record) + elif mode == 'dark-matter': + unreportedRecords = collections.defaultdict(Record) + onceReportedRecords = collections.defaultdict(Record) + twiceReportedRecords = collections.defaultdict(Record) + + heapUsableSize = 0 + heapBlocks = 0 + + recordKeyPartCache = {} + + for block in blockList: + # For each block we compute a |recordKey|, and all blocks with the same + # |recordKey| are aggregated into a single record. The |recordKey| is + # derived from the block's 'alloc' and 'reps' (if present) stack + # traces. + # + # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing + # traces for equality. We can't use trace keys or frame keys because + # they're not comparable across different DMD runs (which is relevant + # when doing diffs). + # + # Using frame descriptions also fits in with the stack trimming done + # for --max-frames, which requires that stack traces with common + # beginnings but different endings to be considered equivalent. E.g. if + # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4] + # and we trim the final frame of each they should be considered + # equivalent because the untrimmed frame descriptions (D1 and D2) + # match. + # + # Having said all that, during a single invocation of dmd.py on a + # single DMD file, for a single frameKey value the record key will + # always be the same, and we might encounter it 1000s of times. So we + # cache prior results for speed. + def makeRecordKeyPart(traceKey): + if traceKey in recordKeyPartCache: + return recordKeyPartCache[traceKey] + + recordKeyPart = str(map(lambda frameKey: frameTable[frameKey], + traceTable[traceKey])) + recordKeyPartCache[traceKey] = recordKeyPart + return recordKeyPart + + allocatedAtTraceKey = block.get('alloc', unrecordedTraceID) + if mode in ['live', 'cumulative']: + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + records = liveOrCumulativeRecords + elif mode == 'dark-matter': + recordKey = makeRecordKeyPart(allocatedAtTraceKey) + if 'reps' in block: + reportedAtTraceKeys = block['reps'] + for reportedAtTraceKey in reportedAtTraceKeys: + recordKey += makeRecordKeyPart(reportedAtTraceKey) + if len(reportedAtTraceKeys) == 1: + records = onceReportedRecords + else: + records = twiceReportedRecords + else: + records = unreportedRecords + + record = records[recordKey] + + if 'req' not in block: + raise Exception("'req' property missing in block'") + + reqSize = block['req'] + slopSize = block.get('slop', 0) + + if 'num' in block: + num = block['num'] + else: + num = 1 + + usableSize = reqSize + slopSize + heapUsableSize += num * usableSize + heapBlocks += num + + record.numBlocks += num + record.reqSize += num * reqSize + record.slopSize += num * slopSize + record.usableSize += num * usableSize + if record.allocatedAtDesc == None: + record.allocatedAtDesc = \ + buildTraceDescription(traceTable, frameTable, + allocatedAtTraceKey) + + if mode in ['live', 'cumulative']: + pass + elif mode == 'dark-matter': + if 'reps' in block and record.reportedAtDescs == []: + f = lambda k: buildTraceDescription(traceTable, frameTable, k) + record.reportedAtDescs = map(f, reportedAtTraceKeys) + record.usableSizes[usableSize] += num + + # All the processed data for a single DMD file is called a "digest". + digest = {} + digest['dmdEnvVar'] = dmdEnvVar + digest['mode'] = mode + digest['heapUsableSize'] = heapUsableSize + digest['heapBlocks'] = heapBlocks + if mode in ['live', 'cumulative']: + digest['liveOrCumulativeRecords'] = liveOrCumulativeRecords + elif mode == 'dark-matter': + digest['unreportedRecords'] = unreportedRecords + digest['onceReportedRecords'] = onceReportedRecords + digest['twiceReportedRecords'] = twiceReportedRecords + return digest + + +def diffRecords(args, records1, records2): + records3 = {} + + # Process records1. + for k in records1: + r1 = records1[k] + if k in records2: + # This record is present in both records1 and records2. + r2 = records2[k] + del records2[k] + r2.subtract(r1) + if not r2.isZero(args): + records3[k] = r2 + else: + # This record is present only in records1. + r1.negate() + records3[k] = r1 + + for k in records2: + # This record is present only in records2. + records3[k] = records2[k] + + return records3 + + +def diffDigests(args, d1, d2): + if (d1['mode'] != d2['mode']): + raise Exception("the input files have different 'mode' properties") + + d3 = {} + d3['dmdEnvVar'] = (d1['dmdEnvVar'], d2['dmdEnvVar']) + d3['mode'] = d1['mode'] + d3['heapUsableSize'] = d2['heapUsableSize'] - d1['heapUsableSize'] + d3['heapBlocks'] = d2['heapBlocks'] - d1['heapBlocks'] + if d1['mode'] in ['live', 'cumulative']: + d3['liveOrCumulativeRecords'] = \ + diffRecords(args, d1['liveOrCumulativeRecords'], + d2['liveOrCumulativeRecords']) + elif d1['mode'] == 'dark-matter': + d3['unreportedRecords'] = diffRecords(args, d1['unreportedRecords'], + d2['unreportedRecords']) + d3['onceReportedRecords'] = diffRecords(args, d1['onceReportedRecords'], + d2['onceReportedRecords']) + d3['twiceReportedRecords'] = diffRecords(args, d1['twiceReportedRecords'], + d2['twiceReportedRecords']) + return d3 + + +def printDigest(args, digest): + dmdEnvVar = digest['dmdEnvVar'] + mode = digest['mode'] + heapUsableSize = digest['heapUsableSize'] + heapBlocks = digest['heapBlocks'] + if mode in ['live', 'cumulative']: + liveOrCumulativeRecords = digest['liveOrCumulativeRecords'] + elif mode == 'dark-matter': + unreportedRecords = digest['unreportedRecords'] + onceReportedRecords = digest['onceReportedRecords'] + twiceReportedRecords = digest['twiceReportedRecords'] + + separator = '#' + '-' * 65 + '\n' + + def number(n): + '''Format a number with comma as a separator.''' + return '{:,d}'.format(n) + + def perc(m, n): + return 0 if n == 0 else (100 * m / n) + + def plural(n): + return '' if n == 1 else 's' + + # Prints to stdout, or to file if -o/--output was specified. + def out(*arguments, **kwargs): + print(*arguments, file=args.output, **kwargs) + + def printStack(traceDesc): + for frameDesc in traceDesc: + out(frameDesc) + + def printRecords(recordKind, records, heapUsableSize): + RecordKind = recordKind.capitalize() + out(separator) + numRecords = len(records) + cmpRecords = sortByChoices[args.sort_by] + sortedRecords = sorted(records.values(), cmp=cmpRecords, reverse=True) + kindBlocks = 0 + kindUsableSize = 0 + maxRecord = 1000 + + # First iteration: get totals, etc. + for record in sortedRecords: + kindBlocks += record.numBlocks + kindUsableSize += record.usableSize + + # Second iteration: print. + if numRecords == 0: + out('# no {:} heap blocks\n'.format(recordKind)) + + kindCumulativeUsableSize = 0 + for i, record in enumerate(sortedRecords, start=1): + # Stop printing at the |maxRecord|th record. + if i == maxRecord: + out('# {:}: stopping after {:,d} heap block records\n'. + format(RecordKind, i)) + break + + kindCumulativeUsableSize += record.usableSize + + out(RecordKind + ' {') + out(' {:} block{:} in heap block record {:,d} of {:,d}'. + format(number(record.numBlocks), + plural(record.numBlocks), i, numRecords)) + out(' {:} bytes ({:} requested / {:} slop)'. + format(number(record.usableSize), + number(record.reqSize), + number(record.slopSize))) + + abscmp = lambda (usableSize1, _1), (usableSize2, _2): \ + cmp(abs(usableSize1), abs(usableSize2)) + usableSizes = sorted(record.usableSizes.items(), cmp=abscmp, + reverse=True) + + hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1 + + if not hasSingleBlock: + out(' Individual block sizes: ', end='') + if len(usableSizes) == 0: + out('(no change)', end='') + else: + isFirst = True + for usableSize, count in usableSizes: + if not isFirst: + out('; ', end='') + out('{:}'.format(number(usableSize)), end='') + if count > 1: + out(' x {:,d}'.format(count), end='') + isFirst = False + out() + + out(' {:4.2f}% of the heap ({:4.2f}% cumulative)'. + format(perc(record.usableSize, heapUsableSize), + perc(kindCumulativeUsableSize, heapUsableSize))) + if mode in ['live', 'cumulative']: + pass + elif mode == 'dark-matter': + out(' {:4.2f}% of {:} ({:4.2f}% cumulative)'. + format(perc(record.usableSize, kindUsableSize), + recordKind, + perc(kindCumulativeUsableSize, kindUsableSize))) + out(' Allocated at {') + printStack(record.allocatedAtDesc) + out(' }') + if mode in ['live', 'cumulative']: + pass + elif mode == 'dark-matter': + for n, reportedAtDesc in enumerate(record.reportedAtDescs): + again = 'again ' if n > 0 else '' + out(' Reported {:}at {{'.format(again)) + printStack(reportedAtDesc) + out(' }') + out('}\n') + + return (kindUsableSize, kindBlocks) + + + def printInvocation(n, dmdEnvVar, mode): + out('Invocation{:} {{'.format(n)) + if dmdEnvVar == None: + out(' $DMD is undefined') + else: + out(' $DMD = \'' + dmdEnvVar + '\'') + out(' Mode = \'' + mode + '\'') + out('}\n') + + # Print command line. Strip dirs so the output is deterministic, which is + # needed for testing. + out(separator, end='') + out('# ' + ' '.join(map(os.path.basename, sys.argv)) + '\n') + + # Print invocation(s). + if type(dmdEnvVar) is not tuple: + printInvocation('', dmdEnvVar, mode) + else: + printInvocation(' 1', dmdEnvVar[0], mode) + printInvocation(' 2', dmdEnvVar[1], mode) + + # Print records. + if mode in ['live', 'cumulative']: + liveOrCumulativeUsableSize, liveOrCumulativeBlocks = \ + printRecords(mode, liveOrCumulativeRecords, heapUsableSize) + elif mode == 'dark-matter': + twiceReportedUsableSize, twiceReportedBlocks = \ + printRecords('twice-reported', twiceReportedRecords, heapUsableSize) + + unreportedUsableSize, unreportedBlocks = \ + printRecords('unreported', unreportedRecords, heapUsableSize) + + onceReportedUsableSize, onceReportedBlocks = \ + printRecords('once-reported', onceReportedRecords, heapUsableSize) + + # Print summary. + out(separator) + out('Summary {') + if mode in ['live', 'cumulative']: + out(' Total: {:} bytes in {:} blocks'. + format(number(liveOrCumulativeUsableSize), + number(liveOrCumulativeBlocks))) + elif mode == 'dark-matter': + fmt = ' {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)' + out(fmt. + format('Total:', + number(heapUsableSize), + 100, + number(heapBlocks), + 100)) + out(fmt. + format('Unreported:', + number(unreportedUsableSize), + perc(unreportedUsableSize, heapUsableSize), + number(unreportedBlocks), + perc(unreportedBlocks, heapBlocks))) + out(fmt. + format('Once-reported:', + number(onceReportedUsableSize), + perc(onceReportedUsableSize, heapUsableSize), + number(onceReportedBlocks), + perc(onceReportedBlocks, heapBlocks))) + out(fmt. + format('Twice-reported:', + number(twiceReportedUsableSize), + perc(twiceReportedUsableSize, heapUsableSize), + number(twiceReportedBlocks), + perc(twiceReportedBlocks, heapBlocks))) + out('}\n') + + +############################# +# Pretty printer for DMD JSON +############################# + +def prettyPrintDmdJson(out, j): + out.write('{\n') + + out.write(' "version": {0},\n'.format(j['version'])) + out.write(' "invocation": ') + json.dump(j['invocation'], out, sort_keys=True) + out.write(',\n') + + out.write(' "blockList": [') + first = True + for b in j['blockList']: + out.write('' if first else ',') + out.write('\n ') + json.dump(b, out, sort_keys=True) + first = False + out.write('\n ],\n') + + out.write(' "traceTable": {') + first = True + for k, l in j['traceTable'].iteritems(): + out.write('' if first else ',') + out.write('\n "{0}": {1}'.format(k, json.dumps(l))) + first = False + out.write('\n },\n') + + out.write(' "frameTable": {') + first = True + for k, v in j['frameTable'].iteritems(): + out.write('' if first else ',') + out.write('\n "{0}": {1}'.format(k, json.dumps(v))) + first = False + out.write('\n }\n') + + out.write('}\n') + + +################################################################## +# Code for clamping addresses using conservative pointer analysis. +################################################################## + +# Start is the address of the first byte of the block, while end is +# the address of the first byte after the final byte in the block. +class AddrRange: + def __init__(self, block, length): + self.block = block + self.start = int(block, 16) + self.length = length + self.end = self.start + self.length + + assert self.start > 0 + assert length >= 0 + + +class ClampStats: + def __init__(self): + # Number of pointers already pointing to the start of a block. + self.startBlockPtr = 0 + + # Number of pointers pointing to the middle of a block. These + # are clamped to the start of the block they point into. + self.midBlockPtr = 0 + + # Number of null pointers. + self.nullPtr = 0 + + # Number of non-null pointers that didn't point into the middle + # of any blocks. These are clamped to null. + self.nonNullNonBlockPtr = 0 + + + def clampedBlockAddr(self, sameAddress): + if sameAddress: + self.startBlockPtr += 1 + else: + self.midBlockPtr += 1 + + def nullAddr(self): + self.nullPtr += 1 + + def clampedNonBlockAddr(self): + self.nonNullNonBlockPtr += 1 + + def log(self): + sys.stderr.write('Results:\n') + sys.stderr.write(' Number of pointers already pointing to start of blocks: ' + str(self.startBlockPtr) + '\n') + sys.stderr.write(' Number of pointers clamped to start of blocks: ' + str(self.midBlockPtr) + '\n') + sys.stderr.write(' Number of non-null pointers not pointing into blocks clamped to null: ' + str(self.nonNullNonBlockPtr) + '\n') + sys.stderr.write(' Number of null pointers: ' + str(self.nullPtr) + '\n') + + +# Search the block ranges array for a block that address points into. +# The search is carried out in an array of starting addresses for each blocks +# because it is faster. +def clampAddress(blockRanges, blockStarts, clampStats, address): + i = bisect_right(blockStarts, address) + + # Any addresses completely out of the range should have been eliminated already. + assert i > 0 + r = blockRanges[i - 1] + assert r.start <= address + + if address >= r.end: + assert address < blockRanges[i].start + clampStats.clampedNonBlockAddr() + return '0' + + clampStats.clampedBlockAddr(r.start == address) + return r.block + + +def clampBlockList(args, inputFileName, isZipped, opener): + # XXX This isn't very efficient because we end up reading and writing + # the file multiple times. + with opener(inputFileName, 'rb') as f: + j = json.load(f) + + if j['version'] != outputVersion: + raise Exception("'version' property isn't '{:d}'".format(outputVersion)) + + # Check that the invocation is reasonable for contents clamping. + invocation = j['invocation'] + if invocation['mode'] != 'scan': + raise Exception("Log was taken in mode " + invocation['mode'] + " not scan") + + sys.stderr.write('Creating block range list.\n') + blockList = j['blockList'] + blockRanges = [] + for block in blockList: + blockRanges.append(AddrRange(block['addr'], block['req'])) + blockRanges.sort(key=lambda r: r.start) + + # Make sure there are no overlapping blocks. + prevRange = blockRanges[0] + for currRange in blockRanges[1:]: + assert prevRange.end <= currRange.start + prevRange = currRange + + sys.stderr.write('Clamping block contents.\n') + clampStats = ClampStats() + firstAddr = blockRanges[0].start + lastAddr = blockRanges[-1].end + + blockStarts = [] + for r in blockRanges: + blockStarts.append(r.start) + + for block in blockList: + # Small blocks don't have any contents. + if not 'contents' in block: + continue + + cont = block['contents'] + for i in range(len(cont)): + address = int(cont[i], 16) + + if address == 0: + clampStats.nullAddr() + continue + + # If the address is before the first block or after the last + # block then it can't be within a block. + if address < firstAddr or address >= lastAddr: + clampStats.clampedNonBlockAddr() + cont[i] = '0' + continue + + cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address) + + # Remove any trailing nulls. + while len(cont) and cont[-1] == '0': + cont.pop() + + if args.print_clamp_stats: + clampStats.log() + + sys.stderr.write('Saving file.\n') + tmpFile = tempfile.NamedTemporaryFile(delete=False) + tmpFilename = tmpFile.name + if isZipped: + tmpFile = gzip.GzipFile(filename='', fileobj=tmpFile) + prettyPrintDmdJson(tmpFile, j) + tmpFile.close() + shutil.move(tmpFilename, inputFileName) + + +def main(): + args = parseCommandLine() + digest = getDigestFromFile(args, args.input_file) + if args.input_file2: + digest2 = getDigestFromFile(args, args.input_file2) + digest = diffDigests(args, digest, digest2) + printDigest(args, digest) + + +if __name__ == '__main__': + main() + |