diff options
Diffstat (limited to 'memory/replace/dmd/dmd.py')
-rwxr-xr-x | memory/replace/dmd/dmd.py | 890 |
1 files changed, 0 insertions, 890 deletions
diff --git a/memory/replace/dmd/dmd.py b/memory/replace/dmd/dmd.py deleted file mode 100755 index d6b09f756..000000000 --- a/memory/replace/dmd/dmd.py +++ /dev/null @@ -1,890 +0,0 @@ -#! /usr/bin/env python -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -'''This script analyzes a JSON file emitted by DMD.''' - -from __future__ import print_function, division - -import argparse -import collections -import gzip -import json -import os -import platform -import re -import shutil -import sys -import tempfile -from bisect import bisect_right - -# The DMD output version this script handles. -outputVersion = 5 - -# If --ignore-alloc-fns is specified, stack frames containing functions that -# match these strings will be removed from the *start* of stack traces. (Once -# we hit a non-matching frame, any subsequent frames won't be removed even if -# they do match.) -allocatorFns = [ - # Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc, pod_malloc, malloc_zone_*, g_malloc. - 'malloc', - # Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc, pod_calloc, malloc_zone_calloc, pod_callocCanGC. - 'calloc', - # Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc, pod_realloc, pod_reallocCanGC. - 'realloc', - # Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign, moz_xmemalign, moz_xposix_memalign, vpx_memalign, malloc_zone_memalign. - 'memalign', - 'operator new(', - 'operator new[](', - 'g_slice_alloc', - # This one necessary to fully filter some sequences of allocation functions - # that happen in practice. Note that ??? entries that follow non-allocation - # functions won't be stripped, as explained above. - '???', -] - -class Record(object): - '''A record is an aggregation of heap blocks that have identical stack - traces. It can also be used to represent the difference between two - records.''' - - def __init__(self): - self.numBlocks = 0 - self.reqSize = 0 - self.slopSize = 0 - self.usableSize = 0 - self.allocatedAtDesc = None - self.reportedAtDescs = [] - self.usableSizes = collections.defaultdict(int) - - def isZero(self, args): - return self.numBlocks == 0 and \ - self.reqSize == 0 and \ - self.slopSize == 0 and \ - self.usableSize == 0 and \ - len(self.usableSizes) == 0 - - def negate(self): - self.numBlocks = -self.numBlocks - self.reqSize = -self.reqSize - self.slopSize = -self.slopSize - self.usableSize = -self.usableSize - - negatedUsableSizes = collections.defaultdict(int) - for usableSize, count in self.usableSizes.items(): - negatedUsableSizes[-usableSize] = count - self.usableSizes = negatedUsableSizes - - def subtract(self, r): - # We should only be calling this on records with matching stack traces. - # Check this. - assert self.allocatedAtDesc == r.allocatedAtDesc - assert self.reportedAtDescs == r.reportedAtDescs - - self.numBlocks -= r.numBlocks - self.reqSize -= r.reqSize - self.slopSize -= r.slopSize - self.usableSize -= r.usableSize - - usableSizes1 = self.usableSizes - usableSizes2 = r.usableSizes - usableSizes3 = collections.defaultdict(int) - for usableSize in usableSizes1: - counts1 = usableSizes1[usableSize] - if usableSize in usableSizes2: - counts2 = usableSizes2[usableSize] - del usableSizes2[usableSize] - counts3 = counts1 - counts2 - if counts3 != 0: - if counts3 < 0: - usableSize = -usableSize - counts3 = -counts3 - usableSizes3[usableSize] = counts3 - else: - usableSizes3[usableSize] = counts1 - - for usableSize in usableSizes2: - usableSizes3[-usableSize] = usableSizes2[usableSize] - - self.usableSizes = usableSizes3 - - @staticmethod - def cmpByUsableSize(r1, r2): - # Sort by usable size, then by req size. - return cmp(abs(r1.usableSize), abs(r2.usableSize)) or \ - Record.cmpByReqSize(r1, r2) - - @staticmethod - def cmpByReqSize(r1, r2): - # Sort by req size. - return cmp(abs(r1.reqSize), abs(r2.reqSize)) - - @staticmethod - def cmpBySlopSize(r1, r2): - # Sort by slop size. - return cmp(abs(r1.slopSize), abs(r2.slopSize)) - - @staticmethod - def cmpByNumBlocks(r1, r2): - # Sort by block counts, then by usable size. - return cmp(abs(r1.numBlocks), abs(r2.numBlocks)) or \ - Record.cmpByUsableSize(r1, r2) - - -sortByChoices = { - 'usable': Record.cmpByUsableSize, # the default - 'req': Record.cmpByReqSize, - 'slop': Record.cmpBySlopSize, - 'num-blocks': Record.cmpByNumBlocks, -} - - -def parseCommandLine(): - # 24 is the maximum number of frames that DMD will produce. - def range_1_24(string): - value = int(string) - if value < 1 or value > 24: - msg = '{:s} is not in the range 1..24'.format(string) - raise argparse.ArgumentTypeError(msg) - return value - - description = ''' -Analyze heap data produced by DMD. -If one file is specified, analyze it; if two files are specified, analyze the -difference. -Input files can be gzipped. -Write to stdout unless -o/--output is specified. -Stack traces are fixed to show function names, filenames and line numbers -unless --no-fix-stacks is specified; stack fixing modifies the original file -and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment -variable is used to find breakpad symbols for stack fixing. -''' - p = argparse.ArgumentParser(description=description) - - p.add_argument('-o', '--output', type=argparse.FileType('w'), - help='output file; stdout if unspecified') - - p.add_argument('-f', '--max-frames', type=range_1_24, - help='maximum number of frames to consider in each trace') - - p.add_argument('-s', '--sort-by', choices=sortByChoices.keys(), - default='usable', - help='sort the records by a particular metric') - - p.add_argument('-a', '--ignore-alloc-fns', action='store_true', - help='ignore allocation functions at the start of traces') - - p.add_argument('--no-fix-stacks', action='store_true', - help='do not fix stacks') - - p.add_argument('--clamp-contents', action='store_true', - help='for a scan mode log, clamp addresses to the start of live blocks, or zero if not in one') - - p.add_argument('--print-clamp-stats', action='store_true', - help='print information about the results of pointer clamping; mostly useful for debugging clamping') - - p.add_argument('--filter-stacks-for-testing', action='store_true', - help='filter stack traces; only useful for testing purposes') - - p.add_argument('input_file', - help='a file produced by DMD') - - p.add_argument('input_file2', nargs='?', - help='a file produced by DMD; if present, it is diff\'d with input_file') - - return p.parse_args(sys.argv[1:]) - - -# Fix stacks if necessary: first write the output to a tempfile, then replace -# the original file with it. -def fixStackTraces(inputFilename, isZipped, opener): - # This append() call is needed to make the import statements work when this - # script is installed as a symlink. - sys.path.append(os.path.dirname(__file__)) - - bpsyms = os.environ.get('BREAKPAD_SYMBOLS_PATH', None) - sysname = platform.system() - if bpsyms and os.path.exists(bpsyms): - import fix_stack_using_bpsyms as fixModule - fix = lambda line: fixModule.fixSymbols(line, bpsyms) - elif sysname == 'Linux': - import fix_linux_stack as fixModule - fix = lambda line: fixModule.fixSymbols(line) - elif sysname == 'Darwin': - import fix_macosx_stack as fixModule - fix = lambda line: fixModule.fixSymbols(line) - else: - fix = None # there is no fix script for Windows - - if fix: - # Fix stacks, writing output to a temporary file, and then - # overwrite the original file. - tmpFile = tempfile.NamedTemporaryFile(delete=False) - - # If the input is gzipped, then the output (written initially to - # |tmpFile|) should be gzipped as well. - # - # And we want to set its pre-gzipped filename to '' rather than the - # name of the temporary file, so that programs like the Unix 'file' - # utility don't say that it was called 'tmp6ozTxE' (or something like - # that) before it was zipped. So that explains the |filename=''| - # parameter. - # - # But setting the filename like that clobbers |tmpFile.name|, so we - # must get that now in order to move |tmpFile| at the end. - tmpFilename = tmpFile.name - if isZipped: - tmpFile = gzip.GzipFile(filename='', fileobj=tmpFile) - - with opener(inputFilename, 'rb') as inputFile: - for line in inputFile: - tmpFile.write(fix(line)) - - tmpFile.close() - - shutil.move(tmpFilename, inputFilename) - - -def getDigestFromFile(args, inputFile): - # Handle gzipped input if necessary. - isZipped = inputFile.endswith('.gz') - opener = gzip.open if isZipped else open - - # Fix stack traces unless otherwise instructed. - if not args.no_fix_stacks: - fixStackTraces(inputFile, isZipped, opener) - - if args.clamp_contents: - clampBlockList(args, inputFile, isZipped, opener) - - with opener(inputFile, 'rb') as f: - j = json.load(f) - - if j['version'] != outputVersion: - raise Exception("'version' property isn't '{:d}'".format(outputVersion)) - - # Extract the main parts of the JSON object. - invocation = j['invocation'] - dmdEnvVar = invocation['dmdEnvVar'] - mode = invocation['mode'] - blockList = j['blockList'] - traceTable = j['traceTable'] - frameTable = j['frameTable'] - - # Insert the necessary entries for unrecorded stack traces. Note that 'ut' - # and 'uf' will not overlap with any keys produced by DMD's - # ToIdStringConverter::Base32() function. - unrecordedTraceID = 'ut' - unrecordedFrameID = 'uf' - traceTable[unrecordedTraceID] = [unrecordedFrameID] - frameTable[unrecordedFrameID] = \ - '#00: (no stack trace recorded due to --stacks=partial)' - - # For the purposes of this script, 'scan' behaves like 'live'. - if mode == 'scan': - mode = 'live' - - if not mode in ['live', 'dark-matter', 'cumulative']: - raise Exception("bad 'mode' property: '{:s}'".format(mode)) - - # Remove allocation functions at the start of traces. - if args.ignore_alloc_fns: - # Build a regexp that matches every function in allocatorFns. - escapedAllocatorFns = map(re.escape, allocatorFns) - fn_re = re.compile('|'.join(escapedAllocatorFns)) - - # Remove allocator fns from each stack trace. - for traceKey, frameKeys in traceTable.items(): - numSkippedFrames = 0 - for frameKey in frameKeys: - frameDesc = frameTable[frameKey] - if re.search(fn_re, frameDesc): - numSkippedFrames += 1 - else: - break - if numSkippedFrames > 0: - traceTable[traceKey] = frameKeys[numSkippedFrames:] - - # Trim the number of frames. - for traceKey, frameKeys in traceTable.items(): - if len(frameKeys) > args.max_frames: - traceTable[traceKey] = frameKeys[:args.max_frames] - - def buildTraceDescription(traceTable, frameTable, traceKey): - frameKeys = traceTable[traceKey] - fmt = ' #{:02d}{:}' - - if args.filter_stacks_for_testing: - # When running SmokeDMD.cpp, every stack trace should contain at - # least one frame that contains 'DMD.cpp', from either |DMD.cpp| or - # |SmokeDMD.cpp|. (Or 'dmd.cpp' on Windows.) If we see such a - # frame, we replace the entire stack trace with a single, - # predictable frame. There is too much variation in the stack - # traces across different machines and platforms to do more precise - # matching, but this level of matching will result in failure if - # stack fixing fails completely. - for frameKey in frameKeys: - frameDesc = frameTable[frameKey] - if 'DMD.cpp' in frameDesc or 'dmd.cpp' in frameDesc: - return [fmt.format(1, ': ... DMD.cpp ...')] - - # The frame number is always '#00' (see DMD.h for why), so we have to - # replace that with the correct frame number. - desc = [] - for n, frameKey in enumerate(traceTable[traceKey], start=1): - desc.append(fmt.format(n, frameTable[frameKey][3:])) - return desc - - # Aggregate blocks into records. All sufficiently similar blocks go into a - # single record. - - if mode in ['live', 'cumulative']: - liveOrCumulativeRecords = collections.defaultdict(Record) - elif mode == 'dark-matter': - unreportedRecords = collections.defaultdict(Record) - onceReportedRecords = collections.defaultdict(Record) - twiceReportedRecords = collections.defaultdict(Record) - - heapUsableSize = 0 - heapBlocks = 0 - - recordKeyPartCache = {} - - for block in blockList: - # For each block we compute a |recordKey|, and all blocks with the same - # |recordKey| are aggregated into a single record. The |recordKey| is - # derived from the block's 'alloc' and 'reps' (if present) stack - # traces. - # - # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing - # traces for equality. We can't use trace keys or frame keys because - # they're not comparable across different DMD runs (which is relevant - # when doing diffs). - # - # Using frame descriptions also fits in with the stack trimming done - # for --max-frames, which requires that stack traces with common - # beginnings but different endings to be considered equivalent. E.g. if - # we have distinct traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4] - # and we trim the final frame of each they should be considered - # equivalent because the untrimmed frame descriptions (D1 and D2) - # match. - # - # Having said all that, during a single invocation of dmd.py on a - # single DMD file, for a single frameKey value the record key will - # always be the same, and we might encounter it 1000s of times. So we - # cache prior results for speed. - def makeRecordKeyPart(traceKey): - if traceKey in recordKeyPartCache: - return recordKeyPartCache[traceKey] - - recordKeyPart = str(map(lambda frameKey: frameTable[frameKey], - traceTable[traceKey])) - recordKeyPartCache[traceKey] = recordKeyPart - return recordKeyPart - - allocatedAtTraceKey = block.get('alloc', unrecordedTraceID) - if mode in ['live', 'cumulative']: - recordKey = makeRecordKeyPart(allocatedAtTraceKey) - records = liveOrCumulativeRecords - elif mode == 'dark-matter': - recordKey = makeRecordKeyPart(allocatedAtTraceKey) - if 'reps' in block: - reportedAtTraceKeys = block['reps'] - for reportedAtTraceKey in reportedAtTraceKeys: - recordKey += makeRecordKeyPart(reportedAtTraceKey) - if len(reportedAtTraceKeys) == 1: - records = onceReportedRecords - else: - records = twiceReportedRecords - else: - records = unreportedRecords - - record = records[recordKey] - - if 'req' not in block: - raise Exception("'req' property missing in block'") - - reqSize = block['req'] - slopSize = block.get('slop', 0) - - if 'num' in block: - num = block['num'] - else: - num = 1 - - usableSize = reqSize + slopSize - heapUsableSize += num * usableSize - heapBlocks += num - - record.numBlocks += num - record.reqSize += num * reqSize - record.slopSize += num * slopSize - record.usableSize += num * usableSize - if record.allocatedAtDesc == None: - record.allocatedAtDesc = \ - buildTraceDescription(traceTable, frameTable, - allocatedAtTraceKey) - - if mode in ['live', 'cumulative']: - pass - elif mode == 'dark-matter': - if 'reps' in block and record.reportedAtDescs == []: - f = lambda k: buildTraceDescription(traceTable, frameTable, k) - record.reportedAtDescs = map(f, reportedAtTraceKeys) - record.usableSizes[usableSize] += num - - # All the processed data for a single DMD file is called a "digest". - digest = {} - digest['dmdEnvVar'] = dmdEnvVar - digest['mode'] = mode - digest['heapUsableSize'] = heapUsableSize - digest['heapBlocks'] = heapBlocks - if mode in ['live', 'cumulative']: - digest['liveOrCumulativeRecords'] = liveOrCumulativeRecords - elif mode == 'dark-matter': - digest['unreportedRecords'] = unreportedRecords - digest['onceReportedRecords'] = onceReportedRecords - digest['twiceReportedRecords'] = twiceReportedRecords - return digest - - -def diffRecords(args, records1, records2): - records3 = {} - - # Process records1. - for k in records1: - r1 = records1[k] - if k in records2: - # This record is present in both records1 and records2. - r2 = records2[k] - del records2[k] - r2.subtract(r1) - if not r2.isZero(args): - records3[k] = r2 - else: - # This record is present only in records1. - r1.negate() - records3[k] = r1 - - for k in records2: - # This record is present only in records2. - records3[k] = records2[k] - - return records3 - - -def diffDigests(args, d1, d2): - if (d1['mode'] != d2['mode']): - raise Exception("the input files have different 'mode' properties") - - d3 = {} - d3['dmdEnvVar'] = (d1['dmdEnvVar'], d2['dmdEnvVar']) - d3['mode'] = d1['mode'] - d3['heapUsableSize'] = d2['heapUsableSize'] - d1['heapUsableSize'] - d3['heapBlocks'] = d2['heapBlocks'] - d1['heapBlocks'] - if d1['mode'] in ['live', 'cumulative']: - d3['liveOrCumulativeRecords'] = \ - diffRecords(args, d1['liveOrCumulativeRecords'], - d2['liveOrCumulativeRecords']) - elif d1['mode'] == 'dark-matter': - d3['unreportedRecords'] = diffRecords(args, d1['unreportedRecords'], - d2['unreportedRecords']) - d3['onceReportedRecords'] = diffRecords(args, d1['onceReportedRecords'], - d2['onceReportedRecords']) - d3['twiceReportedRecords'] = diffRecords(args, d1['twiceReportedRecords'], - d2['twiceReportedRecords']) - return d3 - - -def printDigest(args, digest): - dmdEnvVar = digest['dmdEnvVar'] - mode = digest['mode'] - heapUsableSize = digest['heapUsableSize'] - heapBlocks = digest['heapBlocks'] - if mode in ['live', 'cumulative']: - liveOrCumulativeRecords = digest['liveOrCumulativeRecords'] - elif mode == 'dark-matter': - unreportedRecords = digest['unreportedRecords'] - onceReportedRecords = digest['onceReportedRecords'] - twiceReportedRecords = digest['twiceReportedRecords'] - - separator = '#' + '-' * 65 + '\n' - - def number(n): - '''Format a number with comma as a separator.''' - return '{:,d}'.format(n) - - def perc(m, n): - return 0 if n == 0 else (100 * m / n) - - def plural(n): - return '' if n == 1 else 's' - - # Prints to stdout, or to file if -o/--output was specified. - def out(*arguments, **kwargs): - print(*arguments, file=args.output, **kwargs) - - def printStack(traceDesc): - for frameDesc in traceDesc: - out(frameDesc) - - def printRecords(recordKind, records, heapUsableSize): - RecordKind = recordKind.capitalize() - out(separator) - numRecords = len(records) - cmpRecords = sortByChoices[args.sort_by] - sortedRecords = sorted(records.values(), cmp=cmpRecords, reverse=True) - kindBlocks = 0 - kindUsableSize = 0 - maxRecord = 1000 - - # First iteration: get totals, etc. - for record in sortedRecords: - kindBlocks += record.numBlocks - kindUsableSize += record.usableSize - - # Second iteration: print. - if numRecords == 0: - out('# no {:} heap blocks\n'.format(recordKind)) - - kindCumulativeUsableSize = 0 - for i, record in enumerate(sortedRecords, start=1): - # Stop printing at the |maxRecord|th record. - if i == maxRecord: - out('# {:}: stopping after {:,d} heap block records\n'. - format(RecordKind, i)) - break - - kindCumulativeUsableSize += record.usableSize - - out(RecordKind + ' {') - out(' {:} block{:} in heap block record {:,d} of {:,d}'. - format(number(record.numBlocks), - plural(record.numBlocks), i, numRecords)) - out(' {:} bytes ({:} requested / {:} slop)'. - format(number(record.usableSize), - number(record.reqSize), - number(record.slopSize))) - - abscmp = lambda (usableSize1, _1), (usableSize2, _2): \ - cmp(abs(usableSize1), abs(usableSize2)) - usableSizes = sorted(record.usableSizes.items(), cmp=abscmp, - reverse=True) - - hasSingleBlock = len(usableSizes) == 1 and usableSizes[0][1] == 1 - - if not hasSingleBlock: - out(' Individual block sizes: ', end='') - if len(usableSizes) == 0: - out('(no change)', end='') - else: - isFirst = True - for usableSize, count in usableSizes: - if not isFirst: - out('; ', end='') - out('{:}'.format(number(usableSize)), end='') - if count > 1: - out(' x {:,d}'.format(count), end='') - isFirst = False - out() - - out(' {:4.2f}% of the heap ({:4.2f}% cumulative)'. - format(perc(record.usableSize, heapUsableSize), - perc(kindCumulativeUsableSize, heapUsableSize))) - if mode in ['live', 'cumulative']: - pass - elif mode == 'dark-matter': - out(' {:4.2f}% of {:} ({:4.2f}% cumulative)'. - format(perc(record.usableSize, kindUsableSize), - recordKind, - perc(kindCumulativeUsableSize, kindUsableSize))) - out(' Allocated at {') - printStack(record.allocatedAtDesc) - out(' }') - if mode in ['live', 'cumulative']: - pass - elif mode == 'dark-matter': - for n, reportedAtDesc in enumerate(record.reportedAtDescs): - again = 'again ' if n > 0 else '' - out(' Reported {:}at {{'.format(again)) - printStack(reportedAtDesc) - out(' }') - out('}\n') - - return (kindUsableSize, kindBlocks) - - - def printInvocation(n, dmdEnvVar, mode): - out('Invocation{:} {{'.format(n)) - if dmdEnvVar == None: - out(' $DMD is undefined') - else: - out(' $DMD = \'' + dmdEnvVar + '\'') - out(' Mode = \'' + mode + '\'') - out('}\n') - - # Print command line. Strip dirs so the output is deterministic, which is - # needed for testing. - out(separator, end='') - out('# ' + ' '.join(map(os.path.basename, sys.argv)) + '\n') - - # Print invocation(s). - if type(dmdEnvVar) is not tuple: - printInvocation('', dmdEnvVar, mode) - else: - printInvocation(' 1', dmdEnvVar[0], mode) - printInvocation(' 2', dmdEnvVar[1], mode) - - # Print records. - if mode in ['live', 'cumulative']: - liveOrCumulativeUsableSize, liveOrCumulativeBlocks = \ - printRecords(mode, liveOrCumulativeRecords, heapUsableSize) - elif mode == 'dark-matter': - twiceReportedUsableSize, twiceReportedBlocks = \ - printRecords('twice-reported', twiceReportedRecords, heapUsableSize) - - unreportedUsableSize, unreportedBlocks = \ - printRecords('unreported', unreportedRecords, heapUsableSize) - - onceReportedUsableSize, onceReportedBlocks = \ - printRecords('once-reported', onceReportedRecords, heapUsableSize) - - # Print summary. - out(separator) - out('Summary {') - if mode in ['live', 'cumulative']: - out(' Total: {:} bytes in {:} blocks'. - format(number(liveOrCumulativeUsableSize), - number(liveOrCumulativeBlocks))) - elif mode == 'dark-matter': - fmt = ' {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)' - out(fmt. - format('Total:', - number(heapUsableSize), - 100, - number(heapBlocks), - 100)) - out(fmt. - format('Unreported:', - number(unreportedUsableSize), - perc(unreportedUsableSize, heapUsableSize), - number(unreportedBlocks), - perc(unreportedBlocks, heapBlocks))) - out(fmt. - format('Once-reported:', - number(onceReportedUsableSize), - perc(onceReportedUsableSize, heapUsableSize), - number(onceReportedBlocks), - perc(onceReportedBlocks, heapBlocks))) - out(fmt. - format('Twice-reported:', - number(twiceReportedUsableSize), - perc(twiceReportedUsableSize, heapUsableSize), - number(twiceReportedBlocks), - perc(twiceReportedBlocks, heapBlocks))) - out('}\n') - - -############################# -# Pretty printer for DMD JSON -############################# - -def prettyPrintDmdJson(out, j): - out.write('{\n') - - out.write(' "version": {0},\n'.format(j['version'])) - out.write(' "invocation": ') - json.dump(j['invocation'], out, sort_keys=True) - out.write(',\n') - - out.write(' "blockList": [') - first = True - for b in j['blockList']: - out.write('' if first else ',') - out.write('\n ') - json.dump(b, out, sort_keys=True) - first = False - out.write('\n ],\n') - - out.write(' "traceTable": {') - first = True - for k, l in j['traceTable'].iteritems(): - out.write('' if first else ',') - out.write('\n "{0}": {1}'.format(k, json.dumps(l))) - first = False - out.write('\n },\n') - - out.write(' "frameTable": {') - first = True - for k, v in j['frameTable'].iteritems(): - out.write('' if first else ',') - out.write('\n "{0}": {1}'.format(k, json.dumps(v))) - first = False - out.write('\n }\n') - - out.write('}\n') - - -################################################################## -# Code for clamping addresses using conservative pointer analysis. -################################################################## - -# Start is the address of the first byte of the block, while end is -# the address of the first byte after the final byte in the block. -class AddrRange: - def __init__(self, block, length): - self.block = block - self.start = int(block, 16) - self.length = length - self.end = self.start + self.length - - assert self.start > 0 - assert length >= 0 - - -class ClampStats: - def __init__(self): - # Number of pointers already pointing to the start of a block. - self.startBlockPtr = 0 - - # Number of pointers pointing to the middle of a block. These - # are clamped to the start of the block they point into. - self.midBlockPtr = 0 - - # Number of null pointers. - self.nullPtr = 0 - - # Number of non-null pointers that didn't point into the middle - # of any blocks. These are clamped to null. - self.nonNullNonBlockPtr = 0 - - - def clampedBlockAddr(self, sameAddress): - if sameAddress: - self.startBlockPtr += 1 - else: - self.midBlockPtr += 1 - - def nullAddr(self): - self.nullPtr += 1 - - def clampedNonBlockAddr(self): - self.nonNullNonBlockPtr += 1 - - def log(self): - sys.stderr.write('Results:\n') - sys.stderr.write(' Number of pointers already pointing to start of blocks: ' + str(self.startBlockPtr) + '\n') - sys.stderr.write(' Number of pointers clamped to start of blocks: ' + str(self.midBlockPtr) + '\n') - sys.stderr.write(' Number of non-null pointers not pointing into blocks clamped to null: ' + str(self.nonNullNonBlockPtr) + '\n') - sys.stderr.write(' Number of null pointers: ' + str(self.nullPtr) + '\n') - - -# Search the block ranges array for a block that address points into. -# The search is carried out in an array of starting addresses for each blocks -# because it is faster. -def clampAddress(blockRanges, blockStarts, clampStats, address): - i = bisect_right(blockStarts, address) - - # Any addresses completely out of the range should have been eliminated already. - assert i > 0 - r = blockRanges[i - 1] - assert r.start <= address - - if address >= r.end: - assert address < blockRanges[i].start - clampStats.clampedNonBlockAddr() - return '0' - - clampStats.clampedBlockAddr(r.start == address) - return r.block - - -def clampBlockList(args, inputFileName, isZipped, opener): - # XXX This isn't very efficient because we end up reading and writing - # the file multiple times. - with opener(inputFileName, 'rb') as f: - j = json.load(f) - - if j['version'] != outputVersion: - raise Exception("'version' property isn't '{:d}'".format(outputVersion)) - - # Check that the invocation is reasonable for contents clamping. - invocation = j['invocation'] - if invocation['mode'] != 'scan': - raise Exception("Log was taken in mode " + invocation['mode'] + " not scan") - - sys.stderr.write('Creating block range list.\n') - blockList = j['blockList'] - blockRanges = [] - for block in blockList: - blockRanges.append(AddrRange(block['addr'], block['req'])) - blockRanges.sort(key=lambda r: r.start) - - # Make sure there are no overlapping blocks. - prevRange = blockRanges[0] - for currRange in blockRanges[1:]: - assert prevRange.end <= currRange.start - prevRange = currRange - - sys.stderr.write('Clamping block contents.\n') - clampStats = ClampStats() - firstAddr = blockRanges[0].start - lastAddr = blockRanges[-1].end - - blockStarts = [] - for r in blockRanges: - blockStarts.append(r.start) - - for block in blockList: - # Small blocks don't have any contents. - if not 'contents' in block: - continue - - cont = block['contents'] - for i in range(len(cont)): - address = int(cont[i], 16) - - if address == 0: - clampStats.nullAddr() - continue - - # If the address is before the first block or after the last - # block then it can't be within a block. - if address < firstAddr or address >= lastAddr: - clampStats.clampedNonBlockAddr() - cont[i] = '0' - continue - - cont[i] = clampAddress(blockRanges, blockStarts, clampStats, address) - - # Remove any trailing nulls. - while len(cont) and cont[-1] == '0': - cont.pop() - - if args.print_clamp_stats: - clampStats.log() - - sys.stderr.write('Saving file.\n') - tmpFile = tempfile.NamedTemporaryFile(delete=False) - tmpFilename = tmpFile.name - if isZipped: - tmpFile = gzip.GzipFile(filename='', fileobj=tmpFile) - prettyPrintDmdJson(tmpFile, j) - tmpFile.close() - shutil.move(tmpFilename, inputFileName) - - -def main(): - args = parseCommandLine() - digest = getDigestFromFile(args, args.input_file) - if args.input_file2: - digest2 = getDigestFromFile(args, args.input_file2) - digest = diffDigests(args, digest, digest2) - printDigest(args, digest) - - -if __name__ == '__main__': - main() - |