diff options
Diffstat (limited to 'memory/replace/logalloc/replay/logalloc_munge.py')
-rw-r--r-- | memory/replace/logalloc/replay/logalloc_munge.py | 147 |
1 files changed, 147 insertions, 0 deletions
diff --git a/memory/replace/logalloc/replay/logalloc_munge.py b/memory/replace/logalloc/replay/logalloc_munge.py new file mode 100644 index 000000000..a244c3c3d --- /dev/null +++ b/memory/replace/logalloc/replay/logalloc_munge.py @@ -0,0 +1,147 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +This script takes a log from the replace-malloc logalloc library on stdin +and munges it so that it can be used with the logalloc-replay tool. + +Given the following output: + 13663 malloc(42)=0x7f0c33502040 + 13663 malloc(24)=0x7f0c33503040 + 13663 free(0x7f0c33502040) +The resulting output is: + 1 malloc(42)=#1 + 1 malloc(24)=#2 + 1 free(#1) + +See README for more details. +""" + +from __future__ import print_function +import sys +from collections import ( + defaultdict, + deque, +) + +class IdMapping(object): + """Class to map values to ids. + + Each value is associated to an increasing id, starting from 1. + When a value is removed, its id is recycled and will be reused for + subsequent values. + """ + def __init__(self): + self.id = 1 + self._values = {} + self._recycle = deque() + + def __getitem__(self, value): + if value not in self._values: + if self._recycle: + self._values[value] = self._recycle.popleft() + else: + self._values[value] = self.id + self.id += 1 + return self._values[value] + + def __delitem__(self, value): + if value == 0: + return + self._recycle.append(self._values[value]) + del self._values[value] + + def __contains__(self, value): + return value == 0 or value in self._values + + +class Ignored(Exception): pass + + +def split_log_line(line): + try: + # The format for each line is: + # <pid> [<tid>] <function>([<args>])[=<result>] + # + # The original format didn't include the tid, so we try to parse + # lines whether they have one or not. + pid, func_call = line.split(' ', 1) + call, result = func_call.split(')') + func, args = call.split('(') + args = args.split(',') if args else [] + if result: + if result[0] != '=': + raise Ignored('Malformed input') + result = result[1:] + if ' ' in func: + tid, func = func.split(' ', 1) + else: + tid = pid + return pid, tid, func, args, result + except: + raise Ignored('Malformed input') + + +NUM_ARGUMENTS = { + 'jemalloc_stats': 0, + 'free': 1, + 'malloc': 1, + 'posix_memalign': 2, + 'aligned_alloc': 2, + 'calloc': 2, + 'realloc': 2, + 'memalign': 2, + 'valloc': 1, +} + + +def main(): + pids = IdMapping() + processes = defaultdict(lambda: { 'pointers': IdMapping(), + 'tids': IdMapping() }) + for line in sys.stdin: + line = line.strip() + + try: + pid, tid, func, args, result = split_log_line(line) + + # Replace pid with an id. + pid = pids[int(pid)] + + process = processes[pid] + tid = process['tids'][int(tid)] + + pointers = process['pointers'] + + if func not in NUM_ARGUMENTS: + raise Ignored('Unknown function') + + if len(args) != NUM_ARGUMENTS[func]: + raise Ignored('Malformed input') + + if func in ('jemalloc_stats', 'free') and result: + raise Ignored('Malformed input') + + if func in ('free', 'realloc'): + ptr = int(args[0], 16) + if ptr and ptr not in pointers: + raise Ignored('Did not see an alloc for pointer') + args[0] = "#%d" % pointers[ptr] + del pointers[ptr] + + if result: + result = int(result, 16) + if not result: + raise Ignored('Result is NULL') + result = "#%d" % pointers[result] + + print('%d %d %s(%s)%s' % (pid, tid, func, ','.join(args), + '=%s' % result if result else '')) + + except Exception as e: + print('Ignored "%s": %s' % (line, e.message), file=sys.stderr) + + +if __name__ == '__main__': + main() |