# mozilla/unwind.py --- unwinder and frame filter for SpiderMonkey

import gdb
import gdb.types
from gdb.FrameDecorator import FrameDecorator
import re
import platform
from mozilla.ExecutableAllocator import jsjitExecutableAllocatorCache, jsjitExecutableAllocator

# For ease of use in Python 2, we use "long" instead of "int"
# everywhere.
try:
    long
except NameError:
    long = int

# The Python 3 |map| built-in works lazily, but in Python 2 we need
# itertools.imap to get this.
try:
    from itertools import imap
except ImportError:
    imap = map

_have_unwinder = True
try:
    from gdb.unwinder import Unwinder
except ImportError:
    _have_unwinder = False
    # We need something here; it doesn't matter what as no unwinder
    # will ever be instantiated.
    Unwinder = object

def debug(something):
    # print("@@ " + something)
    pass

# Maps frametype enum base names to corresponding class.
SizeOfFramePrefix = {
    'JitFrame_IonJS': 'ExitFrameLayout',
    'JitFrame_BaselineJS': 'JitFrameLayout',
    'JitFrame_BaselineStub': 'BaselineStubFrameLayout',
    'JitFrame_IonStub': 'JitStubFrameLayout',
    # Technically EntryFrameLayout, but that doesn't wind up in the
    # debuginfo because there are no uses of it.
    'JitFrame_Entry': 'JitFrameLayout',
    'JitFrame_Rectifier': 'RectifierFrameLayout',
    'JitFrame_IonAccessorIC': 'IonAccessorICFrameLayout',
    'JitFrame_Exit': 'ExitFrameLayout',
    'JitFrame_Bailout': 'JitFrameLayout',
}

# All types and symbols that we need are attached to an object that we
# can dispose of as needed.
class UnwinderTypeCache(object):
    def __init__(self):
        self.d = None
        self.frame_enum_names = {}
        self.frame_class_types = {}

    # We take this bizarre approach to defer trying to look up any
    # symbols until absolutely needed.  Without this, the loading
    # approach taken by the gdb-tests would cause spurious exceptions.
    def __getattr__(self, name):
        if self.d is None:
            self.initialize()
        return self.d[name]

    def value(self, name):
        return long(gdb.parse_and_eval('js::jit::' + name))

    def initialize(self):
        self.d = {}
        self.d['FRAMETYPE_MASK'] = (1 << self.value('FRAMETYPE_BITS')) - 1
        self.d['FRAMESIZE_SHIFT'] = self.value('FRAMESIZE_SHIFT')
        self.d['FRAME_HEADER_SIZE_SHIFT'] = self.value('FRAME_HEADER_SIZE_SHIFT')
        self.d['FRAME_HEADER_SIZE_MASK'] = self.value('FRAME_HEADER_SIZE_MASK')

        self.compute_frame_info()
        commonFrameLayout = gdb.lookup_type('js::jit::CommonFrameLayout')
        self.d['typeCommonFrameLayout'] = commonFrameLayout
        self.d['typeCommonFrameLayoutPointer'] = commonFrameLayout.pointer()
        self.d['per_tls_data'] = gdb.lookup_global_symbol('js::TlsPerThreadData')

        self.d['void_starstar'] = gdb.lookup_type('void').pointer().pointer()
        self.d['mod_ExecutableAllocator'] = jsjitExecutableAllocatorCache()

        jitframe = gdb.lookup_type("js::jit::JitFrameLayout")
        self.d['jitFrameLayoutPointer'] = jitframe.pointer()

        self.d['CalleeToken_Function'] = self.value("CalleeToken_Function")
        self.d['CalleeToken_FunctionConstructing'] = self.value("CalleeToken_FunctionConstructing")
        self.d['CalleeToken_Script'] = self.value("CalleeToken_Script")
        self.d['JSFunction'] = gdb.lookup_type("JSFunction").pointer()
        self.d['JSScript'] = gdb.lookup_type("JSScript").pointer()
        self.d['Value'] = gdb.lookup_type("JS::Value")

        self.d['SOURCE_SLOT'] = long(gdb.parse_and_eval('js::ScriptSourceObject::SOURCE_SLOT'))
        self.d['NativeObject'] = gdb.lookup_type("js::NativeObject").pointer()
        self.d['HeapSlot'] = gdb.lookup_type("js::HeapSlot").pointer()
        self.d['ScriptSource'] = gdb.lookup_type("js::ScriptSource").pointer()

    # Compute maps related to jit frames.
    def compute_frame_info(self):
        t = gdb.lookup_type('enum js::jit::FrameType')
        for field in t.fields():
            # Strip off "js::jit::".
            name = field.name[9:]
            enumval = long(field.enumval)
            self.d[name] = enumval
            self.frame_enum_names[enumval] = name
            class_type = gdb.lookup_type('js::jit::' + SizeOfFramePrefix[name])
            self.frame_class_types[enumval] = class_type.pointer()

# gdb doesn't have a direct way to tell us if a given address is
# claimed by some shared library or the executable.  See
# https://sourceware.org/bugzilla/show_bug.cgi?id=19288
# In the interest of not requiring a patched gdb, instead we read
# /proc/.../maps.  This only works locally, but maybe could work
# remotely using "remote get".  FIXME.
def parse_proc_maps():
    mapfile = '/proc/' + str(gdb.selected_inferior().pid) + '/maps'
    # Note we only examine executable mappings here.
    matcher = re.compile("^([a-fA-F0-9]+)-([a-fA-F0-9]+)\s+..x.\s+\S+\s+\S+\s+\S*(.*)$")
    mappings = []
    with open(mapfile, "r") as inp:
        for line in inp:
            match = matcher.match(line)
            if not match:
                # Header lines and such.
                continue
            start = match.group(1)
            end = match.group(2)
            name = match.group(3).strip()
            if name is '' or (name.startswith('[') and name is not '[vdso]'):
                # Skip entries not corresponding to a file.
                continue
            mappings.append((long(start, 16), long(end, 16)))
    return mappings

# A symbol/value pair as expected from gdb frame decorators.
class FrameSymbol(object):
    def __init__(self, sym, val):
        self.sym = sym
        self.val = val

    def symbol(self):
        return self.sym

    def value(self):
        return self.val

# This represents a single JIT frame for the purposes of display.
# That is, the frame filter creates instances of this when it sees a
# JIT frame in the stack.
class JitFrameDecorator(FrameDecorator):
    def __init__(self, base, info, cache):
        super(JitFrameDecorator, self).__init__(base)
        self.info = info
        self.cache = cache

    def _decode_jitframe(self, this_frame):
        calleetoken = long(this_frame['calleeToken_'])
        tag = calleetoken & 3
        calleetoken = calleetoken ^ tag
        function = None
        script = None
        if tag == self.cache.CalleeToken_Function or tag == self.cache.CalleeToken_FunctionConstructing:
            fptr = gdb.Value(calleetoken).cast(self.cache.JSFunction)
            try:
                atom = fptr['atom_']
                if atom:
                    function = str(atom)
            except gdb.MemoryError:
                function = "(could not read function name)"
            script = fptr['u']['i']['s']['script_']
        elif tag == self.cache.CalleeToken_Script:
            script = gdb.Value(calleetoken).cast(self.cache.JSScript)
        return {"function": function, "script": script}

    def function(self):
        if self.info["name"] is None:
            return FrameDecorator.function(self)
        name = self.info["name"]
        result = "<<" + name
        # If we have a frame, we can extract the callee information
        # from it for display here.
        this_frame = self.info["this_frame"]
        if this_frame is not None:
            if gdb.types.has_field(this_frame.type.target(), "calleeToken_"):
                function = self._decode_jitframe(this_frame)["function"]
                if function is not None:
                    result = result + " " + function
        return result + ">>"

    def filename(self):
        this_frame = self.info["this_frame"]
        if this_frame is not None:
            if gdb.types.has_field(this_frame.type.target(), "calleeToken_"):
                script = self._decode_jitframe(this_frame)["script"]
                if script is not None:
                    obj = script['sourceObject_']['value']
                    # Verify that this is a ScriptSource object.
                    # FIXME should also deal with wrappers here.
                    nativeobj = obj.cast(self.cache.NativeObject)
                    # See bug 987069 and despair.  At least this
                    # approach won't give exceptions.
                    class_name = nativeobj['group_']['value']['clasp_']['name'].string("ISO-8859-1")
                    if class_name != "ScriptSource":
                        return FrameDecorator.filename(self)
                    scriptsourceobj = (nativeobj + 1).cast(self.cache.HeapSlot)[self.cache.SOURCE_SLOT]
                    scriptsource = scriptsourceobj['value']['data']['asBits'] << 1
                    scriptsource = scriptsource.cast(self.cache.ScriptSource)
                    return scriptsource['filename_']['mTuple']['mFirstA'].string()
        return FrameDecorator.filename(self)

    def frame_args(self):
        this_frame = self.info["this_frame"]
        if this_frame is None:
            return FrameDecorator.frame_args(self)
        if not gdb.types.has_field(this_frame.type.target(), "numActualArgs_"):
            return FrameDecorator.frame_args(self)
        # See if this is a function call.
        if self._decode_jitframe(this_frame)["function"] is None:
            return FrameDecorator.frame_args(self)
        # Construct and return an iterable of all the arguments.
        result = []
        num_args = long(this_frame["numActualArgs_"])
        # Sometimes we see very large values here, so truncate it to
        # bypass the damage.
        if num_args > 10:
            num_args = 10
        args_ptr = (this_frame + 1).cast(self.cache.Value.pointer())
        for i in range(num_args + 1):
            # Synthesize names, since there doesn't seem to be
            # anything better to do.
            if i == 0:
                name = 'this'
            else:
                name = 'arg%d' % i
            result.append(FrameSymbol(name, args_ptr[i]))
        return result

# A frame filter for SpiderMonkey.
class SpiderMonkeyFrameFilter(object):
    # |state_holder| is either None, or an instance of
    # SpiderMonkeyUnwinder.  If the latter, then this class will
    # reference the |unwinder_state| attribute to find the current
    # unwinder state.
    def __init__(self, cache, state_holder):
        self.name = "SpiderMonkey"
        self.enabled = True
        self.priority = 100
        self.state_holder = state_holder
        self.cache = cache

    def maybe_wrap_frame(self, frame):
        if self.state_holder is None or self.state_holder.unwinder_state is None:
            return frame
        base = frame.inferior_frame()
        info = self.state_holder.unwinder_state.get_frame(base)
        if info is None:
            return frame
        return JitFrameDecorator(frame, info, self.cache)

    def filter(self, frame_iter):
        return imap(self.maybe_wrap_frame, frame_iter)

# A frame id class, as specified by the gdb unwinder API.
class SpiderMonkeyFrameId(object):
    def __init__(self, sp, pc):
        self.sp = sp
        self.pc = pc

# This holds all the state needed during a given unwind.  Each time a
# new unwind is done, a new instance of this class is created.  It
# keeps track of all the state needed to unwind JIT frames.  Note that
# this class is not directly instantiated.
#
# This is a base class, and must be specialized for each target
# architecture, both because we need to use arch-specific register
# names, and because entry frame unwinding is arch-specific.
# See https://sourceware.org/bugzilla/show_bug.cgi?id=19286 for info
# about the register name issue.
#
# Each subclass must define SP_REGISTER, PC_REGISTER, and
# SENTINEL_REGISTER (see x64UnwinderState for info); and implement
# unwind_entry_frame_registers.
class UnwinderState(object):
    def __init__(self, typecache):
        self.next_sp = None
        self.next_type = None
        self.activation = None
        # An unwinder instance is specific to a thread.  Record the
        # selected thread for later verification.
        self.thread = gdb.selected_thread()
        self.frame_map = {}
        self.proc_mappings = None
        try:
            self.proc_mappings = parse_proc_maps()
        except IOError:
            pass
        self.typecache = typecache

    # If the given gdb.Frame was created by this unwinder, return the
    # corresponding informational dictionary for the frame.
    # Otherwise, return None.  This is used by the frame filter to
    # display extra information about the frame.
    def get_frame(self, frame):
        sp = long(frame.read_register(self.SP_REGISTER))
        if sp in self.frame_map:
            return self.frame_map[sp]
        return None

    # Add information about a frame to the frame map.  This map is
    # queried by |self.get_frame|.  |sp| is the frame's stack pointer,
    # and |name| the frame's type as a string, e.g. "JitFrame_Exit".
    def add_frame(self, sp, name = None, this_frame = None):
        self.frame_map[long(sp)] = { "name": name, "this_frame": this_frame }

    # See whether |pc| is claimed by some text mapping.  See
    # |parse_proc_maps| for details on how the decision is made.
    def text_address_claimed(self, pc):
        for (start, end) in self.proc_mappings:
            if (pc >= start and pc <= end):
                return True
        return False

    # See whether |pc| is claimed by the Jit.
    def is_jit_address(self, pc):
        if self.proc_mappings != None:
            return not self.text_address_claimed(pc)

        ptd = self.get_tls_per_thread_data()
        runtime = ptd['runtime_']
        if runtime == 0:
            return False

        jitRuntime = runtime['jitRuntime_']
        if jitRuntime == 0:
            return False

        execAllocators = [jitRuntime['execAlloc_'], jitRuntime['backedgeExecAlloc_']]
        for execAlloc in execAllocators:
            for pool in jsjitExecutableAllocator(execAlloc, self.typecache):
                pages = pool['m_allocation']['pages']
                size = pool['m_allocation']['size']
                if pages <= pc and pc < pages + size:
                    return True
        return False

    # Check whether |self| is valid for the selected thread.
    def check(self):
        return gdb.selected_thread() is self.thread

    # Essentially js::TlsPerThreadData.get().
    def get_tls_per_thread_data(self):
        return self.typecache.per_tls_data.value()['mValue']

    # |common| is a pointer to a CommonFrameLayout object.  Return a
    # tuple (local_size, header_size, frame_type), where |size| is the
    # integer size of the previous frame's locals; |header_size| is
    # the size of this frame's header; and |frame_type| is an integer
    # representing the previous frame's type.
    def unpack_descriptor(self, common):
        value = long(common['descriptor_'])
        local_size = value >> self.typecache.FRAMESIZE_SHIFT
        header_size = ((value >> self.typecache.FRAME_HEADER_SIZE_SHIFT) &
                       self.typecache.FRAME_HEADER_SIZE_MASK)
        header_size = header_size * self.typecache.void_starstar.sizeof
        frame_type = long(value & self.typecache.FRAMETYPE_MASK)
        if frame_type == self.typecache.JitFrame_Entry:
            # Trampoline-x64.cpp pushes a JitFrameLayout object, but
            # the stack pointer is actually adjusted as if a
            # CommonFrameLayout object was pushed.
            header_size = self.typecache.typeCommonFrameLayout.sizeof
        return (local_size, header_size, frame_type)

    # Create a new frame for gdb.  This makes a new unwind info object
    # and fills it in, then returns it.  It also registers any
    # pertinent information with the frame filter for later display.
    #
    # |pc| is the PC from the pending frame
    # |sp| is the stack pointer to use
    # |frame| points to the CommonFrameLayout object
    # |frame_type| is a integer, one of the |enum FrameType| values,
    # describing the current frame.
    # |pending_frame| is the pending frame (see the gdb unwinder
    # documentation).
    def create_frame(self, pc, sp, frame, frame_type, pending_frame):
        # Make a frame_id that claims that |frame| is sort of like a
        # frame pointer for this frame.
        frame_id = SpiderMonkeyFrameId(frame, pc)

        # Read the frame layout object to find the next such object.
        # This lets us unwind the necessary registers for the next
        # frame, and also update our internal state to match.
        common = frame.cast(self.typecache.typeCommonFrameLayoutPointer)
        next_pc = common['returnAddress_']
        (local_size, header_size, next_type) = self.unpack_descriptor(common)
        next_sp = frame + header_size + local_size

        # Compute the type of the next oldest frame's descriptor.
        this_class_type = self.typecache.frame_class_types[frame_type]
        this_frame = frame.cast(this_class_type)

        # Register this frame so the frame filter can find it.  This
        # is registered using SP because we don't have any other good
        # approach -- you can't get the frame id from a gdb.Frame.
        # https://sourceware.org/bugzilla/show_bug.cgi?id=19800
        frame_name = self.typecache.frame_enum_names[frame_type]
        self.add_frame(sp, name = frame_name, this_frame = this_frame)

        # Update internal state for the next unwind.
        self.next_sp = next_sp
        self.next_type = next_type

        unwind_info = pending_frame.create_unwind_info(frame_id)
        unwind_info.add_saved_register(self.PC_REGISTER, next_pc)
        unwind_info.add_saved_register(self.SP_REGISTER, next_sp)
        # FIXME it would be great to unwind any other registers here.
        return unwind_info

    # Unwind an "ordinary" JIT frame.  This is used for JIT frames
    # other than enter and exit frames.  Returns the newly-created
    # unwind info for gdb.
    def unwind_ordinary(self, pc, pending_frame):
        return self.create_frame(pc, self.next_sp, self.next_sp,
                                 self.next_type, pending_frame)

    # Unwind an exit frame.  Returns None if this cannot be done;
    # otherwise returns the newly-created unwind info for gdb.
    def unwind_exit_frame(self, pc, pending_frame):
        if self.activation == 0:
            # Reached the end of the list.
            return None
        elif self.activation is None:
            ptd = self.get_tls_per_thread_data()
            self.activation = ptd['runtime_']['jitActivation']
            jittop = ptd['runtime_']['jitTop']
        else:
            jittop = self.activation['prevJitTop_']
            self.activation = self.activation['prevJitActivation_']

        if jittop == 0:
            return None

        exit_sp = pending_frame.read_register(self.SP_REGISTER)
        frame_type = self.typecache.JitFrame_Exit
        return self.create_frame(pc, exit_sp, jittop, frame_type, pending_frame)

    # A wrapper for unwind_entry_frame_registers that handles
    # architecture-independent boilerplate.
    def unwind_entry_frame(self, pc, pending_frame):
        sp = self.next_sp
        # Notify the frame filter.
        self.add_frame(sp, name = 'JitFrame_Entry')
        # Make an unwind_info for the per-architecture code to fill in.
        frame_id = SpiderMonkeyFrameId(sp, pc)
        unwind_info = pending_frame.create_unwind_info(frame_id)
        self.unwind_entry_frame_registers(sp, unwind_info)
        self.next_sp = None
        self.next_type = None
        return unwind_info

    # The main entry point that is called to try to unwind a JIT frame
    # of any type.  Returns None if this cannot be done; otherwise
    # returns the newly-created unwind info for gdb.
    def unwind(self, pending_frame):
        pc = pending_frame.read_register(self.PC_REGISTER)

        # If the jit does not claim this address, bail.  GDB defers to our
        # unwinder by default, but we don't really want that kind of power.
        if not self.is_jit_address(long(pc)):
            return None

        if self.next_sp is not None:
            if self.next_type == self.typecache.JitFrame_Entry:
                return self.unwind_entry_frame(pc, pending_frame)
            return self.unwind_ordinary(pc, pending_frame)
        # Maybe we've found an exit frame.  FIXME I currently don't
        # know how to identify these precisely, so we'll just hope for
        # the time being.
        return self.unwind_exit_frame(pc, pending_frame)

# The UnwinderState subclass for x86-64.
class x64UnwinderState(UnwinderState):
    SP_REGISTER = 'rsp'
    PC_REGISTER = 'rip'

    # A register unique to this architecture, that is also likely to
    # have been saved in any frame.  The best thing to use here is
    # some arch-specific name for PC or SP.
    SENTINEL_REGISTER = 'rip'

    # Must be in sync with Trampoline-x64.cpp:generateEnterJIT.  Note
    # that rip isn't pushed there explicitly, but rather by the
    # previous function's call.
    PUSHED_REGS = ["r15", "r14", "r13", "r12", "rbx", "rbp", "rip"]

    # Fill in the unwound registers for an entry frame.
    def unwind_entry_frame_registers(self, sp, unwind_info):
        sp = sp.cast(self.typecache.void_starstar)
        # Skip the "result" push.
        sp = sp + 1
        for reg in self.PUSHED_REGS:
            data = sp.dereference()
            sp = sp + 1
            unwind_info.add_saved_register(reg, data)
            if reg is "rbp":
                unwind_info.add_saved_register(self.SP_REGISTER, sp)

# The unwinder object.  This provides the "user interface" to the JIT
# unwinder, and also handles constructing or destroying UnwinderState
# objects as needed.
class SpiderMonkeyUnwinder(Unwinder):
    # A list of all the possible unwinders.  See |self.make_unwinder|.
    UNWINDERS = [x64UnwinderState]

    def __init__(self, typecache):
        super(SpiderMonkeyUnwinder, self).__init__("SpiderMonkey")
        self.typecache = typecache
        self.unwinder_state = None

        # Disabled by default until we figure out issues in gdb.
        self.enabled = False
        gdb.write("SpiderMonkey unwinder is disabled by default, to enable it type:\n" +
                  "\tenable unwinder .* SpiderMonkey\n")
        # Some versions of gdb did not flush the internal frame cache
        # when enabling or disabling an unwinder.  This was fixed in
        # the same release of gdb that added the breakpoint_created
        # event.
        if not hasattr(gdb.events, "breakpoint_created"):
            gdb.write("\tflushregs\n")

        # We need to invalidate the unwinder state whenever the
        # inferior starts executing.  This avoids having a stale
        # cache.
        gdb.events.cont.connect(self.invalidate_unwinder_state)
        assert self.test_sentinels()

    def test_sentinels(self):
        # Self-check.
        regs = {}
        for unwinder in self.UNWINDERS:
            if unwinder.SENTINEL_REGISTER in regs:
                return False
            regs[unwinder.SENTINEL_REGISTER] = 1
        return True

    def make_unwinder(self, pending_frame):
        # gdb doesn't provide a good way to find the architecture.
        # See https://sourceware.org/bugzilla/show_bug.cgi?id=19399
        # So, we look at each known architecture and see if the
        # corresponding "unique register" is known.
        for unwinder in self.UNWINDERS:
            try:
                pending_frame.read_register(unwinder.SENTINEL_REGISTER)
            except:
                # Failed to read the register, so let's keep going.
                # This is more fragile than it might seem, because it
                # fails if the sentinel register wasn't saved in the
                # previous frame.
                continue
            return unwinder(self.typecache)
        return None

    def __call__(self, pending_frame):
        if self.unwinder_state is None or not self.unwinder_state.check():
            self.unwinder_state = self.make_unwinder(pending_frame)
        if not self.unwinder_state:
            return None
        return self.unwinder_state.unwind(pending_frame)

    def invalidate_unwinder_state(self, *args, **kwargs):
        self.unwinder_state = None

# Register the unwinder and frame filter with |objfile|.  If |objfile|
# is None, register them globally.
def register_unwinder(objfile):
    type_cache = UnwinderTypeCache()
    unwinder = None
    # This currently only works on Linux, due to parse_proc_maps.
    if _have_unwinder and platform.system() == "Linux":
        unwinder = SpiderMonkeyUnwinder(type_cache)
        gdb.unwinder.register_unwinder(objfile, unwinder, replace=True)
    # We unconditionally register the frame filter, because at some
    # point we'll add interpreter frame filtering.
    filt = SpiderMonkeyFrameFilter(type_cache, unwinder)
    if objfile is None:
        objfile = gdb
    objfile.frame_filters[filt.name] = filt