summary refs log tree commit diff stats
path: root/python/macholib/macholib/MachO.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/macholib/macholib/MachO.py')
-rw-r--r-- python/macholib/macholib/MachO.py 398
1 files changed, 398 insertions, 0 deletions
diff --git a/python/macholib/macholib/MachO.py b/python/macholib/macholib/MachO.py
new file mode 100644
index 000000000..f83ddb711
--- /dev/null
+++ b/python/macholib/macholib/MachO.py
@@ -0,0 +1,398 @@
+"""
+Utilities for reading and writing Mach-O headers
+"""
+from __future__ import print_function
+
+import sys
+import struct
+
+from macholib.mach_o import *
+from macholib.dyld import dyld_find, framework_info
+from macholib.util import fileview
+try:
+ from macholib.compat import bytes
+except ImportError:
+ pass
+
+try:
+ unicode
+except NameError:
+ unicode = str
+
__all__ = ['MachO']

# Short, printable name for every load command that refers to another
# dylib by path.
_RELOCATABLE_NAMES = {
    LC_LOAD_DYLIB: 'load_dylib',
    LC_LOAD_WEAK_DYLIB: 'load_weak_dylib',
    LC_PREBOUND_DYLIB: 'prebound_dylib',
    LC_REEXPORT_DYLIB: 'reexport_dylib',
}

# Ids of the relocatable commands that should be used for dependency
# walking; exactly the commands that have a name above.
_RELOCATABLE = set(_RELOCATABLE_NAMES)
+
def _shouldRelocateCommand(cmd):
    """
    Tell whether load command id *cmd* is one of the commands listed in
    ``_RELOCATABLE``, i.e. one worth inspecting for relocation.
    """
    is_relocatable = cmd in _RELOCATABLE
    return is_relocatable
+
class MachO(object):
    """
    Reads and writes the Mach-O header(s) of one existing file.

    A plain Mach-O file produces a single entry in ``headers``; a fat file
    produces one entry per architecture listed in its fat header.
    """
    # graphident - identifier for the ObjectGraph protocol (the filename)
    # filename   - the original filename of this mach-o
    # fat        - the parsed fat header, or None for a non-fat file
    # headers    - list of MachOHeader objects, in file order

    def __init__(self, filename):
        # supports the ObjectGraph protocol
        self.graphident = filename
        self.filename = filename

        # both of these are filled in by load()
        self.fat = None
        self.headers = []
        with open(filename, 'rb') as stream:
            self.load(stream)

    def __repr__(self):
        return "<MachO filename=%r>" % (self.filename,)

    def load(self, fh):
        """
        Parse every header found in *fh*, which must be positioned at 0.
        """
        assert fh.tell() == 0
        (magic,) = struct.unpack('>I', fh.read(4))
        fh.seek(0)
        if magic == FAT_MAGIC:
            self.load_fat(fh)
            return

        # not fat: the single header spans the whole file, so measure it
        fh.seek(0, 2)
        file_size = fh.tell()
        fh.seek(0)
        self.load_header(fh, 0, file_size)

    def load_fat(self, fh):
        """
        Read the fat header and its arch table, then load each slice.
        """
        self.fat = fat_header.from_fileobj(fh)
        # The whole arch table is read first because load_header() seeks.
        slices = []
        for _ in range(self.fat.nfat_arch):
            slices.append(fat_arch.from_fileobj(fh))
        for entry in slices:
            self.load_header(fh, entry.offset, entry.size)

    def rewriteLoadCommands(self, *args, **kw):
        """
        Forward the call to every header; True if any header reported a
        change.
        """
        # A list (not a generator) so that every header is visited even
        # after the first one reports a change.
        outcomes = [
            hdr.rewriteLoadCommands(*args, **kw) for hdr in self.headers
        ]
        return any(outcomes)

    def load_header(self, fh, offset, size):
        """
        Identify the Mach-O magic at *offset* and append the matching
        MachOHeader to ``self.headers``.

        Raises ValueError when the magic is not one of the four known
        values.
        """
        fh.seek(offset)
        (found,) = struct.unpack('>I', fh.read(4))
        fh.seek(offset)

        # (magic constant, header structure, struct byte-order marker)
        known = (
            (MH_MAGIC, mach_header, '>'),
            (MH_CIGAM, mach_header, '<'),
            (MH_MAGIC_64, mach_header_64, '>'),
            (MH_CIGAM_64, mach_header_64, '<'),
        )
        for magic, header_cls, endian in known:
            if found == magic:
                break
        else:
            raise ValueError("Unknown Mach-O header: 0x%08x in %r" % (
                found, fh))

        self.headers.append(
            MachOHeader(self, fh, offset, size, magic, header_cls, endian))

    def write(self, f):
        """Write every header back to the file object *f*."""
        for hdr in self.headers:
            hdr.write(f)
+
class MachOHeader(object):
    """
    Provides reading/writing of one Mach-O header (one architecture slice)
    inside the file owned by a parent ``MachO`` object.
    """
    # parent     - the object that owns this header (provides .filename)
    # offset     - offset of this slice within the file
    # size       - size of this slice
    # endian     - struct byte-order marker used for all structures
    # sizediff   - the current deviation from the initial mach-o size
    # header     - the mach-o header
    # commands   - a list of (load_command, somecommand, data)
    #              data is either bytes, or a list of section structures
    # total_size - the current mach-o header size (including header)
    # low_offset - essentially, the maximum mach-o header size
    # id_cmd     - the index of my id command, or None
    # filetype   - short name of the header's filetype, or 'unknown'

    def __init__(self, parent, fh, offset, size, magic, hdr, endian):
        self.MH_MAGIC = magic
        self.mach_header = hdr

        self.parent = parent
        self.offset = offset
        self.size = size
        self.endian = endian

        # These are all initialized by self.load()
        self.header = None
        self.commands = None
        self.id_cmd = None
        self.sizediff = None
        self.total_size = None
        self.low_offset = None
        self.filetype = None
        self.headers = []

        self.load(fh)

    def __repr__(self):
        return "<%s filename=%r offset=%d size=%d endian=%r>" % (
            type(self).__name__, self.parent.filename, self.offset, self.size,
            self.endian)

    def load(self, fh):
        """
        Parse the header and all of its load commands from *fh*.

        Fills in ``header``, ``commands``, ``filetype``, ``id_cmd``,
        ``total_size`` and ``low_offset`` and resets ``sizediff`` to 0.

        Raises ValueError for an unknown load command, a second
        LC_ID_DYLIB, a segment command whose size does not match its
        section count, a command-size total that disagrees with the
        header, or a header that extends past the first file data.
        """
        # All offsets below are relative to the start of this slice.
        fh = fileview(fh, self.offset, self.size)
        fh.seek(0)

        self.sizediff = 0
        kw = {'_endian_': self.endian}
        header = self.mach_header.from_fileobj(fh, **kw)
        self.header = header
        #if header.magic != self.MH_MAGIC:
        #    raise ValueError("header has magic %08x, expecting %08x" % (
        #        header.magic, self.MH_MAGIC))

        cmd = self.commands = []

        self.filetype = self.get_filetype_shortname(header.filetype)

        read_bytes = 0
        low_offset = sys.maxsize
        for i in range(header.ncmds):
            # read the load command
            cmd_load = load_command.from_fileobj(fh, **kw)

            # read the specific command
            klass = LC_REGISTRY.get(cmd_load.cmd, None)
            if klass is None:
                raise ValueError("Unknown load command: %d" % (cmd_load.cmd,))
            cmd_cmd = klass.from_fileobj(fh, **kw)

            if cmd_load.cmd == LC_ID_DYLIB:
                # remember where this command was
                if self.id_cmd is not None:
                    raise ValueError("This dylib already has an id")
                self.id_cmd = i

            if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64):
                # for segment commands, read the list of sections
                segs = []
                if cmd_load.cmd == LC_SEGMENT:
                    section_cls = section
                else:  # LC_SEGMENT_64
                    section_cls = section_64

                # check that the size makes sense
                expected_size = (
                    sizeof(klass) + sizeof(load_command) +
                    (sizeof(section_cls) * cmd_cmd.nsects)
                )
                if cmd_load.cmdsize != expected_size:
                    raise ValueError("Segment size mismatch")
                # this is a zero block or something
                # so the beginning is wherever the fileoff of this command is
                if cmd_cmd.nsects == 0:
                    if cmd_cmd.filesize != 0:
                        low_offset = min(low_offset, cmd_cmd.fileoff)
                else:
                    # this one has one or more sections
                    for _ in range(cmd_cmd.nsects):
                        # read the section
                        seg = section_cls.from_fileobj(fh, **kw)
                        # if the section has a size and is not zero filled
                        # then its beginning is the offset of this section
                        not_zerofill = ((seg.flags & S_ZEROFILL) != S_ZEROFILL)
                        if seg.offset > 0 and seg.size > 0 and not_zerofill:
                            low_offset = min(low_offset, seg.offset)
                        if not_zerofill:
                            # fetch the section contents, then return to
                            # the load command stream
                            c = fh.tell()
                            fh.seek(seg.offset)
                            sd = fh.read(seg.size)
                            seg.add_section_data(sd)
                            fh.seek(c)
                        segs.append(seg)
                # data is a list of sections
                cmd_data = segs

            # XXX: Disabled for now because writing back doesn't work
            #elif cmd_load.cmd == LC_CODE_SIGNATURE:
            #    c = fh.tell()
            #    fh.seek(cmd_cmd.dataoff)
            #    cmd_data = fh.read(cmd_cmd.datasize)
            #    fh.seek(c)
            #elif cmd_load.cmd == LC_SYMTAB:
            #    c = fh.tell()
            #    fh.seek(cmd_cmd.stroff)
            #    cmd_data = fh.read(cmd_cmd.strsize)
            #    fh.seek(c)

            else:
                # data is the raw bytes that follow the fixed-size command
                data_size = (
                    cmd_load.cmdsize - sizeof(klass) - sizeof(load_command)
                )
                cmd_data = fh.read(data_size)
            cmd.append((cmd_load, cmd_cmd, cmd_data))
            read_bytes += cmd_load.cmdsize

        # make sure the header made sense
        if read_bytes != header.sizeofcmds:
            raise ValueError("Read %d bytes, header reports %d bytes" % (
                read_bytes, header.sizeofcmds))
        self.total_size = sizeof(self.mach_header) + read_bytes
        self.low_offset = low_offset

        # this header overwrites a segment, what the heck?
        if self.total_size > low_offset:
            raise ValueError("total_size > low_offset (%d > %d)" % (
                self.total_size, low_offset))

    def walkRelocatables(self, shouldRelocateCommand=_shouldRelocateCommand):
        """
        for all relocatable commands
        yield (command_index, command_name, filename)
        """
        encoding = sys.getfilesystemencoding()
        for (idx, (lc, cmd, data)) in enumerate(self.commands):
            if shouldRelocateCommand(lc.cmd):
                name = _RELOCATABLE_NAMES[lc.cmd]
                # cmd.name is measured from the start of the load command;
                # data starts after the two fixed-size structures.
                ofs = cmd.name - sizeof(lc.__class__) - sizeof(cmd.__class__)
                end = data.find(b'\x00', ofs)
                if end == -1:
                    # No NUL terminator: the name runs to the end of the
                    # data.  Slicing with -1 would drop its last byte.
                    end = len(data)
                yield idx, name, data[ofs:end].decode(encoding)

    def rewriteInstallNameCommand(self, loadcmd):
        """
        Rewrite the id (install name) command of this dylib.

        Returns True when the header has an id command, False otherwise.
        """
        if self.id_cmd is not None:
            self.rewriteDataForCommand(self.id_cmd, loadcmd)
            return True
        return False

    def changedHeaderSizeBy(self, bytes):
        """
        Record that the header grew by *bytes* (negative when it shrank)
        and print a warning when it no longer fits below ``low_offset``.
        """
        self.sizediff += bytes
        if (self.total_size + self.sizediff) > self.low_offset:
            print("WARNING: Mach-O header in %r may be too large to relocate"%(self.parent.filename,))

    def rewriteLoadCommands(self, changefunc):
        """
        Rewrite the load commands using *changefunc*.

        *changefunc* is called with the parent's filename (for the id
        command) and with each relocatable filename; it returns the
        replacement name, or None to leave that name untouched.

        Returns True when at least one command was rewritten.
        """
        encoding = sys.getfilesystemencoding()
        changed = False

        new_id = changefunc(self.parent.filename)
        if new_id is not None:
            if self.rewriteInstallNameCommand(new_id.encode(encoding)):
                changed = True

        for idx, name, filename in self.walkRelocatables():
            new_name = changefunc(filename)
            if new_name is not None:
                if self.rewriteDataForCommand(idx, new_name.encode(encoding)):
                    changed = True
        return changed

    def rewriteDataForCommand(self, idx, data):
        """
        Replace the data of command *idx* with *data* (bytes), NUL-padded,
        and update the command's size and name offset.  Always returns True.
        """
        lc, cmd, old_data = self.commands[idx]
        hdrsize = sizeof(lc.__class__) + sizeof(cmd.__class__)
        # Use a fixed 8-byte alignment.  The native size of 'L' is 4 or 8
        # depending on the platform running this code, which made the
        # padding of the rewritten header host dependent.
        align = struct.calcsize('=Q')
        # Always appends at least one NUL, so the name stays terminated.
        data = data + (b'\x00' * (align - (len(data) % align)))
        newsize = hdrsize + len(data)
        self.commands[idx] = (lc, cmd, data)
        self.changedHeaderSizeBy(newsize - lc.cmdsize)
        lc.cmdsize, cmd.name = newsize, hdrsize
        return True

    def synchronize_size(self):
        """
        Fold ``sizediff`` into the header's ``sizeofcmds`` and
        ``total_size``.

        Raises ValueError when the resized header would not fit below
        ``low_offset``.
        """
        if (self.total_size + self.sizediff) > self.low_offset:
            raise ValueError("New Mach-O header is too large to relocate in %r"%(self.parent.filename,))
        self.header.sizeofcmds += self.sizediff
        self.total_size = sizeof(self.mach_header) + self.header.sizeofcmds
        self.sizediff = 0

    def write(self, fileobj):
        """
        Serialize the header and all load commands into *fileobj*, then
        zero-fill the space up to ``low_offset``.
        """
        fileobj = fileview(fileobj, self.offset, self.size)
        fileobj.seek(0)

        # serialize all the mach-o commands
        self.synchronize_size()

        self.header.to_fileobj(fileobj)
        for lc, cmd, data in self.commands:
            lc.to_fileobj(fileobj)
            cmd.to_fileobj(fileobj)

            # `unicode` is the module-level alias (str on Python 3), so
            # this one chain covers both major versions.
            if isinstance(data, unicode):
                fileobj.write(data.encode(sys.getfilesystemencoding()))
            elif isinstance(data, (bytes, str)):
                fileobj.write(data)
            else:
                # segments..
                for obj in data:
                    obj.to_fileobj(fileobj)

        # zero out the unused space, doubt this is strictly necessary
        # and is generally probably already the case
        fileobj.write(b'\x00' * (self.low_offset - fileobj.tell()))

    def _find_command(self, cmd_id):
        """Return the first command structure whose id is *cmd_id*, or None."""
        for lc, cmd, data in self.commands:
            if lc.cmd == cmd_id:
                return cmd
        return None

    def getSymbolTableCommand(self):
        """Return the LC_SYMTAB command structure, or None if absent."""
        return self._find_command(LC_SYMTAB)

    def getDynamicSymbolTableCommand(self):
        """Return the LC_DYSYMTAB command structure, or None if absent."""
        return self._find_command(LC_DYSYMTAB)

    def get_filetype_shortname(self, filetype):
        """
        Map a header filetype value to its short name, or 'unknown' when
        it is not listed in MH_FILETYPE_SHORTNAMES.
        """
        if filetype in MH_FILETYPE_SHORTNAMES:
            return MH_FILETYPE_SHORTNAMES[filetype]
        else:
            return 'unknown'
+
def main(fn):
    """
    Print every distinct relocatable filename found in the Mach-O file
    *fn*, one per line, prefixed by the name of the command that uses it.
    """
    already_printed = set()
    for header in MachO(fn).headers:
        for _idx, name, other in header.walkRelocatables():
            if other in already_printed:
                continue
            already_printed.add(other)
            print('\t' + name + ": " + other)
+
if __name__ == '__main__':
    # Script entry point: list the dependencies of each file named on the
    # command line, falling back to /bin/ls when none is given.
    import sys
    files = sys.argv[1:]
    if not files:
        files = ['/bin/ls']
    for fn in files:
        print(fn)
        main(fn)