summaryrefslogtreecommitdiffstats
path: root/toolkit/crashreporter/tools/symbolstore.py
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/crashreporter/tools/symbolstore.py')
-rwxr-xr-xtoolkit/crashreporter/tools/symbolstore.py1078
1 files changed, 1078 insertions, 0 deletions
diff --git a/toolkit/crashreporter/tools/symbolstore.py b/toolkit/crashreporter/tools/symbolstore.py
new file mode 100755
index 000000000..15eb5e112
--- /dev/null
+++ b/toolkit/crashreporter/tools/symbolstore.py
@@ -0,0 +1,1078 @@
+#!/bin/env python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Usage: symbolstore.py <params> <dump_syms path> <symbol store path>
+# <debug info files or dirs>
+# Runs dump_syms on each debug info file specified on the command line,
+# then places the resulting symbol file in the proper directory
+# structure in the symbol store path. Accepts multiple files
+# on the command line, so can be called as part of a pipe using
+# find <dir> | xargs symbolstore.py <dump_syms> <storepath>
+# But really, you might just want to pass it <dir>.
+#
+# Parameters accepted:
+# -c : Copy debug info files to the same directory structure
+# as sym files. On Windows, this will also copy
+# binaries into the symbol store.
+# -a "<archs>" : Run dump_syms -a <arch> for each space separated
+# cpu architecture in <archs> (only on OS X)
+# -s <srcdir> : Use <srcdir> as the top source directory to
+# generate relative filenames.
+
+import buildconfig
+import errno
+import sys
+import platform
+import os
+import re
+import shutil
+import textwrap
+import fnmatch
+import subprocess
+import time
+import ctypes
+import urlparse
+import concurrent.futures
+import multiprocessing
+
+from optparse import OptionParser
+from xml.dom.minidom import parse
+
+from mozpack.copier import FileRegistry
+from mozpack.manifests import (
+ InstallManifest,
+ UnreadableInstallManifest,
+)
+
+# Utility classes
+
class VCSFileInfo:
    """ A base class for version-controlled file information. Ensures that the
        following attributes are generated only once (successfully):

            self.root
            self.clean_root
            self.revision
            self.filename

        Each attribute is produced on first access by the matching GetRoot,
        GetCleanRoot, GetRevision or GetFilename method. Those methods are
        explicitly not implemented here and must be implemented in derived
        classes. """

    # Lazily computed attribute name -> name of the method that produces it.
    _LAZY_GETTERS = {
        "root": "GetRoot",
        "clean_root": "GetCleanRoot",
        "revision": "GetRevision",
        "filename": "GetFilename",
    }

    def __init__(self, file):
        """ Remember |file|; raises ValueError when |file| is empty/None. """
        if not file:
            raise ValueError("VCSFileInfo requires a non-empty file path")
        self.file = file

    def __getattr__(self, name):
        """ __getattr__ is only called for attributes that are not set on self,
            so storing a computed value on self prevents future calls to the
            getter. Falsy results are returned but not stored, on the off
            chance that a future call might succeed.

            Raises AttributeError (naming the attribute) for anything other
            than the four lazily computed attributes. """
        getter_name = VCSFileInfo._LAZY_GETTERS.get(name)
        if getter_name is None:
            raise AttributeError("%s instance has no attribute %r"
                                 % (self.__class__.__name__, name))

        value = getattr(self, getter_name)()
        if value:
            # Cache on the instance so __getattr__ is bypassed next time.
            setattr(self, name, value)
        return value

    def GetRoot(self):
        """ This method should return the unmodified root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetCleanRoot(self):
        """ This method should return the repository root for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetRevision(self):
        """ This method should return the revision number for the file or 'None'
            on failure. """
        raise NotImplementedError

    def GetFilename(self):
        """ This method should return the repository-specific filename for the
            file or 'None' on failure. """
        raise NotImplementedError
+
+
+# This regex strips the protocol and the optional username/password from a
+# url; capture group 1 holds what is left (host plus path).
+# For instance, group 1 is 'foo.com/bar' for each of the following urls:
+#
+# http://foo.com/bar
+# svn+ssh://user@foo.com/bar
+# svn+ssh://user:pass@foo.com/bar
+#
+rootRegex = re.compile(r'^\S+?:/+(?:[^\s/]*@)?(\S+)$')
+
def read_output(*args):
    """Run the command given by |args| and return what it wrote to stdout,
    with trailing whitespace removed. The command's stderr is not captured."""
    process = subprocess.Popen(args=args, stdout=subprocess.PIPE)
    captured = process.communicate()[0]
    return captured.rstrip()
+
class HGRepoInfo:
    """Revision and remote location of the Mercurial checkout at |path|.

    The changeset comes from MOZ_SOURCE_CHANGESET and the remote from
    MOZ_SOURCE_REPO when those environment variables are set; otherwise hg
    is asked. Exits the process with status 1 when no usable remote can be
    determined."""

    def __init__(self, path):
        self.path = path

        # Environment overrides win; hg is only consulted when they are unset.
        rev = os.environ.get('MOZ_SOURCE_CHANGESET') or read_output(
            'hg', '-R', path, 'parent', '--template={node|short}')

        # Look for the default hg path. If MOZ_SOURCE_REPO is set, we
        # don't bother asking hg.
        root = os.environ.get('MOZ_SOURCE_REPO') or read_output(
            'hg', '-R', path, 'showconfig', 'paths.default')
        if not root:
            sys.stderr.write("Failed to get HG Repo for %s" % path + "\n")

        # Strip protocol/credentials (and one trailing slash) from the remote.
        cleanroot = None
        match = rootRegex.match(root) if root else None
        if match:
            cleanroot = match.group(1)
            if cleanroot.endswith('/'):
                cleanroot = cleanroot[:-1]

        if cleanroot is None:
            sys.stderr.write(textwrap.dedent("""\
                Could not determine repo info for %s. This is either not a clone of the web-based
                repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt.""") % path + "\n")
            sys.exit(1)

        self.rev = rev
        self.root = root
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Return an HGFileInfo describing |file| within this repository."""
        return HGFileInfo(file, self)
+
class HGFileInfo(VCSFileInfo):
    """VCS information for one file inside a Mercurial checkout; every value
    is taken from the owning HGRepoInfo."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Keep the path relative to the checkout.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        return self.repo.root

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        """Return "hg:<clean root>:<relative file>:<revision>", or just the
        relative file when revision or clean root is unavailable."""
        if not (self.revision and self.clean_root):
            return self.file
        return "hg:%s:%s:%s" % (self.clean_root, self.file, self.revision)
+
class GitRepoInfo:
    """
    Info about a local git repository. Does not currently
    support discovering info about a git clone, the info must be
    provided out-of-band.

    Exits the process with status 1 when |root| cannot be reduced to a
    clean root.
    """
    def __init__(self, path, rev, root):
        self.path = path

        # Strip protocol/credentials (and one trailing slash) from the remote.
        cleanroot = None
        match = rootRegex.match(root) if root else None
        if match:
            cleanroot = match.group(1)
            if cleanroot.endswith('/'):
                cleanroot = cleanroot[:-1]

        if cleanroot is None:
            sys.stderr.write(textwrap.dedent("""\
                Could not determine repo info for %s (%s). This is either not a clone of a web-based
                repository, or you have not specified MOZ_SOURCE_REPO, or the clone is corrupt.""") % (path, root) + "\n")
            sys.exit(1)

        self.rev = rev
        self.cleanroot = cleanroot

    def GetFileInfo(self, file):
        """Return a GitFileInfo describing |file| within this repository."""
        return GitFileInfo(file, self)
+
class GitFileInfo(VCSFileInfo):
    """VCS information for one file inside a git checkout; every value is
    taken from the owning GitRepoInfo."""

    def __init__(self, file, repo):
        VCSFileInfo.__init__(self, file)
        self.repo = repo
        # Keep the path relative to the checkout.
        self.file = os.path.relpath(file, repo.path)

    def GetRoot(self):
        # Note: this is the local checkout path, not a remote URL.
        return self.repo.path

    def GetCleanRoot(self):
        return self.repo.cleanroot

    def GetRevision(self):
        return self.repo.rev

    def GetFilename(self):
        """Return "git:<clean root>:<relative file>:<revision>", or just the
        relative file when revision or clean root is unavailable."""
        if not (self.revision and self.clean_root):
            return self.file
        return "git:%s:%s:%s" % (self.clean_root, self.file, self.revision)
+
+# Utility functions
+
+# Cache mapping a file path to the VCS file info already determined for it
+# (filled in by GetVCSFilename); avoids extra filesystem activity or process launching.
+vcsFileInfoCache = {}
+
def IsInDir(file, dir):
    """Return True if |file| is |dir| itself or is located underneath it.

    Both paths are made absolute and compared case-insensitively."""
    # the lower() is to handle win32+vc8, where
    # the source filenames come out all lowercase,
    # but the srcdir can be mixed case
    file_path = os.path.abspath(file).lower()
    dir_path = os.path.abspath(dir).lower()
    if file_path == dir_path:
        return True
    # Compare against the directory plus a trailing separator, so a sibling
    # that merely shares a name prefix (/src/foobar vs. /src/foo) is rejected.
    return file_path.startswith(dir_path.rstrip(os.sep) + os.sep)
+
def GetVCSFilenameFromSrcdir(file, srcdir):
    """Return the VCS file info for |file| using the repository at |srcdir|,
    or None when |srcdir| has no cached repo info and is not a Mercurial
    checkout. Repo info is cached in Dumper.srcdirRepoInfo."""
    repo_cache = Dumper.srcdirRepoInfo
    if srcdir not in repo_cache:
        # Not in cache, so find it and cache it
        if not os.path.isdir(os.path.join(srcdir, '.hg')):
            # Unknown VCS or file is not in a repo.
            return None
        repo_cache[srcdir] = HGRepoInfo(srcdir)
    return repo_cache[srcdir].GetFileInfo(file)
+
def GetVCSFilename(file, srcdirs):
    """Given a full path to a file, and the list of top source directories,
    look for version control information about this file, and return
    a tuple containing
    1) a specially formatted filename that contains the VCS type,
    VCS location, relative filename, and revision number, formatted like:
    vcs:vcs location:filename:revision
    For example:
    cvs:cvs.mozilla.org/cvsroot:mozilla/browser/app/nsBrowserApp.cpp:1.36
    2) the unmodified root information if it exists

    A |file| without both a directory and a name component is returned
    untouched as (file, None)."""
    directory, basename = os.path.split(file)
    if directory == '' or basename == '':
        return (file, None)

    try:
        # Already cached this info, use it.
        info = vcsFileInfoCache[file]
    except KeyError:
        info = None
        for srcdir in srcdirs:
            if IsInDir(file, srcdir):
                # Only the first source directory containing the file is tried.
                info = GetVCSFilenameFromSrcdir(file, srcdir)
                if info:
                    vcsFileInfoCache[file] = info
                break

    root = ''
    if info:
        file = info.filename
        root = info.root

    # we want forward slashes on win32 paths
    return (file.replace("\\", "/"), root)
+
def validate_install_manifests(install_manifest_args):
    """Parse and validate "manifest,target_dir" strings.

    Returns a list of (InstallManifest, absolute target dir) tuples.
    Raises ValueError for a malformed argument and IOError when the manifest
    file or target directory is missing or the manifest cannot be parsed."""
    validated = []
    for spec in install_manifest_args:
        parts = spec.split(',')
        if len(parts) != 2:
            raise ValueError('Invalid format for --install-manifest: '
                             'specify manifest,target_dir')
        manifest_file = os.path.abspath(parts[0])
        destination = os.path.abspath(parts[1])
        if not os.path.isfile(manifest_file):
            raise IOError(errno.ENOENT, 'Manifest file not found',
                          manifest_file)
        if not os.path.isdir(destination):
            raise IOError(errno.ENOENT, 'Install directory not found',
                          destination)
        try:
            manifest = InstallManifest(manifest_file)
        except UnreadableInstallManifest:
            raise IOError(errno.EINVAL, 'Error parsing manifest file',
                          manifest_file)
        validated.append((manifest, destination))
    return validated
+
def make_file_mapping(install_manifests):
    """Build a dict mapping each installed file's absolute destination path
    to its source path, from (manifest, destination dir) pairs. Registry
    entries whose source object has no 'path' attribute are ignored."""
    mapping = {}
    for manifest, destination in install_manifests:
        dest_root = os.path.abspath(destination)
        registry = FileRegistry()
        manifest.populate_registry(registry)
        for dst, src in registry:
            if not hasattr(src, 'path'):
                continue
            installed = os.path.normpath(os.path.join(dest_root, dst))
            mapping[installed] = src.path
    return mapping
+
def GetPlatformSpecificDumper(**kwargs):
    """Return an instance of the Dumper subclass matching the build's
    OS_ARCH (from buildconfig.substs), constructed with |kwargs|.
    An OS_ARCH other than WINNT, Linux or Darwin raises KeyError."""
    dumper_classes = {
        'WINNT': Dumper_Win32,
        'Linux': Dumper_Linux,
        'Darwin': Dumper_Mac,
    }
    dumper_class = dumper_classes[buildconfig.substs['OS_ARCH']]
    return dumper_class(**kwargs)
+
def SourceIndex(fileStream, outputPath, vcs_root):
    """Write a srcsrv data block to the .stream file at |outputPath|.

    |fileStream| is the already formatted list of source file entries and
    |vcs_root| is written as the HGSERVER variable. The block is later used
    to source-index a pdb file. Always returns True; I/O errors propagate."""
    # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
    # Create the srcsrv data block that indexes the pdb file
    # The `with` guarantees the handle is closed even if a write fails.
    with open(outputPath, "w") as pdbStreamFile:
        pdbStreamFile.write('''SRCSRV: ini ------------------------------------------------\r\nVERSION=2\r\nINDEXVERSION=2\r\nVERCTRL=http\r\nSRCSRV: variables ------------------------------------------\r\nHGSERVER=''')
        pdbStreamFile.write(vcs_root)
        pdbStreamFile.write('''\r\nSRCSRVVERCTRL=http\r\nHTTP_EXTRACT_TARGET=%hgserver%/raw-file/%var3%/%var2%\r\nSRCSRVTRG=%http_extract_target%\r\nSRCSRV: source files ---------------------------------------\r\n''')
        pdbStreamFile.write(fileStream) # can't do string interpolation because the source server also uses this and so there are % in the above
        pdbStreamFile.write("SRCSRV: end ------------------------------------------------\r\n\n")
    return True
+
def StartJob(dumper, lock, srcdirRepoInfo, func_name, args):
    """Worker entry point: install the shared |lock| and |srcdirRepoInfo| on
    the Dumper class, then call dumper.<func_name>(*args) and return its
    result."""
    # Windows worker processes won't have run GlobalInit,
    # and due to a lack of fork(), won't inherit the class
    # variables from the parent, so set them here.
    Dumper.lock = lock
    Dumper.srcdirRepoInfo = srcdirRepoInfo
    job = getattr(dumper, func_name)
    return job(*args)
+
class JobPool(object):
    """Class-level registry of submitted futures and their callbacks, all
    run on a single shared executor."""
    # future -> callback to invoke with that future's result
    jobs = {}
    executor = None

    @classmethod
    def init(cls, executor):
        """Install the executor that later submit() calls will use."""
        cls.executor = executor

    @classmethod
    def shutdown(cls):
        """Shut down the installed executor."""
        cls.executor.shutdown()

    @classmethod
    def submit(cls, args, callback):
        """Schedule StartJob(*args) and remember |callback| for its future."""
        future = cls.executor.submit(StartJob, *args)
        cls.jobs[future] = callback

    @classmethod
    def as_completed(cls):
        '''Like concurrent.futures.as_completed, but allows adding new futures
        between generator steps. Iteration will end when the generator has
        yielded all completed futures and JobPool.jobs is empty.
        Yields (future, callback) pairs.
        '''
        while cls.jobs:
            finished = concurrent.futures.wait(
                cls.jobs.keys(),
                return_when=concurrent.futures.FIRST_COMPLETED).done
            for future in finished:
                # Forget the future before handing it out.
                yield future, cls.jobs.pop(future)
+
class Dumper:
    """This class can dump symbols from a file with debug info, and
    store the output in a directory structure that is valid for use as
    a Breakpad symbol server.  Requires a path to a dump_syms binary--
    |dump_syms| and a directory to store symbols in--|symbol_path|.
    Optionally takes a list of processor architectures to process from
    each debug file--|archs|, the full path to the top source
    directory--|srcdir|, for generating relative source file names,
    and an option to copy debug info files alongside the dumped
    symbol files--|copy_debug|, mostly useful for creating a
    Microsoft Symbol Server from the resulting output.

    You don't want to use this directly if you intend to process files.
    Instead, call GetPlatformSpecificDumper to get an instance of a
    subclass.

    Processing is performed asynchronously via worker processes; in
    order to wait for processing to finish and cleanup correctly, you
    must call Finish after all ProcessFiles calls have been made.
    You must also call Dumper.GlobalInit before creating or using any
    instances."""
    # NOTE: the list defaults below are never mutated; both are copied
    # before being stored on the instance.
    def __init__(self, dump_syms, symbol_path,
                 archs=None,
                 srcdirs=[],
                 copy_debug=False,
                 vcsinfo=False,
                 srcsrv=False,
                 exclude=[],
                 repo_manifest=None,
                 file_mapping=None):
        # popen likes absolute paths, at least on windows
        self.dump_syms = os.path.abspath(dump_syms)
        self.symbol_path = symbol_path
        if archs is None:
            # makes the loop logic simpler
            self.archs = ['']
        else:
            self.archs = ['-a %s' % a for a in archs.split()]
        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
        self.copy_debug = copy_debug
        self.vcsinfo = vcsinfo
        self.srcsrv = srcsrv
        self.exclude = exclude[:]
        if repo_manifest:
            self.parse_repo_manifest(repo_manifest)
        self.file_mapping = file_mapping or {}

        # book-keeping to keep track of the cleanup work per file tuple
        self.files_record = {}

    @classmethod
    def GlobalInit(cls, executor=concurrent.futures.ProcessPoolExecutor):
        """Initialize the class globals for the multiprocessing setup; must
        be called before any Dumper instances are created and used. Test cases
        may pass in a different executor to use, usually
        concurrent.futures.ThreadPoolExecutor."""
        # cpu_count() signals "unknown" by raising NotImplementedError.
        try:
            num_cpus = multiprocessing.cpu_count()
        except NotImplementedError:
            num_cpus = None
        if not num_cpus:
            # assume a dual core machine if we can't find out for some reason
            # probably better on single core anyway due to I/O constraints
            num_cpus = 2

        # have to create any locks etc before the pool
        manager = multiprocessing.Manager()
        cls.lock = manager.RLock()
        cls.srcdirRepoInfo = manager.dict()
        JobPool.init(executor(max_workers=num_cpus))

    def output(self, dest, output_str):
        """Writes |output_str| to |dest|, holding |lock|;
        terminates with a newline."""
        with Dumper.lock:
            dest.write(output_str + "\n")
            dest.flush()

    def output_pid(self, dest, output_str):
        """Debugging output; prepends the pid to the string."""
        self.output(dest, "%d: %s" % (os.getpid(), output_str))

    def parse_repo_manifest(self, repo_manifest):
        """
        Parse an XML manifest of repository info as produced
        by the `repo manifest -r` command.

        Every usable <project> is appended to self.srcdirs and registered
        in Dumper.srcdirRepoInfo as a git repository. Projects that lack a
        name, path, revision or a declared remote are skipped with a message.
        """
        doc = parse(repo_manifest)
        # documentElement is the root element even when comments precede it.
        root_element = doc.documentElement
        if root_element is None or root_element.tagName != "manifest":
            return

        # First, get remotes.
        def ensure_slash(u):
            if not u.endswith("/"):
                return u + "/"
            return u
        remotes = dict((r.getAttribute("name"), ensure_slash(r.getAttribute("fetch")))
                       for r in doc.getElementsByTagName("remote"))
        # And default remote.
        default_remote = None
        default_elements = doc.getElementsByTagName("default")
        if default_elements:
            default_remote = default_elements[0].getAttribute("remote")
        # Now get projects. Assume they're relative to repo_manifest.
        base_dir = os.path.abspath(os.path.dirname(repo_manifest))
        for proj in doc.getElementsByTagName("project"):
            # name is the repository URL relative to the remote path.
            name = proj.getAttribute("name")
            # path is the path on-disk, relative to the manifest file.
            path = proj.getAttribute("path")
            # revision is the changeset ID.
            rev = proj.getAttribute("revision")
            # remote is the base URL to use.
            remote = proj.getAttribute("remote")
            # remote defaults to the <default remote>.
            if not remote:
                remote = default_remote
            # path defaults to name.
            if not path:
                path = name
            # An undeclared remote is treated like any other missing datum.
            remote_url = remotes.get(remote) if remote else None
            if not (name and path and rev and remote_url):
                print("Skipping project %s" % proj.toxml())
                continue
            # Turn git URLs into http URLs so that urljoin works.
            if remote_url.startswith("git:"):
                remote_url = "http" + remote_url[3:]
            # Add this project to srcdirs.
            srcdir = os.path.join(base_dir, path)
            self.srcdirs.append(srcdir)
            # And cache its VCS file info. Currently all repos mentioned
            # in a repo manifest are assumed to be git.
            root = urlparse.urljoin(remote_url, name)
            Dumper.srcdirRepoInfo[srcdir] = GitRepoInfo(srcdir, rev, root)

    # subclasses override this
    def ShouldProcess(self, file):
        """Return False when the basename of |file| matches any exclude
        pattern, True otherwise."""
        basename = os.path.basename(file)
        return not any(fnmatch.fnmatch(basename, exclude) for exclude in self.exclude)

    # and can override this
    def ShouldSkipDir(self, dir):
        return False

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1), or "" when the
        command cannot be started."""
        try:
            # we use -L to read the targets of symlinks,
            # and -b to print just the content, not the filename
            # An argument list (no shell) keeps paths with spaces intact.
            proc = subprocess.Popen(["file", "-Lb", file],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
            return proc.communicate()[0]
        except OSError:
            return ""

    # This is a no-op except on Win32
    def FixFilenameCase(self, file):
        return file

    # This is a no-op except on Win32
    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        return ""

    # subclasses override this if they want to support this
    def CopyDebug(self, file, debug_file, guid, code_file, code_id):
        pass

    def Finish(self, stop_pool=True):
        '''Process all pending jobs and any jobs their callbacks submit.
        By default, will shutdown the executor, but for testcases that
        need multiple runs, pass stop_pool = False.'''
        for job, callback in JobPool.as_completed():
            try:
                res = job.result()
            except Exception as e:
                self.output(sys.stderr, 'Job raised exception: %s' % e)
                continue
            callback(res)
        if stop_pool:
            JobPool.shutdown()

    def Process(self, *args):
        """Process files recursively in args."""
        # We collect all files to process first then sort by size to schedule
        # larger files first because larger files tend to take longer and we
        # don't like long pole stragglers.
        files = set()
        for arg in args:
            files.update(self.get_files_to_process(arg))

        for f in sorted(files, key=os.path.getsize, reverse=True):
            self.ProcessFiles((f,))

    def get_files_to_process(self, file_or_dir):
        """Generate the files to process from an input."""
        if os.path.isdir(file_or_dir) and not self.ShouldSkipDir(file_or_dir):
            for f in self.get_files_to_process_in_dir(file_or_dir):
                yield f
        elif os.path.isfile(file_or_dir):
            yield file_or_dir

    def get_files_to_process_in_dir(self, path):
        """Generate the files to process in a directory.

        Valid files are determined by calling ShouldProcess.
        """
        for root, dirs, files in os.walk(path):
            # Prune in place so os.walk does not descend into skipped dirs.
            dirs[:] = [d for d in dirs if not self.ShouldSkipDir(d)]
            for f in files:
                fullpath = os.path.join(root, f)
                if self.ShouldProcess(fullpath):
                    yield fullpath

    def SubmitJob(self, file_key, func_name, args, callback):
        """Submits a job to the pool of workers. |file_key| is accepted for
        the callers' benefit but is not used here."""
        JobPool.submit((self, Dumper.lock, Dumper.srcdirRepoInfo, func_name, args), callback)

    def ProcessFilesFinished(self, res):
        """Callback from multiprocessing when ProcessFilesWork finishes;
        run the cleanup work, if any"""
        # only run the cleanup function once per tuple of files
        files = res['files']
        self.files_record[files] += 1
        if self.files_record[files] == len(self.archs):
            del self.files_record[files]
            if res['after']:
                res['after'](res['status'], res['after_arg'])

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Dump symbols from these files into a symbol file, stored
        in the proper directory structure in  |symbol_path|; processing is performed
        asynchronously, and Finish must be called to wait for it complete and cleanup.
        All files after the first are fallbacks in case the first file does not process
        successfully; if it does, no other files will be touched."""
        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))

        # tries to get the vcs root from the .mozconfig first - if it's not set
        # the tinderbox vcs path will be assigned further down
        vcs_root = os.environ.get('MOZ_SOURCE_REPO')
        # record that we submitted jobs for this tuple of files
        self.files_record[files] = 0
        for arch_num, arch in enumerate(self.archs):
            self.SubmitJob(files[-1], 'ProcessFilesWork',
                           args=(files, arch_num, arch, vcs_root, after, after_arg),
                           callback=self.ProcessFilesFinished)

    def dump_syms_cmdline(self, file, arch, files):
        '''
        Get the commandline used to invoke dump_syms.
        '''
        # The Mac dumper overrides this.
        return [self.dump_syms, file]

    def ProcessFilesWork(self, files, arch_num, arch, vcs_root, after, after_arg):
        """Worker body: run dump_syms on each file of |files| in turn until
        one yields a symbol file, rewriting FILE lines on the way.

        Returns a dict with 'status' (True once a symbol file with at least
        one non-MODULE/FILE/CODE_ID line was written), 'after', 'after_arg'
        and 'files'. Unexpected exceptions are reported and re-raised."""
        t_start = time.time()
        self.output_pid(sys.stderr, "Worker processing files: %s" % (files,))

        # our result is a status, a cleanup function, an argument to that function, and the tuple of files we were called on
        result = { 'status' : False, 'after' : after, 'after_arg' : after_arg, 'files' : files }

        # srcsrv entries, joined when handed to SourceServerIndexing
        source_entries = []
        code_id, code_file = None, None
        for file in files:
            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
            proc = None
            try:
                cmd = self.dump_syms_cmdline(file, arch, files)
                self.output_pid(sys.stderr, ' '.join(cmd))
                # The child keeps its own copy of the devnull descriptor, so
                # ours can be closed as soon as the process is started.
                with open(os.devnull, 'wb') as devnull:
                    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                            stderr=devnull)
                module_line = next(proc.stdout)
                if module_line.startswith("MODULE"):
                    # MODULE os cpu guid debug_file
                    (guid, debug_file) = (module_line.split())[3:5]
                    # strip off .pdb extensions, and append .sym
                    sym_file = re.sub(r"\.pdb$", "", debug_file) + ".sym"
                    # we do want forward slashes here
                    rel_path = os.path.join(debug_file,
                                            guid,
                                            sym_file).replace("\\", "/")
                    full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                              rel_path))
                    try:
                        os.makedirs(os.path.dirname(full_path))
                    except OSError as e:
                        # only "already exists" is expected here
                        if e.errno != errno.EEXIST:
                            raise
                    with open(full_path, "w") as f:
                        f.write(module_line)
                        # now process the rest of the output
                        for line in proc.stdout:
                            if line.startswith("FILE"):
                                # FILE index filename
                                (x, index, filename) = line.rstrip().split(None, 2)
                                filename = os.path.normpath(self.FixFilenameCase(filename))
                                # We want original file paths for the source server.
                                sourcepath = filename
                                if filename in self.file_mapping:
                                    filename = self.file_mapping[filename]
                                if self.vcsinfo:
                                    (filename, rootname) = GetVCSFilename(filename, self.srcdirs)
                                    # sets vcs_root in case the loop through files were to end on an empty rootname
                                    if vcs_root is None and rootname:
                                        vcs_root = rootname
                                # gather up files with hg for indexing; require
                                # the "hg:" prefix so a plain path that merely
                                # starts with "hg" is not mistaken for one
                                if filename.startswith("hg:"):
                                    (ver, checkout, source_file, revision) = filename.split(":", 3)
                                    source_entries.append(sourcepath + "*" + source_file + '*' + revision + "\r\n")
                                f.write("FILE %s %s\n" % (index, filename))
                            elif line.startswith("INFO CODE_ID "):
                                # INFO CODE_ID code_id code_file
                                # This gives some info we can use to
                                # store binaries in the symbol store.
                                bits = line.rstrip().split(None, 3)
                                if len(bits) == 4:
                                    code_id, code_file = bits[2:]
                                f.write(line)
                            else:
                                # pass through all other lines unchanged
                                f.write(line)
                                # we want to return true only if at least one line is not a MODULE or FILE line
                                result['status'] = True
                    proc.wait()
                    # we output relative paths so callers can get a list of what
                    # was generated
                    self.output(sys.stdout, rel_path)
                    if self.srcsrv and vcs_root:
                        # add source server indexing to the pdb file
                        self.SourceServerIndexing(file, guid, ''.join(source_entries), vcs_root)
                    # only copy debug the first time if we have multiple architectures
                    if self.copy_debug and arch_num == 0:
                        self.CopyDebug(file, debug_file, guid,
                                       code_file, code_id)
            except StopIteration:
                # dump_syms produced no output at all for this file
                pass
            except Exception as e:
                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
                raise
            finally:
                # Always release the pipe and reap the child, including when
                # its output was abandoned early.
                if proc is not None:
                    proc.stdout.close()
                    proc.wait()
            if result['status']:
                # we only need 1 file to work
                break

        elapsed = time.time() - t_start
        self.output_pid(sys.stderr, 'Worker finished processing %s in %.2fs' %
                        (files, elapsed))
        return result
+
+# Platform-specific subclasses. For the most part, these just have
+# logic to determine what files to extract symbols from.
+
class Dumper_Win32(Dumper):
    """Dumper for Windows: processes .pdb files, can copy/compress debug
    files and binaries into the symbol store, and can write source server
    data into PDBs."""
    # filename as passed to FixFilenameCase -> corrected on-disk spelling
    fixedFilenameCaseCache = {}

    def ShouldProcess(self, file):
        """This function will allow processing of pdb files that have dll
        or exe files with the same base name next to them."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".pdb"):
            (path,ext) = os.path.splitext(file)
            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
                return True
        return False

    def FixFilenameCase(self, file):
        """Recent versions of Visual C++ put filenames into
        PDB files as all lowercase.  If the file exists
        on the local filesystem, fix it."""

        # Remember the key exactly as given: the cache is looked up with the
        # caller's string, so it must be stored under that same string and
        # not under the unicode conversion made below.
        cache_key = file

        # Use a cached version if we have one.
        if cache_key in self.fixedFilenameCaseCache:
            return self.fixedFilenameCaseCache[cache_key]

        result = file

        ctypes.windll.kernel32.SetErrorMode(ctypes.c_uint(1))
        if not isinstance(file, unicode):
            file = unicode(file, sys.getfilesystemencoding())
        handle = ctypes.windll.kernel32.CreateFileW(file,
                                                    # GENERIC_READ
                                                    0x80000000,
                                                    # FILE_SHARE_READ
                                                    1,
                                                    None,
                                                    # OPEN_EXISTING
                                                    3,
                                                    0,
                                                    None)
        if handle != -1:
            # First call asks for the required buffer size.
            size = ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle,
                                                                    None,
                                                                    0,
                                                                    0)
            buf = ctypes.create_unicode_buffer(size)
            if ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle,
                                                                buf,
                                                                size,
                                                                0) > 0:
                # The return value of GetFinalPathNameByHandleW uses the
                # '\\?\' prefix.
                result = buf.value.encode(sys.getfilesystemencoding())[4:]
            ctypes.windll.kernel32.CloseHandle(handle)

        # Cache the corrected version to avoid future filesystem hits.
        self.fixedFilenameCaseCache[cache_key] = result
        return result

    def CopyDebug(self, file, debug_file, guid, code_file, code_id):
        """Copy the debug file (and, when |code_file| and |code_id| are
        known and the binary exists next to |file|, the binary too) into the
        symbol store, compressing each with makecab when possible. The
        relative path of every stored file is written to stdout."""
        def compress(path):
            """Compress |path| to the same name with its last character
            replaced by '_'; on success delete |path| and return True."""
            compressed_file = path[:-1] + '_'
            # ignore makecab's output
            with open(os.devnull, 'w') as devnull:
                success = subprocess.call(["makecab.exe", "/D",
                                           "CompressionType=MSZIP",
                                           path, compressed_file],
                                          stdout=devnull,
                                          stderr=subprocess.STDOUT)
            if success == 0 and os.path.exists(compressed_file):
                os.unlink(path)
                return True
            return False

        rel_path = os.path.join(debug_file,
                                guid,
                                debug_file).replace("\\", "/")
        full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                  rel_path))
        shutil.copyfile(file, full_path)
        if compress(full_path):
            self.output(sys.stdout, rel_path[:-1] + '_')
        else:
            self.output(sys.stdout, rel_path)

        # Copy the binary file as well
        if code_file and code_id:
            full_code_path = os.path.join(os.path.dirname(file),
                                          code_file)
            if os.path.exists(full_code_path):
                rel_path = os.path.join(code_file,
                                        code_id,
                                        code_file).replace("\\", "/")
                full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                          rel_path))
                try:
                    os.makedirs(os.path.dirname(full_path))
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
                shutil.copyfile(full_code_path, full_path)
                if compress(full_path):
                    self.output(sys.stdout, rel_path[:-1] + '_')
                else:
                    self.output(sys.stdout, rel_path)

    def SourceServerIndexing(self, debug_file, guid, sourceFileStream, vcs_root):
        """Write the srcsrv .stream file next to |debug_file| and, when
        copy_debug is set, have pdbstr (located via PDBSTR_PATH) write it
        into the PDB and then delete the .stream file.

        Returns the result of SourceIndex. Raises RuntimeError when
        copy_debug is set but PDBSTR_PATH is not."""
        # Creates a .pdb.stream file in the mozilla\objdir to be used for source indexing
        debug_file = os.path.abspath(debug_file)
        streamFilename = debug_file + ".stream"
        stream_output_path = os.path.abspath(streamFilename)
        # Call SourceIndex to create the .stream file
        result = SourceIndex(sourceFileStream, stream_output_path, vcs_root)
        if self.copy_debug:
            try:
                pdbstr_path = os.environ.get("PDBSTR_PATH")
                if not pdbstr_path:
                    raise RuntimeError("PDBSTR_PATH must be set to write "
                                       "source server data into %s" % debug_file)
                pdbstr = os.path.normpath(pdbstr_path)
                subprocess.call([pdbstr, "-w", "-p:" + os.path.basename(debug_file),
                                 "-i:" + os.path.basename(streamFilename), "-s:srcsrv"],
                                cwd=os.path.dirname(stream_output_path))
            finally:
                # clean up all the .stream files when done
                os.remove(stream_output_path)
        return result
+
class Dumper_Linux(Dumper):
    """Dumper for Linux: processes ELF executables and shared objects."""
    # objcopy binary to use; the OBJCOPY environment variable overrides it
    objcopy = os.environ.get('OBJCOPY', 'objcopy')

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False

    def CopyDebug(self, file, debug_file, guid, code_file, code_id):
        """Split the debug info of |file| into a .dbg file, link it from
        |file| via .gnu_debuglink, move the .dbg file into the symbol store
        and gzip it there. If either objcopy step fails, any partial .dbg
        file is removed and nothing is stored."""
        # We want to strip out the debug info, and add a
        # .gnu_debuglink section to the object, so the debugger can
        # actually load our debug info later.
        file_dbg = file + ".dbg"
        if subprocess.call([self.objcopy, '--only-keep-debug', file, file_dbg]) == 0 and \
           subprocess.call([self.objcopy, '--add-gnu-debuglink=%s' % file_dbg, file]) == 0:
            rel_path = os.path.join(debug_file,
                                    guid,
                                    debug_file + ".dbg")
            full_path = os.path.normpath(os.path.join(self.symbol_path,
                                                      rel_path))
            shutil.move(file_dbg, full_path)
            # gzip the shipped debug files; an argument list (no shell)
            # keeps paths with spaces or shell metacharacters intact
            subprocess.call(["gzip", "-4", "-f", full_path])
            self.output(sys.stdout, rel_path + ".gz")
        else:
            if os.path.isfile(file_dbg):
                os.unlink(file_dbg)
+
class Dumper_Solaris(Dumper):
    """Dumper for Solaris: processes ELF executables and shared objects."""

    def RunFileCommand(self, file):
        """Utility function, returns the output of file(1) after the first
        tab, or "" when the command cannot be started or its output
        contains no tab."""
        try:
            # An argument list (no shell) keeps paths with spaces intact.
            proc = subprocess.Popen(["file", file],
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
            output = proc.communicate()[0]
            return output.split('\t')[1]
        except (OSError, IndexError):
            return ""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .so extension, and additionally
        file(1) reports as being ELF files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".so") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("ELF")
        return False
+
def AfterMac(status, dsymbundle):
    """Cleanup hook for Macs, run once a file tuple has been processed:
    deletes the |dsymbundle| directory tree. |status| is ignored."""
    # CopyDebug will already have been run from Dumper.ProcessFiles
    bundle_dir = dsymbundle
    shutil.rmtree(bundle_dir)
+
class Dumper_Mac(Dumper):
    """Dumper variant for Mac: runs dsymutil to produce a .dSYM bundle for
    each binary and then dumps symbols from that bundle."""

    def ShouldProcess(self, file):
        """This function will allow processing of files that are
        executable, or end with the .dylib extension, and additionally
        file(1) reports as being Mach-O files.  It expects to find the file
        command in PATH."""
        if not Dumper.ShouldProcess(self, file):
            return False
        if file.endswith(".dylib") or os.access(file, os.X_OK):
            return self.RunFileCommand(file).startswith("Mach-O")
        return False

    def ShouldSkipDir(self, dir):
        """We create .dSYM bundles on the fly, but if someone runs
        buildsymbols twice, we should skip any bundles we created
        previously, otherwise we'll recurse into them and try to
        dump the inner bits again."""
        return dir.endswith(".dSYM")

    def ProcessFiles(self, files, after=None, after_arg=None):
        """Submit a dsymutil pre-processing job for files[0].

        |after| and |after_arg| are accepted for signature compatibility
        but are not used here; the follow-up work is scheduled from
        ProcessFilesMacFinished.
        """
        # also note, files must be len 1 here, since we're the only ones
        # that ever add more than one file to the list
        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % (files[0]))
        self.SubmitJob(files[0], 'ProcessFilesWorkMac', args=(files[0],), callback=self.ProcessFilesMacFinished)

    def ProcessFilesMacFinished(self, result):
        """Callback for the job submitted by ProcessFiles. |result| is the
        dict returned by ProcessFilesWorkMac."""
        if result['status']:
            # kick off new jobs per-arch with our new list of files
            Dumper.ProcessFiles(self, result['files'], after=AfterMac, after_arg=result['files'][0])

    def dump_syms_cmdline(self, file, arch, files):
        '''
        Get the commandline used to invoke dump_syms.
        '''
        # dump_syms wants the path to the original binary and the .dSYM
        # in order to dump all the symbols.
        if len(files) == 2 and file == files[0] and file.endswith('.dSYM'):
            # This is the .dSYM bundle.
            return [self.dump_syms] + arch.split() + ['-g', file, files[1]]
        return Dumper.dump_syms_cmdline(self, file, arch, files)

    def ProcessFilesWorkMac(self, file):
        """dump_syms on Mac needs to be run on a dSYM bundle produced
        by dsymutil(1), so run dsymutil here and pass the bundle name
        down to the superclass method instead.

        Returns a dict with the keys 'status' (True if a .dSYM bundle
        exists after running dsymutil), 'files' (tuple of paths to dump
        symbols for) and 'file_key' (the input |file|).
        """
        t_start = time.time()
        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file,))

        # our return is a status and a tuple of files to dump symbols for
        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
        result = { 'status' : False, 'files' : None, 'file_key' : file }
        dsymbundle = file + ".dSYM"
        # Remove any bundle left over from a previous run so the existence
        # check below reflects this dsymutil invocation only.
        if os.path.exists(dsymbundle):
            shutil.rmtree(dsymbundle)
        dsymutil = buildconfig.substs['DSYMUTIL']
        # dsymutil takes --arch=foo instead of -a foo like everything else
        try:
            cmd = ([dsymutil] +
                   [a.replace('-a ', '--arch=') for a in self.archs if a] +
                   [file])
            self.output_pid(sys.stderr, ' '.join(cmd))
            # Close the devnull handle as soon as dsymutil exits instead of
            # leaking one descriptor per processed file.
            with open(os.devnull, 'w') as devnull:
                subprocess.check_call(cmd, stdout=devnull)
        except subprocess.CalledProcessError as e:
            self.output_pid(sys.stderr, 'Error running dsymutil: %s' % str(e))

        if not os.path.exists(dsymbundle):
            # dsymutil won't produce a .dSYM for files without symbols
            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file,))
            result['status'] = False
            result['files'] = (file, )
            return result

        result['status'] = True
        result['files'] = (dsymbundle, file)
        elapsed = time.time() - t_start
        self.output_pid(sys.stderr, 'Worker finished processing %s in %.2fs' %
                        (file, elapsed))
        return result

    def CopyDebug(self, file, debug_file, guid, code_file, code_id):
        """ProcessFiles has already produced a dSYM bundle, so we should just
        copy that to the destination directory. However, we'll package it
        into a .tar.bz2 because the debug symbols are pretty huge, and
        also because it's a bundle, so it's a directory. |file| here is the
        dSYM bundle, and |debug_file| is the original filename.

        |code_file| and |code_id| are not used by this implementation.
        """
        rel_path = os.path.join(debug_file,
                                guid,
                                os.path.basename(file) + ".tar.bz2")
        full_path = os.path.abspath(os.path.join(self.symbol_path,
                                                 rel_path))
        # Run tar from the bundle's parent directory so the archive holds
        # only the bundle's base name; close devnull once tar has finished.
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(["tar", "cjf", full_path, os.path.basename(file)],
                                         cwd=os.path.dirname(file),
                                         stdout=devnull, stderr=subprocess.STDOUT)
        if returncode == 0 and os.path.exists(full_path):
            self.output(sys.stdout, rel_path)
+
# Entry point if called as a standalone program
def main():
    """Parse the command line, build the platform-specific dumper and run it.

    Positional arguments are <dump_syms binary> <symbol store path>
    <debug info files...>. Exits with status 1 if --source-index is given
    without a usable PDBSTR_PATH; optparse exits with status 2 on too few
    arguments or an invalid install manifest.
    """
    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
    parser.add_option("-c", "--copy",
                      action="store_true", dest="copy_debug", default=False,
                      help="Copy debug info files into the same directory structure as symbol files")
    parser.add_option("-a", "--archs",
                      action="store", dest="archs",
                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
    parser.add_option("-s", "--srcdir",
                      action="append", dest="srcdir", default=[],
                      help="Use SRCDIR to determine relative paths to source files")
    parser.add_option("-v", "--vcs-info",
                      action="store_true", dest="vcsinfo",
                      help="Try to retrieve VCS info for each FILE listed in the output")
    parser.add_option("-i", "--source-index",
                      action="store_true", dest="srcsrv", default=False,
                      help="Add source index information to debug files, making them suitable for use in a source server.")
    parser.add_option("-x", "--exclude",
                      action="append", dest="exclude", default=[], metavar="PATTERN",
                      help="Skip processing files matching PATTERN.")
    parser.add_option("--repo-manifest",
                      action="store", dest="repo_manifest",
                      help="""Get source information from this XML manifest
produced by the `repo manifest -r` command.
""")
    parser.add_option("--install-manifest",
                      action="append", dest="install_manifests",
                      default=[],
                      help="""Use this install manifest to map filenames back
to canonical locations in the source repository. Specify
<install manifest filename>,<install destination> as a comma-separated pair.
""")
    (options, args) = parser.parse_args()

    # check to see if the pdbstr.exe exists
    if options.srcsrv:
        pdbstr = os.environ.get("PDBSTR_PATH")
        # An unset PDBSTR_PATH yields None, which os.path.exists() rejects
        # with a TypeError; treat it the same as a bad path.
        if not pdbstr or not os.path.exists(pdbstr):
            sys.stderr.write("Invalid path to pdbstr.exe - please set/check PDBSTR_PATH.\n\n")
            sys.exit(1)

    if len(args) < 3:
        # parser.error() prints the usage text and exits with status 2.
        parser.error("not enough arguments")

    try:
        manifests = validate_install_manifests(options.install_manifests)
    except (IOError, ValueError) as e:
        parser.error(str(e))
    file_mapping = make_file_mapping(manifests)
    dumper = GetPlatformSpecificDumper(dump_syms=args[0],
                                       symbol_path=args[1],
                                       copy_debug=options.copy_debug,
                                       archs=options.archs,
                                       srcdirs=options.srcdir,
                                       vcsinfo=options.vcsinfo,
                                       srcsrv=options.srcsrv,
                                       exclude=options.exclude,
                                       repo_manifest=options.repo_manifest,
                                       file_mapping=file_mapping)

    dumper.Process(*args[2:])
    dumper.Finish()
+
# Script entry: only runs when this file is executed directly.
if __name__ == "__main__":
    # The multiprocessing infrastructure has to be initialised before any
    # work starts, and the call must stay inside this __main__ guard, or
    # else Windows will choke.
    Dumper.GlobalInit()
    main()