summaryrefslogtreecommitdiffstats
path: root/tools/jprof
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /tools/jprof
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'tools/jprof')
-rw-r--r--tools/jprof/README.html330
-rw-r--r--tools/jprof/bfd.cpp231
-rw-r--r--tools/jprof/coff.cpp99
-rw-r--r--tools/jprof/elf.cpp133
-rw-r--r--tools/jprof/intcnt.cpp71
-rw-r--r--tools/jprof/intcnt.h38
-rwxr-xr-xtools/jprof/jprofsig46
-rw-r--r--tools/jprof/leaky.cpp863
-rw-r--r--tools/jprof/leaky.h122
-rw-r--r--tools/jprof/moz.build28
-rwxr-xr-xtools/jprof/split-profile.py143
-rw-r--r--tools/jprof/strset.cpp40
-rw-r--r--tools/jprof/strset.h19
-rw-r--r--tools/jprof/stub/Makefile.in8
-rw-r--r--tools/jprof/stub/config.h18
-rw-r--r--tools/jprof/stub/jprof.h17
-rw-r--r--tools/jprof/stub/libmalloc.cpp790
-rw-r--r--tools/jprof/stub/libmalloc.h45
-rw-r--r--tools/jprof/stub/moz.build17
19 files changed, 3058 insertions, 0 deletions
diff --git a/tools/jprof/README.html b/tools/jprof/README.html
new file mode 100644
index 000000000..2ae88dec4
--- /dev/null
+++ b/tools/jprof/README.html
@@ -0,0 +1,330 @@
+<!-- This Source Code Form is subject to the terms of the Mozilla Public
+ - License, v. 2.0. If a copy of the MPL was not distributed with this
+ - file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
+
+<html>
+<head><title>The Jprof Profiler</title></head>
+
+<body bgcolor="#FFFFFF" text="#000000"
+ link="#0000EE" vlink="#551A8B" alink="#FF0000">
+<center>
+<h1>The Jprof Profiler</h1>
+<font size="-1">
+<a href="mailto:jim_nance@yahoo.com">jim_nance@yahoo.com</a><p>
+Recent (4/2011) updates Randell Jesup (see bugzilla for contact info)
+</font>
+<hr>
+
+<a href="#introduction">Introduction</a> | <a href="#operation">Operation</a> |
+<a href="#setup">Setup</a> | <a href="#usage">Usage</a> |
+<a href="#interpretation">Interpretation</a>
+
+</center>
+<hr>
+
+<h3><a name="introduction">Introduction</a></h3>
+
+Jprof is a profiling tool. I am writing it because I need to find out
+where mozilla is spending its time, and there do not seem to be any
+profilers for Linux that can handle threads and/or shared libraries.
+This code is based heavily on Kipp Hickman's leaky.
+
+<h3><a name="operation">Operation</a></h3>
+
+Jprof operates by installing a timer which periodically interrupts mozilla.
+When this timer goes off, the jprof code inside mozilla walks the function call
+stack to determine which code was executing and saves the results into the
+<code>jprof-log</code> and <code>jprof-map</code> files. By collecting a large
+number of these call stacks, it is possible to deduce where mozilla is spending
+its time.
+
+<h3><a name="setup">Setup</a></h3>
+
+<p>Configure your mozilla with jprof support by adding
+<code>--enable-jprof</code> to your configure options (eg adding
+<code>ac_add_options --enable-jprof</code> to your <code>.mozconfig</code>) and
+making sure that you do <strong>not</strong> have the
+<code>--enable-strip</code> configure option set -- jprof needs symbols to
+operate. On many architectures with GCC, you'll need to add
+<code>--enable-optimize="-O3 -fno-omit-frame-pointer"</code> or the
+equivalent to ensure frame pointer generation in the compiler you're using.</p>
+
+<p>Finally, build mozilla with your new configuration. Now you can run jprof.</p>
+
+<h3><a name="usage">Usage</a></h3>
+<pre> jprof [-v] [-t] [-e exclude] [-i include] [-s stackdepth] [--last] [--all] [--start n [--end m]] [--output-dir dir] prog log [log2 ...]</pre>
+Options:
+<ul>
+ <li><b>-s depth</b> : Limit depth looked at from captured stack
+ frames</li>
+ <li><b>-v</b> : Output some information about the symbols, memory map, etc.</li>
+ <li><b>-t or --threads</b> : Group output according to thread. May require external
+ LD_PRELOAD library to help force sampling of spawned threads; jprof
+ may capture the main thread only. See <a
+ href="http://sam.zoy.org/writings/programming/gprof.html">gprof-helper</a>;
+ it may need adaption for jprof.</li>
+ <li><b>--only-thread id</b> : Only output data for thread 'id'</li>
+ <li><b>-e exclusion</b> : Allows excluding specific stack frames</li>
+ <li><b>-i inclusion</b> : Allows including specific stack frames</li>
+ <li><b>--last</b> : Only process data from the last 'section' of sampling
+ (starting at the last PROF)</li>
+ <li><b>--start N</b> : Start processing data at 'section' N </li>
+ <li><b>--end N</b> : Stop processing data at 'section' N </li>
+ <li><b>--output-dir dir</b> : Store generated .html files in the given directory </li>
+</ul>
+The behavior of jprof is determined by the value of the JPROF_FLAGS environment
+variable. This environment variable can be composed of several substrings
+which have the following meanings:
+<ul>
+ <li> <b>JP_START</b> : Install the signal handler, and start sending the
+ timer signals.
+
+ <li> <b>JP_DEFER</b> : Install the signal handler, but don't start sending
+ the timer signals. The user must start the signals by sending the first
+ one (with <code>kill -PROF</code>, or with <code>kill -ALRM</code> if
+ JP_REALTIME is used, or with <code>kill -POLL</code> (also known as <code>kill -IO</code>) if JP_RTC_HZ is used).
+
+ <li> <b>JP_FIRST=x</b> : Wait x seconds before starting the timer
+
+ <li> <b>JP_PERIOD=y</b> : Set timer to interrupt every y seconds. Only
+ values of y greater than or equal to 0.001 are supported. Default is
+ 0.050 (50ms).
+
+ <li> <b>JP_REALTIME</b> : Do the profiling in intervals of real time rather
+ than intervals of time used by the mozilla process (and the kernel
+ when doing work for mozilla). This could probably lead to weird
+ results (you'll see whatever runs when mozilla is waiting for events),
+ but is needed to see time spent in the X server.
+
+ <li> <b>JP_RTC_HZ=freq</b> : This option, only available on Linux if the
+ kernel is built with RTC support, makes jprof use the RTC timer instead of
+ using its own timer. This option, like JP_REALTIME, uses intervals of real
+ time. This option overrides JP_PERIOD. <code>freq</code> is the frequency
+ at which the timer should fire, measured in Hz. It must be a power of 2.
+ The maximal frequency allowed by the kernel can be changed by writing to
+ <code>/proc/sys/dev/rtc/max-user-freq</code>; the maximum value it can be
+ set to is 8192. Note that <code>/dev/rtc</code> will need to be readable
+ by the Firefox process; making that file world-readable is a simple way to
+ accomplish that.
+
+ <li> <b>JP_CIRCULAR=size</b> : This tells jprof to store samples in a
+ circular buffer of the given size, which then will be saved (appended)
+ to disk when SIGUSR1 is received or JProfStopProfiling is done. If the
+ buffer overflows, the oldest entries will be evicted until there's
+ space for the new entry.<p>
+
+ SIGUSR2 will cause the circular buffer to be cleared.
+
+ <li> <b>JP_FILENAME=basefilename</b> : This is the filename used for
+ saving the log files to; the default is "jprof-log". If Electrolysis
+ is used, each process after the first will have the process ID
+ added ("jprof-log-3212").
+
+</ul>
+
+<h4>Starting and stopping jprof from JavaScript</h4>
+<p>
+A build with jprof enabled adds four functions to the Window object:<p>
+<code>JProfStartProfiling()</code> and <code>JProfStopProfiling()</code>: When used with JP_DEFER, these
+allow one to start and stop the timer just around whatever critical section is
+being profiled.</p><p>
+<code>JProfClearCircular()</code> and <code>JProfSaveCircular()</code>:
+These clear the circular buffer and save the buffer (without stopping), respectively.</p>
+
+<h4>Examples of JPROF_FLAGS usage</h4>
+<ul>
+
+ <li>To make the timer start firing 3 seconds after the program is started and
+ fire every 25 milliseconds of program time use:
+ <pre>
+ setenv JPROF_FLAGS "JP_START JP_FIRST=3 JP_PERIOD=0.025" </pre>
+
+ <li>To make the timer start on your signal and fire every 1 millisecond of
+ program time use:
+ <pre>
+ setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.001" </pre>
+
+ <li>To make the timer start on your signal and fire every 10 milliseconds of
+ wall-clock time use:
+ <pre>
+ setenv JPROF_FLAGS "JP_DEFER JP_PERIOD=0.010 JP_REALTIME" </pre>
+
+ <li>To make the timer start on your signal and fire at 8192 Hz in wall-clock
+ time use:
+ <pre>
+ setenv JPROF_FLAGS "JP_DEFER JP_RTC_HZ=8192" </pre>
+
+ <li>To make the timer start on JProfStartProfiling() and run continuously
+ with a 1ms sample rate until told to stop, then save the last 1MB of
+ data:
+ <pre>
+ setenv JPROF_FLAGS "JP_DEFER JP_CIRCULAR=1048576 JP_PERIOD=0.001" </pre>
+
+</ul>
+
+<h4>Pausing profiles</h4>
+
+<P>jprof can be paused at any time by sending a SIGUSR1 to mozilla (<code>kill
+-USR1</code>). This will cause the timer signals to stop and jprof-map to be
+written, but it will not close jprof-log. Combining SIGUSR1 with the JP_DEFER
+option allows profiling of one sequence of actions by starting the timer right
+before starting the actions and stopping the timer right afterward.
+
+<P>After a SIGUSR1, sending another timer signal (SIGPROF, SIGALRM, or SIGPOLL (aka SIGIO),
+depending on the mode) can be used to continue writing data to the same
+output.
+
+<P>SIGUSR2 will cause the circular buffer to be cleared, if it's in use.
+This is useful right before running a test when you're using a large,
+continuous circular buffer, or programmatically at the start of an action
+which might take too long (JProfClearCircular()).
+
+<h4>Looking at the results</h4>
+
+Now that we have <code>jprof-log</code> and <code>jprof-map</code> files, we
+can use the jprof executable to turn them into readable output. To do
+this jprof needs the name of the mozilla binary and the log file. It deduces
+the name of the map file:
+
+<pre>
+ ./jprof /home/user/mozilla/objdir/dist/bin/firefox ./jprof-log > tmp.html
+</pre>
+
+This will generate the file <code>tmp.html</code> which you should view in a
+web browser.
+
+<pre>
+ ./jprof --output-dir=/tmp /home/user/mozilla/objdir/dist/bin/firefox ./jprof-log*
+</pre>
+
+This will generate a set of files in /tmp for each process.
+
+
+<h3><a name="interpretation">Interpretation</a></h3>
+
+
+The Jprof output is split into a flat portion and a hierarchical portion.
+There are links to each section at the top of the page. It is typically
+easier to analyze the profile by starting with the flat output and following
+the links contained in the flat output up to the hierarchical output.
+
+<h4><a name="flat">Flat output</a></h4>
+
+The flat portion of the profile indicates which functions were executing
+when the timer was going off. It is displayed as a list of function names
+on the right and the number of times that function was interrupted on the
+left. The list is sorted by decreasing interrupt count. For example:
+
+<blockquote> <pre>
+Total hit count: 151603
+Count %Total Function Name
+
+<a href="#23081">8806 5.8 __libc_poll</a>
+<a href="#40008">2254 1.5 __i686.get_pc_thunk.bx</a>
+<a href="#21390">2053 1.4 _int_malloc</a>
+<a href="#49013">1777 1.2 nsStyleContext::GetStyleData(nsStyleStructID)</a>
+<a href="#21380">1600 1.1 __libc_malloc</a>
+<a href="#603">1552 1.0 nsCOMPtr_base::~nsCOMPtr_base()</a>
+</pre> </blockquote>
+
+This shows that of the 151603 times the timer fired, 1777 (1.2% of the total) were inside nsStyleContext::GetStyleData() and 1552 (1.0% of the total) were in the nsCOMPtr_base destructor.
+
+<p>
+In general, the functions with the highest count are the functions which
+are taking the most time.
+
+<P>
+The function names are linked to the entry for that function in the
+hierarchical profile, which is described in the next section.
+
+<h4><a name="hier">Hierarchical output</a></h4>
+
+The hierarchical output is divided up into sections, with each section
+corresponding to one function. A typical section looks something like
+this:
+
+<blockquote><pre>
+ index Count Hits Function Name
+ <A href="#72871"> 545 (46.4%) nsBlockFrame::ReflowInlineFrames(nsBlockReflowState&, nsLineList_iterator, int*)</A>
+ <A href="#72873"> 100 (8.5%) nsBlockFrame::ReflowDirtyLines(nsBlockReflowState&)</A>
+ 72870 4 (0.3%) <a name=72870> 645 (54.9%)</a> <b>nsBlockFrame::DoReflowInlineFrames(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFlowAreaRect&, int&, nsFloatManager::SavedState*, int*, LineReflowStatus*, int)</b>
+ <A href="#72821"> 545 (46.4%) nsBlockFrame::ReflowInlineFrame(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsIFrame*, LineReflowStatus*)</A>
+ <A href="#72853"> 83 (7.1%) nsBlockFrame::PlaceLine(nsBlockReflowState&, nsLineLayout&, nsLineList_iterator, nsFloatManager::SavedState*, nsRect&, int&, int*)</A>
+ <A href="#74150"> 9 (0.8%) nsLineLayout::BeginLineReflow(int, int, int, int, int, int)</A>
+ <A href="#74897"> 1 (0.1%) nsTextFrame::GetType() const</A>
+ <A href="#74131"> 1 (0.1%) nsLineLayout::RelativePositionFrames(nsOverflowAreas&)</A>
+ <A href="#58320"> 1 (0.1%) __i686.get_pc_thunk.bx</A>
+ <A href="#53077"> 1 (0.1%) PL_ArenaAllocate</A>
+</pre></blockquote>
+
+The information this block tells us is:
+
+<ul>
+<li>There were 4 profiler hits <em>in</em> <code>nsBlockFrame::DoReflowInlineFrames</code>
+<li>There were 645 profiler hits <em>in or under</em> <code>nsBlockFrame::DoReflowInlineFrames</code>. Of these:
+<ul>
+ <li>545 were in or under <code>nsBlockFrame::ReflowInlineFrame</code>
+ <li>83 were in or under <code>nsBlockFrame::PlaceLine</code>
+ <li>9 were in or under <code>nsLineLayout::BeginLineReflow</code>
+ <li>1 was in or under <code>nsTextFrame::GetType</code>
+ <li>1 was in or under <code>nsLineLayout::RelativePositionFrames</code>
+ <li>1 was in or under <code>__i686.get_pc_thunk.bx</code>
+ <li>1 was in or under <code>PL_ArenaAllocate</code>
+</ul>
+<li>Of these 645 calls into <code>nsBlockFrame::DoReflowInlineFrames</code>:
+<ul>
+ <li>545 came from <code>nsBlockFrame::ReflowInlineFrames</code>
+ <li>100 came from <code>nsBlockFrame::ReflowDirtyLines</code>
+</ul>
+</ul>
+
+
+The rest of this section explains how to read this information off from the jprof output.
+
+<p>This block corresponds to the function <code>nsBlockFrame::DoReflowInlineFrames</code>, which is
+therefore bolded and not a link. The name of this function is preceded by
+five numbers which have the following meaning. The number on the left (72870)
+is the index number, and is not important. The next number (4) and the
+percentage following (0.3%) are the number
+of times this function was interrupted by the timer and the percentage of
+the total hits that is. The last number pair ("645 (54.9%)")
+are the number of times this function was in the call stack when the timer went
+off. That is, the timer went off while we were in code that was ultimately
+called from <code>nsBlockFrame::DoReflowInlineFrames</code>.
+<p>For our example we can see that our function was in the call stack for
+645 interrupt ticks, but we were only the function that was running when
+the interrupt arrived 4 times.
+<P>
+The functions listed above the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its
+callers. The numbers to the left of these function names are the numbers of
+times these functions were in the call stack as callers of
+<code>nsBlockFrame::DoReflowInlineFrames</code>. In our example, we were called 545 times by
+<code>nsBlockFrame::ReflowInlineFrames</code> and 100 times by
+<code>nsBlockFrame::ReflowDirtyLines</code>.
+<P>
+The functions listed below the line for <code>nsBlockFrame::DoReflowInlineFrames</code> are its
+callees. The numbers to the left of the function names are the numbers of
+times these functions were in the callstack as callees of
+<code>nsBlockFrame::DoReflowInlineFrames</code> and the corresponding percentages. In our example, of the 645 profiler hits under <code>nsBlockFrame::DoReflowInlineFrames</code> 545 were under <code>nsBlockFrame::ReflowInlineFrame</code>, 83 were under <code>nsBlockFrame::PlaceLine</code>, and so forth.<p>
+
+<b>NOTE:</b> If there are loops of execution or recursion, the numbers will
+not add up and percentages can exceed 100%. If a function directly calls
+itself "(self)" will be appended to the line, but indirect recursion will
+not be marked.
+
+<h3>Bugs</h3>
+The current build of Jprof has only been tested under Ubuntu 8.04 LTS, but
+should work under any fairly modern linux distribution using GCC/GLIBC.
+Please update this document with any known compatibilities/incompatibilities.
+<p>
+If you get an error:<p><code>Inconsistency detected by ld.so: dl-open.c: 260: dl_open_worker: Assertion `_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT' failed!
+</code><p>that means you've hit a timing hole in the version of glibc you're
+running. See <a
+href="http://sources.redhat.com/bugzilla/show_bug.cgi?id=4578">Redhat bug 4578</a>.
+<!-- <h3>Update</h3>
+<ul>
+</ul>
+-->
+
+</body>
+</html>
diff --git a/tools/jprof/bfd.cpp b/tools/jprof/bfd.cpp
new file mode 100644
index 000000000..2e013d0ae
--- /dev/null
+++ b/tools/jprof/bfd.cpp
@@ -0,0 +1,231 @@
+// vim:ts=8:sw=2:et:
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "leaky.h"
+
+#ifdef USE_BFD
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <bfd.h>
+#include <cxxabi.h>
+
+// Tries to use |filename| as a separate debug-info file.
+//
+// The whole file is read and run through bfd_calc_gnu_debuglink_crc32(); the
+// file is only accepted when the result equals |crc32|.  On success the file
+// is opened with BFD and returned (the caller must bfd_close() it).  Returns
+// nullptr when the file cannot be opened, the checksum differs, BFD cannot
+// open it, or it is not a bfd_object.
+static bfd *try_debug_file(const char *filename, unsigned long crc32)
+{
+  int fd = open(filename, O_RDONLY);
+  if (fd < 0)
+    return nullptr;
+
+  unsigned char buf[4*1024];
+  unsigned long crc = 0;
+
+  // A read error is treated like end-of-file; the checksum comparison below
+  // then decides whether the partial data is acceptable.
+  while (1) {
+    ssize_t count = read(fd, buf, sizeof(buf));
+    if (count <= 0)
+      break;
+
+    crc = bfd_calc_gnu_debuglink_crc32(crc, buf, count);
+  }
+
+  close(fd);
+
+  if (crc != crc32)
+    return nullptr;
+
+  bfd *object = bfd_openr(filename, nullptr);
+  // bfd_openr() can fail even though open() succeeded above; do not hand a
+  // null handle to bfd_check_format()/bfd_close().
+  if (!object)
+    return nullptr;
+
+  if (!bfd_check_format(object, bfd_object)) {
+    bfd_close(object);
+    return nullptr;
+  }
+
+  return object;
+}
+
+// Looks for a separate debug-info file belonging to |lib|.
+//
+// |lib| is an already opened BFD for the file named |aFileName|.  The
+// ".gnu_debuglink" section of |lib| supplies the debug file's base name and
+// a CRC32.  Three candidate paths are probed in order with try_debug_file():
+//   <dir>/<debuglink>
+//   <dir>/.debug/<debuglink>
+//   /usr/lib/debug/<dir>/<debuglink>
+// where <dir> is dirname(aFileName).  Returns the opened debug BFD (owned by
+// the caller) or nullptr when there is no usable debug file.
+static bfd *find_debug_file(bfd *lib, const char *aFileName)
+{
+  // check for a separate debug file with symbols
+  asection *sect = bfd_get_section_by_name(lib, ".gnu_debuglink");
+
+  if (!sect)
+    return nullptr;
+
+  // The section lives in |lib|; the previous code named an undeclared
+  // "objfile->obfd" here.  The two-argument form is kept to match the BFD
+  // API this file is written against.
+  bfd_size_type debuglinkSize = bfd_section_size (lib, sect);
+  if (debuglinkSize == 0)
+    return nullptr;
+
+  char *debuglink = new char[debuglinkSize];
+  if (!bfd_get_section_contents(lib, sect, debuglink, 0, debuglinkSize)) {
+    delete[] debuglink;
+    return nullptr;
+  }
+
+  // The file name must be NUL-terminated inside the section, and the 4-byte
+  // checksum (aligned to 4 bytes, after the NUL) must fit in it as well.
+  const void *nul = memchr(debuglink, '\0', debuglinkSize);
+  if (!nul) {
+    delete[] debuglink;
+    return nullptr;
+  }
+  size_t nameLen = (size_t) ((const char *) nul - debuglink);
+  size_t crc_offset = (nameLen & ~(size_t) 3) + 4;
+  if (crc_offset + 4 > debuglinkSize) {
+    delete[] debuglink;
+    return nullptr;
+  }
+  unsigned long crc32 = bfd_get_32(lib, debuglink + crc_offset);
+
+  // directory component (dirname() may modify its argument, so copy first)
+  char *dirbuf = strdup(aFileName);
+  if (!dirbuf) {
+    delete[] debuglink;
+    return nullptr;
+  }
+  const char *dir = dirname(dirbuf);
+
+  static const char debug_subdir[] = ".debug";
+  // This is gdb's default global debugging info directory, but gdb can
+  // be instructed to use a different directory.
+  static const char global_debug_dir[] = "/usr/lib/debug";
+
+  // Large enough for the longest candidate below: crc_offset is greater than
+  // the name length, and ".debug" is shorter than global_debug_dir.
+  size_t filenameSize =
+    strlen(global_debug_dir) + strlen(dir) + crc_offset + 3;
+  char *filename = new char[filenameSize];
+
+  // /path/debuglink
+  snprintf(filename, filenameSize, "%s/%s", dir, debuglink);
+  bfd *debugFile = try_debug_file(filename, crc32);
+  if (!debugFile) {
+
+    // /path/.debug/debuglink
+    snprintf(filename, filenameSize, "%s/%s/%s", dir, debug_subdir, debuglink);
+    debugFile = try_debug_file(filename, crc32);
+    if (!debugFile) {
+
+      // /usr/lib/debug/path/debuglink
+      snprintf(filename, filenameSize, "%s/%s/%s", global_debug_dir, dir,
+               debuglink);
+      debugFile = try_debug_file(filename, crc32);
+    }
+  }
+
+  delete[] filename;
+  free(dirbuf);
+  delete[] debuglink;
+
+  return debugFile;
+}
+
+
+// Use an indirect array to avoid copying tons of objects
+// Grows the indirect symbol table by |num| entries.
+//
+// externalSymbols is an array of Symbol pointers; it is reallocated to hold
+// numExternalSymbols + num pointers and the |num| new slots are pointed at
+// freshly allocated Symbol objects.  lastSymbol and numExternalSymbols are
+// updated to the new size.  Returns a pointer to the first new slot.
+Symbol ** leaky::ExtendSymbols(int num)
+{
+  long n = numExternalSymbols + num;
+
+  // Keep the old pointer until realloc() is known to have succeeded.
+  Symbol **grown = (Symbol**)
+    realloc(externalSymbols,
+            (size_t) (sizeof(externalSymbols[0]) * n));
+  if (!grown) {
+    fprintf(stderr, "jprof: out of memory while growing the symbol table\n");
+    exit(-1);
+  }
+  externalSymbols = grown;
+
+  // Only |num| new Symbol objects are needed; the existing slots already
+  // point at their own objects.
+  Symbol *new_array = new Symbol[num];
+  for (int i = 0; i < num; i++) {
+    externalSymbols[i + numExternalSymbols] = &new_array[i];
+  }
+  lastSymbol = externalSymbols + n;
+  Symbol **sp = externalSymbols + numExternalSymbols;
+  numExternalSymbols = n;
+  return sp;
+}
+
+// Advances the local cursor |sp| to the next symbol slot; when the cursor
+// reaches |lastSymbol| the table is grown by 16384 entries and the cursor is
+// moved to the first new slot.  Expects |sp| and |lastSymbol| in scope.
+#define NEXT_SYMBOL                        \
+  do {                                     \
+    if (++sp >= lastSymbol) {              \
+      sp = ExtendSymbols(16384);           \
+    }                                      \
+  } while (0)
+
+// Appends the symbols of the object file |aFileName| to externalSymbols.
+//
+// A placeholder symbol named after the file itself, at |aBaseAddress|, is
+// added first.  The file is then opened with BFD; if a matching separate
+// debug file is found its symbol table is preferred, otherwise the file's own
+// static symbols are used, falling back to its dynamic symbols when the
+// static table is empty.  Every named symbol is demangled (except names
+// starting with "__thunk") and stored with |aBaseAddress| added to its value.
+// usefulSymbols is updated to the new number of entries at the end.
+void leaky::ReadSymbols(const char *aFileName, u_long aBaseAddress)
+{
+  int initialSymbols = usefulSymbols;
+  if (nullptr == externalSymbols) {
+    externalSymbols = (Symbol**) calloc(sizeof(Symbol*),10000);
+    Symbol *new_array = new Symbol[10000];
+    for (int i = 0; i < 10000; i++) {
+      externalSymbols[i] = &new_array[i];
+    }
+    numExternalSymbols = 10000;
+  }
+  Symbol** sp = externalSymbols + usefulSymbols;
+  lastSymbol = externalSymbols + numExternalSymbols;
+
+  // Create a dummy symbol for the library so, if it doesn't have any
+  // symbols, we show it by library.
+  (*sp)->Init(aFileName, aBaseAddress);
+  NEXT_SYMBOL;
+
+  bfd_boolean kDynamic = (bfd_boolean) false;
+
+  static int firstTime = 1;
+  if (firstTime) {
+    firstTime = 0;
+    bfd_init ();
+  }
+
+  // NOTE(review): the two early returns below leave usefulSymbols untouched,
+  // so the placeholder entry added above is not counted -- confirm whether
+  // that is intended.
+  bfd* lib = bfd_openr(aFileName, nullptr);
+  if (nullptr == lib) {
+    return;
+  }
+  if (!bfd_check_format(lib, bfd_object)) {
+    bfd_close(lib);
+    return;
+  }
+
+  bfd *symbolFile = find_debug_file(lib, aFileName);
+
+  // read mini symbols
+  PTR minisyms = nullptr;
+  unsigned int size = 0;
+  long symcount = 0;
+
+  if (symbolFile) {
+    symcount = bfd_read_minisymbols(symbolFile, kDynamic, &minisyms, &size);
+    if (symcount <= 0) {
+      // Empty or unreadable debug file: drop it and fall back to |lib|.
+      bfd_close(symbolFile);
+      free(minisyms);
+      minisyms = nullptr;
+      symcount = 0;
+    } else {
+      bfd_close(lib);
+    }
+  }
+  if (symcount == 0) {
+    symcount = bfd_read_minisymbols(lib, kDynamic, &minisyms, &size);
+    if (symcount == 0) {
+      // symtab is empty; try dynamic symbols
+      free(minisyms);
+      minisyms = nullptr;
+      kDynamic = (bfd_boolean) true;
+      symcount = bfd_read_minisymbols(lib, kDynamic, &minisyms, &size);
+    }
+    symbolFile = lib;
+  }
+  // A negative count reports an error; treat it as "no symbols" so the scan
+  // below never does pointer arithmetic with it.
+  if (symcount < 0) {
+    symcount = 0;
+  }
+
+  asymbol* store;
+  store = bfd_make_empty_symbol(symbolFile);
+
+  // Scan symbols
+  size_t demangle_buffer_size = 128;
+  char *demangle_buffer = (char*) malloc(demangle_buffer_size);
+  bfd_byte* from = (bfd_byte *) minisyms;
+  bfd_byte* fromend = from + symcount * size;
+  for (; symcount > 0 && from < fromend; from += size) {
+    asymbol *sym;
+    sym = bfd_minisymbol_to_symbol(symbolFile, kDynamic, (const PTR) from, store);
+
+    symbol_info syminfo;
+    bfd_get_symbol_info (symbolFile, sym, &syminfo);
+
+//    if ((syminfo.type == 'T') || (syminfo.type == 't')) {
+      const char* nm = bfd_asymbol_name(sym);
+      if (nm && nm[0]) {
+        char* dnm = nullptr;
+        if (strncmp("__thunk", nm, 7)) {
+          // __cxa_demangle() may realloc() the buffer it is given; keep
+          // whatever it returns so the buffer is reused and freed once.
+          dnm =
+            abi::__cxa_demangle(nm, demangle_buffer, &demangle_buffer_size, 0);
+          if (dnm) {
+            demangle_buffer = dnm;
+          }
+        }
+        (*sp)->Init(dnm ? dnm : nm, syminfo.value + aBaseAddress);
+        NEXT_SYMBOL;
+      }
+//    }
+  }
+
+  free(demangle_buffer);
+  demangle_buffer = nullptr;
+
+  // The minisymbol table is owned by the caller of bfd_read_minisymbols().
+  free(minisyms);
+  minisyms = nullptr;
+
+  bfd_close(symbolFile);
+
+  int interesting = sp - externalSymbols;
+  if (!quiet) {
+    printf("%s provided %d symbols\n", aFileName,
+           interesting - initialSymbols);
+  }
+  usefulSymbols = interesting;
+}
+
+#endif /* USE_BFD */
diff --git a/tools/jprof/coff.cpp b/tools/jprof/coff.cpp
new file mode 100644
index 000000000..78aa26733
--- /dev/null
+++ b/tools/jprof/coff.cpp
@@ -0,0 +1,99 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "leaky.h"
+
+#ifdef USE_COFF
+
+#define LANGUAGE_C
+#include <sym.h>
+#include <cmplrs/stsupport.h>
+#include <symconst.h>
+#include <filehdr.h>
+#include <ldfcn.h>
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef IRIX4
+extern "C" {
+ extern char *demangle(char const* in);
+};
+#else
+#include <dem.h>
+#endif
+
+// Returns a heap-allocated (strdup) copy of the demangled form of |rawName|.
+// On IRIX4 the demangler returns the text itself; elsewhere it writes into a
+// caller-supplied buffer.
+static char *Demangle(char *rawName)
+{
+#ifndef IRIX4
+  char namebuf[4000];
+  demangle(rawName, namebuf);
+  const char *demangled = namebuf;
+#else
+  const char *demangled = demangle(rawName);
+#endif
+  return strdup(demangled);
+}
+
+// COFF variant: reads the text (procedure/block) symbols of |fileName|.
+//
+// Opens the file with ldopen(), walks the first isymMax + iextMax symbol
+// table entries, and for each local entry that is a static procedure, a
+// text/absolute procedure or a text block stores its demangled name and
+// value.  The result is published through externalSymbols/usefulSymbols.
+// Any failure prints a message to stderr and exits with status -1.
+void leaky::readSymbols(const char *fileName)
+{
+  LDFILE *ldptr;
+
+  ldptr = ldopen(fileName, nullptr);
+  if (!ldptr) {
+    fprintf(stderr, "%s: unable to open \"%s\"\n", applicationName,
+            fileName);
+    exit(-1);
+  }
+  if (PSYMTAB(ldptr) == 0) {
+    fprintf(stderr, "%s: \"%s\": has no symbol table\n", applicationName,
+            fileName);
+    exit(-1);
+  }
+
+  long isymMax = SYMHEADER(ldptr).isymMax;
+  long iextMax = SYMHEADER(ldptr).iextMax;
+  long iMax = isymMax + iextMax;
+
+  long alloced = 10000;
+  Symbol* syms = (Symbol*) malloc(sizeof(Symbol) * alloced);
+  if (!syms) {
+    fprintf(stderr, "%s: out of memory\n", applicationName);
+    exit(-1);
+  }
+  Symbol* sp = syms;
+  Symbol* last = syms + alloced;
+  SYMR symr;
+
+  for (long isym = 0; isym < iMax; isym++) {
+    if (ldtbread(ldptr, isym, &symr) != SUCCESS) {
+      // isym is a long, so it must be printed with %ld.
+      fprintf(stderr, "%s: can't read symbol #%ld\n", applicationName,
+              isym);
+      exit(-1);
+    }
+    if (isym < isymMax) {
+      if ((symr.st == stStaticProc)
+          || ((symr.st == stProc) &&
+              ((symr.sc == scText) || (symr.sc == scAbs)))
+          || ((symr.st == stBlock) &&
+              (symr.sc == scText))) {
+        // Text symbol. Set name field to point to the symbol name
+        sp->name = Demangle(ldgetname(ldptr, &symr));
+        sp->address = symr.value;
+        sp++;
+        if (sp >= last) {
+          // Table is full: grow it by another 10000 entries.
+          long n = alloced + 10000;
+          Symbol* grown = (Symbol*)
+            realloc(syms, (size_t) (sizeof(Symbol) * n));
+          if (!grown) {
+            fprintf(stderr, "%s: out of memory\n", applicationName);
+            exit(-1);
+          }
+          syms = grown;
+          last = syms + n;
+          sp = syms + alloced;
+          alloced = n;
+        }
+      }
+    }
+  }
+
+  int interesting = sp - syms;
+  if (!quiet) {
+    printf("Total of %d symbols\n", interesting);
+  }
+  usefulSymbols = interesting;
+  externalSymbols = syms;
+}
+
+#endif /* USE_COFF */
diff --git a/tools/jprof/elf.cpp b/tools/jprof/elf.cpp
new file mode 100644
index 000000000..1de1d2dcc
--- /dev/null
+++ b/tools/jprof/elf.cpp
@@ -0,0 +1,133 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "leaky.h"
+
+#ifdef USE_ELF
+
+#include "leaky.h"
+#include <stdio.h>
+#include <malloc.h>
+#include <libelf/libelf.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+
+// ELF variant: reads the function symbols of the 32-bit ELF file |fileName|.
+//
+// Walks every section; SHT_SYMTAB and SHT_DYNSYM sections are scanned and
+// each entry that has a non-zero value, is of type STT_FUNC and is not bound
+// STB_WEAK/STB_NUM is stored with a heap copy of its name.  Names are looked
+// up in the most recently seen SHT_STRTAB section.  The result is published
+// through externalSymbols/usefulSymbols.  Failure to open or parse the file
+// prints a message to stderr and exits with status -1.
+void leaky::readSymbols(const char *fileName)
+{
+  int fd = ::open(fileName, O_RDONLY);
+  if (fd < 0) {
+    fprintf(stderr, "%s: unable to open \"%s\"\n", applicationName,
+            fileName);
+    exit(-1);
+  }
+
+  elf_version(EV_CURRENT);
+  Elf *elf = elf_begin(fd, ELF_C_READ, 0);
+  if (!elf) {
+    fprintf(stderr, "%s: \"%s\": has no symbol table\n", applicationName,
+            fileName);
+    exit(-1);
+  }
+
+  long alloced = 10000;
+  Symbol* syms = (Symbol*) malloc(sizeof(Symbol) * 10000);
+  Symbol* sp = syms;
+  Symbol* last = syms + alloced;
+
+  // Get each of the relevant sections and add them to the list of
+  // symbols.
+  Elf32_Ehdr *ehdr = elf32_getehdr(elf);
+  if (!ehdr) {
+    fprintf(stderr, "%s: elf library lossage\n", applicationName);
+    exit(-1);
+  }
+#if 0
+  Elf32_Half ndx = ehdr->e_shstrndx;
+#endif
+
+  Elf_Scn *scn = 0;
+  int strtabndx = -1;
+  for (int i = 1; (scn = elf_nextscn(elf, scn)) != 0; i++) {
+    Elf32_Shdr *shdr = elf32_getshdr(scn);
+    if (!shdr) {
+      // No usable header for this section; skip it but keep counting.
+      continue;
+    }
+#if 0
+    char *name = elf_strptr(elf, ndx, (size_t) shdr->sh_name);
+    printf("Section %s (%d 0x%x)\n", name ? name : "(null)",
+           shdr->sh_type, shdr->sh_type);
+#endif
+    if (shdr->sh_type == SHT_STRTAB) {
+      /* We assume here that string tables precede symbol tables... */
+      strtabndx = i;
+      continue;
+    }
+#if 0
+    if (shdr->sh_type == SHT_DYNAMIC) {
+      /* Dynamic */
+      Elf_Data *data = elf_getdata(scn, 0);
+      if (!data || !data->d_size) {
+        printf("No data...");
+        continue;
+      }
+
+      Elf32_Dyn *dyn = (Elf32_Dyn*) data->d_buf;
+      Elf32_Dyn *lastdyn =
+        (Elf32_Dyn*) ((char*) data->d_buf + data->d_size);
+      for (; dyn < lastdyn; dyn++) {
+        printf("tag=%d value=0x%x\n", dyn->d_tag, dyn->d_un.d_val);
+      }
+    } else
+#endif
+    if ((shdr->sh_type == SHT_SYMTAB) ||
+        (shdr->sh_type == SHT_DYNSYM)) {
+      /* Symbol table */
+      Elf_Data *data = elf_getdata(scn, 0);
+      if (!data || !data->d_size) {
+        printf("No data...");
+        continue;
+      }
+
+      /* In theory we now have the symbols... */
+      Elf32_Sym *esym = (Elf32_Sym*) data->d_buf;
+      Elf32_Sym *lastsym =
+        (Elf32_Sym*) ((char*) data->d_buf + data->d_size);
+      for (; esym < lastsym; esym++) {
+#if 0
+        char *nm = elf_strptr(elf, strtabndx, (size_t)esym->st_name);
+        printf("%20s 0x%08x %02x %02x\n",
+               nm, esym->st_value, ELF32_ST_BIND(esym->st_info),
+               ELF32_ST_TYPE(esym->st_info));
+#endif
+        if ((esym->st_value == 0) ||
+            (ELF32_ST_BIND(esym->st_info) == STB_WEAK) ||
+            (ELF32_ST_BIND(esym->st_info) == STB_NUM) ||
+            (ELF32_ST_TYPE(esym->st_info) != STT_FUNC)) {
+          continue;
+        }
+        char *nm = elf_strptr(elf, strtabndx, (size_t)esym->st_name);
+        // Always store a heap copy so every name has the same ownership and
+        // none points into libelf's buffers (released below).
+        sp->name = strdup(nm ? nm : "(no name)");
+        sp->address = esym->st_value;
+        sp++;
+        if (sp >= last) {
+          // Table is full: grow it by another 10000 entries.
+          long n = alloced + 10000;
+          syms = (Symbol*)
+            realloc(syms, (size_t) (sizeof(Symbol) * n));
+          last = syms + n;
+          sp = syms + alloced;
+          alloced = n;
+        }
+      }
+    }
+  }
+
+  // Everything needed has been copied out; release the ELF descriptor and
+  // the underlying file.
+  elf_end(elf);
+  ::close(fd);
+
+  int interesting = sp - syms;
+  if (!quiet) {
+    printf("Total of %d symbols\n", interesting);
+  }
+  usefulSymbols = interesting;
+  externalSymbols = syms;
+}
+
+#endif /* USE_ELF */
diff --git a/tools/jprof/intcnt.cpp b/tools/jprof/intcnt.cpp
new file mode 100644
index 000000000..d0f7e1f33
--- /dev/null
+++ b/tools/jprof/intcnt.cpp
@@ -0,0 +1,71 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "intcnt.h"
+
+// Starts out empty: no pairs and no storage.
+IntCount::IntCount()
+  : numInts(0)
+  , iPair(nullptr)
+{
+}
+
+// Releases the pair array.
+IntCount::~IntCount()
+{
+  delete [] iPair;
+}
+
+// Number of (index, count) pairs currently stored.
+int IntCount::getSize()
+{
+  return numInts;
+}
+
+// Count stored in the pair at array position |pos| (no bounds check).
+int IntCount::getCount(int pos)
+{
+  return iPair[pos].cnt;
+}
+
+// Index stored in the pair at array position |pos| (no bounds check).
+int IntCount::getIndex(int pos)
+{
+  return iPair[pos].idx;
+}
+
+// Discards all pairs and returns the object to its freshly constructed
+// state (no storage, zero entries).
+void IntCount::clear()
+{
+  delete[] iPair;
+  // Use nullptr like the constructor does.  A zero-length array allocated
+  // here would be overwritten, and therefore leaked, by the next countAdd()
+  // on the empty table.
+  iPair = nullptr;
+  numInts = 0;
+}
+
+// Adds |increment| to the count kept for |index| and returns the new count.
+//
+// Pairs are kept sorted by idx.  An existing entry is found by binary search
+// and updated in place; otherwise a new array one element larger is built
+// with the new pair inserted at its sorted position.
+int IntCount::countAdd(int index, int increment)
+{
+  // Empty table: the new pair becomes the only element.
+  if (!numInts) {
+    iPair = new IntPair[1];
+    numInts = 1;
+    iPair[0].idx = index;
+    return iPair[0].cnt = increment;
+  }
+
+  // Position at which a new pair has to be inserted.
+  int insertAt = 0;
+
+  if (index > iPair[numInts-1].idx) {
+    // Larger than everything stored: append.
+    insertAt = numInts;
+  } else if (index < iPair[0].idx) {
+    // Smaller than everything stored: prepend.
+    insertAt = 0;
+  } else {
+    // Within the stored range: binary search for the entry or the gap.
+    int lo = 0;
+    int hi = numInts - 1;
+    for (;;) {
+      const int probe = (lo + hi) / 2;
+
+      if (index == iPair[probe].idx) {
+        return iPair[probe].cnt += increment;
+      }
+      if (index < iPair[probe].idx) {
+        hi = probe;
+        continue;
+      }
+      // Here index > iPair[probe].idx.
+      if (probe < numInts-1 && index < iPair[probe+1].idx) {
+        // Falls strictly between two neighbours: insert after probe.
+        insertAt = probe + 1;
+        break;
+      }
+      lo = probe + 1;
+    }
+  }
+
+  // Build the enlarged array, leaving a hole at insertAt.
+  IntPair *grown = new IntPair[numInts+1];
+  for (int i = 0; i < insertAt; i++) {
+    grown[i] = iPair[i];
+  }
+  for (int i = insertAt; i < numInts; i++) {
+    grown[i+1] = iPair[i];
+  }
+  ++numInts;
+  delete [] iPair;
+  iPair = grown;
+  iPair[insertAt].idx = index;
+  iPair[insertAt].cnt = increment;
+  return increment;
+}
diff --git a/tools/jprof/intcnt.h b/tools/jprof/intcnt.h
new file mode 100644
index 000000000..3c009eac2
--- /dev/null
+++ b/tools/jprof/intcnt.h
@@ -0,0 +1,38 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef INTCNT_H
+#define INTCNT_H
+
+// A small table of (index, count) pairs.  The pairs live in a heap array
+// owned by the object; copies are deep.
+class IntCount
+{
+public:
+  IntCount();
+  ~IntCount();
+  // Removes all pairs.
+  void clear();
+  // Adds |increment| to the count for |index|; returns the new count.
+  int countAdd(int index, int increment=1);
+  int countGet(int index);
+  // Number of pairs stored.
+  int getSize();
+  // Count / index of the pair at array position |pos|.
+  int getCount(int pos);
+  int getIndex(int pos);
+
+  // Deep copy: the new object gets its own pair array.
+  IntCount(const IntCount&old)
+  {
+    numInts = old.numInts;
+    if (numInts > 0) {
+      iPair = new IntPair[numInts];
+      for (int i = 0; i < numInts; i++) {
+        iPair[i] = old.iPair[i];
+      }
+    } else {
+      iPair = nullptr;
+    }
+  }
+
+  // Deep-copy assignment, matching the copy constructor.  Without it the
+  // compiler-generated assignment would share iPair between two objects and
+  // the destructor would delete it twice.
+  IntCount& operator=(const IntCount&old)
+  {
+    if (this != &old) {
+      IntPair *copy = nullptr;
+      if (old.numInts > 0) {
+        copy = new IntPair[old.numInts];
+        for (int i = 0; i < old.numInts; i++) {
+          copy[i] = old.iPair[i];
+        }
+      }
+      delete [] iPair;
+      iPair = copy;
+      numInts = old.numInts > 0 ? old.numInts : 0;
+    }
+    return *this;
+  }
+private:
+
+  int numInts;                               // number of valid entries in iPair
+  struct IntPair{int idx; int cnt;} *iPair;  // owned array, or nullptr
+};
+
+#endif
diff --git a/tools/jprof/jprofsig b/tools/jprof/jprofsig
new file mode 100755
index 000000000..02226fc4b
--- /dev/null
+++ b/tools/jprof/jprofsig
@@ -0,0 +1,46 @@
+#!/bin/sh
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#
+# Find Mozilla PID and send it a signal, to be used
+# with the jprof tool.
+#
+
+# Print the usage line and terminate with a failure status.
+jpsignal_usage() {
+    echo "Usage: jprofsig [start|stop]"
+    exit 1
+}
+
+if [ $# != 1 ]; then
+    echo "Wrong number of arguments."
+    jpsignal_usage
+fi
+
+jpsignal_arg="$1"
+
+# Find & print mozilla PID.  The bracket expression keeps grep from matching
+# its own command line in the process list.
+tmpmoz=`ps aux | grep '[m]ozilla-bin' | head -1 | awk '{ print $2 }'`
+if [ -z "$tmpmoz" ]; then
+    # Without a PID, kill would be invoked with no target at all.
+    echo "No mozilla-bin process found."
+    exit 1
+fi
+echo "Mozilla PID = $tmpmoz"
+
+# See how we were called.
+case "$jpsignal_arg" in
+    start)
+        if [ "$JP_REALTIME" = 1 ]; then
+            kill -ALRM "$tmpmoz"
+        else
+            # Normal, non-realtime mode.
+            kill -PROF "$tmpmoz"
+        fi
+        ;;
+    stop)
+        kill -USR1 "$tmpmoz"
+        ;;
+    *)
+        # jpsignal_usage exits with status 1 itself.
+        jpsignal_usage
+        ;;
+esac
+
+exit 0
diff --git a/tools/jprof/leaky.cpp b/tools/jprof/leaky.cpp
new file mode 100644
index 000000000..d8e5322f5
--- /dev/null
+++ b/tools/jprof/leaky.cpp
@@ -0,0 +1,863 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "leaky.h"
+#include "intcnt.h"
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#ifndef NTO
+#include <getopt.h>
+#endif
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef NTO
+#include <mem.h>
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+static const u_int DefaultBuckets = 10007; // arbitrary, but prime
+static const u_int MaxBuckets = 1000003; // arbitrary, but prime
+
+//----------------------------------------------------------------------
+
+// Entry point: parses the command line, then produces one report per log
+// file.  With a single log and no --output-dir the report goes to stdout;
+// otherwise each log gets its own "<log>.html" file (inside output_dir when
+// one was given).
+int main(int argc, char** argv)
+{
+  leaky* l = new leaky;
+
+  l->initialize(argc, argv);
+  l->outputfd = stdout;
+
+  for (int i = 0; i < l->numLogFiles; i++) {
+    char* logName = argv[l->logFileIndex + i];
+
+    if (l->output_dir || l->numLogFiles > 1) {
+      char name[2048]; // XXX fix
+      int len;
+      if (l->output_dir)
+        len = snprintf(name,sizeof(name),"%s/%s.html",l->output_dir,logName);
+      else
+        len = snprintf(name,sizeof(name),"%s.html",logName);
+
+      // A truncated name would silently write to the wrong file.
+      if (len < 0 || (size_t)len >= sizeof(name)) {
+        fprintf(stderr,"output file name for %s is too long, skipping\n",logName);
+        l->outputfd = nullptr;
+        continue;
+      }
+
+      fprintf(stderr,"opening %s\n",name);
+      l->outputfd = fopen(name,"w");
+      // if an error we won't process the file, but say why
+      if (!l->outputfd)
+        perror(name);
+    }
+    if (l->outputfd) { // paranoia
+      l->open(logName);
+
+      if (l->outputfd != stderr) {
+        fclose(l->outputfd);
+        l->outputfd = nullptr;
+      }
+    }
+  }
+
+  return 0;
+}
+
+// Returns a newly allocated copy of `in` with every '<' replaced by "&lt;"
+// and every '>' replaced by "&gt;".  The caller releases the result with
+// delete [].  An empty input yields an empty string.
+char *
+htmlify(const char *in)
+{
+  // Each '<' or '>' grows from one byte to four, i.e. three extra bytes.
+  size_t specials = 0;
+  for (const char *p = in; *p; ++p) {
+    if (*p == '<' || *p == '>') {
+      ++specials;
+    }
+  }
+
+  char *out = new char[strlen(in) + specials * 3 + 1];
+
+  // Copy the input to the output, with substitutions.  Testing *p before
+  // each step keeps an empty input from being read past its terminator.
+  char *q = out;
+  for (const char *p = in; *p; ++p) {
+    if (*p == '<') {
+      memcpy(q, "&lt;", 4);
+      q += 4;
+    } else if (*p == '>') {
+      memcpy(q, "&gt;", 4);
+      q += 4;
+    } else {
+      *q++ = *p;
+    }
+  }
+  *q = '\0';
+
+  return out;
+}
+
+// Puts the analyzer into its default state: quiet, no thread splitting,
+// no log mapped, no symbols loaded and no capture-section restriction.
+leaky::leaky()
+{
+  // Names taken from the command line later on.
+  progFile = nullptr;
+  applicationName = nullptr;
+
+  // Reporting options.
+  cleo = false;
+  quiet = true;
+  showThreads = false;
+  showAddress = false;
+  onlyThread = 0;
+  stackDepth = 100000;
+
+  // No log file is mapped yet.
+  mappedLogFile = -1;
+  firstLogEntry = nullptr;
+  lastLogEntry = nullptr;
+
+  // Empty symbol table.
+  sfd = -1;
+  externalSymbols = nullptr;
+  numExternalSymbols = 0;
+  usefulSymbols = 0;
+  highestSymbolAddr = 0;
+  lowestSymbolAddr = 0;
+
+  loadMap = nullptr;
+
+  // Capture-section selection: -1 means "not requested".
+  collect_start = -1;
+  collect_end = -1;
+  collect_last = false;
+}
+
+// Destructor.  The body is empty: nothing is released here.
+leaky::~leaky()
+{
+}
+
+// Writes the command-line synopsis and the option summary to stderr, then
+// terminates the process with exit(-1).  Never returns.
+void leaky::usageError()
+{
+  static const char optionHelp[] =
+    "\t-v: verbose\n"
+    "\t-t | --threads: split threads\n"
+    "\t--only-thread n: only profile thread N\n"
+    "\t-i include-id: stack must include specified id\n"
+    "\t-e exclude-id: stack must NOT include specified id\n"
+    "\t-s stackdepth: Limit depth looked at from captured stack frames\n"
+    "\t--last: only profile the last capture section\n"
+    "\t--start n [--end m]: profile n to m (or end) capture sections\n"
+    "\t--cleo: format output for 'cleopatra' display\n"
+    "\t--output-dir dir: write output files to dir\n"
+    "\tIf there's one log, output goes to stdout unless --output-dir is set\n"
+    "\tIf there are more than one log, output files will be named with .html added\n";
+
+  fprintf(stderr, "Usage: %s [-v] [-t] [-e exclude] [-i include] [-s stackdepth] [--last] [--all] [--start n [--end m]] [--cleo] [--output-dir dir] prog log [log2 ...]\n", (char*) applicationName);
+  // The help text has no conversion specifiers, so it is emitted verbatim.
+  fputs(optionHelp, stderr);
+  exit(-1);
+}
+
+// Long-option table handed to getopt_long() in leaky::initialize().  Each
+// entry is { name, takes-argument (0 = no, 1 = required), flag, value }; the
+// flag is null in every entry, so getopt_long() returns the value, which
+// initialize() handles in its switch.
+static struct option longopts[] = {
+ { "threads", 0, nullptr, 't' },
+ { "only-thread", 1, nullptr, 'T' },
+ { "last", 0, nullptr, 'l' },
+ { "start", 1, nullptr, 'x' },
+ { "end", 1, nullptr, 'n' },
+ { "cleo",0, nullptr, 'c' },
+ { "output-dir", 1, nullptr, 'd' },
+ // all-zero terminator entry
+ { nullptr, 0, nullptr, 0 },
+};
+
+// Parses the command line.
+//
+// Stores the basename of argv[0] in applicationName, processes the options
+// with getopt_long(), then records the program file (first non-option
+// argument) and the position and number of the log files that follow it.
+// Calls usageError(), which exits, when an option is rejected or fewer than
+// two non-option arguments remain.
+void leaky::initialize(int argc, char** argv)
+{
+  applicationName = argv[0];
+  applicationName = strrchr(applicationName, '/');
+  if (!applicationName) {
+    applicationName = argv[0];
+  } else {
+    applicationName++;
+  }
+
+  int arg;
+  int errflg = 0;
+  int longindex = 0;
+
+  onlyThread = 0;
+  output_dir = nullptr;
+  cleo = false;
+
+  // XXX tons of cruft here left over from tracemalloc
+  // XXX The -- options shouldn't need short versions, or they should be documented
+  while (((arg = getopt_long(argc, argv, "adEe:gh:i:r:Rs:tT:qvx:ln:",longopts,&longindex)) != -1)) {
+    switch (arg) {
+      case '?':
+      default:
+        fprintf(stderr,"error: unknown option %c\n",optopt);
+        errflg++;
+        break;
+      case 'a':
+        break;
+      case 'A': // not implemented
+        showAddress = true;
+        break;
+      case 'c':
+        cleo = true;
+        break;
+      case 'd':
+        output_dir = optarg; // reference to an argv pointer
+        break;
+      case 'R':
+        break;
+      case 'e':
+        exclusions.add(optarg);
+        break;
+      case 'g':
+        break;
+      case 'r': // not implemented
+        // -r and -i are mutually exclusive
+        roots.add(optarg);
+        if (!includes.IsEmpty()) {
+          errflg++;
+        }
+        break;
+      case 'i':
+        includes.add(optarg);
+        if (!roots.IsEmpty()) {
+          errflg++;
+        }
+        break;
+      case 'h':
+        break;
+      case 's': {
+        // Clamp in signed arithmetic: stackDepth is unsigned, so storing a
+        // negative value first would wrap to a huge depth instead of 2.
+        int depth = atoi(optarg);
+        stackDepth = (depth < 2) ? 2 : depth;
+        break;
+      }
+      case 'x':
+        // --start
+        collect_start = atoi(optarg);
+        break;
+      case 'n':
+        // --end
+        collect_end = atoi(optarg);
+        break;
+      case 'l':
+        // --last
+        collect_last = true;
+        break;
+      case 'q':
+        break;
+      case 'v':
+        quiet = !quiet;
+        break;
+      case 't':
+        showThreads = true;
+        break;
+      case 'T':
+        // --only-thread implies thread splitting
+        showThreads = true;
+        onlyThread = atoi(optarg);
+        break;
+    }
+  }
+  if (errflg || ((argc - optind) < 2)) {
+    usageError();
+  }
+  progFile = argv[optind++];
+  logFileIndex = optind;
+  numLogFiles = argc - optind;
+  if (!quiet)
+    fprintf(stderr,"numlogfiles = %d\n",numLogFiles);
+}
+
+// Maps the whole file behind `fd` privately into memory with protection
+// `flags` and stores the file size in *sz.  Returns the mapping's base
+// address, or nullptr for an empty file (which cannot be mapped).  Prints a
+// diagnostic and exits when fstat() or mmap() fails.
+static void* mapFile(int fd, u_int flags, off_t* sz)
+{
+  struct stat sb;
+  if (fstat(fd, &sb) < 0) {
+    perror("fstat");
+    exit(-1);
+  }
+  *sz = sb.st_size;
+  if (sb.st_size == 0) {
+    // mmap() rejects zero-length mappings; there is nothing to map anyway.
+    return nullptr;
+  }
+  // Pass the length as size_t so large files are not truncated to int.
+  void* base = mmap(0, (size_t)sb.st_size, flags, MAP_PRIVATE, fd, 0);
+  // mmap() reports failure with MAP_FAILED, not with a null pointer.
+  if (base == MAP_FAILED) {
+    perror("mmap");
+    exit(-1);
+  }
+  return base;
+}
+
+// Reads the load map file (M_MAPFILE) once and builds the loadMap list.
+// Each record is a malloc_map_entry header followed by nameLen bytes of
+// name; reading stops at the first short or oversized record.  New entries
+// are pushed on the front of the list.  Exits if the file cannot be opened.
+void leaky::LoadMap()
+{
+  if (loadMap) {
+    // all files use the same map
+    return;
+  }
+
+  malloc_map_entry mme;
+  char name[1000];
+
+  int fd = ::open(M_MAPFILE, O_RDONLY);
+  if (fd < 0) {
+    perror("open: " M_MAPFILE);
+    exit(-1);
+  }
+  for (;;) {
+    int nb = read(fd, &mme, sizeof(mme));
+    if (nb != sizeof(mme)) break;
+    // The name plus its terminator must fit in name[]; a larger length
+    // would overflow the buffer in the read() below.
+    if (mme.nameLen >= sizeof(name)) {
+      fprintf(stderr,"%s: entry name too long, ignoring the rest of the map\n",
+              M_MAPFILE);
+      break;
+    }
+    nb = read(fd, name, mme.nameLen);
+    if (nb != (int)mme.nameLen) break;
+    name[mme.nameLen] = 0;
+    if (!quiet) {
+      fprintf(stderr,"%s @ %lx\n", name, mme.address);
+    }
+
+    LoadMapEntry* lme = new LoadMapEntry;
+    lme->address = mme.address;
+    lme->name = strdup(name);
+    lme->next = loadMap;
+    loadMap = lme;
+  }
+  close(fd);
+}
+
+// Processes one log file: loads the map and symbols (first call only), maps
+// the log, narrows [firstLogEntry, lastLogEntry) to the requested capture
+// sections, collects the thread ids seen, and writes the report(s) to
+// outputfd via analyze().  Exits if the log cannot be opened.
+void leaky::open(char *logFile)
+{
+  int threadArray[100]; // should auto-expand
+  int last_thread = -1;
+  int numThreads = 0;
+  int section = -1;
+  bool collecting = false;
+
+  LoadMap();
+
+  setupSymbols(progFile);
+
+  // open up the log file, closing the descriptor of the previous one.
+  // -1 means "none"; 0 is a valid descriptor, so test for >= 0.
+  if (mappedLogFile >= 0)
+    ::close(mappedLogFile);
+
+  mappedLogFile = ::open(logFile, O_RDONLY);
+  if (mappedLogFile < 0) {
+    perror("open");
+    exit(-1);
+  }
+  off_t size;
+  firstLogEntry = (malloc_log_entry*) mapFile(mappedLogFile, PROT_READ, &size);
+  lastLogEntry = (malloc_log_entry*)((char*)firstLogEntry + size);
+
+  // NOTE(review): with "||", thread collection starts immediately whenever
+  // --last is absent, even if --start was given — confirm this is intended.
+  if (!collect_last || collect_start < 0) {
+    collecting = true;
+  }
+
+  // First, restrict it to the capture sections specified (all, last, start/end)
+  // This loop walks through all the call stacks we recorded
+  for (malloc_log_entry* lep=firstLogEntry;
+       lep < lastLogEntry;
+       lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) {
+
+    if (lep->flags & JP_FIRST_AFTER_PAUSE) {
+      section++;
+      if (collect_last) {
+        // every new section supersedes the previous one
+        firstLogEntry = lep;
+        numThreads = 0;
+        collecting = true;
+      }
+      if (collect_start == section) {
+        collecting = true;
+        firstLogEntry = lep;
+      }
+      if (collect_end == section) {
+        collecting = false;
+        lastLogEntry = lep;
+      }
+      if (!quiet)
+        fprintf(stderr,"New section %d: first=%p, last=%p, collecting=%d\n",
+                section,(void*)firstLogEntry,(void*)lastLogEntry,collecting);
+    }
+
+    // Capture thread info at the same time
+
+    // Find all the threads captured
+
+    // pthread/linux docs say the signal can be delivered to any thread in
+    // the process.  In practice, it appears in Linux that it's always
+    // delivered to the thread that called setitimer(), and each thread can
+    // have a separate itimer.  There's a support library for gprof that
+    // overlays pthread_create() to set timers in any threads you spawn.
+    if (showThreads && collecting) {
+      if (lep->thread != last_thread)
+      {
+        int i;
+        for (i=0; i<numThreads; i++)
+        {
+          if (lep->thread == threadArray[i])
+            break;
+        }
+        // record a thread not seen before, as long as the array has room
+        if (i == numThreads &&
+            i < (int) (sizeof(threadArray)/sizeof(threadArray[0])))
+        {
+          threadArray[i] = lep->thread;
+          numThreads++;
+          if (!quiet)
+            fprintf(stderr,"new thread %d\n",lep->thread);
+        }
+      }
+    }
+  }
+  if (!quiet)
+    fprintf(stderr,"Done collecting: sections %d: first=%p, last=%p, numThreads=%d\n",
+            section,(void*)firstLogEntry,(void*)lastLogEntry,numThreads);
+
+  if (!cleo) {
+    fprintf(outputfd,"<html><head><title>Jprof Profile Report</title></head><body>\n");
+    fprintf(outputfd,"<h1><center>Jprof Profile Report</center></h1>\n");
+  }
+
+  if (showThreads)
+  {
+    fprintf(stderr,"Num threads %d\n",numThreads);
+
+    if (!cleo) {
+      // index of links to the per-thread reports, ten per line
+      fprintf(outputfd,"<hr>Threads:<p><pre>\n");
+      for (int i=0; i<numThreads; i++)
+      {
+        fprintf(outputfd,"   <a href=\"#thread_%d\">%d</a>  ",
+                threadArray[i],threadArray[i]);
+        if ((i+1)%10 == 0)
+          fprintf(outputfd,"<br>\n");
+      }
+      fprintf(outputfd,"</pre>");
+    }
+
+    for (int i=0; i<numThreads; i++)
+    {
+      if (!onlyThread || onlyThread == threadArray[i])
+        analyze(threadArray[i]);
+    }
+  }
+  else
+  {
+    // thread 0 means "all threads" to analyze()
+    analyze(0);
+  }
+
+  if (!cleo)
+    fprintf(outputfd,"</pre></body></html>\n");
+}
+
+//----------------------------------------------------------------------
+
+
+// qsort() comparator for an array of Symbol pointers: orders the symbols by
+// ascending address and returns -1, 0 or 1.
+static int symbolOrder(void const* a, void const* b)
+{
+  Symbol const* lhs = *(Symbol const* const*)a;
+  Symbol const* rhs = *(Symbol const* const*)b;
+
+  if (lhs->address > rhs->address) {
+    return 1;
+  }
+  if (lhs->address < rhs->address) {
+    return -1;
+  }
+  return 0;
+}
+
+// Calls ReadSymbols() for every entry of the load map, passing the entry's
+// name and the address it was mapped at.
+void leaky::ReadSharedLibrarySymbols()
+{
+  for (LoadMapEntry* entry = loadMap; entry != nullptr; entry = entry->next) {
+    ReadSymbols(entry->name, entry->address);
+  }
+}
+
+// Loads the symbol table on first use: symbols of the program `fileName`
+// plus those of every load-map entry, sorted by address.  Also records the
+// lowest and highest symbol addresses.  Exits with a diagnostic when no
+// symbol at all could be loaded, since every later lookup indexes the table.
+void leaky::setupSymbols(const char *fileName)
+{
+  if (usefulSymbols != 0) {
+    // only read once!
+    return;
+  }
+
+  // Read in symbols from the program
+  ReadSymbols(fileName, 0);
+
+  // Read in symbols from the .so's
+  ReadSharedLibrarySymbols();
+
+  if (!quiet) {
+    fprintf(stderr,"A total of %d symbols were loaded\n", usefulSymbols);
+  }
+
+  // An empty table would make the first/last element accesses below read
+  // outside the array.
+  if (usefulSymbols <= 0) {
+    fprintf(stderr,"error: no symbols could be loaded from %s\n", fileName);
+    exit(-1);
+  }
+
+  // Now sort them
+  qsort(externalSymbols, usefulSymbols, sizeof(Symbol *), symbolOrder);
+  lowestSymbolAddr = externalSymbols[0]->address;
+  highestSymbolAddr = externalSymbols[usefulSymbols-1]->address;
+}
+
+// Binary search the (address-sorted) symbol table for the symbol that
+// covers `addr`: the last symbol whose address is <= addr.  Addresses at or
+// above the last symbol's address map to the last symbol.  Returns the
+// table index, or -1 when the table is empty or addr lies below the first
+// symbol.
+int leaky::findSymbolIndex(u_long addr)
+{
+  if (usefulSymbols <= 0) {
+    // "usefulSymbols - 1" would wrap around in the unsigned limit below
+    return -1;
+  }
+
+  u_int base = 0;
+  u_int limit = usefulSymbols - 1;
+  const u_int last = limit;   // index of the final symbol; limit shrinks
+  while (base <= limit) {
+    u_int midPoint = (base + limit)>>1;
+    if (addr < externalSymbols[midPoint]->address) {
+      if (midPoint == 0) {
+        return -1;
+      }
+      limit = midPoint - 1;
+    } else {
+      // midPoint covers addr when it is the final symbol or when the next
+      // symbol starts above addr.  The successor is checked for every
+      // non-final index, including the second-to-last one.
+      if (midPoint == last ||
+          addr < externalSymbols[midPoint + 1]->address) {
+        return midPoint;
+      }
+      base = midPoint + 1;
+    }
+  }
+  return -1;
+}
+
+// Returns the symbol covering `addr`, or nullptr when findSymbolIndex()
+// finds none.
+Symbol* leaky::findSymbol(u_long addr)
+{
+  const int where = findSymbolIndex(addr);
+  return (where >= 0) ? externalSymbols[where] : nullptr;
+}
+
+//----------------------------------------------------------------------
+
+// True when any frame of the stack in `lep` resolves to a symbol whose name
+// is in the exclusion set.  Always false when no exclusions were given.
+bool leaky::excluded(malloc_log_entry* lep)
+{
+  if (!exclusions.IsEmpty()) {
+    const u_int frames = lep->numpcs;
+    for (u_int frame = 0; frame < frames; ++frame) {
+      Symbol* sym = findSymbol((u_long) lep->pcs[frame]);
+      if (sym && exclusions.contains(sym->name)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// True when no include filter was given, or when at least one frame of the
+// stack in `lep` resolves to a symbol whose name is in the include set.
+bool leaky::included(malloc_log_entry* lep)
+{
+  bool accepted = includes.IsEmpty();
+  if (!accepted) {
+    const u_int frames = lep->numpcs;
+    for (u_int frame = 0; frame < frames; ++frame) {
+      Symbol* sym = findSymbol((u_long) lep->pcs[frame]);
+      if (sym && includes.contains(sym->name)) {
+        accepted = true;
+        break;
+      }
+    }
+  }
+  return accepted;
+}
+
+//----------------------------------------------------------------------
+
+// Writes the stack of `lep` to `out` on one line, at most stackDepth frames.
+// A resolved frame prints its symbol name (followed by "[address]" when
+// showAddress is set); an unresolved one prints "<address>".  Frames are
+// separated by a space and the line ends with a newline.
+void leaky::displayStackTrace(FILE* out, malloc_log_entry* lep)
+{
+  u_int depth = stackDepth;
+  if (lep->numpcs < stackDepth) {
+    depth = lep->numpcs;
+  }
+
+  for (u_int frame = 0; frame < depth; ++frame) {
+    const u_long pc = (u_long) lep->pcs[frame];
+    Symbol* sym = findSymbol(pc);
+    if (!sym) {
+      fprintf(out, "<%p>", (char*)pc);
+    } else {
+      fputs(sym->name, out);
+      if (showAddress) {
+        fprintf(out, "[%p]", (char*)pc);
+      }
+    }
+    fputc(' ', out);
+  }
+  fputc('\n', out);
+}
+
+// Writes one log entry to outputfd: its delTime field, an arrow, then the
+// stack trace.  All three parts go to the same stream so the record stays on
+// one line even when the output is a file rather than stdout.
+void leaky::dumpEntryToLog(malloc_log_entry* lep)
+{
+  fprintf(outputfd, "%ld\t", lep->delTime);
+  fprintf(outputfd, " --> ");
+  displayStackTrace(outputfd, lep);
+}
+
+// Writes the HTML report for one thread to `fp`.
+//
+//   countArray - per-symbol count of stacks the symbol appeared in
+//   count      - number of stacks analyzed (denominator of the percentages)
+//   thread     - thread id, used in the anchor names
+//
+// Emits the hierarchical profile (symbols ranked by countArray, each with
+// its cntP and cntC reports) followed by the flat profile (symbols ranked by
+// timerHit).
+void leaky::generateReportHTML(FILE *fp, int *countArray, int count, int thread)
+{
+  fprintf(fp,"<center>");
+  if (showThreads)
+  {
+    fprintf(fp,"<hr><A NAME=thread_%d><b>Thread: %d</b></A><p>",
+            thread,thread);
+  }
+  fprintf(fp,"<A href=#flat_%d>flat</A><b> | </b><A href=#hier_%d>hierarchical</A>",
+          thread,thread);
+  fprintf(fp,"</center><P><P><P>\n");
+
+  int totalTimerHits = count;
+  int *rankingTable = new int[usefulSymbols];
+
+  // rankingTable starts out as the identity permutation of symbol indices
+  for(int cnt=usefulSymbols; --cnt>=0; rankingTable[cnt]=cnt);
+
+  // Drat.  I would use ::qsort() but I would need a global variable and my
+  // intro-pascal professor threatened to flunk anyone who used globals.
+  // She damaged me for life :-) (That was 1986.  See how much influence
+  // she had.  I don't remember her name but I always feel guilty about globals)
+
+  // Shell Sort. 581130733 is the max 31 bit value of h = 3h+1
+  // Sorts rankingTable by descending countArray value.
+  int mx, i, h;
+  for(mx=usefulSymbols/9, h=581130733; h>0; h/=3) {
+    if(h<mx) {
+      for(i = h-1; i<usefulSymbols; i++) {
+        int j, tmp=rankingTable[i], val = countArray[tmp];
+        for(j = i; (j>=h) && (countArray[rankingTable[j-h]]<val); j-=h) {
+          rankingTable[j] = rankingTable[j-h];
+        }
+        rankingTable[j] = tmp;
+      }
+    }
+  }
+
+  // Ok, We are sorted now.  Let's go through the table until we get to
+  // functions that were never called.  Right now we don't do much inside
+  // this loop.  Later we can get callers and callees into it like gprof
+  // does
+  fprintf(fp,
+          "<h2><A NAME=hier_%d></A><center><a href=\"http://dxr.mozilla.org/mozilla-central/source/tools/jprof/README.html#hier\">Hierarchical Profile</a></center></h2><hr>\n",
+          thread);
+  fprintf(fp, "<pre>\n");
+  fprintf(fp, "%6s %6s %4s %s\n",
+          "index", "Count", "Hits", "Function Name");
+
+  for(i=0; i<usefulSymbols && countArray[rankingTable[i]]>0; i++) {
+    Symbol **sp=&externalSymbols[rankingTable[i]];
+
+    (*sp)->cntP.printReport(fp, this, rankingTable[i], totalTimerHits);
+
+    char *symname = htmlify((*sp)->name);
+    fprintf(fp, "%6d %6d (%3.1f%%)%s <a name=%d>%8d (%3.1f%%)</a>%s <b>%s</b>\n",
+            rankingTable[i],
+            (*sp)->timerHit, ((*sp)->timerHit*1000/totalTimerHits)/10.0,
+            ((*sp)->timerHit*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
+            rankingTable[i], countArray[rankingTable[i]],
+            (countArray[rankingTable[i]]*1000/totalTimerHits)/10.0,
+            (countArray[rankingTable[i]]*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
+            symname);
+    delete [] symname;
+
+    (*sp)->cntC.printReport(fp, this, rankingTable[i], totalTimerHits);
+
+    fprintf(fp, "<hr>\n");
+  }
+  fprintf(fp,"</pre>\n");
+
+  // OK, Now we want to print the flat profile.  To do this we resort on
+  // the hit count.
+
+  // Cut-N-Paste Shell sort from above.  The Ranking Table has already been
+  // populated, so we do not have to reinitialize it.
+  for(mx=usefulSymbols/9, h=581130733; h>0; h/=3) {
+    if(h<mx) {
+      for(i = h-1; i<usefulSymbols; i++) {
+        int j, tmp=rankingTable[i], val = externalSymbols[tmp]->timerHit;
+        for(j = i;
+            (j>=h) && (externalSymbols[rankingTable[j-h]]->timerHit<val); j-=h) {
+          rankingTable[j] = rankingTable[j-h];
+        }
+        rankingTable[j] = tmp;
+      }
+    }
+  }
+
+  // Pre-count up total counter hits, to get a percentage.
+  // I wanted the total before walking the list, if this
+  // double-pass over externalSymbols gets slow we can
+  // do single-pass and print this out after the loop finishes.
+  totalTimerHits = 0;
+  for(i=0;
+      i<usefulSymbols && externalSymbols[rankingTable[i]]->timerHit>0; i++) {
+    Symbol **sp=&externalSymbols[rankingTable[i]];
+    totalTimerHits += (*sp)->timerHit;
+  }
+  // avoid dividing by zero in the percentage below
+  if (totalTimerHits == 0)
+    totalTimerHits = 1;
+
+  if (totalTimerHits != count)
+    fprintf(stderr,"Hit count mismatch: count=%d; totalTimerHits=%d\n",
+            count,totalTimerHits);
+
+  fprintf(fp,"<h2><A NAME=flat_%d></A><center><a href=\"http://dxr.mozilla.org/mozilla-central/source/tools/jprof/README.html#flat\">Flat Profile</a></center></h2><br>\n",
+          thread);
+  fprintf(fp, "<pre>\n");
+
+  fprintf(fp, "Total hit count: %d\n", totalTimerHits);
+  fprintf(fp, "Count %%Total Function Name\n");
+  // Now loop for as long as we have timer hits
+  for(i=0;
+      i<usefulSymbols && externalSymbols[rankingTable[i]]->timerHit>0; i++) {
+
+    Symbol **sp=&externalSymbols[rankingTable[i]];
+
+    char *symname = htmlify((*sp)->name);
+    fprintf(fp, "<a href=\"#%d\">%3d %-2.1f %s</a>\n",
+            rankingTable[i], (*sp)->timerHit,
+            ((float)(*sp)->timerHit/(float)totalTimerHits)*100.0, symname);
+    delete [] symname;
+  }
+
+  // the ranking table is private to this report
+  delete [] rankingTable;
+}
+
+// Analyzes the stacks in [firstLogEntry, lastLogEntry) for `thread`
+// (0 = every thread), honouring the exclude/include filters.
+//
+// In cleo mode each accepted stack is written to outputfd as it is walked.
+// Otherwise the per-symbol counters (stack counts, timer hits and the
+// parent/child links) are accumulated and generateReportHTML() is called
+// with the result.
+void leaky::analyze(int thread)
+{
+  int *countArray = new int[usefulSymbols];
+  int *flagArray  = new int[usefulSymbols];
+
+  //Zero our function call counter
+  memset(countArray, 0, sizeof(countArray[0])*usefulSymbols);
+
+  // reset hit counts
+  for(int i=0; i<usefulSymbols; i++) {
+    externalSymbols[i]->timerHit = 0;
+    externalSymbols[i]->regClear();
+  }
+
+  // The flag array is used to prevent counting symbols multiple times
+  // if functions are called recursively.  In order to keep from having
+  // to zero it on each pass through the loop, we mark it with the value
+  // of stacks on each trip through the loop.  This means we can determine
+  // if we have seen this symbol for this stack trace w/o having to reset
+  // from the prior stacktrace.
+  memset(flagArray, -1, sizeof(flagArray[0])*usefulSymbols);
+
+  if (cleo)
+    fprintf(outputfd,"m-Start\n");
+
+  // This loop walks through all the call stacks we recorded
+  // --last, --start and --end can restrict it, as can excludes/includes
+  stacks = 0;
+  for(malloc_log_entry* lep=firstLogEntry;
+      lep < lastLogEntry;
+      lep = reinterpret_cast<malloc_log_entry*>(&lep->pcs[lep->numpcs])) {
+
+    if ((thread != 0 && lep->thread != thread) ||
+        excluded(lep) || !included(lep))
+    {
+      continue;
+    }
+
+    ++stacks; // How many stack frames did we collect
+
+    u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth;
+    char** pcp = &lep->pcs[n-1];
+    int idx=-1, parrentIdx=-1;  // Init idx incase n==0
+    if (cleo) {
+      // This loop walks through every symbol in the call stack.  By walking it
+      // backwards we know who called the function when we get there.
+      char type = 's';
+      for (int i=n-1; i>=0; --i, --pcp) {
+        idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp));
+
+        if(idx>=0) {
+          // Skip over bogus __restore_rt frames that realtime profiling
+          // can introduce.
+          if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) {
+            --pcp;
+            --i;
+            idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp));
+            if (idx < 0) {
+              continue;
+            }
+          }
+          Symbol **sp=&externalSymbols[idx];
+          char *symname = htmlify((*sp)->name);
+          fprintf(outputfd,"%c-%s\n",type,symname);
+          delete [] symname;
+        }
+        // else can't find symbol - ignore
+        type = 'c';
+      }
+    } else {
+      // This loop walks through every symbol in the call stack.  By walking it
+      // backwards we know who called the function when we get there.
+      for (int i=n-1; i>=0; --i, --pcp) {
+        idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp));
+
+        if(idx>=0) {
+          // Skip over bogus __restore_rt frames that realtime profiling
+          // can introduce.
+          if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) {
+            --pcp;
+            --i;
+            idx = findSymbolIndex(reinterpret_cast<u_long>(*pcp));
+            if (idx < 0) {
+              continue;
+            }
+          }
+
+          // If we have not seen this symbol before count it and mark it as seen
+          if(flagArray[idx]!=stacks && ((flagArray[idx]=stacks) || true)) {
+            ++countArray[idx];
+          }
+
+          // We know who we are and we know who our parent is.  Count this
+          if(parrentIdx>=0) {
+            externalSymbols[parrentIdx]->regChild(idx);
+            externalSymbols[idx]->regParrent(parrentIdx);
+          }
+          // inside if() so an unknown in the middle of a stack won't break
+          // the link!
+          parrentIdx=idx;
+        }
+      }
+
+      // idx should be the function that we were in when we received the signal.
+      if(idx>=0) {
+        ++externalSymbols[idx]->timerHit;
+      }
+
+    }
+  }
+  if (!cleo)
+    generateReportHTML(outputfd, countArray, stacks, thread);
+
+  // Both scratch arrays are private to this call.
+  delete [] flagArray;
+  delete [] countArray;
+}
+
+// Prints one line per stored counter to `fp`, ordered by descending count.
+// Each pass over the table prints the entries whose count equals the
+// current threshold and remembers the largest smaller count as the next
+// threshold; entries with a count of zero or less are never printed.
+// `parent` is the index of the symbol being reported (its own entry is
+// tagged " (self)") and `total` is the denominator of the percentages.
+void FunctionCount::printReport(FILE *fp, leaky *lk, int parent, int total)
+{
+  const char *rowFormat = " <A href=\"#%d\">%8d (%3.1f%%)%s %s</A>%s\n";
+
+  // Start above every possible count: the largest positive int.
+  int threshold = ((~0U)>>1);
+  int nextThreshold;
+
+  do {
+    nextThreshold = 0;
+    for (int pos = getSize() - 1; pos >= 0; --pos) {
+      const int hits = getCount(pos);
+      if (hits == threshold) {
+        const int symIdx = getIndex(pos);
+        const double percent = hits*100.0/total;
+        char *escaped = htmlify(lk->indexToName(symIdx));
+        fprintf(fp, rowFormat, symIdx, hits,
+                percent,
+                percent >= 10.0 ? "" : " ",
+                escaped,
+                parent == symIdx ? " (self)" : "");
+        delete [] escaped;
+      } else if (hits < threshold && hits > nextThreshold) {
+        nextThreshold = hits;
+      }
+    }
+    threshold = nextThreshold;
+  } while (threshold > 0);
+}
diff --git a/tools/jprof/leaky.h b/tools/jprof/leaky.h
new file mode 100644
index 000000000..5dafe30b9
--- /dev/null
+++ b/tools/jprof/leaky.h
@@ -0,0 +1,122 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __leaky_h_
+#define __leaky_h_
+
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include "libmalloc.h"
+#include "strset.h"
+#include "intcnt.h"
+
+typedef unsigned int u_int;
+
+struct Symbol;
+struct leaky;
+
+// An IntCount that can print its counters as report lines.
+class FunctionCount : public IntCount
+{
+public:
+ // Writes one line per counter to fp; symbol names are looked up through lk.
+ void printReport(FILE *fp, leaky *lk, int parent, int total);
+};
+
+// One entry of the symbol table: a name, its address, and the counters
+// accumulated while analyzing a profile.
+struct Symbol {
+  char* name;               // symbol name; "" when none was supplied
+  u_long address;           // address of the symbol
+  int timerHit;             // incremented by leaky::analyze() per stack
+  FunctionCount cntP, cntC; // counts per parent (cntP) and per child (cntC)
+
+  // Count one call to the child / from the parent with the given index.
+  int regChild(int id) {return cntC.countAdd(id, 1);}
+  int regParrent(int id) {return cntP.countAdd(id, 1);}
+  // Drop all parent and child counts.
+  void regClear() {cntC.clear(); cntP.clear();}
+
+  // Every member gets a defined value, so a Symbol that has not been through
+  // Init() yet still has a valid (empty) name and a zero address.
+  Symbol() : name((char *)""), address(0), timerHit(0) {}
+
+  // Sets the name (a strdup() copy, or "" for a null aName) and the address.
+  void Init(const char* aName, u_long aAddress) {
+    name = aName ? strdup(aName) : (char *)"";
+    address = aAddress;
+  }
+};
+
+// Node of the singly linked load-map list: one mapped shared object.
+struct LoadMapEntry {
+ char* name; // name of .so
+ u_long address; // base address where it was mapped in
+ LoadMapEntry* next; // following entry, or null at the end of the list
+};
+
+// The profile analyzer: command-line options, the mapped log file, the
+// symbol table and the filters used while producing a report.
+struct leaky {
+ leaky();
+ ~leaky();
+
+ // Parses the command line (options, program file, log files).
+ void initialize(int argc, char** argv);
+ // Processes one log file and writes its report to outputfd.
+ void open(char *arg);
+
+ char* applicationName; // basename of argv[0], printed by usageError()
+ int logFileIndex; // index in argv of the first log file
+ int numLogFiles; // number of log files on the command line
+ char* progFile; // program whose symbols are read first
+ FILE* outputfd; // stream the report is written to
+
+ bool quiet; // suppresses diagnostics on stderr; toggled by -v
+ bool showAddress; // print addresses next to names in stack traces
+ bool showThreads; // produce one report per thread
+ bool cleo; // emit the 'cleopatra' format instead of HTML
+ u_int stackDepth; // maximum number of frames looked at per stack
+ int onlyThread; // when non-zero, the only thread analyzed
+ char* output_dir; // directory for the output files, or null
+
+ int mappedLogFile; // descriptor of the current log file, -1 if none
+ malloc_log_entry* firstLogEntry; // first entry of the range analyzed
+ malloc_log_entry* lastLogEntry; // end (exclusive) of the range analyzed
+
+ int stacks; // stacks accepted so far by analyze()
+
+ int sfd;
+ Symbol** externalSymbols; // symbol table, sorted by address once loaded
+ Symbol** lastSymbol;
+ int usefulSymbols; // number of entries of externalSymbols in use
+ int numExternalSymbols;
+ StrSet exclusions; // names given with -e
+ u_long lowestSymbolAddr; // address of the first symbol after sorting
+ u_long highestSymbolAddr; // address of the last symbol after sorting
+
+ LoadMapEntry* loadMap; // list built from the map file by LoadMap()
+
+ bool collect_last; // --last: only the last capture section
+ int collect_start; // --start: first capture section, -1 if unset
+ int collect_end; // --end: section that ends the capture, -1 if unset
+
+ StrSet roots; // names given with -r
+ StrSet includes; // names given with -i
+
+ // Prints the usage text to stderr and exits.
+ void usageError();
+
+ void LoadMap();
+
+ // Analyzes the selected log range for one thread (0 = all threads).
+ void analyze(int thread);
+
+ void dumpEntryToLog(malloc_log_entry* lep);
+
+ void insertAddress(u_long address, malloc_log_entry* lep);
+ void removeAddress(u_long address, malloc_log_entry* lep);
+
+ void displayStackTrace(FILE* out, malloc_log_entry* lep);
+
+ Symbol ** ExtendSymbols(int num);
+ void ReadSymbols(const char* fileName, u_long aBaseAddress);
+ void ReadSharedLibrarySymbols();
+ void setupSymbols(const char* fileName);
+ // Returns the symbol covering address, or null if there is none.
+ Symbol* findSymbol(u_long address);
+ bool excluded(malloc_log_entry* lep);
+ bool included(malloc_log_entry* lep);
+ // Name of the symbol at table index idx; idx is not range-checked.
+ const char* indexToName(int idx) {return externalSymbols[idx]->name;}
+
+ private:
+ void generateReportHTML(FILE *fp, int *countArray, int count, int thread);
+ // Table index of the symbol covering address, or -1.
+ int findSymbolIndex(u_long address);
+};
+
+#endif /* __leaky_h_ */
diff --git a/tools/jprof/moz.build b/tools/jprof/moz.build
new file mode 100644
index 000000000..680161e81
--- /dev/null
+++ b/tools/jprof/moz.build
@@ -0,0 +1,28 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# The 'stub' subdirectory is built as well.
+DIRS += ['stub']
+
+# Build a program named 'jprof' from the sources listed below.
+Program('jprof')
+
+SOURCES += [
+ 'bfd.cpp',
+ 'coff.cpp',
+ 'elf.cpp',
+ 'intcnt.cpp',
+ 'leaky.cpp',
+ 'strset.cpp',
+]
+
+# Headers in 'stub' are on the include path.
+LOCAL_INCLUDES += [
+ 'stub',
+]
+
+# System libraries the program is linked against.
+OS_LIBS += [
+ 'dl',
+ 'bfd',
+ 'iberty',
+]
diff --git a/tools/jprof/split-profile.py b/tools/jprof/split-profile.py
new file mode 100755
index 000000000..89454d3eb
--- /dev/null
+++ b/tools/jprof/split-profile.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This program splits up a jprof profile into multiple files based on a
+# list of functions in a text file. First, a complete profile is
+# generated. Then, for each line in the text file, a profile is
+# generated containing only stacks that go through that line, and also
+# excluding all stacks in earlier lines in the text file. This means
+# that the text file, from start to end, is splitting out pieces of the
+# profile in their own file. Finally, a final profile containing the
+# remainder is produced.
+
+# The program takes four arguments:
+# (1) The path to jprof.
+# (2) The path to the text file describing the splits. The output
+# will be placed in the same directory as this file.
+# (3) The program that was profiled.
+# (4) The jprof-log file generated by the profile, to be split up.
+# (Really, all arguments from (3) and later are passed through to
+# jprof, so additional arguments could be provided if you want to pass
+# additional arguments to jprof.)
+
+# In slightly more detail:
+#
+# This script uses jprof's includes (-i) and excludes (-e) options to
+# split profiles into segments. It takes as input a single text file,
+# and from that text file creates a series of jprof profiles in the
+# directory the text file is in.
+#
+# The input file format looks like the following:
+#
+# poll g_main_poll
+# GetRuleCascade CSSRuleProcessor::GetRuleCascade(nsPresContext *, nsIAtom *)
+# RuleProcessorData RuleProcessorData::RuleProcessorData(nsPresContext *, nsIContent *, nsRuleWalker *, nsCompatibility *)
+#
+# From this input file, the script will construct a profile called
+# jprof-0.html that contains the whole profile, a profile called
+# jprof-1-poll.html that includes only stacks with g_main_poll, a
+# profile called jprof-2-GetRuleCascade.html that includes only stacks
+# that have GetRuleCascade and do not have g_main_poll, a profile called
+# jprof-3-RuleProcessorData.html that includes only stacks that have the
+# RuleProcessorData constructor and do not have GetRuleCascade or
+# g_main_poll, and a profile called jprof-4.html that includes only
+# stacks that do not have any of the three functions in them.
+#
+# This means that all of the segments of the profile, except
+# jprof-0.html, are mutually exclusive. Thus clever ordering of the
+# functions in the input file can lead to a logical splitting of the
+# profile into segments.
+
+import sys
+import subprocess
+import os.path
+
+if len(sys.argv) < 5:
+ sys.stderr.write("Expected arguments: <jprof> <split-file> <program> <jprof-log>\n")
+ sys.exit(1)
+
+jprof = sys.argv[1]
+splitfile = sys.argv[2]
+passthrough = sys.argv[3:]
+
+for f in [jprof, splitfile]:
+ if not os.path.isfile(f):
+ sys.stderr.write("could not find file: {0}\n".format(f))
+ sys.exit(1)
+
+def read_splits(splitfile):
+ """
+ Read splitfile (each line of which contains a name, a space, and
+ then a function name to split on), and return a list of pairs
+ representing exactly that. (Note that the name cannot contain
+ spaces, but the function name can, and often does.)
+ """
+ def line_to_split(line):
+ line = line.strip("\r\n")
+ idx = line.index(" ")
+ return (line[0:idx], line[idx+1:])
+
+ io = open(splitfile, "r")
+ result = [line_to_split(line) for line in io]
+ io.close()
+ return result
+
+splits = read_splits(splitfile)
+
+def generate_profile(options, destfile):
+ """
+ Run jprof to generate one split of the profile.
+ """
+ args = [jprof] + options + passthrough
+ print "Generating {0}".format(destfile)
+ destio = open(destfile, "w")
+ # jprof expects the "jprof-map" file to be in its current working directory
+ cwd = None
+ for option in passthrough:
+ if option.find("jprof-log"):
+ cwd = os.path.dirname(option)
+ if cwd is None:
+ raise StandardError("no jprof-log option given")
+ process = subprocess.Popen(args, stdout=destio, cwd=cwd)
+ process.wait()
+ destio.close()
+ if process.returncode != 0:
+ os.remove(destfile)
+ sys.stderr.write("Error {0} from command:\n {1}\n".format(process.returncode, " ".join(args)))
+ sys.exit(process.returncode)
+
+def output_filename(number, splitname):
+ """
+ Return the filename (absolute path) we should use to output the
+ profile segment with the given number and splitname. Splitname
+ should be None for the complete profile and the remainder.
+ """
+ def pad_count(i):
+ result = str(i)
+ # 0-pad to the same length
+ result = "0" * (len(str(len(splits) + 1)) - len(result)) + result
+ return result
+
+ name = pad_count(number)
+ if splitname is not None:
+ name += "-" + splitname
+
+ return os.path.join(os.path.dirname(splitfile),
+ "jprof-{0}.html".format(name))
+
+# generate the complete profile
+generate_profile([], output_filename(0, None))
+
+# generate the listed splits
+count = 1
+excludes = []
+for (splitname, splitfunction) in splits:
+ generate_profile(excludes + ["-i" + splitfunction],
+ output_filename(count, splitname))
+ excludes += ["-e" + splitfunction]
+ count = count + 1
+
+# generate the remainder after the splits
+generate_profile(excludes, output_filename(count, None))
diff --git a/tools/jprof/strset.cpp b/tools/jprof/strset.cpp
new file mode 100644
index 000000000..623ad3f90
--- /dev/null
+++ b/tools/jprof/strset.cpp
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "strset.h"
+#include <malloc.h>
+#include <string.h>
+
+// Start out as an empty set with no backing array allocated yet.
+StrSet::StrSet()
+    : strings(0),
+      numstrings(0)
+{
+}
+
+// Append a private copy of |s| to the set.  Duplicates are not filtered.
+// If |s| is null or memory cannot be obtained, the set is left unchanged
+// (the function has no way to report failure to its caller).
+void StrSet::add(const char* s)
+{
+    if (!s) {
+        return;
+    }
+
+    char* copy = strdup(s);
+    if (!copy) {
+        return;
+    }
+
+    // realloc(NULL, n) behaves like malloc(n), so the first insertion needs
+    // no special case.  Grow through a temporary so the existing array is
+    // not lost if realloc fails.
+    char** grown = (char**) realloc(strings, (numstrings + 1) * sizeof(char*));
+    if (!grown) {
+        free(copy);
+        return;
+    }
+
+    strings = grown;
+    strings[numstrings] = copy;
+    numstrings++;
+}
+
+// Linear search: returns 1 if a string equal to |s| has been added,
+// otherwise 0.
+int StrSet::contains(const char* s)
+{
+    for (int idx = 0; idx < numstrings; idx++) {
+        const char* candidate = strings[idx];
+        // Compare the first characters before paying for a full strcmp().
+        if (candidate[0] == s[0] && strcmp(candidate, s) == 0) {
+            return 1;
+        }
+    }
+    return 0;
+}
diff --git a/tools/jprof/strset.h b/tools/jprof/strset.h
new file mode 100644
index 000000000..681ed22a2
--- /dev/null
+++ b/tools/jprof/strset.h
@@ -0,0 +1,19 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __strset_h_
+#define __strset_h_
+
+// Minimal set of C strings backed by a heap array of char pointers.
+struct StrSet {
+ StrSet();
+
+ // Stores a strdup()'d copy of |string|; duplicates are not filtered out.
+ void add(const char* string);
+ // Returns 1 if an equal string (per strcmp) was added earlier, else 0.
+ int contains(const char* string);
+ // True while nothing has been added.
+ bool IsEmpty() const { return 0 == numstrings; }
+
+ // Array holding numstrings string copies. No destructor is declared
+ // here, so nothing in this struct releases the array or the copies.
+ char** strings;
+ int numstrings;
+};
+
+#endif /* __strset_h_ */
diff --git a/tools/jprof/stub/Makefile.in b/tools/jprof/stub/Makefile.in
new file mode 100644
index 000000000..8e6b6b8f8
--- /dev/null
+++ b/tools/jprof/stub/Makefile.in
@@ -0,0 +1,8 @@
+#! gmake
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# override optimization
+MOZ_OPTIMIZE_FLAGS = -fno-omit-frame-pointer
diff --git a/tools/jprof/stub/config.h b/tools/jprof/stub/config.h
new file mode 100644
index 000000000..d43ffcaa6
--- /dev/null
+++ b/tools/jprof/stub/config.h
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef config_h___
+#define config_h___
+
+#define MAX_STACK_CRAWL 500
+#define M_LOGFILE "jprof-log"
+#define M_MAPFILE "jprof-map"
+
+#if defined(linux) || defined(NTO)
+#define USE_BFD
+#undef NEED_WRAPPERS
+
+#endif /* linux || NTO */
+
+#endif /* config_h___ */
diff --git a/tools/jprof/stub/jprof.h b/tools/jprof/stub/jprof.h
new file mode 100644
index 000000000..5ee99279c
--- /dev/null
+++ b/tools/jprof/stub/jprof.h
@@ -0,0 +1,17 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jprof_h___
+#define jprof_h___
+#include "nscore.h"
+
+// The jprof library build defines _IMPL_JPROF_API (see stub/moz.build), so
+// the library itself sees its API as exported and everyone else sees it as
+// imported.  The guard previously tested the misspelled _IMPL_JPPROF_API,
+// which nothing defines, so the export branch was never selected.
+#ifdef _IMPL_JPROF_API
+#define JPROF_API(type) NS_EXPORT_(type)
+#else
+#define JPROF_API(type) NS_IMPORT_(type)
+#endif
+
+JPROF_API(void) setupProfilingStuff(void);
+
+#endif /* jprof_h___ */
diff --git a/tools/jprof/stub/libmalloc.cpp b/tools/jprof/stub/libmalloc.cpp
new file mode 100644
index 000000000..2da2a37b3
--- /dev/null
+++ b/tools/jprof/stub/libmalloc.cpp
@@ -0,0 +1,790 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+// vim:cindent:sw=4:et:ts=8:
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The linux glibc hides part of sigaction if _POSIX_SOURCE is defined
+#if defined(linux)
+#undef _POSIX_SOURCE
+#undef _SVID_SOURCE
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#endif
+
+#include <errno.h>
+#if defined(linux)
+#include <linux/rtc.h>
+#include <pthread.h>
+#endif
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <ucontext.h>
+#include <execinfo.h>
+
+#include "libmalloc.h"
+#include "jprof.h"
+#include <string.h>
+#include <errno.h>
+#include <dlfcn.h>
+
+// Must define before including jprof.h
+// NOTE(review): jprof.h is already included above this point, so the
+// ordering note looks stale -- confirm before relying on it.
+// Plain forwarder: returns malloc(size) unchanged, including a null
+// result when the allocation fails.
+void *moz_xmalloc(size_t size)
+{
+ return malloc(size);
+}
+
+// Counterpart of moz_xmalloc above: hands |mem| straight to free().
+void moz_xfree(void *mem)
+{
+ free(mem);
+}
+
+#ifdef NTO
+#include <sys/link.h>
+extern r_debug _r_debug;
+#else
+#include <link.h>
+#endif
+
+#define USE_GLIBC_BACKTRACE 1
+// To debug, use #define JPROF_STATIC
+#define JPROF_STATIC static
+
+static int gLogFD = -1;
+static pthread_t main_thread;
+
+static bool gIsSlave = false;
+static int gFilenamePID;
+
+static void startSignalCounter(unsigned long millisec);
+static int enableRTCSignals(bool enable);
+
+
+//----------------------------------------------------------------------
+// replace use of atexit()
+
+static void DumpAddressMap();
+
+// Dummy type whose only job is to run DumpAddressMap() from its destructor.
+// RegisterJprofShutdown() creates the single function-local static instance.
+struct JprofShutdown {
+ JprofShutdown() {}
+ ~JprofShutdown() {
+ DumpAddressMap();
+ }
+};
+
+// Arrange for DumpAddressMap() to run at shutdown. Calling this more than
+// once is harmless because the function-local static is constructed once.
+static void RegisterJprofShutdown() {
+ // This instantiates the dummy class above, and will trigger the class
+ // destructor when libxul is unloaded. This is equivalent to atexit(),
+ // but gracefully handles dlclose().
+ static JprofShutdown t;
+}
+
+#if defined(i386) || defined(_i386) || defined(__x86_64__)
+// Record the interrupted call stack into me->pcs and me->numpcs.
+// top_instr_ptr is the instruction pointer taken from the signal context by
+// StackHook; it becomes the first recorded pc. stack_top is accepted but
+// not referenced by either implementation below.
+JPROF_STATIC void CrawlStack(malloc_log_entry* me,
+ void* stack_top, void* top_instr_ptr)
+{
+#if USE_GLIBC_BACKTRACE
+ // This probably works on more than x86! But we need a way to get the
+ // top instruction pointer, which is kind of arch-specific
+ // 500 matches MAX_STACK_CRAWL in config.h, and the loop below stores at
+ // most cnt - 3 entries, so me->pcs cannot be overrun.
+ void *array[500];
+ int cnt, i;
+ u_long numpcs = 0;
+
+ // This is from glibc. A more generic version might use
+ // libunwind and/or CaptureStackBackTrace() on Windows
+ cnt = backtrace(&array[0],sizeof(array)/sizeof(array[0]));
+
+ // StackHook->JprofLog->CrawlStack
+ // Then we have sigaction, which replaced top_instr_ptr
+ // i.e. slots 0-2 are the profiler's own frames and are skipped; slot 3
+ // is overwritten with the interrupted pc before copying from there on.
+ array[3] = top_instr_ptr;
+ for (i = 3; i < cnt; i++)
+ {
+ me->pcs[numpcs++] = (char *) array[i];
+ }
+ me->numpcs = numpcs;
+
+#else
+ // original code - this breaks on many platforms
+ void **bp;
+#if defined(__i386)
+ __asm__( "movl %%ebp, %0" : "=g"(bp));
+#elif defined(__x86_64__)
+ __asm__( "movq %%rbp, %0" : "=g"(bp));
+#else
+ // It would be nice if this worked uniformly, but at least on i386 and
+ // x86_64, it stopped working with gcc 4.1, because it points to the
+ // end of the saved registers instead of the start.
+ bp = __builtin_frame_address(0);
+#endif
+ u_long numpcs = 0;
+ bool tracing = false;
+
+ me->pcs[numpcs++] = (char*) top_instr_ptr;
+
+ // Walk the chain of saved frame pointers: *bp is the caller's frame
+ // pointer and the slot after it is the return address.
+ while (numpcs < MAX_STACK_CRAWL) {
+ void** nextbp = (void**) *bp++;
+ void* pc = *bp;
+ // Stop when the chain no longer moves toward higher addresses.
+ if (nextbp < bp) {
+ break;
+ }
+ if (tracing) {
+ // Skip the signal handling.
+ me->pcs[numpcs++] = (char*) pc;
+ }
+ else if (pc == top_instr_ptr) {
+ // Recording starts only after a return address equal to the
+ // interrupted pc has been seen.
+ tracing = true;
+ }
+ bp = nextbp;
+ }
+ me->numpcs = numpcs;
+#endif
+}
+#endif
+
+//----------------------------------------------------------------------
+
+static int rtcHz;
+static int rtcFD = -1;
+static bool circular = false;
+
+#if defined(linux) || defined(NTO)
+// Write the address map file: for every entry in the dynamic linker's
+// link map that has a non-empty name, emit a malloc_map_entry header
+// followed by nameLen bytes of the name (no terminating NUL).  The file is
+// M_MAPFILE, with "-<pid>" appended when this process is a jprof slave.
+static void DumpAddressMap()
+{
+    // Stop the profiling signal source first so that no tick arrives while
+    // the map is being written.
+#if defined(linux)
+    if (rtcHz) {
+        enableRTCSignals(false);
+    } else
+#endif
+    {
+        startSignalCounter(0);
+    }
+
+    char mapPath[2048];
+    if (gIsSlave) {
+        snprintf(mapPath, sizeof(mapPath), "%s-%d", M_MAPFILE, gFilenamePID);
+    } else {
+        snprintf(mapPath, sizeof(mapPath), "%s", M_MAPFILE);
+    }
+
+    int fd = open(mapPath, O_CREAT|O_WRONLY|O_TRUNC, 0666);
+    if (fd < 0) {
+        return;
+    }
+
+    malloc_map_entry entry;
+    for (link_map* lm = _r_debug.r_map; nullptr != lm; lm = lm->l_next) {
+        // Skip unnamed entries.
+        if (!lm->l_name || !*lm->l_name) {
+            continue;
+        }
+        entry.nameLen = strlen(lm->l_name);
+        entry.address = lm->l_addr;
+        write(fd, &entry, sizeof(entry));
+        write(fd, lm->l_name, entry.nameLen);
+    }
+    close(fd);
+}
+#endif
+
+static bool was_paused = true;
+
+JPROF_STATIC void JprofBufferDump();
+JPROF_STATIC void JprofBufferClear();
+
+// Signal handler (installed for SIGUSR2 by setupProfilingStuff): discards
+// the contents of the circular buffer.  Does nothing when the circular
+// buffer is not in use.
+static void ClearProfilingHook(int signum)
+{
+    if (!circular) {
+        return;
+    }
+    JprofBufferClear();
+    puts("Jprof: cleared circular buffer.");
+}
+
+// Signal handler (installed for SIGUSR1 by setupProfilingStuff): flushes
+// the circular buffer to the log if one is in use, writes the address map
+// (which also stops the tick source) and marks profiling as paused so the
+// next logged entry is flagged JP_FIRST_AFTER_PAUSE.
+static void EndProfilingHook(int signum)
+{
+    if (circular) {
+        JprofBufferDump();
+    }
+
+    DumpAddressMap();
+    was_paused = true;
+    puts("Jprof: profiling paused.");
+}
+
+
+
+//----------------------------------------------------------------------
+// proper usage would be a template, including the function to find the
+// size of an entry, or include a size header explicitly to each entry.
+#if defined(linux)
+#define DUMB_LOCK() pthread_mutex_lock(&mutex);
+#define DUMB_UNLOCK() pthread_mutex_unlock(&mutex);
+#else
+#define DUMB_LOCK() FIXME()
+#define DUMB_UNLOCK() FIXME()
+#endif
+
+
+// Byte-oriented ring buffer holding variable-sized records.
+//
+// Records are appended at |tail| and discarded from |head|; |used| counts
+// the records currently stored.  One byte is always kept free so that
+// head == tail unambiguously means "empty"; the usable capacity is
+// therefore buffer_size - 1 bytes.  Both pointers are kept inside
+// [buffer, buffer + buffer_size).  The buffer does not know record sizes:
+// the caller passes the size of the oldest record to drop().
+//
+// Every mutating operation takes the (recursive) mutex via DUMB_LOCK();
+// lock()/unlock() let a caller hold it across several operations.
+class DumbCircularBuffer
+{
+public:
+    DumbCircularBuffer(size_t init_buffer_size) {
+        used = 0;
+        buffer_size = init_buffer_size;
+        buffer = (unsigned char *) malloc(buffer_size);
+        if (!buffer) {
+            // Without storage, report zero capacity so that insert()
+            // always fails instead of writing through a null pointer.
+            buffer_size = 0;
+        }
+        head = tail = buffer;
+
+#if defined(linux)
+        pthread_mutexattr_t mAttr;
+        // An attributes object must be initialized before it is modified
+        // or handed to pthread_mutex_init().
+        pthread_mutexattr_init(&mAttr);
+        pthread_mutexattr_settype(&mAttr, PTHREAD_MUTEX_RECURSIVE_NP);
+        pthread_mutex_init(&mutex, &mAttr);
+        pthread_mutexattr_destroy(&mAttr);
+#endif
+    }
+    ~DumbCircularBuffer() {
+        free(buffer);
+#if defined(linux)
+        pthread_mutex_destroy (&mutex);
+#endif
+    }
+
+    // Discard every stored record.
+    void clear() {
+        DUMB_LOCK();
+        head = tail;
+        used = 0;
+        DUMB_UNLOCK();
+    }
+
+    // True when no bytes are stored.  Not locked.
+    bool empty() {
+        return head == tail;
+    }
+
+    // Number of bytes that can be inserted right now.
+    size_t space_available() {
+        size_t result;
+        DUMB_LOCK();
+        if (buffer_size == 0)
+            result = 0;
+        else if (tail >= head)
+            // Includes the empty case (head == tail), which must yield
+            // buffer_size - 1 rather than underflowing to SIZE_MAX.
+            result = buffer_size - (tail-head) - 1;
+        else
+            result = head-tail - 1;
+        DUMB_UNLOCK();
+        return result;
+    }
+
+    // Discard the oldest record, which the caller says is |size| bytes.
+    void drop(size_t size) {
+        // assumes correctness!
+        DUMB_LOCK();
+        head += size;
+        if (head >= &buffer[buffer_size])
+            head -= buffer_size;
+        used--;
+        DUMB_UNLOCK();
+    }
+
+    // Append one record of |size| bytes, wrapping around the end of the
+    // storage if necessary.  Returns false (and stores nothing) when there
+    // is not enough free space.
+    bool insert(void *data, size_t size) {
+        // can fail if not enough space in the entire buffer
+        DUMB_LOCK();
+        if (space_available() < size) {
+            // Release the lock on the failure path too.
+            DUMB_UNLOCK();
+            return false;
+        }
+
+        size_t max_without_wrap = &buffer[buffer_size] - tail;
+        size_t initial = size > max_without_wrap ? max_without_wrap : size;
+#if DEBUG_CIRCULAR
+        fprintf(stderr,"insert(%u): max_without_wrap %lu, size %lu, initial %lu\n",
+                used,(unsigned long)max_without_wrap,(unsigned long)size,(unsigned long)initial);
+#endif
+        memcpy(tail,data,initial);
+        tail += initial;
+        data = ((char *)data)+initial;
+        size -= initial;
+        if (size != 0) {
+#if DEBUG_CIRCULAR
+            fprintf(stderr,"wrapping by %lu bytes\n",(unsigned long)size);
+#endif
+            memcpy(buffer,data,size);
+            tail = &buffer[size];
+        } else if (tail == &buffer[buffer_size]) {
+            // The record ended exactly at the end of the storage: wrap now
+            // so tail never rests one-past-the-end, which would break the
+            // head/tail comparisons in space_available().
+            tail = buffer;
+        }
+
+        used++;
+        DUMB_UNLOCK();
+
+        return true;
+    }
+
+    // for external access to the buffer (saving)
+    void lock() {
+        DUMB_LOCK();
+    }
+
+    void unlock() {
+        DUMB_UNLOCK();
+    }
+
+    // XXX These really shouldn't be public...
+    unsigned char *head;        // start of the oldest stored record
+    unsigned char *tail;        // where the next record will be written
+    unsigned int used;          // number of records currently stored
+    unsigned char *buffer;      // start of the storage (null if allocation failed)
+    size_t buffer_size;         // size of the storage in bytes
+
+private:
+    pthread_mutex_t mutex;
+};
+
+class DumbCircularBuffer *JprofBuffer;
+
+// Allocate the global circular buffer with |size| bytes of storage.
+// The object is stored in JprofBuffer; nothing visible here deletes it.
+JPROF_STATIC void
+JprofBufferInit(size_t size)
+{
+ JprofBuffer = new DumbCircularBuffer(size);
+}
+
+// Discard everything currently held in the global circular buffer and
+// note the request on stderr. Reached from ClearProfilingHook.
+JPROF_STATIC void
+JprofBufferClear()
+{
+ fprintf(stderr,"Told to clear JPROF circular buffer\n");
+ JprofBuffer->clear();
+}
+
+// Number of bytes of |me| that are actually meaningful: the fixed fields
+// that precede pcs plus the me->numpcs stack slots in use.  This is the
+// size that gets written to the log / stored in the circular buffer.
+JPROF_STATIC size_t
+JprofEntrySizeof(malloc_log_entry *me)
+{
+    const size_t header_bytes = offsetof(malloc_log_entry, pcs);
+    const size_t stack_bytes = me->numpcs * sizeof(char*);
+    return header_bytes + stack_bytes;
+}
+
+// Append one log entry to the global circular buffer, dropping the oldest
+// stored entries until the new one fits.  If the entry cannot fit even
+// into an empty buffer it is silently discarded.
+JPROF_STATIC void
+JprofBufferAppend(malloc_log_entry *me)
+{
+    const size_t size = JprofEntrySizeof(me);
+    const size_t header_size = offsetof(malloc_log_entry, pcs);
+
+    do {
+        while (JprofBuffer->space_available() < size &&
+               JprofBuffer->used > 0) {
+            // The oldest entry may straddle the end of the ring, so its
+            // header cannot be read through |head| as if it were
+            // contiguous.  Copy the fixed-size header out piecewise and
+            // compute the entry size from that copy (only numpcs is read).
+            malloc_log_entry oldest;
+            unsigned char *ring_end =
+                &JprofBuffer->buffer[JprofBuffer->buffer_size];
+            size_t contiguous = ring_end - JprofBuffer->head;
+            if (contiguous > header_size)
+                contiguous = header_size;
+            memcpy(&oldest, JprofBuffer->head, contiguous);
+            memcpy(((unsigned char *) &oldest) + contiguous,
+                   JprofBuffer->buffer, header_size - contiguous);
+            const size_t oldest_size = JprofEntrySizeof(&oldest);
+#if DEBUG_CIRCULAR
+            fprintf(stderr,"dropping entry: %u in use, %lu free, need %lu, size_to_free = %lu\n",
+                    JprofBuffer->used,(unsigned long)JprofBuffer->space_available(),
+                    (unsigned long)size,(unsigned long)oldest_size);
+#endif
+            JprofBuffer->drop(oldest_size);
+        }
+        // Nothing left to drop and still no room: give up on this entry.
+        if (JprofBuffer->space_available() < size)
+            return;
+
+    } while (!JprofBuffer->insert(me,size));
+}
+
+// Write the bytes currently stored in the global circular buffer to the
+// log file descriptor, oldest first, then empty the buffer.  The buffer
+// lock is held for the whole operation.
+JPROF_STATIC void
+JprofBufferDump()
+{
+    JprofBuffer->lock();
+#if DEBUG_CIRCULAR
+    fprintf(stderr,"dumping JP_CIRCULAR buffer, %d of %d bytes\n",
+            JprofBuffer->tail > JprofBuffer->head ?
+              JprofBuffer->tail - JprofBuffer->head :
+              JprofBuffer->buffer_size + JprofBuffer->tail - JprofBuffer->head,
+            JprofBuffer->buffer_size);
+#endif
+    unsigned char *oldest = JprofBuffer->head;
+    unsigned char *newest_end = JprofBuffer->tail;
+    if (newest_end < oldest) {
+        // The data wraps: emit head..end of storage, then start..tail.
+        unsigned char *ring_start = JprofBuffer->buffer;
+        unsigned char *ring_end = &ring_start[JprofBuffer->buffer_size];
+        write(gLogFD, oldest, ring_end - oldest);
+        write(gLogFD, ring_start, newest_end - ring_start);
+    } else {
+        // One contiguous run.
+        write(gLogFD, oldest, newest_end - oldest);
+    }
+    JprofBuffer->clear();
+    JprofBuffer->unlock();
+}
+
+//----------------------------------------------------------------------
+
+// Build one log entry for the current tick and emit it: timestamp
+// |aTime|, the calling thread's id, the JP_FIRST_AFTER_PAUSE flag on the
+// first entry after a pause, and the stack obtained from CrawlStack.
+// The entry goes to the circular buffer when one is in use, otherwise
+// straight to the log file descriptor.
+JPROF_STATIC void
+JprofLog(u_long aTime, void* stack_top, void* top_instr_ptr)
+{
+    // Kept static rather than on the stack purely to ease debugging.
+    static malloc_log_entry entry;
+
+    entry.delTime = aTime;
+    entry.thread = syscall(SYS_gettid); //gettid();
+    entry.flags = 0;
+    if (was_paused) {
+        entry.flags = JP_FIRST_AFTER_PAUSE;
+        was_paused = false;
+    }
+
+    CrawlStack(&entry, stack_top, top_instr_ptr);
+
+#ifndef NTO
+    if (!circular) {
+        write(gLogFD, &entry, JprofEntrySizeof(&entry));
+    } else {
+        JprofBufferAppend(&entry);
+    }
+#else
+    printf("Neutrino is missing the pcs member of malloc_log_entry!! \n");
+#endif
+}
+
+static int realTime;
+
+/* Arm a one-shot interval timer that expires |millisec| milliseconds from
+ * now.  The interval part is left at zero, so the timer does not re-arm
+ * itself; StackHook calls this again after each tick, which keeps a new
+ * tick from arriving while the handler is still running.  DumpAddressMap
+ * passes 0 to turn the timer off.
+ * ITIMER_REAL is used when realTime is set, ITIMER_PROF otherwise.
+ */
+static void startSignalCounter(unsigned long millisec)
+{
+    struct itimerval tvalue;
+
+    // No automatic reload.
+    tvalue.it_interval.tv_sec = 0;
+    tvalue.it_interval.tv_usec = 0;
+    // Split the delay into whole seconds and leftover microseconds.
+    tvalue.it_value.tv_sec = millisec/1000;
+    tvalue.it_value.tv_usec = (millisec%1000)*1000;
+
+    setitimer(realTime ? ITIMER_REAL : ITIMER_PROF, &tvalue, nullptr);
+}
+
+static long timerMilliSec = 50;
+
+#if defined(linux)
+// Prepare /dev/rtc as the tick source: open it (into the global rtcFD),
+// install |sap| as the SIGIO handler, program a periodic interrupt rate of
+// |hz|, turn periodic interrupts on and route the device's signals to this
+// process.  Asynchronous delivery itself is switched on later by
+// enableRTCSignals().
+// Returns 1 on success.  On failure prints the reason, closes the device
+// again (leaving rtcFD at -1) and returns 0.
+static int setupRTCSignals(int hz, struct sigaction *sap)
+{
+    /* global */ rtcFD = open("/dev/rtc", O_RDONLY);
+    if (rtcFD < 0) {
+        perror("JPROF_RTC setup: open(\"/dev/rtc\", O_RDONLY)");
+        return 0;
+    }
+
+    // Run the remaining steps in order; |failed| names the first one that
+    // did not succeed.
+    const char *failed = nullptr;
+    if (sigaction(SIGIO, sap, nullptr) == -1) {
+        failed = "JPROF_RTC setup: sigaction(SIGIO)";
+    } else if (ioctl(rtcFD, RTC_IRQP_SET, hz) == -1) {
+        failed = "JPROF_RTC setup: ioctl(/dev/rtc, RTC_IRQP_SET, $JPROF_RTC_HZ)";
+    } else if (ioctl(rtcFD, RTC_PIE_ON, 0) == -1) {
+        failed = "JPROF_RTC setup: ioctl(/dev/rtc, RTC_PIE_ON)";
+    } else if (fcntl(rtcFD, F_SETSIG, 0) == -1) {
+        failed = "JPROF_RTC setup: fcntl(/dev/rtc, F_SETSIG, 0)";
+    } else if (fcntl(rtcFD, F_SETOWN, getpid()) == -1) {
+        failed = "JPROF_RTC setup: fcntl(/dev/rtc, F_SETOWN, getpid())";
+    }
+
+    if (failed) {
+        // Report first: close() may overwrite errno.
+        perror(failed);
+        // Do not leave the device open when RTC profiling is not used.
+        close(rtcFD);
+        rtcFD = -1;
+        return 0;
+    }
+
+    return 1;
+}
+
+// Switch asynchronous (FASYNC) signal delivery on rtcFD on or off.
+// Returns 1 when the flag was changed; returns 0 when the requested state
+// was already recorded or when an fcntl() call failed.  The requested
+// state is recorded before the fcntl() calls are attempted.
+static int enableRTCSignals(bool enable)
+{
+    static bool enabled = false;
+    if (enable == enabled) {
+        return 0;
+    }
+    enabled = enable;
+
+    const int current = fcntl(rtcFD, F_GETFL);
+    if (current < 0) {
+        perror("JPROF_RTC setup: fcntl(/dev/rtc, F_GETFL)");
+        return 0;
+    }
+
+    const int wanted = enable ? (current | FASYNC) : (current & ~FASYNC);
+    if (fcntl(rtcFD, F_SETFL, wanted) != -1) {
+        return 1;
+    }
+
+    perror(enable
+           ? "JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags | FASYNC)"
+           : "JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags & ~FASYNC)");
+    return 0;
+}
+#endif
+
+// Profiling tick handler (installed with SA_SIGINFO by setupProfilingStuff).
+// Computes the time since the first tick, logs the interrupted stack via
+// JprofLog using the stack and instruction pointers from |ucontext|, and,
+// when the interval timer is the tick source, re-arms it for the next tick.
+// In RTC mode the logged time is always 0 and only the thread recorded in
+// main_thread is sampled.
+JPROF_STATIC void StackHook(
+int signum,
+siginfo_t *info,
+void *ucontext)
+{
+ static struct timeval tFirst;
+ static int first=1;
+ size_t millisec = 0;
+
+#if defined(linux)
+ if (rtcHz && pthread_self() != main_thread) {
+ // Only collect stack data on the main thread, for now.
+ return;
+ }
+#endif
+
+ // True exactly once: tests |first| and clears it in the same expression.
+ if(first && !(first=0)) {
+ puts("Jprof: received first signal");
+#if defined(linux)
+ if (rtcHz) {
+ enableRTCSignals(true);
+ } else
+#endif
+ {
+ // Remember when the first tick happened; later ticks are
+ // timestamped relative to it.
+ gettimeofday(&tFirst, 0);
+ millisec = 0;
+ }
+ } else {
+#if defined(linux)
+ if (rtcHz) {
+ enableRTCSignals(true);
+ } else
+#endif
+ {
+ struct timeval tNow;
+ gettimeofday(&tNow, 0);
+ // Elapsed time since the first tick, in microseconds, then
+ // truncated to whole milliseconds.
+ double usec = 1e6*(tNow.tv_sec - tFirst.tv_sec);
+ usec += (tNow.tv_usec - tFirst.tv_usec);
+ millisec = static_cast<size_t>(usec*1e-3);
+ }
+ }
+
+ gregset_t &gregs = ((ucontext_t*)ucontext)->uc_mcontext.gregs;
+#ifdef __x86_64__
+ JprofLog(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]);
+#else
+ JprofLog(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]);
+#endif
+
+ // The interval timer is one-shot (see startSignalCounter), so schedule
+ // the next tick here.
+ if (!rtcHz)
+ startSignalCounter(timerMilliSec);
+}
+
+// One-time profiler setup, driven by the JPROF_FLAGS environment variable.
+// On the first call it parses the flags, and if profiling was requested
+// (JP_START or JP_DEFER) opens the log file, registers the shutdown hook,
+// installs the tick handler (StackHook) and the SIGUSR1/SIGUSR2 handlers,
+// and starts the tick source unless JP_DEFER was given. Later calls only
+// print a message. Without JPROF_FLAGS nothing is started.
+NS_EXPORT_(void) setupProfilingStuff(void)
+{
+ static int gFirstTime = 1;
+ // Only filled in when JPROF_FLAGS is set; it is only read below when
+ // doNotStart has been cleared, which also requires JPROF_FLAGS.
+ char filename[2048]; // XXX fix
+
+ // True exactly once: tests gFirstTime and clears it in one expression.
+ if(gFirstTime && !(gFirstTime=0)) {
+ int startTimer = 1;
+ int doNotStart = 1;
+ int firstDelay = 0;
+ int append = O_TRUNC;
+ char *tst = getenv("JPROF_FLAGS");
+
+ /* Options from JPROF_FLAGS environment variable:
+ * JP_DEFER -> Wait for a SIGPROF (or SIGALRM, if JP_REALTIME
+ * is set) from userland before starting
+ * to generate them internally
+ * JP_START -> Install the signal handler
+ * JP_PERIOD -> Time between profiler ticks
+ * JP_FIRST -> Extra delay before starting
+ * JP_REALTIME -> Take stack traces in intervals of real time
+ * rather than time used by the process (and the
+ * system for the process). This is useful for
+ * finding time spent by the X server.
+ * JP_APPEND -> Append to jprof-log rather than overwriting it.
+ * This is somewhat risky since it depends on the
+ * address map staying constant across multiple runs.
+ * JP_FILENAME -> base filename to use when saving logs. Note that
+ * this does not affect the mapfile.
+ * JP_CIRCULAR -> use a circular buffer of size N, write/clear on SIGUSR1
+ *
+ * JPROF_SLAVE is set if this is not the first process.
+ */
+ // JP_RTC_HZ=<n> is also parsed below (Linux only) although it is not
+ // listed above. Options are located with strstr(), so they may appear
+ // anywhere in the variable.
+
+ circular = false;
+
+ if(tst) {
+ if(strstr(tst, "JP_DEFER"))
+ {
+ doNotStart = 0;
+ startTimer = 0;
+ }
+ if(strstr(tst, "JP_START")) doNotStart = 0;
+ if(strstr(tst, "JP_REALTIME")) realTime = 1;
+ if(strstr(tst, "JP_APPEND")) append = O_APPEND;
+
+ char *delay = strstr(tst,"JP_PERIOD=");
+ if(delay) {
+ // JP_PERIOD is given in seconds; it is stored in milliseconds.
+ double tmp = strtod(delay+strlen("JP_PERIOD="), nullptr);
+ if (tmp>=1e-3) {
+ timerMilliSec = static_cast<unsigned long>(1000 * tmp);
+ } else {
+ fprintf(stderr,
+ "JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n",
+ tmp);
+ timerMilliSec = 1;
+ }
+ }
+
+ char *circular_op = strstr(tst,"JP_CIRCULAR=");
+ if(circular_op) {
+ size_t size = atol(circular_op+strlen("JP_CIRCULAR="));
+ if (size < 1000) {
+ fprintf(stderr,
+ "JP_CIRCULAR of %lu less than 1000, using 10000\n",
+ (unsigned long) size);
+ size = 10000;
+ }
+ JprofBufferInit(size);
+ fprintf(stderr,"JP_CIRCULAR buffer of %lu bytes\n", (unsigned long) size);
+ circular = true;
+ }
+
+ char *first = strstr(tst, "JP_FIRST=");
+ if(first) {
+ firstDelay = atol(first+strlen("JP_FIRST="));
+ }
+
+ char *rtc = strstr(tst, "JP_RTC_HZ=");
+ if (rtc) {
+#if defined(linux)
+ rtcHz = atol(rtc+strlen("JP_RTC_HZ="));
+ timerMilliSec = 0; /* This makes JP_FIRST work right. */
+ realTime = 1; /* It's the _R_TC and all. ;) */
+
+#define IS_POWER_OF_TWO(x) (((x) & ((x) - 1)) == 0)
+
+ if (!IS_POWER_OF_TWO(rtcHz) || rtcHz < 2) {
+ fprintf(stderr, "JP_RTC_HZ must be power of two and >= 2, "
+ "but %d was provided; using default of 2048\n",
+ rtcHz);
+ rtcHz = 2048;
+ }
+#else
+ fputs("JP_RTC_HZ found, but RTC profiling only supported on "
+ "Linux!\n", stderr);
+
+#endif
+ }
+ // The filename is everything after "JP_FILENAME=" up to the end of
+ // JPROF_FLAGS; no delimiter is looked for.
+ const char *f = strstr(tst,"JP_FILENAME=");
+ if (f)
+ f = f + strlen("JP_FILENAME=");
+ else
+ f = M_LOGFILE;
+
+ // The first process sets JPROF_SLAVE in its environment; a process
+ // that already finds it set treats itself as a slave and appends
+ // its id to the log (and map) filename.
+ char *is_slave = getenv("JPROF_SLAVE");
+ if (!is_slave)
+ setenv("JPROF_SLAVE","", 0);
+ gIsSlave = !!is_slave;
+
+ gFilenamePID = syscall(SYS_gettid); //gettid();
+ if (is_slave)
+ snprintf(filename,sizeof(filename),"%s-%d",f,gFilenamePID);
+ else
+ snprintf(filename,sizeof(filename),"%s",f);
+
+ // XXX FIX! inherit current capture state!
+ }
+
+ if(!doNotStart) {
+
+ if(gLogFD<0) {
+ gLogFD = open(filename, O_CREAT | O_WRONLY | append, 0666);
+ if(gLogFD<0) {
+ // The message always names M_LOGFILE, even when JP_FILENAME
+ // selected a different file.
+ fprintf(stderr, "Unable to create " M_LOGFILE);
+ perror(":");
+ } else {
+ struct sigaction action;
+ sigset_t mset;
+
+ // Dump out the address map when we terminate
+ RegisterJprofShutdown();
+
+ main_thread = pthread_self();
+ //fprintf(stderr,"jprof: main_thread = %u\n",
+ // (unsigned int)main_thread);
+
+ // FIX! probably should block these against each other
+ // Very unlikely.
+ sigemptyset(&mset);
+ action.sa_handler = nullptr;
+ action.sa_sigaction = StackHook;
+ action.sa_mask = mset;
+ action.sa_flags = SA_RESTART | SA_SIGINFO;
+#if defined(linux)
+ if (rtcHz) {
+ if (!setupRTCSignals(rtcHz, &action)) {
+ fputs("jprof: Error initializing RTC, NOT "
+ "profiling\n", stderr);
+ // Returns with the log file still open (gLogFD).
+ return;
+ }
+ }
+
+ if (!rtcHz || firstDelay != 0)
+#endif
+ {
+ if (realTime) {
+ sigaction(SIGALRM, &action, nullptr);
+ }
+ }
+ // enable PROF in all cases to simplify JP_DEFER/pause/restart
+ sigaction(SIGPROF, &action, nullptr);
+
+ // make it so a SIGUSR1 will stop the profiling
+ // Note: It currently does not close the logfile.
+ // This could be configurable (so that it could
+ // later be reopened).
+
+ struct sigaction stop_action;
+ stop_action.sa_handler = EndProfilingHook;
+ stop_action.sa_mask = mset;
+ stop_action.sa_flags = SA_RESTART;
+ sigaction(SIGUSR1, &stop_action, nullptr);
+
+ // make it so a SIGUSR2 will clear the circular buffer
+
+ stop_action.sa_handler = ClearProfilingHook;
+ stop_action.sa_mask = mset;
+ stop_action.sa_flags = SA_RESTART;
+ sigaction(SIGUSR2, &stop_action, nullptr);
+
+ printf("Jprof: Initialized signal handler and set "
+ "timer for %lu %s, %d s "
+ "initial delay\n",
+ rtcHz ? rtcHz : timerMilliSec,
+ rtcHz ? "Hz" : "ms",
+ firstDelay);
+
+ if(startTimer) {
+#if defined(linux)
+ /* If we have an initial delay we can just use
+ startSignalCounter to set up a timer to fire the
+ first stackHook after that delay. When that happens
+ we'll go and switch to RTC profiling. */
+ if (rtcHz && firstDelay == 0) {
+ puts("Jprof: enabled RTC signals");
+ enableRTCSignals(true);
+ } else
+#endif
+ {
+ puts("Jprof: started timer");
+ // firstDelay is in seconds, timerMilliSec in ms.
+ startSignalCounter(firstDelay*1000 + timerMilliSec);
+ }
+ }
+ }
+ }
+ }
+ } else {
+ printf("setupProfilingStuff() called multiple times\n");
+ }
+}
diff --git a/tools/jprof/stub/libmalloc.h b/tools/jprof/stub/libmalloc.h
new file mode 100644
index 000000000..8b29bf889
--- /dev/null
+++ b/tools/jprof/stub/libmalloc.h
@@ -0,0 +1,45 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef libmalloc_h___
+#define libmalloc_h___
+
+#include <sys/types.h>
+#include <malloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "config.h"
+
+typedef unsigned long u_long;
+
+// For me->flags
+#define JP_FIRST_AFTER_PAUSE 1
+
+// Format of a jprof log entry. This is what's written out to the
+// "jprof-log" file.
+// It's called malloc_log_entry because the history of jprof is that
+// it's a modified version of tracemalloc.
+// Only the fields before pcs plus the first numpcs slots of pcs are written
+// out for each entry (see JprofEntrySizeof in libmalloc.cpp), so entries in
+// the log are variable-sized.
+struct malloc_log_entry {
+ u_long delTime; // time value passed to JprofLog (ms since the first tick)
+ u_long numpcs; // number of valid slots in pcs
+ unsigned int flags; // 0 or JP_FIRST_AFTER_PAUSE
+ int thread; // id of the sampled thread (SYS_gettid)
+ char* pcs[MAX_STACK_CRAWL]; // captured program counters, interrupted pc first
+};
+
+// Format of a malloc map entry; after this struct comes the name data.
+// DumpAddressMap in libmalloc.cpp writes exactly nameLen bytes of name with
+// no terminating NUL.
+// NOTE(review): this comment used to say "nameLen+1 bytes"; confirm that
+// readers of the map file expect nameLen bytes.
+struct malloc_map_entry {
+ u_long nameLen;
+ u_long address; // base address
+};
+
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
+
+#endif /* libmalloc_h___ */
diff --git a/tools/jprof/stub/moz.build b/tools/jprof/stub/moz.build
new file mode 100644
index 000000000..a2f514a54
--- /dev/null
+++ b/tools/jprof/stub/moz.build
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+EXPORTS += [
+ 'jprof.h',
+]
+
+SOURCES += [
+ 'libmalloc.cpp',
+]
+
+SharedLibrary('jprof')
+
+DEFINES['_IMPL_JPROF_API'] = True