101 files changed, 26801 insertions, 0 deletions
diff --git a/tools/profiler/core/EHABIStackWalk.cpp b/tools/profiler/core/EHABIStackWalk.cpp
new file mode 100644
index 000000000..76068cdea
--- /dev/null
+++ b/tools/profiler/core/EHABIStackWalk.cpp
@@ -0,0 +1,678 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI, as described in:
+ *   http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
+ *
+ * This handles only the ARM-defined "personality routines" (chapter
+ * 9), and doesn't track the value of FP registers, because profiling
+ * needs only a chain of PC/SP values.
+ *
+ * Because the exception handling info may not be accurate for all
+ * possible places where an async signal could occur (e.g., in a
+ * prologue or epilogue), this bounds-checks all stack accesses.
+ *
+ * This file uses "struct" for structures in the exception tables and
+ * "class" otherwise.  We should avoid violating the C++11
+ * standard-layout rules in the former.
+ */
+
+#include "EHABIStackWalk.h"
+
+#include "shared-libraries.h"
+#include "platform.h"
+
+#include "mozilla/Atomics.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/EndianUtils.h"
+
+#include <algorithm>
+#include <elf.h>
+#include <stdint.h>
+#include <vector>
+#include <string>
+
+#ifndef PT_ARM_EXIDX
+#define PT_ARM_EXIDX 0x70000001
+#endif
+
+// Bug 1082817: ICS B2G has a buggy linker that doesn't always ensure
+// that the EXIDX is sorted by address, as the spec requires.  So in
+// that case we build and sort an array of pointers into the index,
+// and binary-search that; otherwise, we search the index in place
+// (avoiding the time and space overhead of the indirection).
+#if defined(ANDROID_VERSION) && ANDROID_VERSION < 16
+#define HAVE_UNSORTED_EXIDX
+#endif
+
+namespace mozilla {
+
+struct PRel31 {  // 31-bit self-relative offset word, as stored in the tables
+  uint32_t mBits;
+  bool topBit() const { return mBits & 0x80000000; }
+  uint32_t value() const { return mBits & 0x7fffffff; }
+  // Sign-extends bit 30.  The left shift is done on the unsigned value
+  // because overflowing a signed left shift is undefined before C++20.
+  int32_t offset() const { return static_cast<int32_t>(mBits << 1) >> 1; }
+  const void *compute() const { return reinterpret_cast<const char *>(this) + offset(); }
+private:
+  PRel31(const PRel31 &copied) = delete;  // compute() depends on our address
+  PRel31() = delete;
+};
+
+struct EHEntry {  // One index record: a code start address plus its unwind data
+  PRel31 startPC;  // self-relative start address that lookups compare PCs against
+  PRel31 exidx;  // 1 = can't unwind; top bit set = inline data; else offset to data
+private:
+  EHEntry(const EHEntry &copied) = delete;
+  EHEntry() = delete;
+};
+
+class EHState {  // The 16 core register values the unwinder reads and rewrites
+  // Note that any core register can be used as a "frame pointer" to
+  // influence the unwinding process, so this must track all of them.
+  uint32_t mRegs[16];
+public:
+  bool unwind(const EHEntry *aEntry, const void *stackBase);  // false if the step failed
+  uint32_t &operator[](int i) { return mRegs[i]; }  // no bounds check on i
+  const uint32_t &operator[](int i) const { return mRegs[i]; }
+  EHState(const mcontext_t &);  // copies all 16 registers out of the context
+};
+
+enum {  // EHState indices of the registers this file refers to by name
+  R_SP = 13,  // stack pointer
+  R_LR = 14,  // link register
+  R_PC = 15  // program counter
+};
+
+#ifdef HAVE_UNSORTED_EXIDX
+class EHEntryHandle {  // Pointer to an in-place EHEntry, wrapped so it can be sorted
+  const EHEntry *mValue;
+public:
+  EHEntryHandle(const EHEntry *aEntry) : mValue(aEntry) { }  // implicit: EHTable push_back()s raw pointers
+  const EHEntry *value() const { return mValue; }
+};
+
+bool operator<(const EHEntryHandle &lhs, const EHEntryHandle &rhs) {  // order by decoded start address
+  return lhs.value()->startPC.compute() < rhs.value()->startPC.compute();
+}
+#endif
+
+class EHTable {  // Exception index and executable address range of one ELF image
+  uint32_t mStartPC;  // lowest address of an executable PT_LOAD segment (relocated)
+  uint32_t mEndPC;  // end of the executable range; lookup() treats it as exclusive
+  uint32_t mLoadOffset;  // image base minus the p_vaddr of the offset-0 PT_LOAD segment
+#ifdef HAVE_UNSORTED_EXIDX
+  // In principle we should be able to binary-search the index section in
+  // place, but the ICS toolchain's linker is noncompliant and produces
+  // indices that aren't entirely sorted (e.g., libc).  So we have this:
+  std::vector<EHEntryHandle> mEntries;
+  typedef std::vector<EHEntryHandle>::const_iterator EntryIterator;
+  EntryIterator entriesBegin() const { return mEntries.begin(); }
+  EntryIterator entriesEnd() const { return mEntries.end(); }
+  static const EHEntry* entryGet(EntryIterator aEntry) {
+    return aEntry->value();
+  }
+#else
+  typedef const EHEntry *EntryIterator;  // iterate the mapped index in place
+  EntryIterator mEntriesBegin, mEntriesEnd;
+  EntryIterator entriesBegin() const { return mEntriesBegin; }
+  EntryIterator entriesEnd() const { return mEntriesEnd; }
+  static const EHEntry* entryGet(EntryIterator aEntry) { return aEntry; }
+#endif
+  std::string mName;
+public:
+  EHTable(const void *aELF, size_t aSize, const std::string &aName);
+  const EHEntry *lookup(uint32_t aPC) const;  // index entry covering aPC, or null
+  bool isValid() const { return entriesEnd() != entriesBegin(); }  // has at least one entry
+  const std::string &name() const { return mName; }
+  uint32_t startPC() const { return mStartPC; }
+  uint32_t endPC() const { return mEndPC; }
+  uint32_t loadOffset() const { return mLoadOffset; }
+};
+
+class EHAddrSpace {  // All usable EHTables of the process, searchable by PC
+  std::vector<uint32_t> mStarts;  // startPC() of each table, parallel to mTables
+  std::vector<EHTable> mTables;  // sorted by start address in the constructor
+  static mozilla::Atomic<const EHAddrSpace*> sCurrent;  // null until Update() publishes one
+public:
+  explicit EHAddrSpace(const std::vector<EHTable>& aTables);
+  const EHTable *lookup(uint32_t aPC) const;  // table whose range contains aPC, or null
+  static void Update();  // builds and publishes the address space once
+  static const EHAddrSpace *Get();  // currently published address space, may be null
+};
+
+
+void EHABIStackWalkInit()  // one-time setup: gathers the unwind tables via Update()
+{
+  EHAddrSpace::Update();
+}
+
+size_t EHABIStackWalk(const mcontext_t &aContext, void *stackBase,
+                      void **aSPs, void **aPCs, const size_t aNumFrames)
+{
+  // Records one PC/SP pair per frame, innermost first, until aNumFrames
+  // pairs are stored or unwinding cannot continue; returns the pair count.
+
+  const EHAddrSpace *addrSpace = EHAddrSpace::Get();
+  EHState regs(aContext);
+  size_t frames = 0;
+
+  for (bool more = true; more && frames < aNumFrames; ) {
+    const uint32_t framePC = regs[R_PC];
+    const uint32_t frameSP = regs[R_SP];
+    aPCs[frames] = reinterpret_cast<void *>(framePC);
+    aSPs[frames] = reinterpret_cast<void *>(frameSP);
+    ++frames;
+
+    // TODO: cache these lookups.  Binary-searching libxul is
+    // expensive (possibly more expensive than doing the actual
+    // unwind), and even a small cache should help.
+    //
+    // A missing address space, table or index entry ends the walk after
+    // the frame that was just recorded.
+    const EHTable *owner = addrSpace ? addrSpace->lookup(framePC) : nullptr;
+    const EHEntry *record = owner ? owner->lookup(framePC) : nullptr;
+    more = record && regs.unwind(record, stackBase);
+  }
+
+  return frames;
+}
+
+
+class EHInterp {
+public:
+  // Note that stackLimit is exclusive and stackBase is inclusive
+  // (i.e., stackLimit < SP <= stackBase), following the convention
+  // set by the AAPCS spec.
+  EHInterp(EHState &aState, const EHEntry *aEntry,
+           uint32_t aStackLimit, uint32_t aStackBase)
+    : mState(aState),
+      mStackLimit(aStackLimit),
+      mStackBase(aStackBase),
+      mNextWord(0),
+      mWordsLeft(0),
+      mFailed(false)
+  {
+    const PRel31 &exidx = aEntry->exidx;
+    uint32_t firstWord;
+
+    if (exidx.mBits == 1) {  // EXIDX_CANTUNWIND
+      mFailed = true;
+      return;
+    }
+    if (exidx.topBit()) {
+      firstWord = exidx.mBits;
+    } else {
+      mNextWord = reinterpret_cast<const uint32_t *>(exidx.compute());
+      firstWord = *mNextWord++;
+    }
+
+    switch (firstWord >> 24) {
+    case 0x80: // short
+      mWord = firstWord << 8;
+      mBytesLeft = 3;
+      break;
+    case 0x81: case 0x82: // long; catch descriptor size ignored
+      mWord = firstWord << 16;
+      mBytesLeft = 2;
+      mWordsLeft = (firstWord >> 16) & 0xff;
+      mFailed = !mNextWord; // inline data has no further words for next()
+      break;
+    default: // unknown personality
+      mFailed = true;
+    }
+  }
+
+  bool unwind();  // runs the program; true only if it reaches Finish
+
+private:
+  // TODO: GCC has been observed not CSEing repeated reads of
+  // mState[R_SP] with writes to mFailed between them, suggesting that
+  // it hasn't determined that they can't alias and is thus missing
+  // optimization opportunities.  So, we may want to flatten EHState
+  // into this class; this may also make the code simpler.
+  EHState &mState;
+  uint32_t mStackLimit;
+  uint32_t mStackBase;
+  const uint32_t *mNextWord;  // next table word to fetch; null for inline data
+  uint32_t mWord;  // current word, rotated so the next byte comes out on top
+  uint8_t mWordsLeft;  // whole words still to be fetched through mNextWord
+  uint8_t mBytesLeft;  // instruction bytes not yet consumed from mWord
+  bool mFailed;  // sticky error flag; unwind() stops once it is set
+
+  enum {
+    I_ADDSP    = 0x00, // 0sxxxxxx (subtract if s)
+    M_ADDSP    = 0x80,
+    I_POPMASK  = 0x80, // 1000iiii iiiiiiii (if any i set)
+    M_POPMASK  = 0xf0,
+    I_MOVSP    = 0x90, // 1001nnnn
+    M_MOVSP    = 0xf0,
+    I_POPN     = 0xa0, // 1010lnnn
+    M_POPN     = 0xf0,
+    I_FINISH   = 0xb0, // 10110000
+    I_POPLO    = 0xb1, // 10110001 0000iiii (if any i set)
+    I_ADDSPBIG = 0xb2, // 10110010 uleb128
+    I_POPFDX   = 0xb3, // 10110011 sssscccc
+    I_POPFDX8  = 0xb8, // 10111nnn
+    M_POPFDX8  = 0xf8,
+    // "Intel Wireless MMX" extensions omitted.
+    I_POPFDD   = 0xc8, // 1100100h sssscccc
+    M_POPFDD   = 0xfe,
+    I_POPFDD8  = 0xd0, // 11010nnn
+    M_POPFDD8  = 0xf8
+  };
+
+  uint8_t next() {  // next instruction byte, or I_FINISH when the program is exhausted
+    if (mBytesLeft == 0) {
+      if (mWordsLeft == 0) {
+        return I_FINISH;
+      }
+      mWordsLeft--;
+      mWord = *mNextWord++;
+      mBytesLeft = 4;
+    }
+    mBytesLeft--;
+    mWord = (mWord << 8) | (mWord >> 24); // rotate
+    return mWord;  // truncation to uint8_t keeps the byte just rotated into place
+  }
+
+  uint32_t &vSP() { return mState[R_SP]; }
+  uint32_t *ptrSP() { return reinterpret_cast<uint32_t *>(vSP()); }
+
+  void checkStackBase() { if (vSP() > mStackBase) mFailed = true; }
+  void checkStackLimit() { if (vSP() <= mStackLimit) mFailed = true; }
+  void checkStackAlign() { if ((vSP() & 3) != 0) mFailed = true; }
+  void checkStack() {
+    checkStackBase();
+    checkStackLimit();
+    checkStackAlign();
+  }
+
+  void popRange(uint8_t first, uint8_t last, uint16_t mask) {  // bit 0 of mask = register 'first'
+    bool hasSP = false;
+    uint32_t tmpSP = 0;  // only meaningful once hasSP is set
+    if (mask == 0)
+      mFailed = true;
+    for (uint8_t r = first; r <= last; ++r) {
+      if (mask & 1) {
+        if (r == R_SP) {  // defer: later pops must still use the old SP
+          hasSP = true;
+          tmpSP = *ptrSP();
+        } else
+          mState[r] = *ptrSP();
+        vSP() += 4;
+        checkStackBase();
+        if (mFailed)
+          return;
+      }
+      mask >>= 1;
+    }
+    if (hasSP) {
+      vSP() = tmpSP;
+      checkStack();
+    }
+  }
+};
+
+
+bool EHState::unwind(const EHEntry *aEntry, const void *stackBasePtr) {
+  // The unwinding program cannot set SP to less than the initial value.
+  const uint32_t lowestSP = mRegs[R_SP] - 4;  // exclusive lower bound
+  const uint32_t highestSP = reinterpret_cast<uint32_t>(stackBasePtr);
+  // Run this entry's unwind program against our own registers, in place.
+  return EHInterp(*this, aEntry, lowestSP, highestSP).unwind();
+}
+
+// Runs the unwind program on mState.  Returns true only if it reaches Finish.
+bool EHInterp::unwind() {
+  mState[R_PC] = 0;
+  checkStack();
+  while (!mFailed) {
+    uint8_t insn = next();
+#ifdef DEBUG_EHABI_UNWIND
+    LOGF("unwind insn = %02x", (unsigned)insn);
+#endif
+    // Try to put the common cases first.
+
+    // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
+    // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4
+    if ((insn & M_ADDSP) == I_ADDSP) {
+      uint32_t offset = ((insn & 0x3f) << 2) + 4;
+      if (insn & 0x40) {
+        vSP() -= offset;
+        checkStackLimit();
+      } else {
+        vSP() += offset;
+        checkStackBase();
+      }
+      continue;
+    }
+
+    // 10100nnn: Pop r4-r[4+nnn]
+    // 10101nnn: Pop r4-r[4+nnn], r14
+    if ((insn & M_POPN) == I_POPN) {
+      uint8_t n = (insn & 0x07) + 1;
+      bool lr = insn & 0x08;
+      uint32_t *ptr = ptrSP();
+      vSP() += (n + (lr ? 1 : 0)) * 4;
+      checkStackBase();
+      if (mFailed) return false; // don't read past the stack base
+      for (uint8_t r = 4; r < 4 + n; ++r)
+        mState[r] = *ptr++;
+      if (lr)
+        mState[R_LR] = *ptr++;
+      continue;
+    }
+
+    // 10110000: Finish
+    if (insn == I_FINISH) {
+      if (mState[R_PC] == 0) {
+        mState[R_PC] = mState[R_LR];
+        // Non-standard change (bug 916106): Prevent the caller from re-using
+        // LR.  Since the caller is by definition not a leaf routine, it
+        // will have to restore LR from somewhere to return to its own
+        // caller, so we can safely zero it here.  This makes a difference
+        // only if an error in unwinding (e.g., caused by starting from
+        // within a prologue/epilogue) causes us to load a pointer to a leaf
+        // routine as LR; if we don't do something, we'll go into an infinite
+        // loop of "returning" to that same function.
+        mState[R_LR] = 0;
+      }
+      return true;
+    }
+
+    // 1001nnnn: Set vsp = r[nnnn]
+    if ((insn & M_MOVSP) == I_MOVSP) {
+      vSP() = mState[insn & 0x0f];
+      checkStack();
+      continue;
+    }
+
+    // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD)
+    // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD)
+    if ((insn & M_POPFDD) == I_POPFDD) {
+      uint8_t n = (next() & 0x0f) + 1;
+      // Note: if the 16+ssss+cccc > 31, the encoding is reserved.
+      // As the space is currently unused, we don't try to check.
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD)
+    if ((insn & M_POPFDD8) == I_POPFDD8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n;
+      checkStackBase();
+      continue;
+    }
+
+    // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
+    if (insn == I_ADDSPBIG) {
+      uint32_t acc = 0;
+      uint8_t shift = 0;
+      uint8_t byte;
+      do {
+        if (shift >= 32)
+          return false;
+        byte = next();
+        acc |= static_cast<uint32_t>(byte & 0x7f) << shift; // unsigned: no UB
+        shift += 7;
+      } while (byte & 0x80);
+      uint32_t offset = 0x204 + (acc << 2);
+      // The math above may have overflowed, but the case we care about is:
+      if (vSP() + offset < vSP())
+        mFailed = true;
+      vSP() += offset;
+      // ...so that this is the only other check needed:
+      checkStackBase();
+      continue;
+    }
+
+    // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4}
+    if ((insn & M_POPMASK) == I_POPMASK) {
+      popRange(4, 15, ((insn & 0x0f) << 8) | next());
+      continue;
+    }
+
+    // 10110001 0000iiii (i not all 0): Pop under mask {r3-r0}
+    if (insn == I_POPLO) {
+      popRange(0, 3, next() & 0x0f);
+      continue;
+    }
+
+    // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX)
+    if (insn == I_POPFDX) {
+      uint8_t n = (next() & 0x0f) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX)
+    if ((insn & M_POPFDX8) == I_POPFDX8) {
+      uint8_t n = (insn & 0x07) + 1;
+      vSP() += 8 * n + 4;
+      checkStackBase();
+      continue;
+    }
+
+    // unhandled instruction
+#ifdef DEBUG_EHABI_UNWIND
+    LOGF("Unhandled EHABI instruction 0x%02x", insn);
+#endif
+    mFailed = true;
+  }
+  return false;
+}
+
+
+bool operator<(const EHTable &lhs, const EHTable &rhs) {  // sort order: ascending start address
+  return lhs.startPC() < rhs.startPC();
+}
+
+// Async signal unsafe.  Sorts a copy of aTables and indexes its start PCs.
+EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables)
+  : mTables(aTables)
+{
+  std::sort(mTables.begin(), mTables.end());
+  DebugOnly<uint32_t> prevEnd = 0;  // debug check: sorted tables must not overlap
+  for (size_t idx = 0; idx < mTables.size(); ++idx) {
+    const EHTable &table = mTables[idx];
+    MOZ_ASSERT(table.startPC() >= prevEnd);
+    mStarts.push_back(table.startPC());
+    prevEnd = table.endPC();
+  }
+}
+
+const EHTable *EHAddrSpace::lookup(uint32_t aPC) const {
+  // Candidate: the last table whose start address is <= aPC, if any.
+  std::vector<uint32_t>::const_iterator above =
+    std::upper_bound(mStarts.begin(), mStarts.end(), aPC);
+  if (above == mStarts.begin()) return nullptr;  // aPC precedes every table
+  const EHTable &candidate = mTables[(above - mStarts.begin()) - 1];
+  return aPC < candidate.endPC() ? &candidate : nullptr;
+}
+
+
+const EHEntry *EHTable::lookup(uint32_t aPC) const {  // entry covering aPC, or null
+  MOZ_ASSERT(aPC >= mStartPC);
+  if (aPC >= mEndPC)
+    return nullptr;
+
+  EntryIterator begin = entriesBegin();
+  EntryIterator end = entriesEnd();
+  MOZ_ASSERT(begin < end);
+  if (aPC < reinterpret_cast<uint32_t>(entryGet(begin)->startPC.compute()))
+    return nullptr;  // aPC lies before the first entry's start address
+
+  while (end - begin > 1) {  // binary search; start(begin) <= aPC holds throughout
+#ifdef EHABI_UNWIND_MORE_ASSERTS
+    if (entryGet(end - 1)->startPC.compute()
+        < entryGet(begin)->startPC.compute()) {
+      MOZ_CRASH("unsorted exidx");
+    }
+#endif
+    EntryIterator mid = begin + (end - begin) / 2;
+    if (aPC < reinterpret_cast<uint32_t>(entryGet(mid)->startPC.compute()))
+      end = mid;  // mid starts after aPC: answer is strictly below mid
+    else
+      begin = mid;  // mid starts at or before aPC: it becomes the candidate
+  }
+  return entryGet(begin);  // last entry whose start address is <= aPC
+}
+
+
+#if MOZ_LITTLE_ENDIAN
+static const unsigned char hostEndian = ELFDATA2LSB;  // compared with e_ident[EI_DATA]
+#elif MOZ_BIG_ENDIAN
+static const unsigned char hostEndian = ELFDATA2MSB;  // compared with e_ident[EI_DATA]
+#else
+#error "No endian?"
+#endif
+
+// Async signal unsafe: std::vector::reserve, std::string copy ctor.
+EHTable::EHTable(const void *aELF, size_t aSize, const std::string &aName)
+  : mStartPC(~0), // largest uint32_t
+    mEndPC(0),
+    mLoadOffset(0), // defined even when the image is rejected below
+#ifndef HAVE_UNSORTED_EXIDX
+    mEntriesBegin(nullptr),
+    mEntriesEnd(nullptr),
+#endif
+    mName(aName)
+{
+  const uint32_t base = reinterpret_cast<uint32_t>(aELF);
+
+  if (aSize < sizeof(Elf32_Ehdr))
+    return;
+
+  const Elf32_Ehdr &file = *(reinterpret_cast<Elf32_Ehdr *>(base));
+  if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 ||
+      file.e_ident[EI_CLASS] != ELFCLASS32 ||
+      file.e_ident[EI_DATA] != hostEndian ||
+      file.e_ident[EI_VERSION] != EV_CURRENT ||
+      file.e_ident[EI_OSABI] != ELFOSABI_SYSV ||
+#ifdef EI_ABIVERSION
+      file.e_ident[EI_ABIVERSION] != 0 ||
+#endif
+      file.e_machine != EM_ARM ||
+      file.e_version != EV_CURRENT)
+    return; // e_flags?
+
+  const uint64_t phdrEnd = file.e_phoff +
+    static_cast<uint64_t>(file.e_phnum) * file.e_phentsize;
+  if (phdrEnd > aSize)  // headers not fully mapped: reading them is unsafe
+    return;
+  const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0;
+  for (unsigned i = 0; i < file.e_phnum; ++i) {
+    const Elf32_Phdr &phdr =
+      *(reinterpret_cast<Elf32_Phdr *>(base + file.e_phoff
+                                       + i * file.e_phentsize));
+    if (phdr.p_type == PT_ARM_EXIDX) {
+      exidxHdr = &phdr;
+    } else if (phdr.p_type == PT_LOAD) {
+      if (phdr.p_offset == 0)
+        zeroHdr = &phdr;
+      if (phdr.p_flags & PF_X) {
+        mStartPC = std::min(mStartPC, phdr.p_vaddr);
+        mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz);
+      }
+    }
+  }
+  if (!exidxHdr || !zeroHdr)  // need both the index and the load bias
+    return;
+  mLoadOffset = base - zeroHdr->p_vaddr;
+  mStartPC += mLoadOffset;
+  mEndPC += mLoadOffset;
+
+  // Create a sorted index of the index to work around linker bugs.
+  const EHEntry *startTable =
+    reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr);
+  const EHEntry *endTable =
+    reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr
+                                    + exidxHdr->p_memsz);
+#ifdef HAVE_UNSORTED_EXIDX
+  mEntries.reserve(endTable - startTable);
+  for (const EHEntry *i = startTable; i < endTable; ++i)
+    mEntries.push_back(i);
+  std::sort(mEntries.begin(), mEntries.end());
+#else
+  mEntriesBegin = startTable;
+  mEntriesEnd = endTable;
+#endif
+}
+
+
+mozilla::Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr);  // set once by Update()
+
+// Async signal safe; can fail if Update() hasn't returned yet.
+const EHAddrSpace *EHAddrSpace::Get() {
+  return sCurrent;  // null ("failure") until an address space is published
+}
+
+// Collect unwinding information from loaded objects.  Calls after the
+// first have no effect.  Async signal unsafe.
+void EHAddrSpace::Update() {
+  const EHAddrSpace *published = sCurrent;
+  if (published)
+    return;
+
+  SharedLibraryInfo libs = SharedLibraryInfo::GetInfoForSelf();
+  std::vector<EHTable> usable;
+
+  for (size_t idx = 0; idx < libs.GetSize(); ++idx) {
+    const SharedLibrary &lib = libs.GetEntry(idx);
+    // Only mappings of file offset 0 are parsed as ELF images.
+    // TODO: if it has a name, and we haven't seen a mapping of
+    // offset 0 for that file, try opening it and reading the
+    // headers instead.  The only thing I've seen so far that's
+    // linked so as to need that treatment is the dynamic linker
+    // itself.
+    if (lib.GetOffset() == 0) {
+      EHTable parsed(reinterpret_cast<const void *>(lib.GetStart()),
+                     lib.GetEnd() - lib.GetStart(), lib.GetName());
+      if (parsed.isValid())
+        usable.push_back(parsed);
+    }
+  }
+  const EHAddrSpace *fresh = new EHAddrSpace(usable);
+
+  // Install our result unless another caller won the race; then discard it.
+  if (!sCurrent.compareExchange(nullptr, fresh))
+    delete fresh;
+}
+
+
+EHState::EHState(const mcontext_t &context) {  // load r0-r15 from the saved context
+#ifdef linux
+  mRegs[0] = context.arm_r0;
+  mRegs[1] = context.arm_r1;
+  mRegs[2] = context.arm_r2;
+  mRegs[3] = context.arm_r3;
+  mRegs[4] = context.arm_r4;
+  mRegs[5] = context.arm_r5;
+  mRegs[6] = context.arm_r6;
+  mRegs[7] = context.arm_r7;
+  mRegs[8] = context.arm_r8;
+  mRegs[9] = context.arm_r9;
+  mRegs[10] = context.arm_r10;
+  mRegs[11] = context.arm_fp;  // r11 is stored under the name "fp"
+  mRegs[12] = context.arm_ip;  // r12 is stored under the name "ip"
+  mRegs[13] = context.arm_sp;  // R_SP
+  mRegs[14] = context.arm_lr;  // R_LR
+  mRegs[15] = context.arm_pc;  // R_PC
+#else
+# error "Unhandled OS for ARM EHABI unwinding"
+#endif
+}
+
+} // namespace mozilla
+
diff --git a/tools/profiler/core/EHABIStackWalk.h b/tools/profiler/core/EHABIStackWalk.h
new file mode 100644
index 000000000..5529d9511
--- /dev/null
+++ b/tools/profiler/core/EHABIStackWalk.h
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This is an implementation of stack unwinding according to a subset
+ * of the ARM Exception Handling ABI; see the comment at the top of
+ * the .cpp file for details.
+ */
+
+#ifndef mozilla_EHABIStackWalk_h__
+#define mozilla_EHABIStackWalk_h__
+
+#include <stddef.h>
+#include <ucontext.h>
+
+namespace mozilla {
+
+void EHABIStackWalkInit();  // gathers the unwind tables; not async-signal-safe
+
+size_t EHABIStackWalk(const mcontext_t &aContext, void *stackBase,
+                      void **aSPs, void **aPCs, size_t aNumFrames);  // returns frames written
+
+}
+
+#endif
diff --git a/tools/profiler/core/GeckoSampler.cpp b/tools/profiler/core/GeckoSampler.cpp
new file mode 100644
index 000000000..f4249a7a5
--- /dev/null
+++ b/tools/profiler/core/GeckoSampler.cpp
@@ -0,0 +1,1306 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <string>
+#include <stdio.h>
+#include <fstream>
+#include <sstream>
+#include "GeckoProfiler.h"
+#ifndef SPS_STANDALONE
+#include "SaveProfileTask.h"
+#include "nsThreadUtils.h"
+#include "prenv.h"
+#include "prtime.h"
+#include "nsXULAppAPI.h"
+#endif
+#include "ProfileEntry.h"
+#include "SyncProfile.h"
+#include "platform.h"
+#include "shared-libraries.h"
+#include "mozilla/StackWalk.h"
+#include "GeckoSampler.h"
+
+// JSON
+#include "ProfileJSONWriter.h"
+
+#ifndef SPS_STANDALONE
+// Meta
+#include "nsXPCOM.h"
+#include "nsXPCOMCID.h"
+#include "nsIHttpProtocolHandler.h"
+#include "nsServiceManagerUtils.h"
+#include "nsIXULRuntime.h"
+#include "nsIXULAppInfo.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsIObserverService.h"
+#include "mozilla/Services.h"
+#include "PlatformMacros.h"
+#include "nsTArray.h"
+
+#include "mozilla/ProfileGatherer.h"
+#endif
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+  #include "FennecJNIWrappers.h"
+#endif
+
+#ifndef SPS_STANDALONE
+// JS
+#include "jsfriendapi.h"
+#include "js/ProfilingFrameIterator.h"
+#endif
+
+#if defined(MOZ_PROFILING) && (defined(XP_MACOSX) || defined(XP_WIN))
+ #define USE_NS_STACKWALK
+#endif
+
+#if defined(XP_WIN)
+typedef CONTEXT tickcontext_t;
+#elif defined(LINUX)
+#include <ucontext.h>
+typedef ucontext_t tickcontext_t;
+#endif
+
+#if defined(LINUX) || defined(XP_MACOSX)
+#include <sys/types.h>
+pid_t gettid();
+#endif
+
+#if defined(__arm__) && defined(ANDROID)
+ // Should also work on ARM Linux, but not tested there yet.
+ #define USE_EHABI_STACKWALK
+#endif
+#ifdef USE_EHABI_STACKWALK
+ #include "EHABIStackWalk.h"
+#endif
+
+#ifndef SPS_STANDALONE
+#if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux)
+# define USE_LUL_STACKWALK
+# include "lul/LulMain.h"
+# include "lul/platform-linux-lul.h"
+#endif
+#endif
+
+using std::string;
+using namespace mozilla;
+
+#ifndef MAXPATHLEN
+ #ifdef PATH_MAX
+  #define MAXPATHLEN PATH_MAX
+ #elif defined(MAX_PATH)
+  #define MAXPATHLEN MAX_PATH
+ #elif defined(_MAX_PATH)
+  #define MAXPATHLEN _MAX_PATH
+ #elif defined(CCHMAXPATH)
+  #define MAXPATHLEN CCHMAXPATH
+ #else
+  #define MAXPATHLEN 1024
+ #endif
+#endif
+
+#ifdef MOZ_VALGRIND
+# include <valgrind/memcheck.h>
+#else
+# define VALGRIND_MAKE_MEM_DEFINED(_addr,_len)   ((void)0)
+#endif
+
+
+///////////////////////////////////////////////////////////////////////
+// BEGIN SaveProfileTask et al
+
+static void
+AddSharedLibraryInfoToStream(std::ostream& aStream, const SharedLibrary& aLib)
+{
+  // Emits aLib as a single JSON object; string values are written verbatim.
+  const std::string &id = aLib.GetBreakpadId();
+  aStream << "{"
+          << "\"start\":" << aLib.GetStart()
+          << ",\"end\":" << aLib.GetEnd()
+          << ",\"offset\":" << aLib.GetOffset()
+          << ",\"name\":\"" << aLib.GetName() << "\""
+          << ",\"breakpadId\":\"" << id << "\"";
+#ifdef XP_WIN
+  // FIXME: remove this XP_WIN code when the profiler plugin has switched to
+  // using breakpadId.
+  // The first 32 characters are the PDB signature; the rest is the hex age.
+  const std::string pdbSignature = id.substr(0, 32);
+  std::stringstream ageParser;
+  ageParser << id.substr(32,  id.size() - 1);
+  ageParser << std::hex;
+  unsigned pdbAge;
+  ageParser >> pdbAge;
+
+#ifdef DEBUG
+  // Re-encoding the two parts must reproduce the original identifier.
+  std::ostringstream roundTrip;
+  roundTrip << pdbSignature << std::hex << std::uppercase << pdbAge;
+  MOZ_ASSERT(id == roundTrip.str());
+#endif
+
+  aStream << ",\"pdbSignature\":\"" << pdbSignature << "\""
+          << ",\"pdbAge\":" << pdbAge
+          << ",\"pdbName\":\"" << aLib.GetName() << "\"";
+#endif
+  aStream << "}";
+}
+
+std::string
+GetSharedLibraryInfoStringInternal()
+{
+  // Produces a JSON array holding one object per library loaded into this
+  // process; an empty library list yields "[]".
+  SharedLibraryInfo libs = SharedLibraryInfo::GetInfoForSelf();
+
+  std::ostringstream json;
+  json << "[";
+  for (size_t idx = 0; idx < libs.GetSize(); ++idx) {
+    // Separate consecutive elements, but put nothing before the first.
+    if (idx != 0) {
+      json << ",";
+    }
+    AddSharedLibraryInfoToStream(json, libs.GetEntry(idx));
+  }
+  json << "]";
+  return json.str();
+}
+
+static bool
+hasFeature(const char** aFeatures, uint32_t aFeatureCount, const char* aFeature) {
+  // True when aFeature exactly matches one of the first aFeatureCount names.
+  const char** const end = aFeatures + aFeatureCount;
+  for (const char** cur = aFeatures; cur != end; ++cur)
+    if (!strcmp(*cur, aFeature)) return true;
+  return false;
+}
+
+GeckoSampler::GeckoSampler(double aInterval, int aEntrySize,
+                         const char** aFeatures, uint32_t aFeatureCount,
+                         const char** aThreadNameFilters, uint32_t aFilterCount)
+  : Sampler(aInterval, true, aEntrySize)
+  , mPrimaryThreadProfile(nullptr)
+  , mBuffer(new ProfileBuffer(aEntrySize))
+  , mSaveRequested(false)
+#if defined(XP_WIN)
+  , mIntelPowerGadget(nullptr)
+#endif
+{
+  mUseStackWalk = hasFeature(aFeatures, aFeatureCount, "stackwalk");  // each flag: is the name in aFeatures?
+
+  mProfileJS = hasFeature(aFeatures, aFeatureCount, "js");
+  mProfileGPU = hasFeature(aFeatures, aFeatureCount, "gpu");
+  mProfilePower = hasFeature(aFeatures, aFeatureCount, "power");
+  // Users sometimes ask to filter by a list of threads but forget to request
+  // profiling non main threads. Let's make it implicit if we have a filter
+  mProfileThreads = hasFeature(aFeatures, aFeatureCount, "threads") || aFilterCount > 0;
+  mAddLeafAddresses = hasFeature(aFeatures, aFeatureCount, "leaf");
+  mPrivacyMode = hasFeature(aFeatures, aFeatureCount, "privacy");
+  mAddMainThreadIO = hasFeature(aFeatures, aFeatureCount, "mainthreadio");
+  mProfileMemory = hasFeature(aFeatures, aFeatureCount, "memory");
+  mTaskTracer = hasFeature(aFeatures, aFeatureCount, "tasktracer");
+  mLayersDump = hasFeature(aFeatures, aFeatureCount, "layersdump");
+  mDisplayListDump = hasFeature(aFeatures, aFeatureCount, "displaylistdump");
+  mProfileRestyle = hasFeature(aFeatures, aFeatureCount, "restyle");
+
+#if defined(XP_WIN)
+  if (mProfilePower) {
+    mIntelPowerGadget = new IntelPowerGadget();
+    mProfilePower = mIntelPowerGadget->Init();  // the flag now reflects whether Init() succeeded
+  }
+#endif
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+  mProfileJava = mozilla::jni::IsFennec() &&
+      hasFeature(aFeatures, aFeatureCount, "java");
+#else
+  mProfileJava = false;
+#endif
+
+  // Deep copy aThreadNameFilters
+  MOZ_ALWAYS_TRUE(mThreadNameFilters.resize(aFilterCount));
+  for (uint32_t i = 0; i < aFilterCount; ++i) {
+    mThreadNameFilters[i] = aThreadNameFilters[i];
+  }
+
+  // Deep copy aFeatures
+  MOZ_ALWAYS_TRUE(mFeatures.resize(aFeatureCount));
+  for (uint32_t i = 0; i < aFeatureCount; ++i) {
+    mFeatures[i] = aFeatures[i];
+  }
+
+  bool ignore;  // out-parameter of ProcessCreation(); its value is not used here
+  sStartTime = mozilla::TimeStamp::ProcessCreation(ignore);
+
+  {
+    ::MutexAutoLock lock(*sRegisteredThreadsMutex);
+
+    // Create ThreadProfile for each registered thread
+    for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) {
+      ThreadInfo* info = sRegisteredThreads->at(i);
+
+      RegisterThread(info);
+    }
+
+    SetActiveSampler(this);  // done while sRegisteredThreadsMutex is still held
+  }
+
+#ifdef MOZ_TASK_TRACER
+  if (mTaskTracer) {
+    mozilla::tasktracer::StartLogging();
+  }
+#endif
+
+  mGatherer = new mozilla::ProfileGatherer(this);
+}
+
+GeckoSampler::~GeckoSampler()
+{
+  if (IsActive())
+    Stop();
+
+  SetActiveSampler(nullptr);
+
+  // Release the per-thread profiles and drop threads marked pending-delete.
+  {
+    ::MutexAutoLock lock(*sRegisteredThreadsMutex);
+
+    uint32_t idx = 0;
+    while (idx < sRegisteredThreads->size()) {
+      ThreadInfo* thread = sRegisteredThreads->at(idx);
+      if (ThreadProfile* profile = thread->Profile()) {
+        delete profile;
+        thread->SetProfile(nullptr);
+      }
+      if (!thread->IsPendingDelete()) {
+        ++idx;
+        continue;
+      }
+      // Profiling has stopped, so this old thread's record isn't needed;
+      // erasing shifts the next element into slot idx, so idx stays put.
+      delete thread;
+      sRegisteredThreads->erase(sRegisteredThreads->begin() + idx);
+    }
+  }
+#if defined(XP_WIN)
+  delete mIntelPowerGadget;
+#endif
+
+  // Stop any asynchronous profile-gathering request still in flight.
+  mGatherer->Cancel();
+}
+
+void GeckoSampler::HandleSaveRequest()
+{
+  // Consume a pending save request, if any, by queueing a SaveProfileTask.
+  if (mSaveRequested) {
+    mSaveRequested = false;
+#ifndef SPS_STANDALONE
+    // TODO: Use the ipc/chromium Tasks here to support processes
+    // without XPCOM.
+    nsCOMPtr<nsIRunnable> task = new SaveProfileTask();
+    NS_DispatchToMainThread(task);
+#endif
+  }
+}
+
+void GeckoSampler::DeleteExpiredMarkers()  // thin forwarder to the profile buffer
+{
+  mBuffer->deleteExpiredStoredMarkers();
+}
+
+void GeckoSampler::StreamTaskTracer(SpliceableJSONWriter& aWriter)  // writes nothing unless MOZ_TASK_TRACER
+{
+#ifdef MOZ_TASK_TRACER
+  aWriter.StartArrayProperty("data");  // one string element per logged record
+    UniquePtr<nsTArray<nsCString>> data = mozilla::tasktracer::GetLoggedData(sStartTime);
+    for (uint32_t i = 0; i < data->Length(); ++i) {
+      aWriter.StringElement((data->ElementAt(i)).get());
+    }
+  aWriter.EndArray();
+
+  aWriter.StartArrayProperty("threads");  // one {name, tid} object per registered thread
+    ::MutexAutoLock lock(*sRegisteredThreadsMutex);  // stays held until the function returns
+    for (size_t i = 0; i < sRegisteredThreads->size(); i++) {
+      // Thread meta data
+      ThreadInfo* info = sRegisteredThreads->at(i);
+      aWriter.StartObjectElement();
+        if (XRE_GetProcessType() == GeckoProcessType_Plugin) {
+          // TODO Add the proper plugin name
+          aWriter.StringProperty("name", "Plugin");
+        } else {
+          aWriter.StringProperty("name", info->Name());
+        }
+        aWriter.IntProperty("tid", static_cast<int>(info->ThreadId()));
+      aWriter.EndObject();
+    }
+  aWriter.EndArray();
+
+  aWriter.DoubleProperty("start", static_cast<double>(mozilla::tasktracer::GetStartTime()));
+#endif
+}
+
+
+void GeckoSampler::StreamMetaJSCustomObject(SpliceableJSONWriter& aWriter)  // emits the profile's "meta" properties
+{
+  aWriter.IntProperty("version", 3);
+  aWriter.DoubleProperty("interval", interval());
+  aWriter.IntProperty("stackwalk", mUseStackWalk);  // bool written as an integer
+
+#ifndef SPS_STANDALONE
+  mozilla::TimeDuration delta = mozilla::TimeStamp::Now() - sStartTime;
+  aWriter.DoubleProperty("startTime", static_cast<double>(PR_Now()/1000.0 - delta.ToMilliseconds()));  // now minus elapsed time, in ms
+
+  aWriter.IntProperty("processType", XRE_GetProcessType());
+
+  nsresult res;
+  nsCOMPtr<nsIHttpProtocolHandler> http = do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res);
+  if (!NS_FAILED(res)) {
+    nsAutoCString string;  // NOTE: local named "string" hides the file-level "using std::string"
+
+    res = http->GetPlatform(string);  // each property below is skipped if its getter fails
+    if (!NS_FAILED(res))
+      aWriter.StringProperty("platform", string.Data());
+
+    res = http->GetOscpu(string);
+    if (!NS_FAILED(res))
+      aWriter.StringProperty("oscpu", string.Data());
+
+    res = http->GetMisc(string);
+    if (!NS_FAILED(res))
+      aWriter.StringProperty("misc", string.Data());
+  }
+
+  nsCOMPtr<nsIXULRuntime> runtime = do_GetService("@mozilla.org/xre/runtime;1");
+  if (runtime) {
+    nsAutoCString string;
+
+    res = runtime->GetXPCOMABI(string);
+    if (!NS_FAILED(res))
+      aWriter.StringProperty("abi", string.Data());
+
+    res = runtime->GetWidgetToolkit(string);
+    if (!NS_FAILED(res))
+      aWriter.StringProperty("toolkit", string.Data());
+  }
+
+  nsCOMPtr<nsIXULAppInfo> appInfo = do_GetService("@mozilla.org/xre/app-info;1");
+  if (appInfo) {
+    nsAutoCString string;
+
+    res = appInfo->GetName(string);
+    if (!NS_FAILED(res))
+      aWriter.StringProperty("product", string.Data());
+  }
+#endif
+}
+
+void GeckoSampler::ToStreamAsJSON(std::ostream& stream, double aSinceTime)
+{
+  SpliceableJSONWriter streamWriter(mozilla::MakeUnique<OStreamJSONWriteFunc>(stream));
+  StreamJSON(streamWriter, aSinceTime);  // the writer's output function wraps |stream|
+}
+
+#ifndef SPS_STANDALONE
+JSObject* GeckoSampler::ToJSObject(JSContext *aCx, double aSinceTime)
+{
+  JS::RootedValue parsed(aCx);
+  {  // the JSON text and its UTF-16 copy only live inside this scope
+    UniquePtr<char[]> json = ToJSON(aSinceTime);
+    NS_ConvertUTF8toUTF16 wideJson(nsDependentCString(json.get()));
+    const char16_t* chars = static_cast<const char16_t*>(wideJson.get());
+    MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx, chars, wideJson.Length(), &parsed));
+  }
+  return &parsed.toObject();
+}
+
+void GeckoSampler::GetGatherer(nsISupports** aRetVal)
+{
+  // Hand out an AddRef'd gatherer only when both pointers are usable.
+  if (aRetVal && !NS_WARN_IF(!mGatherer)) {
+    NS_ADDREF(*aRetVal = mGatherer);
+  }
+}
+#endif
+
+UniquePtr<char[]> GeckoSampler::ToJSON(double aSinceTime)
+{
+  SpliceableChunkedJSONWriter chunkedWriter;
+  StreamJSON(chunkedWriter, aSinceTime);
+  return chunkedWriter.WriteFunc()->CopyData();  // copy out what was written
+}
+
+void GeckoSampler::ToJSObjectAsync(double aSinceTime,
+                                  mozilla::dom::Promise* aPromise)
+{
+  // Without a gatherer there is nothing to start: warn and do nothing.
+  // Otherwise the request is handed to the gatherer together with aPromise.
+  if (!NS_WARN_IF(!mGatherer)) {
+    mGatherer->Start(aSinceTime, aPromise);
+  }
+}
+
+struct SubprocessClosure {  // handed to SubProcessCallback() through its void* closure
+  explicit SubprocessClosure(SpliceableJSONWriter* aWriter)
+    : mWriter(aWriter)
+  {}
+
+  SpliceableJSONWriter* mWriter;  // raw pointer, stored as given; never deleted here
+};
+
+void SubProcessCallback(const char* aProfile, void* aClosure)
+{
+  // Invoked by observers that want their profile data included as a
+  // sub-profile; aClosure carries the writer to append to.
+  SubprocessClosure* subprocess = static_cast<SubprocessClosure*>(aClosure);
+
+  // Append the profile string as one more element of the current array.
+  subprocess->mWriter->StringElement(aProfile);
+}
+
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+static
+void BuildJavaThreadJSObject(SpliceableJSONWriter& aWriter)
+{
+  aWriter.StringProperty("name", "Java Main Thread");
+
+  aWriter.StartArrayProperty("samples");
+
+    // for each sample (stops at the first sample that has no frames)
+    for (int sampleId = 0; true; sampleId++) {
+      bool firstRun = true;  // true until this sample's first frame is seen
+      // for each frame
+      for (int frameId = 0; true; frameId++) {
+        jni::String::LocalRef frameName =
+            java::GeckoJavaSampler::GetFrameName(0, sampleId, frameId);
+        // when we run out of frames, we stop looping
+        if (!frameName) {
+          // if we found at least one frame, we have objects to close
+          if (!firstRun) {
+              aWriter.EndArray();  // closes "frames"
+            aWriter.EndObject();   // closes the sample object
+          }
+          break;
+        }
+        // the first time around, open the sample object and frames array
+        if (firstRun) {
+          firstRun = false;
+
+          double sampleTime =
+              java::GeckoJavaSampler::GetSampleTime(0, sampleId);
+
+          aWriter.StartObjectElement();
+            aWriter.DoubleProperty("time", sampleTime);
+
+            aWriter.StartArrayProperty("frames");
+        }
+        // add a frame to the sample
+        aWriter.StartObjectElement();
+          aWriter.StringProperty("location",
+                                 frameName->ToCString().BeginReading());
+        aWriter.EndObject();
+      }
+      // if we found no frames for this sample, we are done
+      if (firstRun) {
+        break;
+      }
+    }
+
+  aWriter.EndArray();  // closes "samples"
+}
+#endif
+
+void GeckoSampler::StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime)
+{
+  aWriter.Start(SpliceableJSONWriter::SingleLineStyle);
+  {
+    // Put shared library info
+    aWriter.StringProperty("libs", GetSharedLibraryInfoStringInternal().c_str());
+
+    // Put meta data
+    aWriter.StartObjectProperty("meta");
+      StreamMetaJSCustomObject(aWriter);
+    aWriter.EndObject();
+
+    // Data of TaskTracer doesn't belong in the circular buffer.
+    if (TaskTracer()) {
+      aWriter.StartObjectProperty("tasktracer");
+      StreamTaskTracer(aWriter);
+      aWriter.EndObject();
+    }
+
+    // Lists the samples for each ThreadProfile
+    aWriter.StartArrayProperty("threads");
+    {
+      SetPaused(true);  // sampling stays paused until SetPaused(false) below
+
+      {
+        ::MutexAutoLock lock(*sRegisteredThreadsMutex);
+
+        for (size_t i = 0; i < sRegisteredThreads->size(); i++) {
+          // Thread not being profiled, skip it
+          if (!sRegisteredThreads->at(i)->Profile())
+            continue;
+
+          // Note that we intentionally include ThreadProfile which
+          // have been marked for pending delete.
+
+          ::MutexAutoLock lock(sRegisteredThreads->at(i)->Profile()->GetMutex());  // shadows the outer lock; per-profile mutex
+
+          sRegisteredThreads->at(i)->Profile()->StreamJSON(aWriter, aSinceTime);
+        }
+      }
+
+#ifndef SPS_STANDALONE
+      if (Sampler::CanNotifyObservers()) {
+        // Send an event asking any subprocesses (plugins) to
+        // give us their information
+        SubprocessClosure closure(&aWriter);
+        nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
+        if (os) {
+          RefPtr<ProfileSaveEvent> pse = new ProfileSaveEvent(SubProcessCallback, &closure);
+          os->NotifyObservers(pse, "profiler-subprocess", nullptr);
+        }
+      }
+
+  #if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+      if (ProfileJava()) {
+        java::GeckoJavaSampler::Pause();
+
+        aWriter.Start();
+        {
+          BuildJavaThreadJSObject(aWriter);
+        }
+        aWriter.End();
+
+        java::GeckoJavaSampler::Unpause();
+      }
+  #endif
+#endif
+
+      SetPaused(false);
+    }
+    aWriter.EndArray();
+  }
+  aWriter.End();
+}
+
+void GeckoSampler::FlushOnJSShutdown(JSContext* aContext)
+{
+#ifndef SPS_STANDALONE
+  // Sampling is paused for the duration of the flush.
+  SetPaused(true);
+
+  {
+    ::MutexAutoLock registryLock(*sRegisteredThreadsMutex);
+
+    for (size_t idx = 0; idx < sRegisteredThreads->size(); idx++) {
+      ThreadInfo* info = sRegisteredThreads->at(idx);
+
+      // Flush only threads that are being profiled, are not pending
+      // deletion, and are profiling the context that's going away.
+      const bool needsFlush =
+        info->Profile() &&
+        !info->IsPendingDelete() &&
+        info->Profile()->GetPseudoStack()->mContext == aContext;
+      if (needsFlush) {
+        ::MutexAutoLock profileLock(info->Profile()->GetMutex());
+        info->Profile()->FlushSamplesAndMarkers();
+      }
+    }
+  }
+
+  SetPaused(false);
+#endif
+}
+
+void PseudoStack::flushSamplerOnJSShutdown()
+{
+#ifndef SPS_STANDALONE
+  MOZ_ASSERT(mContext);
+  // Forward to the sampler stored in tlsTicker, if there is one.
+  if (GeckoSampler* sampler = tlsTicker.get()) {
+    sampler->FlushOnJSShutdown(mContext);
+  }
+#endif
+}
+
+// END SaveProfileTask et al
+////////////////////////////////////////////////////////////////////////
+
+static
+void addDynamicTag(ThreadProfile &aProfile, char aTagName, const char *aStr)
+{
+  // Emits an |aTagName| tag followed by 'd' tags that carry |aStr|,
+  // NUL terminator included, packed sizeof(void*) bytes at a time.
+  aProfile.addTag(ProfileEntry(aTagName, ""));
+  const size_t chunkSize = sizeof(void*);
+  const size_t strLen = strlen(aStr) + 1;  // +1 to store the terminator
+  for (size_t j = 0; j < strLen; j += chunkSize) {
+    // Zero-initialise so a short final chunk never exposes indeterminate
+    // bytes, and memcpy into a real void* instead of casting a char
+    // buffer, which had no alignment guarantee.
+    void* packed = nullptr;
+    size_t len = strLen - j;
+    if (len > chunkSize) len = chunkSize;
+    memcpy(&packed, &aStr[j], len);
+    aProfile.addTag(ProfileEntry('d', packed));
+  }
+}
+
+static
+void addPseudoEntry(volatile StackEntry &entry, ThreadProfile &aProfile,
+                    PseudoStack *stack, void *lastpc)
+{
+  // Pseudo-frames with the BEGIN_PSEUDO_JS flag are just annotations
+  // and should not be recorded in the profile.
+  if (entry.hasFlag(StackEntry::BEGIN_PSEUDO_JS))
+    return;
+
+  int lineno = -1;  // -1 means "no line number known"; no 'n' tag is emitted then
+
+  // First entry has tagName 's' (start)
+  // Check for magic pointer bit 1 to indicate copy
+  const char* sampleLabel = entry.label();
+  if (entry.isCopyLabel()) {
+    // Store the string using 1 or more 'd' (dynamic) tags
+    // that follow the preceding 'c' tag
+
+    addDynamicTag(aProfile, 'c', sampleLabel);
+#ifndef SPS_STANDALONE
+    if (entry.isJs()) {
+      JSScript* script = entry.script();
+      if (script) {
+        if (!entry.pc()) {
+          // The JIT only allows the top-most entry to have a nullptr pc
+          MOZ_ASSERT(&entry == &stack->mStack[stack->stackSize() - 1]);
+          // If stack-walking was disabled, then that's just unfortunate
+          if (lastpc) {
+            jsbytecode *jspc = js::ProfilingGetPC(stack->mContext, script,
+                                                  lastpc);
+            if (jspc) {
+              lineno = JS_PCToLineNumber(script, jspc);
+            }
+          }
+        } else {
+          lineno = JS_PCToLineNumber(script, entry.pc());
+        }
+      }
+    } else {
+      lineno = entry.line();
+    }
+#endif
+  } else {
+    aProfile.addTag(ProfileEntry('c', sampleLabel));  // label pointer is stored directly, not copied
+
+    // XXX: Bug 1010578. Don't assume a CPP entry and try to get the
+    // line for js entries as well.
+    if (entry.isCpp()) {
+      lineno = entry.line();
+    }
+  }
+
+  if (lineno != -1) {
+    aProfile.addTag(ProfileEntry('n', lineno));
+  }
+
+  uint32_t category = entry.category();
+  MOZ_ASSERT(!(category & StackEntry::IS_CPP_ENTRY));
+  MOZ_ASSERT(!(category & StackEntry::FRAME_LABEL_COPY));
+
+  if (category) {
+    aProfile.addTag(ProfileEntry('y', (int)category));  // 'y' tag only for non-zero categories
+  }
+}
+
+struct NativeStack
+{
+  void** pc_array;  // program counter of each captured frame
+  void** sp_array;  // stack pointer of each captured frame
+  size_t size;      // capacity of pc_array and sp_array, in entries
+  size_t count;     // number of entries currently filled in
+};
+
+mozilla::Atomic<bool> WALKING_JS_STACK(false);  // true while a walk is claimed
+
+// Scoped claim on WALKING_JS_STACK: walkAllowed is true only for the
+// instance whose constructor flipped the flag from false to true.
+struct AutoWalkJSStack {
+  bool walkAllowed;
+
+  AutoWalkJSStack()
+    : walkAllowed(WALKING_JS_STACK.compareExchange(false, true)) {}
+
+  ~AutoWalkJSStack() {
+    if (walkAllowed) { WALKING_JS_STACK = false; }
+  }
+};
+
+static
+void mergeStacksIntoProfile(ThreadProfile& aProfile, TickSample* aSample, NativeStack& aNativeStack)
+{
+  PseudoStack* pseudoStack = aProfile.GetPseudoStack();
+  volatile StackEntry *pseudoFrames = pseudoStack->mStack;
+  uint32_t pseudoCount = pseudoStack->stackSize();
+
+  // Make a copy of the JS stack into a JSFrame array. This is necessary since,
+  // like the native stack, the JS stack is iterated youngest-to-oldest and we
+  // need to iterate oldest-to-youngest when adding entries to aProfile.
+
+  // Synchronous sampling reports an invalid buffer generation to
+  // ProfilingFrameIterator to avoid incorrectly resetting the generation of
+  // sampled JIT entries inside the JS engine. See note below concerning 'J'
+  // entries.
+  uint32_t startBufferGen;
+  if (aSample->isSamplingCurrentThread) {  // NOTE(review): aSample is dereferenced here but null-checked further down -- confirm it can never be null
+    startBufferGen = UINT32_MAX;
+  } else {
+    startBufferGen = aProfile.bufferGeneration();
+  }
+  uint32_t jsCount = 0;
+#ifndef SPS_STANDALONE
+  JS::ProfilingFrameIterator::Frame jsFrames[1000];
+  // Only walk jit stack if profiling frame iterator is turned on.
+  if (pseudoStack->mContext && JS::IsProfilingEnabledForContext(pseudoStack->mContext)) {
+    AutoWalkJSStack autoWalkJSStack;
+    const uint32_t maxFrames = mozilla::ArrayLength(jsFrames);
+
+    if (aSample && autoWalkJSStack.walkAllowed) {
+      JS::ProfilingFrameIterator::RegisterState registerState;
+      registerState.pc = aSample->pc;
+      registerState.sp = aSample->sp;
+#ifdef ENABLE_ARM_LR_SAVING
+      registerState.lr = aSample->lr;
+#endif
+
+      JS::ProfilingFrameIterator jsIter(pseudoStack->mContext,
+                                        registerState,
+                                        startBufferGen);
+      for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) {
+        // See note below regarding 'J' entries.
+        if (aSample->isSamplingCurrentThread || jsIter.isWasm()) {
+          uint32_t extracted = jsIter.extractStack(jsFrames, jsCount, maxFrames);
+          jsCount += extracted;
+          if (jsCount == maxFrames)
+            break;
+        } else {
+          mozilla::Maybe<JS::ProfilingFrameIterator::Frame> frame =
+            jsIter.getPhysicalFrameWithoutLabel();
+          if (frame.isSome())
+            jsFrames[jsCount++] = mozilla::Move(frame.ref());
+        }
+      }
+    }
+  }
+#endif
+
+  // Start the sample with a root entry.
+  aProfile.addTag(ProfileEntry('s', "(root)"));
+
+  // While the pseudo-stack array is ordered oldest-to-youngest, the JS and
+  // native arrays are ordered youngest-to-oldest. We must add frames to
+  // aProfile oldest-to-youngest. Thus, iterate over the pseudo-stack forwards
+  // and JS and native arrays backwards. Note: this means the terminating
+  // condition for jsIndex and nativeIndex is becoming < 0.
+  uint32_t pseudoIndex = 0;
+  int32_t jsIndex = jsCount - 1;
+  int32_t nativeIndex = aNativeStack.count - 1;
+
+  uint8_t *lastPseudoCppStackAddr = nullptr;
+
+  // Iterate as long as there is at least one frame remaining.
+  while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) {
+    // There are 1 to 3 frames available. Find and add the oldest.
+
+    uint8_t *pseudoStackAddr = nullptr;
+    uint8_t *jsStackAddr = nullptr;
+    uint8_t *nativeStackAddr = nullptr;
+
+    if (pseudoIndex != pseudoCount) {
+      volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex];
+
+      if (pseudoFrame.isCpp())
+        lastPseudoCppStackAddr = (uint8_t *) pseudoFrame.stackAddress();
+
+#ifndef SPS_STANDALONE
+      // Skip any pseudo-stack JS frames which are marked isOSR
+      // Pseudostack frames are marked isOSR when the JS interpreter
+      // enters a jit frame on a loop edge (via on-stack-replacement,
+      // or OSR).  To avoid both the pseudoframe and jit frame being
+      // recorded (and showing up twice), the interpreter marks the
+      // interpreter pseudostack entry with the OSR flag to ensure that
+      // it doesn't get counted.
+      if (pseudoFrame.isJs() && pseudoFrame.isOSR()) {
+          pseudoIndex++;
+          continue;
+      }
+#endif
+
+      MOZ_ASSERT(lastPseudoCppStackAddr);
+      pseudoStackAddr = lastPseudoCppStackAddr;  // non-C++ entries reuse the last C++ entry's address
+    }
+
+#ifndef SPS_STANDALONE
+    if (jsIndex >= 0)
+      jsStackAddr = (uint8_t *) jsFrames[jsIndex].stackAddress;
+#endif
+
+    if (nativeIndex >= 0)
+      nativeStackAddr = (uint8_t *) aNativeStack.sp_array[nativeIndex];
+
+    // If there's a native stack entry which has the same SP as a
+    // pseudo stack entry, pretend we didn't see the native stack
+    // entry.  Ditto for a native stack entry which has the same SP as
+    // a JS stack entry.  In effect this means pseudo or JS entries
+    // trump conflicting native entries.
+    if (nativeStackAddr && (pseudoStackAddr == nativeStackAddr || jsStackAddr == nativeStackAddr)) {
+      nativeStackAddr = nullptr;
+      nativeIndex--;
+      MOZ_ASSERT(pseudoStackAddr || jsStackAddr);
+    }
+
+    // Sanity checks.
+    MOZ_ASSERT_IF(pseudoStackAddr, pseudoStackAddr != jsStackAddr &&
+                                   pseudoStackAddr != nativeStackAddr);
+    MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != pseudoStackAddr &&
+                               jsStackAddr != nativeStackAddr);
+    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != pseudoStackAddr &&
+                                   nativeStackAddr != jsStackAddr);
+
+    // Check to see if pseudoStack frame is top-most.
+    if (pseudoStackAddr > jsStackAddr && pseudoStackAddr > nativeStackAddr) {
+      MOZ_ASSERT(pseudoIndex < pseudoCount);
+      volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex];
+      addPseudoEntry(pseudoFrame, aProfile, pseudoStack, nullptr);
+      pseudoIndex++;
+      continue;
+    }
+
+#ifndef SPS_STANDALONE
+    // Check to see if JS jit stack frame is top-most
+    if (jsStackAddr > nativeStackAddr) {
+      MOZ_ASSERT(jsIndex >= 0);
+      const JS::ProfilingFrameIterator::Frame& jsFrame = jsFrames[jsIndex];
+
+      // Stringifying non-wasm JIT frames is delayed until streaming
+      // time. To re-lookup the entry in the JitcodeGlobalTable, we need to
+      // store the JIT code address ('J') in the circular buffer.
+      //
+      // Note that we cannot do this when we are synchronously sampling the
+      // current thread; that is, when called from profiler_get_backtrace. The
+      // captured backtrace is usually externally stored for an indeterminate
+      // amount of time, such as in nsRefreshDriver. Problematically, the
+      // stored backtrace may be alive across a GC during which the profiler
+      // itself is disabled. In that case, the JS engine is free to discard
+      // its JIT code. This means that if we inserted such 'J' entries into
+      // the buffer, nsRefreshDriver would now be holding on to a backtrace
+      // with stale JIT code return addresses.
+      if (aSample->isSamplingCurrentThread ||
+          jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
+        addDynamicTag(aProfile, 'c', jsFrame.label.get());
+      } else {
+        MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
+                   jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
+        aProfile.addTag(ProfileEntry('J', jsFrames[jsIndex].returnAddress));
+      }
+
+      jsIndex--;
+      continue;
+    }
+#endif
+
+    // If we reach here, there must be a native stack entry and it must be the
+    // greatest entry.
+    if (nativeStackAddr) {
+      MOZ_ASSERT(nativeIndex >= 0);
+      aProfile
+        .addTag(ProfileEntry('l', (void*)aNativeStack.pc_array[nativeIndex]));
+    }
+    if (nativeIndex >= 0) {
+      nativeIndex--;  // consume the native entry even when it was suppressed above
+    }
+  }
+
+#ifndef SPS_STANDALONE
+  // Update the JS context with the current profile sample buffer generation.
+  //
+  // Do not do this for synchronous sampling, which creates its own
+  // ProfileBuffers.
+  if (!aSample->isSamplingCurrentThread && pseudoStack->mContext) {
+    MOZ_ASSERT(aProfile.bufferGeneration() >= startBufferGen);
+    uint32_t lapCount = aProfile.bufferGeneration() - startBufferGen;
+    JS::UpdateJSContextProfilerSampleBufferGen(pseudoStack->mContext,
+                                               aProfile.bufferGeneration(),
+                                               lapCount);
+  }
+#endif
+}
+
+#ifdef USE_NS_STACKWALK
+static
+void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
+                       void* aClosure)
+{
+  NativeStack* frames = static_cast<NativeStack*>(aClosure);  // aFrameNumber is unused
+  MOZ_ASSERT(frames->count < frames->size);
+  const size_t slot = frames->count++;
+  frames->sp_array[slot] = aSP;
+  frames->pc_array[slot] = aPC;
+}
+
+void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample)
+{
+  void* pc_array[1000];
+  void* sp_array[1000];
+  NativeStack nativeStack = {
+    pc_array,
+    sp_array,
+    mozilla::ArrayLength(pc_array),
+    0
+  };
+
+  // Start with the current function. We use 0 as the frame number here because
+  // the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N.
+  // This is a bit weird but it doesn't matter because StackWalkCallback()
+  // doesn't use the frame number argument.
+  StackWalkCallback(/* frameNumber */ 0, aSample->pc, aSample->sp, &nativeStack);
+
+  uint32_t maxFrames = uint32_t(nativeStack.size - nativeStack.count);  // slots left after the frame added above
+  // win X64 doesn't support disabling frame pointers emission so we need
+  // to fallback to using StackWalk64 which is slower.
+#if defined(XP_MACOSX) || (defined(XP_WIN) && !defined(V8_HOST_ARCH_X64))
+  void *stackEnd = aSample->threadProfile->GetStackTop();
+  bool rv = true;
+  if (aSample->fp >= aSample->sp && aSample->fp <= stackEnd)  // only walk when fp lies within [sp, stackEnd]
+    rv = FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0,
+                               maxFrames, &nativeStack,
+                               reinterpret_cast<void**>(aSample->fp), stackEnd);
+#else
+  void *platformData = nullptr;
+
+  uintptr_t thread = GetThreadHandle(aSample->threadProfile->GetPlatformData());
+  MOZ_ASSERT(thread);
+  bool rv = MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
+                             &nativeStack, thread, platformData);
+#endif
+  if (rv)  // a failed walk records no stack for this sample
+    mergeStacksIntoProfile(aProfile, aSample, nativeStack);
+}
+#endif
+
+
+#ifdef USE_EHABI_STACKWALK
+void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample)
+{
+  void *pc_array[1000];
+  void *sp_array[1000];
+  NativeStack nativeStack = {
+    pc_array,
+    sp_array,
+    mozilla::ArrayLength(pc_array),
+    0
+  };
+
+  const mcontext_t *mcontext = &reinterpret_cast<ucontext_t *>(aSample->context)->uc_mcontext;
+  mcontext_t savedContext;
+  PseudoStack *pseudoStack = aProfile.GetPseudoStack();
+
+  nativeStack.count = 0;  // already 0 from the initializer above; kept explicit
+  // The pseudostack contains an "EnterJIT" frame whenever we enter
+  // JIT code with profiling enabled; the stack pointer value points to
+  // the saved registers.  We use this to resume unwinding
+  // after encountering JIT code.
+  for (uint32_t i = pseudoStack->stackSize(); i > 0; --i) {
+    // The pseudostack grows towards higher indices, so we iterate
+    // backwards (from callee to caller).
+    volatile StackEntry &entry = pseudoStack->mStack[i - 1];
+    if (!entry.isJs() && strcmp(entry.label(), "EnterJIT") == 0) {
+      // Found JIT entry frame.  Unwind up to that point (i.e., force
+      // the stack walk to stop before the block of saved registers;
+      // note that it yields nondecreasing stack pointers), then restore
+      // the saved state.
+      uint32_t *vSP = reinterpret_cast<uint32_t*>(entry.stackAddress());
+
+      nativeStack.count += EHABIStackWalk(*mcontext,
+                                          /* stackBase = */ vSP,
+                                          sp_array + nativeStack.count,
+                                          pc_array + nativeStack.count,
+                                          nativeStack.size - nativeStack.count);
+
+      memset(&savedContext, 0, sizeof(savedContext));
+      // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
+      savedContext.arm_r4 = *vSP++;
+      savedContext.arm_r5 = *vSP++;
+      savedContext.arm_r6 = *vSP++;
+      savedContext.arm_r7 = *vSP++;
+      savedContext.arm_r8 = *vSP++;
+      savedContext.arm_r9 = *vSP++;
+      savedContext.arm_r10 = *vSP++;
+      savedContext.arm_fp = *vSP++;
+      savedContext.arm_lr = *vSP++;
+      savedContext.arm_sp = reinterpret_cast<uint32_t>(vSP);  // sp resumes just past the saved-register block
+      savedContext.arm_pc = savedContext.arm_lr;  // continue from the saved return address
+      mcontext = &savedContext;
+    }
+  }
+
+  // Now unwind whatever's left (starting from either the last EnterJIT
+  // frame or, if no EnterJIT was found, the original registers).
+  nativeStack.count += EHABIStackWalk(*mcontext,
+                                      aProfile.GetStackTop(),
+                                      sp_array + nativeStack.count,
+                                      pc_array + nativeStack.count,
+                                      nativeStack.size - nativeStack.count);
+
+  mergeStacksIntoProfile(aProfile, aSample, nativeStack);
+}
+#endif
+
+
+#ifdef USE_LUL_STACKWALK
+void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample)
+{
+  const mcontext_t* mc
+    = &reinterpret_cast<ucontext_t *>(aSample->context)->uc_mcontext;
+
+  lul::UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+
+# if defined(SPS_PLAT_amd64_linux)
+  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
+  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
+  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
+# elif defined(SPS_PLAT_arm_android)
+  startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
+  startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
+  startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
+  startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
+  startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
+  startRegs.r7  = lul::TaggedUWord(mc->arm_r7);
+# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android)
+  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
+  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
+  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
+# else
+#   error "Unknown plat"
+# endif
+
+  /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not
+     going past the stack's registered top point.  Do some basic
+     sanity checks too.  This assumes that the TaggedUWord holding
+     the stack pointer value is valid, but it should be, since it
+     was constructed that way in the code just above. */
+
+  lul::StackImage stackImg;
+
+  {
+#   if defined(SPS_PLAT_amd64_linux)
+    uintptr_t rEDZONE_SIZE = 128;
+    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+#   elif defined(SPS_PLAT_arm_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
+#   elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android)
+    uintptr_t rEDZONE_SIZE = 0;
+    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
+#   else
+#     error "Unknown plat"
+#   endif
+    uintptr_t end   = reinterpret_cast<uintptr_t>(aProfile.GetStackTop());
+    uintptr_t ws    = sizeof(void*);
+    start &= ~(ws-1);  // round both bounds down to a word boundary
+    end   &= ~(ws-1);
+    uintptr_t nToCopy = 0;
+    if (start < end) {
+      nToCopy = end - start;
+      if (nToCopy > lul::N_STACK_BYTES)
+        nToCopy = lul::N_STACK_BYTES;
+    }
+    MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+    stackImg.mLen       = nToCopy;
+    stackImg.mStartAvma = start;
+    if (nToCopy > 0) {
+      memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
+      (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
+    }
+  }
+
+  // The maximum number of frames that LUL will produce.  Setting it
+  // too high gives a risk of it wasting a lot of time looping on
+  // corrupted stacks.
+  const int MAX_NATIVE_FRAMES = 256;
+
+  size_t scannedFramesAllowed = 0;  // presumably disables stack-scanned frames -- confirm against LUL
+
+  uintptr_t framePCs[MAX_NATIVE_FRAMES];
+  uintptr_t frameSPs[MAX_NATIVE_FRAMES];
+  size_t framesAvail = mozilla::ArrayLength(framePCs);
+  size_t framesUsed  = 0;
+  size_t scannedFramesAcquired = 0;
+  sLUL->Unwind( &framePCs[0], &frameSPs[0],
+                &framesUsed, &scannedFramesAcquired,
+                framesAvail, scannedFramesAllowed,
+                &startRegs, &stackImg );
+
+  NativeStack nativeStack = {
+    reinterpret_cast<void**>(framePCs),
+    reinterpret_cast<void**>(frameSPs),
+    mozilla::ArrayLength(framePCs),
+    0
+  };
+
+  nativeStack.count = framesUsed;
+
+  mergeStacksIntoProfile(aProfile, aSample, nativeStack);
+
+  // Update stats in the LUL stats object.  Unfortunately this requires
+  // three global memory operations.
+  sLUL->mStats.mContext += 1;
+  sLUL->mStats.mCFI     += framesUsed - 1 - scannedFramesAcquired;  // NOTE(review): unsigned arithmetic; wraps if framesUsed is 0 -- confirm Unwind always yields >= 1 frame
+  sLUL->mStats.mScanned += scannedFramesAcquired;
+}
+#endif
+
+
+static
+void doSampleStackTrace(ThreadProfile &aProfile, TickSample *aSample, bool aAddLeafAddresses)
+{
+  // No native unwinding here: merge against an empty native stack.
+  NativeStack emptyNativeStack = { nullptr, nullptr, 0, 0 };
+  mergeStacksIntoProfile(aProfile, aSample, emptyNativeStack);
+#ifdef ENABLE_SPS_LEAF_DATA
+  if (aAddLeafAddresses && aSample) {
+    aProfile.addTag(ProfileEntry('l', (void*)aSample->pc));
+#ifdef ENABLE_ARM_LR_SAVING
+    aProfile.addTag(ProfileEntry('L', (void*)aSample->lr));
+#endif
+  }
+#endif
+}
+
+void GeckoSampler::Tick(TickSample* sample)
+{
+  // Don't allow for ticks to happen within other ticks.
+  InplaceTick(sample);  // the whole sample is recorded by InplaceTick()
+}
+
+void GeckoSampler::InplaceTick(TickSample* sample)
+{
+  ThreadProfile& currThreadProfile = *sample->threadProfile;  // NOTE(review): dereferences sample before the `if (sample)` checks below -- confirm sample is never null
+
+  currThreadProfile.addTag(ProfileEntry('T', currThreadProfile.ThreadId()));
+
+  if (sample) {
+    mozilla::TimeDuration delta = sample->timestamp - sStartTime;
+    currThreadProfile.addTag(ProfileEntry('t', delta.ToMilliseconds()));
+  }
+
+  PseudoStack* stack = currThreadProfile.GetPseudoStack();
+
+#if defined(USE_NS_STACKWALK) || defined(USE_EHABI_STACKWALK) || \
+    defined(USE_LUL_STACKWALK)
+  if (mUseStackWalk) {
+    doNativeBacktrace(currThreadProfile, sample);
+  } else {
+    doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses);
+  }
+#else
+  doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses);
+#endif
+
+  // Don't process the PseudoStack's markers if we're
+  // synchronously sampling the current thread.
+  if (!sample->isSamplingCurrentThread) {
+    ProfilerMarkerLinkedList* pendingMarkersList = stack->getPendingMarkers();
+    while (pendingMarkersList && pendingMarkersList->peek()) {
+      ProfilerMarker* marker = pendingMarkersList->popHead();
+      currThreadProfile.addStoredMarker(marker);
+      currThreadProfile.addTag(ProfileEntry('m', marker));
+    }
+  }
+
+#ifndef SPS_STANDALONE
+  if (sample && currThreadProfile.GetThreadResponsiveness()->HasData()) {
+    mozilla::TimeDuration delta = currThreadProfile.GetThreadResponsiveness()->GetUnresponsiveDuration(sample->timestamp);
+    currThreadProfile.addTag(ProfileEntry('r', delta.ToMilliseconds()));
+  }
+#endif
+
+  // rssMemory is equal to 0 when we are not recording.
+  if (sample && sample->rssMemory != 0) {
+    currThreadProfile.addTag(ProfileEntry('R', static_cast<double>(sample->rssMemory)));
+  }
+
+  // ussMemory is equal to 0 when we are not recording.
+  if (sample && sample->ussMemory != 0) {
+    currThreadProfile.addTag(ProfileEntry('U', static_cast<double>(sample->ussMemory)));
+  }
+
+#if defined(XP_WIN)
+  if (mProfilePower) {
+    mIntelPowerGadget->TakeSample();
+    currThreadProfile.addTag(ProfileEntry('p', static_cast<double>(mIntelPowerGadget->GetTotalPackagePowerInWatts())));
+  }
+#endif
+
+  if (sLastFrameNumber != sFrameNumber) {  // emit an 'f' tag only when the frame number has changed
+    currThreadProfile.addTag(ProfileEntry('f', sFrameNumber));
+    sLastFrameNumber = sFrameNumber;
+  }
+}
+
+namespace {
+
+// Builds a SyncProfile around the pseudo stack stored in tlsPseudoStack;
+// asserts and returns nullptr when no pseudo stack is available.
+SyncProfile* NewSyncProfile()
+{
+  PseudoStack* stack = tlsPseudoStack.get();
+  if (!stack) {
+    MOZ_ASSERT(stack);
+    return nullptr;
+  }
+  ThreadInfo* threadInfo = new ThreadInfo("SyncProfile", Thread::GetCurrentId(),
+                                          false, stack, nullptr);
+  return new SyncProfile(threadInfo, GET_BACKTRACE_DEFAULT_ENTRY);
+}
+
+} // namespace
+
+SyncProfile* GeckoSampler::GetBacktrace()
+{
+  // Synchronously samples the calling thread; nullptr if no profile was made.
+  SyncProfile* profile = NewSyncProfile();
+  if (!profile) return nullptr;  // NewSyncProfile() found no pseudo stack
+  TickSample sample;
+  sample.threadProfile = profile;
+
+#if defined(HAVE_NATIVE_UNWIND) || defined(USE_LUL_STACKWALK)
+#if defined(XP_WIN) || defined(LINUX)
+  tickcontext_t context;
+  sample.PopulateContext(&context);
+#elif defined(XP_MACOSX)
+  sample.PopulateContext(nullptr);
+#endif
+#endif
+
+  sample.isSamplingCurrentThread = true;
+  sample.timestamp = mozilla::TimeStamp::Now();
+
+  profile->BeginUnwind();
+  Tick(&sample);
+  profile->EndUnwind();
+  return profile;
+}
+
+void
+GeckoSampler::GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration)
+{
+  *aCurrentPosition = mBuffer->mWritePos;  // out-params are written unconditionally,
+  *aTotalSize = mBuffer->mEntrySize;       // so all three pointers must be non-null
+  *aGeneration = mBuffer->mGeneration;
+}
diff --git a/tools/profiler/core/GeckoSampler.h b/tools/profiler/core/GeckoSampler.h
new file mode 100644
index 000000000..da1fdfe43
--- /dev/null
+++ b/tools/profiler/core/GeckoSampler.h
@@ -0,0 +1,181 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef GeckoSampler_h
+#define GeckoSampler_h
+
+#include "platform.h"
+#include "ProfileEntry.h"
+#include "mozilla/Vector.h"
+#include "ThreadProfile.h"
+#include "ThreadInfo.h"
+#ifndef SPS_STANDALONE
+#include "IntelPowerGadget.h"
+#endif
+#ifdef MOZ_TASK_TRACER
+#include "GeckoTaskTracer.h"
+#endif
+
+#include <algorithm>
+
+namespace mozilla {
+class ProfileGatherer;
+} // namespace mozilla
+
+// List of thread-name filter strings, as consumed by threadSelected() below.
+typedef mozilla::Vector<std::string> ThreadNameFilterList;
+// List of feature-name strings (see GeckoSampler::Features()).
+typedef mozilla::Vector<std::string> FeatureList;
+
+// Decides whether a thread should be profiled given the active name filters.
+//
+// Returns true when the filter list is empty, or when any filter is a
+// case-insensitive substring of the thread's name; false otherwise.
+static bool
+threadSelected(ThreadInfo* aInfo, const ThreadNameFilterList &aThreadNameFilters) {
+  if (aThreadNameFilters.empty()) {
+    return true;
+  }
+
+  // tolower() has undefined behaviour when handed a negative value other than
+  // EOF, which is what a plain (signed) char holding a non-ASCII byte converts
+  // to. Route every character through unsigned char first.
+  auto toLower = [](unsigned char aCh) {
+    return static_cast<char>(::tolower(aCh));
+  };
+
+  std::string name = aInfo->Name();
+  std::transform(name.begin(), name.end(), name.begin(), toLower);
+
+  for (uint32_t i = 0; i < aThreadNameFilters.length(); ++i) {
+    std::string filter = aThreadNameFilters[i];
+    std::transform(filter.begin(), filter.end(), filter.begin(), toLower);
+
+    // Crude, non UTF-8 compatible, case insensitive substring search
+    if (name.find(filter) != std::string::npos) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Sampler-wide state defined in another translation unit.
+extern mozilla::TimeStamp sLastTracerEvent;
+// Current frame number and the frame number last written into a profile; the
+// sampler emits an 'f' entry whenever the two differ.
+extern int sFrameNumber;
+extern int sLastFrameNumber;
+
+// Sampler implementation that records samples into a shared ProfileBuffer and
+// exposes the collected data as JSON / JS objects. The boolean accessors
+// below report which profiling features are enabled for this instance.
+class GeckoSampler: public Sampler {
+ public:
+  GeckoSampler(double aInterval, int aEntrySize,
+              const char** aFeatures, uint32_t aFeatureCount,
+              const char** aThreadNameFilters, uint32_t aFilterCount);
+  ~GeckoSampler();
+
+  // Attaches a new ThreadProfile (writing into mBuffer) to aInfo, unless the
+  // thread is a non-main thread while thread profiling is disabled, or its
+  // name does not pass the thread-name filters.
+  void RegisterThread(ThreadInfo* aInfo) {
+    if (!aInfo->IsMainThread() && !mProfileThreads) {
+      return;
+    }
+
+    if (!threadSelected(aInfo, mThreadNameFilters)) {
+      return;
+    }
+
+    ThreadProfile* profile = new ThreadProfile(aInfo, mBuffer);
+    aInfo->SetProfile(profile);
+  }
+
+  // Called within a signal. This function must be reentrant
+  virtual void Tick(TickSample* sample) override;
+
+  // Immediately captures the calling thread's call stack and returns it.
+  virtual SyncProfile* GetBacktrace() override;
+
+  // Called within a signal. This function must be reentrant
+  virtual void RequestSave() override
+  {
+    mSaveRequested = true;
+#ifdef MOZ_TASK_TRACER
+    if (mTaskTracer) {
+      mozilla::tasktracer::StopLogging();
+    }
+#endif
+  }
+
+  virtual void HandleSaveRequest() override;
+  virtual void DeleteExpiredMarkers() override;
+
+  // Returns the profile of the main thread, looking it up lazily: on the
+  // first call (while the cached pointer is null) the registered-thread list
+  // is scanned under sRegisteredThreadsMutex for a main thread that is not
+  // pending deletion. May return null if no such thread is found.
+  ThreadProfile* GetPrimaryThreadProfile()
+  {
+    if (!mPrimaryThreadProfile) {
+      ::MutexAutoLock lock(*sRegisteredThreadsMutex);
+
+      for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) {
+        ThreadInfo* info = sRegisteredThreads->at(i);
+        if (info->IsMainThread() && !info->IsPendingDelete()) {
+          mPrimaryThreadProfile = info->Profile();
+          break;
+        }
+      }
+    }
+
+    return mPrimaryThreadProfile;
+  }
+
+  // Serialization entry points. aSinceTime defaults to 0 everywhere.
+  void ToStreamAsJSON(std::ostream& stream, double aSinceTime = 0);
+#ifndef SPS_STANDALONE
+  virtual JSObject *ToJSObject(JSContext *aCx, double aSinceTime = 0);
+  void GetGatherer(nsISupports** aRetVal);
+#endif
+  mozilla::UniquePtr<char[]> ToJSON(double aSinceTime = 0);
+  virtual void ToJSObjectAsync(double aSinceTime = 0, mozilla::dom::Promise* aPromise = 0);
+  void StreamMetaJSCustomObject(SpliceableJSONWriter& aWriter);
+  void StreamTaskTracer(SpliceableJSONWriter& aWriter);
+  void FlushOnJSShutdown(JSContext* aContext);
+  // Feature-flag accessors; each returns the corresponding member below.
+  bool ProfileJS() const { return mProfileJS; }
+  bool ProfileJava() const { return mProfileJava; }
+  bool ProfileGPU() const { return mProfileGPU; }
+  bool ProfilePower() const { return mProfilePower; }
+  bool ProfileThreads() const override { return mProfileThreads; }
+  bool InPrivacyMode() const { return mPrivacyMode; }
+  bool AddMainThreadIO() const { return mAddMainThreadIO; }
+  bool ProfileMemory() const { return mProfileMemory; }
+  bool TaskTracer() const { return mTaskTracer; }
+  bool LayersDump() const { return mLayersDump; }
+  bool DisplayListDump() const { return mDisplayListDump; }
+  bool ProfileRestyle() const { return mProfileRestyle; }
+  const ThreadNameFilterList& ThreadNameFilters() { return mThreadNameFilters; }
+  const FeatureList& Features() { return mFeatures; }
+
+  // Writes the buffer's write position, entry capacity and generation into
+  // the three out-parameters.
+  void GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration);
+
+protected:
+  // Called within a signal. This function must be reentrant
+  virtual void InplaceTick(TickSample* sample);
+
+  // Not implemented on platforms which do not support backtracing
+  void doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample);
+
+  void StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime);
+
+  // This represents the application's main thread (SAMPLER_INIT)
+  ThreadProfile* mPrimaryThreadProfile;
+  // Buffer handed to every ThreadProfile created by RegisterThread().
+  RefPtr<ProfileBuffer> mBuffer;
+  bool mSaveRequested;
+  bool mAddLeafAddresses;
+  bool mUseStackWalk;
+  bool mProfileJS;
+  bool mProfileGPU;
+  bool mProfileThreads;
+  bool mProfileJava;
+  bool mProfilePower;
+  bool mLayersDump;
+  bool mDisplayListDump;
+  bool mProfileRestyle;
+
+  // Keep the thread filters to check against new threads that
+  // are started while profiling
+  ThreadNameFilterList mThreadNameFilters;
+  FeatureList mFeatures;
+  bool mPrivacyMode;
+  bool mAddMainThreadIO;
+  bool mProfileMemory;
+  bool mTaskTracer;
+#if defined(XP_WIN)
+  IntelPowerGadget* mIntelPowerGadget;
+#endif
+
+private:
+  RefPtr<mozilla::ProfileGatherer> mGatherer;
+};
+
+#endif
+
diff --git a/tools/profiler/core/IntelPowerGadget.cpp b/tools/profiler/core/IntelPowerGadget.cpp
new file mode 100644
index 000000000..fe267b80f
--- /dev/null
+++ b/tools/profiler/core/IntelPowerGadget.cpp
@@ -0,0 +1,310 @@
+/*
+ * Copyright 2013, Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Author: Joe Olivas <joseph.k.olivas@intel.com>
+ */
+
+#include "nsDebug.h"
+#include "nsString.h"
+#include "IntelPowerGadget.h"
+#include "prenv.h"
+
+// Constructs an unloaded gadget: no library handle, every entry point null and
+// every MSR index set to -1 ("not found"). Call Init() to load the library.
+//
+// The initializers are listed in the order the members are declared in the
+// class (GetNumMsrs directly after GetNumNodes); members are always
+// initialized in declaration order, and a mismatching list triggers
+// -Wreorder.
+IntelPowerGadget::IntelPowerGadget() :
+    libpowergadget(nullptr),
+    Initialize(nullptr),
+    GetNumNodes(nullptr),
+    GetNumMsrs(nullptr),
+    GetMsrName(nullptr),
+    GetMsrFunc(nullptr),
+    ReadMSR(nullptr),
+    WriteMSR(nullptr),
+    GetIAFrequency(nullptr),
+    GetTDP(nullptr),
+    GetMaxTemperature(nullptr),
+    GetThresholds(nullptr),
+    GetTemperature(nullptr),
+    ReadSample(nullptr),
+    GetSysTime(nullptr),
+    GetRDTSC(nullptr),
+    GetTimeInterval(nullptr),
+    GetBaseFrequency(nullptr),
+    GetPowerData(nullptr),
+    StartLog(nullptr),
+    StopLog(nullptr),
+    packageMSR(-1),
+    cpuMSR(-1),
+    freqMSR(-1),
+    tempMSR(-1)
+{
+}
+
+// Loads the Power Gadget library from the directory named by the IPG_Dir
+// environment variable, resolves its entry points, initializes it and
+// discovers which MSR indices report frequency, package power, CPU power and
+// temperature.
+//
+// Returns true when the library's initialization entry point was found and
+// called; false when IPG_Dir is unset/empty, the library could not be loaded,
+// or that entry point is missing.
+bool
+IntelPowerGadget::Init()
+{
+    bool success = false;
+    const char *path = PR_GetEnv("IPG_Dir");
+    nsCString ipg_library;
+    if (path && *path) {
+        ipg_library.Append(path);
+        ipg_library.Append('/');
+        ipg_library.AppendLiteral(PG_LIBRARY_NAME);
+        libpowergadget = PR_LoadLibrary(ipg_library.get());
+    }
+
+    if(libpowergadget) {
+        Initialize = (IPGInitialize) PR_FindFunctionSymbol(libpowergadget, "IntelEnergyLibInitialize");
+        GetNumNodes = (IPGGetNumNodes) PR_FindFunctionSymbol(libpowergadget, "GetNumNodes");
+        GetMsrName = (IPGGetMsrName) PR_FindFunctionSymbol(libpowergadget, "GetMsrName");
+        GetMsrFunc = (IPGGetMsrFunc) PR_FindFunctionSymbol(libpowergadget, "GetMsrFunc");
+        ReadMSR = (IPGReadMSR) PR_FindFunctionSymbol(libpowergadget, "ReadMSR");
+        WriteMSR = (IPGWriteMSR) PR_FindFunctionSymbol(libpowergadget, "WriteMSR");
+        GetIAFrequency = (IPGGetIAFrequency) PR_FindFunctionSymbol(libpowergadget, "GetIAFrequency");
+        GetTDP = (IPGGetTDP) PR_FindFunctionSymbol(libpowergadget, "GetTDP");
+        GetMaxTemperature = (IPGGetMaxTemperature) PR_FindFunctionSymbol(libpowergadget, "GetMaxTemperature");
+        GetThresholds = (IPGGetThresholds) PR_FindFunctionSymbol(libpowergadget, "GetThresholds");
+        GetTemperature = (IPGGetTemperature) PR_FindFunctionSymbol(libpowergadget, "GetTemperature");
+        ReadSample = (IPGReadSample) PR_FindFunctionSymbol(libpowergadget, "ReadSample");
+        GetSysTime = (IPGGetSysTime) PR_FindFunctionSymbol(libpowergadget, "GetSysTime");
+        GetRDTSC = (IPGGetRDTSC) PR_FindFunctionSymbol(libpowergadget, "GetRDTSC");
+        GetTimeInterval = (IPGGetTimeInterval) PR_FindFunctionSymbol(libpowergadget, "GetTimeInterval");
+        GetBaseFrequency = (IPGGetBaseFrequency) PR_FindFunctionSymbol(libpowergadget, "GetBaseFrequency");
+        GetPowerData = (IPGGetPowerData) PR_FindFunctionSymbol(libpowergadget, "GetPowerData");
+        StartLog = (IPGStartLog) PR_FindFunctionSymbol(libpowergadget, "StartLog");
+        StopLog = (IPGStopLog) PR_FindFunctionSymbol(libpowergadget, "StopLog");
+        GetNumMsrs = (IPGGetNumMsrs) PR_FindFunctionSymbol(libpowergadget, "GetNumMsrs");
+    }
+
+    if(Initialize) {
+        Initialize();
+        int msrCount = GetNumberMsrs();
+        // Every symbol is looked up independently, so finding Initialize does
+        // not guarantee that the MSR query entry points were found as well.
+        // Skip MSR discovery (leaving the indices at -1) rather than calling
+        // through a null function pointer.
+        if(GetMsrName && GetMsrFunc) {
+            wchar_t name[1024] = {0};
+            for(int i = 0; i < msrCount; ++i) {
+                GetMsrName(i, name);
+                int func = 0;
+                GetMsrFunc(i, &func);
+                // MSR for frequency
+                if(wcscmp(name, L"CPU Frequency") == 0 && (func == 0)) {
+                    this->freqMSR = i;
+                }
+                // MSR for Package
+                else if(wcscmp(name, L"Processor") == 0 && (func == 1)) {
+                    this->packageMSR = i;
+                }
+                // MSR for CPU
+                else if(wcscmp(name, L"IA") == 0 && (func == 1)) {
+                    this->cpuMSR = i;
+                }
+                // MSR for Temperature
+                else if(wcscmp(name, L"Package") == 0 && (func == 2)) {
+                    this->tempMSR = i;
+                }
+            }
+        }
+        // Grab one sample at startup for a diff
+        TakeSample();
+        success = true;
+    }
+    return success;
+}
+
+// Unloads the Power Gadget library if it was loaded, and clears the handle and
+// every entry point that pointed into it. Does nothing when no library is
+// loaded.
+IntelPowerGadget::~IntelPowerGadget()
+{
+    if(!libpowergadget) {
+        return;
+    }
+
+    NS_WARNING("Unloading PowerGadget library!\n");
+    PR_UnloadLibrary(libpowergadget);
+    libpowergadget = nullptr;
+
+    // The function pointers dangle once the library is gone; reset them all.
+    Initialize = nullptr;
+    GetNumNodes = nullptr;
+    GetMsrName = nullptr;
+    GetMsrFunc = nullptr;
+    ReadMSR = nullptr;
+    WriteMSR = nullptr;
+    GetIAFrequency = nullptr;
+    GetTDP = nullptr;
+    GetMaxTemperature = nullptr;
+    GetThresholds = nullptr;
+    GetTemperature = nullptr;
+    ReadSample = nullptr;
+    GetSysTime = nullptr;
+    GetRDTSC = nullptr;
+    GetTimeInterval = nullptr;
+    GetBaseFrequency = nullptr;
+    GetPowerData = nullptr;
+    StartLog = nullptr;
+    StopLog = nullptr;
+    GetNumMsrs = nullptr;
+}
+
+// Returns the node count reported by the library, or 0 when the GetNumNodes
+// entry point is unavailable. The library's status code is not inspected.
+int
+IntelPowerGadget::GetNumberNodes()
+{
+    int nodes = 0;
+    if(GetNumNodes) {
+        GetNumNodes(&nodes);
+    }
+    return nodes;
+}
+
+// Returns the MSR count reported by the library, or 0 when the GetNumMsrs
+// entry point is unavailable. The library's status code is not inspected.
+int
+IntelPowerGadget::GetNumberMsrs()
+{
+    int msrs = 0;
+    if(GetNumMsrs) {
+        GetNumMsrs(&msrs);
+    }
+    return msrs;
+}
+
+// Returns the frequency the library reports for the given node, or 0 when the
+// GetIAFrequency entry point is unavailable. The status code is not
+// inspected.
+int
+IntelPowerGadget::GetCPUFrequency(int node)
+{
+    int frequency = 0;
+    if(GetIAFrequency) {
+        GetIAFrequency(node, &frequency);
+    }
+    return frequency;
+}
+
+// Returns the TDP the library reports for the given node, or 0.0 when the
+// GetTDP entry point is unavailable. The status code is not inspected.
+double
+IntelPowerGadget::GetTdp(int node)
+{
+    double tdp = 0.0;
+    if(GetTDP) {
+        GetTDP(node, &tdp);
+    }
+    return tdp;
+}
+
+// Returns the maximum temperature the library reports for the given node, or
+// 0 when the GetMaxTemperature entry point is unavailable. The status code is
+// not inspected.
+int
+IntelPowerGadget::GetMaxTemp(int node)
+{
+    int maxTemperatureC = 0;
+    if(GetMaxTemperature) {
+        GetMaxTemperature(node, &maxTemperatureC);
+    }
+    return maxTemperatureC;
+}
+
+// Returns the current temperature the library reports for the given node, or
+// 0 when the GetTemperature entry point is unavailable. The status code is
+// not inspected.
+int
+IntelPowerGadget::GetTemp(int node)
+{
+    int temperatureC = 0;
+    if(GetTemperature) {
+        GetTemperature(node, &temperatureC);
+    }
+    return temperatureC;
+}
+
+// Asks the library to read a new sample and forwards its return value;
+// returns 0 when the ReadSample entry point is unavailable.
+int
+IntelPowerGadget::TakeSample()
+{
+    if(!ReadSample) {
+        return 0;
+    }
+    return ReadSample();
+}
+
+// Returns the timestamp value the library reports through GetRDTSC, or 0 when
+// that entry point is unavailable. The status code is not inspected.
+uint64_t
+IntelPowerGadget::GetRdtsc()
+{
+    uint64_t rdtsc = 0;
+    if(GetRDTSC) {
+        GetRDTSC(&rdtsc);
+    }
+    return rdtsc;
+}
+
+// Returns the time interval the library reports through GetTimeInterval, or
+// 0.0 when that entry point is unavailable. The status code is not inspected.
+double
+IntelPowerGadget::GetInterval()
+{
+    double interval = 0.0;
+    if(GetTimeInterval) {
+        GetTimeInterval(&interval);
+    }
+    return interval;
+}
+
+// Returns the base frequency the library reports for the given node, or 0.0
+// when the GetBaseFrequency entry point is unavailable. The status code is
+// not inspected.
+double
+IntelPowerGadget::GetCPUBaseFrequency(int node)
+{
+    double freq = 0.0;
+    if(GetBaseFrequency) {
+        GetBaseFrequency(node, &freq);
+    }
+    return freq;
+}
+
+// Sums GetPackagePowerInWatts() over every node reported by
+// GetNumberNodes(); yields 0.0 when there are no nodes.
+double
+IntelPowerGadget::GetTotalPackagePowerInWatts()
+{
+    const int nodeCount = GetNumberNodes();
+    double sum = 0.0;
+    int node = 0;
+    while(node < nodeCount) {
+        sum += GetPackagePowerInWatts(node);
+        ++node;
+    }
+    return sum;
+}
+
+// Queries the package-power MSR for the given node and returns the first
+// value of the result array. Returns 0.0 when the GetPowerData entry point is
+// unavailable or no package MSR was discovered (packageMSR == -1). The status
+// code and the result count are not inspected.
+double
+IntelPowerGadget::GetPackagePowerInWatts(int node)
+{
+    int numResult = 0;
+    double result[] = {0.0, 0.0, 0.0};
+    if(GetPowerData && packageMSR != -1) {
+        GetPowerData(node, packageMSR, result, &numResult);
+    }
+    return result[0];
+}
+
+// Sums GetCPUPowerInWatts() over every node reported by GetNumberNodes();
+// yields 0.0 when there are no nodes.
+double
+IntelPowerGadget::GetTotalCPUPowerInWatts()
+{
+    const int nodeCount = GetNumberNodes();
+    double sum = 0.0;
+    int node = 0;
+    while(node < nodeCount) {
+        sum += GetCPUPowerInWatts(node);
+        ++node;
+    }
+    return sum;
+}
+
+// Queries the CPU-power MSR for the given node and returns the first value of
+// the result array. Returns 0.0 when the GetPowerData entry point is
+// unavailable or no CPU MSR was discovered (cpuMSR == -1). The status code
+// and the result count are not inspected.
+double
+IntelPowerGadget::GetCPUPowerInWatts(int node)
+{
+    int numResult = 0;
+    double result[] = {0.0, 0.0, 0.0};
+    if(GetPowerData && cpuMSR != -1) {
+        GetPowerData(node, cpuMSR, result, &numResult);
+    }
+    return result[0];
+}
+
+// Sums GetGPUPowerInWatts() over every node reported by GetNumberNodes();
+// yields 0.0 when there are no nodes.
+double
+IntelPowerGadget::GetTotalGPUPowerInWatts()
+{
+    const int nodeCount = GetNumberNodes();
+    double sum = 0.0;
+    int node = 0;
+    while(node < nodeCount) {
+        sum += GetGPUPowerInWatts(node);
+        ++node;
+    }
+    return sum;
+}
+
+// GPU power is not read from the library here: the result is always 0.0,
+// whatever node is passed.
+double
+IntelPowerGadget::GetGPUPowerInWatts(int node)
+{
+    const double unavailable = 0.0;
+    return unavailable;
+}
+
diff --git a/tools/profiler/core/IntelPowerGadget.h b/tools/profiler/core/IntelPowerGadget.h
new file mode 100644
index 000000000..4a24215b6
--- /dev/null
+++ b/tools/profiler/core/IntelPowerGadget.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2013, Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Author: Joe Olivas <joseph.k.olivas@intel.com>
+ */
+
+#ifndef profiler_IntelPowerGadget_h
+#define profiler_IntelPowerGadget_h
+
+#ifdef _MSC_VER
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+#include "prlink.h"
+
+// Function-pointer types for the entry points resolved from the Power Gadget
+// library at runtime. Every entry point returns an int status; values are
+// delivered through the pointer parameters.
+typedef int (*IPGInitialize) ();
+typedef int (*IPGGetNumNodes) (int *nNodes);
+typedef int (*IPGGetNumMsrs) (int *nMsr);
+typedef int (*IPGGetMsrName) (int iMsr, wchar_t *szName);
+typedef int (*IPGGetMsrFunc) (int iMsr, int *pFuncID);
+typedef int (*IPGReadMSR) (int iNode, unsigned int address, uint64_t *value);
+typedef int (*IPGWriteMSR) (int iNode, unsigned int address, uint64_t value);
+typedef int (*IPGGetIAFrequency) (int iNode, int *freqInMHz);
+typedef int (*IPGGetTDP) (int iNode, double *TDP);
+typedef int (*IPGGetMaxTemperature) (int iNode, int *degreeC);
+typedef int (*IPGGetThresholds) (int iNode, int *degree1C, int *degree2C);
+typedef int (*IPGGetTemperature) (int iNode, int *degreeC);
+typedef int (*IPGReadSample) ();
+typedef int (*IPGGetSysTime) (void *pSysTime);
+typedef int (*IPGGetRDTSC) (uint64_t *pTSC);
+typedef int (*IPGGetTimeInterval) (double *pOffset);
+typedef int (*IPGGetBaseFrequency) (int iNode, double *pBaseFrequency);
+typedef int (*IPGGetPowerData) (int iNode, int iMSR, double *pResult, int *nResult);
+typedef int (*IPGStartLog) (wchar_t *szFileName);
+typedef int (*IPGStopLog) ();
+
+// Base name of the Power Gadget library to load: the 64-bit flavour on x86-64
+// builds, the 32-bit flavour otherwise.
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define PG_LIBRARY_NAME "EnergyLib64"
+#else
+#define PG_LIBRARY_NAME "EnergyLib32"
+#endif
+
+
+// Thin wrapper around the Intel Power Gadget library, which is loaded
+// dynamically. Every query method returns a zero value when the entry point
+// it needs was not resolved.
+class IntelPowerGadget
+{
+public:
+
+    IntelPowerGadget();
+    ~IntelPowerGadget();
+
+    // Fails if initialization is incomplete
+    bool Init();
+
+    // Returns the number of packages on the system
+    int GetNumberNodes();
+
+    // Returns the number of MSRs being tracked
+    int GetNumberMsrs();
+
+    // Given a node, returns the CPU frequency (the underlying
+    // entry point names its output freqInMHz)
+    int GetCPUFrequency(int);
+
+    // Returns the TDP of the given node
+    double GetTdp(int);
+
+    // Returns the maximum temperature for the given node
+    int GetMaxTemp(int);
+
+    // Returns the current temperature in degrees C
+    // of the given node
+    int GetTemp(int);
+
+    // Takes a sample of data. Must be called before
+    // any current data is retrieved.
+    int TakeSample();
+
+    // Gets the timestamp of the most recent sample
+    uint64_t GetRdtsc();
+
+    // returns number of seconds between the last
+    // two samples
+    double GetInterval();
+
+    // Returns the base frequency for the given node
+    double GetCPUBaseFrequency(int node);
+
+    // Returns the combined package power for all
+    // packages on the system for the last sample.
+    double GetTotalPackagePowerInWatts();
+    double GetPackagePowerInWatts(int node);
+
+    // Returns the combined CPU power for all
+    // packages on the system for the last sample.
+    // If the reading is not available, returns 0.0
+    double GetTotalCPUPowerInWatts();
+    double GetCPUPowerInWatts(int node);
+
+    // Returns the combined GPU power for all
+    // packages on the system for the last sample.
+    // If the reading is not available, returns 0.0
+    double GetTotalGPUPowerInWatts();
+    double GetGPUPowerInWatts(int node);
+
+private:
+
+    // Handle of the loaded library (null while not loaded), followed by
+    // the entry points resolved from it; each is null when unresolved.
+    PRLibrary *libpowergadget;
+    IPGInitialize Initialize;
+    IPGGetNumNodes GetNumNodes;
+    IPGGetNumMsrs GetNumMsrs;
+    IPGGetMsrName GetMsrName;
+    IPGGetMsrFunc GetMsrFunc;
+    IPGReadMSR ReadMSR;
+    IPGWriteMSR WriteMSR;
+    IPGGetIAFrequency GetIAFrequency;
+    IPGGetTDP GetTDP;
+    IPGGetMaxTemperature GetMaxTemperature;
+    IPGGetThresholds GetThresholds;
+    IPGGetTemperature GetTemperature;
+    IPGReadSample ReadSample;
+    IPGGetSysTime GetSysTime;
+    IPGGetRDTSC GetRDTSC;
+    IPGGetTimeInterval GetTimeInterval;
+    IPGGetBaseFrequency GetBaseFrequency;
+    IPGGetPowerData GetPowerData;
+    IPGStartLog StartLog;
+    IPGStopLog StopLog;
+
+    // MSR indices discovered by Init(); -1 means "not found".
+    int packageMSR;
+    int cpuMSR;
+    int freqMSR;
+    int tempMSR;
+};
+
+#endif // profiler_IntelPowerGadget_h
diff --git a/tools/profiler/core/PlatformMacros.h b/tools/profiler/core/PlatformMacros.h
new file mode 100644
index 000000000..9a544a42e
--- /dev/null
+++ b/tools/profiler/core/PlatformMacros.h
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef SPS_PLATFORM_MACROS_H
+#define SPS_PLATFORM_MACROS_H
+
+/* Define platform selection macros in a consistent way.  Don't add
+   anything else to this file, so it can remain freestanding.  The
+   primary factorisation is on (ARCH,OS) pairs ("PLATforms") but ARCH_
+   and OS_ macros are defined too, since they are sometimes
+   convenient. */
+
+/* Start from a clean slate: clear every macro this file may define, so that
+   exactly one PLAT/ARCH/OS triple is set afterwards.  SPS_PLAT_x86_android
+   is included because the selection chain below can define it. */
+#undef SPS_PLAT_arm_android
+#undef SPS_PLAT_x86_android
+#undef SPS_PLAT_amd64_linux
+#undef SPS_PLAT_x86_linux
+#undef SPS_PLAT_amd64_darwin
+#undef SPS_PLAT_x86_darwin
+#undef SPS_PLAT_x86_windows
+#undef SPS_PLAT_amd64_windows
+
+#undef SPS_ARCH_arm
+#undef SPS_ARCH_x86
+#undef SPS_ARCH_amd64
+
+#undef SPS_OS_android
+#undef SPS_OS_linux
+#undef SPS_OS_darwin
+#undef SPS_OS_windows
+
+/* Select exactly one platform.  The first matching branch wins, so the order
+   of the tests matters: the Android/i386 test precedes the generic
+   Linux/i386 test (presumably because Android toolchains also define
+   __linux__ -- TODO confirm).  Unknown platforms are a hard compile error. */
+#if defined(__linux__) && defined(__x86_64__)
+#  define SPS_PLAT_amd64_linux 1
+#  define SPS_ARCH_amd64 1
+#  define SPS_OS_linux 1
+
+#elif defined(__ANDROID__) && defined(__arm__)
+#  define SPS_PLAT_arm_android 1
+#  define SPS_ARCH_arm 1
+#  define SPS_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__i386__)
+#  define SPS_PLAT_x86_android 1
+#  define SPS_ARCH_x86 1
+#  define SPS_OS_android 1
+
+#elif defined(__linux__) && defined(__i386__)
+#  define SPS_PLAT_x86_linux 1
+#  define SPS_ARCH_x86 1
+#  define SPS_OS_linux 1
+
+#elif defined(__APPLE__) && defined(__x86_64__)
+#  define SPS_PLAT_amd64_darwin 1
+#  define SPS_ARCH_amd64 1
+#  define SPS_OS_darwin 1
+
+#elif defined(__APPLE__) && defined(__i386__)
+#  define SPS_PLAT_x86_darwin 1
+#  define SPS_ARCH_x86 1
+#  define SPS_OS_darwin 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && (defined(_M_IX86) || defined(__i386__))
+#  define SPS_PLAT_x86_windows 1
+#  define SPS_ARCH_x86 1
+#  define SPS_OS_windows 1
+
+#elif (defined(_MSC_VER) || defined(__MINGW32__)) && (defined(_M_X64) || defined(__x86_64__))
+#  define SPS_PLAT_amd64_windows 1
+#  define SPS_ARCH_amd64 1
+#  define SPS_OS_windows 1
+
+#else
+#  error "Unsupported platform"
+#endif
+
+#endif /* ndef SPS_PLATFORM_MACROS_H */
diff --git a/tools/profiler/core/ProfileBuffer.cpp b/tools/profiler/core/ProfileBuffer.cpp
new file mode 100644
index 000000000..a4b91d8fc
--- /dev/null
+++ b/tools/profiler/core/ProfileBuffer.cpp
@@ -0,0 +1,89 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfileBuffer.h"
+
+// Allocates storage for aEntrySize entries and starts with an empty buffer:
+// read and write positions at 0, generation 0.
+// NOTE(review): aEntrySize is not validated here; addTag() takes a modulus by
+// it, so callers presumably guarantee a positive value -- confirm.
+ProfileBuffer::ProfileBuffer(int aEntrySize)
+  : mEntries(MakeUnique<ProfileEntry[]>(aEntrySize))
+  , mWritePos(0)
+  , mReadPos(0)
+  , mEntrySize(aEntrySize)
+  , mGeneration(0)
+{
+}
+
+// Frees every marker still held in the stored-marker list.
+ProfileBuffer::~ProfileBuffer()
+{
+  while (mStoredMarkers.peek()) {
+    auto marker = mStoredMarkers.popHead();
+    delete marker;
+  }
+}
+
+// Called from signal, call only reentrant functions
+//
+// Appends aTag at the write position and advances it, wrapping to 0 (and
+// bumping the generation counter) at the end of the storage. If the write
+// position catches up with the read position, the oldest entry is cleared and
+// the read position advances, so one slot always stays unused.
+void ProfileBuffer::addTag(const ProfileEntry& aTag)
+{
+  mEntries[mWritePos++] = aTag;
+  if (mWritePos == mEntrySize) {
+    // Wrapping around may result in things referenced in the buffer (e.g.,
+    // JIT code addresses and markers) being incorrectly collected.
+    MOZ_ASSERT(mGeneration != UINT32_MAX);
+    mGeneration++;
+    mWritePos = 0;
+  }
+  if (mWritePos == mReadPos) {
+    // Keep one slot open.
+    mEntries[mReadPos] = ProfileEntry();
+    mReadPos = (mReadPos + 1) % mEntrySize;
+  }
+}
+
+// Stamps the marker with the buffer's current generation and adds it to the
+// stored-marker list.
+void ProfileBuffer::addStoredMarker(ProfilerMarker *aStoredMarker)
+{
+  const uint32_t currentGeneration = mGeneration;
+  aStoredMarker->SetGeneration(currentGeneration);
+  mStoredMarkers.insert(aStoredMarker);
+}
+
+// Pops and frees markers from the head of the stored-marker list for as long
+// as the head reports itself expired relative to the current generation,
+// i.e. its samples were overwritten by circular-buffer wraparound.
+void ProfileBuffer::deleteExpiredStoredMarkers()
+{
+  const uint32_t currentGeneration = mGeneration;
+  for (;;) {
+    auto oldest = mStoredMarkers.peek();
+    if (!oldest || !oldest->HasExpired(currentGeneration)) {
+      break;
+    }
+    delete mStoredMarkers.popHead();
+  }
+}
+
+// Empties the buffer by rewinding both positions to 0, and advances the
+// generation counter by two.
+void ProfileBuffer::reset()
+{
+  mGeneration = mGeneration + 2;
+  mWritePos = 0;
+  mReadPos = 0;
+}
+
+#define DYNAMIC_MAX_STRING 8192
+
+// Reassembles a string that was stored as a run of entries following readPos,
+// sizeof(void*) characters per entry, into tagBuff.
+//
+//   readPos      - index of the entry that precedes the string data.
+//   tagsConsumed - incremented once for every entry read.
+//   tagBuff      - destination; must have room for DYNAMIC_MAX_STRING bytes.
+//
+// Returns tagBuff, which is always NUL-terminated on return. The string is cut
+// off after DYNAMIC_MAX_STRING-1 characters, or earlier if the write position
+// is reached before a terminator is seen. (Previously both of those cases
+// left the buffer unterminated, exposing whatever it held before.)
+char* ProfileBuffer::processDynamicTag(int readPos,
+                                       int* tagsConsumed, char* tagBuff)
+{
+  int readAheadPos = (readPos + 1) % mEntrySize;
+  int tagBuffPos = 0;
+
+  // Read the string stored in mTagData until the null character is seen
+  bool finished = false;
+  while (readAheadPos != mWritePos && !finished) {
+    (*tagsConsumed)++;
+    ProfileEntry readAheadEntry = mEntries[readAheadPos];
+    for (size_t pos = 0; pos < sizeof(void*); pos++) {
+      tagBuff[tagBuffPos] = readAheadEntry.mTagChars[pos];
+      if (tagBuff[tagBuffPos] == '\0') {
+        finished = true;
+        break;
+      }
+      if (tagBuffPos == DYNAMIC_MAX_STRING-2) {
+        // Out of room: keep this character and terminate in the last slot.
+        tagBuff[tagBuffPos + 1] = '\0';
+        finished = true;
+        break;
+      }
+      tagBuffPos++;
+    }
+    if (!finished)
+      readAheadPos = (readAheadPos + 1) % mEntrySize;
+  }
+  if (!finished) {
+    // Ran into the write position without seeing a terminator (or there was
+    // nothing to read at all). tagBuffPos is at most DYNAMIC_MAX_STRING-2 here.
+    tagBuff[tagBuffPos] = '\0';
+  }
+  return tagBuff;
+}
+
+
diff --git a/tools/profiler/core/ProfileBuffer.h b/tools/profiler/core/ProfileBuffer.h
new file mode 100644
index 000000000..7d90fe385
--- /dev/null
+++ b/tools/profiler/core/ProfileBuffer.h
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PROFILE_BUFFER_H
+#define MOZ_PROFILE_BUFFER_H
+
+#include "ProfileEntry.h"
+#include "platform.h"
+#include "ProfileJSONWriter.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/RefCounted.h"
+
+// Reference-counted circular buffer of ProfileEntry records, plus the list of
+// markers that entries in the buffer may point at.
+class ProfileBuffer : public mozilla::RefCounted<ProfileBuffer> {
+public:
+  MOZ_DECLARE_REFCOUNTED_VIRTUAL_TYPENAME(ProfileBuffer)
+
+  // aEntrySize is the number of entries the buffer can hold.
+  explicit ProfileBuffer(int aEntrySize);
+
+  virtual ~ProfileBuffer();
+
+  // Appends an entry, overwriting the oldest one when the buffer is full.
+  void addTag(const ProfileEntry& aTag);
+  void StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId, double aSinceTime,
+                           JSContext* cx, UniqueStacks& aUniqueStacks);
+  void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId, double aSinceTime,
+                           UniqueStacks& aUniqueStacks);
+  void DuplicateLastSample(int aThreadId);
+
+  // Tags the marker with the current generation and stores it.
+  void addStoredMarker(ProfilerMarker* aStoredMarker);
+
+  // The following two methods are not signal safe! They delete markers.
+  void deleteExpiredStoredMarkers();
+  void reset();
+
+protected:
+  // Rebuilds the string stored in the entries following readPos into tagBuff.
+  char* processDynamicTag(int readPos, int* tagsConsumed, char* tagBuff);
+  int FindLastSampleOfThread(int aThreadId);
+
+public:
+  // Circular buffer 'Keep One Slot Open' implementation for simplicity
+  mozilla::UniquePtr<ProfileEntry[]> mEntries;
+
+  // Points to the next entry we will write to, which is also the one at which
+  // we need to stop reading.
+  int mWritePos;
+
+  // Points to the entry at which we can start reading.
+  int mReadPos;
+
+  // The number of entries in our buffer.
+  int mEntrySize;
+
+  // How many times mWritePos has wrapped around.
+  uint32_t mGeneration;
+
+  // Markers that marker entries in the buffer might refer to.
+  ProfilerMarkerLinkedList mStoredMarkers;
+};
+
+#endif
diff --git a/tools/profiler/core/ProfileEntry.cpp b/tools/profiler/core/ProfileEntry.cpp
new file mode 100644
index 000000000..22d53a6f3
--- /dev/null
+++ b/tools/profiler/core/ProfileEntry.cpp
@@ -0,0 +1,881 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <ostream>
+#include "platform.h"
+#include "mozilla/HashFunctions.h"
+
+#ifndef SPS_STANDALONE
+#include "nsThreadUtils.h"
+#include "nsXULAppAPI.h"
+
+// JS
+#include "jsapi.h"
+#include "jsfriendapi.h"
+#include "js/TrackedOptimizationInfo.h"
+#endif
+
+// Self
+#include "ProfileEntry.h"
+
+using mozilla::MakeUnique;
+using mozilla::UniquePtr;
+using mozilla::Maybe;
+using mozilla::Some;
+using mozilla::Nothing;
+using mozilla::JSONWriter;
+
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN ProfileEntry
+
+// Default entry: null string payload and a zero tag name.
+ProfileEntry::ProfileEntry() : mTagData(nullptr), mTagName(0) {}
+
+// String payload. aTagData must not need release (i.e. be a string from the
+// text segment): only the pointer is stored.
+ProfileEntry::ProfileEntry(char aTagName, const char* aTagData)
+  : mTagData(aTagData), mTagName(aTagName) {}
+
+// Marker payload; the pointer is stored as-is.
+ProfileEntry::ProfileEntry(char aTagName, ProfilerMarker* aTagMarker)
+  : mTagMarker(aTagMarker), mTagName(aTagName) {}
+
+// Opaque pointer payload.
+ProfileEntry::ProfileEntry(char aTagName, void* aTagPtr)
+  : mTagPtr(aTagPtr), mTagName(aTagName) {}
+
+// Floating-point payload.
+ProfileEntry::ProfileEntry(char aTagName, double aTagDouble)
+  : mTagDouble(aTagDouble), mTagName(aTagName) {}
+
+// Offset payload.
+ProfileEntry::ProfileEntry(char aTagName, uintptr_t aTagOffset)
+  : mTagOffset(aTagOffset), mTagName(aTagName) {}
+
+// Address payload.
+ProfileEntry::ProfileEntry(char aTagName, Address aTagAddress)
+  : mTagAddress(aTagAddress), mTagName(aTagName) {}
+
+// Integer payload.
+ProfileEntry::ProfileEntry(char aTagName, int aTagInt)
+  : mTagInt(aTagInt), mTagName(aTagName) {}
+
+// Single-character payload.
+ProfileEntry::ProfileEntry(char aTagName, char aTagChar)
+  : mTagChar(aTagChar), mTagName(aTagName) {}
+
+// True when this is a hint entry ('h') whose character payload is hintChar.
+bool ProfileEntry::is_ent_hint(char hintChar) {
+  if (mTagName != 'h') {
+    return false;
+  }
+  return mTagChar == hintChar;
+}
+
+// True when this is a hint entry ('h'), whatever its payload.
+bool ProfileEntry::is_ent_hint() {
+  const bool isHint = (mTagName == 'h');
+  return isHint;
+}
+
+// True when this entry's tag name equals tagChar.
+bool ProfileEntry::is_ent(char tagChar) {
+  const bool matches = (tagChar == mTagName);
+  return matches;
+}
+
+// Returns the payload viewed as a raw pointer. The tag name is not checked,
+// so the caller must know that this entry actually carries a pointer.
+void* ProfileEntry::get_tagPtr() {
+  void* const payload = mTagPtr;
+  return payload;
+}
+
+// END ProfileEntry
+////////////////////////////////////////////////////////////////////////
+
+// Scoped helper that writes a "schema" object property: construction opens
+// the object, each WriteField() maps a field name to the next index
+// (0, 1, 2, ...), and destruction closes the object.
+class JSONSchemaWriter
+{
+  JSONWriter& mWriter;
+  uint32_t mIndex;
+
+public:
+  explicit JSONSchemaWriter(JSONWriter& aWriter)
+    : mWriter(aWriter), mIndex(0)
+  {
+    mWriter.StartObjectProperty("schema");
+  }
+
+  ~JSONSchemaWriter()
+  {
+    mWriter.EndObject();
+  }
+
+  // Emits `aName: <next index>` and advances the index.
+  void WriteField(const char* aName)
+  {
+    const uint32_t slot = mIndex;
+    ++mIndex;
+    mWriter.IntProperty(aName, slot);
+  }
+};
+
+#ifndef SPS_STANDALONE
+// Callback object that serializes tracked-optimization type info. The
+// readType() calls for one site open a JSON object with a "typeset" array on
+// first use; the following operator() call closes that array (or opens the
+// object itself when no type was read) and finishes the object with the
+// "site" and "mirType" properties.
+class StreamOptimizationTypeInfoOp : public JS::ForEachTrackedOptimizationTypeInfoOp
+{
+  JSONWriter& mWriter;
+  UniqueJSONStrings& mUniqueStrings;
+  // True while a "typeset" array is open for the current site.
+  bool mStartedTypeList;
+
+public:
+  StreamOptimizationTypeInfoOp(JSONWriter& aWriter, UniqueJSONStrings& aUniqueStrings)
+    : mWriter(aWriter), mUniqueStrings(aUniqueStrings), mStartedTypeList(false)
+  { }
+
+  // Writes one element of the "typeset" array. name, location and lineno are
+  // optional and omitted from the object when absent.
+  void readType(const char* keyedBy, const char* name,
+                const char* location, Maybe<unsigned> lineno) override
+  {
+    const bool firstTypeOfSite = !mStartedTypeList;
+    if (firstTypeOfSite) {
+      mStartedTypeList = true;
+      mWriter.StartObjectElement();
+      mWriter.StartArrayProperty("typeset");
+    }
+
+    mWriter.StartObjectElement();
+    mUniqueStrings.WriteProperty(mWriter, "keyedBy", keyedBy);
+    if (name != nullptr) {
+      mUniqueStrings.WriteProperty(mWriter, "name", name);
+    }
+    if (location != nullptr) {
+      mUniqueStrings.WriteProperty(mWriter, "location", location);
+    }
+    if (lineno.isSome()) {
+      mWriter.IntProperty("line", *lineno);
+    }
+    mWriter.EndObject();
+  }
+
+  // Finishes the object for one site.
+  void operator()(JS::TrackedTypeSite site, const char* mirType) override
+  {
+    if (!mStartedTypeList) {
+      // No readType() call preceded this site, so the object is not open yet.
+      mWriter.StartObjectElement();
+    } else {
+      mWriter.EndArray();
+      mStartedTypeList = false;
+    }
+
+    mUniqueStrings.WriteProperty(mWriter, "site", JS::TrackedTypeSiteString(site));
+    mUniqueStrings.WriteProperty(mWriter, "mirType", mirType);
+    mWriter.EndObject();
+  }
+};
+
+// Many arrays in the JSON format (see ProfileEntry.h) are positional: element
+// i always holds the field that the schema assigns to index i. This RAII
+// class writes one such array. It opens an array element on construction and
+// closes it on destruction; each typed *Element(index, value) call first pads
+// with nulls up to `index` and then writes the value, so fields must be
+// written in increasing index order.
+//
+// Example usage:
+//
+//     // Schema of the elements in this kind of array: [FOO, BAR, BAZ]
+//     enum Schema : uint32_t {
+//       FOO = 0,
+//       BAR = 1,
+//       BAZ = 2
+//     };
+//
+//     AutoArraySchemaWriter writer(someJsonWriter, someUniqueStrings);
+//     if (shouldWriteFoo) {
+//       writer.IntElement(FOO, getFoo());
+//     }
+//     ... etc ...
+class MOZ_RAII AutoArraySchemaWriter
+{
+  friend class AutoObjectWriter;
+
+  SpliceableJSONWriter& mJSONWriter;
+  UniqueJSONStrings*    mStrings;
+  // Index of the first slot that has not been written yet.
+  uint32_t              mNextFreeIndex;
+
+public:
+  AutoArraySchemaWriter(SpliceableJSONWriter& aWriter, UniqueJSONStrings& aStrings)
+    : mJSONWriter(aWriter), mStrings(&aStrings), mNextFreeIndex(0)
+  {
+    mJSONWriter.StartArrayElement();
+  }
+
+  // Variant without a string table: StringElement() must not be called on a
+  // writer built this way (it release-asserts).
+  explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter)
+    : mJSONWriter(aWriter), mStrings(nullptr), mNextFreeIndex(0)
+  {
+    mJSONWriter.StartArrayElement();
+  }
+
+  ~AutoArraySchemaWriter()
+  {
+    mJSONWriter.EndArray();
+  }
+
+  // Writes nulls for every unwritten slot before aIndex and marks aIndex
+  // itself as taken; the caller writes the value for aIndex next.
+  void FillUpTo(uint32_t aIndex)
+  {
+    MOZ_ASSERT(aIndex >= mNextFreeIndex);
+    const uint32_t gap = aIndex - mNextFreeIndex;
+    mJSONWriter.NullElements(gap);
+    mNextFreeIndex = aIndex + 1;
+  }
+
+  void IntElement(uint32_t aIndex, uint32_t aValue)
+  {
+    FillUpTo(aIndex);
+    mJSONWriter.IntElement(aValue);
+  }
+
+  void DoubleElement(uint32_t aIndex, double aValue)
+  {
+    FillUpTo(aIndex);
+    mJSONWriter.DoubleElement(aValue);
+  }
+
+  // Writes aValue through the string table supplied at construction.
+  void StringElement(uint32_t aIndex, const char* aValue)
+  {
+    MOZ_RELEASE_ASSERT(mStrings);
+    FillUpTo(aIndex);
+    mStrings->WriteElement(mJSONWriter, aValue);
+  }
+};
+
+// Callback object that writes each optimization attempt as a two-element
+// schema array: [strategy, outcome], both as string-table entries.
+class StreamOptimizationAttemptsOp : public JS::ForEachTrackedOptimizationAttemptOp
+{
+  SpliceableJSONWriter& mWriter;
+  UniqueJSONStrings& mUniqueStrings;
+
+public:
+  StreamOptimizationAttemptsOp(SpliceableJSONWriter& aWriter, UniqueJSONStrings& aUniqueStrings)
+    : mWriter(aWriter)
+    , mUniqueStrings(aUniqueStrings)
+  { }
+
+  void operator()(JS::TrackedStrategy strategy, JS::TrackedOutcome outcome) override
+  {
+    enum Schema : uint32_t {
+      STRATEGY = 0,
+      OUTCOME = 1
+    };
+
+    const char* strategyName = JS::TrackedStrategyString(strategy);
+    const char* outcomeName = JS::TrackedOutcomeString(outcome);
+
+    AutoArraySchemaWriter row(mWriter, mUniqueStrings);
+    row.StringElement(STRATEGY, strategyName);
+    row.StringElement(OUTCOME, outcomeName);
+  }
+};
+
+// Callback object that appends every profiled frame reported for one JIT
+// return address to a UniqueStacks::Stack, counting them as it goes.
+class StreamJSFramesOp : public JS::ForEachProfiledFrameOp
+{
+  void* mReturnAddress;
+  UniqueStacks::Stack& mStack;
+  // Number of frames appended so far; also the depth of the next frame.
+  unsigned mDepth;
+
+public:
+  StreamJSFramesOp(void* aReturnAddr, UniqueStacks::Stack& aStack)
+    : mReturnAddress(aReturnAddr), mStack(aStack), mDepth(0)
+  { }
+
+  // Number of frames appended; asserts that there was at least one.
+  unsigned depth() const
+  {
+    MOZ_ASSERT(mDepth > 0);
+    return mDepth;
+  }
+
+  void operator()(const JS::ForEachProfiledFrameOp::FrameHandle& aFrameHandle) override
+  {
+    mStack.AppendFrame(
+      UniqueStacks::OnStackFrameKey(mReturnAddress, mDepth, aFrameHandle));
+    ++mDepth;
+  }
+};
+#endif
+
+// Returns the string-table index of aStr. On the first request for a string
+// it is assigned the next index (the current table size) and appended to the
+// string table writer.
+uint32_t UniqueJSONStrings::GetOrAddIndex(const char* aStr)
+{
+  const StringKey lookupKey(aStr);
+
+  const auto found = mStringToIndexMap.find(lookupKey);
+  if (found == mStringToIndexMap.end()) {
+    const uint32_t newIndex = mStringToIndexMap.size();
+    mStringToIndexMap[lookupKey] = newIndex;
+    mStringTableWriter.StringElement(aStr);
+    return newIndex;
+  }
+
+  return found->second;
+}
+
+// Two frame keys are equal when location, line, category, JIT address and
+// JIT depth all match.
+bool UniqueStacks::FrameKey::operator==(const FrameKey& aOther) const
+{
+  if (!(mLocation == aOther.mLocation)) {
+    return false;
+  }
+  if (!(mLine == aOther.mLine)) {
+    return false;
+  }
+  if (!(mCategory == aOther.mCategory)) {
+    return false;
+  }
+  if (!(mJITAddress == aOther.mJITAddress)) {
+    return false;
+  }
+  return mJITDepth == aOther.mJITDepth;
+}
+
+// Two stack keys are equal when they share the same prefix index and the same
+// leaf frame index. Equal prefixes are expected to carry equal prefix hashes.
+bool UniqueStacks::StackKey::operator==(const StackKey& aOther) const
+{
+  MOZ_ASSERT_IF(mPrefix == aOther.mPrefix, mPrefixHash == aOther.mPrefixHash);
+  if (!(mPrefix == aOther.mPrefix)) {
+    return false;
+  }
+  return mFrame == aOther.mFrame;
+}
+
+// Starts a stack consisting of the single frame aRoot, which is registered in
+// aUniqueStacks' frame table if it was not there already.
+UniqueStacks::Stack::Stack(UniqueStacks& aUniqueStacks, const OnStackFrameKey& aRoot)
+  : mUniqueStacks(aUniqueStacks),
+    mStack(aUniqueStacks.GetOrAddFrameIndex(aRoot))
+{ }
+
+// Extends the stack by one frame: the current stack becomes the prefix of the
+// new one, and aFrame becomes its leaf.
+void UniqueStacks::Stack::AppendFrame(const OnStackFrameKey& aFrame)
+{
+  // Everything describing the prefix has to be captured while mStack still
+  // holds the old stack, i.e. before UpdateHash() overwrites it.
+  const uint32_t oldHash = mStack.Hash();
+  const uint32_t oldIndex = mUniqueStacks.GetOrAddStackIndex(mStack);
+  const uint32_t leafIndex = mUniqueStacks.GetOrAddFrameIndex(aFrame);
+
+  mStack.UpdateHash(oldHash, oldIndex, leafIndex);
+}
+
+// Returns the stack-table index of the stack built so far, registering it in
+// the owning UniqueStacks if necessary.
+uint32_t UniqueStacks::Stack::GetOrAddIndex() const
+{
+  const uint32_t stackIndex = mUniqueStacks.GetOrAddStackIndex(mStack);
+  return stackIndex;
+}
+
+// Computes the hash of this key from the fields that are present: the
+// location string (when non-empty), then line, category and JIT address. The
+// JIT depth only contributes when a JIT address is present.
+uint32_t UniqueStacks::FrameKey::Hash() const
+{
+  uint32_t hash = 0;
+  // mLocation is a std::string in standalone builds and an nsCString
+  // otherwise; the two types spell both the emptiness test and the raw
+  // character accessor differently, so both live inside the #ifdef.
+#ifdef SPS_STANDALONE
+  if (!mLocation.empty()) {
+    hash = mozilla::HashString(mLocation.c_str());
+  }
+#else
+  if (!mLocation.IsEmpty()) {
+    hash = mozilla::HashString(mLocation.get());
+  }
+#endif
+  if (mLine.isSome()) {
+    hash = mozilla::AddToHash(hash, *mLine);
+  }
+  if (mCategory.isSome()) {
+    hash = mozilla::AddToHash(hash, *mCategory);
+  }
+  if (mJITAddress.isSome()) {
+    hash = mozilla::AddToHash(hash, *mJITAddress);
+    if (mJITDepth.isSome()) {
+      hash = mozilla::AddToHash(hash, *mJITDepth);
+    }
+  }
+  return hash;
+}
+
+// Hash of a stack: a root stack (no prefix) hashes its frame index alone; any
+// other stack folds its frame index into the hash of its prefix.
+uint32_t UniqueStacks::StackKey::Hash() const
+{
+  if (mPrefix.isSome()) {
+    return mozilla::AddToHash(*mPrefixHash, mFrame);
+  }
+  return mozilla::HashGeneric(mFrame);
+}
+
+// Creates a new Stack rooted at aRoot and bound to this UniqueStacks.
+UniqueStacks::Stack UniqueStacks::BeginStack(const OnStackFrameKey& aRoot)
+{
+  Stack rooted(*this, aRoot);
+  return rooted;
+}
+
+// Remembers aContext, starts with an empty frame table, and opens the bare
+// JSON lists that receive frame-table and stack-table elements.
+UniqueStacks::UniqueStacks(JSContext* aContext)
+  : mContext(aContext),
+    mFrameCount(0)
+{
+  mFrameTableWriter.StartBareList();
+  mStackTableWriter.StartBareList();
+}
+
+// Returns the stack-table index of aStack. A stack seen for the first time is
+// given the next index (the current map size) and streamed to the stack
+// table. The two variants differ only in the map API they use.
+#ifdef SPS_STANDALONE
+uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack)
+{
+  const auto known = mStackToIndexMap.find(aStack);
+  if (known == mStackToIndexMap.end()) {
+    const uint32_t assigned = mStackToIndexMap.size();
+    mStackToIndexMap[aStack] = assigned;
+    StreamStack(aStack);
+    return assigned;
+  }
+
+  return known->second;
+}
+#else
+uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack)
+{
+  uint32_t existing;
+  if (!mStackToIndexMap.Get(aStack, &existing)) {
+    const uint32_t assigned = mStackToIndexMap.Count();
+    mStackToIndexMap.Put(aStack, assigned);
+    StreamStack(aStack);
+    return assigned;
+  }
+
+  MOZ_ASSERT(existing < mStackToIndexMap.Count());
+  return existing;
+}
+#endif
+
+// Returns the frame-table index of aFrame, streaming the frame to the frame
+// table the first time it is seen.
+//
+// Indices come from the manual counter mFrameCount rather than from the map
+// size: in the non-standalone variant a non-canonical JIT frame key is stored
+// in the map under its canonical frame's index, so the map can hold more keys
+// than there are streamed frames.
+#ifdef SPS_STANDALONE
+uint32_t UniqueStacks::GetOrAddFrameIndex(const OnStackFrameKey& aFrame)
+{
+  const auto known = mFrameToIndexMap.find(aFrame);
+  if (known == mFrameToIndexMap.end()) {
+    const uint32_t assigned = mFrameCount++;
+    mFrameToIndexMap[aFrame] = assigned;
+    StreamFrame(aFrame);
+    return assigned;
+  }
+
+  MOZ_ASSERT(known->second < mFrameCount);
+  return known->second;
+}
+#else
+uint32_t UniqueStacks::GetOrAddFrameIndex(const OnStackFrameKey& aFrame)
+{
+  uint32_t cached;
+  if (mFrameToIndexMap.Get(aFrame, &cached)) {
+    MOZ_ASSERT(cached < mFrameCount);
+    return cached;
+  }
+
+  // A JIT frame whose address differs from its canonical address shares the
+  // index of the canonical frame instead of getting an entry of its own.
+  const JS::ForEachProfiledFrameOp::FrameHandle* handle = aFrame.mJITFrameHandle;
+  if (handle) {
+    void* canonicalAddr = handle->canonicalAddress();
+    if (canonicalAddr != *aFrame.mJITAddress) {
+      OnStackFrameKey canonicalKey(canonicalAddr, *aFrame.mJITDepth, *handle);
+      const uint32_t canonicalIndex = GetOrAddFrameIndex(canonicalKey);
+      mFrameToIndexMap.Put(aFrame, canonicalIndex);
+      return canonicalIndex;
+    }
+  }
+
+  const uint32_t assigned = mFrameCount++;
+  mFrameToIndexMap.Put(aFrame, assigned);
+  StreamFrame(aFrame);
+  return assigned;
+}
+#endif
+
+// Returns the cached frame depth for the JIT code address aAddr, or 0 when
+// nothing has been cached for it.
+uint32_t UniqueStacks::LookupJITFrameDepth(void* aAddr)
+{
+  const auto cached = mJITFrameDepthMap.find(aAddr);
+  if (cached == mJITFrameDepthMap.end()) {
+    return 0;
+  }
+
+  const uint32_t depth = cached->second;
+  MOZ_ASSERT(depth > 0);
+  return depth;
+}
+
+// Caches `depth` as the frame depth for the JIT code address aAddr,
+// replacing any value cached earlier.
+void UniqueStacks::AddJITFrameDepth(void* aAddr, unsigned depth)
+{
+  auto& slot = mJITFrameDepthMap[aAddr];
+  slot = depth;
+}
+
+// Closes the frame table's bare list and splices the accumulated elements
+// into aWriter.
+void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter)
+{
+  auto& frameTable = mFrameTableWriter;
+  frameTable.EndBareList();
+  aWriter.TakeAndSplice(frameTable.WriteFunc());
+}
+
+// Closes the stack table's bare list and splices the accumulated elements
+// into aWriter.
+void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter)
+{
+  auto& stackTable = mStackTableWriter;
+  stackTable.EndBareList();
+  aWriter.TakeAndSplice(stackTable.WriteFunc());
+}
+
+// Appends one row [prefix, frame] to the stack table. The prefix slot is
+// filled with null for a root stack.
+void UniqueStacks::StreamStack(const StackKey& aStack)
+{
+  enum Schema : uint32_t {
+    PREFIX = 0,
+    FRAME = 1
+  };
+
+  AutoArraySchemaWriter row(mStackTableWriter, mUniqueStrings);
+
+  const bool hasPrefix = aStack.mPrefix.isSome();
+  if (hasPrefix) {
+    row.IntElement(PREFIX, *aStack.mPrefix);
+  }
+
+  row.IntElement(FRAME, aStack.mFrame);
+}
+
+// Appends one row to the frame table describing aFrame, laid out as
+// [location, implementation, optimizations, line, category]. Slots that are
+// skipped before a written slot are padded with null by the schema writer.
+//
+// A frame without a JIT frame handle (always the case in standalone builds)
+// writes its location string plus optional line and category. A frame with a
+// JIT handle writes the handle's label, "ion" or "baseline" as the
+// implementation, and, when tracked optimizations exist, an object with the
+// type info, the attempts, and the script position they refer to.
+void UniqueStacks::StreamFrame(const OnStackFrameKey& aFrame)
+{
+  enum Schema : uint32_t {
+    LOCATION = 0,
+    IMPLEMENTATION = 1,
+    OPTIMIZATIONS = 2,
+    LINE = 3,
+    CATEGORY = 4
+  };
+
+  AutoArraySchemaWriter writer(mFrameTableWriter, mUniqueStrings);
+
+  // The two preprocessor branches below open the same block: conditional on
+  // the absence of a JIT handle in regular builds, unconditional in
+  // standalone builds.
+#ifndef SPS_STANDALONE
+  if (!aFrame.mJITFrameHandle) {
+#else
+  {
+#endif
+#ifdef SPS_STANDALONE
+    writer.StringElement(LOCATION, aFrame.mLocation.c_str());
+#else
+    writer.StringElement(LOCATION, aFrame.mLocation.get());
+#endif
+    if (aFrame.mLine.isSome()) {
+      writer.IntElement(LINE, *aFrame.mLine);
+    }
+    if (aFrame.mCategory.isSome()) {
+      writer.IntElement(CATEGORY, *aFrame.mCategory);
+    }
+  }
+#ifndef SPS_STANDALONE
+  else {
+    const JS::ForEachProfiledFrameOp::FrameHandle& jitFrame = *aFrame.mJITFrameHandle;
+
+    writer.StringElement(LOCATION, jitFrame.label());
+
+    JS::ProfilingFrameIterator::FrameKind frameKind = jitFrame.frameKind();
+    MOZ_ASSERT(frameKind == JS::ProfilingFrameIterator::Frame_Ion ||
+               frameKind == JS::ProfilingFrameIterator::Frame_Baseline);
+    writer.StringElement(IMPLEMENTATION,
+                         frameKind == JS::ProfilingFrameIterator::Frame_Ion
+                         ? "ion"
+                         : "baseline");
+
+    if (jitFrame.hasTrackedOptimizations()) {
+      // The optimizations object is written directly on the underlying
+      // writer, so only reserve its slot in the schema writer.
+      writer.FillUpTo(OPTIMIZATIONS);
+      mFrameTableWriter.StartObjectElement();
+      {
+        mFrameTableWriter.StartArrayProperty("types");
+        {
+          StreamOptimizationTypeInfoOp typeInfoOp(mFrameTableWriter, mUniqueStrings);
+          jitFrame.forEachOptimizationTypeInfo(typeInfoOp);
+        }
+        mFrameTableWriter.EndArray();
+
+        // script and pc are passed as out-parameters to
+        // forEachOptimizationAttempt() below and read afterwards.
+        JS::Rooted<JSScript*> script(mContext);
+        jsbytecode* pc;
+        mFrameTableWriter.StartObjectProperty("attempts");
+        {
+          {
+            // Scoped so that the "schema" object is closed before "data".
+            JSONSchemaWriter schema(mFrameTableWriter);
+            schema.WriteField("strategy");
+            schema.WriteField("outcome");
+          }
+
+          mFrameTableWriter.StartArrayProperty("data");
+          {
+            StreamOptimizationAttemptsOp attemptOp(mFrameTableWriter, mUniqueStrings);
+            jitFrame.forEachOptimizationAttempt(attemptOp, script.address(), &pc);
+          }
+          mFrameTableWriter.EndArray();
+        }
+        mFrameTableWriter.EndObject();
+
+        // The property name is optional; it is written through a fixed
+        // 512-byte escape buffer.
+        if (JSAtom* name = js::GetPropertyNameFromPC(script, pc)) {
+          char buf[512];
+          JS_PutEscapedFlatString(buf, mozilla::ArrayLength(buf), js::AtomToFlatString(name), 0);
+          mUniqueStrings.WriteProperty(mFrameTableWriter, "propertyName", buf);
+        }
+
+        unsigned line, column;
+        line = JS_PCToLineNumber(script, pc, &column);
+        mFrameTableWriter.IntProperty("line", line);
+        mFrameTableWriter.IntProperty("column", column);
+      }
+      mFrameTableWriter.EndObject();
+    }
+  }
+#endif
+}
+
+// One sample as assembled by ProfileBuffer::StreamSamplesToJSON before it is
+// written out by WriteSample. Every field except mStack is optional and is
+// only written to JSON when present.
+struct ProfileSample
+{
+  // Stack-table index of the sample's stack.
+  uint32_t mStack;
+  // Time taken from the 't' entry following the sample's 'T' entry, if any.
+  Maybe<double> mTime;
+  // Value of an 'r' entry.
+  Maybe<double> mResponsiveness;
+  // Value of an 'R' entry.
+  Maybe<double> mRSS;
+  // Value of a 'U' entry.
+  Maybe<double> mUSS;
+  // Value of an 'f' entry.
+  Maybe<int> mFrameNumber;
+  // Value of a 'p' entry.
+  Maybe<double> mPower;
+};
+
+// Writes aSample as one positional array:
+// [stack, time, responsiveness, rss, uss, frameNumber, power].
+// The stack index is always written; the other fields only when present.
+// Fields are emitted in increasing schema order, as the schema writer
+// requires.
+static void WriteSample(SpliceableJSONWriter& aWriter, ProfileSample& aSample)
+{
+  enum Schema : uint32_t {
+    STACK = 0,
+    TIME = 1,
+    RESPONSIVENESS = 2,
+    RSS = 3,
+    USS = 4,
+    FRAME_NUMBER = 5,
+    POWER = 6
+  };
+
+  AutoArraySchemaWriter row(aWriter);
+  const ProfileSample& s = aSample;
+
+  row.IntElement(STACK, s.mStack);
+  if (s.mTime.isSome())           { row.DoubleElement(TIME, *s.mTime); }
+  if (s.mResponsiveness.isSome()) { row.DoubleElement(RESPONSIVENESS, *s.mResponsiveness); }
+  if (s.mRSS.isSome())            { row.DoubleElement(RSS, *s.mRSS); }
+  if (s.mUSS.isSome())            { row.DoubleElement(USS, *s.mUSS); }
+  if (s.mFrameNumber.isSome())    { row.IntElement(FRAME_NUMBER, *s.mFrameNumber); }
+  if (s.mPower.isSome())          { row.DoubleElement(POWER, *s.mPower); }
+}
+
+// Walks the buffer from mReadPos to mWritePos and writes every sample that
+// belongs to aThreadId to aWriter.
+//
+// A 'T' entry switches the current thread; a 't' entry directly after it
+// gives the sample time. Entries are only considered while the current
+// thread is aThreadId and the sample time is either unknown or
+// >= aSinceTime. An 's' entry starts a new sample, whose frames are gathered
+// by scanning ahead; 'r', 'p', 'R', 'U' and 'f' entries annotate the sample
+// that is currently open. Frames and stacks are de-duplicated through
+// aUniqueStacks; aContext is only used to expand JIT frames.
+void ProfileBuffer::StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
+                                        double aSinceTime, JSContext* aContext,
+                                        UniqueStacks& aUniqueStacks)
+{
+  Maybe<ProfileSample> sample;
+  int readPos = mReadPos;
+  int currentThreadID = -1;
+  Maybe<double> currentTime;
+  UniquePtr<char[]> tagBuff = MakeUnique<char[]>(DYNAMIC_MAX_STRING);
+
+  while (readPos != mWritePos) {
+    ProfileEntry entry = mEntries[readPos];
+    if (entry.mTagName == 'T') {
+      currentThreadID = entry.mTagInt;
+      currentTime.reset();
+      int readAheadPos = (readPos + 1) % mEntrySize;
+      if (readAheadPos != mWritePos) {
+        ProfileEntry readAheadEntry = mEntries[readAheadPos];
+        if (readAheadEntry.mTagName == 't') {
+          currentTime = Some(readAheadEntry.mTagDouble);
+        }
+      }
+    }
+    if (currentThreadID == aThreadId && (currentTime.isNothing() || *currentTime >= aSinceTime)) {
+      switch (entry.mTagName) {
+      case 'r':
+        if (sample.isSome()) {
+          sample->mResponsiveness = Some(entry.mTagDouble);
+        }
+        break;
+      case 'p':
+        if (sample.isSome()) {
+          sample->mPower = Some(entry.mTagDouble);
+        }
+        break;
+      case 'R':
+        if (sample.isSome()) {
+          sample->mRSS = Some(entry.mTagDouble);
+        }
+        break;
+      case 'U':
+        if (sample.isSome()) {
+          sample->mUSS = Some(entry.mTagDouble);
+        }
+        break;
+      case 'f':
+        if (sample.isSome()) {
+          sample->mFrameNumber = Some(entry.mTagInt);
+        }
+        break;
+      case 's':
+        {
+          // end the previous sample if there was one
+          if (sample.isSome()) {
+            WriteSample(aWriter, *sample);
+            sample.reset();
+          }
+          // begin the next sample
+          sample.emplace();
+          sample->mTime = currentTime;
+
+          // Seek forward through the entire sample, looking for frames.
+          // This is an easier approach to reason about than adding more
+          // control variables and cases to the loop that goes through the
+          // buffer once.
+
+          UniqueStacks::Stack stack =
+            aUniqueStacks.BeginStack(UniqueStacks::OnStackFrameKey("(root)"));
+
+          int framePos = (readPos + 1) % mEntrySize;
+          while (framePos != mWritePos) {
+            ProfileEntry frame = mEntries[framePos];
+            // The sample ends at the next sample ('s') or thread ('T') entry.
+            // Test the entry at framePos itself, so that the boundary entry
+            // is never treated as a frame.
+            if (frame.mTagName == 's' || frame.mTagName == 'T') {
+              break;
+            }
+
+            // Number of buffer entries this frame occupies.
+            int incBy = 1;
+
+            // Read ahead to the next tag, if it's a 'd' tag process it now
+            const char* tagStringData = frame.mTagData;
+            int readAheadPos = (framePos + 1) % mEntrySize;
+            // Make sure the string is always null terminated if it fills up
+            // DYNAMIC_MAX_STRING-2
+            tagBuff[DYNAMIC_MAX_STRING-1] = '\0';
+
+            if (readAheadPos != mWritePos && mEntries[readAheadPos].mTagName == 'd') {
+              tagStringData = processDynamicTag(framePos, &incBy, tagBuff.get());
+            }
+
+            // Write one frame. It can have either
+            // 1. only location - 'l' containing a memory address
+            // 2. location and line number - 'c' followed by 'd's,
+            // an optional 'n' and an optional 'y'
+            // 3. a JIT return address - 'J' containing native code address
+            if (frame.mTagName == 'l') {
+              // Bug 753041
+              // We need a double cast here to tell GCC that we don't want to sign
+              // extend 32-bit addresses starting with 0xFXXXXXX.
+              unsigned long long pc = (unsigned long long)(uintptr_t)frame.mTagPtr;
+              snprintf(tagBuff.get(), DYNAMIC_MAX_STRING, "%#llx", pc);
+              stack.AppendFrame(UniqueStacks::OnStackFrameKey(tagBuff.get()));
+            } else if (frame.mTagName == 'c') {
+              UniqueStacks::OnStackFrameKey frameKey(tagStringData);
+              readAheadPos = (framePos + incBy) % mEntrySize;
+              if (readAheadPos != mWritePos &&
+                  mEntries[readAheadPos].mTagName == 'n') {
+                frameKey.mLine = Some((unsigned) mEntries[readAheadPos].mTagInt);
+                incBy++;
+              }
+              readAheadPos = (framePos + incBy) % mEntrySize;
+              if (readAheadPos != mWritePos &&
+                  mEntries[readAheadPos].mTagName == 'y') {
+                frameKey.mCategory = Some((unsigned) mEntries[readAheadPos].mTagInt);
+                incBy++;
+              }
+              stack.AppendFrame(frameKey);
+#ifndef SPS_STANDALONE
+            } else if (frame.mTagName == 'J') {
+              // A JIT frame may expand to multiple frames due to inlining.
+              void* pc = frame.mTagPtr;
+              unsigned depth = aUniqueStacks.LookupJITFrameDepth(pc);
+              if (depth == 0) {
+                // First time this address is seen: expand it through the JS
+                // engine and cache how many frames it produced.
+                StreamJSFramesOp framesOp(pc, stack);
+                JS::ForEachProfiledFrame(aContext, pc, framesOp);
+                aUniqueStacks.AddJITFrameDepth(pc, framesOp.depth());
+              } else {
+                // Known address: re-create the keys from the cached depth.
+                for (unsigned i = 0; i < depth; i++) {
+                  UniqueStacks::OnStackFrameKey inlineFrameKey(pc, i);
+                  stack.AppendFrame(inlineFrameKey);
+                }
+              }
+#endif
+            }
+            framePos = (framePos + incBy) % mEntrySize;
+          }
+
+          sample->mStack = stack.GetOrAddIndex();
+          break;
+        }
+      }
+    }
+    readPos = (readPos + 1) % mEntrySize;
+  }
+  // Flush the sample that was still open when the buffer ended.
+  if (sample.isSome()) {
+    WriteSample(aWriter, *sample);
+  }
+}
+
+// Walks the buffer from mReadPos to mWritePos and streams every marker ('m'
+// entry) that follows a 'T' entry for aThreadId and whose time is
+// >= aSinceTime.
+void ProfileBuffer::StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId,
+                                        double aSinceTime, UniqueStacks& aUniqueStacks)
+{
+  int currentThreadID = -1;
+  for (int readPos = mReadPos;
+       readPos != mWritePos;
+       readPos = (readPos + 1) % mEntrySize) {
+    ProfileEntry entry = mEntries[readPos];
+
+    if (entry.mTagName == 'T') {
+      // Everything up to the next 'T' entry belongs to this thread.
+      currentThreadID = entry.mTagInt;
+      continue;
+    }
+    if (currentThreadID != aThreadId || entry.mTagName != 'm') {
+      continue;
+    }
+
+    const ProfilerMarker* marker = entry.getMarker();
+    if (marker->GetTime() >= aSinceTime) {
+      entry.getMarker()->StreamJSON(aWriter, aUniqueStacks);
+    }
+  }
+}
+
+// Scans the buffer backwards, from the most recently written entry down to
+// mReadPos, and returns the position of the first 'T' entry found for
+// aThreadId, or -1 when there is none.
+int ProfileBuffer::FindLastSampleOfThread(int aThreadId)
+{
+  // Stepping back by one is done as "+ mEntrySize - 1" so that the operand of
+  // the modulus never becomes negative.
+  const int stopPos = (mReadPos + mEntrySize - 1) % mEntrySize;
+  int readPos = (mWritePos + mEntrySize - 1) % mEntrySize;
+
+  while (readPos != stopPos) {
+    ProfileEntry entry = mEntries[readPos];
+    const bool isThreadStart = (entry.mTagName == 'T');
+    if (isThreadStart && entry.mTagInt == aThreadId) {
+      return readPos;
+    }
+    readPos = (readPos + mEntrySize - 1) % mEntrySize;
+  }
+
+  return -1;
+}
+
+// Appends a copy of the most recent sample of aThreadId to the buffer, with
+// its 't' entry replaced by the current time and its markers left out. Does
+// nothing when the buffer holds no sample for that thread.
+void ProfileBuffer::DuplicateLastSample(int aThreadId)
+{
+  int lastSampleStartPos = FindLastSampleOfThread(aThreadId);
+  if (lastSampleStartPos == -1) {
+    return;
+  }
+
+  MOZ_ASSERT(mEntries[lastSampleStartPos].mTagName == 'T');
+
+  // Re-emit the sample's leading 'T' entry first.
+  addTag(mEntries[lastSampleStartPos]);
+
+  // Go through the whole entry and duplicate it, until we find the next one.
+  // NOTE(review): this loop reads from the buffer while addTag() appends to
+  // it. Presumably addTag() advances mWritePos, in which case the scan ends
+  // at the next 'T' entry, which may be the copy appended just above.
+  // Confirm against addTag(), including what happens when the buffer wraps.
+  for (int readPos = (lastSampleStartPos + 1) % mEntrySize;
+       readPos != mWritePos;
+       readPos = (readPos + 1) % mEntrySize) {
+    switch (mEntries[readPos].mTagName) {
+      case 'T':
+        // We're done.
+        return;
+      case 't':
+        // Copy with new time
+        addTag(ProfileEntry('t', (mozilla::TimeStamp::Now() - sStartTime).ToMilliseconds()));
+        break;
+      case 'm':
+        // Don't copy markers
+        break;
+      // Copy anything else verbatim, including tags we don't know about
+      // (e.g. L, B, S, c, s, d, l, f, h, r, p).
+      default:
+        addTag(mEntries[readPos]);
+        break;
+    }
+  }
+}
+
+// END ProfileBuffer
+////////////////////////////////////////////////////////////////////////
+
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN ThreadProfile
+
+// END ThreadProfile
+////////////////////////////////////////////////////////////////////////
diff --git a/tools/profiler/core/ProfileEntry.h b/tools/profiler/core/ProfileEntry.h
new file mode 100644
index 000000000..b82a2f271
--- /dev/null
+++ b/tools/profiler/core/ProfileEntry.h
@@ -0,0 +1,407 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PROFILE_ENTRY_H
+#define MOZ_PROFILE_ENTRY_H
+
+#include <ostream>
+#include "GeckoProfiler.h"
+#include "platform.h"
+#include "ProfileJSONWriter.h"
+#include "ProfilerBacktrace.h"
+#include "mozilla/RefPtr.h"
+#include <string>
+#include <map>
+#ifndef SPS_STANDALONE
+#include "js/ProfilingFrameIterator.h"
+#include "js/TrackedOptimizationInfo.h"
+#include "nsHashKeys.h"
+#include "nsDataHashtable.h"
+#endif
+#include "mozilla/Maybe.h"
+#include "mozilla/Vector.h"
+#ifndef SPS_STANDALONE
+#include "gtest/MozGtestFriend.h"
+#else
+#define FRIEND_TEST(a, b) // TODO Support standalone gtest
+#endif
+#include "mozilla/HashFunctions.h"
+#include "mozilla/UniquePtr.h"
+
+class ThreadProfile;
+
+// NB: Packing this structure has been shown to cause SIGBUS issues on ARM.
+#ifndef __arm__
+#pragma pack(push, 1)
+#endif
+
+// One record of the profile buffer: a one-character tag name plus a payload
+// stored in a union. The tag name identifies the kind of entry; which union
+// member is meaningful depends on it (e.g. getMarker() expects 'm').
+class ProfileEntry
+{
+public:
+  // Creates an entry with a null payload and a zero tag name.
+  ProfileEntry();
+
+  // One constructor per payload type; each stores the tag name and the
+  // matching union member.
+  // aTagData must not need release (i.e. be a string from the text segment)
+  ProfileEntry(char aTagName, const char *aTagData);
+  ProfileEntry(char aTagName, void *aTagPtr);
+  ProfileEntry(char aTagName, ProfilerMarker *aTagMarker);
+  ProfileEntry(char aTagName, double aTagDouble);
+  ProfileEntry(char aTagName, uintptr_t aTagOffset);
+  ProfileEntry(char aTagName, Address aTagAddress);
+  ProfileEntry(char aTagName, int aTagLine);
+  ProfileEntry(char aTagName, char aTagChar);
+  // True for a hint entry ('h') whose character payload equals hintChar.
+  bool is_ent_hint(char hintChar);
+  // True for any hint entry ('h').
+  bool is_ent_hint();
+  // True when the tag name equals tagName.
+  bool is_ent(char tagName);
+  // Returns the payload as a raw pointer, without checking the tag name.
+  void* get_tagPtr();
+  // Returns the marker payload; only valid for 'm' entries.
+  const ProfilerMarker* getMarker() {
+    MOZ_ASSERT(mTagName == 'm');
+    return mTagMarker;
+  }
+
+  char getTagName() const { return mTagName; }
+
+private:
+  FRIEND_TEST(ThreadProfile, InsertOneTag);
+  FRIEND_TEST(ThreadProfile, InsertOneTagWithTinyBuffer);
+  FRIEND_TEST(ThreadProfile, InsertTagsNoWrap);
+  FRIEND_TEST(ThreadProfile, InsertTagsWrap);
+  FRIEND_TEST(ThreadProfile, MemoryMeasure);
+  // ProfileBuffer reads and writes the payload members directly.
+  friend class ProfileBuffer;
+  // Payload; all members share the same storage.
+  union {
+    const char* mTagData;
+    char        mTagChars[sizeof(void*)];
+    void*       mTagPtr;
+    ProfilerMarker* mTagMarker;
+    double      mTagDouble;
+    Address     mTagAddress;
+    uintptr_t   mTagOffset;
+    int         mTagInt;
+    char        mTagChar;
+  };
+  // Kind of entry.
+  char mTagName;
+};
+
+#ifndef __arm__
+#pragma pack(pop)
+#endif
+
+// De-duplicating string table for JSON output. Each distinct string is
+// written once to an internal bare JSON list and is referred to everywhere
+// else by its integer index in that list.
+class UniqueJSONStrings
+{
+public:
+  UniqueJSONStrings() {
+    mStringTableWriter.StartBareList();
+  }
+
+  // Splices the accumulated string-table elements into aWriter.
+  void SpliceStringTableElements(SpliceableJSONWriter& aWriter) {
+    aWriter.TakeAndSplice(mStringTableWriter.WriteFunc());
+  }
+
+  // Writes the property aName whose value is the table index of aStr.
+  void WriteProperty(mozilla::JSONWriter& aWriter, const char* aName, const char* aStr) {
+    aWriter.IntProperty(aName, GetOrAddIndex(aStr));
+  }
+
+  // Writes an array element whose value is the table index of aStr.
+  void WriteElement(mozilla::JSONWriter& aWriter, const char* aStr) {
+    aWriter.IntElement(GetOrAddIndex(aStr));
+  }
+
+  // Returns the table index of aStr, adding it to the table if needed.
+  uint32_t GetOrAddIndex(const char* aStr);
+
+  // Map key that owns a private copy of the string and caches its hash.
+  struct StringKey {
+
+    explicit StringKey(const char* aStr)
+     : mStr(strdup(aStr))
+    {
+      mHash = mozilla::HashString(mStr);
+    }
+
+    StringKey(const StringKey& aOther)
+      : mStr(strdup(aOther.mStr))
+    {
+      mHash = aOther.mHash;
+    }
+
+    ~StringKey() {
+      free(mStr);
+    }
+
+    uint32_t Hash() const;
+    bool operator==(const StringKey& aOther) const {
+      return strcmp(mStr, aOther.mStr) == 0;
+    }
+    // Orders keys by hash first (cheap) and by string content second. The
+    // tie-break matters: std::map treats two keys as the same key when
+    // neither is less than the other, so ordering by hash alone would make
+    // two different strings with colliding hashes share one table index.
+    bool operator<(const StringKey& aOther) const {
+      if (mHash != aOther.mHash) {
+        return mHash < aOther.mHash;
+      }
+      return strcmp(mStr, aOther.mStr) < 0;
+    }
+
+  private:
+    uint32_t mHash;
+    char* mStr;
+  };
+private:
+  SpliceableChunkedJSONWriter mStringTableWriter;
+  std::map<StringKey, uint32_t> mStringToIndexMap;
+};
+
+class UniqueStacks
+{
+public:
+  // Identity of one frame in the frame table. A key is built either from a
+  // location string (line and category may be set afterwards) or from a JIT
+  // code address plus inlining depth. The hash is cached at construction.
+  struct FrameKey {
+#ifdef SPS_STANDALONE
+    std::string mLocation;
+#else
+    // This cannot be a std::string, as it is not memmove compatible, which
+    // is used by nsHashTable
+    nsCString mLocation;
+#endif
+    mozilla::Maybe<unsigned> mLine;
+    mozilla::Maybe<unsigned> mCategory;
+    mozilla::Maybe<void*> mJITAddress;
+    mozilla::Maybe<uint32_t> mJITDepth;
+
+    // Key for a frame described by a location string.
+    explicit FrameKey(const char* aLocation)
+     : mLocation(aLocation)
+    {
+      mHash = Hash();
+    }
+
+    // Copies every field and recomputes the cached hash from the copies.
+    FrameKey(const FrameKey& aToCopy)
+     : mLocation(aToCopy.mLocation)
+     , mLine(aToCopy.mLine)
+     , mCategory(aToCopy.mCategory)
+     , mJITAddress(aToCopy.mJITAddress)
+     , mJITDepth(aToCopy.mJITDepth)
+    {
+      mHash = Hash();
+    }
+
+    // Key for a JIT frame; the location string is left empty.
+    FrameKey(void* aJITAddress, uint32_t aJITDepth)
+     : mJITAddress(mozilla::Some(aJITAddress))
+     , mJITDepth(mozilla::Some(aJITDepth))
+    {
+      mHash = Hash();
+    }
+
+    uint32_t Hash() const;
+    bool operator==(const FrameKey& aOther) const;
+    // NOTE(review): this orders by the cached hash only, and the hash is not
+    // refreshed when mLine or mCategory are assigned after construction. In
+    // an ordered container, two different keys with the same hash would
+    // compare as equivalent -- confirm whether that is acceptable.
+    bool operator<(const FrameKey& aOther) const {
+      return mHash < aOther.mHash;
+    }
+
+  private:
+    // Hash computed by the constructor.
+    uint32_t mHash;
+  };
+
+  // A FrameKey that holds a scoped reference to a JIT FrameHandle.
+  // Only the address of the handle is stored (no copy is made), so the handle
+  // has to outlive the key; the key itself is restricted to the stack.
+  struct MOZ_STACK_CLASS OnStackFrameKey : public FrameKey {
+    // Key for a frame described by a location string; no JIT handle.
+    explicit OnStackFrameKey(const char* aLocation)
+      : FrameKey(aLocation)
+#ifndef SPS_STANDALONE
+      , mJITFrameHandle(nullptr)
+#endif
+    { }
+
+    // Copies the key; the copy points at the same JIT handle, if any.
+    OnStackFrameKey(const OnStackFrameKey& aToCopy)
+      : FrameKey(aToCopy)
+#ifndef SPS_STANDALONE
+      , mJITFrameHandle(aToCopy.mJITFrameHandle)
+#endif
+    { }
+
+#ifndef SPS_STANDALONE
+    // Null unless the key was built from a FrameHandle.
+    const JS::ForEachProfiledFrameOp::FrameHandle* mJITFrameHandle;
+
+    // Key for a JIT frame identified by address and depth only.
+    OnStackFrameKey(void* aJITAddress, unsigned aJITDepth)
+      : FrameKey(aJITAddress, aJITDepth)
+      , mJITFrameHandle(nullptr)
+    { }
+
+    // Key for a JIT frame that also remembers the handle describing it.
+    OnStackFrameKey(void* aJITAddress, unsigned aJITDepth,
+                    const JS::ForEachProfiledFrameOp::FrameHandle& aJITFrameHandle)
+      : FrameKey(aJITAddress, aJITDepth)
+      , mJITFrameHandle(&aJITFrameHandle)
+    { }
+#endif
+  };
+
+  struct StackKey {
+    mozilla::Maybe<uint32_t> mPrefixHash;
+    mozilla::Maybe<uint32_t> mPrefix;
+    uint32_t mFrame;
+
+    explicit StackKey(uint32_t aFrame)
+     : mFrame(aFrame)
+    {
+      mHash = Hash();
+    }
+
+    uint32_t Hash() const;
+    bool operator==(const StackKey& aOther) const;
+    bool operator<(const StackKey& aOther) const {
+      return mHash < aOther.mHash;
+    }
+
+    void UpdateHash(uint32_t aPrefixHash, uint32_t aPrefix, uint32_t aFrame) {
+      mPrefixHash = mozilla::Some(aPrefixHash);
+      mPrefix = mozilla::Some(aPrefix);
+      mFrame = aFrame;
+      mHash = Hash();
+    }
+
+  private:
+    uint32_t mHash;
+  };
+
+  class Stack {
+  public:
+    Stack(UniqueStacks& aUniqueStacks, const OnStackFrameKey& aRoot);
+
+    void AppendFrame(const OnStackFrameKey& aFrame);
+    uint32_t GetOrAddIndex() const;
+
+  private:
+    UniqueStacks& mUniqueStacks;
+    StackKey mStack;
+  };
+
+  explicit UniqueStacks(JSContext* aContext);
+
+  Stack BeginStack(const OnStackFrameKey& aRoot);
+  uint32_t LookupJITFrameDepth(void* aAddr);
+  void AddJITFrameDepth(void* aAddr, unsigned depth);
+  void SpliceFrameTableElements(SpliceableJSONWriter& aWriter);
+  void SpliceStackTableElements(SpliceableJSONWriter& aWriter);
+
+private:
+  uint32_t GetOrAddFrameIndex(const OnStackFrameKey& aFrame);
+  uint32_t GetOrAddStackIndex(const StackKey& aStack);
+  void StreamFrame(const OnStackFrameKey& aFrame);
+  void StreamStack(const StackKey& aStack);
+
+public:
+  UniqueJSONStrings mUniqueStrings;
+
+private:
+  JSContext* mContext;
+
+  // To avoid incurring JitcodeGlobalTable lookup costs for every JIT frame,
+  // we cache the depth of frames keyed by JIT code address. If an address a
+  // maps to a depth d, then frames keyed by a for depths 0 to d are
+  // guaranteed to be in mFrameToIndexMap.
+  std::map<void*, uint32_t> mJITFrameDepthMap;
+
+  uint32_t mFrameCount;
+  SpliceableChunkedJSONWriter mFrameTableWriter;
+#ifdef SPS_STANDALNOE
+  std::map<FrameKey, uint32_t> mFrameToIndexMap;
+#else
+  nsDataHashtable<nsGenericHashKey<FrameKey>, uint32_t> mFrameToIndexMap;
+#endif
+
+  SpliceableChunkedJSONWriter mStackTableWriter;
+
+  // This sucks but this is really performance critical, nsDataHashtable is way faster
+  // than map/unordered_map but nsDataHashtable is tied to xpcom so we ifdef
+  // until we can find a better solution.
+#ifdef SPS_STANDALONE
+  std::map<StackKey, uint32_t> mStackToIndexMap;
+#else
+  nsDataHashtable<nsGenericHashKey<StackKey>, uint32_t> mStackToIndexMap;
+#endif
+};
+
+//
+// ThreadProfile JSON Format
+// -------------------------
+//
+// The profile contains much duplicate information. The output JSON of the
+// profile attempts to deduplicate strings, frames, and stack prefixes, to cut
+// down on size and to increase JSON streaming speed. Deduplicated values are
+// streamed as indices into their respective tables.
+//
+// Further, arrays of objects with the same set of properties (e.g., samples,
+// frames) are output as arrays according to a schema instead of an object
+// with property names. A property that is not present is represented in the
+// array as null or undefined.
+//
+// The format of the thread profile JSON is shown by the following example
+// with 1 sample and 1 marker:
+//
+// {
+//   "name": "Foo",
+//   "tid": 42,
+//   "samples":
+//   {
+//     "schema":
+//     {
+//       "stack": 0,           /* index into stackTable */
+//       "time": 1,            /* number */
+//       "responsiveness": 2,  /* number */
+//       "rss": 3,             /* number */
+//       "uss": 4,             /* number */
+//       "frameNumber": 5,     /* number */
+//       "power": 6            /* number */
+//     },
+//     "data":
+//     [
+//       [ 1, 0.0, 0.0 ]       /* { stack: 1, time: 0.0, responsiveness: 0.0 } */
+//     ]
+//   },
+//
+//   "markers":
+//   {
+//     "schema":
+//     {
+//       "name": 0,            /* index into stringTable */
+//       "time": 1,            /* number */
+//       "data": 2             /* arbitrary JSON */
+//     },
+//     "data":
+//     [
+//       [ 3, 0.1 ]            /* { name: 'example marker', time: 0.1 } */
+//     ]
+//   },
+//
+//   "stackTable":
+//   {
+//     "schema":
+//     {
+//       "prefix": 0,          /* index into stackTable */
+//       "frame": 1            /* index into frameTable */
+//     },
+//     "data":
+//     [
+//       [ null, 0 ],          /* (root) */
+//       [ 0,    1 ]           /* (root) > foo.js */
+//     ]
+//   },
+//
+//   "frameTable":
+//   {
+//     "schema":
+//     {
+//       "location": 0,        /* index into stringTable */
+//       "implementation": 1,  /* index into stringTable */
+//       "optimizations": 2,   /* arbitrary JSON */
+//       "line": 3,            /* number */
+//       "category": 4         /* number */
+//     },
+//     "data":
+//     [
+//       [ 0 ],                /* { location: '(root)' } */
+//       [ 1, 2 ]              /* { location: 'foo.js', implementation: 'baseline' } */
+//     ]
+//   },
+//
+//   "stringTable":
+//   [
+//     "(root)",
+//     "foo.js",
+//     "baseline",
+//     "example marker"
+//   ]
+// }
+//
+
+#endif /* ndef MOZ_PROFILE_ENTRY_H */
diff --git a/tools/profiler/core/ProfileJSONWriter.cpp b/tools/profiler/core/ProfileJSONWriter.cpp
new file mode 100644
index 000000000..65a9425a3
--- /dev/null
+++ b/tools/profiler/core/ProfileJSONWriter.cpp
@@ -0,0 +1,115 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/HashFunctions.h"
+
+#include "ProfileJSONWriter.h"
+
+// Appends the NUL-terminated string aStr to the current chunk, allocating a
+// new chunk first when the string does not fit. The current chunk stays
+// NUL-terminated and its recorded length grows by strlen(aStr).
+void
+ChunkedJSONWriteFunc::Write(const char* aStr)
+{
+  MOZ_ASSERT(mChunkPtr >= mChunkList.back().get() && mChunkPtr <= mChunkEnd);
+  MOZ_ASSERT(mChunkEnd >= mChunkList.back().get() + mChunkLengths.back());
+  MOZ_ASSERT(*mChunkPtr == '\0');
+
+  const size_t length = strlen(aStr);
+
+  // Most strings are small, but subprocess profiles (e.g., from the content
+  // process in e10s) may be huge. A string of kChunkSize bytes or more gets a
+  // chunk of its own; a smaller one whose terminator would land at or past
+  // mChunkEnd starts a fresh kChunkSize chunk.
+  if (length >= kChunkSize) {
+    AllocChunk(length + 1);
+  } else if (mChunkPtr + length >= mChunkEnd) {
+    AllocChunk(kChunkSize);
+  }
+
+  // AllocChunk() moves mChunkPtr, so the terminator slot is computed here.
+  char* const terminator = mChunkPtr + length;
+  memcpy(mChunkPtr, aStr, length);
+  *terminator = '\0';
+  mChunkPtr = terminator;
+  mChunkLengths.back() += length;
+}
+
+// Returns a new NUL-terminated buffer with every chunk concatenated in order.
+mozilla::UniquePtr<char[]>
+ChunkedJSONWriteFunc::CopyData() const
+{
+  MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+  size_t bytesNeeded = 1;  // room for the trailing '\0'
+  for (size_t idx = 0; idx < mChunkLengths.length(); ++idx) {
+    MOZ_ASSERT(strlen(mChunkList[idx].get()) == mChunkLengths[idx]);
+    bytesNeeded += mChunkLengths[idx];
+  }
+  mozilla::UniquePtr<char[]> flat = mozilla::MakeUnique<char[]>(bytesNeeded);
+  char* cursor = flat.get();
+  for (size_t idx = 0; idx < mChunkList.length(); ++idx) {
+    memcpy(cursor, mChunkList[idx].get(), mChunkLengths[idx]);
+    cursor += mChunkLengths[idx];
+  }
+  *cursor = '\0';
+  return flat;
+}
+
+void
+ChunkedJSONWriteFunc::Take(ChunkedJSONWriteFunc&& aOther)
+{
+  // Adopt every chunk of aOther together with its recorded length.
+  for (size_t idx = 0; idx < aOther.mChunkList.length(); ++idx) {
+    MOZ_ALWAYS_TRUE(mChunkLengths.append(aOther.mChunkLengths[idx]));
+    MOZ_ALWAYS_TRUE(mChunkList.append(mozilla::Move(aOther.mChunkList[idx])));
+  }
+  // The adopted tail chunk is treated as full: write pointer == chunk end.
+  mChunkEnd = mChunkPtr = mChunkList.back().get() + mChunkLengths.back();
+  aOther.mChunkPtr = aOther.mChunkEnd = nullptr;  // aOther is now empty
+  aOther.mChunkList.clear();
+  aOther.mChunkLengths.clear();
+}
+
+void
+ChunkedJSONWriteFunc::AllocChunk(size_t aChunkSize)
+{
+  MOZ_ASSERT(mChunkLengths.length() == mChunkList.length());
+  mozilla::UniquePtr<char[]> fresh = mozilla::MakeUnique<char[]>(aChunkSize);
+  fresh[0] = '\0';  // a new chunk starts out as the empty string
+  mChunkPtr = fresh.get();  // subsequent writes land at the chunk start
+  mChunkEnd = fresh.get() + aChunkSize;
+  MOZ_ALWAYS_TRUE(mChunkLengths.append(0));
+  MOZ_ALWAYS_TRUE(mChunkList.append(mozilla::Move(fresh)));
+}
+
+void
+SpliceableJSONWriter::TakeAndSplice(ChunkedJSONWriteFunc* aFunc)
+{
+  Separator();
+  // Copy the text of each chunk through the generic write function.
+  for (size_t idx = 0; idx < aFunc->mChunkList.length(); ++idx) {
+    WriteFunc()->Write(aFunc->mChunkList[idx].get());
+  }
+  aFunc->mChunkPtr = aFunc->mChunkEnd = nullptr;  // aFunc is now empty
+  aFunc->mChunkList.clear();
+  aFunc->mChunkLengths.clear();
+  mNeedComma[mDepth] = true;
+}
+
+void
+SpliceableJSONWriter::Splice(const char* aStr)
+{
+  Separator();
+  WriteFunc()->Write(aStr);  // aStr is passed through unmodified
+  mNeedComma[mDepth] = true;
+}
+
+void
+SpliceableChunkedJSONWriter::TakeAndSplice(ChunkedJSONWriteFunc* aFunc)
+{
+  Separator();
+  WriteFunc()->Take(mozilla::Move(*aFunc));  // chunks are moved, not copied
+  mNeedComma[mDepth] = true;
+}
diff --git a/tools/profiler/core/ProfileJSONWriter.h b/tools/profiler/core/ProfileJSONWriter.h
new file mode 100644
index 000000000..d9e2115f9
--- /dev/null
+++ b/tools/profiler/core/ProfileJSONWriter.h
@@ -0,0 +1,126 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILEJSONWRITER_H
+#define PROFILEJSONWRITER_H
+
+#include <ostream>
+#include <string>
+#include <string.h>
+
+#include "mozilla/JSONWriter.h"
+#include "mozilla/UniquePtr.h"
+
+class SpliceableChunkedJSONWriter;
+
+// Profile JSONs are usually large enough that we want to avoid reallocating
+// a single buffer as it grows. Additionally, the contents of the profile are
+// not accessed until the profile is entirely written. For these reasons we
+// use a chunked writer that keeps an array of chunks, which are concatenated
+// (see CopyData) after writing is finished.
+class ChunkedJSONWriteFunc : public mozilla::JSONWriteFunc
+{
+public:
+  friend class SpliceableJSONWriter;  // its TakeAndSplice reads the chunks
+
+  ChunkedJSONWriteFunc() {
+    AllocChunk(kChunkSize);
+  }
+  // True when this writer holds no chunks (mChunkPtr is null).
+  bool IsEmpty() const {
+    MOZ_ASSERT_IF(!mChunkPtr, !mChunkEnd &&
+                              mChunkList.length() == 0 &&
+                              mChunkLengths.length() == 0);
+    return !mChunkPtr;
+  }
+
+  void Write(const char* aStr) override;
+  mozilla::UniquePtr<char[]> CopyData() const;
+  void Take(ChunkedJSONWriteFunc&& aOther);
+
+private:
+  void AllocChunk(size_t aChunkSize);
+
+  static const size_t kChunkSize = 4096 * 512;  // 2 MiB
+
+  // Pointer for writing inside the current chunk.
+  //
+  // The current chunk is always at the back of mChunkList, i.e.,
+  // mChunkList.back() <= mChunkPtr <= mChunkEnd.
+  char* mChunkPtr;
+
+  // Pointer to the end of the current chunk.
+  //
+  // The current chunk is always at the back of mChunkList, i.e.,
+  // mChunkEnd >= mChunkList.back() + mChunkLengths.back().
+  char* mChunkEnd;
+
+  // List of chunks and their lengths.
+  //
+  // For all i, the length of the string in mChunkList[i] is
+  // mChunkLengths[i].
+  mozilla::Vector<mozilla::UniquePtr<char[]>> mChunkList;
+  mozilla::Vector<size_t> mChunkLengths;
+};
+
+struct OStreamJSONWriteFunc : public mozilla::JSONWriteFunc
+{
+  explicit OStreamJSONWriteFunc(std::ostream& aStream)
+    : mStream(aStream)
+  { }
+  // Forwards aStr to the wrapped stream with operator<<.
+  void Write(const char* aStr) override {
+    mStream << aStr;
+  }
+  // Held by reference: the stream must outlive this object.
+  std::ostream& mStream;
+};
+
+class SpliceableJSONWriter : public mozilla::JSONWriter
+{
+public:
+  explicit SpliceableJSONWriter(mozilla::UniquePtr<mozilla::JSONWriteFunc> aWriter)
+    : JSONWriter(mozilla::Move(aWriter))
+  { }
+  // Bare lists pass "" as the collection delimiters (see EndBareList).
+  void StartBareList(CollectionStyle aStyle = SingleLineStyle) {
+    StartCollection(nullptr, "", aStyle);
+  }
+
+  void EndBareList() {
+    EndCollection("");
+  }
+  // Calls NullElement() aCount times.
+  void NullElements(uint32_t aCount) {
+    for (uint32_t i = 0; i < aCount; i++) {
+      NullElement();
+    }
+  }
+  // Splice(const char*) writes aStr unmodified after calling Separator().
+  void Splice(const ChunkedJSONWriteFunc* aFunc);
+  void Splice(const char* aStr);
+
+  // Takes the chunks from aFunc and writes them. If a move is not possible
+  // (e.g., using OStreamJSONWriteFunc), aFunc's chunks are copied and its
+  // storage cleared.
+  virtual void TakeAndSplice(ChunkedJSONWriteFunc* aFunc);
+};
+
+class SpliceableChunkedJSONWriter : public SpliceableJSONWriter
+{
+public:
+  explicit SpliceableChunkedJSONWriter()
+    : SpliceableJSONWriter(mozilla::MakeUnique<ChunkedJSONWriteFunc>())
+  { }
+  // Downcast relies on the constructor installing a ChunkedJSONWriteFunc.
+  ChunkedJSONWriteFunc* WriteFunc() const {
+    return static_cast<ChunkedJSONWriteFunc*>(JSONWriter::WriteFunc());
+  }
+
+  // Adopts the chunks from aFunc without copying.
+  virtual void TakeAndSplice(ChunkedJSONWriteFunc* aFunc) override;
+};
+
+#endif // PROFILEJSONWRITER_H
diff --git a/tools/profiler/core/ProfilerBacktrace.cpp b/tools/profiler/core/ProfilerBacktrace.cpp
new file mode 100644
index 000000000..7302dd64c
--- /dev/null
+++ b/tools/profiler/core/ProfilerBacktrace.cpp
@@ -0,0 +1,33 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ProfilerBacktrace.h"
+
+#include "ProfileJSONWriter.h"
+#include "SyncProfile.h"
+
+ProfilerBacktrace::ProfilerBacktrace(SyncProfile* aProfile)
+  : mProfile(aProfile)  // may be deleted by the destructor; must be non-null
+{
+  MOZ_COUNT_CTOR(ProfilerBacktrace);
+  MOZ_ASSERT(aProfile);
+}
+
+ProfilerBacktrace::~ProfilerBacktrace()
+{
+  MOZ_COUNT_DTOR(ProfilerBacktrace);
+  if (mProfile->ShouldDestroy()) {  // true only when the profile is OWNED
+    delete mProfile;
+  }
+}
+
+void
+ProfilerBacktrace::StreamJSON(SpliceableJSONWriter& aWriter,
+                              UniqueStacks& aUniqueStacks)
+{
+  ::MutexAutoLock lock(mProfile->GetMutex());  // held while streaming
+  mProfile->StreamJSON(aWriter, aUniqueStacks);
+}
diff --git a/tools/profiler/core/ProfilerMarkers.cpp b/tools/profiler/core/ProfilerMarkers.cpp
new file mode 100644
index 000000000..3cb47de48
--- /dev/null
+++ b/tools/profiler/core/ProfilerMarkers.cpp
@@ -0,0 +1,210 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "GeckoProfiler.h"
+#include "ProfilerBacktrace.h"
+#include "ProfilerMarkers.h"
+#include "SyncProfile.h"
+#ifndef SPS_STANDALONE
+#include "gfxASurface.h"
+#include "Layers.h"
+#include "mozilla/Sprintf.h"
+#endif
+
+ProfilerMarkerPayload::ProfilerMarkerPayload(ProfilerBacktrace* aStack)
+  : mStack(aStack)  // passed to profiler_free_backtrace() in the destructor
+{}
+
+ProfilerMarkerPayload::ProfilerMarkerPayload(const mozilla::TimeStamp& aStartTime,
+                                             const mozilla::TimeStamp& aEndTime,
+                                             ProfilerBacktrace* aStack)
+  : mStartTime(aStartTime)
+  , mEndTime(aEndTime)
+  , mStack(aStack)  // passed to profiler_free_backtrace() in the destructor
+{}
+
+ProfilerMarkerPayload::~ProfilerMarkerPayload()
+{
+  profiler_free_backtrace(mStack);  // NOTE(review): mStack may be null here
+}
+
+void
+ProfilerMarkerPayload::streamCommonProps(const char* aMarkerType,
+                                         SpliceableJSONWriter& aWriter,
+                                         UniqueStacks& aUniqueStacks)
+{
+  MOZ_ASSERT(aMarkerType);
+  aWriter.StringProperty("type", aMarkerType);
+  if (!mStartTime.IsNull()) {
+    aWriter.DoubleProperty("startTime", profiler_time(mStartTime));
+  }
+  if (!mEndTime.IsNull()) {
+    aWriter.DoubleProperty("endTime", profiler_time(mEndTime));
+  }
+  if (!mStack) {
+    return;
+  }
+  // The backtrace, when there is one, is nested as an object under "stack".
+  aWriter.StartObjectProperty("stack");
+  mStack->StreamJSON(aWriter, aUniqueStacks);
+  aWriter.EndObject();
+}
+
+ProfilerMarkerTracing::ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData)
+  : mCategory(aCategory)
+  , mMetaData(aMetaData)
+{
+  if (aMetaData == TRACING_EVENT_BACKTRACE) {
+    SetStack(profiler_get_backtrace());  // backtrace obtained at construction
+  }
+}
+
+ProfilerMarkerTracing::ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData,
+                                             ProfilerBacktrace* aCause)
+  : mCategory(aCategory)
+  , mMetaData(aMetaData)
+{
+  if (aCause) {  // SetStack() is skipped for a null aCause
+    SetStack(aCause);
+  }
+}
+
+void
+ProfilerMarkerTracing::StreamPayload(SpliceableJSONWriter& aWriter,
+                                     UniqueStacks& aUniqueStacks)
+{
+  streamCommonProps("tracing", aWriter, aUniqueStacks);
+  if (GetCategory()) {
+    aWriter.StringProperty("category", GetCategory());
+  }
+  const char* interval = nullptr;  // stays null unless this is an interval
+  if (GetMetaData() != TRACING_DEFAULT) {
+    interval = GetMetaData() == TRACING_INTERVAL_START ? "start"
+             : GetMetaData() == TRACING_INTERVAL_END ? "end" : nullptr;
+  }
+  if (interval) {
+    aWriter.StringProperty("interval", interval);
+  }
+}
+
+#ifndef SPS_STANDALONE
+GPUMarkerPayload::GPUMarkerPayload(
+  const mozilla::TimeStamp& aCpuTimeStart,
+  const mozilla::TimeStamp& aCpuTimeEnd,
+  uint64_t aGpuTimeStart,
+  uint64_t aGpuTimeEnd)
+  // The CPU timestamps go to the base class and are also kept in members.
+  : ProfilerMarkerPayload(aCpuTimeStart, aCpuTimeEnd)
+  , mCpuTimeStart(aCpuTimeStart)
+  , mCpuTimeEnd(aCpuTimeEnd)
+  , mGpuTimeStart(aGpuTimeStart)
+  , mGpuTimeEnd(aGpuTimeEnd)
+{ }
+
+void
+GPUMarkerPayload::StreamPayload(SpliceableJSONWriter& aWriter,
+                                UniqueStacks& aUniqueStacks)
+{
+  streamCommonProps("gpu_timer_query", aWriter, aUniqueStacks);
+  // GPU times are passed as int64_t so the 64-bit values are not truncated.
+  aWriter.DoubleProperty("cpustart", profiler_time(mCpuTimeStart));
+  aWriter.DoubleProperty("cpuend", profiler_time(mCpuTimeEnd));
+  aWriter.IntProperty("gpustart", static_cast<int64_t>(mGpuTimeStart));
+  aWriter.IntProperty("gpuend", static_cast<int64_t>(mGpuTimeEnd));
+}
+
+ProfilerMarkerImagePayload::ProfilerMarkerImagePayload(gfxASurface *aImg)
+  : mImg(aImg)  // stored, but not read by StreamPayload()
+{ }
+
+void
+ProfilerMarkerImagePayload::StreamPayload(SpliceableJSONWriter& aWriter,
+                                          UniqueStacks& aUniqueStacks)
+{
+  streamCommonProps("innerHTML", aWriter, aUniqueStacks);
+  // TODO: Finish me; so far only the common properties are written.
+  //aWriter.NameValue("innerHTML", "<img src=''/>");
+}
+
+IOMarkerPayload::IOMarkerPayload(const char* aSource,
+                                 const char* aFilename,
+                                 const mozilla::TimeStamp& aStartTime,
+                                 const mozilla::TimeStamp& aEndTime,
+                                 ProfilerBacktrace* aStack)
+  : ProfilerMarkerPayload(aStartTime, aEndTime, aStack)
+  , mSource(aSource)
+{
+  MOZ_ASSERT(aSource);
+  mFilename = aFilename ? strdup(aFilename) : nullptr;  // freed in destructor
+}
+
+IOMarkerPayload::~IOMarkerPayload(){
+  free(mFilename);  // strdup'd in the constructor; free(nullptr) is a no-op
+}
+
+void
+IOMarkerPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks)
+{
+  streamCommonProps("io", aWriter, aUniqueStacks);
+  aWriter.StringProperty("source", mSource);
+  if (mFilename) {  // the filename is optional
+    aWriter.StringProperty("filename", mFilename);
+  }
+}
+
+void
+ProfilerJSEventMarker(const char* event)
+{
+  PROFILER_MARKER(event);  // the event name is handed to PROFILER_MARKER
+}
+
+LayerTranslationPayload::LayerTranslationPayload(mozilla::layers::Layer* aLayer,
+                                                 mozilla::gfx::Point aPoint)
+  : ProfilerMarkerPayload(mozilla::TimeStamp::Now(), mozilla::TimeStamp::Now(), nullptr)
+  , mLayer(aLayer)  // only its address is streamed (see StreamPayload)
+  , mPoint(aPoint)
+{  // NOTE(review): Now() is called twice above, so start and end may differ.
+}
+
+void
+LayerTranslationPayload::StreamPayload(SpliceableJSONWriter& aWriter,
+                                       UniqueStacks& aUniqueStacks)
+{
+  // The layer is identified by its address, formatted with "%p".
+  char layerAddress[32];
+  SprintfLiteral(layerAddress, "%p", mLayer);
+  aWriter.StringProperty("layer", layerAddress);
+
+  aWriter.IntProperty("x", mPoint.x);
+  aWriter.IntProperty("y", mPoint.y);
+  aWriter.StringProperty("category", "LayerTranslation");
+}
+
+TouchDataPayload::TouchDataPayload(const mozilla::ScreenIntPoint& aPoint)
+  : ProfilerMarkerPayload(mozilla::TimeStamp::Now(), mozilla::TimeStamp::Now(), nullptr)
+  , mPoint(aPoint)
+{
+}
+
+void
+TouchDataPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks)
+{
+  aWriter.IntProperty("x", mPoint.x);  // no streamCommonProps() call here
+  aWriter.IntProperty("y", mPoint.y);
+}
+
+VsyncPayload::VsyncPayload(mozilla::TimeStamp aVsyncTimestamp)
+  : ProfilerMarkerPayload(aVsyncTimestamp, aVsyncTimestamp, nullptr)
+  , mVsyncTimestamp(aVsyncTimestamp)  // also the start and end time above
+{
+}
+
+void
+VsyncPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks)
+{
+  aWriter.DoubleProperty("vsync", profiler_time(mVsyncTimestamp));  // no common props
+  aWriter.StringProperty("category", "VsyncTimestamp");
+}
+#endif
diff --git a/tools/profiler/core/StackTop.cpp b/tools/profiler/core/StackTop.cpp
new file mode 100644
index 000000000..1f7944e5e
--- /dev/null
+++ b/tools/profiler/core/StackTop.cpp
@@ -0,0 +1,48 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef XP_MACOSX
+#include <mach/task.h>
+#include <mach/thread_act.h>
+#include <pthread.h>
+#elif XP_WIN
+#include <windows.h>
+#endif
+
+#include "StackTop.h"
+
+void *GetStackTop(void *guess) {
+#if defined(XP_MACOSX)
+  pthread_t thread = pthread_self();
+  return pthread_get_stackaddr_np(thread);  // guess is ignored on Mac
+#elif defined(XP_WIN)
+#if defined(_MSC_VER) && defined(_M_IX86)
+  // (MSVC, 32-bit x86) offset 0x18 from the FS segment register gives a
+  // pointer to the thread information block for the current thread
+  NT_TIB* pTib;
+  __asm {
+    MOV EAX, FS:[18h]
+      MOV pTib, EAX
+  }
+  return static_cast<void*>(pTib->StackBase);
+#elif defined(__GNUC__) && defined(i386)
+  // (GCC, 32-bit x86) offset 0x18 from the FS segment register gives a
+  // pointer to the thread information block for the current thread
+  NT_TIB* pTib;
+  asm ( "movl %%fs:0x18, %0\n"
+       : "=r" (pTib)
+      );
+  return static_cast<void*>(pTib->StackBase);
+#elif defined(_M_X64) || defined(__x86_64)
+  PNT_TIB64 pTib = reinterpret_cast<PNT_TIB64>(NtCurrentTeb());
+  return reinterpret_cast<void*>(pTib->StackBase);
+#else
+#error Need a way to get the stack bounds on this platform (Windows)
+#endif
+#else
+  return guess;  // no platform query here: the caller's guess is returned
+#endif
+}
diff --git a/tools/profiler/core/StackTop.h b/tools/profiler/core/StackTop.h
new file mode 100644
index 000000000..a933d10b4
--- /dev/null
+++ b/tools/profiler/core/StackTop.h
@@ -0,0 +1,10 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_STACK_TOP_H
+#define MOZ_STACK_TOP_H
+void *GetStackTop(void *guess);
+#endif
diff --git a/tools/profiler/core/SyncProfile.cpp b/tools/profiler/core/SyncProfile.cpp
new file mode 100644
index 000000000..4c4742f34
--- /dev/null
+++ b/tools/profiler/core/SyncProfile.cpp
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SyncProfile.h"
+
+SyncProfile::SyncProfile(ThreadInfo* aInfo, int aEntrySize)
+  : ThreadProfile(aInfo, new ProfileBuffer(aEntrySize))
+  , mOwnerState(REFERENCED)  // becomes OWNED or ORPHANED in EndUnwind()
+{
+  MOZ_COUNT_CTOR(SyncProfile);
+}
+
+SyncProfile::~SyncProfile()
+{
+  MOZ_COUNT_DTOR(SyncProfile);
+
+  // This profile owns its ThreadInfo (see NewSyncProfile), so it is released
+  // here as well.
+  delete GetThreadInfo();
+}
+
+bool
+SyncProfile::ShouldDestroy()
+{
+  ::MutexAutoLock lock(GetMutex());
+
+  // Only an OWNED profile is handed to the caller for destruction; from any
+  // other state the profile becomes ORPHANED and the caller must not delete it.
+  const bool callerDestroys = (mOwnerState == OWNED);
+  mOwnerState = callerDestroys ? OWNER_DESTROYING : ORPHANED;
+  return callerDestroys;
+}
+
+void
+SyncProfile::EndUnwind()
+{
+  if (mOwnerState != ORPHANED) {
+    mOwnerState = OWNED;  // from now on ShouldDestroy() returns true
+  }
+  // Save mOwnerState before we release the mutex
+  OwnerState ownerState = mOwnerState;
+  ThreadProfile::EndUnwind();
+  if (ownerState == ORPHANED) {
+    delete this;  // no owner is left; members must not be touched after this
+  }
+}
+
+// SyncProfiles' stacks are deduplicated in the context of the containing
+// profile, in which the backtrace appears as a marker payload.
+void
+SyncProfile::StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks)
+{
+  ThreadProfile::StreamSamplesAndMarkers(aWriter, /* aSinceTime = */ 0, aUniqueStacks);
+}
diff --git a/tools/profiler/core/SyncProfile.h b/tools/profiler/core/SyncProfile.h
new file mode 100644
index 000000000..58f6b0d81
--- /dev/null
+++ b/tools/profiler/core/SyncProfile.h
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __SYNCPROFILE_H
+#define __SYNCPROFILE_H
+
+#include "ProfileEntry.h"
+#include "ThreadProfile.h"
+
+class SyncProfile : public ThreadProfile
+{
+public:
+  SyncProfile(ThreadInfo* aInfo, int aEntrySize);
+  ~SyncProfile();
+
+  // SyncProfiles' stacks are deduplicated in the context of the containing
+  // profile, in which the backtrace appears as a marker payload.
+  void StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks);
+
+  virtual void EndUnwind();  // deletes this object when it is ORPHANED
+  virtual SyncProfile* AsSyncProfile() { return this; }
+
+private:
+  friend class ProfilerBacktrace;
+
+  enum OwnerState
+  {
+    REFERENCED,       // ProfilerBacktrace has a pointer to this but doesn't own
+    OWNED,            // ProfilerBacktrace is responsible for destroying this
+    OWNER_DESTROYING, // ProfilerBacktrace owns this and is destroying
+    ORPHANED          // No owner, we must destroy ourselves
+  };
+  // Called by ~ProfilerBacktrace(); true means the caller must delete this.
+  bool ShouldDestroy();
+
+  OwnerState mOwnerState;
+};
+
+#endif // __SYNCPROFILE_H
+
diff --git a/tools/profiler/core/ThreadInfo.cpp b/tools/profiler/core/ThreadInfo.cpp
new file mode 100644
index 000000000..0e25d2330
--- /dev/null
+++ b/tools/profiler/core/ThreadInfo.cpp
@@ -0,0 +1,73 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ThreadInfo.h"
+#include "ThreadProfile.h"
+
+#include "mozilla/DebugOnly.h"
+
+ThreadInfo::ThreadInfo(const char* aName, int aThreadId,
+                       bool aIsMainThread, PseudoStack* aPseudoStack,
+                       void* aStackTop)
+  : mName(strdup(aName))  // private copy, freed in ~ThreadInfo()
+  , mThreadId(aThreadId)
+  , mIsMainThread(aIsMainThread)
+  , mPseudoStack(aPseudoStack)
+  , mPlatformData(Sampler::AllocPlatformData(aThreadId))
+  , mProfile(nullptr)
+  , mStackTop(aStackTop)
+  , mPendingDelete(false)
+{
+  MOZ_COUNT_CTOR(ThreadInfo);
+#ifndef SPS_STANDALONE
+  mThread = NS_GetCurrentThread();
+#endif
+  // We don't have to guess on mac: the stack address reported by pthreads
+  // replaces the aStackTop passed in by the caller.
+#ifdef XP_MACOSX
+  pthread_t self = pthread_self();  // i.e. the thread running this constructor
+  mStackTop = pthread_get_stackaddr_np(self);
+#endif
+}
+
+ThreadInfo::~ThreadInfo()
+{
+  MOZ_COUNT_DTOR(ThreadInfo);
+  // Release the strdup'd name, the profile (deleting a null pointer is a
+  // no-op) and the platform data, in that order.
+  free(mName);
+  delete mProfile;
+  Sampler::FreePlatformData(mPlatformData);
+}
+
+void
+ThreadInfo::SetPendingDelete()
+{
+  mPendingDelete = true;
+  mPseudoStack = nullptr;  // not owned by this object, so only disconnected
+  if (!mProfile) {
+    return;
+  }
+  mProfile->SetPendingDelete();
+}
+
+bool
+ThreadInfo::CanInvokeJS() const
+{
+#ifdef SPS_STANDALONE
+  return false;  // the standalone build always answers "no"
+#else
+  nsIThread* thread = GetThread();
+  if (!thread) {
+    MOZ_ASSERT(IsMainThread());
+    return true;  // no nsIThread recorded: asserted to be the main thread
+  }
+  bool result;  // NOTE(review): stays uninitialized if GetCanInvokeJS fails
+  mozilla::DebugOnly<nsresult> rv = thread->GetCanInvokeJS(&result);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+  return result;
+#endif
+}
diff --git a/tools/profiler/core/ThreadInfo.h b/tools/profiler/core/ThreadInfo.h
new file mode 100644
index 000000000..1cb4e5dc8
--- /dev/null
+++ b/tools/profiler/core/ThreadInfo.h
@@ -0,0 +1,66 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_THREAD_INFO_H
+#define MOZ_THREAD_INFO_H
+
+#include "platform.h"
+
+class ThreadInfo {
+ public:
+  ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack, void* aStackTop);
+
+  virtual ~ThreadInfo();
+
+  const char* Name() const { return mName; }
+  int ThreadId() const { return mThreadId; }
+
+  bool IsMainThread() const { return mIsMainThread; }
+  PseudoStack* Stack() const { return mPseudoStack; }
+
+  void SetProfile(ThreadProfile* aProfile) { mProfile = aProfile; }
+  ThreadProfile* Profile() const { return mProfile; }
+
+  PlatformData* GetPlatformData() const { return mPlatformData; }
+  void* StackTop() const { return mStackTop; }
+  // Flags this thread (and its profile, if any) as pending deletion.
+  virtual void SetPendingDelete();
+  bool IsPendingDelete() const { return mPendingDelete; }
+
+#ifndef SPS_STANDALONE
+  /**
+   * May be null for the main thread if the profiler was started during startup
+   */
+  nsIThread* GetThread() const { return mThread.get(); }
+
+#endif
+
+  bool CanInvokeJS() const;
+
+ private:
+  char* mName;  // strdup'd copy, freed in the destructor
+  int mThreadId;
+  const bool mIsMainThread;
+  PseudoStack* mPseudoStack;  // not owned; cleared by SetPendingDelete()
+  PlatformData* mPlatformData;
+  ThreadProfile* mProfile;  // deleted in the destructor when set
+  void* mStackTop;
+#ifndef SPS_STANDALONE
+  nsCOMPtr<nsIThread> mThread;
+#endif
+  bool mPendingDelete;
+};
+
+// Just like ThreadInfo, but owns a reference to the PseudoStack.
+class StackOwningThreadInfo : public ThreadInfo {
+ public:
+  StackOwningThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack, void* aStackTop);
+  virtual ~StackOwningThreadInfo();
+  // Overrides ThreadInfo::SetPendingDelete().
+  virtual void SetPendingDelete();
+};
+
+#endif
diff --git a/tools/profiler/core/ThreadProfile.cpp b/tools/profiler/core/ThreadProfile.cpp
new file mode 100644
index 000000000..7452a7ee8
--- /dev/null
+++ b/tools/profiler/core/ThreadProfile.cpp
@@ -0,0 +1,260 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Builds a profile for the thread described by |aInfo|, recording into
+// |aBuffer|.  The thread id, main-thread flag, pseudo stack, platform data
+// and stack top are all copied out of |aInfo| at construction time.
+// |aInfo| is dereferenced in the initializer list, so it must be non-null.
+ThreadProfile::ThreadProfile(ThreadInfo* aInfo, ProfileBuffer* aBuffer)
+  : mThreadInfo(aInfo)
+  , mBuffer(aBuffer)
+  , mPseudoStack(aInfo->Stack())
+  , mMutex(OS::CreateMutex("ThreadProfile::mMutex"))
+  , mThreadId(int(aInfo->ThreadId()))
+  , mIsMainThread(aInfo->IsMainThread())
+  , mPlatformData(aInfo->GetPlatformData())
+  , mStackTop(aInfo->StackTop())
+#ifndef SPS_STANDALONE
+  , mRespInfo(this)
+#endif
+#ifdef XP_LINUX
+  , mRssMemory(0)
+  , mUssMemory(0)
+#endif
+{
+  MOZ_COUNT_CTOR(ThreadProfile);
+  MOZ_ASSERT(aBuffer);
+
+  // Sanity-check the native thread id range.  It is unclear whether these
+  // can legitimately fail, hence debug-only assertions rather than errors.
+  // NOTE(review): ThreadInfo::ThreadId() returns int, so the upper-bound
+  // check looks trivially true where int is 32 bits wide.
+  MOZ_ASSERT(aInfo->ThreadId() >= 0, "native thread ID is < 0");
+  MOZ_ASSERT(aInfo->ThreadId() <= INT32_MAX, "native thread ID is > INT32_MAX");
+}
+
+// Destructor.  Performs no explicit teardown beyond the instance-counting
+// bookkeeping macro.
+ThreadProfile::~ThreadProfile() {
+  MOZ_COUNT_DTOR(ThreadProfile);
+}
+
+// Appends |aTag| to the underlying ProfileBuffer.
+void ThreadProfile::addTag(const ProfileEntry& aTag) {
+  mBuffer->addTag(aTag);
+}
+
+// Hands |aStoredMarker| over to the underlying ProfileBuffer for tracking.
+void ThreadProfile::addStoredMarker(ProfilerMarker *aStoredMarker)
+{
+  mBuffer->addStoredMarker(aStoredMarker);
+}
+
+// Streams this thread's profile to |aWriter| as a single JSON value made of,
+// in order: the thread metadata, samples and markers (via
+// StreamSamplesAndMarkers), then the "stackTable", "frameTable" and
+// "stringTable" sections.  |aSinceTime| is forwarded unchanged to
+// StreamSamplesAndMarkers.  mUniqueStacks is reset before returning.
+void ThreadProfile::StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime)
+{
+  // mUniqueStacks may already be emplaced from FlushSamplesAndMarkers.
+  if (!mUniqueStacks.isSome()) {
+#ifndef SPS_STANDALONE
+    mUniqueStacks.emplace(mPseudoStack->mContext);
+#else
+    mUniqueStacks.emplace(nullptr);
+#endif
+  }
+
+  aWriter.Start(SpliceableJSONWriter::SingleLineStyle);
+  {
+    // Samples and markers must be streamed first: the tables written below
+    // are spliced out of the same UniqueStacks instance that is passed in
+    // here.
+    StreamSamplesAndMarkers(aWriter, aSinceTime, *mUniqueStacks);
+
+    aWriter.StartObjectProperty("stackTable");
+    {
+      {
+        // Scoped so the schema writer is destroyed before "data" starts.
+        JSONSchemaWriter schema(aWriter);
+        schema.WriteField("prefix");
+        schema.WriteField("frame");
+      }
+
+      aWriter.StartArrayProperty("data");
+      {
+        mUniqueStacks->SpliceStackTableElements(aWriter);
+      }
+      aWriter.EndArray();
+    }
+    aWriter.EndObject();
+
+    aWriter.StartObjectProperty("frameTable");
+    {
+      {
+        // Scoped so the schema writer is destroyed before "data" starts.
+        JSONSchemaWriter schema(aWriter);
+        schema.WriteField("location");
+        schema.WriteField("implementation");
+        schema.WriteField("optimizations");
+        schema.WriteField("line");
+        schema.WriteField("category");
+      }
+
+      aWriter.StartArrayProperty("data");
+      {
+        mUniqueStacks->SpliceFrameTableElements(aWriter);
+      }
+      aWriter.EndArray();
+    }
+    aWriter.EndObject();
+
+    aWriter.StartArrayProperty("stringTable");
+    {
+      mUniqueStacks->mUniqueStrings.SpliceStringTableElements(aWriter);
+    }
+    aWriter.EndArray();
+  }
+  aWriter.End();
+
+  // Drop the per-stream state so the next call starts from scratch.
+  mUniqueStacks.reset();
+}
+
+// Writes the "name" and "tid" properties followed by the "samples" and
+// "markers" objects for this thread into |aWriter|.  Each of the two objects
+// consists of a schema plus a "data" array.  Anything previously saved by
+// FlushSamplesAndMarkers is spliced in ahead of the entries streamed from
+// the live buffer, and the saved copy is then released.
+void ThreadProfile::StreamSamplesAndMarkers(SpliceableJSONWriter& aWriter, double aSinceTime,
+                                            UniqueStacks& aUniqueStacks)
+{
+#ifndef SPS_STANDALONE
+  // Thread meta data
+  if (XRE_GetProcessType() == GeckoProcessType_Plugin) {
+    // TODO Add the proper plugin name
+    aWriter.StringProperty("name", "Plugin");
+  } else if (XRE_GetProcessType() == GeckoProcessType_Content) {
+    // This isn't going to really help once we have multiple content
+    // processes, but it'll do for now.
+    aWriter.StringProperty("name", "Content");
+  } else {
+    aWriter.StringProperty("name", Name());
+  }
+#else
+  aWriter.StringProperty("name", Name());
+#endif
+
+  aWriter.IntProperty("tid", static_cast<int>(mThreadId));
+
+  aWriter.StartObjectProperty("samples");
+  {
+    {
+      // Scoped so the schema writer is destroyed before "data" starts.
+      JSONSchemaWriter schema(aWriter);
+      schema.WriteField("stack");
+      schema.WriteField("time");
+      schema.WriteField("responsiveness");
+      schema.WriteField("rss");
+      schema.WriteField("uss");
+      schema.WriteField("frameNumber");
+      schema.WriteField("power");
+    }
+
+    aWriter.StartArrayProperty("data");
+    {
+      if (mSavedStreamedSamples) {
+        // We would only have saved streamed samples during shutdown
+        // streaming, which cares about dumping the entire buffer, and thus
+        // should have passed in 0 for aSinceTime.
+        MOZ_ASSERT(aSinceTime == 0);
+        aWriter.Splice(mSavedStreamedSamples.get());
+        mSavedStreamedSamples.reset();
+      }
+      mBuffer->StreamSamplesToJSON(aWriter, mThreadId, aSinceTime,
+#ifndef SPS_STANDALONE
+                                   mPseudoStack->mContext,
+#else
+                                   nullptr,
+#endif
+                                   aUniqueStacks);
+    }
+    aWriter.EndArray();
+  }
+  aWriter.EndObject();
+
+  aWriter.StartObjectProperty("markers");
+  {
+    {
+      // Scoped so the schema writer is destroyed before "data" starts.
+      JSONSchemaWriter schema(aWriter);
+      schema.WriteField("name");
+      schema.WriteField("time");
+      schema.WriteField("data");
+    }
+
+    aWriter.StartArrayProperty("data");
+    {
+      if (mSavedStreamedMarkers) {
+        // Same reasoning as for the saved samples above.
+        MOZ_ASSERT(aSinceTime == 0);
+        aWriter.Splice(mSavedStreamedMarkers.get());
+        mSavedStreamedMarkers.reset();
+      }
+      mBuffer->StreamMarkersToJSON(aWriter, mThreadId, aSinceTime, aUniqueStacks);
+    }
+    aWriter.EndArray();
+  }
+  aWriter.EndObject();
+}
+
+// Serializes every sample and marker currently in the buffer into
+// mSavedStreamedSamples / mSavedStreamedMarkers and then resets the buffer.
+// The saved text is spliced back in by StreamSamplesAndMarkers.
+void ThreadProfile::FlushSamplesAndMarkers()
+{
+  // This function is used to serialize the current buffer just before
+  // JSContext destruction.
+  MOZ_ASSERT(mPseudoStack->mContext);
+
+  // Unlike StreamJSObject, do not surround the samples in brackets; use the
+  // writer's {Start,End}BareList instead. The result string will be a
+  // comma-separated list of JSON object literals that will be prepended by
+  // StreamJSObject into an existing array.
+  //
+  // Note that the UniqueStacks instance is persisted so that the frame-index
+  // mapping is stable across JS shutdown.
+#ifndef SPS_STANDALONE
+  mUniqueStacks.emplace(mPseudoStack->mContext);
+#else
+  mUniqueStacks.emplace(nullptr);
+#endif
+
+  {
+    // Samples: aSinceTime of 0 requests the whole buffer.
+    SpliceableChunkedJSONWriter b;
+    b.StartBareList();
+    {
+      mBuffer->StreamSamplesToJSON(b, mThreadId, /* aSinceTime = */ 0,
+#ifndef SPS_STANDALONE
+                                   mPseudoStack->mContext,
+#else
+                                   nullptr,
+#endif
+                                   *mUniqueStacks);
+    }
+    b.EndBareList();
+    mSavedStreamedSamples = b.WriteFunc()->CopyData();
+  }
+
+  {
+    // Markers: same treatment as the samples above.
+    SpliceableChunkedJSONWriter b;
+    b.StartBareList();
+    {
+      mBuffer->StreamMarkersToJSON(b, mThreadId, /* aSinceTime = */ 0, *mUniqueStacks);
+    }
+    b.EndBareList();
+    mSavedStreamedMarkers = b.WriteFunc()->CopyData();
+  }
+
+  // Reset the buffer. Attempting to symbolicate JS samples after mContext has
+  // gone away will crash.
+  mBuffer->reset();
+}
+
+// Returns the PseudoStack pointer held by this profile.  It is null once
+// SetPendingDelete() has been called.
+PseudoStack* ThreadProfile::GetPseudoStack() {
+  return mPseudoStack;
+}
+
+// Acquires this profile's mutex; pair every call with EndUnwind().
+void ThreadProfile::BeginUnwind() {
+  mMutex->Lock();
+}
+
+// Releases the mutex taken by BeginUnwind().
+void ThreadProfile::EndUnwind() {
+  mMutex->Unlock();
+}
+
+// Exposes the profile's mutex by reference so callers can lock it directly.
+::Mutex& ThreadProfile::GetMutex() {
+  return *mMutex;
+}
+
+// Asks the buffer to duplicate the most recent sample recorded for this
+// thread id.
+void ThreadProfile::DuplicateLastSample() {
+  mBuffer->DuplicateLastSample(mThreadId);
+}
+
diff --git a/tools/profiler/core/ThreadProfile.h b/tools/profiler/core/ThreadProfile.h
new file mode 100644
index 000000000..ca2bbfe7a
--- /dev/null
+++ b/tools/profiler/core/ThreadProfile.h
@@ -0,0 +1,107 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_THREAD_PROFILE_H
+#define MOZ_THREAD_PROFILE_H
+
+#include "ProfileBuffer.h"
+#include "ThreadInfo.h"
+
+// Ties one ThreadInfo to the ProfileBuffer its samples are recorded into,
+// and knows how to stream that thread's samples and markers as JSON.
+class ThreadProfile
+{
+public:
+  ThreadProfile(ThreadInfo* aThreadInfo, ProfileBuffer* aBuffer);
+  virtual ~ThreadProfile();
+
+  // Appends an entry to the underlying buffer.
+  void addTag(const ProfileEntry& aTag);
+
+  /**
+   * Track a marker which has been inserted into the ThreadProfile.
+   * This marker can safely be deleted once the generation has
+   * expired.
+   */
+  void addStoredMarker(ProfilerMarker *aStoredMarker);
+  PseudoStack* GetPseudoStack();
+  ::Mutex& GetMutex();
+
+  // Streams the whole profile for this thread; see the definition for the
+  // layout of the emitted JSON.
+  void StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime = 0);
+
+  /**
+   * Call this method when the JS entries inside the buffer are about to
+   * become invalid, i.e., just before JS shutdown.
+   */
+  void FlushSamplesAndMarkers();
+
+  // Lock / unlock the profile's mutex around an unwind.
+  void BeginUnwind();
+  virtual void EndUnwind();
+  // Base implementation returns nullptr; presumably overridden by a
+  // SyncProfile subclass -- TODO confirm.
+  virtual SyncProfile* AsSyncProfile() { return nullptr; }
+
+  bool IsMainThread() const { return mIsMainThread; }
+  const char* Name() const { return mThreadInfo->Name(); }
+  int ThreadId() const { return mThreadId; }
+
+  PlatformData* GetPlatformData() const { return mPlatformData; }
+  void* GetStackTop() const { return mStackTop; }
+  void DuplicateLastSample();
+
+  ThreadInfo* GetThreadInfo() const { return mThreadInfo; }
+#ifndef SPS_STANDALONE
+  ThreadResponsiveness* GetThreadResponsiveness() { return &mRespInfo; }
+#endif
+
+  bool CanInvokeJS() const { return mThreadInfo->CanInvokeJS(); }
+
+  // Drops the cached PseudoStack and PlatformData pointers; after this
+  // GetPseudoStack() and GetPlatformData() return null.
+  void SetPendingDelete()
+  {
+    mPseudoStack = nullptr;
+    mPlatformData = nullptr;
+  }
+
+  // Current generation counter of the underlying buffer.
+  uint32_t bufferGeneration() const {
+    return mBuffer->mGeneration;
+  }
+
+protected:
+  // Shared by StreamJSON; writes name, tid, samples and markers.
+  void StreamSamplesAndMarkers(SpliceableJSONWriter& aWriter, double aSinceTime,
+                               UniqueStacks& aUniqueStacks);
+
+private:
+  // Unit tests that need access to the private members.
+  FRIEND_TEST(ThreadProfile, InsertOneTag);
+  FRIEND_TEST(ThreadProfile, InsertOneTagWithTinyBuffer);
+  FRIEND_TEST(ThreadProfile, InsertTagsNoWrap);
+  FRIEND_TEST(ThreadProfile, InsertTagsWrap);
+  FRIEND_TEST(ThreadProfile, MemoryMeasure);
+  ThreadInfo* mThreadInfo;
+
+  const RefPtr<ProfileBuffer> mBuffer;
+
+  // JS frames in the buffer may require a live JSRuntime to stream (e.g.,
+  // stringifying JIT frames). In the case of JSRuntime destruction,
+  // FlushSamplesAndMarkers should be called to save them. These are spliced
+  // into the final stream.
+  mozilla::UniquePtr<char[]> mSavedStreamedSamples;
+  mozilla::UniquePtr<char[]> mSavedStreamedMarkers;
+  mozilla::Maybe<UniqueStacks> mUniqueStacks;
+
+  // The members below are copied from the ThreadInfo by the constructor.
+  PseudoStack*   mPseudoStack;
+  mozilla::UniquePtr<Mutex>  mMutex;
+  int            mThreadId;
+  bool           mIsMainThread;
+  PlatformData*  mPlatformData;  // Platform specific data.
+  void* const    mStackTop;
+#ifndef SPS_STANDALONE
+  ThreadResponsiveness mRespInfo;
+#endif
+
+  // Only Linux is using a signal sender, instead of stopping the thread, so we
+  // need some space to store the data which cannot be collected in the signal
+  // handler code.
+#ifdef XP_LINUX
+public:
+  int64_t        mRssMemory;
+  int64_t        mUssMemory;
+#endif
+};
+
+#endif
diff --git a/tools/profiler/core/platform-linux.cc b/tools/profiler/core/platform-linux.cc
new file mode 100644
index 000000000..160873c9d
--- /dev/null
+++ b/tools/profiler/core/platform-linux.cc
@@ -0,0 +1,715 @@
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+/*
+# vim: sw=2
+*/
+#include <stdio.h>
+#include <math.h>
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/prctl.h> // set name
+#include <stdlib.h>
+#include <sched.h>
+#ifdef ANDROID
+#include <android/log.h>
+#else
+#define __android_log_print(a, ...)
+#endif
+#include <ucontext.h>
+// Ubuntu Dapper requires memory pages to be marked as
+// executable. Otherwise, OS raises an exception when executing code
+// in that page.
+#include <sys/types.h>  // mmap & munmap
+#include <sys/mman.h>   // mmap & munmap
+#include <sys/stat.h>   // open
+#include <fcntl.h>      // open
+#include <unistd.h>     // sysconf
+#include <semaphore.h>
+#ifdef __GLIBC__
+#include <execinfo.h>   // backtrace, backtrace_symbols
+#endif  // def __GLIBC__
+#include <strings.h>    // index
+#include <errno.h>
+#include <stdarg.h>
+#include "prenv.h"
+#include "platform.h"
+#include "GeckoProfiler.h"
+#include "mozilla/Mutex.h"
+#include "mozilla/Atomics.h"
+#include "mozilla/LinuxSignal.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/DebugOnly.h"
+#include "ProfileEntry.h"
+#include "nsThreadUtils.h"
+#include "GeckoSampler.h"
+#include "ThreadResponsiveness.h"
+
+#if defined(__ARM_EABI__) && defined(ANDROID)
+ // Should also work on other Android and ARM Linux, but not tested there yet.
+# define USE_EHABI_STACKWALK
+# include "EHABIStackWalk.h"
+#elif defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux)
+# define USE_LUL_STACKWALK
+# include "lul/LulMain.h"
+# include "lul/platform-linux-lul.h"
+#endif
+
+// Memory profile
+#include "nsMemoryReporterManager.h"
+
+#include <string.h>
+#include <list>
+
+#define SIGNAL_SAVE_PROFILE SIGUSR2
+
+using namespace mozilla;
+
+#if defined(USE_LUL_STACKWALK)
+// A singleton instance of the library.  It is initialised at first
+// use.  Currently only the main thread can call Sampler::Start, so
+// there is no need for a mechanism to ensure that it is only
+// created once in a multi-thread-use situation.
+lul::LUL* sLUL = nullptr;
+
+// Creates the process-wide LUL instance and feeds it all of the unwind
+// information that is currently available.  Expected to run exactly once,
+// on the main thread.
+static void sLUL_initialization_routine(void)
+{
+  // Must not have been initialised already.
+  MOZ_ASSERT(!sLUL);
+  /* "this is the main thread" */
+  MOZ_ASSERT(gettid() == getpid());
+
+  sLUL = new lul::LUL(logging_sink_for_LUL);
+  read_procmaps(sLUL);
+}
+#endif
+
+// Returns the id of the calling thread as reported by gettid().
+/* static */ Thread::tid_t
+Thread::GetCurrentId()
+{
+  const Thread::tid_t currentTid = gettid();
+  return currentTid;
+}
+
+#if !defined(ANDROID)
+// Keep track of when any of our threads calls fork(), so we can
+// temporarily disable signal delivery during the fork() call.  Not
+// doing so appears to cause a kind of race, in which signals keep
+// getting delivered to the thread doing fork(), which keeps causing
+// it to fail and be restarted; hence forward progress is delayed a
+// great deal.  A side effect of this is to permanently disable
+// sampling in the child process.  See bug 837390.
+
+// Unfortunately this is only doable on non-Android, since Bionic
+// doesn't have pthread_atfork.
+
+// This records the current state at the time we paused it.
+static bool was_paused = false;
+
+// pthread_atfork "prepare" hook: runs in the parent right before fork().
+// Remembers whether the active sampler was paused and then pauses it; with
+// no active sampler it just records "not paused".
+static void paf_prepare(void) {
+  if (!Sampler::GetActiveSampler()) {
+    was_paused = false;
+    return;
+  }
+
+  was_paused = Sampler::GetActiveSampler()->IsPaused();
+  Sampler::GetActiveSampler()->SetPaused(true);
+}
+
+// pthread_atfork "parent" hook: runs in the parent right after fork() and
+// puts the active sampler (if any) back into its pre-fork paused state.
+static void paf_parent(void) {
+  if (!Sampler::GetActiveSampler()) {
+    return;
+  }
+  Sampler::GetActiveSampler()->SetPaused(was_paused);
+}
+
+// Registers the prepare/parent fork hooks above.  No child hook is
+// installed.  Always returns NULL.
+static void* setup_atfork() {
+  void (*const noChildHook)(void) = NULL;
+  pthread_atfork(paf_prepare, paf_parent, noChildHook);
+  return NULL;
+}
+#endif /* !defined(ANDROID) */
+
+// Holds the single Sampler that is currently active, if any.
+struct SamplerRegistry {
+  // The active sampler; NULL when none is registered.
+  static Sampler *sampler;
+
+  // Records |sampler| as the active one.  Asserts that the slot was empty.
+  static void AddActiveSampler(Sampler *sampler) {
+    ASSERT(!SamplerRegistry::sampler);
+    SamplerRegistry::sampler = sampler;
+  }
+
+  // Clears the slot unconditionally; the argument is not consulted.
+  static void RemoveActiveSampler(Sampler *sampler) {
+    SamplerRegistry::sampler = NULL;
+  }
+};
+
+Sampler *SamplerRegistry::sampler = NULL;
+
+static mozilla::Atomic<ThreadProfile*> sCurrentThreadProfile;
+static sem_t sSignalHandlingDone;
+
+// Handler installed for SIGNAL_SAVE_PROFILE: forwards a save request to the
+// active sampler.  None of the signal arguments are used.
+static void ProfilerSaveSignalHandler(int signal, siginfo_t* info, void* context)
+{
+  Sampler::GetActiveSampler()->RequestSave();
+}
+
+// Copies the program counter, stack pointer and frame pointer (plus the
+// link register on ARM when ENABLE_ARM_LR_SAVING is set) out of the
+// ucontext_t pointed to by |context| into |sample|.
+//
+// NOTE(review): the V8_HOST_ARCH_* macros tested below are #defined further
+// down in this file, after this function.  Unless an included header also
+// defines them, every branch here is compiled out and |sample| is left
+// untouched -- confirm whether that is intended.
+static void SetSampleContext(TickSample* sample, void* context)
+{
+  // Extracting the sample from the context is extremely machine dependent.
+  ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context);
+  mcontext_t& mcontext = ucontext->uc_mcontext;
+#if V8_HOST_ARCH_IA32
+  sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
+  sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
+  sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
+#elif V8_HOST_ARCH_X64
+  sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
+  sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
+  sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
+#elif V8_HOST_ARCH_ARM
+// An undefined macro evaluates to 0, so this applies to Android's Bionic also.
+#if !defined(ANDROID) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3))
+  // Old glibc layout: registers are exposed through the gregs[] array.
+  sample->pc = reinterpret_cast<Address>(mcontext.gregs[R15]);
+  sample->sp = reinterpret_cast<Address>(mcontext.gregs[R13]);
+  sample->fp = reinterpret_cast<Address>(mcontext.gregs[R11]);
+#ifdef ENABLE_ARM_LR_SAVING
+  sample->lr = reinterpret_cast<Address>(mcontext.gregs[R14]);
+#endif
+#else
+  // Otherwise the registers are named arm_* fields.
+  sample->pc = reinterpret_cast<Address>(mcontext.arm_pc);
+  sample->sp = reinterpret_cast<Address>(mcontext.arm_sp);
+  sample->fp = reinterpret_cast<Address>(mcontext.arm_fp);
+#ifdef ENABLE_ARM_LR_SAVING
+  sample->lr = reinterpret_cast<Address>(mcontext.arm_lr);
+#endif
+#endif
+#elif V8_HOST_ARCH_MIPS
+  // Implement this on MIPS.
+  UNIMPLEMENTED();
+#endif
+}
+
+#ifdef ANDROID
+#define V8_HOST_ARCH_ARM 1
+#define SYS_gettid __NR_gettid
+#define SYS_tgkill __NR_tgkill
+#else
+#define V8_HOST_ARCH_X64 1
+#endif
+
+namespace {
+
+// SIGPROF handler.  Runs on the thread that SignalSender targeted with
+// tgkill(): builds a TickSample from the interrupted context, hands it to
+// the active sampler, and then wakes SignalSender through
+// sSignalHandlingDone.
+//
+// |context| is the ucontext of the interrupted thread; |signal| and |info|
+// are unused.
+void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) {
+  // Avoid TSan warning about clobbering errno.
+  int savedErrno = errno;
+
+  if (!Sampler::GetActiveSampler()) {
+    sem_post(&sSignalHandlingDone);
+    errno = savedErrno;
+    return;
+  }
+
+  // SignalSender publishes a non-null profile before every tgkill().  A
+  // null value therefore means this SIGPROF came from somewhere else.
+  // Ignore it instead of dereferencing null below, and do not post the
+  // semaphore: SignalSender is not waiting on this delivery.
+  ThreadProfile* currentProfile = sCurrentThreadProfile;
+  if (!currentProfile) {
+    errno = savedErrno;
+    return;
+  }
+
+  TickSample sample_obj;
+  TickSample* sample = &sample_obj;
+  sample->context = context;
+
+  // If profiling, we extract the current pc and sp.
+  if (Sampler::GetActiveSampler()->IsProfiling()) {
+    SetSampleContext(sample, context);
+  }
+  sample->threadProfile = currentProfile;
+  sample->timestamp = mozilla::TimeStamp::Now();
+  // The memory figures were stored in the profile by the sender thread
+  // because they are not collected from inside a signal handler.
+  sample->rssMemory = sample->threadProfile->mRssMemory;
+  sample->ussMemory = sample->threadProfile->mUssMemory;
+
+  Sampler::GetActiveSampler()->Tick(sample);
+
+  // Clear the slot, then tell SignalSender it may move on.
+  sCurrentThreadProfile = NULL;
+  sem_post(&sSignalHandlingDone);
+  errno = savedErrno;
+}
+
+} // namespace
+
+// Runs on the sender thread just before the target thread is signalled.
+// Stores the RSS/USS figures in |profile| when this is the first profiled
+// thread of the round and the active sampler has memory profiling on;
+// otherwise stores zeroes.
+static void ProfilerSignalThread(ThreadProfile *profile,
+                                 bool isFirstProfiledThread)
+{
+  const bool measureMemory =
+    isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory();
+
+  if (!measureMemory) {
+    profile->mRssMemory = 0;
+    profile->mUssMemory = 0;
+    return;
+  }
+
+  profile->mRssMemory = nsMemoryReporterManager::ResidentFast();
+  profile->mUssMemory = nsMemoryReporterManager::ResidentUnique();
+}
+
+// Sends |signalno| to thread |tid| of thread group |tgid| by invoking the
+// tgkill system call through syscall().  Returns syscall()'s result.
+int tgkill(pid_t tgid, pid_t tid, int signalno) {
+  const long rv = syscall(SYS_tgkill, tgid, tid, signalno);
+  return rv;
+}
+
+// Per-thread platform data.  This implementation carries no state; it only
+// takes part in instance counting.
+class PlatformData {
+ public:
+  PlatformData() { MOZ_COUNT_CTOR(PlatformData); }
+
+  ~PlatformData() { MOZ_COUNT_DTOR(PlatformData); }
+};
+
+// Allocates a fresh PlatformData.  |aThreadId| is not used here.
+// Release the result with FreePlatformData().
+/* static */ PlatformData*
+Sampler::AllocPlatformData(int aThreadId)
+{
+  PlatformData* data = new PlatformData;
+  return data;
+}
+
+// Destroys a PlatformData obtained from AllocPlatformData().
+/* static */ void
+Sampler::FreePlatformData(PlatformData* aData) {
+  delete aData;
+}
+
+// Body of the sampler thread started by Sampler::Start().  While the
+// registered sampler is active it repeatedly:
+//   1. services pending save requests and expired markers,
+//   2. unless paused, signals each profiled thread with SIGPROF in turn,
+//      waiting for that thread's handler to finish before moving on,
+//   3. sleeps for what is left of the sampling interval.
+// |arg| is unused.  Always returns 0.
+static void* SignalSender(void* arg) {
+  // Taken from platform_thread_posix.cc
+  prctl(PR_SET_NAME, "SamplerThread", 0, 0, 0);
+
+  int vm_tgid_ = getpid();
+  DebugOnly<int> my_tid = gettid();
+
+  unsigned int nSignalsSent = 0;
+
+  TimeDuration lastSleepOverhead = 0;
+  TimeStamp sampleStart = TimeStamp::Now();
+  while (SamplerRegistry::sampler->IsActive()) {
+
+    SamplerRegistry::sampler->HandleSaveRequest();
+    SamplerRegistry::sampler->DeleteExpiredMarkers();
+
+    if (!SamplerRegistry::sampler->IsPaused()) {
+      // The registered-thread list is walked with its mutex held.
+      ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+      std::vector<ThreadInfo*> threads =
+        SamplerRegistry::sampler->GetRegisteredThreads();
+
+      bool isFirstProfiledThread = true;
+      for (uint32_t i = 0; i < threads.size(); i++) {
+        ThreadInfo* info = threads[i];
+
+        // This will be null if we're not interested in profiling this thread.
+        if (!info->Profile() || info->IsPendingDelete())
+          continue;
+
+        // A thread reported as still sleeping just gets its previous sample
+        // duplicated instead of being signalled.
+        PseudoStack::SleepState sleeping = info->Stack()->observeSleeping();
+        if (sleeping == PseudoStack::SLEEPING_AGAIN) {
+          info->Profile()->DuplicateLastSample();
+          continue;
+        }
+
+        info->Profile()->GetThreadResponsiveness()->Update();
+
+        // We use sCurrentThreadProfile to pass the ThreadProfile for the
+        // thread we're profiling to the signal handler.
+        sCurrentThreadProfile = info->Profile();
+
+        int threadId = info->ThreadId();
+        MOZ_ASSERT(threadId != my_tid);
+
+        // Profile from the signal sender for information which is not signal
+        // safe, and will have low variation between the emission of the signal
+        // and the signal handler catch.
+        ProfilerSignalThread(sCurrentThreadProfile, isFirstProfiledThread);
+
+        // Profile from the signal handler for information which is signal safe
+        // and needs to be precise too, such as the stack of the interrupted
+        // thread.
+        if (tgkill(vm_tgid_, threadId, SIGPROF) != 0) {
+          printf_stderr("profiler failed to signal tid=%d\n", threadId);
+#ifdef DEBUG
+          abort();
+#else
+          // No handler will run for this thread, so there is nothing to
+          // wait for on the semaphore.
+          continue;
+#endif
+        }
+
+        // Wait for the signal handler to run before moving on to the next one
+        sem_wait(&sSignalHandlingDone);
+        isFirstProfiledThread = false;
+
+        // The LUL unwind object accumulates frame statistics.
+        // Periodically we should poke it to give it a chance to print
+        // those statistics.  This involves doing I/O (fprintf,
+        // __android_log_print, etc) and so can't safely be done from
+        // the unwinder threads, which is why it is done here.
+        if ((++nSignalsSent & 0xF) == 0) {
+#          if defined(USE_LUL_STACKWALK)
+           sLUL->MaybeShowStats();
+#          endif
+        }
+      }
+    }
+
+    // Sleep until one interval after this round started.  interval() is
+    // multiplied by 1000 to obtain microseconds.  The overshoot measured on
+    // the previous sleep is subtracted from the requested duration.
+    TimeStamp targetSleepEndTime = sampleStart + TimeDuration::FromMicroseconds(SamplerRegistry::sampler->interval() * 1000);
+    TimeStamp beforeSleep = TimeStamp::Now();
+    TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
+    double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds());
+    OS::SleepMicro(sleepTime);
+    sampleStart = TimeStamp::Now();
+    lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
+  }
+  return 0;
+}
+
+// Creates an inactive, unpaused sampler.
+//   interval  - sampling interval; SignalSender multiplies it by 1000 to
+//               obtain microseconds.
+//   profiling - whether register state is extracted from sampled contexts.
+//   entrySize - stored in entrySize_.
+Sampler::Sampler(double interval, bool profiling, int entrySize)
+    : interval_(interval),
+      profiling_(profiling),
+      paused_(false),
+      active_(false),
+      entrySize_(entrySize) {
+  MOZ_COUNT_CTOR(Sampler);
+
+  // Stop() and ~Sampler() read these flags even when Start() never ran or
+  // bailed out early, so give them defined values here.  They are assigned
+  // in the body rather than the initializer list because the member
+  // declaration order is not visible from this file.
+  signal_handler_installed_ = false;
+  signal_sender_launched_ = false;
+}
+
+// Destructor.  The sender thread must already have been joined, i.e.
+// Stop() must have run, by the time the sampler is destroyed.
+Sampler::~Sampler()
+{
+  MOZ_COUNT_DTOR(Sampler);
+  ASSERT(!signal_sender_launched_);
+}
+
+
+// Starts sampling: initialises the stack walker, registers this sampler as
+// the active one, installs the SIGPROF and SIGNAL_SAVE_PROFILE handlers and
+// launches the SignalSender thread.
+//
+// On failure to set up the semaphore or either handler the function logs
+// and returns early, leaving the sampler registered but inactive; Stop()
+// unregisters it.
+void Sampler::Start() {
+  LOG("Sampler started");
+
+#if defined(USE_EHABI_STACKWALK)
+  mozilla::EHABIStackWalkInit();
+#elif defined(USE_LUL_STACKWALK)
+  // NOTE: this isn't thread-safe.  But we expect Sampler::Start to be
+  // called only from the main thread, so this is OK in general.
+  if (!sLUL) {
+     sLUL_initialization_routine();
+  }
+#endif
+
+  SamplerRegistry::AddActiveSampler(this);
+
+  // Initialize signal handler communication
+  sCurrentThreadProfile = NULL;
+  if (sem_init(&sSignalHandlingDone, /* pshared: */ 0, /* value: */ 0) != 0) {
+    LOG("Error initializing semaphore");
+    return;
+  }
+
+  // Request profiling signals.
+  LOG("Request signal");
+  struct sigaction sa;
+  sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(ProfilerSignalHandler);
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_RESTART | SA_SIGINFO;
+  if (sigaction(SIGPROF, &sa, &old_sigprof_signal_handler_) != 0) {
+    LOG("Error installing signal");
+    return;
+  }
+
+  // Request save profile signals
+  struct sigaction sa2;
+  sa2.sa_sigaction = ProfilerSaveSignalHandler;
+  sigemptyset(&sa2.sa_mask);
+  sa2.sa_flags = SA_RESTART | SA_SIGINFO;
+  if (sigaction(SIGNAL_SAVE_PROFILE, &sa2, &old_sigsave_signal_handler_) != 0) {
+    LOG("Error installing start signal");
+    // signal_handler_installed_ is still false, so Stop() will not restore
+    // anything.  Undo the SIGPROF installation here so our handler does not
+    // stay in place after the sampler is gone.
+    sigaction(SIGPROF, &old_sigprof_signal_handler_, 0);
+    return;
+  }
+  LOG("Signal installed");
+  signal_handler_installed_ = true;
+
+#if defined(USE_LUL_STACKWALK)
+  // Switch into unwind mode.  After this point, we can't add or
+  // remove any unwind info to/from this LUL instance.  The only thing
+  // we can do with it is Unwind() calls.
+  sLUL->EnableUnwinding();
+
+  // Has a test been requested?
+  if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
+     int nTests = 0, nTestsPassed = 0;
+     RunLulUnitTests(&nTests, &nTestsPassed, sLUL);
+  }
+#endif
+
+  // Start a thread that sends SIGPROF signal to VM thread.
+  // Sending the signal ourselves instead of relying on itimer provides
+  // much better accuracy.
+  SetActive(true);
+  if (pthread_create(
+        &signal_sender_thread_, NULL, SignalSender, NULL) == 0) {
+    signal_sender_launched_ = true;
+    LOG("Profiler thread started");
+  } else {
+    // Only report success when the thread really exists.
+    LOG("Error starting profiler thread");
+  }
+}
+
+
+// Stops sampling: deactivates the sampler, joins the sender thread if one
+// was launched, unregisters the sampler and restores the signal handlers
+// that were in place before Start().
+void Sampler::Stop() {
+  SetActive(false);
+
+  // SignalSender leaves its loop once it sees the sampler is inactive, so
+  // the join below completes.
+  if (signal_sender_launched_) {
+    pthread_join(signal_sender_thread_, NULL);
+    signal_sender_launched_ = false;
+  }
+
+  SamplerRegistry::RemoveActiveSampler(this);
+
+  if (!signal_handler_installed_) {
+    return;
+  }
+
+  // Put the previous handlers back.
+  sigaction(SIGNAL_SAVE_PROFILE, &old_sigsave_signal_handler_, 0);
+  sigaction(SIGPROF, &old_sigprof_signal_handler_, 0);
+  signal_handler_installed_ = false;
+}
+
+// Registers the calling thread with the profiler.
+//   aName         - thread name to record
+//   aPseudoStack  - the thread's PseudoStack
+//   aIsMainThread - whether this is the main thread
+//   stackTop      - top of the thread's stack; also stored in TLS
+// Returns false when the registry has not been set up or the thread is
+// already registered (and not pending deletion); true otherwise.
+bool Sampler::RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop)
+{
+  if (!Sampler::sRegisteredThreadsMutex) {
+    return false;
+  }
+
+  ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  const int currentTid = gettid();
+  for (ThreadInfo* existing : *sRegisteredThreads) {
+    if (existing->ThreadId() != currentTid || existing->IsPendingDelete()) {
+      continue;
+    }
+    // Thread already registered. This means the first unregister will be
+    // too early.
+    ASSERT(false);
+    return false;
+  }
+
+  set_tls_stack_top(stackTop);
+
+  ThreadInfo* newInfo = new StackOwningThreadInfo(aName, currentTid,
+    aIsMainThread, aPseudoStack, stackTop);
+
+  // Let a running sampler know about the thread before it is listed.
+  if (sActiveSampler) {
+    sActiveSampler->RegisterThread(newInfo);
+  }
+
+  sRegisteredThreads->push_back(newInfo);
+
+  return true;
+}
+
+// Removes the calling thread from the profiler's registry.  While the
+// profiler is active the entry is only marked pending-delete; otherwise it
+// is deleted and erased immediately.
+void Sampler::UnregisterCurrentThread()
+{
+  if (!Sampler::sRegisteredThreadsMutex) {
+    return;
+  }
+
+  tlsStackTop.set(nullptr);
+
+  ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  const int currentTid = gettid();
+
+  for (uint32_t idx = 0; idx < sRegisteredThreads->size(); idx++) {
+    ThreadInfo* candidate = sRegisteredThreads->at(idx);
+    if (candidate->ThreadId() != currentTid || candidate->IsPendingDelete()) {
+      continue;
+    }
+
+    if (profiler_is_active()) {
+      // We still want to show the results of this thread if you
+      // save the profile shortly after a thread is terminated.
+      // For now we will defer the delete to profile stop.
+      candidate->SetPendingDelete();
+    } else {
+      delete candidate;
+      sRegisteredThreads->erase(sRegisteredThreads->begin() + idx);
+    }
+    // Only the first live entry for this thread is handled.
+    break;
+  }
+}
+
+#ifdef ANDROID
+static struct sigaction old_sigstart_signal_handler;
+const int SIGSTART = SIGUSR2;
+
+// Releases the first |size| heap-allocated strings referenced by |array|.
+// The array itself is left alone.  Does nothing when |size| <= 0.
+static void freeArray(const char** array, int size) {
+  for (int remaining = size; remaining > 0; --remaining, ++array) {
+    free((void*) *array);
+  }
+}
+
+// Upper bound on the number of items readCSVArray() stores.  It must not
+// exceed the size of the fixed arrays that DoStartTask() passes in (10).
+static const uint32_t kMaxCSVItems = 10;
+
+// Splits the comma-separated list in |csvList| into separately malloc'ed
+// copies and stores them in buffer[0 .. count).
+//
+//   csvList - NUL-terminated text, modified in place (one trailing '\n' is
+//             stripped and the separators are overwritten).  NULL or an
+//             empty string yields zero items.
+//   buffer  - output array with room for at least kMaxCSVItems pointers.
+//
+// Returns the number of items stored.  Items beyond kMaxCSVItems are
+// dropped, and parsing stops early if an allocation fails.  Release the
+// results with freeArray().
+static uint32_t readCSVArray(char* csvList, const char** buffer) {
+  if (!csvList || !buffer) {
+    return 0;
+  }
+
+  // Strip a single trailing newline.  The length check keeps an empty
+  // string from being indexed at -1.
+  size_t listLength = strlen(csvList);
+  if (listLength > 0 && csvList[listLength - 1] == '\n') {
+    csvList[listLength - 1] = '\0';
+  }
+
+  uint32_t count = 0;
+  char* savePtr = nullptr;
+  for (char* item = strtok_r(csvList, ",", &savePtr);
+       item && count < kMaxCSVItems;
+       item = strtok_r(nullptr, ",", &savePtr)) {
+    size_t length = strlen(item) + 1;  // Include \0
+    char* newBuf = static_cast<char*>(malloc(length));
+    if (!newBuf) {
+      // Out of memory: report what has been copied so far.
+      break;
+    }
+    memcpy(newBuf, item, length);
+    buffer[count++] = newBuf;
+  }
+
+  return count;
+}
+
+// Currently supports only the env variables
+// reported in read_profiler_env.
+//
+// Reads "name=value" lines from |fileName| and applies them: interval,
+// entries and stack settings are forwarded to their setters, while the
+// features and "threads" lines are split into |features| / |threadNames|
+// with their counts written to |featureCount| / |threadCount|.  A file that
+// cannot be opened is silently ignored.
+static void ReadProfilerVars(const char* fileName, const char** features,
+                            uint32_t* featureCount, const char** threadNames, uint32_t* threadCount) {
+  FILE* file = fopen(fileName, "r");
+  if (!file) {
+    return;
+  }
+
+  const int bufferSize = 1024;
+  char line[bufferSize];
+
+  while (fgets(line, bufferSize, file) != NULL) {
+    char* savePtr = nullptr;
+    char* feature = strtok_r(line, "=", &savePtr);
+    if (!feature) {
+      // Nothing before the '=' (e.g. an empty line): there is no name to
+      // compare against.
+      continue;
+    }
+    // NULL when the line has no '=' or nothing after it.
+    char* value = strtok_r(NULL, "", &savePtr);
+
+    if (strncmp(feature, PROFILER_INTERVAL, bufferSize) == 0) {
+      set_profiler_interval(value);
+    } else if (strncmp(feature, PROFILER_ENTRIES, bufferSize) == 0) {
+      set_profiler_entries(value);
+    } else if (strncmp(feature, PROFILER_STACK, bufferSize) == 0) {
+      set_profiler_scan(value);
+    } else if (strncmp(feature, PROFILER_FEATURES, bufferSize) == 0) {
+      // A missing value means an empty list rather than a NULL parse.
+      *featureCount = value ? readCSVArray(value, features) : 0;
+    } else if (strncmp(feature, "threads", bufferSize) == 0) {
+      *threadCount = value ? readCSVArray(value, threadNames) : 0;
+    }
+  }
+
+  fclose(file);
+}
+
+// Reads the profiler options file and starts the profiler with the
+// features and thread names listed there, then frees the parsed strings.
+static void DoStartTask() {
+  const char* profilerConfigFile = "/data/local/tmp/profiler.options";
+
+  // Room for ten entries of each kind for now.
+  // FIXME: these don't really point to const chars*
+  // So we free them later, but we don't want to change the const char**
+  // declaration in profiler_start. Annoying but ok for now.
+  const char* features[10];
+  const char* threadNames[10];
+  uint32_t featureCount = 0;
+  uint32_t threadCount = 0;
+
+  ReadProfilerVars(profilerConfigFile, features, &featureCount, threadNames, &threadCount);
+  MOZ_ASSERT(featureCount < 10);
+  MOZ_ASSERT(threadCount < 10);
+
+  profiler_start(PROFILE_DEFAULT_ENTRY, 1,
+      features, featureCount,
+      threadNames, threadCount);
+
+  // The strings were malloc'ed while parsing; release them now.
+  freeArray(threadNames, threadCount);
+  freeArray(features, featureCount);
+}
+
+// Handler for SIGSTART.  Does no work itself: it dispatches a runnable to
+// the main thread, and that runnable calls DoStartTask().  None of the
+// signal arguments are used.
+static void StartSignalHandler(int signal, siginfo_t* info, void* context) {
+  // Runnable whose only job is to call DoStartTask().
+  class StartTask : public Runnable {
+  public:
+    NS_IMETHOD Run() override {
+      DoStartTask();
+      return NS_OK;
+    }
+  };
+  // XXX: technically NS_DispatchToMainThread is NOT async signal safe. We risk
+  // nasty things like deadlocks, but the probability is very low and we
+  // typically only do this once so it tends to be ok. See bug 909403.
+  NS_DispatchToMainThread(new StartTask());
+}
+
+// Android start-up hook: installs StartSignalHandler for SIGSTART and
+// keeps the previous disposition in old_sigstart_signal_handler.  A failed
+// installation is only logged.
+void OS::Startup()
+{
+  LOG("Registering start signal");
+
+  struct sigaction startAction;
+  startAction.sa_flags = SA_RESTART | SA_SIGINFO;
+  sigemptyset(&startAction.sa_mask);
+  startAction.sa_sigaction = StartSignalHandler;
+
+  const int rv = sigaction(SIGSTART, &startAction, &old_sigstart_signal_handler);
+  if (rv != 0) {
+    LOG("Error installing signal");
+  }
+}
+
+#else
+
+// Non-Android start-up hook: registers the fork handlers.
+void OS::Startup()
+{
+  setup_atfork();
+}
+
+#endif
+
+
+
+// Captures the calling thread's current context into the ucontext_t that
+// |aContext| points at and, on success, records it in this sample.  The
+// sample is left untouched when getcontext() fails.
+void TickSample::PopulateContext(void* aContext)
+{
+  MOZ_ASSERT(aContext);
+  ucontext_t* pContext = reinterpret_cast<ucontext_t*>(aContext);
+
+  if (getcontext(pContext) != 0) {
+    return;
+  }
+
+  context = pContext;
+  SetSampleContext(this, aContext);
+}
+
+// Sleeps for |microseconds| microseconds, resuming after signal
+// interruptions until the whole interval has elapsed.  Negative values are
+// treated as zero.
+void OS::SleepMicro(int microseconds)
+{
+  if (microseconds < 0) {
+    // A negative count would produce an invalid timespec.
+    microseconds = 0;
+  }
+
+  // Split into whole seconds plus a sub-second remainder so nanosleep()
+  // handles any length.  This avoids usleep(), which POSIX only specifies
+  // for arguments below 1,000,000 and which fails on EINTR.
+  struct timespec ts;
+  ts.tv_sec  = microseconds / 1000000;
+  ts.tv_nsec = (microseconds % 1000000) * 1000L;
+
+  int rv = ::nanosleep(&ts, &ts);
+
+  while (rv != 0 && errno == EINTR) {
+    // Keep waiting in case of interrupt.
+    // nanosleep puts the remaining time back into ts.
+    rv = ::nanosleep(&ts, &ts);
+  }
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
+}
diff --git a/tools/profiler/core/platform-macos.cc b/tools/profiler/core/platform-macos.cc
new file mode 100644
index 000000000..9a98d1a26
--- /dev/null
+++ b/tools/profiler/core/platform-macos.cc
@@ -0,0 +1,469 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <dlfcn.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <mach/mach_init.h>
+#include <mach-o/dyld.h>
+#include <mach-o/getsect.h>
+
+#include <AvailabilityMacros.h>
+
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <libkern/OSAtomic.h>
+#include <mach/mach.h>
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/vm_statistics.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <math.h>
+
+#ifndef SPS_STANDALONE
+#include "ThreadResponsiveness.h"
+#include "nsThreadUtils.h"
+
+// Memory profile
+#include "nsMemoryReporterManager.h"
+#endif
+
+#include "platform.h"
+#include "GeckoSampler.h"
+#include "mozilla/TimeStamp.h"
+
+using mozilla::TimeStamp;
+using mozilla::TimeDuration;
+
+// this port is based off of v8 svn revision 9837
+
+// XXX: this is a very stubbed out implementation
+// that only supports a single Sampler
+struct SamplerRegistry {
+  // The single registered sampler, or null when none is registered.
+  static Sampler *sampler;
+
+  // Registers |sampler| as the active one; asserts that the slot was empty.
+  static void AddActiveSampler(Sampler *sampler) {
+    ASSERT(SamplerRegistry::sampler == nullptr);
+    SamplerRegistry::sampler = sampler;
+  }
+
+  // Clears the slot. The argument is not consulted.
+  static void RemoveActiveSampler(Sampler *sampler) {
+    SamplerRegistry::sampler = nullptr;
+  }
+};
+
+Sampler *SamplerRegistry::sampler = nullptr;
+
+#ifdef DEBUG
+// Sentinel meaning "no thread"; only referenced from ASSERTs, hence the
+// surrounding DEBUG guard.
+// 0 is never a valid thread id on MacOSX since a pthread_t is a pointer.
+static const pthread_t kNoThread = (pthread_t) 0;
+#endif
+
+void OS::Startup()
+{
+  // Intentionally empty: no startup work is performed here.
+}
+
+// Sleeps for |milliseconds| ms by converting to the microseconds that
+// usleep() expects.
+void OS::Sleep(int milliseconds)
+{
+  const int microseconds = 1000 * milliseconds;
+  usleep(microseconds);
+}
+
+// Sleeps for |microseconds| us; a thin wrapper over usleep().
+void OS::SleepMicro(int microseconds)
+{
+  usleep(microseconds);
+}
+
+// Creates a not-yet-started thread object with the default stack size
+// (stack_size_ == 0) and the given name.
+Thread::Thread(const char* name)
+    : stack_size_(0)
+{
+  set_name(name);
+}
+
+
+Thread::~Thread()
+{
+  // Nothing to release here.
+}
+
+
+// Names the calling thread, if the running OS provides pthread_setname_np.
+static void SetThreadName(const char* name) {
+  // pthread_setname_np is only available in 10.6 or later, so look it up
+  // at runtime instead of linking against it.
+  typedef int (*SetNameFn)(const char*);
+  SetNameFn setNameFn =
+    reinterpret_cast<SetNameFn>(dlsym(RTLD_DEFAULT, "pthread_setname_np"));
+  if (setNameFn == NULL) {
+    return;
+  }
+
+  // Mac OS X does not expose the length limit of the name, so hardcode it.
+  static const int kMaxNameLength = 63;
+  USE(kMaxNameLength);
+  ASSERT(Thread::kMaxThreadNameLength <= kMaxNameLength);
+  setNameFn(name);
+}
+
+
+// pthread start routine: |arg| is the Thread object passed to
+// pthread_create by Thread::Start().
+static void* ThreadEntry(void* arg) {
+  Thread* self = static_cast<Thread*>(arg);
+
+  // Record our own handle, then name the thread before running its body.
+  self->thread_ = pthread_self();
+  SetThreadName(self->name());
+  ASSERT(self->thread_ != kNoThread);
+
+  self->Run();
+  return NULL;
+}
+
+
+// Copies |name| into the fixed-size name_ buffer, truncating if needed.
+void Thread::set_name(const char* name) {
+  const size_t capacity = sizeof(name_);
+  strncpy(name_, name, capacity);
+  // strncpy does not terminate on truncation, so force a terminator.
+  name_[capacity - 1] = '\0';
+}
+
+
+// Spawns the underlying pthread, which begins executing ThreadEntry(this).
+// A custom attribute object is only built when a non-default stack size
+// was requested (stack_size_ > 0); otherwise default attributes are used.
+void Thread::Start() {
+  pthread_attr_t attr;
+  pthread_attr_t* attr_ptr = NULL;
+  if (stack_size_ > 0) {
+    pthread_attr_init(&attr);
+    pthread_attr_setstacksize(&attr, static_cast<size_t>(stack_size_));
+    attr_ptr = &attr;
+  }
+
+  pthread_create(&thread_, attr_ptr, ThreadEntry, this);
+
+  // An initialized attribute object must be destroyed once it is no longer
+  // needed; the created thread keeps its own copy of the settings.
+  if (attr_ptr) {
+    pthread_attr_destroy(attr_ptr);
+  }
+
+  ASSERT(thread_ != kNoThread);
+}
+
+// Blocks until the underlying pthread has finished; its return value is
+// discarded.
+void Thread::Join()
+{
+  pthread_join(thread_, NULL);
+}
+
+// Per-thread handles the sampler needs. The handles are captured for the
+// thread that constructs the object.
+class PlatformData {
+ public:
+  PlatformData()
+    : profiled_thread_(mach_thread_self())
+    , profiled_pthread_(pthread_from_mach_thread_np(profiled_thread_))
+  {
+  }
+
+  ~PlatformData() {
+    // Deallocate Mach port for thread.
+    mach_port_deallocate(mach_task_self(), profiled_thread_);
+  }
+
+  thread_act_t profiled_thread() { return profiled_thread_; }
+  pthread_t profiled_pthread() { return profiled_pthread_; }
+
+ private:
+  // Note: Mach primitives are used for profiled_thread_ instead of
+  // PThread's because the latter doesn't provide the thread manipulation
+  // primitives required. For details, consult "Mac OS X Internals" book,
+  // Section 7.3.
+  thread_act_t profiled_thread_;
+
+  // The pthread is stored as well because Mach threads have no concept of
+  // stack, and we want to be able to get the stack size when we need to
+  // unwind the stack using frame pointers.
+  pthread_t profiled_pthread_;
+};
+
+// Allocates the platform data for the calling thread. |aThreadId| is not
+// used by this implementation.
+/* static */ PlatformData*
+Sampler::AllocPlatformData(int aThreadId)
+{
+  PlatformData* data = new PlatformData();
+  return data;
+}
+
+// Destroys platform data previously returned by AllocPlatformData().
+/* static */ void
+Sampler::FreePlatformData(PlatformData* aData)
+{
+  delete aData;
+}
+
+// Background thread that periodically samples every registered, profiled
+// thread on behalf of the sampler stored in SamplerRegistry. At most one
+// instance exists at a time (see instance_).
+class SamplerThread : public Thread {
+ public:
+  // |interval| is multiplied by 1000 and rounded to the nearest integer to
+  // obtain the sampling period in microseconds; a non-positive result is
+  // clamped to 1.
+  explicit SamplerThread(double interval)
+      : Thread("SamplerThread")
+      , intervalMicro_(floor(interval * 1000 + 0.5))
+  {
+    if (intervalMicro_ <= 0) {
+      intervalMicro_ = 1;
+    }
+  }
+
+  // Registers |sampler| and, if no sampler thread exists yet, creates and
+  // starts one using that sampler's interval.
+  static void AddActiveSampler(Sampler* sampler) {
+    SamplerRegistry::AddActiveSampler(sampler);
+    if (instance_ == NULL) {
+      instance_ = new SamplerThread(sampler->interval());
+      instance_->Start();
+    }
+  }
+
+  // Waits for the sampler thread to finish, then unregisters |sampler| and
+  // destroys the thread object. Run() only returns once the sampler reports
+  // !IsActive(), so the caller must have deactivated it first.
+  static void RemoveActiveSampler(Sampler* sampler) {
+    instance_->Join();
+    //XXX: unlike v8 we need to remove the active sampler after doing the Join
+    // because we drop the sampler immediately
+    SamplerRegistry::RemoveActiveSampler(sampler);
+    delete instance_;
+    instance_ = NULL;
+  }
+
+  // Implement Thread::Run().
+  // Sampling loop: while the sampler is active, sample each eligible thread
+  // and then sleep until the next period, compensating for how much the
+  // previous sleep overshot.
+  virtual void Run() {
+    TimeDuration lastSleepOverhead = 0;
+    TimeStamp sampleStart = TimeStamp::Now();
+    while (SamplerRegistry::sampler->IsActive()) {
+      SamplerRegistry::sampler->DeleteExpiredMarkers();
+      if (!SamplerRegistry::sampler->IsPaused()) {
+        // The registered-threads mutex is held for the whole sampling pass.
+        ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+        std::vector<ThreadInfo*> threads =
+          SamplerRegistry::sampler->GetRegisteredThreads();
+        bool isFirstProfiledThread = true;
+        for (uint32_t i = 0; i < threads.size(); i++) {
+          ThreadInfo* info = threads[i];
+
+          // This will be null if we're not interested in profiling this thread.
+          if (!info->Profile() || info->IsPendingDelete())
+            continue;
+
+          // SLEEPING_AGAIN: re-emit the previous sample instead of taking a
+          // new one.
+          PseudoStack::SleepState sleeping = info->Stack()->observeSleeping();
+          if (sleeping == PseudoStack::SLEEPING_AGAIN) {
+            info->Profile()->DuplicateLastSample();
+            continue;
+          }
+
+#ifndef SPS_STANDALONE
+          info->Profile()->GetThreadResponsiveness()->Update();
+#endif
+
+          ThreadProfile* thread_profile = info->Profile();
+
+          SampleContext(SamplerRegistry::sampler, thread_profile,
+                        isFirstProfiledThread);
+          isFirstProfiledThread = false;
+        }
+      }
+
+      // Aim to wake up one period after the start of this iteration, minus
+      // the overshoot measured for the previous sleep; never sleep a
+      // negative amount.
+      TimeStamp targetSleepEndTime = sampleStart + TimeDuration::FromMicroseconds(intervalMicro_);
+      TimeStamp beforeSleep = TimeStamp::Now();
+      TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
+      double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds());
+      OS::SleepMicro(sleepTime);
+      sampleStart = TimeStamp::Now();
+      // Overhead = actual wake-up time minus the wake-up time we asked for.
+      lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
+    }
+  }
+
+  // Suspends the thread behind |thread_profile|, reads its pc/sp/fp
+  // registers into a TickSample, hands the sample to sampler->Tick(), and
+  // resumes the thread. Returns without sampling if the suspend fails.
+  // Resident memory is recorded only for the first profiled thread of a
+  // pass, and only when memory profiling is enabled.
+  void SampleContext(Sampler* sampler, ThreadProfile* thread_profile,
+                     bool isFirstProfiledThread)
+  {
+    thread_act_t profiled_thread =
+      thread_profile->GetPlatformData()->profiled_thread();
+
+    TickSample sample_obj;
+    TickSample* sample = &sample_obj;
+
+    // Unique Set Size is not supported on Mac.
+    sample->ussMemory = 0;
+    sample->rssMemory = 0;
+
+#ifndef SPS_STANDALONE
+    if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) {
+      sample->rssMemory = nsMemoryReporterManager::ResidentFast();
+    }
+#endif
+
+    // We're using thread_suspend on OS X because pthread_kill (which is what
+    // we're using on Linux) has less consistent performance and causes
+    // strange crashes, see bug 1166778 and bug 1166808.
+
+    if (KERN_SUCCESS != thread_suspend(profiled_thread)) return;
+
+    // Pick the thread-state flavor for the host architecture and define
+    // REGISTER_FIELD so the register members can be named uniformly (their
+    // spelling depends on __DARWIN_UNIX03).
+#if V8_HOST_ARCH_X64
+    thread_state_flavor_t flavor = x86_THREAD_STATE64;
+    x86_thread_state64_t state;
+    mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
+#if __DARWIN_UNIX03
+#define REGISTER_FIELD(name) __r ## name
+#else
+#define REGISTER_FIELD(name) r ## name
+#endif  // __DARWIN_UNIX03
+#elif V8_HOST_ARCH_IA32
+    thread_state_flavor_t flavor = i386_THREAD_STATE;
+    i386_thread_state_t state;
+    mach_msg_type_number_t count = i386_THREAD_STATE_COUNT;
+#if __DARWIN_UNIX03
+#define REGISTER_FIELD(name) __e ## name
+#else
+#define REGISTER_FIELD(name) e ## name
+#endif  // __DARWIN_UNIX03
+#else
+#error Unsupported Mac OS X host architecture.
+#endif  // V8_HOST_ARCH
+
+    // Tick() runs while the profiled thread is still suspended; the thread
+    // is resumed whether or not reading its state succeeded.
+    if (thread_get_state(profiled_thread,
+                         flavor,
+                         reinterpret_cast<natural_t*>(&state),
+                         &count) == KERN_SUCCESS) {
+      sample->pc = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
+      sample->sp = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
+      sample->fp = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
+      sample->timestamp = mozilla::TimeStamp::Now();
+      sample->threadProfile = thread_profile;
+
+      sampler->Tick(sample);
+    }
+    thread_resume(profiled_thread);
+  }
+
+  // Sampling period in microseconds; always >= 1 (see constructor).
+  int intervalMicro_;
+  //RuntimeProfilerRateLimiter rate_limiter_;
+
+  // The one sampler thread, or NULL when none is running.
+  static SamplerThread* instance_;
+
+  DISALLOW_COPY_AND_ASSIGN(SamplerThread);
+};
+
+#undef REGISTER_FIELD
+
+SamplerThread* SamplerThread::instance_ = NULL;
+
+// Stores the configuration; a new sampler starts out neither paused nor
+// active.
+Sampler::Sampler(double interval, bool profiling, int entrySize)
+    : interval_(interval)
+    , profiling_(profiling)
+    , paused_(false)
+    , active_(false)
+    , entrySize_(entrySize)
+{
+}
+
+
+Sampler::~Sampler()
+{
+  // A sampler must be stopped before it is destroyed.
+  ASSERT(!IsActive());
+}
+
+
+// Marks the sampler active and hands it to the sampler thread. The flag is
+// raised first because the sampler thread's loop polls IsActive().
+void Sampler::Start()
+{
+  ASSERT(!IsActive());
+  SetActive(true);
+  SamplerThread::AddActiveSampler(this);
+}
+
+
+// Clears the active flag so the sampler thread's loop exits, then joins
+// and tears down that thread.
+void Sampler::Stop()
+{
+  ASSERT(IsActive());
+  SetActive(false);
+  SamplerThread::RemoveActiveSampler(this);
+}
+
+// Returns the pthread handle stored in |aData|.
+pthread_t
+Sampler::GetProfiledThread(PlatformData* aData)
+{
+  pthread_t handle = aData->profiled_pthread();
+  return handle;
+}
+
+#include <sys/syscall.h>
+// Returns the calling thread's id as reported by the thread_selfid
+// syscall, narrowed to pid_t.
+pid_t gettid()
+{
+  return static_cast<pid_t>(syscall(SYS_thread_selfid));
+}
+
+// Id of the calling thread; forwards to gettid().
+/* static */ Thread::tid_t
+Thread::GetCurrentId()
+{
+  const pid_t tid = gettid();
+  return tid;
+}
+
+// Adds the calling thread to the list of registered threads.
+//
+// Returns false if the registry mutex does not exist or if a live
+// (not pending-delete) entry with this thread's id is already present;
+// returns true once a new entry has been appended.
+bool Sampler::RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop)
+{
+  if (!Sampler::sRegisteredThreadsMutex) {
+    return false;
+  }
+
+  ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  const int tid = gettid();
+  for (auto it = sRegisteredThreads->begin();
+       it != sRegisteredThreads->end(); ++it) {
+    ThreadInfo* existing = *it;
+    if (existing->ThreadId() != tid || existing->IsPendingDelete()) {
+      continue;
+    }
+    // Thread already registered. This means the first unregister will be
+    // too early.
+    ASSERT(false);
+    return false;
+  }
+
+  set_tls_stack_top(stackTop);
+
+  ThreadInfo* created = new StackOwningThreadInfo(aName, tid,
+    aIsMainThread, aPseudoStack, stackTop);
+
+  // Let a running sampler know about the new thread before it is listed.
+  if (sActiveSampler) {
+    sActiveSampler->RegisterThread(created);
+  }
+
+  sRegisteredThreads->push_back(created);
+  return true;
+}
+
+// Removes the calling thread from the list of registered threads. While
+// the profiler is active the entry is only flagged pending-delete;
+// otherwise it is destroyed and erased immediately.
+void Sampler::UnregisterCurrentThread()
+{
+  if (!Sampler::sRegisteredThreadsMutex) {
+    return;
+  }
+
+  tlsStackTop.set(nullptr);
+
+  ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  const int tid = gettid();
+
+  for (auto it = sRegisteredThreads->begin();
+       it != sRegisteredThreads->end(); ++it) {
+    ThreadInfo* entry = *it;
+    if (entry->ThreadId() != tid || entry->IsPendingDelete()) {
+      continue;
+    }
+
+    if (profiler_is_active()) {
+      // We still want to show the results of this thread if you
+      // save the profile shortly after a thread is terminated.
+      // For now we will defer the delete to profile stop.
+      entry->SetPendingDelete();
+    } else {
+      delete entry;
+      sRegisteredThreads->erase(it);
+    }
+    // Only the first live entry for this thread is handled.
+    break;
+  }
+}
+
+// Fills in this sample's sp, fp and pc with values describing the CALLER of
+// PopulateContext, derived from this function's own frame pointer and
+// return address. |aContext| is not read or written by this implementation.
+//
+// NOTE(review): the asm reads %rbp/%ebp directly, so it presumably relies
+// on this function being compiled with a frame pointer and not inlined —
+// confirm against the build flags.
+void TickSample::PopulateContext(void* aContext)
+{
+  // Note that this asm changes if PopulateContext's parameter list is altered
+#if defined(SPS_PLAT_amd64_darwin)
+  asm (
+      // Compute caller's %rsp by adding to %rbp:
+      // 8 bytes for previous %rbp, 8 bytes for return address
+      "leaq 0x10(%%rbp), %0\n\t"
+      // Dereference %rbp to get previous %rbp
+      "movq (%%rbp), %1\n\t"
+      :
+      "=r"(sp),
+      "=r"(fp)
+  );
+#elif defined(SPS_PLAT_x86_darwin)
+  asm (
+      // Compute caller's %esp by adding to %ebp:
+      // 4 bytes for aContext + 4 bytes for return address +
+      // 4 bytes for previous %ebp
+      "leal 0xc(%%ebp), %0\n\t"
+      // Dereference %ebp to get previous %ebp
+      "movl (%%ebp), %1\n\t"
+      :
+      "=r"(sp),
+      "=r"(fp)
+  );
+#else
+# error "Unsupported architecture"
+#endif
+  // The caller's pc is this function's return address.
+  pc = reinterpret_cast<Address>(__builtin_extract_return_addr(
+                                    __builtin_return_address(0)));
+}
+
diff --git a/tools/profiler/core/platform-win32.cc b/tools/profiler/core/platform-win32.cc
new file mode 100644
index 000000000..74b311f28
--- /dev/null
+++ b/tools/profiler/core/platform-win32.cc
@@ -0,0 +1,431 @@
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#include <windows.h>
+#include <mmsystem.h>
+#include <process.h>
+#include "platform.h"
+#include "GeckoSampler.h"
+#include "ThreadResponsiveness.h"
+#include "ProfileEntry.h"
+
+// Memory profile
+#include "nsMemoryReporterManager.h"
+
+#include "mozilla/StackWalk_windows.h"
+
+
+// Owns a handle to the thread that is going to be profiled.
+class PlatformData {
+ public:
+  // Opens a handle to the thread identified by |aThreadId|. We need our own
+  // copy of the handle because it is used from the sampler thread; using
+  // GetThreadHandle() will not work in that case. OpenThread is used
+  // because DuplicateHandle for some reason doesn't work in Chrome's
+  // sandbox.
+  PlatformData(int aThreadId)
+    : profiled_thread_(OpenThread(THREAD_GET_CONTEXT |
+                                  THREAD_SUSPEND_RESUME |
+                                  THREAD_QUERY_INFORMATION,
+                                  false,
+                                  aThreadId))
+  {
+  }
+
+  // Closes the handle if one was obtained.
+  ~PlatformData() {
+    if (profiled_thread_ == NULL) {
+      return;
+    }
+    CloseHandle(profiled_thread_);
+    profiled_thread_ = NULL;
+  }
+
+  HANDLE profiled_thread() { return profiled_thread_; }
+
+ private:
+  HANDLE profiled_thread_;
+};
+
+// Allocates platform data holding a handle to thread |aThreadId|.
+/* static */ PlatformData*
+Sampler::AllocPlatformData(int aThreadId)
+{
+  PlatformData* data = new PlatformData(aThreadId);
+  return data;
+}
+
+// Destroys platform data previously returned by AllocPlatformData().
+/* static */ void
+Sampler::FreePlatformData(PlatformData* aData)
+{
+  delete aData;
+}
+
+// Returns the profiled thread's HANDLE from |aData| as an integer.
+uintptr_t
+Sampler::GetThreadHandle(PlatformData* aData)
+{
+  HANDLE handle = aData->profiled_thread();
+  return (uintptr_t) handle;
+}
+
+// Background thread that periodically samples every registered, profiled
+// thread on behalf of |sampler_|. At most one instance exists at a time
+// (see instance_).
+class SamplerThread : public Thread {
+ public:
+  // |interval| is rounded to the nearest integer number of milliseconds;
+  // a non-positive result is clamped to 1.
+  SamplerThread(double interval, Sampler* sampler)
+      : Thread("SamplerThread")
+      , sampler_(sampler)
+      , interval_(interval)
+  {
+    interval_ = floor(interval + 0.5);
+    if (interval_ <= 0) {
+      interval_ = 1;
+    }
+  }
+
+  // Creates and starts the sampler thread if none exists. If one already
+  // exists it is left untouched; its interval is asserted to match.
+  static void StartSampler(Sampler* sampler) {
+    if (instance_ == NULL) {
+      instance_ = new SamplerThread(sampler->interval(), sampler);
+      instance_->Start();
+    } else {
+      ASSERT(instance_->interval_ == sampler->interval());
+    }
+  }
+
+  // Waits for the sampler thread to finish and destroys it. Run() only
+  // returns once the sampler reports !IsActive(), so the caller must have
+  // deactivated it first.
+  static void StopSampler() {
+    instance_->Join();
+    delete instance_;
+    instance_ = NULL;
+  }
+
+  // Implement Thread::Run().
+  // Sampling loop: while the sampler is active, sample each eligible thread
+  // and then sleep for interval_ milliseconds.
+  virtual void Run() {
+
+    // By default we'll not adjust the timer resolution which tends to be around
+    // 16ms. However, if the requested interval is sufficiently low we'll try to
+    // adjust the resolution to match.
+    if (interval_ < 10)
+        ::timeBeginPeriod(interval_);
+
+    while (sampler_->IsActive()) {
+      sampler_->DeleteExpiredMarkers();
+
+      if (!sampler_->IsPaused()) {
+        // The registered-threads mutex is held for the whole sampling pass.
+        ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+        std::vector<ThreadInfo*> threads =
+          sampler_->GetRegisteredThreads();
+        bool isFirstProfiledThread = true;
+        for (uint32_t i = 0; i < threads.size(); i++) {
+          ThreadInfo* info = threads[i];
+
+          // This will be null if we're not interested in profiling this thread.
+          if (!info->Profile() || info->IsPendingDelete())
+            continue;
+
+          // SLEEPING_AGAIN: re-emit the previous sample instead of taking a
+          // new one.
+          PseudoStack::SleepState sleeping = info->Stack()->observeSleeping();
+          if (sleeping == PseudoStack::SLEEPING_AGAIN) {
+            info->Profile()->DuplicateLastSample();
+            continue;
+          }
+
+          info->Profile()->GetThreadResponsiveness()->Update();
+
+          ThreadProfile* thread_profile = info->Profile();
+
+          SampleContext(sampler_, thread_profile, isFirstProfiledThread);
+          isFirstProfiledThread = false;
+        }
+      }
+      OS::Sleep(interval_);
+    }
+
+    // disable any timer resolution changes we've made
+    if (interval_ < 10)
+        ::timeEndPeriod(interval_);
+  }
+
+  // Suspends the thread behind |thread_profile|, captures its register
+  // context into a TickSample, hands the sample to sampler->Tick(), and
+  // resumes the thread. Returns without sampling if there is no thread
+  // handle, if the suspend or context read fails, or if the stack-walk
+  // workaround lock cannot be taken. Resident memory is recorded only for
+  // the first profiled thread of a pass, and only when memory profiling is
+  // enabled.
+  void SampleContext(Sampler* sampler, ThreadProfile* thread_profile,
+                     bool isFirstProfiledThread)
+  {
+    uintptr_t thread = Sampler::GetThreadHandle(
+                               thread_profile->GetPlatformData());
+    HANDLE profiled_thread = reinterpret_cast<HANDLE>(thread);
+    if (profiled_thread == NULL)
+      return;
+
+    // Context used for sampling the register state of the profiled thread.
+    CONTEXT context;
+    memset(&context, 0, sizeof(context));
+
+    TickSample sample_obj;
+    TickSample* sample = &sample_obj;
+
+    // Grab the timestamp before pausing the thread, to avoid deadlocks.
+    sample->timestamp = mozilla::TimeStamp::Now();
+    sample->threadProfile = thread_profile;
+
+    if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) {
+      sample->rssMemory = nsMemoryReporterManager::ResidentFast();
+    } else {
+      sample->rssMemory = 0;
+    }
+
+    // Unique Set Size is not supported on Windows.
+    sample->ussMemory = 0;
+
+    // SuspendThread signals failure by returning (DWORD)-1.
+    static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
+    if (SuspendThread(profiled_thread) == kSuspendFailed)
+      return;
+
+    // SuspendThread is asynchronous, so the thread may still be running.
+    // Call GetThreadContext first to ensure the thread is really suspended.
+    // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+
+    // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
+    // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
+#if V8_HOST_ARCH_X64
+    context.ContextFlags = CONTEXT_FULL;
+#else
+    context.ContextFlags = CONTEXT_CONTROL;
+#endif
+    // From here on, every early return must resume the suspended thread.
+    if (!GetThreadContext(profiled_thread, &context)) {
+      ResumeThread(profiled_thread);
+      return;
+    }
+
+    // Threads that may invoke JS require extra attention. Since, on windows,
+    // the jits also need to modify the same dynamic function table that we need
+    // to get a stack trace, we have to be wary of that to avoid deadlock.
+    //
+    // When embedded in Gecko, for threads that aren't the main thread,
+    // CanInvokeJS consults an unlocked value in the nsIThread, so we must
+    // consult this after suspending the profiled thread to avoid racing
+    // against a value change.
+    if (thread_profile->CanInvokeJS()) {
+      if (!TryAcquireStackWalkWorkaroundLock()) {
+        ResumeThread(profiled_thread);
+        return;
+      }
+
+      // It is safe to immediately drop the lock. We only need to contend with
+      // the case in which the profiled thread held needed system resources.
+      // If the profiled thread had held those resources, the trylock would have
+      // failed. Anyone else who grabs those resources will continue to make
+      // progress, since those threads are not suspended. Because of this,
+      // we cannot deadlock with them, and should let them run as they please.
+      ReleaseStackWalkWorkaroundLock();
+    }
+
+    // Copy the program counter, stack pointer and frame pointer out of the
+    // captured context.
+#if V8_HOST_ARCH_X64
+    sample->pc = reinterpret_cast<Address>(context.Rip);
+    sample->sp = reinterpret_cast<Address>(context.Rsp);
+    sample->fp = reinterpret_cast<Address>(context.Rbp);
+#else
+    sample->pc = reinterpret_cast<Address>(context.Eip);
+    sample->sp = reinterpret_cast<Address>(context.Esp);
+    sample->fp = reinterpret_cast<Address>(context.Ebp);
+#endif
+
+    // |context| is a local, so the pointer stored in the sample is only
+    // valid for the duration of the Tick() call below.
+    sample->context = &context;
+    sampler->Tick(sample);
+
+    ResumeThread(profiled_thread);
+  }
+
+  // Sampler this thread samples for.
+  Sampler* sampler_;
+  int interval_; // units: ms
+
+  // The one sampler thread, or NULL when none is running. No lock in this
+  // class guards it.
+  static SamplerThread* instance_;
+
+  DISALLOW_COPY_AND_ASSIGN(SamplerThread);
+};
+
+SamplerThread* SamplerThread::instance_ = NULL;
+
+
+// Stores the configuration; a new sampler starts out neither paused nor
+// active.
+Sampler::Sampler(double interval, bool profiling, int entrySize)
+    : interval_(interval)
+    , profiling_(profiling)
+    , paused_(false)
+    , active_(false)
+    , entrySize_(entrySize)
+{
+}
+
+Sampler::~Sampler()
+{
+  // A sampler must be stopped before it is destroyed.
+  ASSERT(!IsActive());
+}
+
+// Marks the sampler active and starts the sampler thread. The flag is
+// raised first because the sampler thread's loop polls IsActive().
+void Sampler::Start()
+{
+  ASSERT(!IsActive());
+  SetActive(true);
+  SamplerThread::StartSampler(this);
+}
+
+// Clears the active flag so the sampler thread's loop exits, then joins
+// and destroys that thread.
+void Sampler::Stop()
+{
+  ASSERT(IsActive());
+  SetActive(false);
+  SamplerThread::StopSampler();
+}
+
+
+// Value held by Thread::thread_ before the thread has been started.
+static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
+
+// Start routine handed to _beginthreadex: |arg| is the Thread object whose
+// Run() should execute on the new thread.
+static unsigned int __stdcall ThreadEntry(void* arg) {
+  Thread* self = static_cast<Thread*>(arg);
+  self->Run();
+  return 0;
+}
+
+// Initialize a Win32 thread object. The thread has an invalid thread
+// handle until it is started.
+Thread::Thread(const char* name)
+    : stack_size_(0)
+{
+  // No OS thread exists yet; Start() replaces this sentinel.
+  thread_ = kNoThread;
+  set_name(name);
+}
+
+// Copies |name| into the fixed-size name_ buffer, truncating if needed.
+void Thread::set_name(const char* name) {
+  const size_t capacity = sizeof(name_);
+  strncpy(name_, name, capacity);
+  // strncpy does not terminate on truncation, so force a terminator.
+  name_[capacity - 1] = '\0';
+}
+
+// Close our own handle for the thread.
+Thread::~Thread()
+{
+  // Nothing to close if the thread was never started.
+  if (thread_ == kNoThread) {
+    return;
+  }
+  CloseHandle(thread_);
+}
+
+// Create a new thread. It is important to use _beginthreadex() instead of
+// the Win32 function CreateThread(), because CreateThread() does not
+// initialize thread-specific structures in the C runtime library.
+// Starts the OS thread running ThreadEntry(this). The returned handle is
+// stored in thread_ and the new thread's id in thread_id_.
+void Thread::Start() {
+  unsigned int* idOut = (unsigned int*) &thread_id_;
+  const uintptr_t rawHandle =
+      _beginthreadex(NULL,
+                     static_cast<unsigned>(stack_size_),
+                     ThreadEntry,
+                     this,
+                     0,
+                     idOut);
+  thread_ = reinterpret_cast<HANDLE>(rawHandle);
+}
+
+// Wait for thread to terminate.
+// Blocks until the thread terminates. Does nothing when called from the
+// thread itself, which would otherwise wait on itself.
+void Thread::Join()
+{
+  if (thread_id_ == GetCurrentId()) {
+    return;
+  }
+  WaitForSingleObject(thread_, INFINITE);
+}
+
+// Id of the calling thread; forwards to the Win32 GetCurrentThreadId().
+/* static */ Thread::tid_t
+Thread::GetCurrentId()
+{
+  return ::GetCurrentThreadId();
+}
+
+void OS::Startup()
+{
+  // Intentionally empty: no startup work is performed here.
+}
+
+// Sleeps for |milliseconds| ms; a thin wrapper over the Win32 Sleep().
+void OS::Sleep(int milliseconds)
+{
+  ::Sleep(milliseconds);
+}
+
+// Adds the calling thread to the list of registered threads.
+//
+// Returns false if the registry mutex does not exist or if a live
+// (not pending-delete) entry with this thread's id is already present;
+// returns true once a new entry has been appended.
+bool Sampler::RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop)
+{
+  if (!Sampler::sRegisteredThreadsMutex) {
+    return false;
+  }
+
+  ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  const int tid = GetCurrentThreadId();
+
+  for (auto it = sRegisteredThreads->begin();
+       it != sRegisteredThreads->end(); ++it) {
+    ThreadInfo* existing = *it;
+    if (existing->ThreadId() != tid || existing->IsPendingDelete()) {
+      continue;
+    }
+    // Thread already registered. This means the first unregister will be
+    // too early.
+    ASSERT(false);
+    return false;
+  }
+
+  set_tls_stack_top(stackTop);
+
+  ThreadInfo* created = new StackOwningThreadInfo(aName, tid,
+    aIsMainThread, aPseudoStack, stackTop);
+
+  // Let a running sampler know about the new thread before it is listed.
+  if (sActiveSampler) {
+    sActiveSampler->RegisterThread(created);
+  }
+
+  sRegisteredThreads->push_back(created);
+  return true;
+}
+
+// Removes the calling thread from the list of registered threads. While
+// the profiler is active the entry is only flagged pending-delete;
+// otherwise it is destroyed and erased immediately.
+void Sampler::UnregisterCurrentThread()
+{
+  if (!Sampler::sRegisteredThreadsMutex) {
+    return;
+  }
+
+  tlsStackTop.set(nullptr);
+
+  ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  const int tid = GetCurrentThreadId();
+
+  for (auto it = sRegisteredThreads->begin();
+       it != sRegisteredThreads->end(); ++it) {
+    ThreadInfo* entry = *it;
+    if (entry->ThreadId() != tid || entry->IsPendingDelete()) {
+      continue;
+    }
+
+    if (profiler_is_active()) {
+      // We still want to show the results of this thread if you
+      // save the profile shortly after a thread is terminated.
+      // For now we will defer the delete to profile stop.
+      entry->SetPendingDelete();
+    } else {
+      delete entry;
+      sRegisteredThreads->erase(it);
+    }
+    // Only the first live entry for this thread is handled.
+    break;
+  }
+}
+
+// Captures the current thread's register context into the caller-provided
+// buffer (treated as a CONTEXT), records the buffer on this sample, and
+// copies out pc/sp/fp for the platforms handled below.
+void TickSample::PopulateContext(void* aContext)
+{
+  MOZ_ASSERT(aContext);
+  CONTEXT* captured = static_cast<CONTEXT*>(aContext);
+  context = captured;
+  RtlCaptureContext(captured);
+
+#if defined(SPS_PLAT_amd64_windows)
+
+  pc = reinterpret_cast<Address>(captured->Rip);
+  sp = reinterpret_cast<Address>(captured->Rsp);
+  fp = reinterpret_cast<Address>(captured->Rbp);
+
+#elif defined(SPS_PLAT_x86_windows)
+
+  pc = reinterpret_cast<Address>(captured->Eip);
+  sp = reinterpret_cast<Address>(captured->Esp);
+  fp = reinterpret_cast<Address>(captured->Ebp);
+
+#endif
+}
+
diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp
new file mode 100644
index 000000000..0d3cb1648
--- /dev/null
+++ b/tools/profiler/core/platform.cpp
@@ -0,0 +1,1266 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <ostream>
+#include <fstream>
+#include <sstream>
+#include <errno.h>
+
+#include "platform.h"
+#include "PlatformMacros.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/UniquePtr.h"
+#include "GeckoProfiler.h"
+#ifndef SPS_STANDALONE
+#include "ProfilerIOInterposeObserver.h"
+#include "mozilla/StaticPtr.h"
+#endif
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Sprintf.h"
+#include "PseudoStack.h"
+#include "GeckoSampler.h"
+#ifndef SPS_STANDALONE
+#include "nsIObserverService.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsXULAppAPI.h"
+#include "nsProfilerStartParams.h"
+#include "mozilla/Services.h"
+#include "nsThreadUtils.h"
+#endif
+#include "ProfilerMarkers.h"
+
+#ifdef MOZ_TASK_TRACER
+#include "GeckoTaskTracer.h"
+#endif
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+  #include "FennecJNIWrappers.h"
+#endif
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+#include "FennecJNINatives.h"
+#endif
+
+#ifndef SPS_STANDALONE
+#if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux)
+# define USE_LUL_STACKWALK
+# include "lul/LulMain.h"
+# include "lul/platform-linux-lul.h"
+#endif
+#endif
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+// Natives binding for java::GeckoJavaSampler; exposes the profiler time.
+class GeckoJavaSampler : public java::GeckoJavaSampler::Natives<GeckoJavaSampler>
+{
+  // The constructor is private (default class access).
+  GeckoJavaSampler();
+
+public:
+  // Returns profiler_time() while the profiler is active, otherwise 0.0.
+  static double GetProfilerTime()
+  {
+    return profiler_is_active() ? profiler_time() : 0.0;
+  }
+};
+#endif
+
+MOZ_THREAD_LOCAL(PseudoStack *) tlsPseudoStack; // calling thread's PseudoStack
+MOZ_THREAD_LOCAL(GeckoSampler *) tlsTicker; // set by mozilla_sampler_start
+MOZ_THREAD_LOCAL(void *) tlsStackTop; // written by set_tls_stack_top
+// Set to true once the thread-local keys above have been init()'d
+// (see mozilla_sampler_init).  Several entry points check it first so
+// that they never touch an uninitialized key.  The keys are opaque to
+// us, so none of them can double as the flag itself.
+bool stack_key_initialized;
+
+mozilla::TimeStamp   sLastTracerEvent; // is raced on
+mozilla::TimeStamp   sStartTime; // origin subtracted in mozilla_sampler_time
+int         sFrameNumber = 0; // written by mozilla_sampler_frame_number
+int         sLastFrameNumber = 0;
+int         sInitCount = 0; // Each init must have a matched shutdown.
+static bool sIsProfiling = false; // is raced on
+static bool sIsGPUProfiling = false; // is raced on
+static bool sIsLayersDump = false; // is raced on
+static bool sIsDisplayListDump = false; // is raced on
+static bool sIsRestyleProfiling = false; // is raced on
+
+// Environment variables to control the profiler
+const char* PROFILER_HELP = "MOZ_PROFILER_HELP";
+const char* PROFILER_INTERVAL = "MOZ_PROFILER_INTERVAL";
+const char* PROFILER_ENTRIES = "MOZ_PROFILER_ENTRIES";
+const char* PROFILER_STACK = "MOZ_PROFILER_STACK_SCAN";
+const char* PROFILER_FEATURES = "MOZ_PROFILING_FEATURES";
+
+/* we don't need to worry about overflow because we only treat the
+ * case of them being the same as special. i.e. we only run into
+ * a problem if 2^32 events happen between samples that we need
+ * to know are associated with different events */
+
+// Values harvested from env vars, that control the profiler.
+static int sUnwindInterval;   /* in milliseconds */
+static int sUnwindStackScan;  /* max # of dubious frames allowed */
+static int sProfileEntries;   /* how many entries do we store? */
+
+std::vector<ThreadInfo*>* Sampler::sRegisteredThreads = nullptr;
+mozilla::UniquePtr< ::Mutex> Sampler::sRegisteredThreadsMutex;
+
+GeckoSampler* Sampler::sActiveSampler;
+
+#ifndef SPS_STANDALONE
+static mozilla::StaticAutoPtr<mozilla::ProfilerIOInterposeObserver>
+                                                            sInterposeObserver;
+#endif
+
+// The name that identifies the gecko thread for calls to
+// profiler_register_thread.
+static const char * gGeckoThreadName = "GeckoMain";
+
+void Sampler::Startup()
+{
+  // Create the thread registry and the mutex that guards it.
+  sRegisteredThreadsMutex = OS::CreateMutex("sRegisteredThreads mutex");
+  sRegisteredThreads = new std::vector<ThreadInfo*>();
+
+  // The sLUL object is deliberately not created here, even though
+  // Sampler::Shutdown destroys it: reading unwind info into it gives a
+  // big delay on startup even if no profiling is actually done.  It is
+  // created on demand at the first call to Sampler::Start instead.
+}
+
+// Tears down the thread registry (and, when built with LUL, the sLUL
+// object).  Harmless when Startup never ran or Shutdown already did.
+void Sampler::Shutdown() {
+  if (sRegisteredThreads) {
+    while (!sRegisteredThreads->empty()) {
+      delete sRegisteredThreads->back();
+      sRegisteredThreads->pop_back();
+    }
+    delete sRegisteredThreads;
+  }
+
+  // UnregisterThread can be called after shutdown in XPCShell. Thus
+  // we need to point to null to ignore such a call after shutdown.
+  sRegisteredThreadsMutex = nullptr;
+  sRegisteredThreads = nullptr;
+
+#if defined(USE_LUL_STACKWALK)
+  // Delete the sLUL object, if it actually got created.
+  delete sLUL;
+  sLUL = nullptr;
+#endif
+}
+
+// Forwards everything to ThreadInfo and takes a reference on aPseudoStack.
+StackOwningThreadInfo::StackOwningThreadInfo(
+    const char* aName, int aThreadId, bool aIsMainThread,
+    PseudoStack* aPseudoStack, void* aStackTop)
+  : ThreadInfo(aName, aThreadId, aIsMainThread, aPseudoStack, aStackTop)
+{
+  aPseudoStack->ref();
+}
+
+// Drops the PseudoStack reference, if Stack() still reports one.
+StackOwningThreadInfo::~StackOwningThreadInfo()
+{
+  if (PseudoStack* owned = Stack()) {
+    owned->deref();
+  }
+}
+
+// Releases the PseudoStack reference (if any), then defers to the base.
+void StackOwningThreadInfo::SetPendingDelete()
+{
+  if (PseudoStack* owned = Stack()) {
+    owned->deref();
+  }
+
+  ThreadInfo::SetPendingDelete();
+}
+
+// Copies aMarkerName with strdup(); aPayload and aTime are stored as given.
+ProfilerMarker::ProfilerMarker(const char* aMarkerName,
+                               ProfilerMarkerPayload* aPayload, double aTime)
+  : mMarkerName(strdup(aMarkerName))
+  , mPayload(aPayload)
+  , mTime(aTime)
+{
+}
+
+ProfilerMarker::~ProfilerMarker() {
+  delete mPayload;    // payload pointer stored by the constructor
+  free(mMarkerName);  // allocated with strdup() in the constructor
+}
+
+// Records the generation id supplied by the caller.
+void ProfilerMarker::SetGeneration(uint32_t aGenID) {
+  mGenID = aGenID;
+}
+
+// Returns the time value stored by the constructor.
+double ProfilerMarker::GetTime() const {
+  return mTime;
+}
+
+// Emits this marker as one JSON array element: [name, time, data].
+// "data" is written only when the marker carries a payload.
+void ProfilerMarker::StreamJSON(SpliceableJSONWriter& aWriter,
+                                UniqueStacks& aUniqueStacks) const
+{
+  aWriter.StartArrayElement();
+
+  // name (written through mUniqueStrings), then time
+  aUniqueStacks.mUniqueStrings.WriteElement(aWriter, GetMarkerName());
+  aWriter.DoubleElement(mTime);
+
+  // TODO: Store the callsite for this marker if available:
+  // if have location data
+  //   b.NameValue(marker, "location", ...);
+  // data
+  if (mPayload) {
+    aWriter.StartObjectElement();
+    mPayload->StreamPayload(aWriter, aUniqueStacks);
+    aWriter.EndObject();
+  }
+
+  aWriter.EndArray();
+}
+
+/* Has MOZ_PROFILER_VERBOSE been set? */
+
+// Verbosity control for the profiler.  The aim is to check env var
+// MOZ_PROFILER_VERBOSE only once.  However, we may need to temporarily
+// override that so as to print the profiler's help message.  That's
+// what moz_profiler_set_verbosity is for.
+
+enum class ProfilerVerbosity : int8_t { UNCHECKED, NOTVERBOSE, VERBOSE };
+
+// Raced on, potentially
+static ProfilerVerbosity profiler_verbosity = ProfilerVerbosity::UNCHECKED;
+
+// Reports whether verbose logging is on.  MOZ_PROFILER_VERBOSE is consulted
+// only while the cached state is UNCHECKED; the answer is then remembered.
+bool moz_profiler_verbose()
+{
+  if (profiler_verbosity == ProfilerVerbosity::UNCHECKED) {
+    const bool envSet = getenv("MOZ_PROFILER_VERBOSE") != nullptr;
+    profiler_verbosity = envSet ? ProfilerVerbosity::VERBOSE
+                                : ProfilerVerbosity::NOTVERBOSE;
+  }
+  return profiler_verbosity == ProfilerVerbosity::VERBOSE;
+}
+
+// Overrides the cached verbosity; only UNCHECKED and VERBOSE are expected.
+void moz_profiler_set_verbosity(ProfilerVerbosity pv) {
+  MOZ_ASSERT(pv == ProfilerVerbosity::UNCHECKED ||
+             pv == ProfilerVerbosity::VERBOSE);
+  profiler_verbosity = pv;
+}
+
+
+// Parses |interval| (ms, 1..1000) into sUnwindInterval; null means "keep".
+bool set_profiler_interval(const char* interval) {
+  if (!interval) {
+    return true;
+  }
+  errno = 0;
+  const long parsed = strtol(interval, nullptr, 10);
+  const bool valid = errno == 0 && parsed >= 1 && parsed <= 1000;
+  if (valid) {
+    sUnwindInterval = parsed;
+  }
+  return valid;
+}
+
+// Parses |entries| (count >= 1 that fits in an int) into sProfileEntries.
+bool set_profiler_entries(const char* entries) {
+  if (entries) {
+    errno = 0;
+    const long parsed = strtol(entries, nullptr, 10);
+    // Also reject values that the int store below would truncate.
+    if (errno != 0 || parsed < 1 || static_cast<int>(parsed) != parsed) {
+      return false;
+    }
+    sProfileEntries = static_cast<int>(parsed);
+  }
+  return true;  // a null |entries| leaves the current value untouched
+}
+
+// Parses |scanCount| (0..100) into sUnwindStackScan; null means "keep".
+bool set_profiler_scan(const char* scanCount) {
+  if (!scanCount) {
+    return true;
+  }
+  errno = 0;
+  const long parsed = strtol(scanCount, nullptr, 10);
+  const bool valid = errno == 0 && parsed >= 0 && parsed <= 100;
+  if (valid) {
+    sUnwindStackScan = parsed;
+  }
+  return valid;
+}
+
+bool is_native_unwinding_avail() {
+#ifndef HAVE_NATIVE_UNWIND
+  return false;  // this build does not define HAVE_NATIVE_UNWIND
+#else
+  return true;
+#endif
+}
+
+// Reads the profiler's environment variables at startup and stores the
+// results in sUnwindInterval, sProfileEntries and sUnwindStackScan.
+void read_profiler_env_vars()
+{
+  // Defaults; zero is logged below as "platform default".
+  sProfileEntries = 0;
+  sUnwindInterval = 0;
+
+  const char* const intervalStr = getenv(PROFILER_INTERVAL);
+  const char* const entriesStr = getenv(PROFILER_ENTRIES);
+  const char* const scanStr = getenv(PROFILER_STACK);
+
+  if (getenv(PROFILER_HELP)) {
+    // Print the usage text with verbose output forced on, then go back to
+    // UNCHECKED so the next moz_profiler_verbose() re-queries the env var.
+    moz_profiler_set_verbosity(ProfilerVerbosity::VERBOSE);
+    profiler_usage();
+    moz_profiler_set_verbosity(ProfilerVerbosity::UNCHECKED);
+  }
+
+  const bool allValid = set_profiler_interval(intervalStr) &&
+                        set_profiler_entries(entriesStr) &&
+                        set_profiler_scan(scanStr);
+  if (!allValid) {
+    profiler_usage();
+    return;
+  }
+  LOG( "SPS:");
+  LOGF("SPS: Sampling interval = %d ms (zero means \"platform default\")",
+       static_cast<int>(sUnwindInterval));
+  LOGF("SPS: Entry store size  = %d (zero means \"platform default\")",
+       static_cast<int>(sProfileEntries));
+  LOGF("SPS: UnwindStackScan   = %d (max dubious frames per unwind).",
+       static_cast<int>(sUnwindStackScan));
+  LOG( "SPS:");
+}
+
+// Prints the environment-variable help text, then resets the three
+// env-derived settings to zero and logs the resulting values.
+void profiler_usage() {
+  LOG( "SPS: ");
+  LOG( "SPS: Environment variable usage:");
+  LOG( "SPS: ");
+  LOG( "SPS:   MOZ_PROFILER_HELP");
+  LOG( "SPS:   If set to any value, prints this message.");
+  LOG( "SPS: ");
+  LOG( "SPS:   MOZ_PROFILER_INTERVAL=<number>   (milliseconds, 1 to 1000)");
+  LOG( "SPS:   If unset, platform default is used.");
+  LOG( "SPS: ");
+  LOG( "SPS:   MOZ_PROFILER_ENTRIES=<number>    (count, minimum of 1)");
+  LOG( "SPS:   If unset, platform default is used.");
+  LOG( "SPS: ");
+  LOG( "SPS:   MOZ_PROFILER_VERBOSE");
+  LOG( "SPS:   If set to any value, increases verbosity (recommended).");
+  LOG( "SPS: ");
+  LOG( "SPS:   MOZ_PROFILER_STACK_SCAN=<number>   (default is zero)");
+  LOG( "SPS:   The number of dubious (stack-scanned) frames allowed");
+  LOG( "SPS: ");
+  LOG( "SPS:   MOZ_PROFILER_LUL_TEST");
+  LOG( "SPS:   If set to any value, runs LUL unit tests at startup of");
+  LOG( "SPS:   the unwinder thread, and prints a short summary of results.");
+  LOG( "SPS: ");
+  LOGF("SPS:   This platform %s native unwinding.",
+       is_native_unwinding_avail() ? "supports" : "does not support");
+  LOG( "SPS: ");
+
+  // Back to the defaults (zero is logged as "platform default").
+  sUnwindStackScan  = 0;
+  sProfileEntries   = 0;
+  sUnwindInterval   = 0;
+
+  LOG( "SPS:");
+  LOGF("SPS: Sampling interval = %d ms (zero means \"platform default\")",
+       static_cast<int>(sUnwindInterval));
+  LOGF("SPS: Entry store size  = %d (zero means \"platform default\")",
+       static_cast<int>(sProfileEntries));
+  LOGF("SPS: UnwindStackScan   = %d (max dubious frames per unwind).",
+       static_cast<int>(sUnwindStackScan));
+  LOG( "SPS:");
+}
+
+// Stores |stackTop| in tlsStackTop.  A non-null value is first rounded up
+// to the last byte of its page: that cannot fault and may yield a few more
+// base-of-the-stack frames.  Assumes no target has pages under 4096 bytes.
+void set_tls_stack_top(void* stackTop)
+{
+  const uintptr_t kPageMask = 4095;
+  uintptr_t rounded = reinterpret_cast<uintptr_t>(stackTop);
+  if (stackTop) {
+    rounded |= kPageMask;
+  }
+  tlsStackTop.set(reinterpret_cast<void*>(rounded));
+}
+
+// True iff |aName| is non-null and equals gGeckoThreadName.
+// The comparison is exact (strcmp).
+bool is_main_thread_name(const char* aName)
+{
+  return aName && strcmp(aName, gGeckoThreadName) == 0;
+}
+
+#ifndef SPS_STANDALONE
+#ifdef HAVE_VA_COPY
+#define VARARGS_ASSIGN(foo, bar)        VA_COPY(foo,bar)
+#elif defined(HAVE_VA_LIST_AS_ARRAY)
+#define VARARGS_ASSIGN(foo, bar)     foo[0] = bar[0]
+#else
+#define VARARGS_ASSIGN(foo, bar)     (foo) = (bar)
+#endif
+
+// Forwards a printf-style message to the profiler as a "log" tracing marker.
+void mozilla_sampler_log(const char *fmt, va_list args)
+{
+  if (!profiler_is_active()) {
+    return;
+  }
+  // nsAutoCString AppendPrintf would be nicer but this is mozilla
+  // external code, so format into a stack buffer first.
+  char stackBuf[2048];
+  va_list firstPass;
+  VARARGS_ASSIGN(firstPass, args);
+  const int needed = VsprintfLiteral(stackBuf, fmt, firstPass);
+  va_end(firstPass);
+  if (needed < 0) {
+    return; // silently drop for now
+  }
+  if (needed < 2048) {
+    profiler_tracing("log", stackBuf, TRACING_EVENT);
+    return;
+  }
+  // Did not fit: format again into a heap buffer of the reported size.
+  char* heapBuf = new char[needed + 1];
+  va_list secondPass;
+  VARARGS_ASSIGN(secondPass, args);
+  vsnprintf(heapBuf, needed + 1, fmt, secondPass);
+  va_end(secondPass);
+  // EVENT_BACKTRACE could give a source for all log events (runtime flag?).
+  profiler_tracing("log", heapBuf, TRACING_EVENT);
+  delete[] heapBuf;
+}
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// BEGIN externally visible functions
+
+void mozilla_sampler_init(void* stackTop)
+{
+  sInitCount++;  // balanced by a later mozilla_sampler_shutdown()
+
+  if (stack_key_initialized) {
+    return;  // the one-time setup below has already run
+  }
+#ifdef MOZ_TASK_TRACER
+  mozilla::tasktracer::InitTaskTracer();
+#endif
+
+#ifdef SPS_STANDALONE
+  mozilla::TimeStamp::Startup();
+#endif
+
+  LOG("BEGIN mozilla_sampler_init");
+  const bool tlsReady =
+    tlsPseudoStack.init() && tlsTicker.init() && tlsStackTop.init();
+  if (!tlsReady) {
+    LOG("Failed to init.");
+    return;
+  }
+  bool ignored;  // out-param of ProcessCreation(); its value is not used
+  sStartTime = mozilla::TimeStamp::ProcessCreation(ignored);
+
+  stack_key_initialized = true;
+  Sampler::Startup();
+
+  PseudoStack* mainStack = PseudoStack::create();
+  tlsPseudoStack.set(mainStack);
+
+  // The initializing thread is always registered as the main thread.
+  Sampler::RegisterCurrentThread(gGeckoThreadName, mainStack,
+                                 /* aIsMainThread */ true, stackTop);
+
+  // Harvest interval, entry-count and stack-scan settings from the
+  // environment.
+  read_profiler_env_vars();
+
+  // platform specific initialization
+  OS::Startup();
+
+#ifndef SPS_STANDALONE
+  set_stderr_callback(mozilla_sampler_log);
+#endif
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+  if (mozilla::jni::IsFennec()) {
+    GeckoJavaSampler::Init();
+  }
+#endif
+
+  // Prefs cannot be opened here, so an environment variable decides
+  // whether profiling starts immediately.  Unset or empty means no
+  // (and the function then returns without logging "END").
+  const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
+  if (!startupEnv || !*startupEnv) {
+    return;
+  }
+
+  const char* features[] = { "js",
+                             "leaf",
+                             "threads",
+#if defined(XP_WIN) || defined(XP_MACOSX) \
+    || (defined(SPS_ARCH_arm) && defined(linux)) \
+    || defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux)
+                             "stackwalk",
+#endif
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+                             "java",
+#endif
+                           };
+
+  const char* threadFilters[] = { "GeckoMain", "Compositor" };
+
+  profiler_start(PROFILE_DEFAULT_ENTRY, PROFILE_DEFAULT_INTERVAL,
+                 features, MOZ_ARRAY_LENGTH(features),
+                 threadFilters, MOZ_ARRAY_LENGTH(threadFilters));
+  LOG("END   mozilla_sampler_init");
+}
+
+// Undoes one mozilla_sampler_init().  The teardown below only runs once
+// the init count is no longer positive.
+void mozilla_sampler_shutdown()
+{
+  if (--sInitCount > 0) {
+    return;
+  }
+
+  // Save the profile on shutdown if MOZ_PROFILER_SHUTDOWN names a file.
+  if (GeckoSampler* sampler = tlsTicker.get()) {
+    if (const char* path = getenv("MOZ_PROFILER_SHUTDOWN")) {
+      std::ofstream out(path);
+      if (out.is_open()) {
+        sampler->ToStreamAsJSON(out);
+        out.close();
+      }
+    }
+  }
+
+  profiler_stop();
+
+#ifndef SPS_STANDALONE
+  set_stderr_callback(nullptr);
+#endif
+
+  // Tear down the thread registry.
+  Sampler::Shutdown();
+
+#ifdef SPS_STANDALONE
+  mozilla::TimeStamp::Shutdown();
+#endif
+
+  // Release the calling thread's PseudoStack reference.
+  PseudoStack* ownStack = tlsPseudoStack.get();
+  ownStack->deref();
+  tlsPseudoStack.set(nullptr);
+
+#ifdef MOZ_TASK_TRACER
+  mozilla::tasktracer::ShutdownTaskTracer();
+#endif
+}
+
+// Requests a save from the calling thread's sampler and services it at
+// once.  Does nothing when tlsTicker holds no sampler.
+void mozilla_sampler_save()
+{
+  if (GeckoSampler* sampler = tlsTicker.get()) {
+    sampler->RequestSave();
+
+    // We're on the main thread already so we don't
+    // have to wait to handle the save request.
+    sampler->HandleSaveRequest();
+  }
+}
+
+// Result of GeckoSampler::ToJSON, or nullptr when tlsTicker is empty.
+mozilla::UniquePtr<char[]> mozilla_sampler_get_profile(double aSinceTime)
+{
+  GeckoSampler* sampler = tlsTicker.get();
+  if (sampler) {
+    return sampler->ToJSON(aSinceTime);
+  }
+  return nullptr;
+}
+
+#ifndef SPS_STANDALONE
+// Result of GeckoSampler::ToJSObject, or nullptr when tlsTicker is empty.
+JSObject *mozilla_sampler_get_profile_data(JSContext *aCx, double aSinceTime)
+{
+  GeckoSampler* sampler = tlsTicker.get();
+  if (sampler) {
+    return sampler->ToJSObject(aCx, aSinceTime);
+  }
+  return nullptr;
+}
+
+// Forwards aSinceTime and aPromise to GeckoSampler::ToJSObjectAsync on the
+// calling thread's sampler; warns and does nothing when there is none.
+void mozilla_sampler_get_profile_data_async(double aSinceTime,
+                                            mozilla::dom::Promise* aPromise)
+{
+  GeckoSampler *t = tlsTicker.get();
+  if (!NS_WARN_IF(!t)) {
+    t->ToJSObjectAsync(aSinceTime, aPromise);
+  }
+}
+
+// Copies the sampler's entry size, interval, thread-name filters and
+// features into the out-params; the stored strings point into its lists.
+void mozilla_sampler_get_profiler_start_params(int* aEntrySize,
+                                               double* aInterval,
+                                               mozilla::Vector<const char*>* aFilters,
+                                               mozilla::Vector<const char*>* aFeatures)
+{
+  if (NS_WARN_IF(!aEntrySize) || NS_WARN_IF(!aInterval) ||
+      NS_WARN_IF(!aFilters) || NS_WARN_IF(!aFeatures)) {
+    return;
+  }
+  GeckoSampler *t = tlsTicker.get();
+  if (NS_WARN_IF(!t)) {
+    return;
+  }
+  *aInterval = t->interval();
+  *aEntrySize = t->EntrySize();
+
+  const ThreadNameFilterList& threadNameFilterList = t->ThreadNameFilters();
+  MOZ_ALWAYS_TRUE(aFilters->resize(threadNameFilterList.length()));
+  for (uint32_t filterIdx = 0; filterIdx < threadNameFilterList.length(); ++filterIdx) {
+    (*aFilters)[filterIdx] = threadNameFilterList[filterIdx].c_str();
+  }
+
+  const FeatureList& featureList = t->Features();
+  MOZ_ALWAYS_TRUE(aFeatures->resize(featureList.length()));
+  for (size_t featureIdx = 0; featureIdx < featureList.length(); ++featureIdx) {
+    (*aFeatures)[featureIdx] = featureList[featureIdx].c_str();
+  }
+}
+
+// Fetches the gatherer from the calling thread's sampler into |aRetVal|.
+// When the profiler is inactive or there is no sampler, warns and stores
+// nullptr instead.  A null |aRetVal| is ignored.
+void mozilla_sampler_get_gatherer(nsISupports** aRetVal)
+{
+  if (!aRetVal) {
+    return;
+  }
+  if (NS_WARN_IF(!profiler_is_active())) {
+    *aRetVal = nullptr;
+    return;
+  }
+  GeckoSampler *t = tlsTicker.get();
+  if (NS_WARN_IF(!t)) {
+    *aRetVal = nullptr;
+    return;
+  }
+  t->GetGatherer(aRetVal);
+}
+
+#endif
+
+// Writes the current profile as JSON to |aFilename|.  Does nothing when
+// tlsTicker holds no sampler; logs whether the file could be opened.
+void mozilla_sampler_save_profile_to_file(const char* aFilename)
+{
+  GeckoSampler* sampler = tlsTicker.get();
+  if (!sampler) {
+    return;
+  }
+  std::ofstream out(aFilename);
+  if (!out.is_open()) {
+    LOG("Fail to open profile log file.");
+    return;
+  }
+  sampler->ToStreamAsJSON(out);
+  out.close();
+  LOGF("Saved to %s", aFilename);
+}
+
+
+const char** mozilla_sampler_get_features()
+{
+  static const char* features[] = {
+#if defined(MOZ_PROFILING) && defined(HAVE_NATIVE_UNWIND)
+    // Walk the C++ stack.
+    "stackwalk",
+#endif
+#if defined(ENABLE_SPS_LEAF_DATA)
+    // Include the C++ leaf node if not stackwalking. DevTools
+    // profiler doesn't want the native addresses.
+    "leaf",
+#endif
+#if !defined(SPS_OS_windows)
+    // Use a separate thread for walking the stack.
+    "unwinder",
+#endif
+    "java",
+    // Only record samples during periods of bad responsiveness
+    "jank",
+    // Tell the JS engine to emit pseudostack entries in the
+    // pro/epilogue.
+    "js",
+    // GPU Profiling (may not be supported by the GL)
+    "gpu",
+    // Profile the registered secondary threads.
+    "threads",
+    // Do not include user-identifiable information
+    "privacy",
+    // Dump the layer tree with the textures.
+    "layersdump",
+    // Dump the display list with the textures.
+    "displaylistdump",
+    // Add main thread I/O to the profile
+    "mainthreadio",
+    // Add RSS collection
+    "memory",
+#ifdef MOZ_TASK_TRACER
+    // Start profiling with feature TaskTracer.
+    "tasktracer",
+#endif
+#if defined(XP_WIN)
+    // Add power collection
+    "power",
+#endif
+    nullptr // terminates the list
+  };
+
+  return features; // pointer to the function-local static array above
+}
+
+// Reports buffer position, size and generation through the out-params.
+// All three are zero when the profiler was never initialized or the
+// calling thread has no sampler in tlsTicker.
+void mozilla_sampler_get_buffer_info(uint32_t *aCurrentPosition, uint32_t *aTotalSize,
+                                     uint32_t *aGeneration)
+{
+  *aCurrentPosition = 0;
+  *aTotalSize = 0;
+  *aGeneration = 0;
+
+  GeckoSampler* sampler =
+    stack_key_initialized ? tlsTicker.get() : nullptr;
+  if (sampler) {
+    sampler->GetBufferInfo(aCurrentPosition, aTotalSize, aGeneration);
+  }
+}
+
+// Stops any running profile and starts a new one with the given settings.
+// MOZ_PROFILER_INTERVAL / MOZ_PROFILER_ENTRIES, when set, override them.
+void mozilla_sampler_start(int aProfileEntries, double aInterval,
+                           const char** aFeatures, uint32_t aFeatureCount,
+                           const char** aThreadNameFilters, uint32_t aFilterCount)
+{
+  LOG("BEGIN mozilla_sampler_start");
+
+  if (!stack_key_initialized)
+    profiler_init(nullptr);
+
+  /* If the sampling interval was set using env vars, use that
+     in preference to anything else. */
+  if (sUnwindInterval > 0)
+    aInterval = sUnwindInterval;
+
+  /* If the entry count was set using env vars, use that, too: */
+  if (sProfileEntries > 0)
+    aProfileEntries = sProfileEntries;
+
+  // Reset the current state if the profiler is running
+  profiler_stop();
+
+  GeckoSampler* t;
+  t = new GeckoSampler(aInterval ? aInterval : PROFILE_DEFAULT_INTERVAL,
+                      aProfileEntries ? aProfileEntries : PROFILE_DEFAULT_ENTRY,
+                      aFeatures, aFeatureCount,
+                      aThreadNameFilters, aFilterCount);
+
+  tlsTicker.set(t);
+  t->Start();
+  if (t->ProfileJS() || t->InPrivacyMode()) {
+      ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+      std::vector<ThreadInfo*> threads = t->GetRegisteredThreads();
+
+      for (uint32_t i = 0; i < threads.size(); i++) {
+        ThreadInfo* info = threads[i];
+        if (info->IsPendingDelete()) {
+          continue;
+        }
+        ThreadProfile* thread_profile = info->Profile();
+        if (!thread_profile) {
+          continue;
+        }
+        thread_profile->GetPseudoStack()->reinitializeOnResume();
+#ifndef SPS_STANDALONE
+        if (t->ProfileJS()) {
+          thread_profile->GetPseudoStack()->enableJSSampling();
+        }
+        if (t->InPrivacyMode()) {
+          thread_profile->GetPseudoStack()->mPrivacyMode = true;
+        }
+#endif
+      }
+  }
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+  if (t->ProfileJava()) {
+    int javaInterval = aInterval;
+    // Java sampling can't keep up with 1ms sampling: clamp its interval only.
+    if (javaInterval < 10) {
+      javaInterval = 10;
+    }
+    java::GeckoJavaSampler::Start(javaInterval, 1000);
+  }
+#endif
+
+#ifndef SPS_STANDALONE
+  if (t->AddMainThreadIO()) {
+    if (!sInterposeObserver) {
+      // Lazily create IO interposer observer
+      sInterposeObserver = new mozilla::ProfilerIOInterposeObserver();
+    }
+    mozilla::IOInterposer::Register(mozilla::IOInterposeObserver::OpAll,
+                                    sInterposeObserver);
+  }
+#endif
+
+  sIsProfiling = true;
+#ifndef SPS_STANDALONE
+  sIsGPUProfiling = t->ProfileGPU();
+  sIsLayersDump = t->LayersDump();
+  sIsDisplayListDump = t->DisplayListDump();
+  sIsRestyleProfiling = t->ProfileRestyle();
+
+  if (Sampler::CanNotifyObservers()) {
+    nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
+    if (os) {
+      nsTArray<nsCString> featuresArray;
+      nsTArray<nsCString> threadNameFiltersArray;
+
+      for (size_t i = 0; i < aFeatureCount; ++i) {
+        featuresArray.AppendElement(aFeatures[i]);
+      }
+
+      for (size_t i = 0; i < aFilterCount; ++i) {
+        threadNameFiltersArray.AppendElement(aThreadNameFilters[i]);
+      }
+
+      nsCOMPtr<nsIProfilerStartParams> params =
+        new nsProfilerStartParams(aProfileEntries, aInterval, featuresArray,
+                                  threadNameFiltersArray);
+
+      os->NotifyObservers(params, "profiler-started", nullptr);
+    }
+  }
+#endif
+
+  LOG("END   mozilla_sampler_start");
+}
+
+// Stops and destroys the calling thread's sampler, clears the feature
+// flags and (in Gecko builds) notifies "profiler-stopped".
+void mozilla_sampler_stop()
+{
+  LOG("BEGIN mozilla_sampler_stop");
+  if (!stack_key_initialized) {
+    return;
+  }
+
+  GeckoSampler* sampler = tlsTicker.get();
+  if (!sampler) {
+    LOG("END   mozilla_sampler_stop-early");
+    return;
+  }
+  const bool wasProfilingJS = sampler->ProfileJS();
+  sampler->Stop();
+  delete sampler;
+  tlsTicker.set(nullptr);
+
+#ifndef SPS_STANDALONE
+  if (wasProfilingJS) {
+    PseudoStack *stack = tlsPseudoStack.get();
+    ASSERT(stack != nullptr);
+    stack->disableJSSampling();
+  }
+
+  mozilla::IOInterposer::Unregister(mozilla::IOInterposeObserver::OpAll,
+                                    sInterposeObserver);
+  sInterposeObserver = nullptr;
+#endif
+
+  sIsProfiling = false;
+#ifndef SPS_STANDALONE
+  sIsGPUProfiling = false;
+  sIsLayersDump = false;
+  sIsDisplayListDump = false;
+  sIsRestyleProfiling = false;
+  if (Sampler::CanNotifyObservers()) {
+    nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
+    if (os) {
+      os->NotifyObservers(nullptr, "profiler-stopped", nullptr);
+    }
+  }
+#endif
+
+  LOG("END   mozilla_sampler_stop");
+}
+
+// True only when there is an active sampler and it reports being paused.
+bool mozilla_sampler_is_paused()
+{
+  // GetActiveSampler() is queried for the null check and again for the call.
+  return Sampler::GetActiveSampler() &&
+         Sampler::GetActiveSampler()->IsPaused();
+}
+
+void mozilla_sampler_pause() {
+  if (Sampler::GetActiveSampler()) {  // no active sampler: nothing to pause
+    Sampler::GetActiveSampler()->SetPaused(true);
+#ifndef SPS_STANDALONE
+    if (Sampler::CanNotifyObservers()) {
+      nsCOMPtr<nsIObserverService> observers = mozilla::services::GetObserverService();
+      if (observers)
+        observers->NotifyObservers(nullptr, "profiler-paused", nullptr);
+    }
+#endif
+  }
+}
+
+void mozilla_sampler_resume() {
+  if (Sampler::GetActiveSampler()) {  // no active sampler: nothing to resume
+    Sampler::GetActiveSampler()->SetPaused(false);
+#ifndef SPS_STANDALONE
+    if (Sampler::CanNotifyObservers()) {
+      nsCOMPtr<nsIObserverService> observers = mozilla::services::GetObserverService();
+      if (observers)
+        observers->NotifyObservers(nullptr, "profiler-resumed", nullptr);
+    }
+#endif
+  }
+}
+
+// Answers whether the named optional feature ("gpu", "layersdump",
+// "displaylistdump" or "restyle") is flagged on for the running profiler.
+// Unknown names, and any name while the profiler is inactive, give false.
+bool mozilla_sampler_feature_active(const char* aName)
+{
+  if (!profiler_is_active()) {
+    return false;
+  }
+
+  const struct { const char* mName; bool mEnabled; } kFlags[] = {
+    { "gpu", sIsGPUProfiling },
+    { "layersdump", sIsLayersDump },
+    { "displaylistdump", sIsDisplayListDump },
+    { "restyle", sIsRestyleProfiling },
+  };
+
+  for (const auto& flag : kFlags) {
+    if (strcmp(aName, flag.mName) == 0) {
+      return flag.mEnabled;
+    }
+  }
+
+  return false;
+}
+
+bool mozilla_sampler_is_active() {
+  // Set by mozilla_sampler_start, cleared by mozilla_sampler_stop.
+  return sIsProfiling;
+}
+
+void mozilla_sampler_responsiveness(const mozilla::TimeStamp& aTime) {
+  // Remember the caller-supplied time in sLastTracerEvent.
+  sLastTracerEvent = aTime;
+}
+
+void mozilla_sampler_frame_number(int frameNumber) {
+  // Publish the caller's frame number in sFrameNumber.
+  sFrameNumber = frameNumber;
+}
+
+void mozilla_sampler_lock() {
+  profiler_stop();  // stop first, then announce "profiler-locked"
+#ifndef SPS_STANDALONE
+  nsCOMPtr<nsIObserverService> observers = mozilla::services::GetObserverService();
+  if (observers) {
+    observers->NotifyObservers(nullptr, "profiler-locked", nullptr);
+  }
+#endif
+}
+
+void mozilla_sampler_unlock() {
+#ifndef SPS_STANDALONE
+  nsCOMPtr<nsIObserverService> observers = mozilla::services::GetObserverService();
+  if (observers) {
+    observers->NotifyObservers(nullptr, "profiler-unlocked", nullptr);
+  }
+#endif
+}
+
+// Creates a PseudoStack for the calling thread and registers the thread
+// with the Sampler.  Returns false without registering when sInitCount is 0.
+bool mozilla_sampler_register_thread(const char* aName, void* aGuessStackTop)
+{
+  if (sInitCount == 0) {
+    return false;
+  }
+#if defined(MOZ_WIDGET_GONK) && !defined(MOZ_PROFILING)
+  // The only way to profile secondary threads on b2g
+  // is to build with profiling OR have the profiler
+  // running on startup.
+  if (!profiler_is_active()) {
+    return false;
+  }
+#endif
+  MOZ_ASSERT(tlsPseudoStack.get() == nullptr);
+  PseudoStack* newStack = PseudoStack::create();
+  tlsPseudoStack.set(newStack);
+  const bool onMainThread = is_main_thread_name(aName);
+  return Sampler::RegisterCurrentThread(aName, newStack, onMainThread,
+                                        GetStackTop(aGuessStackTop));
+}
+
+// Releases the calling thread's PseudoStack and unregisters the thread.
+// A thread that has no PseudoStack is left alone.
+void mozilla_sampler_unregister_thread()
+{
+  // sInitCount is deliberately not consulted: the thread may be
+  // unregistering after the sampler was shut down.
+  if (!stack_key_initialized) {
+    return;
+  }
+
+  if (PseudoStack* ownStack = tlsPseudoStack.get()) {
+    ownStack->deref();
+    tlsPseudoStack.set(nullptr);
+
+    Sampler::UnregisterCurrentThread();
+  }
+}
+
+// Calls setSleeping(1) on the calling thread's PseudoStack.  No-op when
+// sInitCount is zero or the thread has no PseudoStack.
+void mozilla_sampler_sleep_start()
+{
+  if (sInitCount == 0) {
+    return;
+  }
+  if (PseudoStack* stack = tlsPseudoStack.get()) {
+    stack->setSleeping(1);
+  }
+}
+
+// Calls setSleeping(0) on the calling thread's PseudoStack.  No-op when
+// sInitCount is zero or the thread has no PseudoStack.
+void mozilla_sampler_sleep_end()
+{
+  if (sInitCount == 0) {
+    return;
+  }
+  if (PseudoStack* stack = tlsPseudoStack.get()) {
+    stack->setSleeping(0);
+  }
+}
+
+// True when the calling thread's PseudoStack reports it is sleeping;
+// false when sInitCount is zero or the thread has no PseudoStack.
+bool mozilla_sampler_is_sleeping()
+{
+  if (sInitCount == 0) {
+    return false;
+  }
+  PseudoStack* stack = tlsPseudoStack.get();
+  return stack != nullptr && stack->isSleeping();
+}
+
+// Milliseconds from sStartTime to |aTime|.
+double mozilla_sampler_time(const mozilla::TimeStamp& aTime)
+{
+  return (aTime - sStartTime).ToMilliseconds();
+}
+
+double mozilla_sampler_time() {
+  const mozilla::TimeStamp now = mozilla::TimeStamp::Now();
+  return mozilla_sampler_time(now);
+}
+
+// Captures a backtrace from the calling thread's sampler.  Returns
+// nullptr when the profiler is uninitialized or inactive, when privacy
+// mode is on, or when tlsTicker holds no sampler.  The result can be
+// released with mozilla_sampler_free_backtrace().
+ProfilerBacktrace* mozilla_sampler_get_backtrace()
+{
+  if (!stack_key_initialized) {
+    return nullptr;
+  }
+  // Don't capture a stack if we're not profiling, or if we don't want
+  // to include personal information.
+  if (!profiler_is_active() || profiler_in_privacy_mode()) {
+    return nullptr;
+  }
+
+  GeckoSampler* sampler = tlsTicker.get();
+  if (!sampler) {
+    return nullptr;
+  }
+
+  return new ProfilerBacktrace(sampler->GetBacktrace());
+}
+
+void mozilla_sampler_free_backtrace(ProfilerBacktrace* aBacktrace) {
+  // Deleting a null pointer is fine, so no check is needed.
+  delete aBacktrace;
+}
+
+// Fill the output buffer with the following pattern:
+// "Label 1" "\0" "Label 2" "\0" ... "Label N" "\0" "\0"
+// TODO: use the unwinder instead of pseudo stack.
+void mozilla_sampler_get_backtrace_noalloc(char *output, size_t outputSize)
+{
+  MOZ_ASSERT(outputSize >= 2);
+  char *bound = output + outputSize - 2; // keeps room for the closing "\0\0"
+  output[0] = output[1] = '\0'; // empty result until a label is copied
+  PseudoStack *pseudoStack = tlsPseudoStack.get();
+  if (!pseudoStack) {
+    return;
+  }
+
+  volatile StackEntry *pseudoFrames = pseudoStack->mStack;
+  uint32_t pseudoCount = pseudoStack->stackSize();
+
+  for (uint32_t i = 0; i < pseudoCount; i++) {
+    size_t len = strlen(pseudoFrames[i].label());
+    if (output + len >= bound) // label would not fit: stop, output stays terminated
+      break;
+    strcpy(output, pseudoFrames[i].label());
+    output += len;
+    *output++ = '\0'; // terminator of this label
+    *output = '\0';   // provisional end-of-list marker, overwritten by the next label
+  }
+}
+
+void mozilla_sampler_tracing(const char* aCategory, const char* aInfo,
+                             TracingMetadata aMetaData) {
+  auto* payload = new ProfilerMarkerTracing(aCategory, aMetaData);
+  mozilla_sampler_add_marker(aInfo, payload);  // takes over the payload
+}
+
+void mozilla_sampler_tracing(const char* aCategory, const char* aInfo,
+                             ProfilerBacktrace* aCause,
+                             TracingMetadata aMetaData) {
+  auto* payload = new ProfilerMarkerTracing(aCategory, aMetaData, aCause);
+  mozilla_sampler_add_marker(aInfo, payload);  // takes over the payload
+}
+
+// Adds a marker named |aMarker| to the calling thread's PseudoStack.
+// |aPayload| is always taken over: it is handed to the stack on success
+// and destroyed on every early return.
+void mozilla_sampler_add_marker(const char *aMarker, ProfilerMarkerPayload *aPayload)
+{
+  mozilla::UniquePtr<ProfilerMarkerPayload> owned(aPayload);
+
+  if (!stack_key_initialized) {
+    return;
+  }
+
+  // Skip the heap copy (malloc) when not profiling, and don't add a
+  // marker when personal information must not be included.
+  if (!profiler_is_active() || profiler_in_privacy_mode()) {
+    return;
+  }
+
+  PseudoStack* stack = tlsPseudoStack.get();
+  if (!stack) {
+    return;
+  }
+
+  // Stamp the marker with the payload's start time when it has one.
+  const bool hasStart = aPayload && !aPayload->GetStartTime().IsNull();
+  const mozilla::TimeStamp origin =
+    hasStart ? aPayload->GetStartTime() : mozilla::TimeStamp::Now();
+  const double timeMs = (origin - sStartTime).ToMilliseconds();
+
+  stack->addMarker(aMarker, owned.release(), timeMs);
+}
+
+#ifndef SPS_STANDALONE
+#include "mozilla/Mutex.h"
+
+// ::Mutex implementation backed by mozilla::Mutex.
+class GeckoMutex : public ::Mutex {
+ public:
+  explicit GeckoMutex(const char* aDesc) : mMutex(aDesc) {}
+  virtual ~GeckoMutex() {}
+
+  // Both operations always report 0.
+  virtual int Lock()
+  {
+    mMutex.Lock();
+    return 0;
+  }
+
+  virtual int Unlock()
+  {
+    mMutex.Unlock();
+    return 0;
+  }
+ private:
+  mozilla::Mutex mMutex;  // the wrapped mutex
+};
+
+mozilla::UniquePtr< ::Mutex> OS::CreateMutex(const char* aDesc) {
+  return mozilla::MakeUnique<GeckoMutex>(aDesc); // mozilla::Mutex-backed
+}
+
+#else
+// Otherwise use c++11 Mutex
+#include <mutex>
+
+// ::Mutex implementation backed by std::mutex; |aDesc| is not used.
+class OSXMutex : public ::Mutex {
+ public:
+  OSXMutex(const char* aDesc) : mMutex() {}
+  virtual ~OSXMutex() {}
+
+  // Both operations always report 0.
+  virtual int Lock()
+  {
+    mMutex.lock();
+    return 0;
+  }
+
+  virtual int Unlock()
+  {
+    mMutex.unlock();
+    return 0;
+  }
+ private:
+  std::mutex mMutex;  // the wrapped mutex
+};
+
+mozilla::UniquePtr< ::Mutex> OS::CreateMutex(const char* aDesc) {
+  return mozilla::MakeUnique<OSXMutex>(aDesc);  // std::mutex-backed (standalone)
+}
+
+#endif
+
+// END externally visible functions
+////////////////////////////////////////////////////////////////////////
diff --git a/tools/profiler/core/platform.h b/tools/profiler/core/platform.h
new file mode 100644
index 000000000..2e736d97c
--- /dev/null
+++ b/tools/profiler/core/platform.h
@@ -0,0 +1,431 @@
+// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in
+//    the documentation and/or other materials provided with the
+//    distribution.
+//  * Neither the name of Google, Inc. nor the names of its contributors
+//    may be used to endorse or promote products derived from this
+//    software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+// SUCH DAMAGE.
+
+#ifndef TOOLS_PLATFORM_H_
+#define TOOLS_PLATFORM_H_
+
+#ifdef SPS_STANDALONE
+#define MOZ_COUNT_CTOR(name)
+#define MOZ_COUNT_DTOR(name)
+#endif
+
+#ifdef ANDROID
+#include <android/log.h>
+#else
+#define __android_log_print(a, ...)
+#endif
+
+#ifdef XP_UNIX
+#include <pthread.h>
+#endif
+
+#include <stdint.h>
+#include <math.h>
+#ifndef SPS_STANDALONE
+#include "MainThreadUtils.h"
+#include "mozilla/Mutex.h"
+#include "ThreadResponsiveness.h"
+#endif
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+#include "PlatformMacros.h"
+#include "v8-support.h"
+#include <vector>
+#include "StackTop.h"
+
+// We need a definition of gettid(), but Linux libc implementations don't
+// provide a wrapper for it (except for Bionic)
+#if defined(__linux__)
+#include <unistd.h>
+#if !defined(__BIONIC__)
+#include <sys/syscall.h>
+// Issues the raw SYS_gettid system call and narrows its result to pid_t.
+static inline pid_t gettid()
+{
+  const long rawTid = syscall(SYS_gettid);
+  return static_cast<pid_t>(rawTid);
+}
+#endif
+#endif
+
+#ifdef XP_WIN
+#include <windows.h>
+#endif
+
+#define ASSERT(a) MOZ_ASSERT(a)
+
+bool moz_profiler_verbose();
+
+#ifdef ANDROID
+# if defined(__arm__) || defined(__thumb__)
+#  define ENABLE_SPS_LEAF_DATA
+#  define ENABLE_ARM_LR_SAVING
+# endif
+# define LOG(text) \
+    do { if (moz_profiler_verbose()) \
+           __android_log_write(ANDROID_LOG_ERROR, "Profiler", text); \
+    } while (0)
+# define LOGF(format, ...) \
+    do { if (moz_profiler_verbose()) \
+           __android_log_print(ANDROID_LOG_ERROR, "Profiler", format, \
+                               __VA_ARGS__); \
+    } while (0)
+
+#else
+# define LOG(text) \
+    do { if (moz_profiler_verbose()) fprintf(stderr, "Profiler: %s\n", text); \
+    } while (0)
+# define LOGF(format, ...) \
+    do { if (moz_profiler_verbose()) fprintf(stderr, "Profiler: " format \
+                                             "\n", __VA_ARGS__);        \
+    } while (0)
+
+#endif
+
+#if defined(XP_MACOSX) || defined(XP_WIN) || defined(XP_LINUX)
+#define ENABLE_SPS_LEAF_DATA
+#endif
+
+typedef int32_t Atomic32;
+
+extern mozilla::TimeStamp sStartTime;
+
+typedef uint8_t* Address;
+
+// ----------------------------------------------------------------------------
+// Mutex
+//
+// Mutexes are used for serializing access to non-reentrant sections of code.
+// The implementations of mutex should allow for nested/recursive locking.
+
+// Abstract locking interface used by the profiler. Concrete implementations
+// are obtained through OS::CreateMutex.
+class Mutex {
+ public:
+  virtual ~Mutex() {}
+
+  // Locks the given mutex. If the mutex is currently unlocked, it becomes
+  // locked and owned by the calling thread, and Lock() returns immediately.
+  // If the mutex is already locked by another thread, suspends the calling
+  // thread until the mutex is unlocked.
+  virtual int Lock() = 0;
+
+  // Unlocks the given mutex. The mutex is assumed to be locked and owned by
+  // the calling thread on entrance.
+  virtual int Unlock() = 0;
+};
+
+// Scoped lock guard for ::Mutex: locks the mutex on construction and unlocks
+// it on destruction. The guard stores only a pointer, so the mutex must
+// outlive it.
+class MutexAutoLock {
+ public:
+  explicit MutexAutoLock(::Mutex& aMutex)
+    : mMutex(&aMutex)
+  {
+    mMutex->Lock();
+  }
+
+  ~MutexAutoLock() {
+    mMutex->Unlock();
+  }
+
+ private:
+  Mutex* mMutex;
+
+  // Copying a guard would make two destructors unlock the same mutex, so
+  // copy construction and assignment are declared but never defined.
+  DISALLOW_COPY_AND_ASSIGN(MutexAutoLock);
+};
+
+// ----------------------------------------------------------------------------
+// OS
+//
+// This class has static methods for the different platform specific
+// functions. Add methods here to cope with differences between the
+// supported platforms.
+
+// Collection of static, platform-specific helpers. Only declarations live
+// here; the definitions are supplied per platform.
+class OS {
+ public:
+
+  // Sleep for a number of milliseconds.
+  static void Sleep(const int milliseconds);
+
+  // Sleep for a number of microseconds.
+  static void SleepMicro(const int microseconds);
+
+  // Called on startup to initialize platform specific things
+  static void Startup();
+
+  // Creates a new ::Mutex implementation; ownership passes to the caller
+  // through the returned UniquePtr. aDesc is a description string handed to
+  // the underlying mutex implementation.
+  static mozilla::UniquePtr< ::Mutex> CreateMutex(const char* aDesc);
+
+ private:
+  // Conversion factor between seconds and milliseconds.
+  static const int msPerSecond = 1000;
+
+};
+
+
+
+
+// ----------------------------------------------------------------------------
+// Thread
+//
+// Thread objects are used for creating and running threads. When the start()
+// method is called the new thread starts running the run() method in the new
+// thread. The Thread object should not be deallocated before the thread has
+// terminated.
+
+// Base class for profiler-owned threads. Subclasses implement Run(); copying
+// is disabled via DISALLOW_COPY_AND_ASSIGN.
+class Thread {
+ public:
+  // Create new thread.
+  explicit Thread(const char* name);
+  virtual ~Thread();
+
+  // Start new thread by calling the Run() method in the new thread.
+  void Start();
+
+  void Join();
+
+  // Returns a pointer to the internal name buffer (not a copy); it is valid
+  // only as long as this Thread object is alive.
+  inline const char* name() const {
+    return name_;
+  }
+
+  // Abstract method for run handler.
+  virtual void Run() = 0;
+
+  // The thread name length is limited to 16 based on Linux's implementation of
+  // prctl().
+  static const int kMaxThreadNameLength = 16;
+
+  // Platform-specific thread handle and id type. Windows stores a HANDLE plus
+  // a DWORD id; every other platform uses pid_t as the id type, and macOS
+  // additionally stores a pthread_t handle.
+#ifdef XP_WIN
+  HANDLE thread_;
+  typedef DWORD tid_t;
+  tid_t thread_id_;
+#else
+  typedef ::pid_t tid_t;
+#endif
+#if defined(XP_MACOSX)
+  pthread_t thread_;
+#endif
+
+  // Returns the id of the calling thread, in the platform's tid_t type.
+  static tid_t GetCurrentId();
+
+ private:
+  void set_name(const char *name);
+
+  // Fixed-size storage for the thread name (kMaxThreadNameLength bytes).
+  char name_[kMaxThreadNameLength];
+  int stack_size_;
+
+  DISALLOW_COPY_AND_ASSIGN(Thread);
+};
+
+// ----------------------------------------------------------------------------
+// HAVE_NATIVE_UNWIND
+//
+// Pseudo backtraces are available on all platforms.  Native
+// backtraces are available only on selected platforms.  Breakpad is
+// the only supported native unwinder.  HAVE_NATIVE_UNWIND is set at
+// build time to indicate whether native unwinding is possible on this
+// platform.
+
+#undef HAVE_NATIVE_UNWIND
+#if defined(MOZ_PROFILING) \
+    && (defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_arm_android) \
+        || (defined(MOZ_WIDGET_ANDROID) && defined(__arm__)) \
+        || defined(SPS_PLAT_x86_linux) \
+        || defined(SPS_OS_windows) \
+        || defined(SPS_OS_darwin))
+# define HAVE_NATIVE_UNWIND
+#endif
+
+/* Some values extracted at startup from environment variables, that
+   control the behaviour of the breakpad unwinder. */
+extern const char* PROFILER_INTERVAL;
+extern const char* PROFILER_ENTRIES;
+extern const char* PROFILER_STACK;
+extern const char* PROFILER_FEATURES;
+
+void read_profiler_env_vars();
+void profiler_usage();
+
+// Helper methods to expose modifying profiler behavior
+bool set_profiler_interval(const char*);
+bool set_profiler_entries(const char*);
+bool set_profiler_scan(const char*);
+bool is_native_unwinding_avail();
+
+void set_tls_stack_top(void* stackTop);
+
+// ----------------------------------------------------------------------------
+// Sampler
+//
+// A sampler periodically samples the state of the VM and optionally
+// (if used for profiling) the program counter and stack pointer for
+// the thread that created it.
+
+struct PseudoStack;
+class ThreadProfile;
+
+// TickSample captures the information collected for each sample.
+// Plain data holder for one sample. The lr field only exists when
+// ENABLE_ARM_LR_SAVING is defined.
+class TickSample {
+ public:
+  // Null/zero-initializes every pointer, flag and counter. |timestamp| is
+  // left default-constructed.
+  TickSample()
+      : pc(NULL)
+      , sp(NULL)
+      , fp(NULL)
+#ifdef ENABLE_ARM_LR_SAVING
+      , lr(NULL)
+#endif
+      , context(NULL)
+      , isSamplingCurrentThread(false)
+      , threadProfile(nullptr)
+      , rssMemory(0)
+      , ussMemory(0)
+  {}
+
+  // Defined per platform; presumably fills pc/sp/fp from aContext
+  // -- TODO confirm against the platform implementations.
+  void PopulateContext(void* aContext);
+
+  Address pc;  // Instruction pointer.
+  Address sp;  // Stack pointer.
+  Address fp;  // Frame pointer.
+#ifdef ENABLE_ARM_LR_SAVING
+  Address lr;  // ARM link register
+#endif
+  void*   context;   // The context from the signal handler, if available. On
+                     // Win32 this may contain the windows thread context.
+  bool    isSamplingCurrentThread;
+  ThreadProfile* threadProfile;
+  mozilla::TimeStamp timestamp;
+  // NOTE(review): names suggest resident-set / unique-set memory sizes; the
+  // units are not visible here -- confirm at the call sites that fill them.
+  int64_t rssMemory;
+  int64_t ussMemory;
+};
+
+class ThreadInfo;
+class PlatformData;
+class GeckoSampler;
+class SyncProfile;
+// Abstract base class of the sampler. It holds the sampling configuration
+// (interval, profiling flag, entry size), the paused/active flags, and the
+// process-wide static state: the registered-thread list, its mutex and the
+// active GeckoSampler. Subclasses implement Tick() and the other pure
+// virtual hooks.
+class Sampler {
+ public:
+  // Initialize sampler.
+  explicit Sampler(double interval, bool profiling, int entrySize);
+  virtual ~Sampler();
+
+  double interval() const { return interval_; }
+
+  // This method is called for each sampling period with the current
+  // program counter.
+  virtual void Tick(TickSample* sample) = 0;
+
+  // Immediately captures the calling thread's call stack and returns it.
+  virtual SyncProfile* GetBacktrace() = 0;
+
+  // Request a save from a signal handler
+  virtual void RequestSave() = 0;
+  // Process any outstanding request outside a signal handler.
+  virtual void HandleSaveRequest() = 0;
+  // Delete markers which are no longer part of the profile due to buffer wraparound.
+  virtual void DeleteExpiredMarkers() = 0;
+
+  // Start and stop sampler.
+  void Start();
+  void Stop();
+
+  // Is the sampler used for profiling?
+  bool IsProfiling() const { return profiling_; }
+
+  // Whether the sampler is running (that is, consumes resources).
+  bool IsActive() const { return active_; }
+
+  // Low overhead way to stop the sampler from ticking
+  bool IsPaused() const { return paused_; }
+  void SetPaused(bool value) { NoBarrier_Store(&paused_, value); }
+
+  virtual bool ProfileThreads() const = 0;
+
+  int EntrySize() { return entrySize_; }
+
+  // We can't new/delete the type safely without defining it
+  // (-Wdelete-incomplete). Use these Alloc/Free functions instead.
+  static PlatformData* AllocPlatformData(int aThreadId);
+  static void FreePlatformData(PlatformData*);
+
+  // If we move the backtracing code into the platform files we won't
+  // need to have these hacks
+#ifdef XP_WIN
+  // xxxehsan sucky hack :(
+  static uintptr_t GetThreadHandle(PlatformData*);
+#endif
+#ifdef XP_MACOSX
+  static pthread_t GetProfiledThread(PlatformData*);
+#endif
+
+  // Returns a copy of the registered-thread vector (the ThreadInfo pointers
+  // themselves are shared, not cloned). Dereferences sRegisteredThreads
+  // without a null check and takes no lock itself.
+  static std::vector<ThreadInfo*> GetRegisteredThreads() {
+    return *sRegisteredThreads;
+  }
+
+  static bool RegisterCurrentThread(const char* aName,
+                                    PseudoStack* aPseudoStack,
+                                    bool aIsMainThread, void* stackTop);
+  static void UnregisterCurrentThread();
+
+  static void Startup();
+  // Should only be called on shutdown
+  static void Shutdown();
+
+  static GeckoSampler* GetActiveSampler() { return sActiveSampler; }
+  static void SetActiveSampler(GeckoSampler* sampler) { sActiveSampler = sampler; }
+
+  // NOTE(review): presumably guards sRegisteredThreads -- confirm at the
+  // call sites that lock it.
+  static mozilla::UniquePtr<Mutex> sRegisteredThreadsMutex;
+
+  // Whether the caller may notify observers. Always false on Gonk; on
+  // Android the answer is "are we on the main thread"; everywhere else the
+  // function asserts main-thread use and returns true.
+  static bool CanNotifyObservers() {
+#ifdef MOZ_WIDGET_GONK
+    // We use profile.sh on b2g to manually select threads and options per process.
+    return false;
+#elif defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+    // Android ANR reporter uses the profiler off the main thread
+    return NS_IsMainThread();
+#else
+    MOZ_ASSERT(NS_IsMainThread());
+    return true;
+#endif
+  }
+
+ protected:
+  static std::vector<ThreadInfo*>* sRegisteredThreads;
+  static GeckoSampler* sActiveSampler;
+
+ private:
+  void SetActive(bool value) { NoBarrier_Store(&active_, value); }
+
+  const double interval_;
+  const bool profiling_;
+  // paused_ and active_ are plain 32-bit integers that are written through
+  // NoBarrier_Store and read directly by IsPaused()/IsActive().
+  Atomic32 paused_;
+  Atomic32 active_;
+  const int entrySize_;
+
+  // Refactor me!
+#if defined(SPS_OS_linux) || defined(SPS_OS_android)
+  bool signal_handler_installed_;
+  struct sigaction old_sigprof_signal_handler_;
+  struct sigaction old_sigsave_signal_handler_;
+  bool signal_sender_launched_;
+  pthread_t signal_sender_thread_;
+#endif
+};
+
+#endif /* ndef TOOLS_PLATFORM_H_ */
diff --git a/tools/profiler/core/shared-libraries-linux.cc b/tools/profiler/core/shared-libraries-linux.cc
new file mode 100644
index 000000000..24437fb4e
--- /dev/null
+++ b/tools/profiler/core/shared-libraries-linux.cc
@@ -0,0 +1,159 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "shared-libraries.h"
+
+#define PATH_MAX_TOSTRING(x) #x
+#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x)
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fstream>
+#include "platform.h"
+#include "shared-libraries.h"
+
+#include "common/linux/file_id.h"
+#include <algorithm>
+
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
+
+// Computes the breakpad identifier of the ELF file at |bin_name|: the UUID
+// string derived from the file's ELF identifier with a "0" appended. Returns
+// an empty string when no identifier can be obtained from the file.
+static std::string getId(const char *bin_name)
+{
+  using namespace google_breakpad;
+
+  PageAllocator pageAllocator;
+  auto_wasteful_vector<uint8_t, sizeof(MDGUID)> elfIdentifier(&pageAllocator);
+
+  FileID elfFile(bin_name);
+  if (!elfFile.ElfFileIdentifier(elfIdentifier)) {
+    return "";
+  }
+
+  return FileID::ConvertIdentifierToUUIDString(elfIdentifier) + "0";
+}
+
+#if !defined(MOZ_WIDGET_GONK)
+// TODO fix me with proper include
+#include "nsDebug.h"
+#ifdef ANDROID
+#include "ElfLoader.h" // dl_phdr_info
+#else
+#include <link.h> // dl_phdr_info
+#endif
+#include <features.h>
+#include <dlfcn.h>
+#include <sys/types.h>
+
+#ifdef ANDROID
+extern "C" MOZ_EXPORT __attribute__((weak))
+int dl_iterate_phdr(
+          int (*callback) (struct dl_phdr_info *info,
+                           size_t size, void *data),
+          void *data);
+#endif
+
+// dl_iterate_phdr callback. |data| is the SharedLibraryInfo being filled in.
+// For each loaded object that has program headers, computes the lowest start
+// and highest end address over its PT_LOAD segments and records that range,
+// together with the object's breakpad id and name. Always returns 0 so that
+// iteration continues.
+static int
+dl_iterate_callback(struct dl_phdr_info *dl_info, size_t size, void *data)
+{
+  if (dl_info->dlpi_phnum <= 0) {
+    return 0;
+  }
+
+  // Start from an "empty" range: maximal start, minimal end.
+  unsigned long lowestStart = -1;
+  unsigned long highestEnd = 0;
+
+  for (size_t idx = 0; idx != dl_info->dlpi_phnum; ++idx) {
+    if (dl_info->dlpi_phdr[idx].p_type == PT_LOAD) {
+      const unsigned long segStart =
+        dl_info->dlpi_addr + dl_info->dlpi_phdr[idx].p_vaddr;
+      const unsigned long segEnd = segStart + dl_info->dlpi_phdr[idx].p_memsz;
+      lowestStart = std::min(lowestStart, segStart);
+      highestEnd = std::max(highestEnd, segEnd);
+    }
+  }
+
+  SharedLibraryInfo* libraries = static_cast<SharedLibraryInfo*>(data);
+  const char *objectName = dl_info->dlpi_name;
+  SharedLibrary entry(lowestStart, highestEnd, 0, getId(objectName), objectName);
+  libraries->AddSharedLibrary(entry);
+
+  return 0;
+}
+
+#endif // !MOZ_WIDGET_GONK
+
+// Builds the list of executable mappings of the current process.
+//
+// Outside Gonk, libraries are enumerated with dl_iterate_phdr (skipped on
+// Android when the weak symbol is not provided). On Android and Gonk,
+// /proc/<pid>/maps is parsed as well: Android only picks up the dalvik JIT
+// code cache from it, Gonk picks up every "r-xp" mapping. Parsing stops once
+// more than 10000 entries have been added from the maps file.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf()
+{
+  SharedLibraryInfo info;
+
+#if !defined(MOZ_WIDGET_GONK)
+#ifdef ANDROID
+  if (!dl_iterate_phdr) {
+    // On ARM Android, dl_iterate_phdr is provided by the custom linker.
+    // So if libxul was loaded by the system linker (e.g. as part of
+    // xpcshell when running tests), it won't be available and we should
+    // not call it.
+    return info;
+  }
+#endif // ANDROID
+
+  dl_iterate_phdr(dl_iterate_callback, &info);
+#endif // !MOZ_WIDGET_GONK
+
+#if defined(ANDROID) || defined(MOZ_WIDGET_GONK)
+  pid_t pid = getpid();
+  char path[PATH_MAX];
+  snprintf(path, PATH_MAX, "/proc/%d/maps", pid);
+  std::ifstream maps(path);
+  std::string line;
+  int count = 0;
+  while (std::getline(maps, line)) {
+    int ret;
+    //XXX: needs input sanitizing
+    unsigned long start;
+    unsigned long end;
+    // sscanf's "%Ns" stores up to N characters *plus* a terminating NUL, so
+    // each buffer is one byte larger than the field width in the format.
+    char perm[6 + 1] = "";
+    unsigned long offset;
+    char name[PATH_MAX + 1] = "";
+    ret = sscanf(line.c_str(),
+                 "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n",
+                 &start, &end, perm, &offset, name);
+    // perm is pre-initialized to "", so a line that failed to parse before
+    // the permissions field is silently skipped here as well.
+    if (!strchr(perm, 'x')) {
+      // Ignore non executable entries
+      continue;
+    }
+    // 4 conversions: anonymous mapping (no path); 5: mapping with a path.
+    if (ret != 5 && ret != 4) {
+      LOG("Get maps line failed");
+      continue;
+    }
+#if defined(ANDROID) && !defined(MOZ_WIDGET_GONK)
+    // Use proc/pid/maps to get the dalvik-jit section since it has
+    // no associated phdrs
+    if (strcmp(name, "/dev/ashmem/dalvik-jit-code-cache") != 0)
+      continue;
+#else
+    if (strcmp(perm, "r-xp") != 0) {
+      // Ignore entries that are writable and/or shared.
+      // At least one graphics driver uses short-lived "rwxs" mappings
+      // (see bug 926734 comment 5), so just checking for 'x' isn't enough.
+      continue;
+    }
+#endif
+    SharedLibrary shlib(start, end, offset, getId(name), name);
+    info.AddSharedLibrary(shlib);
+    // Safety valve against a runaway maps file.
+    if (count > 10000) {
+      LOG("Get maps failed");
+      break;
+    }
+    count++;
+  }
+#endif // ANDROID || MOZ_WIDGET_GONK
+
+  return info;
+}
diff --git a/tools/profiler/core/shared-libraries-macos.cc b/tools/profiler/core/shared-libraries-macos.cc
new file mode 100644
index 000000000..e218d2280
--- /dev/null
+++ b/tools/profiler/core/shared-libraries-macos.cc
@@ -0,0 +1,132 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <AvailabilityMacros.h>
+#include <mach-o/loader.h>
+#include <mach-o/dyld_images.h>
+#include <mach/task_info.h>
+#include <mach/task.h>
+#include <mach/mach_init.h>
+#include <mach/mach_traps.h>
+#include <string.h>
+#include <stdlib.h>
+#include <vector>
+#include <sstream>
+
+#include "shared-libraries.h"
+
+#ifndef MAC_OS_X_VERSION_10_6
+#define MAC_OS_X_VERSION_10_6 1060
+#endif
+
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6
+// borrowed from Breakpad
+// Fallback declarations for TASK_DYLD_INFO and friends, introduced in
+// <mach/task_info.h> in the Mac OS X 10.6 SDK.
+#define TASK_DYLD_INFO 17
+struct task_dyld_info {
+    mach_vm_address_t all_image_info_addr;
+    mach_vm_size_t all_image_info_size;
+  };
+typedef struct task_dyld_info task_dyld_info_data_t;
+typedef struct task_dyld_info *task_dyld_info_t;
+#define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t))
+
+#endif
+
+// Architecture specific abstraction.
+#ifdef __i386__
+typedef mach_header platform_mach_header;
+typedef segment_command mach_segment_command_type;
+#define MACHO_MAGIC_NUMBER MH_MAGIC
+#define CMD_SEGMENT LC_SEGMENT
+#define seg_size uint32_t
+#else
+typedef mach_header_64 platform_mach_header;
+typedef segment_command_64 mach_segment_command_type;
+#define MACHO_MAGIC_NUMBER MH_MAGIC_64
+#define CMD_SEGMENT LC_SEGMENT_64
+#define seg_size uint64_t
+#endif
+
+// Records one loaded Mach-O image in |info|. Walks the load commands that
+// follow |header| to find the vmsize of the __TEXT segment and the LC_UUID
+// bytes, stopping once both are known, then registers the range
+// [header, header + size) under |name|. The identifier string is each UUID
+// byte as two upper-case hex digits followed by '0'; it stays empty when no
+// LC_UUID command was found.
+static
+void addSharedLibrary(const platform_mach_header* header, char *name, SharedLibraryInfo &info) {
+  unsigned long long imageStart = reinterpret_cast<unsigned long long>(header);
+  seg_size textSize = 0;
+  const uint8_t *uuidBytes = nullptr;
+
+  // The load commands are laid out back to back directly after the header.
+  const struct load_command *command =
+    reinterpret_cast<const struct load_command *>(header + 1);
+  unsigned int visited = 0;
+  while (command && (visited < header->ncmds) &&
+         !(uuidBytes != nullptr && textSize != 0)) {
+    if (command->cmd == CMD_SEGMENT) {
+      const mach_segment_command_type *segment =
+        reinterpret_cast<const mach_segment_command_type *>(command);
+      if (strcmp(segment->segname, "__TEXT") == 0) {
+        textSize = segment->vmsize;
+      }
+    } else if (command->cmd == LC_UUID) {
+      uuidBytes = reinterpret_cast<const uuid_command *>(command)->uuid;
+    }
+
+    // Step to the next command: each one records its own byte size.
+    const char *rawCommand = reinterpret_cast<const char *>(command);
+    command = reinterpret_cast<const struct load_command *>
+      (rawCommand + command->cmdsize);
+    ++visited;
+  }
+
+  std::stringstream idStream;
+  idStream << std::hex << std::uppercase;
+  if (uuidBytes != nullptr) {
+    for (int n = 0; n < 16; ++n) {
+      const int highNibble = (uuidBytes[n] & 0xf0) >> 4;
+      const int lowNibble = uuidBytes[n] & 0xf;
+      idStream << highNibble;
+      idStream << lowNibble;
+    }
+    idStream << '0';
+  }
+
+  SharedLibrary library(imageStart, imageStart + textSize, 0, idStream.str(),
+                        name);
+  info.AddSharedLibrary(library);
+}
+
+// Use dyld to inspect the Mach-O image information. This lets us build the
+// SharedLibrary entries, giving us roughly the same information as
+// /proc/PID/maps does on Linux. Returns an empty list if the dyld info
+// cannot be queried from the task.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf()
+{
+  SharedLibraryInfo libraries;
+
+  task_dyld_info_data_t dyldInfo;
+  mach_msg_type_number_t infoSize = TASK_DYLD_INFO_COUNT;
+  if (task_info(mach_task_self (), TASK_DYLD_INFO, (task_info_t)&dyldInfo,
+                &infoSize) != KERN_SUCCESS) {
+    return libraries;
+  }
+
+  struct dyld_all_image_infos* allImages =
+    (struct dyld_all_image_infos*)dyldInfo.all_image_info_addr;
+  const size_t imageCount = allImages->infoArrayCount;
+
+  // Iterate through all dyld images (loaded libraries) to get their names
+  // and offsets.
+  for (size_t idx = 0; idx < imageCount; ++idx) {
+    const dyld_image_info *image = &allImages->infoArray[idx];
+
+    // Only trust images whose magic number matches this architecture;
+    // otherwise we are not pointing at the header we think we are.
+    if (image->imageLoadAddress->magic == MACHO_MAGIC_NUMBER) {
+      const platform_mach_header* machHeader =
+        reinterpret_cast<const platform_mach_header*>(image->imageLoadAddress);
+      addSharedLibrary(machHeader, (char*)image->imageFilePath, libraries);
+    }
+  }
+  return libraries;
+}
+
diff --git a/tools/profiler/core/shared-libraries-win32.cc b/tools/profiler/core/shared-libraries-win32.cc
new file mode 100644
index 000000000..e2db2579b
--- /dev/null
+++ b/tools/profiler/core/shared-libraries-win32.cc
@@ -0,0 +1,137 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <windows.h>
+#include <tlhelp32.h>
+#include <dbghelp.h>
+#include <sstream>
+
+#include "shared-libraries.h"
+#include "nsWindowsHelpers.h"
+
+#define CV_SIGNATURE 0x53445352 // 'SDSR'
+
+// In-memory layout of the CodeView debug record that GetPdbInfo reads out of
+// a mapped PE image. |signature| is compared against CV_SIGNATURE before the
+// other fields are trusted.
+struct CodeViewRecord70
+{
+  uint32_t signature;
+  GUID pdbSignature;
+  uint32_t pdbAge;
+  // First byte of the PDB path; GetPdbInfo treats it as a NUL-terminated
+  // string that extends past the declared one-element array.
+  char pdbFileName[1];
+};
+
+// Extracts the PDB identity (GUID signature, age and file name) from the
+// CodeView debug record of the PE image mapped at aStart. Returns false when
+// aStart is 0 or when any header or record along the way lacks the expected
+// magic value. On success the out-parameters are filled in; *aPdbName points
+// into the mapped image (it is not a copy).
+static bool GetPdbInfo(uintptr_t aStart, nsID& aSignature, uint32_t& aAge, char** aPdbName)
+{
+  if (aStart == 0) {
+    return false;
+  }
+
+  PIMAGE_DOS_HEADER dos = reinterpret_cast<PIMAGE_DOS_HEADER>(aStart);
+  if (dos->e_magic != IMAGE_DOS_SIGNATURE) {
+    return false;
+  }
+
+  // e_lfanew is used as the offset of the NT headers from the image base.
+  PIMAGE_NT_HEADERS nt = reinterpret_cast<PIMAGE_NT_HEADERS>(
+      aStart + dos->e_lfanew);
+  if (nt->Signature != IMAGE_NT_SIGNATURE) {
+    return false;
+  }
+
+  uint32_t debugRva =
+    nt->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].VirtualAddress;
+  if (debugRva == 0) {
+    return false;
+  }
+
+  PIMAGE_DEBUG_DIRECTORY debugDir =
+    reinterpret_cast<PIMAGE_DEBUG_DIRECTORY>(aStart + debugRva);
+  if (!(debugDir && debugDir->Type == IMAGE_DEBUG_TYPE_CODEVIEW)) {
+    return false;
+  }
+
+  CodeViewRecord70 *record = reinterpret_cast<CodeViewRecord70 *>(
+      aStart + debugDir->AddressOfRawData);
+  if (!(record && record->signature == CV_SIGNATURE)) {
+    return false;
+  }
+
+  aAge = record->pdbAge;
+  GUID& guid = record->pdbSignature;
+  aSignature.m0 = guid.Data1;
+  aSignature.m1 = guid.Data2;
+  aSignature.m2 = guid.Data3;
+  memcpy(aSignature.m3, guid.Data4, sizeof(guid.Data4));
+
+  // The PDB file name can differ from the module file name (e.g. the PDB for
+  // C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb), so it is reported
+  // separately. Only the part after the last backslash is handed out.
+  char * lastSeparator = strrchr(record->pdbFileName, '\\');
+  *aPdbName = lastSeparator ? lastSeparator + 1 : record->pdbFileName;
+
+  return true;
+}
+
+// Predicate for std::remove_if: true for the punctuation characters ('-',
+// '{' and '}') that are stripped from the breakpad id string.
+static bool IsDashOrBraces(char c)
+{
+  switch (c) {
+    case '-':
+    case '{':
+    case '}':
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Enumerates the modules of the current process from a toolhelp snapshot and
+// records, for every module whose PDB information can be read, its address
+// range, breakpad id and PDB file name. Modules for which any step fails are
+// skipped.
+SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf()
+{
+  SharedLibraryInfo sharedLibraryInfo;
+
+  nsAutoHandle snap(CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId()));
+
+  MODULEENTRY32 module = {0};
+  module.dwSize = sizeof(MODULEENTRY32);
+  if (Module32First(snap, &module)) {
+    do {
+      nsID pdbSig;
+      uint32_t pdbAge;
+      char *pdbName = NULL;
+
+      // Load the module again to make sure that its handle will remain
+      // valid as we attempt to read the PDB information from it.  We load the
+      // DLL as a datafile so that if the module actually gets unloaded between
+      // the call to Module32Next and the following LoadLibraryEx, we don't end
+      // up running the now newly loaded module's DllMain function.  If the
+      // module is already loaded, LoadLibraryEx just increments its refcount.
+      //
+      // Note that because of the race condition above, merely loading the DLL
+      // again is not safe enough, therefore we also need to make sure that we
+      // can read the memory mapped at the base address before we can safely
+      // proceed to actually access those pages.
+      HMODULE handleLock = LoadLibraryEx(module.szExePath, NULL, LOAD_LIBRARY_AS_DATAFILE);
+      MEMORY_BASIC_INFORMATION vmemInfo = {0};
+      if (handleLock &&
+          sizeof(vmemInfo) == VirtualQuery(module.modBaseAddr, &vmemInfo, sizeof(vmemInfo)) &&
+          vmemInfo.State == MEM_COMMIT &&
+          GetPdbInfo((uintptr_t)module.modBaseAddr, pdbSig, pdbAge, &pdbName)) {
+        // Breakpad id: the PDB signature's string form followed by the age in
+        // hex, with dashes and braces removed and everything upper-cased.
+        std::ostringstream stream;
+        stream << pdbSig.ToString() << std::hex << pdbAge;
+        std::string breakpadId = stream.str();
+        std::string::iterator end =
+          std::remove_if(breakpadId.begin(), breakpadId.end(), IsDashOrBraces);
+        breakpadId.erase(end, breakpadId.end());
+        std::transform(breakpadId.begin(), breakpadId.end(),
+                       breakpadId.begin(), toupper);
+
+        SharedLibrary shlib((uintptr_t)module.modBaseAddr,
+                            (uintptr_t)module.modBaseAddr+module.modBaseSize,
+                            0, // DLLs are always mapped at offset 0 on Windows
+                            breakpadId,
+                            pdbName);
+        sharedLibraryInfo.AddSharedLibrary(shlib);
+      }
+      // Drop the extra reference taken above; this must happen only after
+      // pdbName (which points into the mapped module) has been consumed.
+      FreeLibrary(handleLock); // ok to free null handles
+    } while (Module32Next(snap, &module));
+  }
+
+  return sharedLibraryInfo;
+}
+
diff --git a/tools/profiler/core/v8-support.h b/tools/profiler/core/v8-support.h
new file mode 100644
index 000000000..391069dcc
--- /dev/null
+++ b/tools/profiler/core/v8-support.h
@@ -0,0 +1,48 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This contains stubs and infrastructure to support code from v8 */
+
+#ifndef V8_SUPPORT_H_
+#define V8_SUPPORT_H_
+
+#if defined(_M_X64) || defined(__x86_64__)
+#define V8_HOST_ARCH_X64 1
+#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
+#define V8_HOST_ARCH_IA32 1
+#elif defined(__ARMEL__)
+#define V8_HOST_ARCH_ARM 1
+#else
+#warning Please add support for your architecture in chromium_types.h
+#endif
+
+typedef int32_t Atomic32;
+
+#if defined(V8_HOST_ARCH_X64) || defined(V8_HOST_ARCH_IA32) || defined(V8_HOST_ARCH_ARM)
+// Stores |value| to |*ptr| with a plain assignment through a volatile
+// pointer; no memory barrier is issued. Only defined for the host
+// architectures listed in the enclosing #if.
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+#endif
+
+
+const int kMaxInt = 0x7FFFFFFF;
+const int kMinInt = -kMaxInt - 1;
+
+// A macro to disallow the evil copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
+  TypeName(const TypeName&);                    \
+  void operator=(const TypeName&)
+
+
+// The USE(x) template is used to silence C++ compiler warnings
+// issued for (yet) unused variables (typically parameters).
+// The argument is taken by value and ignored; the body is empty.
+template <typename T>
+static inline void USE(T) { }
+
+// Empty stub class; per the header comment of this file it exists to support
+// code imported from v8. NOTE(review): presumably a stand-in for v8's
+// Malloced base class -- confirm against the code that derives from it.
+class Malloced {
+};
+
+#endif // V8_SUPPORT_H_
diff --git a/tools/profiler/gecko/ProfileGatherer.cpp b/tools/profiler/gecko/ProfileGatherer.cpp
new file mode 100644
index 000000000..5cd45bee3
--- /dev/null
+++ b/tools/profiler/gecko/ProfileGatherer.cpp
@@ -0,0 +1,207 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ProfileGatherer.h"
+#include "mozilla/Services.h"
+#include "nsIObserverService.h"
+#include "GeckoSampler.h"
+
+using mozilla::dom::AutoJSAPI;
+using mozilla::dom::Promise;
+
+namespace mozilla {
+
+/**
+ * When a subprocess exits before we've gathered profiles, we'll
+ * store profiles for those processes until gathering starts. We'll
+ * only store up to MAX_SUBPROCESS_EXIT_PROFILES. The buffer is
+ * circular, so as soon as we receive another exit profile, we'll
+ * bump the oldest one out of the buffer.
+ */
+static const uint32_t MAX_SUBPROCESS_EXIT_PROFILES = 5;
+
+NS_IMPL_ISUPPORTS(ProfileGatherer, nsIObserver)
+
+ProfileGatherer::ProfileGatherer(GeckoSampler* aTicker)
+  : mTicker(aTicker)        // sampler that Finish() asks for the JSON profile
+  , mSinceTime(0)
+  , mPendingProfiles(0)     // no subprocess profiles are outstanding yet
+  , mGathering(false)       // idle until Start() is called
+{
+}
+
+// Accounts for one out-of-process profile having been gathered; the last
+// outstanding one triggers Finish().
+void
+ProfileGatherer::GatheredOOPProfile()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+
+  // There is nothing to account for in two situations:
+  //  - no gather is in progress, which can happen for processes that
+  //    exit while profiling; this is silently ignored.
+  //  - a gather is in progress but we hold no Promise, meaning someone
+  //    is calling us erroneously; this additionally emits a warning.
+  if (!mGathering || NS_WARN_IF(!mPromise)) {
+    return;
+  }
+
+  // One fewer out-of-process profile is outstanding.
+  const bool gotAllProfiles = (--mPendingProfiles == 0);
+
+  if (gotAllProfiles) {
+    // Every async profile has arrived, so finish off the profile and
+    // resolve the Promise.
+    Finish();
+  }
+}
+
+void
+ProfileGatherer::WillGatherOOPProfile()
+{
+  mPendingProfiles++;  // one more profile that GatheredOOPProfile() must count off
+}
+
+void
+ProfileGatherer::Start(double aSinceTime,
+                       Promise* aPromise)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  if (mGathering) {
+    // Only one gather can be in flight at a time, so a request made while
+    // another is running is turned down by rejecting its promise (if any).
+    if (aPromise) {
+      aPromise->MaybeReject(NS_ERROR_NOT_AVAILABLE);
+    }
+    return;
+  }
+  // Record the request; the count of pending profiles restarts from zero.
+  mSinceTime = aSinceTime;
+  mPromise = aPromise;
+  mGathering = true;
+  mPendingProfiles = 0;
+  // Observe "profiler-subprocess" and announce that a gather has begun.
+  nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
+  if (os) {
+    DebugOnly<nsresult> rv =
+      os->AddObserver(this, "profiler-subprocess", false);
+    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "AddObserver failed");
+    rv = os->NotifyObservers(this, "profiler-subprocess-gather", nullptr);
+    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "NotifyObservers failed");
+  }
+  // If no profile is pending at this point, finish immediately.
+  if (!mPendingProfiles) {
+    Finish();
+  }
+}
+
+// Serializes the sampler's profile to JSON, parses it into a JS value and
+// settles mPromise with the result; on completion the gatherer is reset.
+void
+ProfileGatherer::Finish()
+{
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (!mTicker) {
+    // Cancel() cleared mTicker; shouldn't happen, belt-and-suspenders check.
+    return;
+  }
+
+  UniquePtr<char[]> buf = mTicker->ToJSON(mSinceTime);
+
+  nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
+  if (os) {
+    DebugOnly<nsresult> rv = os->RemoveObserver(this, "profiler-subprocess");
+    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "RemoveObserver failed");
+  }
+
+  // Start() accepts a null promise; without one (or a JS context), reset.
+  AutoJSAPI jsapi;
+  if (NS_WARN_IF(!mPromise) ||
+      NS_WARN_IF(!jsapi.Init(mPromise->GlobalJSObject()))) {
+    Reset();
+    return;
+  }
+  JSContext* cx = jsapi.cx();
+
+  // Now parse the JSON so that we resolve with a JS Object.
+  JS::RootedValue val(cx);
+  {
+    NS_ConvertUTF8toUTF16 js_string(nsDependentCString(buf.get()));
+    if (!JS_ParseJSON(cx, static_cast<const char16_t*>(js_string.get()),
+                      js_string.Length(), &val)) {
+      if (!jsapi.HasException()) {
+        mPromise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR);
+      } else {
+        JS::RootedValue exn(cx);
+        DebugOnly<bool> gotException = jsapi.StealException(&exn);
+        MOZ_ASSERT(gotException);
+        jsapi.ClearException();
+        mPromise->MaybeReject(cx, exn);
+      }
+    } else {
+      mPromise->MaybeResolve(val);
+    }
+  }
+
+  Reset();
+}
+
+void
+ProfileGatherer::Reset()
+{
+  mSinceTime = 0;
+  mPromise = nullptr;      // drop our reference to the promise
+  mPendingProfiles = 0;
+  mGathering = false;      // lets a subsequent Start() proceed
+}
+
+void
+ProfileGatherer::Cancel()
+{
+  // The GeckoSampler is going away, so a Promise that is still in flight
+  // gets rejected with an abort error.
+  if (mPromise) {
+    mPromise->MaybeReject(NS_ERROR_DOM_ABORT_ERR);
+  }
+  // mPromise and mGathering are left as they are; only the sampler
+  // reference is dropped, which makes any later Finish() return early.
+  mTicker = nullptr;
+}
+
+void
+ProfileGatherer::OOPExitProfile(const nsCString& aProfile)
+{
+  if (mExitProfiles.Length() >= MAX_SUBPROCESS_EXIT_PROFILES) {
+    mExitProfiles.RemoveElementAt(0);  // at capacity: evict the oldest stored profile
+  }
+  mExitProfiles.AppendElement(aProfile);
+
+  // If a process exited while gathering, we need to make
+  // sure we decrement the counter.
+  if (mGathering) {
+    GatheredOOPProfile();
+  }
+}
+
+NS_IMETHODIMP
+ProfileGatherer::Observe(nsISupports* aSubject,
+                         const char* aTopic,
+                         const char16_t *someData)
+{
+  if (!strcmp(aTopic, "profiler-subprocess")) {  // strcmp() == 0: topic matches
+    nsCOMPtr<nsIProfileSaveEvent> pse = do_QueryInterface(aSubject);
+    if (pse) {
+      for (size_t i = 0; i < mExitProfiles.Length(); ++i) {
+        if (!mExitProfiles[i].IsEmpty()) {  // empty profiles are not forwarded
+          pse->AddSubProfile(mExitProfiles[i].get());
+        }
+      }
+      mExitProfiles.Clear();  // stored exit profiles are handed over only once
+    }
+  }
+  return NS_OK;  // returned for every topic, handled or not
+}
+
+} // namespace mozilla
diff --git a/tools/profiler/gecko/Profiler.jsm b/tools/profiler/gecko/Profiler.jsm
new file mode 100644
index 000000000..c61218875
--- /dev/null
+++ b/tools/profiler/gecko/Profiler.jsm
@@ -0,0 +1,16 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+const Cr = Components.results;
+
+this.EXPORTED_SYMBOLS = ["Profiler"];
+
+this.Profiler = {
+  // (no properties are defined on the exported object)
+};
+
diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp
new file mode 100644
index 000000000..07801535d
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "GeckoProfiler.h"
+#include "ProfilerIOInterposeObserver.h"
+#include "ProfilerMarkers.h"
+
+using namespace mozilla;
+
+void ProfilerIOInterposeObserver::Observe(Observation& aObservation)
+{
+  if (!IsMainThread()) {
+    return;  // observations made off the main thread are not recorded
+  }
+  // Capture a backtrace to attach to the marker payload.
+  ProfilerBacktrace* stack = profiler_get_backtrace();
+  // Convert the UTF-16 file name to UTF-8 when the observation has one.
+  nsCString filename;
+  if (aObservation.Filename()) {
+    filename = NS_ConvertUTF16toUTF8(aObservation.Filename());
+  }
+  // Emit a profiler marker, named after the operation, carrying an IO payload.
+  IOMarkerPayload* markerPayload = new IOMarkerPayload(aObservation.Reference(),
+                                                       filename.get(),
+                                                       aObservation.Start(),
+                                                       aObservation.End(),
+                                                       stack);
+  PROFILER_MARKER_PAYLOAD(aObservation.ObservedOperationString(), markerPayload);
+}
diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.h b/tools/profiler/gecko/ProfilerIOInterposeObserver.h
new file mode 100644
index 000000000..8661b197e
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.h
@@ -0,0 +1,28 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILERIOINTERPOSEOBSERVER_H
+#define PROFILERIOINTERPOSEOBSERVER_H
+
+#ifdef MOZ_ENABLE_PROFILER_SPS
+
+#include "mozilla/IOInterposer.h"
+
+namespace mozilla {
+
+/**
+ * Observer that calls into the profiler whenever main thread I/O occurs.
+ * Observe() overrides the IOInterposeObserver callback.
+ */
+class ProfilerIOInterposeObserver final : public IOInterposeObserver
+{
+public:
+  void Observe(Observation& aObservation) override;
+};
+
+} // namespace mozilla
+
+#endif // MOZ_ENABLE_PROFILER_SPS
+
+#endif // PROFILERIOINTERPOSEOBSERVER_H
diff --git a/tools/profiler/gecko/ProfilerTypes.ipdlh b/tools/profiler/gecko/ProfilerTypes.ipdlh
new file mode 100644
index 000000000..1ef670b03
--- /dev/null
+++ b/tools/profiler/gecko/ProfilerTypes.ipdlh
@@ -0,0 +1,16 @@
+/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 8 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+namespace mozilla {
+
+struct ProfilerInitParams {
+  bool enabled;
+  uint32_t entries;           // NOTE(review): presumably nsIProfiler.StartProfiler's aEntries - confirm
+  double interval;            // NOTE(review): presumably StartProfiler's aInterval - confirm
+  nsCString[] threadFilters;
+  nsCString[] features;
+};
+
+} // namespace mozilla
+\ No newline at end of file
diff --git a/tools/profiler/gecko/SaveProfileTask.cpp b/tools/profiler/gecko/SaveProfileTask.cpp
new file mode 100644
index 000000000..497385355
--- /dev/null
+++ b/tools/profiler/gecko/SaveProfileTask.cpp
@@ -0,0 +1,45 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "SaveProfileTask.h"
+#include "GeckoProfiler.h"
+
+nsresult
+SaveProfileTask::Run() {
+  // Build the output path; the file name embeds the process type and the pid.
+#if defined(SPS_PLAT_arm_android) && !defined(MOZ_WIDGET_GONK)
+  nsCString tmpPath;
+  tmpPath.AppendPrintf("/sdcard/profile_%i_%i.txt", XRE_GetProcessType(), getpid());
+#else
+  nsCOMPtr<nsIFile> tmpFile;
+  nsAutoCString tmpPath;
+  if (NS_FAILED(NS_GetSpecialDirectory(NS_OS_TEMP_DIR, getter_AddRefs(tmpFile)))) {
+    LOG("Failed to find temporary directory.");
+    return NS_ERROR_FAILURE;
+  }
+  tmpPath.AppendPrintf("profile_%i_%i.txt", XRE_GetProcessType(), getpid());
+  // Append the file name to the temporary directory...
+  nsresult rv = tmpFile->AppendNative(tmpPath);
+  if (NS_FAILED(rv))
+    return rv;
+  // ...then read the combined native path back into tmpPath.
+  rv = tmpFile->GetNativePath(tmpPath);
+  if (NS_FAILED(rv))
+    return rv;
+#endif
+
+  profiler_save_profile_to_file(tmpPath.get());
+
+  return NS_OK;
+}
+
+NS_IMPL_ISUPPORTS(ProfileSaveEvent, nsIProfileSaveEvent)
+
+nsresult
+ProfileSaveEvent::AddSubProfile(const char* aProfile) {
+  mFunc(aProfile, mClosure);  // invoke the stored callback with the stored closure
+  return NS_OK;               // the callback returns nothing, so success is always reported
+}
+
diff --git a/tools/profiler/gecko/SaveProfileTask.h b/tools/profiler/gecko/SaveProfileTask.h
new file mode 100644
index 000000000..4a215bba0
--- /dev/null
+++ b/tools/profiler/gecko/SaveProfileTask.h
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILER_SAVETASK_H_
+#define PROFILER_SAVETASK_H_
+
+#include "platform.h"
+#include "nsThreadUtils.h"
+#include "nsIXULRuntime.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsXULAppAPI.h"
+#include "nsIProfileSaveEvent.h"
+
+#ifdef XP_WIN
+ #include <windows.h>
+ #define getpid GetCurrentProcessId
+#else
+ #include <unistd.h>
+#endif
+
+/**
+ * This is an event used to save the profile on the main thread
+ * to be sure that it is not being modified while saving.
+ */
+class SaveProfileTask : public mozilla::Runnable {
+public:
+  SaveProfileTask() {}
+  // Overrides the runnable's Run(); the body lives in SaveProfileTask.cpp.
+  NS_IMETHOD Run() override;
+};
+
+class ProfileSaveEvent final : public nsIProfileSaveEvent {
+public:
+  typedef void (*AddSubProfileFunc)(const char* aProfile, void* aClosure);
+  NS_DECL_ISUPPORTS
+  // Stores the callback and the opaque closure pointer that is passed to it.
+  ProfileSaveEvent(AddSubProfileFunc aFunc, void* aClosure)
+    : mFunc(aFunc)
+    , mClosure(aClosure)
+  {}
+  // Forwards aProfile, together with the stored closure, to the callback.
+  NS_IMETHOD AddSubProfile(const char* aProfile) override;
+private:
+  ~ProfileSaveEvent() {}
+
+  AddSubProfileFunc mFunc;
+  void* mClosure;
+};
+
+#endif
+
diff --git a/tools/profiler/gecko/ThreadResponsiveness.cpp b/tools/profiler/gecko/ThreadResponsiveness.cpp
new file mode 100644
index 000000000..0057251e2
--- /dev/null
+++ b/tools/profiler/gecko/ThreadResponsiveness.cpp
@@ -0,0 +1,118 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ThreadResponsiveness.h"
+#include "platform.h"
+#include "nsComponentManagerUtils.h"
+#include "nsThreadUtils.h"
+#include "nsITimer.h"
+#include "mozilla/Monitor.h"
+#include "ProfileEntry.h"
+#include "ThreadProfile.h"
+
+using mozilla::Monitor;
+using mozilla::MonitorAutoLock;
+using mozilla::TimeStamp;
+
+// Runnable that timestamps every run and re-arms a timer to run again.
+class CheckResponsivenessTask : public mozilla::Runnable,
+                                public nsITimerCallback {
+public:
+  CheckResponsivenessTask()
+    : mLastTracerTime(TimeStamp::Now())
+    , mMonitor("CheckResponsivenessTask")
+    , mTimer(nullptr)
+    , mStop(false)
+  {
+    MOZ_COUNT_CTOR(CheckResponsivenessTask);
+  }
+
+protected:
+  ~CheckResponsivenessTask()
+  {
+    MOZ_COUNT_DTOR(CheckResponsivenessTask);
+  }
+
+public:
+  NS_IMETHOD Run() override
+  {
+    MonitorAutoLock mon(mMonitor);
+    if (mStop)
+      return NS_OK;
+
+    // mLastTracerTime is raced on: the thread may be paused right here for
+    // profiling, so guarding it with a monitor could deadlock. The risk is
+    // a partial write, which shows up as an outlier in performance data.
+    mLastTracerTime = TimeStamp::Now();
+    if (!mTimer) {
+      mTimer = do_CreateInstance("@mozilla.org/timer;1");
+    }
+    mTimer->InitWithCallback(this, 16, nsITimer::TYPE_ONE_SHOT);
+    return NS_OK;
+  }
+
+  NS_IMETHOD Notify(nsITimer* aTimer) final
+  {
+    // Requeue on the thread this callback runs on instead of always the
+    // main thread, so a task started on another thread keeps probing it.
+    NS_DispatchToCurrentThread(this);
+    return NS_OK;
+  }
+
+  void Terminate() {
+    MonitorAutoLock mon(mMonitor);
+    mStop = true;
+  }
+
+  const TimeStamp& GetLastTracerTime() const {
+    return mLastTracerTime;
+  }
+
+  NS_DECL_ISUPPORTS_INHERITED
+
+private:
+  TimeStamp mLastTracerTime;
+  Monitor mMonitor;
+  nsCOMPtr<nsITimer> mTimer;
+  bool mStop;
+};
+
+NS_IMPL_ISUPPORTS_INHERITED(CheckResponsivenessTask, mozilla::Runnable,
+                            nsITimerCallback)
+
+ThreadResponsiveness::ThreadResponsiveness(ThreadProfile *aThreadProfile)
+  : mThreadProfile(aThreadProfile)
+  , mActiveTracerEvent(nullptr)  // no tracer task until the first Update()
+{
+  MOZ_COUNT_CTOR(ThreadResponsiveness);
+}
+
+ThreadResponsiveness::~ThreadResponsiveness()
+{
+  MOZ_COUNT_DTOR(ThreadResponsiveness);
+  if (mActiveTracerEvent) {
+    mActiveTracerEvent->Terminate();  // sets the stop flag so Run() stops re-arming its timer
+  }
+}
+
+void
+ThreadResponsiveness::Update()
+{
+  if (!mActiveTracerEvent) {  // create and dispatch the tracer task on first use
+    if (mThreadProfile->GetThreadInfo()->IsMainThread()) {
+      mActiveTracerEvent = new CheckResponsivenessTask();
+      NS_DispatchToMainThread(mActiveTracerEvent);
+    } else if (mThreadProfile->GetThreadInfo()->GetThread()) {  // needs a thread object to dispatch to
+      mActiveTracerEvent = new CheckResponsivenessTask();
+      mThreadProfile->GetThreadInfo()->
+        GetThread()->Dispatch(mActiveTracerEvent, NS_DISPATCH_NORMAL);
+    }
+  }
+  // Copy the most recent timestamp the task has recorded, if there is a task.
+  if (mActiveTracerEvent) {
+    mLastTracerTime = mActiveTracerEvent->GetLastTracerTime();
+  }
+}
+
diff --git a/tools/profiler/gecko/ThreadResponsiveness.h b/tools/profiler/gecko/ThreadResponsiveness.h
new file mode 100644
index 000000000..5454c3c05
--- /dev/null
+++ b/tools/profiler/gecko/ThreadResponsiveness.h
@@ -0,0 +1,38 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef ThreadResponsiveness_h
+#define ThreadResponsiveness_h
+
+#include "nsISupports.h"
+#include "mozilla/RefPtr.h"
+#include "mozilla/TimeStamp.h"
+
+class ThreadProfile;
+class CheckResponsivenessTask;
+
+class ThreadResponsiveness {
+public:
+  explicit ThreadResponsiveness(ThreadProfile *aThreadProfile);
+  // Tells the active tracer task, if any, to stop.
+  ~ThreadResponsiveness();
+  // Creates and dispatches the tracer task if needed, then refreshes mLastTracerTime.
+  void Update();
+  // Time elapsed between the last recorded tracer timestamp and |now|.
+  mozilla::TimeDuration GetUnresponsiveDuration(const mozilla::TimeStamp& now) const {
+    return now - mLastTracerTime;
+  }
+  // True when mLastTracerTime has been set, i.e. is non-null.
+  bool HasData() const {
+    return !mLastTracerTime.IsNull();
+  }
+private:
+  ThreadProfile* mThreadProfile;
+  RefPtr<CheckResponsivenessTask> mActiveTracerEvent;
+  mozilla::TimeStamp mLastTracerTime;
+};
+
+#endif
+
diff --git a/tools/profiler/gecko/nsIProfileSaveEvent.idl b/tools/profiler/gecko/nsIProfileSaveEvent.idl
new file mode 100644
index 000000000..c2c4bed02
--- /dev/null
+++ b/tools/profiler/gecko/nsIProfileSaveEvent.idl
@@ -0,0 +1,19 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+ 
+#include "nsISupports.idl"
+
+[uuid(f5ad0830-e178-41f9-b253-db9b4fae4cb3)]
+interface nsIProfileSaveEvent : nsISupports
+{
+  /**
+   * Call this method when observing this event to include
+   * a sub-profile originating from an external source such
+   * as a non-native thread or another process.
+   */
+  void AddSubProfile(in string aMarker);
+};
+
+
diff --git a/tools/profiler/gecko/nsIProfiler.idl b/tools/profiler/gecko/nsIProfiler.idl
new file mode 100644
index 000000000..f9b118650
--- /dev/null
+++ b/tools/profiler/gecko/nsIProfiler.idl
@@ -0,0 +1,101 @@
+/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+%{C++
+#include "nsTArrayForwardDeclare.h"
+class nsCString;
+%}
+
+[ref] native StringArrayRef(const nsTArray<nsCString>);
+
+/**
+ * Start-up parameters for subprocesses are passed through nsIObserverService,
+ * which, unfortunately, means we need to implement nsISupports in order to
+ * go through it.
+ */
+[uuid(0a175ba7-8fcf-4ce9-9c4b-ccc6272f4425)]
+interface nsIProfilerStartParams : nsISupports
+{
+  attribute uint32_t entries;
+  attribute double interval;
+  // C++-only ([noscript]) accessors returning the string arrays by const reference.
+  [noscript, notxpcom, nostdcall] StringArrayRef getFeatures();
+  [noscript, notxpcom, nostdcall] StringArrayRef getThreadFilterNames();
+};
+
+[scriptable, uuid(ead3f75c-0e0e-4fbb-901c-1e5392ef5b2a)]
+interface nsIProfiler : nsISupports
+{
+  boolean CanProfile();  // whether starting the profiler is currently permitted
+  void StartProfiler(in uint32_t aEntries, in double aInterval,
+                      [array, size_is(aFeatureCount)] in string aFeatures,
+                      in uint32_t aFeatureCount,
+                      [array, size_is(aFilterCount), optional] in string aThreadNameFilters,
+                      [optional] in uint32_t aFilterCount);
+  void StopProfiler();
+  boolean IsPaused();
+  void PauseSampling();
+  void ResumeSampling();
+  void AddMarker(in string aMarker);
+  /*
+   * Returns the JSON string of the profile. If aSinceTime is passed, only
+   * report samples taken at >= aSinceTime.
+   */
+  string GetProfile([optional] in double aSinceTime);
+
+  /*
+   * Returns a JS object of the profile. If aSinceTime is passed, only report
+   * samples taken at >= aSinceTime.
+   */
+  [implicit_jscontext]
+  jsval getProfileData([optional] in double aSinceTime);
+  // Asynchronous variant: the returned nsISupports is a Promise object.
+  [implicit_jscontext]
+  nsISupports getProfileDataAsync([optional] in double aSinceTime);
+
+  boolean IsActive();
+  void GetFeatures(out uint32_t aCount, [retval, array, size_is(aCount)] out string aFeatures);
+
+  /**
+   * The starting parameters that were sent to the profiler for sampling.
+   * If the profiler is not currently sampling, this will return null.
+   */
+  readonly attribute nsIProfilerStartParams startParams;
+
+  /**
+   * The profileGatherer will be null if the profiler is not currently
+   * active.
+   */
+  readonly attribute nsISupports profileGatherer;
+
+  void GetBufferInfo(out uint32_t aCurrentPosition, out uint32_t aTotalSize,
+                     out uint32_t aGeneration);
+
+  /**
+   * Returns the elapsed time, in milliseconds, since the profiler's epoch.
+   * The epoch is guaranteed to be constant for the duration of the
+   * process, but is otherwise arbitrary.
+   */
+  double getElapsedTime();
+
+  /**
+   * Returns a JSON string of an array of shared library objects.
+   * Every object has three properties: start, end, and name.
+   * start and end are integers describing the address range that the library
+   * occupies in memory. name is the path of the library as a string.
+   *
+   * On Windows profiling builds, the shared library objects will have
+   * additional pdbSignature and pdbAge properties for uniquely identifying
+   * shared library versions for stack symbolication.
+   */
+  AString getSharedLibraryInformation();
+
+  /**
+   * Dump the collected profile to a file.
+   */
+  void dumpProfileToFile(in string aFilename);
+};
diff --git a/tools/profiler/gecko/nsProfiler.cpp b/tools/profiler/gecko/nsProfiler.cpp
new file mode 100644
index 000000000..c38447381
--- /dev/null
+++ b/tools/profiler/gecko/nsProfiler.cpp
@@ -0,0 +1,308 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <string>
+#include <sstream>
+#include "GeckoProfiler.h"
+#include "nsProfiler.h"
+#include "nsProfilerStartParams.h"
+#include "nsMemory.h"
+#include "nsString.h"
+#include "mozilla/Services.h"
+#include "nsIObserverService.h"
+#include "nsIInterfaceRequestor.h"
+#include "nsILoadContext.h"
+#include "nsIWebNavigation.h"
+#include "nsIInterfaceRequestorUtils.h"
+#include "shared-libraries.h"
+#include "js/Value.h"
+#include "mozilla/ErrorResult.h"
+#include "mozilla/dom/Promise.h"
+
+using mozilla::ErrorResult;
+using mozilla::dom::Promise;
+using std::string;
+// Expose both inherited interfaces (nsIProfiler, nsIObserver) via QueryInterface.
+NS_IMPL_ISUPPORTS(nsProfiler, nsIProfiler, nsIObserver)
+
+nsProfiler::nsProfiler()
+  : mLockedForPrivateBrowsing(false)  // profiling is permitted until Observe() locks it
+{
+}
+
+nsProfiler::~nsProfiler()
+{
+  nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService();
+  if (observerService) {  // undo the two registrations made in Init()
+    observerService->RemoveObserver(this, "chrome-document-global-created");
+    observerService->RemoveObserver(this, "last-pb-context-exited");
+  }
+}
+
+nsresult
+nsProfiler::Init() {
+  nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService();
+  if (observerService) {  // registration is skipped when no observer service is available
+    observerService->AddObserver(this, "chrome-document-global-created", false);
+    observerService->AddObserver(this, "last-pb-context-exited", false);
+  }
+  return NS_OK;  // success is reported even when nothing was registered
+}
+
+NS_IMETHODIMP
+nsProfiler::Observe(nsISupports *aSubject,
+                    const char *aTopic,
+                    const char16_t *aData)
+{
+  if (strcmp(aTopic, "chrome-document-global-created") == 0) {
+    nsCOMPtr<nsIInterfaceRequestor> requestor = do_QueryInterface(aSubject);
+    nsCOMPtr<nsIWebNavigation> parentWebNav = do_GetInterface(requestor);
+    nsCOMPtr<nsILoadContext> loadContext = do_QueryInterface(parentWebNav);
+    if (loadContext && loadContext->UsePrivateBrowsing() && !mLockedForPrivateBrowsing) {
+      mLockedForPrivateBrowsing = true;  // remembered so the lock is taken only once
+      profiler_lock();
+    }
+  } else if (strcmp(aTopic, "last-pb-context-exited") == 0) {
+    mLockedForPrivateBrowsing = false;
+    profiler_unlock();  // called whether or not this object had taken the lock
+  }
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::CanProfile(bool *aCanProfile)
+{
+  *aCanProfile = !mLockedForPrivateBrowsing;  // false while locked for private browsing
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::StartProfiler(uint32_t aEntries, double aInterval,
+                          const char** aFeatures, uint32_t aFeatureCount,
+                          const char** aThreadNameFilters, uint32_t aFilterCount)
+{
+  if (mLockedForPrivateBrowsing) {
+    return NS_ERROR_NOT_AVAILABLE;  // refused while locked for private browsing
+  }
+  // Pass every argument straight through to the global profiler API.
+  profiler_start(aEntries, aInterval,
+                 aFeatures, aFeatureCount,
+                 aThreadNameFilters, aFilterCount);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::StopProfiler()
+{
+  profiler_stop();  // delegate to the global profiler API
+  return NS_OK;     // always reports success
+}
+
+NS_IMETHODIMP
+nsProfiler::IsPaused(bool *aIsPaused)
+{
+  *aIsPaused = profiler_is_paused();  // report what the global profiler API says
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::PauseSampling()
+{
+  profiler_pause();  // delegate to the global profiler API
+  return NS_OK;      // always reports success
+}
+
+NS_IMETHODIMP
+nsProfiler::ResumeSampling()
+{
+  profiler_resume();  // delegate to the global profiler API
+  return NS_OK;       // always reports success
+}
+
+NS_IMETHODIMP
+nsProfiler::AddMarker(const char *aMarker)
+{
+  PROFILER_MARKER(aMarker);  // hand the marker name to the profiler's marker macro
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfile(double aSinceTime, char** aProfile)
+{
+  // Define the out-param up front: with no profile, callers get null.
+  *aProfile = nullptr;
+  mozilla::UniquePtr<char[]> profile = profiler_get_profile(aSinceTime);
+  if (profile) {
+    // Clone the string including its terminating NUL (hence len + 1).
+    size_t len = strlen(profile.get());
+    *aProfile = static_cast<char*>(nsMemory::Clone(profile.get(), len + 1));
+  }
+  return NS_OK;
+}
+
+std::string GetSharedLibraryInfoStringInternal();  // only declared here; defined elsewhere
+// Thin forwarding wrapper around GetSharedLibraryInfoStringInternal().
+std::string
+GetSharedLibraryInfoString()
+{
+  return GetSharedLibraryInfoStringInternal();
+}
+
+NS_IMETHODIMP
+nsProfiler::GetSharedLibraryInformation(nsAString& aOutString)
+{
+  aOutString.Assign(NS_ConvertUTF8toUTF16(GetSharedLibraryInfoString().c_str()));  // std::string -> UTF-16
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::DumpProfileToFile(const char* aFilename)
+{
+  profiler_save_profile_to_file(aFilename);  // delegate to the global profiler API
+  return NS_OK;                              // always reports success
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfileData(double aSinceTime, JSContext* aCx,
+                           JS::MutableHandle<JS::Value> aResult)
+{
+  JS::RootedObject obj(aCx, profiler_get_profile_jsobject(aCx, aSinceTime));
+  if (!obj) {  // the profiler produced no object
+    return NS_ERROR_FAILURE;
+  }
+  aResult.setObject(*obj);  // return the profile object as the JS value
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfileDataAsync(double aSinceTime, JSContext* aCx,
+                                nsISupports** aPromise)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+
+  if (NS_WARN_IF(!aCx)) {
+    return NS_ERROR_FAILURE;
+  }
+  // The promise is created in the global that is current on the caller's context.
+  nsIGlobalObject* go = xpc::NativeGlobal(JS::CurrentGlobalOrNull(aCx));
+
+  if (NS_WARN_IF(!go)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  ErrorResult result;
+  RefPtr<Promise> promise = Promise::Create(go, result);
+  if (NS_WARN_IF(result.Failed())) {
+    return result.StealNSResult();
+  }
+  // Hand the promise to the profiler; this function does not settle it itself.
+  profiler_get_profile_jsobject_async(aSinceTime, promise);
+  // Transfer our reference to the caller.
+  promise.forget(aPromise);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetElapsedTime(double* aElapsedTime)
+{
+  *aElapsedTime = profiler_time();  // report the value of the global profiler API
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::IsActive(bool *aIsActive)
+{
+  *aIsActive = profiler_is_active();  // report what the global profiler API says
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetFeatures(uint32_t *aCount, char ***aFeatures)
+{
+  uint32_t len = 0;
+
+  const char **features = profiler_get_features();
+  if (!features) {
+    *aCount = 0;
+    *aFeatures = nullptr;
+    return NS_OK;
+  }
+  // The list is null-terminated: count the entries up to the sentinel.
+  while (features[len]) {
+    len++;
+  }
+  // Allocate the outer array, then clone each string including its NUL byte.
+  char **featureList = static_cast<char **>
+                       (moz_xmalloc(len * sizeof(char*)));
+
+  for (size_t i = 0; i < len; i++) {
+    size_t strLen = strlen(features[i]);
+    featureList[i] = static_cast<char *>
+                         (nsMemory::Clone(features[i], (strLen + 1) * sizeof(char)));
+  }
+
+  *aFeatures = featureList;
+  *aCount = len;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetStartParams(nsIProfilerStartParams** aRetVal)
+{
+  if (!profiler_is_active()) {
+    *aRetVal = nullptr;  // an inactive profiler has no start parameters
+    return NS_OK;
+  }
+
+  // Ask the profiler for the values it was started with.
+  int entries = 0;
+  double interval = 0;
+  mozilla::Vector<const char*> rawFilters;
+  mozilla::Vector<const char*> rawFeatures;
+  profiler_get_start_params(&entries, &interval, &rawFilters, &rawFeatures);
+
+  // Copy the C strings into the nsCString arrays the result object takes.
+  nsTArray<nsCString> filterNames;
+  for (const char* filter : rawFilters) {
+    filterNames.AppendElement(filter);
+  }
+  nsTArray<nsCString> featureNames;
+  for (const char* feature : rawFeatures) {
+    featureNames.AppendElement(feature);
+  }
+
+  nsCOMPtr<nsIProfilerStartParams> params =
+    new nsProfilerStartParams(entries, interval, featureNames, filterNames);
+  params.forget(aRetVal);
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration)
+{
+  MOZ_ASSERT(aCurrentPosition);  // all three out-pointers must be non-null
+  MOZ_ASSERT(aTotalSize);
+  MOZ_ASSERT(aGeneration);
+  profiler_get_buffer_info(aCurrentPosition, aTotalSize, aGeneration);  // the profiler fills them in
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfiler::GetProfileGatherer(nsISupports** aRetVal)
+{
+  if (!aRetVal) {
+    return NS_ERROR_INVALID_POINTER;
+  }
+
+  // The result stays null unless the profiler is active, because only an
+  // active profiler has a gatherer to hand out.
+  nsCOMPtr<nsISupports> gatherer;
+  if (profiler_is_active()) {
+    profiler_get_gatherer(getter_AddRefs(gatherer));
+  }
+
+  gatherer.forget(aRetVal);
+  return NS_OK;
+}
+\ No newline at end of file
diff --git a/tools/profiler/gecko/nsProfiler.h b/tools/profiler/gecko/nsProfiler.h
new file mode 100644
index 000000000..50dabd278
--- /dev/null
+++ b/tools/profiler/gecko/nsProfiler.h
@@ -0,0 +1,29 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _NSPROFILER_H_
+#define _NSPROFILER_H_
+
+#include "nsIProfiler.h"
+#include "nsIObserver.h"
+#include "mozilla/Attributes.h"
+
+class nsProfiler final : public nsIProfiler, public nsIObserver
+{
+public:
+    nsProfiler();
+    // XPCOM declarations for nsISupports and the two inherited interfaces.
+    NS_DECL_ISUPPORTS
+    NS_DECL_NSIOBSERVER
+    NS_DECL_NSIPROFILER
+    // Registers this object for the observer topics handled by Observe().
+    nsresult Init();
+private:
+    ~nsProfiler();
+    bool mLockedForPrivateBrowsing;  // while true, StartProfiler() is refused
+};
+
+#endif /* _NSPROFILER_H_ */
+
diff --git a/tools/profiler/gecko/nsProfilerCIID.h b/tools/profiler/gecko/nsProfilerCIID.h
new file mode 100644
index 000000000..3057a6ae0
--- /dev/null
+++ b/tools/profiler/gecko/nsProfilerCIID.h
@@ -0,0 +1,14 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsProfilerCIID_h__
+#define nsProfilerCIID_h__
+
+#define NS_PROFILER_CID \
+{ 0x25db9b8e, 0x8123, 0x4de1, \
+{ 0xb6, 0x6d, 0x8b, 0xbb, 0xed, 0xf2, 0xcd, 0xf4 } }
+
+#endif
+
diff --git a/tools/profiler/gecko/nsProfilerFactory.cpp b/tools/profiler/gecko/nsProfilerFactory.cpp
new file mode 100644
index 000000000..0cab23e89
--- /dev/null
+++ b/tools/profiler/gecko/nsProfilerFactory.cpp
@@ -0,0 +1,31 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ModuleUtils.h"
+#include "nsCOMPtr.h"
+#include "nsProfiler.h"
+#include "nsProfilerCIID.h"
+
+NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsProfiler, Init)
+
+NS_DEFINE_NAMED_CID(NS_PROFILER_CID);
+// Pairs the profiler class ID with the constructor used to create it.
+static const mozilla::Module::CIDEntry kProfilerCIDs[] = {
+    { &kNS_PROFILER_CID, false, nullptr, nsProfilerConstructor },
+    { nullptr } // end-of-table marker
+};
+// Pairs the profiler's contract ID string with the same class ID.
+static const mozilla::Module::ContractIDEntry kProfilerContracts[] = {
+    { "@mozilla.org/tools/profiler;1", &kNS_PROFILER_CID },
+    { nullptr } // end-of-table marker
+};
+// Module descriptor bundling the two tables above.
+static const mozilla::Module kProfilerModule = {
+    mozilla::Module::kVersion,
+    kProfilerCIDs,
+    kProfilerContracts
+};
+// Publish the descriptor under the module name nsProfilerModule.
+NSMODULE_DEFN(nsProfilerModule) = &kProfilerModule;
diff --git a/tools/profiler/gecko/nsProfilerStartParams.cpp b/tools/profiler/gecko/nsProfilerStartParams.cpp
new file mode 100644
index 000000000..5335e694e
--- /dev/null
+++ b/tools/profiler/gecko/nsProfilerStartParams.cpp
@@ -0,0 +1,67 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsProfilerStartParams.h"
+
+NS_IMPL_ISUPPORTS(nsProfilerStartParams, nsIProfilerStartParams)
+
+nsProfilerStartParams::nsProfilerStartParams(uint32_t aEntries,
+                                             double aInterval,
+                                             const nsTArray<nsCString>& aFeatures,
+                                             const nsTArray<nsCString>& aThreadFilterNames)
+  : mEntries(aEntries)
+  , mInterval(aInterval)
+  , mFeatures(aFeatures)                     // the array is copied
+  , mThreadFilterNames(aThreadFilterNames)   // the array is copied
+{
+}
+
+nsProfilerStartParams::~nsProfilerStartParams()
+{ // Nothing is released by hand; the members clean up after themselves.
+}
+
+NS_IMETHODIMP
+nsProfilerStartParams::GetEntries(uint32_t* aEntries)
+{
+  NS_ENSURE_ARG_POINTER(aEntries); // a null out-pointer fails the call
+  *aEntries = mEntries;            // copy out the stored value
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfilerStartParams::SetEntries(uint32_t aEntries)
+{
+  NS_ENSURE_ARG(aEntries); // a value of 0 is rejected; mEntries is left as is
+  mEntries = aEntries;
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfilerStartParams::GetInterval(double* aInterval)
+{
+  NS_ENSURE_ARG_POINTER(aInterval); // a null out-pointer fails the call
+  *aInterval = mInterval;           // copy out the stored value
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+nsProfilerStartParams::SetInterval(double aInterval)
+{
+  NS_ENSURE_ARG(aInterval); // rejects 0.0; NOTE(review): negatives and NaN get through
+  mInterval = aInterval;
+  return NS_OK;
+}
+
+const nsTArray<nsCString>&
+nsProfilerStartParams::GetFeatures()
+{
+  return mFeatures; // a reference to the member; no copy is made here
+}
+
+const nsTArray<nsCString>&
+nsProfilerStartParams::GetThreadFilterNames()
+{
+  return mThreadFilterNames; // a reference to the member; no copy is made here
+}
diff --git a/tools/profiler/gecko/nsProfilerStartParams.h b/tools/profiler/gecko/nsProfilerStartParams.h
new file mode 100644
index 000000000..98788077f
--- /dev/null
+++ b/tools/profiler/gecko/nsProfilerStartParams.h
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _NSPROFILERSTARTPARAMS_H_
+#define _NSPROFILERSTARTPARAMS_H_
+
+#include "nsIProfiler.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+class nsProfilerStartParams : public nsIProfilerStartParams
+{
+public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSIPROFILERSTARTPARAMS
+  // Stores the given values; both arrays are copied into the members.
+  nsProfilerStartParams(uint32_t aEntries,
+                        double aInterval,
+                        const nsTArray<nsCString>& aFeatures,
+                        const nsTArray<nsCString>& aThreadFilterNames);
+
+private:
+  virtual ~nsProfilerStartParams(); // private, so outside code cannot delete
+  uint32_t mEntries;                      // read/written by Get/SetEntries
+  double mInterval;                       // read/written by Get/SetInterval
+  nsTArray<nsCString> mFeatures;          // returned by GetFeatures
+  nsTArray<nsCString> mThreadFilterNames; // returned by GetThreadFilterNames
+};
+
+#endif
diff --git a/tools/profiler/lul/AutoObjectMapper.cpp b/tools/profiler/lul/AutoObjectMapper.cpp
new file mode 100644
index 000000000..a5dc902fd
--- /dev/null
+++ b/tools/profiler/lul/AutoObjectMapper.cpp
@@ -0,0 +1,207 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "PlatformMacros.h"
+#include "AutoObjectMapper.h"
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+# include <dlfcn.h>
+# include "mozilla/Types.h"
+  // FIXME move these out of mozglue/linker/ElfLoader.h into their
+  // own header, so as to avoid conflicts arising from two definitions
+  // of Array
+  extern "C" {
+    MFBT_API size_t
+    __dl_get_mappable_length(void *handle);
+    MFBT_API void *
+    __dl_mmap(void *handle, void *addr, size_t length, off_t offset);
+    MFBT_API void
+    __dl_munmap(void *handle, void *addr, size_t length);
+  }
+  // The following are for get_installation_lib_dir()
+# include "nsString.h"
+# include "nsDirectoryServiceUtils.h"
+# include "nsDirectoryServiceDefs.h"
+#endif
+
+
+// A helper function for creating failure error messages in
+// AutoObjectMapper*::Map.
+static void
+failedToMessage(void(*aLog)(const char*),
+                const char* aHowFailed, std::string aFileName)
+{
+  char msg[300];
+  SprintfLiteral(msg, "AutoObjectMapper::Map: Failed to %s \'%s\'",
+                 aHowFailed, aFileName.c_str());
+  msg[sizeof(msg) - 1] = '\0';  // make certain the text is terminated
+  aLog(msg);                    // hand the finished message to the sink
+}
+
+
+AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void(*aLog)(const char*))
+  : mImage(nullptr)    // nothing is mapped until Map() succeeds
+  , mSize(0)
+  , mLog(aLog)         // sink that Map() reports its failures to
+  , mIsMapped(false)
+{}
+
+AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() {
+  if (mIsMapped) {
+    MOZ_ASSERT(mSize > 0);
+    // Not strictly guaranteed, but it is reasonable to assume that no
+    // mmap facility would ever place a mapping at page zero.
+    MOZ_ASSERT(mImage);
+    munmap(mImage, mSize);
+    return;
+  }
+  // Nothing was mapped by this class, so the recorded extent must still
+  // be empty and there is nothing to release.
+  MOZ_ASSERT(!mImage);
+  MOZ_ASSERT(mSize == 0);
+}
+
+bool AutoObjectMapperPOSIX::Map(/*OUT*/void** start, /*OUT*/size_t* length,
+                                std::string fileName)
+{
+  // Mapping twice would leak the first mapping: assert, and refuse in release.
+  MOZ_ASSERT(!mIsMapped);
+  if (mIsMapped) {
+    return false;
+  }
+  int fd = open(fileName.c_str(), O_RDONLY);
+  if (fd == -1) {
+    failedToMessage(mLog, "open", fileName);
+    return false;
+  }
+  // A failed fstat and a zero-sized file are both reported as "fstat".
+  struct stat st;
+  int    err = fstat(fd, &st);
+  size_t sz  = (err == 0) ? st.st_size : 0;
+  if (err != 0 || sz == 0) {
+    failedToMessage(mLog, "fstat", fileName);
+    close(fd);
+    return false;
+  }
+  void* image = mmap(nullptr, sz, PROT_READ, MAP_SHARED, fd, 0);
+  close(fd);  // The descriptor is not needed once mmap has returned.
+  if (image == MAP_FAILED) {
+    failedToMessage(mLog, "mmap", fileName);
+    return false;
+  }
+  mIsMapped = true;
+  mImage = *start  = image;
+  mSize  = *length = sz;
+  return true;
+}
+
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+// A helper function for AutoObjectMapperFaultyLib::Map.  Finds out
+// where the installation's lib directory is, since we'll have to look
+// in there to get hold of libmozglue.so.  Returned C string is heap
+// allocated and the caller must deallocate it.
+static char*
+get_installation_lib_dir()
+{
+  nsCOMPtr<nsIProperties>
+    dirService(do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID));
+  if (!dirService) {
+    return nullptr;
+  }
+  nsCOMPtr<nsIFile> greDir;
+  nsresult rv = dirService->Get(NS_GRE_DIR, NS_GET_IID(nsIFile),
+                                getter_AddRefs(greDir));
+  if (NS_FAILED(rv)) {
+    return nullptr;
+  }
+  nsCString nativePath;
+  rv = greDir->GetNativePath(nativePath);
+  // The strdup'd copy outlives |nativePath|; the caller must free() it.
+  return NS_FAILED(rv) ? nullptr : strdup(nativePath.get());
+}
+
+AutoObjectMapperFaultyLib::AutoObjectMapperFaultyLib(void(*aLog)(const char*))
+  : AutoObjectMapperPOSIX(aLog)  // the base class keeps the logging sink
+  , mHdl(nullptr)                // no faulty.lib handle is held yet
+{}
+
+AutoObjectMapperFaultyLib::~AutoObjectMapperFaultyLib() {
+  // Whatever happens here, the parent class destructor,
+  // ~AutoObjectMapperPOSIX, runs afterwards and unmaps anything that was
+  // mapped in the normal way.  Unfortunately nothing enforces that an
+  // object is mapped either by faulty.lib or by the parent, but not both.
+  if (!mHdl) {
+    return;
+  }
+  // We've got an object mapped by faulty.lib.  Unmap it via faulty.lib.
+  MOZ_ASSERT(mSize > 0);
+  // Assert on the basis that no valid mapping would start at page zero.
+  MOZ_ASSERT(mImage);
+  __dl_munmap(mHdl, mImage, mSize);
+  dlclose(mHdl);
+  // Clear the extent so the assertions in ~AutoObjectMapperPOSIX hold.
+  mImage = nullptr;
+  mSize  = 0;
+}
+
+bool AutoObjectMapperFaultyLib::Map(/*OUT*/void** start, /*OUT*/size_t* length,
+                                    std::string fileName)
+{
+  MOZ_ASSERT(!mHdl);
+
+  // libmozglue.so is the one library that is not mapped through
+  // faulty.lib: look for it under the installation directory and hand
+  // the problem off to the standard mapper.
+  if (fileName == "libmozglue.so") {
+    char* installDir = get_installation_lib_dir();
+    if (installDir) {
+      fileName = std::string(installDir) + "/lib/" + fileName;
+      free(installDir);
+    }
+    return AutoObjectMapperPOSIX::Map(start, length, fileName);
+  }
+
+  // Any other name is passed as-is to faulty.lib, which we have to
+  // grapple with directly: get a handle, ask for the size, then map.
+  void* handle = dlopen(fileName.c_str(), RTLD_GLOBAL | RTLD_LAZY);
+  if (!handle) {
+    failedToMessage(mLog, "get handle for ELF file", fileName);
+    return false;
+  }
+
+  size_t mappable = __dl_get_mappable_length(handle);
+  if (mappable == 0) {
+    dlclose(handle);
+    failedToMessage(mLog, "get size for ELF file", fileName);
+    return false;
+  }
+
+  void* mapped = __dl_mmap(handle, nullptr, mappable, 0);
+  if (mapped == MAP_FAILED) {
+    dlclose(handle);
+    failedToMessage(mLog, "mmap ELF file", fileName);
+    return false;
+  }
+
+  // Holding on to the handle is what tells the destructor that this
+  // mapping has to be released through faulty.lib.
+  mHdl   = handle;
+  mImage = *start  = mapped;
+  mSize  = *length = mappable;
+  return true;
+}
+
+#endif // defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
diff --git a/tools/profiler/lul/AutoObjectMapper.h b/tools/profiler/lul/AutoObjectMapper.h
new file mode 100644
index 000000000..1f813d6f2
--- /dev/null
+++ b/tools/profiler/lul/AutoObjectMapper.h
@@ -0,0 +1,115 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef AutoObjectMapper_h
+#define AutoObjectMapper_h
+
+#include <string>
+
+#include "mozilla/Attributes.h"
+#include "PlatformMacros.h"
+
+// A (nearly-) RAII class that maps an object in and then unmaps it on
+// destruction.  This base class version uses the "normal" POSIX
+// functions: open, fstat, close, mmap, munmap.
+
+class MOZ_STACK_CLASS AutoObjectMapperPOSIX {
+public:
+  // The constructor does not attempt to map the file, because that
+  // might fail.  Instead, once the object has been constructed,
+  // call Map() to attempt the mapping.  There is no corresponding
+  // Unmap() since the unmapping is done in the destructor.  Failure
+  // messages are sent to |aLog|.
+  explicit AutoObjectMapperPOSIX(void(*aLog)(const char*));
+
+  // Unmap the file on destruction of this object.
+  ~AutoObjectMapperPOSIX();
+
+  // Map |fileName| into the address space and return the mapping
+  // extents.  If the file is zero sized this will fail.  The file is
+  // mapped read-only (PROT_READ) and shared (MAP_SHARED).  Returns true
+  // iff the mapping succeeded, in which case *start and *length hold its
+  // extent.  Map must not be called again once a call has succeeded;
+  // debug builds assert on that.
+  bool Map(/*OUT*/void** start, /*OUT*/size_t* length, std::string fileName);
+
+protected:
+  // If we are currently holding a mapped object, these record the
+  // mapped address range.
+  void*  mImage;
+  size_t mSize;
+
+  // A logging sink, for complaining about mapping failures.
+  void (*mLog)(const char*);
+
+private:
+  // Are we currently holding a mapped object?  This is private to
+  // the base class.  Derived classes need to have their own way to
+  // track whether they are holding a mapped object.
+  bool mIsMapped;
+
+  // Disable copying and assignment.
+  AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&);
+  AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&);
+  // Disable heap allocation of this class.
+  void* operator new(size_t);
+  void* operator new[](size_t);
+  void  operator delete(void*);
+  void  operator delete[](void*);
+};
+
+
+#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+// This is a variant of AutoObjectMapperPOSIX suitable for use in
+// conjunction with faulty.lib on Android.  How it behaves depends on
+// the name of the file to be mapped.  There are three possible cases:
+//
+// (1) /foo/bar/xyzzy/blah.apk!/libwurble.so
+//     We hand it as-is to faulty.lib and let it fish the relevant
+//     bits out of the APK.
+//
+// (2) libmozglue.so
+//     This is part of the Fennec installation, but is not in the
+//     APK.  Instead we have to figure out the installation path
+//     and look for it there.  Because of faulty.lib limitations,
+//     we have to use regular open/mmap instead of faulty.lib.
+//
+// (3) libanythingelse.so
+//     faulty.lib assumes this is a system library, and prepends
+//     "/system/lib/" to the path.  So as in (1), we can give it
+//     as-is to faulty.lib.
+//
+// Hence (1) and (3) require special-casing here.  Case (2) simply
+// hands the problem to the parent class.
+
+class MOZ_STACK_CLASS AutoObjectMapperFaultyLib : public AutoObjectMapperPOSIX {
+public:
+  explicit AutoObjectMapperFaultyLib(void(*aLog)(const char*));
+  // Unmaps via faulty.lib if that is how the object was mapped.
+  ~AutoObjectMapperFaultyLib();
+  // Hides (does not override) AutoObjectMapperPOSIX::Map, which is not virtual.
+  bool Map(/*OUT*/void** start, /*OUT*/size_t* length, std::string fileName);
+
+private:
+  // faulty.lib requires us to maintain an abstract handle that can be
+  // used later to unmap the area.  If this is non-NULL, it is assumed
+  // that unmapping is to be done by faulty.lib.  Otherwise it goes
+  // via the normal mechanism.
+  void* mHdl;
+
+  // Disable copying and assignment.
+  AutoObjectMapperFaultyLib(const AutoObjectMapperFaultyLib&);
+  AutoObjectMapperFaultyLib& operator=(const AutoObjectMapperFaultyLib&);
+  // Disable heap allocation of this class.
+  void* operator new(size_t);
+  void* operator new[](size_t);
+  void  operator delete(void*);
+  void  operator delete[](void*);
+};
+
+#endif // defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+
+#endif // AutoObjectMapper_h
diff --git a/tools/profiler/lul/LulCommon.cpp b/tools/profiler/lul/LulCommon.cpp
new file mode 100644
index 000000000..7321251c8
--- /dev/null
+++ b/tools/profiler/lul/LulCommon.cpp
@@ -0,0 +1,114 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2011, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/module.cc
+//   src/common/unique_string.cc
+
+// There's no internal-only interface for LulCommon.  Hence include
+// the external interface directly.
+#include "LulCommonExt.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <map>
+
+
+namespace lul {
+
+using std::string;
+
+////////////////////////////////////////////////////////////////
+// Module
+//
+Module::Module(const string &name, const string &os,
+               const string &architecture, const string &id)
+  : name_(name)
+  , os_(os)
+  , architecture_(architecture)
+  , id_(id) { }
+
+Module::~Module() {  // no explicit cleanup is performed here
+}
+
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+class UniqueString {
+ public:
+  explicit UniqueString(string str) : str_(strdup(str.c_str())) {}
+  ~UniqueString() { free(const_cast<char*>(str_)); }
+  const char* str_;  // strdup'd copy of the text, freed by the destructor
+};
+
+const char* FromUniqueString(const UniqueString* ustr)
+{
+  return ustr->str_;  // still owned by |ustr|; the caller must not free it
+}
+
+bool IsEmptyUniqueString(const UniqueString* ustr)
+{
+  return *ustr->str_ == '\0';  // empty iff the first character is the NUL
+}
+
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+UniqueStringUniverse::~UniqueStringUniverse()
+{
+  std::map<string, UniqueString*>::iterator cur = map_.begin();
+  for (; cur != map_.end(); ++cur) {
+    delete cur->second;  // each mapped UniqueString is released here
+  }
+}
+
+const UniqueString* UniqueStringUniverse::ToUniqueString(string str)
+{
+  std::map<string, UniqueString*>::iterator found = map_.find(str);
+  if (found != map_.end()) {
+    return found->second;  // seen before: hand back the existing instance
+  }
+  // First sighting: allocate an instance and remember it under |str|.
+  UniqueString* fresh = new UniqueString(str);
+  map_[str] = fresh;
+  return fresh;
+}
+
+}  // namespace lul
diff --git a/tools/profiler/lul/LulCommonExt.h b/tools/profiler/lul/LulCommonExt.h
new file mode 100644
index 000000000..99a967683
--- /dev/null
+++ b/tools/profiler/lul/LulCommonExt.h
@@ -0,0 +1,554 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2010, 2012, 2013 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// module.h: Define google_breakpad::Module. A Module holds debugging
+// information, and can write that information out as a Breakpad
+// symbol file.
+
+
+//  (C) Copyright Greg Colvin and Beman Dawes 1998, 1999.
+//  Copyright (c) 2001, 2002 Peter Dimov
+//
+//  Permission to copy, use, modify, sell and distribute this software
+//  is granted provided this copyright notice appears in all copies.
+//  This software is provided "as is" without express or implied
+//  warranty, and with no claim as to its suitability for any purpose.
+//
+//  See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation.
+//
+
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/unique_string.h
+//   src/common/scoped_ptr.h
+//   src/common/module.h
+
+// External interface for the "Common" component of LUL.
+
+#ifndef LulCommonExt_h
+#define LulCommonExt_h
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <string>
+#include <map>
+#include <vector>
+#include <cstddef>            // for std::ptrdiff_t
+
+#include "mozilla/Assertions.h"
+
+namespace lul {
+
+using std::string;
+using std::map;
+
+
+////////////////////////////////////////////////////////////////
+// UniqueString
+//
+
+// Abstract type
+class UniqueString;
+
+// Get the contained C string (debugging only)
+const char* FromUniqueString(const UniqueString*);
+
+// Is the given string empty (that is, "") ?
+bool IsEmptyUniqueString(const UniqueString*);
+
+
+////////////////////////////////////////////////////////////////
+// UniqueStringUniverse
+//
+
+// All UniqueStrings live in some specific UniqueStringUniverse.
+class UniqueStringUniverse {
+public:
+  UniqueStringUniverse() {}
+  ~UniqueStringUniverse();
+  // Convert a |string| to a UniqueString that lives in this universe.
+  const UniqueString* ToUniqueString(string str);
+private:
+  map<string, UniqueString*> map_; // NOTE(review): copying is not disabled; confirm no caller copies
+};
+
+
+////////////////////////////////////////////////////////////////
+// GUID
+//
+
+typedef struct {
+  uint32_t data1;
+  uint16_t data2;
+  uint16_t data3;
+  uint8_t  data4[8];  // 4 + 2 + 2 + 8 = 16 bytes of fields in total
+} MDGUID;  // GUID
+// Shorter alias for the same type.
+typedef MDGUID GUID;
+
+
+////////////////////////////////////////////////////////////////
+// scoped_ptr
+//
+
+//  scoped_ptr mimics a built-in pointer except that it guarantees deletion
+//  of the object pointed to, either on destruction of the scoped_ptr or via
+//  an explicit reset(). scoped_ptr is a simple solution for simple needs;
+//  use shared_ptr or std::auto_ptr if your needs are more complex.
+
+//  *** NOTE ***
+//  If your scoped_ptr is a class member of class FOO pointing to a
+//  forward declared type BAR (as shown below), then you MUST use a non-inlined
+//  version of the destructor.  The destructor of a scoped_ptr (called from
+//  FOO's destructor) must have a complete definition of BAR in order to
+//  destroy it.  Example:
+//
+//  -- foo.h --
+//  class BAR;
+//
+//  class FOO {
+//   public:
+//    FOO();
+//    ~FOO();  // Required for sources that instantiate class FOO to compile!
+//
+//   private:
+//    scoped_ptr<BAR> bar_;
+//  };
+//
+//  -- foo.cc --
+//  #include "foo.h"
+//  FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition.
+
+//  scoped_ptr_malloc added by Google
+//  When one of these goes out of scope, instead of doing a delete or
+//  delete[], it calls free().  scoped_ptr_malloc<char> is likely to see
+//  much more use than any other specializations.
+
+//  release() added by Google
+//  Use this to conditionally transfer ownership of a heap-allocated object
+//  to the caller, usually on method success.
+
+template <typename T>
+class scoped_ptr {
+ private:
+  // The owned object, or null when nothing is held.
+  T* ptr;
+  // Disable copying and assignment.
+  scoped_ptr(scoped_ptr const &);
+  scoped_ptr & operator=(scoped_ptr const &);
+
+ public:
+
+  typedef T element_type;
+  // Take ownership of |p|, which may be null.
+  explicit scoped_ptr(T* p = nullptr) : ptr(p) {}
+  // Delete whatever is still owned.
+  ~scoped_ptr() {
+    delete ptr;
+  }
+  // Own |p| from now on.
+  void reset(T* p = nullptr) {
+    // Resetting to the pointer already held must not delete it.
+    if (ptr == p) return;
+    delete ptr;
+    ptr = p;
+  }
+  // Access the held object; MOZ_ASSERTs that there is one.
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+  // Comparisons against a raw pointer look only at the address.
+  bool operator==(T* p) const {
+    return p == ptr;
+  }
+
+  bool operator!=(T* p) const {
+    return !(p == ptr);
+  }
+  // Peek at the held pointer without giving up ownership.
+  T* get() const {
+    return ptr;
+  }
+  // Exchange the held pointers of *this and |b|.
+  void swap(scoped_ptr & b) {
+    T* mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+  // Give up ownership: the caller becomes responsible for the result.
+  T* release() {
+    T* released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+
+  // no reason to use these: each scoped_ptr should have its own object
+  template <typename U> bool operator==(scoped_ptr<U> const& p) const;
+  template <typename U> bool operator!=(scoped_ptr<U> const& p) const;
+};
+
+template<typename T> inline
+void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) {
+  a.swap(b);  // forwards to the member swap
+}
+// Comparisons with the raw pointer on the left-hand side.
+template<typename T> inline
+bool operator==(T* p, const scoped_ptr<T>& b) {
+  return p == b.get();
+}
+
+template<typename T> inline
+bool operator!=(T* p, const scoped_ptr<T>& b) {
+  return p != b.get();
+}
+
+//  scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to
+//  is guaranteed, either on destruction of the scoped_array or via an explicit
+//  reset(). Use shared_array or std::vector if your needs are more complex.
+
+template<typename T>
+class scoped_array {
+ private:
+  // The owned array, or null when nothing is held.
+  T* ptr;
+  // Disable copying and assignment.
+  scoped_array(scoped_array const &);
+  scoped_array & operator=(scoped_array const &);
+
+ public:
+
+  typedef T element_type;
+  // Take ownership of the array |p|, which may be null.
+  explicit scoped_array(T* p = nullptr) : ptr(p) {}
+  // Release whatever is still owned, using the array form of delete.
+  ~scoped_array() {
+    delete[] ptr;
+  }
+  // Own |p| from now on.
+  void reset(T* p = nullptr) {
+    // Resetting to the pointer already held must not delete it.
+    if (ptr == p) return;
+    delete[] ptr;
+    ptr = p;
+  }
+  // Element access.  Only the lower bound of |i| can be checked here.
+  T& operator[](std::ptrdiff_t i) const {
+    MOZ_ASSERT(ptr != 0);
+    MOZ_ASSERT(i >= 0);
+    return ptr[i];
+  }
+  // Comparisons against a raw pointer look only at the address.
+  bool operator==(T* p) const {
+    return p == ptr;
+  }
+
+  bool operator!=(T* p) const {
+    return !(p == ptr);
+  }
+  // Peek at the held pointer without giving up ownership.
+  T* get() const {
+    return ptr;
+  }
+  // Exchange the held pointers of *this and |b|.
+  void swap(scoped_array & b) {
+    T* mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+  // Give up ownership: the caller becomes responsible for the result.
+  T* release() {
+    T* released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+
+  // no reason to use these: each scoped_array should have its own object
+  template <typename U> bool operator==(scoped_array<U> const& p) const;
+  template <typename U> bool operator!=(scoped_array<U> const& p) const;
+};
+
+template<class T> inline
+void swap(scoped_array<T>& a, scoped_array<T>& b) {
+  a.swap(b);  // forwards to the member swap
+}
+// Comparisons with the raw pointer on the left-hand side.
+template<typename T> inline
+bool operator==(T* p, const scoped_array<T>& b) {
+  return p == b.get();
+}
+
+template<typename T> inline
+bool operator!=(T* p, const scoped_array<T>& b) {
+  return p != b.get();
+}
+
+
+// This class wraps the c library function free() in a class that can be
+// passed as a template argument to scoped_ptr_malloc below.
+class ScopedPtrMallocFree {
+ public:
+  inline void operator()(void* x) const {
+    free(x);  // |x| is passed straight to the C library's free()
+  }
+};
+
+// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a
+// second template argument, the functor used to free the object.
+
+template<typename T, typename FreeProc = ScopedPtrMallocFree>
+class scoped_ptr_malloc {
+ private:
+  // The owned block, or null when nothing is held.
+  T* ptr;
+  // Disable copying and assignment.
+  scoped_ptr_malloc(scoped_ptr_malloc const &);
+  scoped_ptr_malloc & operator=(scoped_ptr_malloc const &);
+
+ public:
+
+  typedef T element_type;
+  // Take ownership of |p|, which may be null.
+  explicit scoped_ptr_malloc(T* p = nullptr) : ptr(p) {}
+  // Pass whatever is still held (possibly null) to the FreeProc functor.
+  ~scoped_ptr_malloc() {
+    free_((void*) ptr);
+  }
+  // Own |p| from now on.
+  void reset(T* p = nullptr) {
+    // Resetting to the pointer already held must not free it.
+    if (ptr == p) return;
+    free_((void*) ptr);
+    ptr = p;
+  }
+  // Access the held object; MOZ_ASSERTs that there is one.
+  T& operator*() const {
+    MOZ_ASSERT(ptr != 0);
+    return *ptr;
+  }
+
+  T* operator->() const {
+    MOZ_ASSERT(ptr != 0);
+    return ptr;
+  }
+  // Comparisons against a raw pointer look only at the address.
+  bool operator==(T* p) const {
+    return p == ptr;
+  }
+
+  bool operator!=(T* p) const {
+    return !(p == ptr);
+  }
+  // Peek at the held pointer without giving up ownership.
+  T* get() const {
+    return ptr;
+  }
+  // Exchange the held pointers of *this and |b|.
+  void swap(scoped_ptr_malloc & b) {
+    T* mine = ptr;
+    ptr = b.ptr;
+    b.ptr = mine;
+  }
+  // Give up ownership: the caller becomes responsible for the result.
+  T* release() {
+    T* released = ptr;
+    ptr = nullptr;
+    return released;
+  }
+
+ private:
+
+  // no reason to use these: each scoped_ptr_malloc should have its own object
+  template <typename U, typename GP>
+  bool operator==(scoped_ptr_malloc<U, GP> const& p) const;
+  template <typename U, typename GP>
+  bool operator!=(scoped_ptr_malloc<U, GP> const& p) const;
+  // The one functor instance used to free pointers of this type.
+  static FreeProc const free_;
+};
+
+template<typename T, typename FP>
+FP const scoped_ptr_malloc<T,FP>::free_ = FP();
+// Non-member swap; forwards to the member swap.
+template<typename T, typename FP> inline
+void swap(scoped_ptr_malloc<T,FP>& a, scoped_ptr_malloc<T,FP>& b) {
+  a.swap(b);
+}
+// Comparisons with the raw pointer on the left-hand side.
+template<typename T, typename FP> inline
+bool operator==(T* p, const scoped_ptr_malloc<T,FP>& b) {
+  return p == b.get();
+}
+
+template<typename T, typename FP> inline
+bool operator!=(T* p, const scoped_ptr_malloc<T,FP>& b) {
+  return p != b.get();
+}
+
+
+////////////////////////////////////////////////////////////////
+// Module
+//
+
+// A Module represents the contents of a module, and supports methods
+// for adding information produced by parsing STABS or DWARF data
+// --- possibly both from the same file --- and then writing out the
+// unified contents as a Breakpad-format symbol file.
+class Module {
+public:
+  // The type of addresses and sizes in a symbol table.
+  typedef uint64_t Address;
+
+  // Representation of an expression.  This can either be a postfix
+  // expression, in which case it is stored as a string, or a simple
+  // expression of the form (identifier + imm) or *(identifier + imm).
+  // It can also be invalid (denoting "no value").
+  enum ExprHow {
+    kExprInvalid = 1,
+    kExprPostfix,
+    kExprSimple,
+    kExprSimpleMem
+  };
+
+  struct Expr {
+    // Construct a simple-form expression.  If |ident| is the empty string
+    // the result is an invalid expression, exactly as built by Expr().
+    Expr(const UniqueString* ident, long offset, bool deref) {
+      // N.B. a bare |Expr();| statement here would only create and discard
+      // a temporary, leaving *this uninitialised; so fill in the fields.
+      const bool valid = !IsEmptyUniqueString(ident);
+      postfix_ = "";
+      ident_ = valid ? ident : nullptr;
+      offset_ = valid ? offset : 0;
+      how_ = !valid ? kExprInvalid : (deref ? kExprSimpleMem : kExprSimple);
+    }
+
+    // Construct an invalid expression
+    Expr() {
+      postfix_ = "";
+      ident_ = nullptr;
+      offset_ = 0;
+      how_ = kExprInvalid;
+    }
+
+    // Return the postfix expression string, either directly,
+    // if this is a postfix expression, or by synthesising it
+    // for a simple expression.
+    std::string getExprPostfix() const {
+      switch (how_) {
+        case kExprPostfix:
+          return postfix_;
+        case kExprSimple:
+        case kExprSimpleMem: {
+          char buf[40];
+          sprintf(buf, " %ld %c%s", labs(offset_), offset_ < 0 ? '-' : '+',
+                                    how_ == kExprSimple ? "" : " ^");
+          return std::string(FromUniqueString(ident_)) + std::string(buf);
+        }
+        case kExprInvalid:
+        default:
+          MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type");
+          return "Expr::genExprPostfix: kExprInvalid";
+      }
+    }
+
+    // The identifier that gives the starting value for simple expressions.
+    const UniqueString* ident_;
+    // The offset to add for simple expressions.
+    long        offset_;
+    // The Postfix expression string to evaluate for non-simple expressions.
+    std::string postfix_;
+    // The operation expressed by this expression.
+    ExprHow     how_;
+  };
+
+  // A map from register names to expressions that recover
+  // their values. This can represent a complete set of rules to
+  // follow at some address, or a set of changes to be applied to an
+  // extant set of rules.
+  // NOTE! there are two completely different types called RuleMap.  This
+  // is one of them.
+  typedef std::map<const UniqueString*, Expr> RuleMap;
+
+  // A map from addresses to RuleMaps, representing changes that take
+  // effect at given addresses.
+  typedef std::map<Address, RuleMap> RuleChangeMap;
+
+  // A range of 'STACK CFI' stack walking information. An instance of
+  // this structure corresponds to a 'STACK CFI INIT' record and the
+  // subsequent 'STACK CFI' records that fall within its range.
+  struct StackFrameEntry {
+    // The starting address and number of bytes of machine code this
+    // entry covers.
+    Address address, size;
+
+    // The initial register recovery rules, in force at the starting
+    // address.
+    RuleMap initial_rules;
+
+    // A map from addresses to rule changes. To find the rules in
+    // force at a given address, start with initial_rules, and then
+    // apply the changes given in this map for all addresses up to and
+    // including the address you're interested in.
+    RuleChangeMap rule_changes;
+  };
+
+  // Create a new module with the given name, operating system,
+  // architecture, and ID string.
+  Module(const std::string &name, const std::string &os,
+         const std::string &architecture, const std::string &id);
+  ~Module();
+
+private:
+
+  // Module header entries.
+  std::string name_, os_, architecture_, id_;
+};
+
+
+}  // namespace lul
+
+#endif // LulCommonExt_h
diff --git a/tools/profiler/lul/LulDwarf.cpp b/tools/profiler/lul/LulDwarf.cpp
new file mode 100644
index 000000000..1bdbdabb6
--- /dev/null
+++ b/tools/profiler/lul/LulDwarf.cpp
@@ -0,0 +1,2180 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
+// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/bytereader.cc
+//   src/common/dwarf/dwarf2reader.cc
+//   src/common/dwarf_cfi_to_module.cc
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <map>
+#include <stack>
+#include <string>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "LulCommonExt.h"
+#include "LulDwarfInt.h"
+
+
+// Set this to 1 for verbose logging
+#define DEBUG_DWARF 0
+
+
+namespace lul {
+
+using std::string;
+
+// Build a reader that uses ENDIAN for multi-byte values. No offset or
+// address reader is installed yet, both sizes start at zero, and each
+// "have base" flag is value-initialized.
+ByteReader::ByteReader(enum Endianness endian)
+    : offset_reader_(NULL),
+      address_reader_(NULL),
+      endian_(endian),
+      address_size_(0),
+      offset_size_(0),
+      have_section_base_(),
+      have_text_base_(),
+      have_data_base_(),
+      have_function_base_() {
+}
+
+// The destructor has no work of its own to do.
+ByteReader::~ByteReader() {
+}
+
+// Record SIZE as the width of offsets and install the fixed-width reader
+// that goes with it. SIZE is asserted to be 4 or 8; any value other than
+// 4 installs the eight-byte reader.
+void ByteReader::SetOffsetSize(uint8 size) {
+  offset_size_ = size;
+  MOZ_ASSERT(size == 4 || size == 8);
+
+  if (size != 4) {
+    offset_reader_ = &ByteReader::ReadEightBytes;
+    return;
+  }
+  offset_reader_ = &ByteReader::ReadFourBytes;
+}
+
+// Record SIZE as the width of addresses and install the fixed-width
+// reader that goes with it. SIZE is asserted to be 4 or 8; any value
+// other than 4 installs the eight-byte reader.
+void ByteReader::SetAddressSize(uint8 size) {
+  address_size_ = size;
+  MOZ_ASSERT(size == 4 || size == 8);
+
+  if (size != 4) {
+    address_reader_ = &ByteReader::ReadEightBytes;
+    return;
+  }
+  address_reader_ = &ByteReader::ReadFourBytes;
+}
+
+// Decode the initial-length field that begins at START. Set the offset
+// size to match the form found, store the number of bytes the field
+// occupies in *LEN, and return the decoded length.
+uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) {
+  const uint64 first_word = ReadFourBytes(start);
+
+  // Anything other than the all-ones escape is the length itself, and
+  // offsets are four bytes wide.
+  if (first_word != 0xffffffff) {
+    SetOffsetSize(4);
+    *len = 4;
+    return first_word;
+  }
+
+  // In DWARF2/3, an initial length of all 1 bits means the offset size is
+  // 8 and the real length is held in the next 8 bytes.
+  SetOffsetSize(8);
+  *len = 12;
+  return ReadOffset(start + 4);
+}
+
+// Return true if ENCODING is DW_EH_PE_omit, DW_EH_PE_aligned, or a value
+// whose low three bits do not exceed DW_EH_PE_udata8 and whose 0x70 bits
+// do not exceed DW_EH_PE_funcrel.
+bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const {
+  if (encoding == DW_EH_PE_omit || encoding == DW_EH_PE_aligned)
+    return true;
+
+  const bool format_in_range = (encoding & 0x7) <= DW_EH_PE_udata8;
+  const bool base_in_range = (encoding & 0x70) <= DW_EH_PE_funcrel;
+  return format_in_range && base_in_range;
+}
+
+// Return true if the base address selected by the 0x70 bits of ENCODING
+// is available: absolute pointers always are, each relative form needs
+// its corresponding base to have been provided, and any other value is
+// unusable.
+bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const {
+  const int base_kind = encoding & 0x70;
+
+  if (base_kind == DW_EH_PE_absptr)  return true;
+  if (base_kind == DW_EH_PE_pcrel)   return have_section_base_;
+  if (base_kind == DW_EH_PE_textrel) return have_text_base_;
+  if (base_kind == DW_EH_PE_datarel) return have_data_base_;
+  if (base_kind == DW_EH_PE_funcrel) return have_function_base_;
+  return false;
+}
+
+// Decode the pointer stored at BUFFER according to ENCODING, store the
+// number of bytes it occupies in *LEN (for DW_EH_PE_aligned this includes
+// the padding skipped to reach the aligned position), and return the
+// resulting address.
+//
+// Apart from DW_EH_PE_aligned, which is handled on its own, the low
+// nybble of ENCODING selects how the raw value is stored and the 0x70
+// bits select the base address it is added to. An unrecognized value in
+// either field calls abort(). ENCODING must not be DW_EH_PE_omit.
+uint64 ByteReader::ReadEncodedPointer(const char *buffer,
+                                      DwarfPointerEncoding encoding,
+                                      size_t *len) const {
+  // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't
+  // see it here.
+  MOZ_ASSERT(encoding != DW_EH_PE_omit);
+
+  // The Linux Standards Base 4.0 does not make this clear, but the
+  // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c)
+  // agree that aligned pointers are always absolute, machine-sized,
+  // machine-signed pointers.
+  if (encoding == DW_EH_PE_aligned) {
+    MOZ_ASSERT(have_section_base_);
+
+    // We don't need to align BUFFER in *our* address space. Rather, we
+    // need to find the next position in our buffer that would be aligned
+    // when the .eh_frame section the buffer contains is loaded into the
+    // program's memory. So align assuming that buffer_base_ gets loaded at
+    // address section_base_, where section_base_ itself may or may not be
+    // aligned.
+
+    // First, find the offset to START from the closest prior aligned
+    // address.
+    uint64 skew = section_base_ & (AddressSize() - 1);
+    // Now find the offset from that aligned address to buffer.
+    uint64 offset = skew + (buffer - buffer_base_);
+    // Round up to the next boundary.
+    uint64 aligned = (offset + AddressSize() - 1) & -AddressSize();
+    // Convert back to a pointer.
+    const char *aligned_buffer = buffer_base_ + (aligned - skew);
+    // Finally, store the length and actually fetch the pointer.
+    *len = aligned_buffer - buffer + AddressSize();
+    return ReadAddress(aligned_buffer);
+  }
+
+  // Extract the value first, ignoring whether it's a pointer or an
+  // offset relative to some base.
+  uint64 offset;
+  switch (encoding & 0x0f) {
+    case DW_EH_PE_absptr:
+      // DW_EH_PE_absptr is weird, as it is used as a meaningful value for
+      // both the high and low nybble of encoding bytes. When it appears in
+      // the high nybble, it means that the pointer is absolute, not an
+      // offset from some base address. When it appears in the low nybble,
+      // as here, it means that the pointer is stored as a normal
+      // machine-sized and machine-signed address. A low nybble of
+      // DW_EH_PE_absptr does not imply that the pointer is absolute; it is
+      // correct for us to treat the value as an offset from a base address
+      // if the upper nybble is not DW_EH_PE_absptr.
+      offset = ReadAddress(buffer);
+      *len = AddressSize();
+      break;
+
+    case DW_EH_PE_uleb128:
+      // The LEB128 readers store the encoded length in *len themselves.
+      offset = ReadUnsignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_udata2:
+      offset = ReadTwoBytes(buffer);
+      *len = 2;
+      break;
+
+    case DW_EH_PE_udata4:
+      offset = ReadFourBytes(buffer);
+      *len = 4;
+      break;
+
+    case DW_EH_PE_udata8:
+      offset = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    case DW_EH_PE_sleb128:
+      offset = ReadSignedLEB128(buffer, len);
+      break;
+
+    case DW_EH_PE_sdata2:
+      offset = ReadTwoBytes(buffer);
+      // Sign-extend from 16 bits.
+      offset = (offset ^ 0x8000) - 0x8000;
+      *len = 2;
+      break;
+
+    case DW_EH_PE_sdata4:
+      offset = ReadFourBytes(buffer);
+      // Sign-extend from 32 bits.
+      offset = (offset ^ 0x80000000ULL) - 0x80000000ULL;
+      *len = 4;
+      break;
+
+    case DW_EH_PE_sdata8:
+      // No need to sign-extend; this is the full width of our type.
+      offset = ReadEightBytes(buffer);
+      *len = 8;
+      break;
+
+    default:
+      // Not a storage format this reader knows how to decode.
+      abort();
+  }
+
+  // Find the appropriate base address.
+  uint64 base;
+  switch (encoding & 0x70) {
+    case DW_EH_PE_absptr:
+      base = 0;
+      break;
+
+    case DW_EH_PE_pcrel:
+      MOZ_ASSERT(have_section_base_);
+      // The base is the address BUFFER itself would have if buffer_base_
+      // were loaded at section_base_.
+      base = section_base_ + (buffer - buffer_base_);
+      break;
+
+    case DW_EH_PE_textrel:
+      MOZ_ASSERT(have_text_base_);
+      base = text_base_;
+      break;
+
+    case DW_EH_PE_datarel:
+      MOZ_ASSERT(have_data_base_);
+      base = data_base_;
+      break;
+
+    case DW_EH_PE_funcrel:
+      MOZ_ASSERT(have_function_base_);
+      base = function_base_;
+      break;
+
+    default:
+      // Not a base selector this reader knows how to apply.
+      abort();
+  }
+
+  uint64 pointer = base + offset;
+
+  // Remove inappropriate upper bits.
+  if (AddressSize() == 4)
+    pointer = pointer & 0xffffffff;
+  else
+    MOZ_ASSERT(AddressSize() == sizeof(uint64));
+
+  return pointer;
+}
+
+
+// A DWARF rule for recovering the address or value of a register, or
+// computing the canonical frame address. There is one subclass of this for
+// each '*Rule' member function in CallFrameInfo::Handler.
+//
+// It's annoying that we have to handle Rules using pointers (because
+// the concrete instances can have an arbitrary size). They're small,
+// so it would be much nicer if we could just handle them by value
+// instead of fretting about ownership and destruction.
+//
+// It seems like all these could simply be instances of std::tr1::bind,
+// except that we need instances to be EqualityComparable, too.
+//
+// This could logically be nested within State, but then the qualified names
+// get horrendous.
+class CallFrameInfo::Rule {
+ public:
+  virtual ~Rule() { }
+
+  // Tell HANDLER that, at ADDRESS in the program, REG can be recovered
+  // using this rule. If REG is kCFARegister, then this rule describes how
+  // to compute the canonical frame address. Return what the HANDLER
+  // member function returned.
+  //
+  // The parameter is deliberately not spelled 'register': that word is a
+  // C++ keyword, and C++17 no longer accepts it as a storage class
+  // specifier, so using it here would not compile under that standard.
+  virtual bool Handle(Handler *handler, uint64 address, int reg) const = 0;
+
+  // Equality on rules. We use these to decide which rules we need
+  // to report after a DW_CFA_restore_state instruction.
+  virtual bool operator==(const Rule &rhs) const = 0;
+
+  // Inequality, defined as the negation of operator==.
+  bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
+
+  // Return a pointer to a copy of this rule.
+  virtual Rule *Copy() const = 0;
+
+  // If this is a base+offset rule, change its base register to REG.
+  // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
+  virtual void SetBaseRegister(unsigned reg) { }
+
+  // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
+  // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
+  virtual void SetOffset(long long offset) { }
+
+  // A RTTI workaround, to make it possible to implement equality
+  // comparisons on classes derived from this one.
+  enum CFIRTag {
+    CFIR_UNDEFINED_RULE,
+    CFIR_SAME_VALUE_RULE,
+    CFIR_OFFSET_RULE,
+    CFIR_VAL_OFFSET_RULE,
+    CFIR_REGISTER_RULE,
+    CFIR_EXPRESSION_RULE,
+    CFIR_VAL_EXPRESSION_RULE
+  };
+
+  // Produce the tag that identifies the child class of this object.
+  virtual CFIRTag getTag() const = 0;
+};
+
+// Rule: the caller's value of the register cannot be recovered.
+class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
+ public:
+  UndefinedRule() { }
+  ~UndefinedRule() { }
+
+  // The tag operator== uses to recognize this subclass.
+  CFIRTag getTag() const { return CFIR_UNDEFINED_RULE; }
+
+  // Report this rule for REG at ADDRESS through HANDLER->UndefinedRule
+  // and hand back whatever it returns.
+  bool Handle(Handler *handler, uint64 address, int reg) const {
+    return handler->UndefinedRule(address, reg);
+  }
+
+  // This rule carries no data, so it equals any rule with the same tag.
+  bool operator==(const Rule &rhs) const {
+    return rhs.getTag() == CFIR_UNDEFINED_RULE;
+  }
+
+  // Return a newly allocated copy of this rule.
+  Rule *Copy() const { return new UndefinedRule(*this); }
+};
+
+// Rule: the register still holds the value it had in the caller.
+class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
+ public:
+  SameValueRule() { }
+  ~SameValueRule() { }
+
+  // The tag operator== uses to recognize this subclass.
+  CFIRTag getTag() const { return CFIR_SAME_VALUE_RULE; }
+
+  // Report this rule for REG at ADDRESS through HANDLER->SameValueRule
+  // and hand back whatever it returns.
+  bool Handle(Handler *handler, uint64 address, int reg) const {
+    return handler->SameValueRule(address, reg);
+  }
+
+  // This rule carries no data, so it equals any rule with the same tag.
+  bool operator==(const Rule &rhs) const {
+    return rhs.getTag() == CFIR_SAME_VALUE_RULE;
+  }
+
+  // Return a newly allocated copy of this rule.
+  Rule *Copy() const { return new SameValueRule(*this); }
+};
+
+// Rule: the register was saved in memory, OFFSET away from the value of
+// BASE_REGISTER. BASE_REGISTER may be
+// CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
+ public:
+  OffsetRule(int base_register, long offset)
+      : base_register_(base_register), offset_(offset) { }
+  ~OffsetRule() { }
+
+  // The tag operator== uses to recognize this subclass.
+  CFIRTag getTag() const { return CFIR_OFFSET_RULE; }
+
+  // Report this rule for REG at ADDRESS through HANDLER->OffsetRule and
+  // hand back whatever it returns.
+  bool Handle(Handler *handler, uint64 address, int reg) const {
+    return handler->OffsetRule(address, reg, base_register_, offset_);
+  }
+
+  // Equal only to another OffsetRule with the same base register and the
+  // same offset.
+  bool operator==(const Rule &rhs) const {
+    if (rhs.getTag() != CFIR_OFFSET_RULE) return false;
+    const OffsetRule &other = static_cast<const OffsetRule &>(rhs);
+    return base_register_ == other.base_register_ &&
+           offset_ == other.offset_;
+  }
+
+  // Return a newly allocated copy of this rule.
+  Rule *Copy() const { return new OffsetRule(*this); }
+
+  // SetBaseRegister and SetOffset are intentionally not overridden: they
+  // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and an
+  // OffsetRule makes no sense as a CFA rule because it yields the address
+  // at which a register is saved, not a value.
+
+ private:
+  int base_register_;
+  long offset_;
+};
+
+// Rule: the caller's value of the register equals the value of
+// BASE_REGISTER plus offset. BASE_REGISTER may be
+// CallFrameInfo::Handler::kCFARegister.
+class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
+ public:
+  ValOffsetRule(int base_register, long offset)
+      : base_register_(base_register), offset_(offset) { }
+  ~ValOffsetRule() { }
+
+  // The tag operator== uses to recognize this subclass.
+  CFIRTag getTag() const { return CFIR_VAL_OFFSET_RULE; }
+
+  // Report this rule for REG at ADDRESS through HANDLER->ValOffsetRule
+  // and hand back whatever it returns.
+  bool Handle(Handler *handler, uint64 address, int reg) const {
+    return handler->ValOffsetRule(address, reg, base_register_, offset_);
+  }
+
+  // Equal only to another ValOffsetRule with the same base register and
+  // the same offset.
+  bool operator==(const Rule &rhs) const {
+    if (rhs.getTag() != CFIR_VAL_OFFSET_RULE) return false;
+    const ValOffsetRule &other = static_cast<const ValOffsetRule &>(rhs);
+    return base_register_ == other.base_register_ &&
+           offset_ == other.offset_;
+  }
+
+  // Return a newly allocated copy of this rule.
+  Rule *Copy() const { return new ValOffsetRule(*this); }
+
+  // Replace the base register with REG.
+  void SetBaseRegister(unsigned reg) {
+    base_register_ = reg;
+  }
+
+  // Replace the offset with OFFSET.
+  void SetOffset(long long offset) {
+    offset_ = offset;
+  }
+
+ private:
+  int base_register_;
+  long offset_;
+};
+
+// Rule: the register's value now lives in another register, number
+// REGISTER_NUMBER_.
+class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
+ public:
+  explicit RegisterRule(int register_number)
+      : register_number_(register_number) { }
+  ~RegisterRule() { }
+
+  // The tag operator== uses to recognize this subclass.
+  CFIRTag getTag() const { return CFIR_REGISTER_RULE; }
+
+  // Report this rule for REG at ADDRESS through HANDLER->RegisterRule
+  // and hand back whatever it returns.
+  bool Handle(Handler *handler, uint64 address, int reg) const {
+    return handler->RegisterRule(address, reg, register_number_);
+  }
+
+  // Equal only to another RegisterRule naming the same register.
+  bool operator==(const Rule &rhs) const {
+    if (rhs.getTag() != CFIR_REGISTER_RULE) return false;
+    const RegisterRule &other = static_cast<const RegisterRule &>(rhs);
+    return register_number_ == other.register_number_;
+  }
+
+  // Return a newly allocated copy of this rule.
+  Rule *Copy() const { return new RegisterRule(*this); }
+
+ private:
+  int register_number_;
+};
+
+// Rule: evaluating EXPRESSION yields the address at which the register
+// is saved.
+class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
+ public:
+  explicit ExpressionRule(const string &expression)
+      : expression_(expression) { }
+  ~ExpressionRule() { }
+
+  // The tag operator== uses to recognize this subclass.
+  CFIRTag getTag() const { return CFIR_EXPRESSION_RULE; }
+
+  // Report this rule for REG at ADDRESS through HANDLER->ExpressionRule
+  // and hand back whatever it returns.
+  bool Handle(Handler *handler, uint64 address, int reg) const {
+    return handler->ExpressionRule(address, reg, expression_);
+  }
+
+  // Equal only to another ExpressionRule holding the same expression.
+  bool operator==(const Rule &rhs) const {
+    if (rhs.getTag() != CFIR_EXPRESSION_RULE) return false;
+    const ExpressionRule &other = static_cast<const ExpressionRule &>(rhs);
+    return expression_ == other.expression_;
+  }
+
+  // Return a newly allocated copy of this rule.
+  Rule *Copy() const { return new ExpressionRule(*this); }
+
+ private:
+  string expression_;
+};
+
+// Rule: evaluating EXPRESSION yields the previous value of the register.
+class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
+ public:
+  explicit ValExpressionRule(const string &expression)
+      : expression_(expression) { }
+  ~ValExpressionRule() { }
+
+  // The tag operator== uses to recognize this subclass.
+  CFIRTag getTag() const { return CFIR_VAL_EXPRESSION_RULE; }
+
+  // Report this rule for REG at ADDRESS through
+  // HANDLER->ValExpressionRule and hand back whatever it returns.
+  bool Handle(Handler *handler, uint64 address, int reg) const {
+    return handler->ValExpressionRule(address, reg, expression_);
+  }
+
+  // Equal only to another ValExpressionRule holding the same expression.
+  bool operator==(const Rule &rhs) const {
+    if (rhs.getTag() != CFIR_VAL_EXPRESSION_RULE) return false;
+    const ValExpressionRule &other =
+        static_cast<const ValExpressionRule &>(rhs);
+    return expression_ == other.expression_;
+  }
+
+  // Return a newly allocated copy of this rule.
+  Rule *Copy() const { return new ValExpressionRule(*this); }
+
+ private:
+  string expression_;
+};
+
+// The set of rules in force at some point: one optional rule for the CFA
+// plus a rule per register number. The map owns every Rule it holds.
+class CallFrameInfo::RuleMap {
+ public:
+  // Start out with no CFA rule and no register rules.
+  RuleMap() : cfa_rule_(NULL) { }
+
+  // Copy RHS by starting out empty and then assigning from it.
+  RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) {
+    *this = rhs;
+  }
+
+  // Destroy every rule this map holds.
+  ~RuleMap() {
+    Clear();
+  }
+
+  RuleMap &operator=(const RuleMap &rhs);
+
+  // Make RULE the rule for computing the CFA, destroying the previous CFA
+  // rule. This map takes ownership of RULE.
+  void SetCFARule(Rule *rule) {
+    delete cfa_rule_;
+    cfa_rule_ = rule;
+  }
+
+  // Return the current CFA rule; this RuleMap keeps ownership of it,
+  // unlike RegisterRule below. Used for DW_CFA_def_cfa_offset and
+  // DW_CFA_def_cfa_register, and for detecting references to the CFA
+  // before a rule for it has been established.
+  Rule *CFARule() const {
+    return cfa_rule_;
+  }
+
+  // Return the rule for REG, or NULL if there is none. Ownership of the
+  // result passes to the caller.
+  Rule *RegisterRule(int reg) const;
+
+  // Make RULE the rule for computing REG. This map takes ownership of
+  // RULE.
+  void SetRegisterRule(int reg, Rule *rule);
+
+  // Issue the calls on HANDLER that describe changing from this RuleMap to
+  // NEW_RULES at ADDRESS. This implements DW_CFA_restore_state, where
+  // many rules can change at once. Return true if every handler call
+  // returned true; otherwise, return false.
+  bool HandleTransitionTo(Handler *handler, uint64 address,
+                          const RuleMap &new_rules) const;
+
+ private:
+  // Rules keyed by register number.
+  typedef std::map<int, Rule *> RuleByNumber;
+
+  // Destroy all register rules and the CFA rule, leaving the map empty.
+  void Clear();
+
+  // The rule for computing the canonical frame address, or NULL if none
+  // has been set. Owned by this RuleMap.
+  Rule *cfa_rule_;
+
+  // The rule for recovering each register, by register number. The Rules
+  // the map refers to are owned by this RuleMap.
+  RuleByNumber registers_;
+};
+
+// Replace the contents of this map with copies of the rules in RHS and
+// return *this. Assigning a map to itself leaves it unchanged.
+CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
+  // Clear() below would destroy the very rules we are about to copy if
+  // RHS were this object, so self-assignment must be a no-op.
+  if (this == &rhs) return *this;
+
+  Clear();
+  // Since each map owns the rules it refers to, assignment must copy them.
+  if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
+  for (RuleByNumber::const_iterator it = rhs.registers_.begin();
+       it != rhs.registers_.end(); ++it)
+    registers_[it->first] = it->second->Copy();
+  return *this;
+}
+
+// Return a fresh copy of the rule recorded for REG, or NULL if no rule is
+// recorded for it. REG must not be the CFA pseudo-register.
+CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+
+  const RuleByNumber::const_iterator found = registers_.find(reg);
+  if (found == registers_.end())
+    return NULL;
+  return found->second->Copy();
+}
+
+// Record RULE as the rule for REG, destroying whatever rule was recorded
+// for it before. REG must not be the CFA pseudo-register and RULE must
+// not be null.
+void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
+  MOZ_ASSERT(reg != Handler::kCFARegister);
+  MOZ_ASSERT(rule);
+
+  // operator[] creates a null entry when REG has no rule yet, which makes
+  // the delete below harmless in that case.
+  Rule *&entry = registers_[reg];
+  delete entry;
+  entry = rule;
+}
+
+// Report to HANDLER, as taking effect at ADDRESS, every difference
+// between this rule set and NEW_RULES: the CFA rule first, then the
+// register rules in order of register number. Return false as soon as a
+// handler call returns false; return true otherwise.
+bool CallFrameInfo::RuleMap::HandleTransitionTo(
+    Handler *handler,
+    uint64 address,
+    const RuleMap &new_rules) const {
+  // Transition from cfa_rule_ to new_rules.cfa_rule_.
+  if (cfa_rule_ && new_rules.cfa_rule_) {
+    if (*cfa_rule_ != *new_rules.cfa_rule_ &&
+        !new_rules.cfa_rule_->Handle(handler, address, Handler::kCFARegister))
+      return false;
+  } else if (cfa_rule_) {
+    // this RuleMap has a CFA rule but new_rules doesn't.
+    // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
+    // it's garbage input. The instruction interpreter should have
+    // detected this and warned, so take no action here.
+  } else if (new_rules.cfa_rule_) {
+    // This shouldn't be possible: NEW_RULES is some prior state, and
+    // there's no way to remove entries.
+    MOZ_ASSERT(0);
+  } else {
+    // Both CFA rules are empty.  No action needed.
+  }
+
+  // Traverse the two maps in order by register number, and report
+  // whatever differences we find.
+  RuleByNumber::const_iterator old_it = registers_.begin();
+  RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
+  while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
+    if (old_it->first < new_it->first) {
+      // This RuleMap has an entry for old_it->first, but NEW_RULES
+      // doesn't.
+      //
+      // This isn't really the right thing to do, but since CFI generally
+      // only mentions callee-saves registers, and GCC's convention for
+      // callee-saves registers is that they are unchanged, it's a good
+      // approximation.
+      if (!handler->SameValueRule(address, old_it->first))
+        return false;
+      ++old_it;
+    } else if (old_it->first > new_it->first) {
+      // NEW_RULES has entry for new_it->first, but this RuleMap
+      // doesn't. This shouldn't be possible: NEW_RULES is some prior
+      // state, and there's no way to remove entries.
+      MOZ_ASSERT(0);
+      // Step past the unexpected entry anyway. Neither iterator would
+      // otherwise advance in this branch, so a build in which the
+      // assertion above is compiled out would loop here forever.
+      ++new_it;
+    } else {
+      // Both maps have an entry for this register. Report the new
+      // rule if it is different.
+      if (*old_it->second != *new_it->second &&
+          !new_it->second->Handle(handler, address, new_it->first))
+        return false;
+      ++new_it;
+      ++old_it;
+    }
+  }
+  // Finish off entries from this RuleMap with no counterparts in new_rules.
+  while (old_it != registers_.end()) {
+    if (!handler->SameValueRule(address, old_it->first))
+      return false;
+    ++old_it;
+  }
+  // Since we only make transitions from a rule set to some previously
+  // saved rule set, and we can only add rules to the map, NEW_RULES
+  // must have no more rules than *this, so it should be exhausted by now.
+  MOZ_ASSERT(new_it == new_rules.registers_.end());
+
+  return true;
+}
+
+// Destroy the CFA rule and every register rule, leaving cfa_rule_ null
+// and registers_ empty.
+void CallFrameInfo::RuleMap::Clear() {
+  delete cfa_rule_;
+  cfa_rule_ = NULL;
+
+  RuleByNumber::iterator entry = registers_.begin();
+  while (entry != registers_.end()) {
+    delete entry->second;
+    ++entry;
+  }
+  registers_.clear();
+}
+
+// Everything the call frame information interpreter needs to keep track
+// of while it works through the instructions of a CIE and then an FDE.
+class CallFrameInfo::State {
+ public:
+  // Set up an interpreter state that reads through READER, reports rules
+  // to HANDLER and problems to REPORTER, and whose first instruction
+  // applies to ADDRESS.
+  State(ByteReader *reader, Handler *handler, Reporter *reporter,
+        uint64 address)
+      : reader_(reader),
+        handler_(handler),
+        reporter_(reporter),
+        address_(address),
+        entry_(NULL),
+        cursor_(NULL),
+        saved_rules_(NULL) { }
+
+  // Release the saved-state stack, if one exists.
+  ~State() {
+    delete saved_rules_;
+  }
+
+  // Run the instructions of CIE, remember the rule set they produce for
+  // later DW_CFA_restore instructions, and return true. On error, report
+  // the problem to reporter_ and return false.
+  bool InterpretCIE(const CIE &cie);
+
+  // Run the instructions of FDE, and return true. On error, report the
+  // problem to reporter_ and return false.
+  bool InterpretFDE(const FDE &fde);
+
+ private:
+  // Where ParseOperands puts the operands of a CFI instruction.
+  struct Operands {
+    unsigned register_number;  // A register number.
+    uint64 offset;             // An offset or address.
+    long signed_offset;        // A signed offset.
+    string expression;         // A DWARF expression.
+  };
+
+  // Read the operands described by FORMAT from this state's instruction
+  // stream. On success, fill in OPERANDS and return true. On failure,
+  // report the problem and return false.
+  //
+  // FORMAT is a sequence of these characters:
+  //
+  //   'r'  unsigned LEB128 register number (OPERANDS->register_number)
+  //   'o'  unsigned LEB128 offset          (OPERANDS->offset)
+  //   's'  signed LEB128 offset            (OPERANDS->signed_offset)
+  //   'a'  machine-size address            (OPERANDS->offset)
+  //        (If the CIE has a 'z' augmentation string, 'a' uses the
+  //        encoding specified by the 'R' argument.)
+  //   '1'  a one-byte offset               (OPERANDS->offset)
+  //   '2'  a two-byte offset               (OPERANDS->offset)
+  //   '4'  a four-byte offset              (OPERANDS->offset)
+  //   '8'  an eight-byte offset            (OPERANDS->offset)
+  //   'e'  a DW_FORM_block holding a       (OPERANDS->expression)
+  //        DWARF expression
+  bool ParseOperands(const char *format, Operands *operands);
+
+  // Execute a single CFI instruction from the instruction stream: update
+  // this state, report any rule changes to handler_, and return true. On
+  // failure, report the problem and return false.
+  bool DoInstruction();
+
+  // The Do* member functions below are helpers for DoInstruction. Each
+  // one does the real work of an operation that has several encodings.
+
+  // Make the CFA rule "value of BASE_REGISTER plus OFFSET", and return
+  // true. On failure, report and return false. (Used for DW_CFA_def_cfa
+  // and DW_CFA_def_cfa_sf.)
+  bool DoDefCFA(unsigned base_register, long offset);
+
+  // Set the offset of the CFA rule to OFFSET, and return true. On
+  // failure, report and return false. (Used for DW_CFA_def_cfa_offset and
+  // DW_CFA_def_cfa_offset_sf.)
+  bool DoDefCFAOffset(long offset);
+
+  // Record that REG can be recovered using RULE, and return true. On
+  // failure, report and return false.
+  bool DoRule(unsigned reg, Rule *rule);
+
+  // Record that REG is saved at OFFSET from the CFA, and return true. On
+  // failure, report and return false. (Used for DW_CFA_offset,
+  // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
+  bool DoOffset(unsigned reg, long offset);
+
+  // Record that the caller's value for REG is the CFA plus OFFSET, and
+  // return true. On failure, report and return false. (Used for
+  // DW_CFA_val_offset and DW_CFA_val_offset_sf.)
+  bool DoValOffset(unsigned reg, long offset);
+
+  // Put REG back to the rule the CIE established for it, and return true.
+  // On failure, report and return false. (Used for DW_CFA_restore and
+  // DW_CFA_restore_extended.)
+  bool DoRestore(unsigned reg);
+
+  // The section offset of the instruction at cursor_, for use in error
+  // messages.
+  uint64 CursorOffset() {
+    return entry_->offset + (cursor_ - entry_->start);
+  }
+
+  // Shorthand: report that entry_ is incomplete, and return false.
+  bool ReportIncomplete() {
+    reporter_->Incomplete(entry_->offset, entry_->kind);
+    return false;
+  }
+
+  // Reads multi-byte values with the appropriate endianness.
+  ByteReader *reader_;
+
+  // Receives the data we find.
+  Handler *handler_;
+
+  // Receives reports of problems in the info we're parsing.
+  Reporter *reporter_;
+
+  // The code address the next instruction in the stream applies to.
+  uint64 address_;
+
+  // The entry whose instructions are being processed: a CIE at first,
+  // and then an FDE.
+  const Entry *entry_;
+
+  // The next instruction to process.
+  const char *cursor_;
+
+  // The rules currently in force.
+  RuleMap rules_;
+
+  // The rules the CIE established, consulted by DW_CFA_restore and
+  // DW_CFA_restore_extended. Set once the CIE's instructions have been
+  // interpreted.
+  RuleMap cie_rules_;
+
+  // A stack of saved states, for DW_CFA_remember_state and
+  // DW_CFA_restore_state. Null until a stack is created; deleted by the
+  // destructor.
+  std::stack<RuleMap>* saved_rules_;
+};
+
+// Execute every instruction of CIE in turn. Return false as soon as one
+// of them fails; otherwise snapshot the resulting rules into cie_rules_
+// and return true.
+bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
+  entry_ = &cie;
+  for (cursor_ = entry_->instructions; cursor_ < entry_->end; ) {
+    if (!DoInstruction())
+      return false;
+  }
+
+  // DW_CFA_restore and DW_CFA_restore_extended refer back to the rules
+  // the CIE established, so keep a copy of them.
+  cie_rules_ = rules_;
+  return true;
+}
+
+// Execute every instruction of FDE in turn. Return false as soon as one
+// of them fails, and true once all of them have been processed.
+bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
+  entry_ = &fde;
+  for (cursor_ = entry_->instructions; cursor_ < entry_->end; ) {
+    if (!DoInstruction())
+      return false;
+  }
+  return true;
+}
+
+// Read one operand from the instruction stream for each character of
+// FORMAT, storing the results in OPERANDS and advancing cursor_ past
+// what was read. Return true on success. If an operand does not fit in
+// the bytes left in the current entry, report the entry as incomplete
+// and return false.
+bool CallFrameInfo::State::ParseOperands(const char *format,
+                                         Operands *operands) {
+  for (const char *spec = format; *spec != '\0'; ++spec) {
+    // Bytes between the cursor and the end of the current entry.
+    const size_t remaining = entry_->end - cursor_;
+    // Encoded size of a variable-length operand, set by the reader.
+    size_t consumed;
+
+    switch (*spec) {
+      case 'r':
+        operands->register_number =
+            reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      case 'o':
+        operands->offset = reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      case 's':
+        operands->signed_offset =
+            reader_->ReadSignedLEB128(cursor_, &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      case 'a':
+        operands->offset =
+            reader_->ReadEncodedPointer(cursor_,
+                                        entry_->cie->pointer_encoding,
+                                        &consumed);
+        if (consumed > remaining) return ReportIncomplete();
+        cursor_ += consumed;
+        break;
+
+      case '1':
+        if (remaining < 1) return ReportIncomplete();
+        operands->offset = static_cast<unsigned char>(*cursor_);
+        cursor_ += 1;
+        break;
+
+      case '2':
+        if (remaining < 2) return ReportIncomplete();
+        operands->offset = reader_->ReadTwoBytes(cursor_);
+        cursor_ += 2;
+        break;
+
+      case '4':
+        if (remaining < 4) return ReportIncomplete();
+        operands->offset = reader_->ReadFourBytes(cursor_);
+        cursor_ += 4;
+        break;
+
+      case '8':
+        if (remaining < 8) return ReportIncomplete();
+        operands->offset = reader_->ReadEightBytes(cursor_);
+        cursor_ += 8;
+        break;
+
+      case 'e': {
+        // A block is a LEB128 byte count followed by that many bytes;
+        // both parts have to fit in what is left of the entry.
+        const size_t block_size =
+            reader_->ReadUnsignedLEB128(cursor_, &consumed);
+        if (consumed > remaining || block_size > remaining - consumed)
+          return ReportIncomplete();
+        cursor_ += consumed;
+        operands->expression.assign(cursor_, block_size);
+        cursor_ += block_size;
+        break;
+      }
+
+      default:
+        // FORMAT strings are written by this file, so an unknown
+        // character is a programming error.
+        MOZ_ASSERT(0);
+    }
+  }
+
+  return true;
+}
+
+// Execute the one call frame instruction that cursor_ points at, advancing
+// cursor_ past the opcode byte and any operands the instruction takes.
+//
+// Returns true if the instruction was processed. Returns false if its
+// operands could not be parsed, if one of the Do* helpers or a rule's
+// Handle call returned false, or if the opcode is not recognized; several
+// of the failure paths notify reporter_ before returning.
+bool CallFrameInfo::State::DoInstruction() {
+  CIE *cie = entry_->cie;
+  Operands ops;
+
+  // Our entry's kind should have been set by now.
+  MOZ_ASSERT(entry_->kind != kUnknown);
+
+  // We shouldn't have been invoked unless there were more
+  // instructions to parse.
+  MOZ_ASSERT(cursor_ < entry_->end);
+
+  unsigned opcode = *cursor_++;
+  // Opcodes with either of the top two bits (0xc0) set keep their first
+  // operand in the low six bits (0x3f) of the opcode byte itself.
+  if ((opcode & 0xc0) != 0) {
+    switch (opcode & 0xc0) {
+      // Advance the address.
+      case DW_CFA_advance_loc: {
+        size_t code_offset = opcode & 0x3f;
+        address_ += code_offset * cie->code_alignment_factor;
+        break;
+      }
+
+      // Find a register at an offset from the CFA.
+      case DW_CFA_offset:
+        if (!ParseOperands("o", &ops) ||
+            !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
+          return false;
+        break;
+
+      // Restore the rule established for a register by the CIE.
+      case DW_CFA_restore:
+        if (!DoRestore(opcode & 0x3f)) return false;
+        break;
+
+      // The 'if' above should have excluded this possibility.
+      default:
+        MOZ_ASSERT(0);
+    }
+
+    // Return here, so the big switch below won't be indented.
+    return true;
+  }
+
+  // All remaining opcodes occupy the whole byte; their operands, if any,
+  // follow in the instruction stream and are fetched by ParseOperands.
+  switch (opcode) {
+    // Set the address.
+    case DW_CFA_set_loc:
+      if (!ParseOperands("a", &ops)) return false;
+      address_ = ops.offset;
+      break;
+
+    // Advance the address.
+    case DW_CFA_advance_loc1:
+      if (!ParseOperands("1", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address.
+    case DW_CFA_advance_loc2:
+      if (!ParseOperands("2", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address.
+    case DW_CFA_advance_loc4:
+      if (!ParseOperands("4", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Advance the address.
+    case DW_CFA_MIPS_advance_loc8:
+      if (!ParseOperands("8", &ops)) return false;
+      address_ += ops.offset * cie->code_alignment_factor;
+      break;
+
+    // Compute the CFA by adding an offset to a register.
+    case DW_CFA_def_cfa:
+      if (!ParseOperands("ro", &ops) ||
+          !DoDefCFA(ops.register_number, ops.offset))
+        return false;
+      break;
+
+    // Compute the CFA by adding an offset to a register.
+    case DW_CFA_def_cfa_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoDefCFA(ops.register_number,
+                    ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // Change the base register used to compute the CFA.
+    case DW_CFA_def_cfa_register: {
+      // The existing CFA rule is looked up before the operand is parsed,
+      // so a missing CFA rule is reported even when the operand is
+      // truncated as well.
+      Rule *cfa_rule = rules_.CFARule();
+      if (!cfa_rule) {
+        reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+        return false;
+      }
+      if (!ParseOperands("r", &ops)) return false;
+      cfa_rule->SetBaseRegister(ops.register_number);
+      if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister))
+        return false;
+      break;
+    }
+
+    // Change the offset used to compute the CFA.
+    case DW_CFA_def_cfa_offset:
+      if (!ParseOperands("o", &ops) ||
+          !DoDefCFAOffset(ops.offset))
+        return false;
+      break;
+
+    // Change the offset used to compute the CFA.
+    case DW_CFA_def_cfa_offset_sf:
+      if (!ParseOperands("s", &ops) ||
+          !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // Specify an expression whose value is the CFA.
+    case DW_CFA_def_cfa_expression: {
+      if (!ParseOperands("e", &ops))
+        return false;
+      Rule *rule = new ValExpressionRule(ops.expression);
+      rules_.SetCFARule(rule);
+      if (!rule->Handle(handler_, address_, Handler::kCFARegister))
+        return false;
+      break;
+    }
+
+    // The register's value cannot be recovered.
+    case DW_CFA_undefined: {
+      // Thanks to the short-circuiting ||, the rule object is only
+      // allocated once the operands have been parsed successfully.
+      if (!ParseOperands("r", &ops) ||
+          !DoRule(ops.register_number, new UndefinedRule()))
+        return false;
+      break;
+    }
+
+    // The register's value is unchanged from its value in the caller.
+    case DW_CFA_same_value: {
+      if (!ParseOperands("r", &ops) ||
+          !DoRule(ops.register_number, new SameValueRule()))
+        return false;
+      break;
+    }
+
+    // Find a register at an offset from the CFA.
+    case DW_CFA_offset_extended:
+      if (!ParseOperands("ro", &ops) ||
+          !DoOffset(ops.register_number,
+                    ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register is saved at an offset from the CFA.
+    case DW_CFA_offset_extended_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoOffset(ops.register_number,
+                    ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register is saved at an offset from the CFA.
+    case DW_CFA_GNU_negative_offset_extended:
+      // Same as DW_CFA_offset_extended, but the parsed offset is negated
+      // before being scaled by the data alignment factor.
+      if (!ParseOperands("ro", &ops) ||
+          !DoOffset(ops.register_number,
+                    -ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register's value is the sum of the CFA plus an offset.
+    case DW_CFA_val_offset:
+      if (!ParseOperands("ro", &ops) ||
+          !DoValOffset(ops.register_number,
+                       ops.offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register's value is the sum of the CFA plus an offset.
+    case DW_CFA_val_offset_sf:
+      if (!ParseOperands("rs", &ops) ||
+          !DoValOffset(ops.register_number,
+                       ops.signed_offset * cie->data_alignment_factor))
+        return false;
+      break;
+
+    // The register has been saved in another register.
+    case DW_CFA_register: {
+      // The second operand is parsed with the 'o' format, so the number of
+      // the register holding the saved value arrives in ops.offset.
+      if (!ParseOperands("ro", &ops) ||
+          !DoRule(ops.register_number, new RegisterRule(ops.offset)))
+        return false;
+      break;
+    }
+
+    // An expression yields the address at which the register is saved.
+    case DW_CFA_expression: {
+      if (!ParseOperands("re", &ops) ||
+          !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
+        return false;
+      break;
+    }
+
+    // An expression yields the caller's value for the register.
+    case DW_CFA_val_expression: {
+      if (!ParseOperands("re", &ops) ||
+          !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
+        return false;
+      break;
+    }
+
+    // Restore the rule established for a register by the CIE.
+    case DW_CFA_restore_extended:
+      if (!ParseOperands("r", &ops) ||
+          !DoRestore( ops.register_number))
+        return false;
+      break;
+
+    // Save the current set of rules on a stack.
+    case DW_CFA_remember_state:
+      // The stack itself is created on first use.
+      if (!saved_rules_) {
+        saved_rules_ = new std::stack<RuleMap>();
+      }
+      saved_rules_->push(rules_);
+      break;
+
+    // Pop the current set of rules off the stack.
+    case DW_CFA_restore_state: {
+      if (!saved_rules_ || saved_rules_->empty()) {
+        reporter_->EmptyStateStack(entry_->offset, entry_->kind,
+                                   CursorOffset());
+        return false;
+      }
+      const RuleMap &new_rules = saved_rules_->top();
+      // Refuse to go from having a CFA rule to having none.
+      if (rules_.CFARule() && !new_rules.CFARule()) {
+        reporter_->ClearingCFARule(entry_->offset, entry_->kind,
+                                   CursorOffset());
+        return false;
+      }
+      // new_rules refers to the top of the stack, so the rules must be
+      // copied into rules_ before the pop below invalidates the reference.
+      rules_.HandleTransitionTo(handler_, address_, new_rules);
+      rules_ = new_rules;
+      saved_rules_->pop();
+      break;
+    }
+
+    // No operation.  (Padding instruction.)
+    case DW_CFA_nop:
+      break;
+
+    // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
+    // are saved in registers 24 through 31 (%i0-%i7), and registers
+    // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
+    // (0-15 * the register size). The register numbers must be
+    // hard-coded. A GNU extension, and not a pretty one.
+    case DW_CFA_GNU_window_save: {
+      // Save %o0-%o7 in %i0-%i7.
+      for (int i = 8; i < 16; i++)
+        if (!DoRule(i, new RegisterRule(i + 16)))
+          return false;
+      // Save %l0-%l7 and %i0-%i7 at the CFA.
+      for (int i = 16; i < 32; i++)
+        // Assume that the byte reader's address size is the same as
+        // the architecture's register size. !@#%*^ hilarious.
+        if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
+                                      (i - 16) * reader_->AddressSize())))
+          return false;
+      break;
+    }
+
+    // I'm not sure what this is. GDB doesn't use it for unwinding.
+    case DW_CFA_GNU_args_size:
+      // The operand is parsed only to step over it; its value is unused.
+      if (!ParseOperands("o", &ops)) return false;
+      break;
+
+    // An opcode we don't recognize.
+    default: {
+      reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Install a fresh CFA rule of the form "BASE_REGISTER plus OFFSET" and
+// offer it to the handler. Returns whatever the rule's Handle call returns.
+bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
+  Rule *const new_cfa = new ValOffsetRule(base_register, offset);
+  rules_.SetCFARule(new_cfa);
+  const bool handled =
+      new_cfa->Handle(handler_, address_, Handler::kCFARegister);
+  return handled;
+}
+
+// Change only the offset of the CFA rule currently in effect and offer the
+// updated rule to the handler. If no CFA rule has been set, notify the
+// reporter and return false.
+bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
+  if (Rule *current_cfa = rules_.CFARule()) {
+    current_cfa->SetOffset(offset);
+    return current_cfa->Handle(handler_, address_, Handler::kCFARegister);
+  }
+
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Record RULE as the rule for register REG in the current rule set, then
+// offer it to the handler. Returns the result of the rule's Handle call.
+bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
+  rules_.SetRegisterRule(reg, rule);
+  const bool handled = rule->Handle(handler_, address_, reg);
+  return handled;
+}
+
+// Give register REG an OffsetRule based on the CFA register with the given
+// OFFSET. This needs a CFA rule to be in effect; if there is none, notify
+// the reporter and return false.
+bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
+  if (rules_.CFARule()) {
+    Rule *saved_at = new OffsetRule(Handler::kCFARegister, offset);
+    return DoRule(reg, saved_at);
+  }
+
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Give register REG a ValOffsetRule based on the CFA register with the
+// given OFFSET. This needs a CFA rule to be in effect; if there is none,
+// notify the reporter and return false.
+bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
+  if (rules_.CFARule()) {
+    Rule *value_is = new ValOffsetRule(Handler::kCFARegister, offset);
+    return DoRule(reg, value_is);
+  }
+
+  reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
+  return false;
+}
+
+// Reinstate, for register REG, the rule recorded in cie_rules_. Restoring
+// is rejected (reported, returns false) while the entry being interpreted
+// is itself a CIE.
+bool CallFrameInfo::State::DoRestore(unsigned reg) {
+  const bool interpreting_cie = (entry_->kind == kCIE);
+  if (interpreting_cie) {
+    // DW_CFA_restore and DW_CFA_restore_extended are meaningless in a CIE.
+    reporter_->RestoreInCIE(entry_->offset, CursorOffset());
+    return false;
+  }
+
+  Rule *restored = cie_rules_.RegisterRule(reg);
+  if (restored)
+    return DoRule(reg, restored);
+
+  // No rule was recorded for this register. Falling back to "same value"
+  // is not strictly correct, but CFI generally only mentions callee-saves
+  // registers, and GCC's convention is that those are unchanged, so it is
+  // a good approximation.
+  restored = new SameValueRule();
+  return DoRule(reg, restored);
+}
+
+// Read the header shared by CIEs and FDEs, starting at CURSOR: the initial
+// length and the id field. On success, fill in ENTRY's offset, start, kind,
+// end, id and fields members, set ENTRY->cie to NULL, and return true.
+//
+// On failure, report the problem and return false. ENTRY->end stays NULL
+// unless the entry's extent could be determined before the failure.
+bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) {
+  const char *const limit = buffer_ + buffer_length_;
+
+  // Fill in what error reporting needs before anything can fail.
+  entry->offset = cursor - buffer_;
+  entry->start = cursor;
+  entry->kind = kUnknown;
+  entry->end = NULL;
+
+  // The initial length comes first; reading it also sets reader_'s
+  // offset size.
+  size_t initial_length_bytes;
+  const uint64 body_length =
+      reader_->ReadInitialLength(cursor, &initial_length_bytes);
+  if (size_t(limit - cursor) < initial_length_bytes)
+    return ReportIncomplete(entry);
+  cursor += initial_length_bytes;
+
+  // A zero length in .eh_frame data terminates the series of entries.
+  if (eh_frame_ && body_length == 0) {
+    entry->kind = kTerminator;
+    entry->end = cursor;
+    return true;
+  }
+
+  // The body must fit in what remains of the buffer. This also rejects a
+  // 64-bit length too large for this machine's size_t, so the pointer
+  // arithmetic below is safe.
+  if (size_t(limit - cursor) < body_length)
+    return ReportIncomplete(entry);
+
+  // The length counts the bytes following the initial length field, which
+  // is exactly where cursor is now.
+  entry->end = cursor + body_length;
+
+  // Next is the id field: a CIE marker, or a reference to the FDE's CIE.
+  const size_t id_bytes = reader_->OffsetSize();
+  if (size_t(entry->end - cursor) < id_bytes)
+    return ReportIncomplete(entry);
+  entry->id = reader_->ReadOffset(cursor);
+
+  // cursor still points at the id field here: .eh_frame data needs that
+  // position to turn an FDE's CIE reference into a section offset.
+  if (eh_frame_) {
+    // .eh_frame: an id of zero marks a CIE; anything else is the distance
+    // back from the id field to the start of the FDE's CIE.
+    if (entry->id == 0) {
+      entry->kind = kCIE;
+    } else {
+      entry->kind = kFDE;
+      // Convert the distance from the id field into an offset from the
+      // start of the buffer.
+      entry->id = (cursor - buffer_) - entry->id;
+    }
+  } else if (id_bytes == 4) {
+    // DWARF CFI, 32-bit offsets: an all-ones id marks a CIE; anything
+    // else is the CIE's offset from the start of the section.
+    entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
+  } else {
+    // DWARF CFI, 64-bit offsets: same scheme with a wider marker.
+    MOZ_ASSERT(id_bytes == 8);
+    entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
+  }
+
+  // The kind-specific fields begin right after the id.
+  entry->fields = cursor + id_bytes;
+
+  entry->cie = NULL;
+
+  return true;
+}
+
+// Parse the CIE-specific fields of CIE, whose prologue must already have
+// been read (cie->fields and cie->end are used as the parse bounds).
+//
+// On success, fill in version, augmentation, the alignment factors, the
+// return address register, the 'z' augmentation members and instructions,
+// and return true. On failure, report the problem and return false; the
+// members reset under "Prepare for early exit" then hold those reset
+// values, or whatever was parsed before the failure.
+bool CallFrameInfo::ReadCIEFields(CIE *cie) {
+  const char *cursor = cie->fields;
+  size_t len;
+
+  MOZ_ASSERT(cie->kind == kCIE);
+
+  // Prepare for early exit.
+  cie->version = 0;
+  cie->augmentation.clear();
+  cie->code_alignment_factor = 0;
+  cie->data_alignment_factor = 0;
+  cie->return_address_register = 0;
+  cie->has_z_augmentation = false;
+  cie->pointer_encoding = DW_EH_PE_absptr;
+  cie->instructions = 0;
+
+  // Parse the version number.
+  if (cie->end - cursor < 1)
+    return ReportIncomplete(cie);
+  cie->version = reader_->ReadOneByte(cursor);
+  cursor++;
+
+  // If we don't recognize the version, we can't parse any more fields of the
+  // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a
+  // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well;
+  // the difference between those versions seems to be the same as for
+  // .debug_frame.
+  if (cie->version < 1 || cie->version > 3) {
+    reporter_->UnrecognizedVersion(cie->offset, cie->version);
+    return false;
+  }
+
+  // The augmentation string is NUL-terminated; the terminator must lie
+  // within the entry.
+  const char *augmentation_start = cursor;
+  const void *augmentation_end =
+      memchr(augmentation_start, '\0', cie->end - augmentation_start);
+  if (! augmentation_end) return ReportIncomplete(cie);
+  cursor = static_cast<const char *>(augmentation_end);
+  cie->augmentation = string(augmentation_start,
+                                  cursor - augmentation_start);
+  // Skip the terminating '\0'.
+  cursor++;
+
+  // Is this CFI augmented?
+  if (!cie->augmentation.empty()) {
+    // Is it an augmentation we recognize?
+    if (cie->augmentation[0] == DW_Z_augmentation_start) {
+      // Linux C++ ABI 'z' augmentation, used for exception handling data.
+      cie->has_z_augmentation = true;
+    } else {
+      // Not an augmentation we recognize. Augmentations can have arbitrary
+      // effects on the form of rest of the content, so we have to give up.
+      reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+      return false;
+    }
+  }
+
+  // Parse the code alignment factor.
+  cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the data alignment factor.
+  cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
+  if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+  cursor += len;
+
+  // Parse the return address register. This is a ubyte in version 1, and
+  // a ULEB128 in version 3.
+  if (cie->version == 1) {
+    if (cursor >= cie->end) return ReportIncomplete(cie);
+    cie->return_address_register = uint8(*cursor++);
+  } else {
+    cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
+    if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
+    cursor += len;
+  }
+
+  // If we have a 'z' augmentation string, find the augmentation data and
+  // use the augmentation string to parse it.
+  if (cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
+    // Check the two pieces separately rather than testing len + data_size:
+    // data_size comes straight from the file, and a huge value would make
+    // the sum wrap around and slip past the bounds check.
+    const size_t bytes_left = size_t(cie->end - cursor);
+    if (len > bytes_left || data_size > bytes_left - len)
+      return ReportIncomplete(cie);
+    cursor += len;
+    const char *data = cursor;
+    cursor += data_size;
+    const char *data_end = cursor;
+
+    cie->has_z_lsda = false;
+    cie->has_z_personality = false;
+    cie->has_z_signal_frame = false;
+
+    // Walk the augmentation string, and extract values from the
+    // augmentation data as the string directs.
+    for (size_t i = 1; i < cie->augmentation.size(); i++) {
+      switch (cie->augmentation[i]) {
+        case DW_Z_has_LSDA:
+          // The CIE's augmentation data holds the language-specific data
+          // area pointer's encoding, and the FDE's augmentation data holds
+          // the pointer itself.
+          cie->has_z_lsda = true;
+          // Fetch the LSDA encoding from the augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->lsda_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->lsda_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
+            return false;
+          }
+          // Don't check if the encoding is usable here --- we haven't
+          // read the FDE's fields yet, so we're not prepared for
+          // DW_EH_PE_funcrel, although that's a fine encoding for the
+          // LSDA to use, since it appears in the FDE.
+          break;
+
+        case DW_Z_has_personality_routine:
+          // The CIE's augmentation data holds the personality routine
+          // pointer's encoding, followed by the pointer itself.
+          cie->has_z_personality = true;
+          // Fetch the personality routine pointer's encoding from the
+          // augmentation data.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->personality_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->personality_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->personality_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->personality_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->personality_encoding);
+            return false;
+          }
+          // Fetch the personality routine's pointer itself from the data.
+          cie->personality_address =
+            reader_->ReadEncodedPointer(data, cie->personality_encoding,
+                                        &len);
+          if (len > size_t(data_end - data))
+            return ReportIncomplete(cie);
+          data += len;
+          break;
+
+        case DW_Z_has_FDE_address_encoding:
+          // The CIE's augmentation data holds the pointer encoding to use
+          // for addresses in the FDE.
+          if (data >= data_end) return ReportIncomplete(cie);
+          cie->pointer_encoding = DwarfPointerEncoding(*data++);
+          if (!reader_->ValidEncoding(cie->pointer_encoding)) {
+            reporter_->InvalidPointerEncoding(cie->offset,
+                                              cie->pointer_encoding);
+            return false;
+          }
+          if (!reader_->UsableEncoding(cie->pointer_encoding)) {
+            reporter_->UnusablePointerEncoding(cie->offset,
+                                               cie->pointer_encoding);
+            return false;
+          }
+          break;
+
+        case DW_Z_is_signal_trampoline:
+          // Frames using this CIE are signal delivery frames.
+          cie->has_z_signal_frame = true;
+          break;
+
+        default:
+          // An augmentation we don't recognize.
+          reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
+          return false;
+      }
+    }
+  }
+
+  // The CIE's instructions start here.
+  cie->instructions = cursor;
+
+  return true;
+}
+
+// Parse the FDE-specific fields of FDE. The FDE's prologue must already
+// have been read, and fde->cie must point at its fully parsed CIE, whose
+// pointer_encoding and 'z' augmentation members direct the parse.
+//
+// On success, fill in address, size, instructions and, when the CIE says
+// there is one, lsda_address, and return true. On failure, report the
+// problem and return false.
+//
+// Once the starting address has been read, it is handed to
+// reader_->SetFunctionBase; this function never clears it again.
+bool CallFrameInfo::ReadFDEFields(FDE *fde) {
+  const char *cursor = fde->fields;
+  size_t size;
+
+  fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
+                                             &size);
+  if (size > size_t(fde->end - cursor))
+    return ReportIncomplete(fde);
+  cursor += size;
+  reader_->SetFunctionBase(fde->address);
+
+  // For the length, we strip off the upper nybble of the encoding used for
+  // the starting address.
+  DwarfPointerEncoding length_encoding =
+    DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
+  fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
+  if (size > size_t(fde->end - cursor))
+    return ReportIncomplete(fde);
+  cursor += size;
+
+  // If the CIE has a 'z' augmentation string, then augmentation data
+  // appears here.
+  if (fde->cie->has_z_augmentation) {
+    uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
+    // Check the two pieces separately rather than testing size + data_size:
+    // data_size comes straight from the file, and a huge value would make
+    // the sum wrap around and slip past the bounds check.
+    const size_t bytes_left = size_t(fde->end - cursor);
+    if (size > bytes_left || data_size > bytes_left - size)
+      return ReportIncomplete(fde);
+    cursor += size;
+
+    // In the abstract, we should walk the augmentation string, and extract
+    // items from the FDE's augmentation data as we encounter augmentation
+    // string characters that specify their presence: the ordering of items
+    // in the augmentation string determines the arrangement of values in
+    // the augmentation data.
+    //
+    // In practice, there's only ever one value in FDE augmentation data
+    // that we support --- the LSDA pointer --- and we have to bail if we
+    // see any unrecognized augmentation string characters. So if there is
+    // anything here at all, we know what it is, and where it starts.
+    if (fde->cie->has_z_lsda) {
+      // Check whether the LSDA's pointer encoding is usable now: only once
+      // we've parsed the FDE's starting address do we call reader_->
+      // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
+      // usable.
+      if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
+        reporter_->UnusablePointerEncoding(fde->cie->offset,
+                                           fde->cie->lsda_encoding);
+        return false;
+      }
+
+      fde->lsda_address =
+        reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
+      // The pointer must fit inside the augmentation data, which the
+      // check above has already confined to the entry.
+      if (size > data_size)
+        return ReportIncomplete(fde);
+      // Ideally, we would also complain here if there were unconsumed
+      // augmentation data.
+    }
+
+    cursor += data_size;
+  }
+
+  // The FDE's instructions start after those.
+  fde->instructions = cursor;
+
+  return true;
+}
+
+// Walk every entry in buffer_, skipping CIEs and feeding each FDE (together
+// with the CIE it refers to) to handler_.
+//
+// Returns true only if every entry was processed without a problem. An
+// entry whose extent is known but which is otherwise bad is skipped and
+// makes the result false; an entry whose extent cannot be determined stops
+// the walk altogether. An .eh_frame terminator ends the walk without
+// affecting the result.
+bool CallFrameInfo::Start() {
+  const char *buffer_end = buffer_ + buffer_length_;
+  const char *cursor;
+  bool all_ok = true;
+  const char *entry_end;
+  bool ok;
+
+  // Traverse all the entries in buffer_, skipping CIEs and offering
+  // FDEs to the handler.
+  //
+  // The increment expression runs on every 'continue', so each iteration
+  // must have set both entry_end and ok before continuing.
+  for (cursor = buffer_; cursor < buffer_end;
+       cursor = entry_end, all_ok = all_ok && ok) {
+    FDE fde;
+
+    // Make it easy to skip this entry with 'continue': assume that
+    // things are not okay until we've checked all the data, and
+    // prepare the address of the next entry.
+    ok = false;
+
+    // Read the entry's prologue.
+    if (!ReadEntryPrologue(cursor, &fde)) {
+      if (!fde.end) {
+        // If we couldn't even figure out this entry's extent, then we
+        // must stop processing entries altogether.
+        all_ok = false;
+        break;
+      }
+      entry_end = fde.end;
+      continue;
+    }
+
+    // The next iteration picks up after this entry.
+    entry_end = fde.end;
+
+    // Did we see an .eh_frame terminating mark?
+    if (fde.kind == kTerminator) {
+      // If there appears to be more data left in the section after the
+      // terminating mark, warn the user. But this is just a warning;
+      // we leave all_ok true.
+      if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
+      break;
+    }
+
+    // In this loop, we skip CIEs. We only parse them fully when we
+    // parse an FDE that refers to them. This limits our memory
+    // consumption (beyond the buffer itself) to that needed to
+    // process the largest single entry.
+    if (fde.kind != kFDE) {
+      ok = true;
+      continue;
+    }
+
+    // Validate the CIE pointer. An offset equal to the buffer length
+    // points one past the end of the data, where no entry can start, so
+    // it is rejected too; otherwise ReadEntryPrologue, which reads the
+    // initial length before bounds-checking it, would read past the buffer.
+    if (fde.id >= buffer_length_) {
+      reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
+      continue;
+    }
+
+    CIE cie;
+
+    // Parse this FDE's CIE header.
+    if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
+      continue;
+    // This had better be an actual CIE.
+    if (cie.kind != kCIE) {
+      reporter_->BadCIEId(fde.offset, fde.id);
+      continue;
+    }
+    if (!ReadCIEFields(&cie))
+      continue;
+
+    // We now have the values that govern both the CIE and the FDE.
+    cie.cie = &cie;
+    fde.cie = &cie;
+
+    // Parse the FDE's header.
+    // NOTE(review): ReadFDEFields sets the reader's function base, but the
+    // 'continue' paths from here down to the ClearFunctionBase call below
+    // leave it set --- confirm that a stale base is harmless for the
+    // entries that follow.
+    if (!ReadFDEFields(&fde))
+      continue;
+
+    // Call Entry to ask the consumer if they're interested.
+    if (!handler_->Entry(fde.offset, fde.address, fde.size,
+                         cie.version, cie.augmentation,
+                         cie.return_address_register)) {
+      // The handler isn't interested in this entry. That's not an error.
+      ok = true;
+      continue;
+    }
+
+    if (cie.has_z_augmentation) {
+      // Report the personality routine address, if we have one.
+      if (cie.has_z_personality) {
+        if (!handler_
+            ->PersonalityRoutine(cie.personality_address,
+                                 IsIndirectEncoding(cie.personality_encoding)))
+          continue;
+      }
+
+      // Report the language-specific data area address, if we have one.
+      if (cie.has_z_lsda) {
+        if (!handler_
+            ->LanguageSpecificDataArea(fde.lsda_address,
+                                       IsIndirectEncoding(cie.lsda_encoding)))
+          continue;
+      }
+
+      // If this is a signal-handling frame, report that.
+      if (cie.has_z_signal_frame) {
+        if (!handler_->SignalHandler())
+          continue;
+      }
+    }
+
+    // Interpret the CIE's instructions, and then the FDE's instructions.
+    State state(reader_, handler_, reporter_, fde.address);
+    ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
+
+    // Tell the ByteReader that the function start address from the
+    // FDE header is no longer valid.
+    reader_->ClearFunctionBase();
+
+    // Report the end of the entry.
+    handler_->End();
+  }
+
+  return all_ok;
+}
+
+// Return a human-readable name for KIND, for use in diagnostic messages.
+// Any kind other than kUnknown, kCIE and kFDE is asserted to be kTerminator.
+const char *CallFrameInfo::KindName(EntryKind kind) {
+  switch (kind) {
+    case CallFrameInfo::kUnknown:
+      return "entry";
+    case CallFrameInfo::kCIE:
+      return "common information entry";
+    case CallFrameInfo::kFDE:
+      return "frame description entry";
+    default:
+      MOZ_ASSERT(kind == CallFrameInfo::kTerminator);
+      return ".eh_frame sequence terminator";
+  }
+}
+
+// Tell the reporter that ENTRY ended before it could be fully parsed.
+// Always returns false, so parse functions can write
+// 'return ReportIncomplete(entry);'.
+bool CallFrameInfo::ReportIncomplete(Entry *entry) {
+  const EntryKind truncated_kind = entry->kind;
+  reporter_->Incomplete(entry->offset, truncated_kind);
+  return false;
+}
+
+// Format an "entry ends early" message for the entry of kind KIND at
+// OFFSET and pass it to log_.
+void CallFrameInfo::Reporter::Incomplete(uint64 offset,
+                                         CallFrameInfo::EntryKind kind) {
+  const char *const kind_name = CallFrameInfo::KindName(kind);
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
+                 filename_.c_str(), kind_name, offset, section_.c_str());
+  log_(message);
+}
+
+// Format a message saying that the end-of-data marker at OFFSET appeared
+// before the end of the section contents, and pass it to log_.
+void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
+                 " before end of section contents\n",
+                 file, offset, section);
+  log_(message);
+}
+
+// Format a message saying that the FDE at OFFSET has the out-of-range CIE
+// pointer CIE_OFFSET, and pass it to log_.
+void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
+                                                   uint64 cie_offset) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE pointer is out of range: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Format a message saying that the CIE pointer CIE_OFFSET of the FDE at
+// OFFSET does not point to a CIE, and pass it to log_.
+void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE pointer does not point to a CIE: 0x%llx\n",
+                 file, offset, section, cie_offset);
+  log_(message);
+}
+
+// Format a message saying that the entry at OFFSET specifies the
+// unrecognized CIE version VERSION, and pass it to log_.
+void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized version: %d\n",
+                 file, offset, section, version);
+  log_(message);
+}
+
+// Format a message saying that the entry at OFFSET specifies the
+// unrecognized augmentation string AUG, and pass it to log_.
+void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
+                                                       const string &aug) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI frame description entry at offset 0x%llx in '%s':"
+                 " CIE specifies unrecognized augmentation: '%s'\n",
+                 file, offset, section, aug.c_str());
+  log_(message);
+}
+
+// Format a message saying that the 'z' augmentation of the CIE at OFFSET
+// specifies the invalid pointer encoding ENCODING, and pass it to log_.
+void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
+                                                     uint8 encoding) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies invalid pointer encoding: "
+                 "0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Format a message saying that the 'z' augmentation of the CIE at OFFSET
+// specifies the pointer encoding ENCODING, for which no base address is
+// available, and pass it to log_.
+void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
+                                                      uint8 encoding) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " 'z' augmentation specifies a pointer encoding for which"
+                 " we have no base address: 0x%02x\n",
+                 file, offset, section, encoding);
+  log_(message);
+}
+
+// Format a message saying that the CIE at OFFSET contains a DW_CFA_restore
+// instruction at INSN_OFFSET, which a CIE may not use, and pass it to log_.
+void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
+  const char *const file = filename_.c_str();
+  const char *const section = section_.c_str();
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI common information entry at offset 0x%llx in '%s':"
+                 " the DW_CFA_restore instruction at offset 0x%llx"
+                 " cannot be used in a common information entry\n",
+                 file, offset, section, insn_offset);
+  log_(message);
+}
+
+// Format a message saying that the instruction at INSN_OFFSET, in the
+// entry of kind KIND at OFFSET, is unrecognized, and pass it to log_.
+void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
+                                             CallFrameInfo::EntryKind kind,
+                                             uint64 insn_offset) {
+  const char *const kind_name = CallFrameInfo::KindName(kind);
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the instruction at offset 0x%llx is unrecognized\n",
+                 filename_.c_str(), kind_name,
+                 offset, section_.c_str(), insn_offset);
+  log_(message);
+}
+
+// Format a message saying that the instruction at INSN_OFFSET, in the
+// entry of kind KIND at OFFSET, needs a CFA rule but none has been set,
+// and pass it to log_.
+void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
+                                        CallFrameInfo::EntryKind kind,
+                                        uint64 insn_offset) {
+  const char *const kind_name = CallFrameInfo::KindName(kind);
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s':"
+                 " the instruction at offset 0x%llx assumes that a CFA rule "
+                 "has been set, but none has been set\n",
+                 filename_.c_str(), kind_name, offset,
+                 section_.c_str(), insn_offset);
+  log_(message);
+}
+
+// Report a DW_CFA_restore_state instruction encountered while the
+// saved-state stack is empty.  |offset| and |kind| identify the entry,
+// |insn_offset| is the instruction offset printed in the message,
+// which goes to log_.
+void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s': "
+                 "the DW_CFA_restore_state instruction at offset 0x%llx "
+                 "should pop a saved state from the stack, "
+                 "but the stack is empty\n",
+                 filename_.c_str(),
+                 CallFrameInfo::KindName(kind),
+                 offset,
+                 section_.c_str(),
+                 insn_offset);
+  log_(message);
+}
+
+// Report a DW_CFA_restore_state instruction that would clear the CFA
+// rule currently in effect.  |offset| and |kind| identify the entry,
+// |insn_offset| is the instruction offset printed in the message,
+// which goes to log_.
+void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
+                                              CallFrameInfo::EntryKind kind,
+                                              uint64 insn_offset) {
+  char message[300];
+  SprintfLiteral(message,
+                 "%s: CFI %s at offset 0x%llx in section '%s': "
+                 "the DW_CFA_restore_state instruction at offset 0x%llx "
+                 "would clear the CFA rule in effect\n",
+                 filename_.c_str(),
+                 CallFrameInfo::KindName(kind),
+                 offset,
+                 section_.c_str(),
+                 insn_offset);
+  log_(message);
+}
+
+
+// Returns the total number of entries in the i386 register table
+// sketched below (50).  Each row of the sketch starts with the number
+// of names on that row; the function adds those row sizes up.
+unsigned int DwarfCFIToModule::RegisterNames::I386() {
+  /*
+   8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi",
+   3 "$eip", "$eflags", "$unused1",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   2 "$unused2", "$unused3",
+   8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   3 "$fcw", "$fsw", "$mxcsr",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", "$unused5",
+   2 "$tr", "$ldtr"
+  */
+  const unsigned int rowSizes[] = { 8, 3, 8, 2, 8, 8, 3, 8, 2 };
+  unsigned int total = 0;
+  for (unsigned int rowSize : rowSizes) {
+    total += rowSize;
+  }
+  return total;
+}
+
+// Returns the total number of entries in the x86_64 register table
+// sketched below (67).  Each row of the sketch starts with the number
+// of names on that row; the function adds those row sizes up.
+unsigned int DwarfCFIToModule::RegisterNames::X86_64() {
+  /*
+   8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp",
+   8 "$r8",  "$r9",  "$r10", "$r11", "$r12", "$r13", "$r14", "$r15",
+   1 "$rip",
+   8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7",
+   8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15",
+   8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7",
+   8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7",
+   1 "$rflags",
+   8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2",
+   4 "$fs.base", "$gs.base", "$unused3", "$unused4",
+   2 "$tr", "$ldtr",
+   3 "$mxcsr", "$fcw", "$fsw"
+  */
+  const unsigned int rowSizes[] = { 8, 8, 1, 8, 8, 8, 8, 1, 8, 4, 2, 3 };
+  unsigned int total = 0;
+  for (unsigned int rowSize : rowSizes) {
+    total += rowSize;
+  }
+  return total;
+}
+
+// Returns the total number of entries in the ARM register table
+// sketched below: 13 rows of 8 names each (104).  The layout follows
+// ARM IHI 0040A, section 3.1.
+unsigned int DwarfCFIToModule::RegisterNames::ARM() {
+  /*
+   8 "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+   8 "r8",  "r9",  "r10", "r11", "r12", "sp",  "lr",  "pc",
+   8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+   8 "fps", "cpsr", "",   "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "",    "",    "",    "",    "",    "",    "",    "",
+   8 "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
+   8 "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
+   8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+   8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+   8 "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
+  */
+  const unsigned int rowCount    = 13;
+  const unsigned int namesPerRow = 8;
+  return rowCount * namesPerRow;
+}
+
+// See prototype for comments.
+//
+// Summary of this definition: walks the bytes of |expr| one DWARF
+// opcode at a time and appends the equivalent PfxInstrs to |summ|,
+// bracketed by a PX_Start marker and a PX_End marker.  The PX_Start
+// operand records |pushCfaAtStart|; if |derefAtEnd| is set, a final
+// PX_Deref is appended just before PX_End.
+//
+// Only DW_OP_lit0..31, DW_OP_breg0..31, DW_OP_const4s, DW_OP_deref,
+// DW_OP_and, DW_OP_plus, DW_OP_minus, DW_OP_shl and DW_OP_ge are
+// handled.  Any other opcode, a DW_OP_breg offset that does not fit in
+// 32 bits, or an expression whose last operand is truncated makes the
+// conversion fail.
+//
+// Returns the value AddPfxInstr produced for the PX_Start marker on
+// success, or -1 on failure.  Instructions appended before a failure
+// is detected are not removed here.  When |debug| is set, progress is
+// reported through summ->Log.
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader,
+                       string expr, bool debug,
+                       bool pushCfaAtStart, bool derefAtEnd)
+{
+  const char* cursor = expr.c_str();
+  const char* end1   = cursor + expr.length();
+
+  char buf[100];
+  if (debug) {
+    SprintfLiteral(buf, "LUL.DW  << DwarfExpr, len is %d\n",
+                   (int)(end1 - cursor));
+    summ->Log(buf);
+  }
+
+  // Add a marker for the start of this expression.  In it, indicate
+  // whether or not the CFA should be pushed onto the stack prior to
+  // evaluation.
+  int32_t start_ix
+    = summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0));
+  MOZ_ASSERT(start_ix >= 0);
+
+  while (cursor < end1) {
+
+    uint8 opc = reader->ReadOneByte(cursor);
+    cursor++;
+
+    const char* nm   = nullptr;
+    PfxExprOp   pxop = PX_End;
+
+    switch (opc) {
+
+      case DW_OP_lit0 ... DW_OP_lit31: {
+        int32_t simm32 = (int32_t)(opc - DW_OP_lit0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_lit%d\n", (int)simm32);
+          summ->Log(buf);
+        }
+        (void) summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32));
+        break;
+      }
+
+      case DW_OP_breg0 ... DW_OP_breg31: {
+        size_t len;
+        int64_t n = reader->ReadSignedLEB128(cursor, &len);
+        cursor += len;
+        DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_breg%d %lld\n",
+                         (int)reg, (long long int)n);
+          summ->Log(buf);
+        }
+        // PfxInstr only allows a 32 bit signed offset.  So we
+        // must fail if the immediate is out of range.
+        if (n < INT32_MIN || INT32_MAX < n)
+          goto fail;
+        (void) summ->AddPfxInstr(PfxInstr(PX_DwReg, reg));
+        (void) summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n));
+        (void) summ->AddPfxInstr(PfxInstr(PX_Add));
+        break;
+      }
+
+      case DW_OP_const4s: {
+        // The operand is four bytes long.  Refuse to read it if fewer
+        // than four bytes remain, rather than reading beyond the end
+        // of the expression and only noticing the overrun afterwards.
+        if (end1 - cursor < 4) {
+          goto fail;
+        }
+        uint64_t u64 = reader->ReadFourBytes(cursor);
+        cursor += 4;
+        // u64 is guaranteed by |ReadFourBytes| to be in the
+        // range 0 .. FFFFFFFF inclusive.  But to be safe:
+        uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF);
+        int32_t  s32 = (int32_t)u32;
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_const4s %d\n", (int)s32);
+          summ->Log(buf);
+        }
+        (void) summ->AddPfxInstr(PfxInstr(PX_SImm32, s32));
+        break;
+      }
+
+      // Operand-free opcodes: each one records its name and the
+      // corresponding PfxExprOp, then shares the code at |no_operands|.
+      case DW_OP_deref: nm = "deref"; pxop = PX_Deref;  goto no_operands;
+      case DW_OP_and:   nm = "and";   pxop = PX_And;    goto no_operands;
+      case DW_OP_plus:  nm = "plus";  pxop = PX_Add;    goto no_operands;
+      case DW_OP_minus: nm = "minus"; pxop = PX_Sub;    goto no_operands;
+      case DW_OP_shl:   nm = "shl";   pxop = PX_Shl;    goto no_operands;
+      case DW_OP_ge:    nm = "ge";    pxop = PX_CmpGES; goto no_operands;
+      no_operands:
+        MOZ_ASSERT(nm && pxop != PX_End);
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   DW_OP_%s\n", nm);
+          summ->Log(buf);
+        }
+        (void) summ->AddPfxInstr(PfxInstr(pxop));
+        break;
+
+      default:
+        if (debug) {
+          SprintfLiteral(buf, "LUL.DW   unknown opc %d\n", (int)opc);
+          summ->Log(buf);
+        }
+        goto fail;
+
+    } // switch (opc)
+
+  } // while (cursor < end1)
+
+  MOZ_ASSERT(cursor >= end1);
+
+  if (cursor > end1) {
+    // We overran the Dwarf expression.  Give up.
+    goto fail;
+  }
+
+  // For DW_CFA_expression, what the expression denotes is the address
+  // of where the previous value is located.  The caller of this routine
+  // may therefore request one last dereference before the end marker is
+  // inserted.
+  if (derefAtEnd) {
+    (void) summ->AddPfxInstr(PfxInstr(PX_Deref));
+  }
+
+  // Insert an end marker, and declare success.
+  (void) summ->AddPfxInstr(PfxInstr(PX_End));
+  if (debug) {
+    SprintfLiteral(buf, "LUL.DW   conversion of dwarf expression succeeded, "
+                        "ix = %d\n", (int)start_ix);
+    summ->Log(buf);
+    summ->Log("LUL.DW  >>\n");
+  }
+  return start_ix;
+
+ fail:
+  if (debug) {
+    summ->Log("LUL.DW   conversion of dwarf expression failed\n");
+    summ->Log("LUL.DW  >>\n");
+  }
+  return -1;
+}
+
+
+// Begin a new entry covering |length| bytes starting at |address|.
+// The entry is announced to summ_, |return_address| is remembered in
+// return_address_, and an initial rule for the return address is
+// emitted when that column is below num_dw_regs_.  |offset|, |version|
+// and |augmentation| are not examined here.  Always returns true.
+bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length,
+                             uint8 version, const string &augmentation,
+                             unsigned return_address) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%lld\n",
+                   address, length);
+    summ_->Log(line);
+  }
+
+  summ_->Entry(address, length);
+
+  // No check of the version or augmentation is made at this point: if
+  // dwarf2reader::CallFrameInfo accepted them, that is good enough.
+
+  // Remember the return address column for the rules that follow.
+  return_address_ = return_address;
+
+  // DWARF CFI may leave the return address without any rule when its
+  // column is an ordinary register that already holds the return
+  // address on function entry, whereas Breakpad STACK CFI records need
+  // a .ra rule.  Supply one that simply cites that register.
+  const bool raIsOrdinaryRegister = return_address_ < num_dw_regs_;
+  if (raIsOrdinaryRegister) {
+    summ_->Rule(address, return_address_, NODEREF, return_address, 0);
+  }
+
+  return true;
+}
+
+// Map register number |i| to a unique name string: ".cfa" for a
+// negative number (asserted to be kCFARegister), ".ra" for the current
+// return address column, and "dwarf_reg_<n>" for anything else.
+const UniqueString* DwarfCFIToModule::RegisterName(int i) {
+  if (i >= 0) {
+    const unsigned regnum = i;
+    if (regnum != return_address_) {
+      char name[30];
+      SprintfLiteral(name, "dwarf_reg_%u", regnum);
+      return usu_->ToUniqueString(name);
+    }
+    return usu_->ToUniqueString(".ra");
+  }
+
+  MOZ_ASSERT(i == kCFARegister);
+  return usu_->ToUniqueString(".cfa");
+}
+
+// Handle an "undefined" rule for |reg|.  No rule is recorded; the
+// event is only passed to the reporter, and processing continues
+// (the function always returns true).  |address| is not used.
+bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) {
+  const UniqueString* regName = RegisterName(reg);
+  reporter_->UndefinedNotSupported(entry_offset_, regName);
+  return true;  // non-fatal
+}
+
+// Record, from |address| onwards, that |reg| keeps its value.  This is
+// expressed to summ_ as the non-dereferenced rule "reg + 0".  Always
+// returns true.
+bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = Same\n", address, reg);
+    summ_->Log(line);
+  }
+
+  summ_->Rule(address, reg, NODEREF, /* base */ reg, /* offset */ 0);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// stored in memory at |base_register| + |offset|.  This is expressed
+// to summ_ as the dereferenced rule "*(base_register + offset)".
+// Always returns true.
+bool DwarfCFIToModule::OffsetRule(uint64 address, int reg,
+                                  int base_register, long offset) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = *(r%d + %ld)\n",
+                   address, reg, base_register, offset);
+    summ_->Log(line);
+  }
+
+  summ_->Rule(address, reg, DEREF, base_register, offset);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// the value |base_register| + |offset| itself.  This is expressed to
+// summ_ as the non-dereferenced rule "base_register + offset".
+// Always returns true.
+bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg,
+                                     int base_register, long offset) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = r%d + %ld\n",
+                   address, reg, base_register, offset);
+    summ_->Log(line);
+  }
+
+  summ_->Rule(address, reg, NODEREF, base_register, offset);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// held in |base_register|.  This is expressed to summ_ as the
+// non-dereferenced rule "base_register + 0".  Always returns true.
+bool DwarfCFIToModule::RegisterRule(uint64 address, int reg,
+                                    int base_register) {
+  if (DEBUG_DWARF) {
+    char line[100];
+    SprintfLiteral(line, "LUL.DW  0x%llx: old r%d = r%d\n",
+                   address, reg, base_register);
+    summ_->Log(line);
+  }
+
+  summ_->Rule(address, reg, NODEREF, base_register, /* offset */ 0);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// found by evaluating the DWARF |expression| and dereferencing the
+// result.  The expression is converted with parseDwarfExpr (CFA pushed
+// at the start, dereference at the end).  If the conversion fails the
+// problem is only reported, so this always returns true.
+bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg,
+                                      const string &expression)
+{
+  const bool debug = !!DEBUG_DWARF;
+  const int32_t exprIx = parseDwarfExpr(summ_, reader_, expression, debug,
+                                        true/*pushCfaAtStart*/,
+                                        true/*derefAtEnd*/);
+  if (exprIx < 0) {
+    // The expression could not be converted.  This is deliberately
+    // non-fatal: report it and carry on.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, exprIx);
+  return true;
+}
+
+// Record, from |address| onwards, that the previous value of |reg| is
+// the result of evaluating the DWARF |expression|.  The expression is
+// converted with parseDwarfExpr (CFA pushed at the start, no final
+// dereference).  If the conversion fails the problem is only reported,
+// so this always returns true.
+bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg,
+                                         const string &expression)
+{
+  const bool debug = !!DEBUG_DWARF;
+  const int32_t exprIx = parseDwarfExpr(summ_, reader_, expression, debug,
+                                        true/*pushCfaAtStart*/,
+                                        false/*!derefAtEnd*/);
+  if (exprIx < 0) {
+    // The expression could not be converted.  This is deliberately
+    // non-fatal: report it and carry on.
+    reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg));
+    return true;
+  }
+
+  summ_->Rule(address, reg, PFXEXPR, 0, exprIx);
+  return true;
+}
+
+// Finish the current entry: optionally log the event, then tell summ_
+// that the entry has ended.  Always returns true.
+bool DwarfCFIToModule::End() {
+  if (DEBUG_DWARF) {
+    summ_->Log("LUL.DW DwarfCFIToModule::End()\n");
+  }
+
+  summ_->End();
+  return true;
+}
+
+// Report that a register rule of kind "undefined" was encountered.
+// Only a fixed line naming this function is logged; neither |offset|
+// nor |reg| appears in the output.
+void DwarfCFIToModule::Reporter::UndefinedNotSupported(
+    size_t offset,
+    const UniqueString* reg) {
+  char message[300];
+  SprintfLiteral(message,
+                 "DwarfCFIToModule::Reporter::UndefinedNotSupported()\n");
+  log_(message);
+  // The Breakpad original emitted a fuller message here:
+  //BPLOG(INFO) << file_ << ", section '" << section_
+  //  << "': the call frame entry at offset 0x"
+  //  << std::setbase(16) << offset << std::setbase(10)
+  //  << " sets the rule for register '" << FromUniqueString(reg)
+  //  << "' to 'undefined', but the Breakpad symbol file format cannot "
+  //  << " express this";
+}
+
+// FIXME: move this somewhere sensible
+//
+// Returns true if |n| has at most one bit set, that is, if |n| is a
+// power of two or is zero.  (Zero is deliberately accepted, matching
+// the long-standing behaviour of this helper.)
+static bool is_power_of_2(uint64_t n)
+{
+  // Clearing the lowest set bit of |n| leaves zero exactly when |n|
+  // had no more than one bit set.  For n == 0 the subtraction wraps,
+  // which is well defined for unsigned types, and the AND is still 0.
+  return (n & (n - 1)) == 0;
+}
+
+// Report that a DWARF expression could not be summarised.  To avoid
+// flooding the log, a line is written only when the running count of
+// complaints passes the is_power_of_2 test (1, 2, 4, 8, ...).  Neither
+// |offset| nor |reg| appears in the output.
+void DwarfCFIToModule::Reporter::ExpressionCouldNotBeSummarised(
+    size_t offset,
+    const UniqueString* reg) {
+  // Shared across all callers and updated without synchronisation, so
+  // this counter isn't threadsafe.
+  static uint64_t complaintCount = 0;
+  ++complaintCount;
+
+  if (is_power_of_2(complaintCount)) {
+    char message[300];
+    SprintfLiteral(message,
+                   "DwarfCFIToModule::Reporter::"
+                   "ExpressionCouldNotBeSummarised(shown %llu times)\n",
+                   (unsigned long long int)complaintCount);
+    log_(message);
+  }
+}
+
+} // namespace lul
diff --git a/tools/profiler/lul/LulDwarfExt.h b/tools/profiler/lul/LulDwarfExt.h
new file mode 100644
index 000000000..f3555ac55
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfExt.h
@@ -0,0 +1,1287 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright 2006, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/types.h
+//   src/common/dwarf/dwarf2enums.h
+//   src/common/dwarf/bytereader.h
+//   src/common/dwarf_cfi_to_module.h
+//   src/common/dwarf/dwarf2reader.h
+
+#ifndef LulDwarfExt_h
+#define LulDwarfExt_h
+
+#include <stdint.h>
+
+#include "mozilla/Assertions.h"
+
+#include "LulDwarfSummariser.h"
+
+// Short names for the basic integer types, as used throughout the
+// DWARF reading code derived from Breakpad.
+using int8   = signed char;
+using int16  = short;
+using int32  = int;
+using int64  = long long;
+
+using uint8  = unsigned char;
+using uint16 = unsigned short;
+using uint32 = unsigned int;
+using uint64 = unsigned long long;
+
+// Pointer-sized integers, built on the compiler-provided
+// __PTRDIFF_TYPE__ macro.  Compilers that don't define it are rejected.
+#ifdef __PTRDIFF_TYPE__
+using intptr  = __PTRDIFF_TYPE__;
+using uintptr = unsigned __PTRDIFF_TYPE__;
+#else
+#error "Can't find pointer-sized integral types."
+#endif
+
+
+namespace lul {
+
+// Pointer formats used in exception handling frame descriptions.  See
+// the Linux Standard Base Core Specification 4.0, section 11.5, DWARF
+// Extensions.
+enum DwarfPointerEncoding
+  {
+    // Lower nybble: how the value itself is stored.
+    DW_EH_PE_absptr   = 0x00,
+    DW_EH_PE_uleb128  = 0x01,
+    DW_EH_PE_udata2   = 0x02,
+    DW_EH_PE_udata4   = 0x03,
+    DW_EH_PE_udata8   = 0x04,
+
+    // Not a format in its own right: the GNU toolchain sources define
+    // this value simply to help classify the lower nybble values into
+    // signed and unsigned groups.
+    DW_EH_PE_signed   = 0x08,
+
+    DW_EH_PE_sleb128  = 0x09,
+    DW_EH_PE_sdata2   = 0x0A,
+    DW_EH_PE_sdata4   = 0x0B,
+    DW_EH_PE_sdata8   = 0x0C,
+
+    // Upper nybble values.
+    DW_EH_PE_pcrel    = 0x10,
+    DW_EH_PE_textrel  = 0x20,
+    DW_EH_PE_datarel  = 0x30,
+    DW_EH_PE_funcrel  = 0x40,
+    DW_EH_PE_aligned  = 0x50,
+
+    // Not documented in LSB 4.0, but used by both the Linux and OS X
+    // toolchains.  It can be combined with any other encoding (except
+    // DW_EH_PE_aligned) and means that the encoded value is the
+    // address at which the true address is stored, rather than the
+    // true address itself.
+    DW_EH_PE_indirect = 0x80,
+
+    DW_EH_PE_omit     = 0xff
+  };
+
+
+// Byte order selector.  The obvious names LITTLE_ENDIAN and BIG_ENDIAN
+// can't be used here because they conflict with existing macros.
+enum Endianness {
+  ENDIANNESS_BIG    = 0,
+  ENDIANNESS_LITTLE = 1
+};
+
+// A ByteReader knows how to read single- and multi-byte values of
+// various endiannesses, sizes, and encodings, as used in DWARF
+// debugging information and Linux C++ exception handling data.
+class ByteReader {
+ public:
+  // Construct a ByteReader capable of reading one-, two-, four-, and
+  // eight-byte values according to ENDIANNESS, absolute machine-sized
+  // addresses, DWARF-style "initial length" values, signed and
+  // unsigned LEB128 numbers, and Linux C++ exception handling data's
+  // encoded pointers.
+  explicit ByteReader(enum Endianness endianness);
+  virtual ~ByteReader();
+
+  // Read a single byte from BUFFER and return it as an unsigned 8 bit
+  // number.
+  uint8 ReadOneByte(const char* buffer) const;
+
+  // Read two bytes from BUFFER and return them as an unsigned 16 bit
+  // number, using this ByteReader's endianness.
+  uint16 ReadTwoBytes(const char* buffer) const;
+
+  // Read four bytes from BUFFER and return them as an unsigned 32 bit
+  // number, using this ByteReader's endianness. This function returns
+  // a uint64 so that it is compatible with ReadAddress and
+  // ReadOffset. The number it returns will never be outside the range
+  // of an unsigned 32 bit integer.
+  uint64 ReadFourBytes(const char* buffer) const;
+
+  // Read eight bytes from BUFFER and return them as an unsigned 64
+  // bit number, using this ByteReader's endianness.
+  uint64 ReadEightBytes(const char* buffer) const;
+
+  // Read an unsigned LEB128 (Little Endian Base 128) number from
+  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
+  // the number of bytes read.
+  //
+  // The unsigned LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between 0 and 0x7f, then its unsigned LEB128
+  //   representation is a single byte whose value is N.
+  //
+  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the unsigned LEB128 representation of N /
+  //   128, rounded towards negative infinity.
+  //
+  // In other words, we break VALUE into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
+  uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const;
+
+  // Read a signed LEB128 number from BUFFER and return it as a
+  // signed 64 bit integer. Set LEN to the number of bytes read.
+  //
+  // The signed LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between -0x40 and 0x3f, then its signed LEB128
+  //   representation is a single byte whose value is N in two's
+  //   complement.
+  //
+  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the signed LEB128 representation of N / 128,
+  //   rounded towards negative infinity.
+  //
+  // In other words, we break VALUE into groups of seven bits, put
+  // them in little-endian order, and then write them as eight-bit
+  // bytes with the high bit on all but the last.
+  int64 ReadSignedLEB128(const char* buffer, size_t* len) const;
+
+  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
+  // must be either 4 or 8. (DWARF allows addresses to be any number of
+  // bytes in length from 1 to 255, but we only support 32- and 64-bit
+  // addresses at the moment.) You must call this before using the
+  // ReadAddress member function.
+  //
+  // For data in a .debug_info section, or something that .debug_info
+  // refers to like line number or macro data, the compilation unit
+  // header's address_size field indicates the address size to use. Call
+  // frame information doesn't indicate its address size (a shortcoming of
+  // the spec); you must supply the appropriate size based on the
+  // architecture of the target machine.
+  void SetAddressSize(uint8 size);
+
+  // Return the current address size, in bytes. This is either 4,
+  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
+  uint8 AddressSize() const { return address_size_; }
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer, respecting this ByteReader's endianness and address size. You
+  // must call SetAddressSize before calling this function.
+  uint64 ReadAddress(const char* buffer) const;
+
+  // DWARF actually defines two slightly different formats: 32-bit DWARF
+  // and 64-bit DWARF. This is *not* related to the size of registers or
+  // addresses on the target machine; it refers only to the size of section
+  // offsets and data lengths appearing in the DWARF data. One only needs
+  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
+  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
+  // debugging data itself is very large.
+  //
+  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
+  // compilation unit and call frame information entry begins with an
+  // "initial length" field, which, in addition to giving the length of the
+  // data, also indicates the size of section offsets and lengths appearing
+  // in that data. The ReadInitialLength member function, below, reads an
+  // initial length and sets the ByteReader's offset size as a side effect.
+  // Thus, in the normal process of reading DWARF data, the appropriate
+  // offset size is set automatically. So, you should only need to call
+  // SetOffsetSize if you are using the same ByteReader to jump from the
+  // midst of one block of DWARF data into another.
+
+  // Read a DWARF "initial length" field from START, and return it as
+  // an unsigned 64 bit integer, respecting this ByteReader's
+  // endianness. Set *LEN to the length of the initial length in
+  // bytes, either four or twelve. As a side effect, set this
+  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
+  // initial length) or 8 (if we see a 64-bit DWARF initial length).
+  //
+  // A DWARF initial length is either:
+  //
+  // - a byte count stored as an unsigned 32-bit value less than
+  //   0xffffff00, indicating that the data whose length is being
+  //   measured uses the 32-bit DWARF format, or
+  //
+  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
+  //   indicating that the data whose length is being measured uses
+  //   the 64-bit DWARF format.
+  uint64 ReadInitialLength(const char* start, size_t* len);
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
+  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
+  // long. You must call ReadInitialLength or SetOffsetSize before calling
+  // this function; see the comments above for details.
+  uint64 ReadOffset(const char* buffer) const;
+
+  // Return the current offset size, in bytes.
+  // A return value of 4 indicates that we are reading 32-bit DWARF.
+  // A return value of 8 indicates that we are reading 64-bit DWARF.
+  uint8 OffsetSize() const { return offset_size_; }
+
+  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
+  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
+  // Usually, you should not call this function yourself; instead, let a
+  // call to ReadInitialLength establish the data's offset size
+  // automatically.
+  void SetOffsetSize(uint8 size);
+
+  // The Linux C++ ABI uses a variant of DWARF call frame information
+  // for exception handling. This data is included in the program's
+  // address space as the ".eh_frame" section, and interpreted at
+  // runtime to walk the stack, find exception handlers, and run
+  // cleanup code. The format is mostly the same as DWARF CFI, with
+  // some adjustments made to provide the additional
+  // exception-handling data, and to make the data easier to work with
+  // in memory --- for example, to allow it to be placed in read-only
+  // memory even when describing position-independent code.
+  //
+  // In particular, exception handling data can select a number of
+  // different encodings for pointers that appear in the data, as
+  // described by the DwarfPointerEncoding enum. There are actually
+  // four axes(!) to the encoding:
+  //
+  // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
+  //   the DWARF LEB128 encoding.
+  //
+  // - The pointer's signedness: pointers can be signed or unsigned.
+  //
+  // - The pointer's base address: the data stored in the exception
+  //   handling data can be the actual address (that is, an absolute
+  //   pointer), or relative to one of a number of different base
+  //   addresses --- including that of the encoded pointer itself, for
+  //   a form of "pc-relative" addressing.
+  //
+  // - The pointer may be indirect: it may be the address where the
+  //   true pointer is stored. (This is used to refer to things via
+  //   global offset table entries, program linkage table entries, or
+  //   other tricks used in position-independent code.)
+  //
+  // There are also two options that fall outside that matrix
+  // altogether: the pointer may be omitted, or it may have padding to
+  // align it on an appropriate address boundary. (That last option
+  // may seem like it should be just another axis, but it is not.)
+
+  // Indicate that the exception handling data is loaded starting at
+  // SECTION_BASE, and that the start of its buffer in our own memory
+  // is BUFFER_BASE. This allows us to find the address that a given
+  // byte in our buffer would have when loaded into the program the
+  // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
+  void SetCFIDataBase(uint64 section_base, const char *buffer_base);
+
+  // Indicate that the base address of the program's ".text" section
+  // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
+  void SetTextBase(uint64 text_base);
+
+  // Indicate that the base address for DW_EH_PE_datarel pointers is
+  // DATA_BASE. The proper value depends on the ABI; it is usually the
+  // address of the global offset table, held in a designated register in
+  // position-independent code. You will need to look at the startup code
+  // for the target system to be sure. I tried; my eyes bled.
+  void SetDataBase(uint64 data_base);
+
+  // Indicate that the base address for the FDE we are processing is
+  // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
+  // pointers. (This encoding does not seem to be used by the GNU
+  // toolchain.)
+  void SetFunctionBase(uint64 function_base);
+
+  // Indicate that we are no longer processing any FDE, so any use of
+  // a DW_EH_PE_funcrel encoding is an error.
+  void ClearFunctionBase();
+
+  // Return true if ENCODING is a valid pointer encoding.
+  bool ValidEncoding(DwarfPointerEncoding encoding) const;
+
+  // Return true if we have all the information we need to read a
+  // pointer that uses ENCODING. This checks that the appropriate
+  // SetFooBase function for ENCODING has been called.
+  bool UsableEncoding(DwarfPointerEncoding encoding) const;
+
+  // Read an encoded pointer from BUFFER using ENCODING; return the
+  // absolute address it represents, and set *LEN to the pointer's
+  // length in bytes, including any padding for aligned pointers.
+  //
+  // This function calls 'abort' if ENCODING is invalid or refers to a
+  // base address this reader hasn't been given, so you should check
+  // with ValidEncoding and UsableEncoding first if you would rather
+  // die in a more helpful way.
+  uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding,
+                            size_t *len) const;
+
+ private:
+
+  // Function pointer type for our address and offset readers.
+  typedef uint64 (ByteReader::*AddressReader)(const char*) const;
+
+  // Read an offset from BUFFER and return it as an unsigned 64 bit
+  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
+  // generally depending on the amount of DWARF2/3 info present.
+  // This function pointer gets set by SetOffsetSize.
+  AddressReader offset_reader_;
+
+  // Read an address from BUFFER and return it as an unsigned 64 bit
+  // integer.  DWARF2/3 allow addresses to be any size from 0-255
+  // bytes currently.  Internally we support 4 and 8 byte addresses,
+  // and will CHECK on anything else.
+  // This function pointer gets set by SetAddressSize.
+  AddressReader address_reader_;
+
+  // Byte order applied by the multi-byte readers (ReadTwoBytes,
+  // ReadFourBytes and ReadEightBytes).
+  Endianness endian_;
+  // Current address size in bytes, as reported by AddressSize().
+  uint8 address_size_;
+  // Current offset size in bytes, as reported by OffsetSize().
+  uint8 offset_size_;
+
+  // Base addresses for Linux C++ exception handling data's encoded pointers.
+  // NOTE(review): the have_*_ flags presumably record whether the
+  // matching Set*Base function has been called -- confirm against the
+  // implementation.
+  bool have_section_base_, have_text_base_, have_data_base_;
+  bool have_function_base_;
+  uint64 section_base_;
+  uint64 text_base_, data_base_, function_base_;
+  const char *buffer_base_;
+};
+
+
+// Return the byte at BUFFER as an unsigned 8 bit value.
+inline uint8 ByteReader::ReadOneByte(const char* buffer) const {
+  return static_cast<uint8>(*buffer);
+}
+
+// Combine the two bytes at SIGNED_BUFFER into an unsigned 16 bit
+// value.  With ENDIANNESS_LITTLE the first byte is the low-order one;
+// otherwise the first byte is the high-order one.
+inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const {
+  const unsigned char* bytes
+    = reinterpret_cast<const unsigned char*>(signed_buffer);
+  const bool littleEndian = (endian_ == ENDIANNESS_LITTLE);
+  const uint16 low  = littleEndian ? bytes[0] : bytes[1];
+  const uint16 high = littleEndian ? bytes[1] : bytes[0];
+  return low | high << 8;
+}
+
+// Combine the four bytes at SIGNED_BUFFER into an unsigned 32 bit
+// value, returned widened to uint64.  With ENDIANNESS_LITTLE the first
+// byte is the least significant; otherwise it is the most significant.
+inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const {
+  const unsigned char* bytes
+    = reinterpret_cast<const unsigned char*>(signed_buffer);
+  uint32 value = 0;
+  // Fold the bytes in from most significant to least significant.
+  if (endian_ == ENDIANNESS_LITTLE) {
+    for (int i = 3; i >= 0; i--) {
+      value = (value << 8) | bytes[i];
+    }
+  } else {
+    for (int i = 0; i <= 3; i++) {
+      value = (value << 8) | bytes[i];
+    }
+  }
+  return value;
+}
+
+// Assemble a 64-bit value from the eight bytes at SIGNED_BUFFER, using
+// the byte order recorded in endian_.
+inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const {
+  // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
+  const unsigned char *bytes
+    = reinterpret_cast<const unsigned char *>(signed_buffer);
+  const bool little = (endian_ == ENDIANNESS_LITTLE);
+  uint64 value = 0;
+  // Feed the bytes in most-significant-first order, so each step
+  // shifts the accumulated value up by one byte.
+  for (int i = 0; i < 8; ++i) {
+    const int src = little ? 7 - i : i;
+    value = (value << 8) | bytes[src];
+  }
+  return value;
+}
+
+// Read an unsigned LEB128 number from BUFFER.  Each byte contains 7
+// bits of information, least-significant group first, plus one bit
+// saying whether the number continues or not.  Store the number of
+// bytes consumed in *LEN and return the decoded value.
+//
+// BUFFER carries no length, so the caller must make sure the encoding
+// terminates within readable memory.  An overlong encoding is still
+// consumed in full (so *LEN is accurate), but groups that would land
+// at or above bit 64 are dropped rather than shifted by an
+// out-of-range amount, which would be undefined behaviour.
+
+inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer,
+                                             size_t* len) const {
+  uint64 result = 0;
+  size_t num_read = 0;
+  unsigned int shift = 0;
+  unsigned char byte;
+
+  do {
+    byte = *buffer++;
+    num_read++;
+
+    // Merge this group only while it still fits in the result; once
+    // shift reaches the result width it stays there.
+    if (shift < 8 * sizeof (result)) {
+      result |= (static_cast<uint64>(byte & 0x7f)) << shift;
+      shift += 7;
+    }
+
+  } while (byte & 0x80);
+
+  *len = num_read;
+
+  return result;
+}
+
+// Read a signed LEB128 number from BUFFER.  These are like regular
+// LEB128 numbers, except the last byte may have a sign bit (0x40) set,
+// in which case the value is sign-extended.  Store the number of bytes
+// consumed in *LEN and return the decoded value.
+//
+// BUFFER carries no length, so the caller must make sure the encoding
+// terminates within readable memory.  An overlong encoding is still
+// consumed in full, but groups that would land at or above bit 64 are
+// dropped rather than shifted by an out-of-range amount.
+
+inline int64 ByteReader::ReadSignedLEB128(const char* buffer,
+                                          size_t* len) const {
+  // Accumulate in an unsigned value: shifting a signed integer into or
+  // past its sign bit, and negating the most negative value, are not
+  // well defined.  The bits are reinterpreted as int64 at the end.
+  uint64 bits = 0;
+  unsigned int shift = 0;
+  size_t num_read = 0;
+  unsigned char byte;
+
+  do {
+      byte = *buffer++;
+      num_read++;
+      // Merge this group only while it still fits in the result; once
+      // shift reaches the result width it stays there.
+      if (shift < 8 * sizeof (bits)) {
+        bits |= (static_cast<uint64>(byte & 0x7f) << shift);
+        shift += 7;
+      }
+  } while (byte & 0x80);
+
+  // Sign-extend: if the final byte's sign bit is set and the value did
+  // not fill the whole result, set every bit from SHIFT upwards.
+  if ((shift < 8 * sizeof (bits)) && (byte & 0x40))
+    bits |= ~static_cast<uint64>(0) << shift;
+  *len = num_read;
+  return static_cast<int64>(bits);
+}
+
+// Decode an offset at BUFFER by calling through offset_reader_, which
+// is asserted to be non-null.
+inline uint64 ByteReader::ReadOffset(const char* buffer) const {
+  MOZ_ASSERT(this->offset_reader_);
+  const AddressReader read_offset = this->offset_reader_;
+  return (this->*read_offset)(buffer);
+}
+
+// Decode an address at BUFFER by calling through address_reader_,
+// which is asserted to be non-null.
+inline uint64 ByteReader::ReadAddress(const char* buffer) const {
+  MOZ_ASSERT(this->address_reader_);
+  const AddressReader read_address = this->address_reader_;
+  return (this->*read_address)(buffer);
+}
+
+// Store SECTION_BASE and BUFFER_BASE in this reader and flag the
+// section base as available.
+inline void ByteReader::SetCFIDataBase(uint64 section_base,
+                                       const char *buffer_base) {
+  this->have_section_base_ = true;
+  this->section_base_ = section_base;
+  this->buffer_base_ = buffer_base;
+}
+
+// Store TEXT_BASE in this reader and flag the text base as available.
+inline void ByteReader::SetTextBase(uint64 text_base) {
+  this->have_text_base_ = true;
+  this->text_base_ = text_base;
+}
+
+// Store DATA_BASE in this reader and flag the data base as available.
+inline void ByteReader::SetDataBase(uint64 data_base) {
+  this->have_data_base_ = true;
+  this->data_base_ = data_base;
+}
+
+// Store FUNCTION_BASE in this reader and flag the function base as
+// available.
+inline void ByteReader::SetFunctionBase(uint64 function_base) {
+  this->have_function_base_ = true;
+  this->function_base_ = function_base;
+}
+
+// Flag the function base as no longer available.  The stored
+// function_base_ value itself is left untouched.
+inline void ByteReader::ClearFunctionBase() {
+  this->have_function_base_ = false;
+}
+
+
+// (derived from)
+// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which
+// accepts parsed DWARF call frame info and adds it to a Summariser object.
+
+// This class is a reader for DWARF's Call Frame Information.  CFI
+// describes how to unwind stack frames --- even for functions that do
+// not follow fixed conventions for saving registers, whose frame size
+// varies as they execute, etc.
+//
+// CFI describes, at each machine instruction, how to compute the
+// stack frame's base address, how to find the return address, and
+// where to find the saved values of the caller's registers (if the
+// callee has stashed them somewhere to free up the registers for its
+// own use).
+//
+// For example, suppose we have a function whose machine code looks
+// like this (imagine an assembly language that looks like C, for a
+// machine with 32-bit registers, and a stack that grows towards lower
+// addresses):
+//
+// func:                                ; entry point; return address at sp
+// func+0:      sp = sp - 16            ; allocate space for stack frame
+// func+1:      sp[12] = r0             ; save r0 at sp+12
+// ...                                  ; other code, not frame-related
+// func+10:     sp -= 4; *sp = x        ; push some x on the stack
+// ...                                  ; other code, not frame-related
+// func+20:     r0 = sp[16]             ; restore saved r0
+// func+21:     sp += 20                ; pop whole stack frame
+// func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
+//
+// DWARF CFI is (a very compressed representation of) a table with a
+// row for each machine instruction address and a column for each
+// register showing how to restore it, if possible.
+//
+// A special column named "CFA", for "Canonical Frame Address", tells how
+// to compute the base address of the frame; registers' entries may
+// refer to the CFA in describing where the registers are saved.
+//
+// Another special column, named "RA", represents the return address.
+//
+// For example, here is a complete (uncompressed) table describing the
+// function above:
+//
+//     insn      cfa    r0      r1 ...  ra
+//     =======================================
+//     func+0:   sp                     cfa[0]
+//     func+1:   sp+16                  cfa[0]
+//     func+2:   sp+16  cfa[-4]         cfa[0]
+//     func+11:  sp+20  cfa[-4]         cfa[0]
+//     func+21:  sp+20                  cfa[0]
+//     func+22:  sp                     cfa[0]
+//
+// Some things to note here:
+//
+// - Each row describes the state of affairs *before* executing the
+//   instruction at the given address.  Thus, the row for func+0
+//   describes the state before we allocate the stack frame.  In the
+//   next row, the formula for computing the CFA has changed,
+//   reflecting that allocation.
+//
+// - The other entries are written in terms of the CFA; this allows
+//   them to remain unchanged as the stack pointer gets bumped around.
+//   For example, the rule for recovering the return address (the "ra"
+//   column) remains unchanged throughout the function, even as the
+//   stack pointer takes on three different offsets from the return
+//   address.
+//
+// - Although we haven't shown it, most calling conventions designate
+//   "callee-saves" and "caller-saves" registers. The callee must
+//   preserve the values of callee-saves registers; if it uses them,
+//   it must save their original values somewhere, and restore them
+//   before it returns. In contrast, the callee is free to trash
+//   caller-saves registers; if the callee uses these, it will
+//   probably not bother to save them anywhere, and the CFI will
+//   probably mark their values as "unrecoverable".
+//
+//   (However, since the caller cannot assume the callee was going to
+//   save them, caller-saves registers are probably dead in the caller
+//   anyway, so compilers usually don't generate CFI for caller-saves
+//   registers.)
+//
+// - Exactly where the CFA points is a matter of convention that
+//   depends on the architecture and ABI in use. In the example, the
+//   CFA is the value the stack pointer had upon entry to the
+//   function, pointing at the saved return address. But on the x86,
+//   the call frame information generated by GCC follows the
+//   convention that the CFA is the address *after* the saved return
+//   address.
+//
+//   But by definition, the CFA remains constant throughout the
+//   lifetime of the frame. This makes it a useful value for other
+//   columns to refer to. It also gives debuggers a useful handle
+//   for identifying a frame.
+//
+// If you look at the table above, you'll notice that a given entry is
+// often the same as the one immediately above it: most instructions
+// change only one or two aspects of the stack frame, if they affect
+// it at all. The DWARF format takes advantage of this fact, and
+// reduces the size of the data by mentioning only the addresses and
+// columns at which changes take place. So for the above, DWARF CFI
+// data would only actually mention the following:
+//
+//     insn      cfa    r0      r1 ...  ra
+//     =======================================
+//     func+0:   sp                     cfa[0]
+//     func+1:   sp+16
+//     func+2:          cfa[-4]
+//     func+11:  sp+20
+//     func+21:         r0
+//     func+22:  sp
+//
+// In fact, this is the way the parser reports CFI to the consumer: as
+// a series of statements of the form, "At address X, column Y changed
+// to Z," and related conventions for describing the initial state.
+//
+// Naturally, it would be impractical to have to scan the entire
+// program's CFI, noting changes as we go, just to recover the
+// unwinding rules in effect at one particular instruction. To avoid
+// this, CFI data is grouped into "entries", each of which covers a
+// specified range of addresses and begins with a complete statement
+// of the rules for all recoverable registers at that starting
+// address. Each entry typically covers a single function.
+//
+// Thus, to compute the contents of a given row of the table --- that
+// is, rules for recovering the CFA, RA, and registers at a given
+// instruction --- the consumer should find the entry that covers that
+// instruction's address, start with the initial state supplied at the
+// beginning of the entry, and work forward until it has processed all
+// the changes up to and including those for the present instruction.
+//
+// There are seven kinds of rules that can appear in an entry of the
+// table:
+//
+// - "undefined": The given register is not preserved by the callee;
+//   its value cannot be recovered.
+//
+// - "same value": This register has the same value it did in the callee.
+//
+// - offset(N): The register is saved at offset N from the CFA.
+//
+// - val_offset(N): The value the register had in the caller is the
+//   CFA plus offset N. (This is usually only useful for describing
+//   the stack pointer.)
+//
+// - register(R): The register's value was saved in another register R.
+//
+// - expression(E): Evaluating the DWARF expression E using the
+//   current frame's registers' values yields the address at which the
+//   register was saved.
+//
+// - val_expression(E): Evaluating the DWARF expression E using the
+//   current frame's registers' values yields the value the register
+//   had in the caller.
+
+class CallFrameInfo {
+ public:
+  // The different kinds of entries one finds in CFI. Used internally,
+  // and for error reporting.
+  enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
+
+  // The handler class to which the parser hands the parsed call frame
+  // information.  Defined below.
+  class Handler;
+
+  // A reporter class, which CallFrameInfo uses to report errors
+  // encountered while parsing call frame information.  Defined below.
+  class Reporter;
+
+  // Create a DWARF CFI parser. BUFFER points to the contents of the
+  // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
+  // REPORTER is an error reporter the parser should use to report
+  // problems. READER is a ByteReader instance that has the endianness and
+  // address size set properly. Report the data we find to HANDLER.
+  //
+  // This class can also parse Linux C++ exception handling data, as found
+  // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
+  // placed in loadable segments so that it is present in the program's
+  // address space, and is interpreted by the C++ runtime to search the
+  // call stack for a handler interested in the exception being thrown,
+  // actually pop the frames, and find cleanup code to run.
+  //
+  // There are two differences between the call frame information described
+  // in the DWARF standard and the exception handling data Linux places in
+  // the .eh_frame section:
+  //
+  // - Exception handling data uses a different format for call frame
+  //   information entry headers. The distinguished CIE id, the way FDEs
+  //   refer to their CIEs, and the way the end of the series of entries is
+  //   determined are all slightly different.
+  //
+  //   If the constructor's EH_FRAME argument is true, then the
+  //   CallFrameInfo parses the entry headers as Linux C++ exception
+  //   handling data. If EH_FRAME is false or omitted, the CallFrameInfo
+  //   parses standard DWARF call frame information.
+  //
+  // - Linux C++ exception handling data uses CIE augmentation strings
+  //   beginning with 'z' to specify the presence of additional data after
+  //   the CIE and FDE headers and special encodings used for addresses in
+  //   frame description entries.
+  //
+  //   CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
+  //   exception handling data if you have supplied READER with the base
+  //   addresses needed to interpret the pointer encodings that 'z'
+  //   augmentations can specify. See the ByteReader interface for details
+  //   about the base addresses. See the CallFrameInfo::Handler interface
+  //   for details about the additional information one might find in
+  //   'z'-augmented data.
+  //
+  // Thus:
+  //
+  // - If you are parsing standard DWARF CFI, as found in a .debug_frame
+  //   section, you should pass false for the EH_FRAME argument, or omit
+  //   it, and you need not worry about providing READER with the
+  //   additional base addresses.
+  //
+  // - If you want to parse Linux C++ exception handling data from a
+  //   .eh_frame section, you should pass EH_FRAME as true, and call
+  //   READER's Set*Base member functions before calling our Start method.
+  //
+  // - If you want to parse DWARF CFI that uses the 'z' augmentations
+  //   (although I don't think any toolchain ever emits such data), you
+  //   could pass false for EH_FRAME, but call READER's Set*Base members.
+  //
+  // The extensions the Linux C++ ABI makes to DWARF for exception
+  // handling are described here, rather poorly:
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+  //
+  // The mechanics of C++ exception handling, personality routines,
+  // and language-specific data areas are described here, rather nicely:
+  // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
+
+  CallFrameInfo(const char *buffer, size_t buffer_length,
+                ByteReader *reader, Handler *handler, Reporter *reporter,
+                bool eh_frame = false)
+      : buffer_(buffer), buffer_length_(buffer_length),
+        reader_(reader), handler_(handler), reporter_(reporter),
+        eh_frame_(eh_frame) { }
+
+  ~CallFrameInfo() { }
+
+  // Parse the entries in BUFFER, reporting what we find to HANDLER.
+  // Return true if we reach the end of the section successfully, or
+  // false if we encounter an error.
+  bool Start();
+
+  // Return the textual name of KIND. For error reporting.
+  static const char *KindName(EntryKind kind);
+
+ private:
+
+  struct CIE;
+
+  // A CFI entry, either an FDE or a CIE.
+  struct Entry {
+    // The starting offset of the entry in the section, for error
+    // reporting.
+    size_t offset;
+
+    // The start of this entry in the buffer.
+    const char *start;
+
+    // Which kind of entry this is.
+    //
+    // We want to be able to use this for error reporting even while we're
+    // in the midst of parsing. Error reporting code may assume that kind,
+    // offset, and start fields are valid, although kind may be kUnknown.
+    EntryKind kind;
+
+    // The end of this entry's common prologue (initial length and id), and
+    // the start of this entry's kind-specific fields.
+    const char *fields;
+
+    // The start of this entry's instructions.
+    const char *instructions;
+
+    // The address past the entry's last byte in the buffer. (Note that
+    // since offset points to the entry's initial length field, and the
+    // length field is the number of bytes after that field, this is not
+    // simply buffer_ + offset + length.)
+    const char *end;
+
+    // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
+    // CIE, and the offset of the associated CIE in an FDE.
+    uint64 id;
+
+    // The CIE that applies to this entry, if we've parsed it. If this is a
+    // CIE, then this field points to this structure.
+    CIE *cie;
+  };
+
+  // A common information entry (CIE).
+  struct CIE: public Entry {
+    uint8 version;                      // CFI data version number
+    std::string augmentation;           // vendor format extension markers
+    uint64 code_alignment_factor;       // scale for code address adjustments
+    int data_alignment_factor;          // scale for stack pointer adjustments
+    unsigned return_address_register;   // which register holds the return addr
+
+    // True if this CIE includes Linux C++ ABI 'z' augmentation data.
+    bool has_z_augmentation;
+
+    // Parsed 'z' augmentation data. These are meaningful only if
+    // has_z_augmentation is true.
+    bool has_z_lsda;                    // The 'z' augmentation included 'L'.
+    bool has_z_personality;             // The 'z' augmentation included 'P'.
+    bool has_z_signal_frame;            // The 'z' augmentation included 'S'.
+
+    // If has_z_lsda is true, this is the encoding to be used for language-
+    // specific data area pointers in FDEs.
+    DwarfPointerEncoding lsda_encoding;
+
+    // If has_z_personality is true, this is the encoding used for the
+    // personality routine pointer in the augmentation data.
+    DwarfPointerEncoding personality_encoding;
+
+    // If has_z_personality is true, this is the address of the personality
+    // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
+    // address where the personality routine's address is stored.
+    uint64 personality_address;
+
+    // This is the encoding used for addresses in the FDE header and
+    // in DW_CFA_set_loc instructions. This is always valid, whether
+    // or not we saw a 'z' augmentation string; its default value is
+    // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
+    DwarfPointerEncoding pointer_encoding;
+  };
+
+  // A frame description entry (FDE).
+  struct FDE: public Entry {
+    uint64 address;                     // start address of described code
+    uint64 size;                        // size of described code, in bytes
+
+    // If cie->has_z_lsda is true, then this is the language-specific data
+    // area's address --- or its address's address, if cie->lsda_encoding
+    // has the DW_EH_PE_indirect bit set.
+    uint64 lsda_address;
+  };
+
+  // Internal use; these classes are only forward-declared here.
+  class Rule;
+  class UndefinedRule;
+  class SameValueRule;
+  class OffsetRule;
+  class ValOffsetRule;
+  class RegisterRule;
+  class ExpressionRule;
+  class ValExpressionRule;
+  class RuleMap;
+  class State;
+
+  // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
+  // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
+  // data to parse. On success, populate ENTRY as appropriate, and return
+  // true. On failure, report the problem, and return false. Even if we
+  // return false, set ENTRY->end to the first byte after the entry if we
+  // were able to figure that out, or NULL if we weren't.
+  bool ReadEntryPrologue(const char *cursor, Entry *entry);
+
+  // Parse the fields of a CIE after the entry prologue, including any 'z'
+  // augmentation data. Assume that the 'Entry' fields of CIE are
+  // populated; use CIE->fields and CIE->end as the start and limit for
+  // parsing. On success, populate the rest of *CIE, and return true; on
+  // failure, report the problem and return false.
+  bool ReadCIEFields(CIE *cie);
+
+  // Parse the fields of an FDE after the entry prologue, including any 'z'
+  // augmentation data. Assume that the 'Entry' fields of *FDE are
+  // initialized; use FDE->fields and FDE->end as the start and limit for
+  // parsing. Assume that FDE->cie is fully initialized. On success,
+  // populate the rest of *FDE, and return true; on failure, report the
+  // problem and return false.
+  bool ReadFDEFields(FDE *fde);
+
+  // Report that ENTRY is incomplete, and return false. This is just a
+  // trivial wrapper for invoking reporter_->Incomplete; it provides a
+  // little brevity.
+  bool ReportIncomplete(Entry *entry);
+
+  // Return true if ENCODING has the DW_EH_PE_indirect bit set.
+  static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
+    return encoding & DW_EH_PE_indirect;
+  }
+
+  // The contents of the CFI section (.debug_frame or .eh_frame) to parse.
+  const char *buffer_;
+  size_t buffer_length_;  // Length of buffer_, in bytes.
+
+  // For reading multi-byte values with the appropriate endianness.
+  ByteReader *reader_;
+
+  // The handler to which we should report the data we find.
+  Handler *handler_;
+
+  // For reporting problems in the info we're parsing.
+  Reporter *reporter_;
+
+  // True if we are processing .eh_frame-format data.
+  bool eh_frame_;
+};
+
+
+// The handler class for CallFrameInfo.  The CFI parser calls the
+// member functions of a handler object to report the data it finds.
+class CallFrameInfo::Handler {
+ public:
+  // The pseudo-register number for the canonical frame address.
+  enum { kCFARegister = DW_REG_CFA };
+
+  Handler() { }
+  virtual ~Handler() { }
+
+  // The parser has found CFI for the machine code at ADDRESS,
+  // extending for LENGTH bytes. OFFSET is the offset of the frame
+  // description entry in the section, for use in error messages.
+  // VERSION is the version number of the CFI format. AUGMENTATION is
+  // a string describing any producer-specific extensions present in
+  // the data. RETURN_ADDRESS is the number of the register that holds
+  // the address to which the function should return.
+  //
+  // Entry should return true to process this CFI, or false to skip to
+  // the next entry.
+  //
+  // The parser invokes Entry for each Frame Description Entry (FDE)
+  // it finds.  The parser doesn't report Common Information Entries
+  // to the handler explicitly; instead, if the handler elects to
+  // process a given FDE, the parser reiterates the appropriate CIE's
+  // contents at the beginning of the FDE's rules.
+  virtual bool Entry(size_t offset, uint64 address, uint64 length,
+                     uint8 version, const std::string &augmentation,
+                     unsigned return_address) = 0;
+
+  // When the Entry function returns true, the parser calls these
+  // handler functions repeatedly to describe the rules for recovering
+  // registers at each instruction in the given range of machine code.
+  // Immediately after a call to Entry, the handler should assume that
+  // the rule for each callee-saves register is "unchanged" --- that
+  // is, that the register still has the value it had in the caller.
+  //
+  // If a *Rule function returns true, we continue processing this entry's
+  // instructions. If a *Rule function returns false, we stop evaluating
+  // instructions, and skip to the next entry. Either way, we call End
+  // before going on to the next entry.
+  //
+  // In all of these functions, if the REG parameter is kCFARegister, then
+  // the rule describes how to find the canonical frame address.
+  // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
+  // the canonical frame address should be used as the base address for the
+  // computation. All other REG values will be positive.
+
+  // At ADDRESS, register REG's value is not recoverable.
+  virtual bool UndefinedRule(uint64 address, int reg) = 0;
+
+  // At ADDRESS, register REG's value is the same as that it had in
+  // the caller.
+  virtual bool SameValueRule(uint64 address, int reg) = 0;
+
+  // At ADDRESS, register REG has been saved at offset OFFSET from
+  // BASE_REGISTER.
+  virtual bool OffsetRule(uint64 address, int reg,
+                          int base_register, long offset) = 0;
+
+  // At ADDRESS, the caller's value of register REG is the current
+  // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
+  // address at which the register's value is saved.)
+  virtual bool ValOffsetRule(uint64 address, int reg,
+                             int base_register, long offset) = 0;
+
+  // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
+  // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
+  // BASE_REGISTER is the "home" for REG's saved value: if you want to
+  // assign to a variable whose home is REG in the calling frame, you
+  // should put the value in BASE_REGISTER.
+  virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0;
+
+  // At ADDRESS, the DWARF expression EXPRESSION yields the address at
+  // which REG was saved.
+  virtual bool ExpressionRule(uint64 address, int reg,
+                              const std::string &expression) = 0;
+
+  // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
+  // value for REG. (This rule doesn't provide an address at which the
+  // register's value is saved.)
+  virtual bool ValExpressionRule(uint64 address, int reg,
+                                 const std::string &expression) = 0;
+
+  // Indicate that the rules for the address range reported by the
+  // last call to Entry are complete.  End should return true if
+  // everything is okay, or false if an error has occurred and parsing
+  // should stop.
+  virtual bool End() = 0;
+
+  // Handler functions for Linux C++ exception handling data. These are
+  // only called if the data includes 'z' augmentation strings.
+
+  // The Linux C++ ABI uses an extension of the DWARF CFI format to
+  // walk the stack to propagate exceptions from the throw to the
+  // appropriate catch, and do the appropriate cleanups along the way.
+  // CFI entries used for exception handling have two additional
+  // pieces of data associated with them:
+  //
+  // - The "language-specific data area" describes which exception
+  //   types the function has 'catch' clauses for, and indicates how
+  //   to go about re-entering the function at the appropriate catch
+  //   clause. If the exception is not caught, it describes the
+  //   destructors that must run before the frame is popped.
+  //
+  // - The "personality routine" is responsible for interpreting the
+  //   language-specific data area's contents, and deciding whether
+  //   the exception should continue to propagate down the stack,
+  //   perhaps after doing some cleanup for this frame, or whether the
+  //   exception will be caught here.
+  //
+  // In principle, the language-specific data area is opaque to
+  // everybody but the personality routine. In practice, these values
+  // may be useful or interesting to readers with extra context, and
+  // we have to at least skip them anyway, so we might as well report
+  // them to the handler.
+
+  // This entry's exception handling personality routine's address is
+  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+  // which the routine's address is stored. The default definition for
+  // this handler function simply returns true, allowing parsing of
+  // the entry to continue.
+  virtual bool PersonalityRoutine(uint64 address, bool indirect) {
+    return true;
+  }
+
+  // This entry's language-specific data area (LSDA) is located at
+  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
+  // which the area's address is stored. The default definition for
+  // this handler function simply returns true, allowing parsing of
+  // the entry to continue.
+  virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) {
+    return true;
+  }
+
+  // This entry describes a signal trampoline --- this frame is the
+  // caller of a signal handler. The default definition for this
+  // handler function simply returns true, allowing parsing of the
+  // entry to continue.
+  //
+  // The best description of the rationale for and meaning of signal
+  // trampoline CFI entries seems to be in the GCC bug database:
+  // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
+  virtual bool SignalHandler() { return true; }
+};
+
+
+// The CallFrameInfo class makes calls on an instance of this class to
+// report errors or warn about problems in the data it is parsing.
+// These messages are sent to the message sink |aLog| provided to the
+// constructor.
+class CallFrameInfo::Reporter {
+ public:
+  // Create an error reporter which attributes troubles to the section
+  // named SECTION in FILENAME.
+  //
+  // Normally SECTION would be .debug_frame, but the Mac puts CFI data
+  // in a Mach-O section named __debug_frame. If we support
+  // Linux-style exception handling data, we could be reading an
+  // .eh_frame section.
+  Reporter(void (*aLog)(const char*),
+           const std::string &filename,
+           const std::string &section = ".debug_frame")
+      : log_(aLog), filename_(filename), section_(section) { }
+  virtual ~Reporter() { }
+
+  // The CFI entry at OFFSET ends too early to be well-formed. KIND
+  // indicates what kind of entry it is; KIND can be kUnknown if we
+  // haven't parsed enough of the entry to tell yet.
+  virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind);
+
+  // The .eh_frame data has a four-byte zero at OFFSET where the next
+  // entry's length would be; this is a terminator. However, the buffer
+  // length as given to the CallFrameInfo constructor says there should be
+  // more data.
+  virtual void EarlyEHTerminator(uint64 offset);
+
+  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
+  // section is not that large.
+  virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset);
+
+  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
+  // there is not a CIE.
+  virtual void BadCIEId(uint64 offset, uint64 cie_offset);
+
+  // The FDE at OFFSET refers to a CIE with version number VERSION,
+  // which we don't recognize. We cannot parse DWARF CFI if it uses
+  // a version number we don't recognize.
+  virtual void UnrecognizedVersion(uint64 offset, int version);
+
+  // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
+  // which we don't recognize. We cannot parse DWARF CFI if it uses
+  // augmentations we don't recognize.
+  virtual void UnrecognizedAugmentation(uint64 offset,
+                                        const std::string &augmentation);
+
+  // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
+  // a valid encoding.
+  virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding);
+
+  // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
+  // on a base address which has not been supplied.
+  virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding);
+
+  // The CIE at OFFSET contains a DW_CFA_restore instruction at
+  // INSN_OFFSET, which may not appear in a CIE.
+  virtual void RestoreInCIE(uint64 offset, uint64 insn_offset);
+
+  // The entry at OFFSET, of kind KIND, has an unrecognized
+  // instruction at INSN_OFFSET.
+  virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind,
+                              uint64 insn_offset);
+
+  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+  // KIND, establishes a rule that cites the CFA, but we have not
+  // established a CFA rule yet.
+  virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+                         uint64 insn_offset);
+
+  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
+  // KIND, is a DW_CFA_restore_state instruction, but the stack of
+  // saved states is empty.
+  virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind,
+                               uint64 insn_offset);
+
+  // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry
+  // at OFFSET, of kind KIND, would restore a state that has no CFA
+  // rule, whereas the current state does have a CFA rule. This is
+  // bogus input, which the CallFrameInfo::Handler interface doesn't
+  // (and shouldn't) have any way to report.
+  virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind,
+                               uint64 insn_offset);
+
+ private:
+  // A logging sink function, as supplied by LUL's user.
+  void (*log_)(const char*);
+
+ protected:
+  // The name of the file whose CFI we're reading.
+  std::string filename_;
+
+  // The name of the CFI section in that file.
+  std::string section_;
+};
+
+
+using lul::CallFrameInfo;
+using lul::Summariser;
+
+// A class that accepts parsed call frame information from the DWARF
+// CFI parser (lul::CallFrameInfo) and hands the resulting unwinding
+// rules to a Summariser.
+class DwarfCFIToModule: public CallFrameInfo::Handler {
+ public:
+
+  // DwarfCFIToModule uses an instance of this class to report errors
+  // detected while converting DWARF CFI into summarised unwind rules.
+  class Reporter {
+   public:
+    // Create a reporter that writes messages to the message sink
+    // |aLog|. FILE is the name of the file we're processing, and
+    // SECTION is the name of the section within that file that we're
+    // looking at (.debug_frame, .eh_frame, etc.).
+    Reporter(void (*aLog)(const char*),
+             const std::string &file, const std::string &section)
+      : log_(aLog), file_(file), section_(section) { }
+    virtual ~Reporter() { }
+
+    // The DWARF CFI entry at OFFSET says that REG is undefined, but
+    // this cannot be expressed in the converted form.
+    virtual void UndefinedNotSupported(size_t offset,
+                                       const UniqueString* reg);
+
+    // The DWARF CFI entry at OFFSET says that REG uses a DWARF
+    // expression to find its value, but parseDwarfExpr could not
+    // convert it to a sequence of PfxInstrs.
+    virtual void ExpressionCouldNotBeSummarised(size_t offset,
+                                                const UniqueString* reg);
+
+  private:
+    // A logging sink function, as supplied by LUL's user.
+    void (*log_)(const char*);
+  protected:
+    // The file and section names given to the constructor, for use
+    // in messages.
+    std::string file_, section_;
+  };
+
+  // Per-architecture register information.  Each function returns an
+  // unsigned int; presumably this is the number of DWARF register
+  // numbers defined for the architecture, suitable for passing as the
+  // NUM_DW_REGS constructor argument below -- confirm against the
+  // definitions.
+  class RegisterNames {
+   public:
+    // Intel's "x86" or IA-32.
+    static unsigned int I386();
+
+    // AMD x86_64, AMD64, Intel EM64T, or Intel 64
+    static unsigned int X86_64();
+
+    // ARM.
+    static unsigned int ARM();
+  };
+
+  // Create a handler for the lul::CallFrameInfo parser that records
+  // the stack unwinding information it receives in SUMM.
+  //
+  // NUM_DW_REGS is the number of Dwarf-defined register names for
+  // this architecture.
+  //
+  // Use REPORTER for reporting problems encountered in the conversion
+  // process, READER for parsing Dwarf expressions, and USU as the
+  // universe in which to create UniqueStrings, should that be
+  // necessary.
+  //
+  // All four pointers are merely stored; the destructor releases none
+  // of them, so the caller presumably has to keep the pointed-to
+  // objects alive for as long as the DwarfCFIToModule is in use.
+  DwarfCFIToModule(const unsigned int num_dw_regs,
+                   Reporter *reporter,
+                   ByteReader* reader,
+                   /*MOD*/UniqueStringUniverse* usu,
+                   /*OUT*/Summariser* summ)
+      : summ_(summ), usu_(usu), num_dw_regs_(num_dw_regs),
+        reporter_(reporter), reader_(reader),
+        // Start from a defined value rather than leaving the member
+        // indeterminate until the first entry is seen.
+        entry_offset_(0),
+        // Converts to the largest unsigned value.
+        return_address_(-1) {
+  }
+  virtual ~DwarfCFIToModule() {}
+
+  // Callbacks invoked by the CFI parser (presumably the overrides of
+  // the CallFrameInfo::Handler interface).  Entry() announces a new
+  // frame description entry, the *Rule() functions each describe how
+  // to recover one register from ADDRESS onwards, and End() closes
+  // the entry.
+  virtual bool Entry(size_t offset, uint64 address, uint64 length,
+                     uint8 version, const std::string &augmentation,
+                     unsigned return_address);
+  virtual bool UndefinedRule(uint64 address, int reg);
+  virtual bool SameValueRule(uint64 address, int reg);
+  virtual bool OffsetRule(uint64 address, int reg,
+                          int base_register, long offset);
+  virtual bool ValOffsetRule(uint64 address, int reg,
+                             int base_register, long offset);
+  virtual bool RegisterRule(uint64 address, int reg, int base_register);
+  virtual bool ExpressionRule(uint64 address, int reg,
+                              const std::string &expression);
+  virtual bool ValExpressionRule(uint64 address, int reg,
+                                 const std::string &expression);
+  virtual bool End();
+
+ private:
+  // Return the name to use for register I.
+  const UniqueString* RegisterName(int i);
+
+  // The Summariser to which we should give entries
+  Summariser* summ_;
+
+  // Universe for creating UniqueStrings in, should that be necessary.
+  UniqueStringUniverse* usu_;
+
+  // The number of Dwarf-defined register names for this architecture.
+  const unsigned int num_dw_regs_;
+
+  // The reporter to use to report problems.
+  Reporter *reporter_;
+
+  // The ByteReader to use for parsing Dwarf expressions.
+  ByteReader* reader_;
+
+  // The section offset of the current frame description entry, for
+  // use in error messages.  Zero until an entry has been seen.
+  size_t entry_offset_;
+
+  // The return address column for that entry.
+  unsigned return_address_;
+};
+
+
+// Convert the Dwarf expression in |expr| into PfxInstrs stored in the
+// SecMap referred to by |summ|, and return the index of the starting
+// PfxInstr added, which must be >= 0.  In case of failure return -1.
+//
+// NOTE(review): the definition is not visible from this header, so
+// the following is inferred from the parameter names only and should
+// be confirmed against the implementation: |reader| presumably
+// decodes the operand bytes of |expr|, |debug| presumably enables
+// diagnostic output, |pushCfaAtStart| presumably pushes the CFA
+// before the expression proper, and |derefAtEnd| presumably
+// dereferences the final result.
+int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader,
+                       string expr, bool debug,
+                       bool pushCfaAtStart, bool derefAtEnd);
+
+} // namespace lul
+
+#endif // LulDwarfExt_h
diff --git a/tools/profiler/lul/LulDwarfInt.h b/tools/profiler/lul/LulDwarfInt.h
new file mode 100644
index 000000000..05c231f84
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfInt.h
@@ -0,0 +1,194 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// This file is derived from the following file in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/dwarf/dwarf2enums.h
+
+#ifndef LulDwarfInt_h
+#define LulDwarfInt_h
+
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+
+namespace lul {
+
+// These enums do not follow the google3 style only because they are
+// known universally (specs, other implementations) by the names in
+// exactly this capitalization.
+// Tag names and codes.
+
+// Call Frame Info instructions.
+//
+// The names and numeric values are those assigned by the DWARF
+// specification (plus a few vendor extensions at the end).
+enum DwarfCFI
+  {
+    // Per the DWARF specification, these three are identified by the
+    // top two bits of the instruction byte alone; the remaining six
+    // bits carry an operand.
+    DW_CFA_advance_loc        = 0x40,
+    DW_CFA_offset             = 0x80,
+    DW_CFA_restore            = 0xc0,
+    // The remaining instructions all have the top two bits clear.
+    DW_CFA_nop                = 0x00,
+    DW_CFA_set_loc            = 0x01,
+    DW_CFA_advance_loc1       = 0x02,
+    DW_CFA_advance_loc2       = 0x03,
+    DW_CFA_advance_loc4       = 0x04,
+    DW_CFA_offset_extended    = 0x05,
+    DW_CFA_restore_extended   = 0x06,
+    DW_CFA_undefined          = 0x07,
+    DW_CFA_same_value         = 0x08,
+    DW_CFA_register           = 0x09,
+    DW_CFA_remember_state     = 0x0a,
+    DW_CFA_restore_state      = 0x0b,
+    DW_CFA_def_cfa            = 0x0c,
+    DW_CFA_def_cfa_register   = 0x0d,
+    DW_CFA_def_cfa_offset     = 0x0e,
+    DW_CFA_def_cfa_expression = 0x0f,
+    DW_CFA_expression         = 0x10,
+    DW_CFA_offset_extended_sf = 0x11,
+    DW_CFA_def_cfa_sf         = 0x12,
+    DW_CFA_def_cfa_offset_sf  = 0x13,
+    DW_CFA_val_offset         = 0x14,
+    DW_CFA_val_offset_sf      = 0x15,
+    DW_CFA_val_expression     = 0x16,
+
+    // Opcodes in this range are reserved for user extensions.
+    DW_CFA_lo_user = 0x1c,
+    DW_CFA_hi_user = 0x3f,
+
+    // The vendor-specific opcodes below all have values inside the
+    // lo_user .. hi_user range above.
+
+    // SGI/MIPS specific.
+    DW_CFA_MIPS_advance_loc8 = 0x1d,
+
+    // GNU extensions.
+    DW_CFA_GNU_window_save = 0x2d,
+    DW_CFA_GNU_args_size = 0x2e,
+    DW_CFA_GNU_negative_offset_extended = 0x2f
+  };
+
+// Exception handling 'z' augmentation letters.
+//
+// Each enumerator's value is the character that appears in the CIE's
+// augmentation string to signal the corresponding feature.
+enum DwarfZAugmentationCodes {
+  // If the CFI augmentation string begins with 'z', then the CIE and FDE
+  // have an augmentation data area just before the instructions, whose
+  // contents are determined by the subsequent augmentation letters.
+  DW_Z_augmentation_start = 'z',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding, and the FDE
+  // augmentation data includes a language-specific data area pointer,
+  // represented using that encoding.
+  DW_Z_has_LSDA = 'L',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding, followed by a pointer
+  // to a personality routine, represented using that encoding.
+  DW_Z_has_personality_routine = 'P',
+
+  // If this letter is present in a 'z' augmentation string, the CIE
+  // augmentation data includes a pointer encoding describing how the FDE's
+  // initial location, address range, and DW_CFA_set_loc operands are
+  // encoded.
+  DW_Z_has_FDE_address_encoding = 'R',
+
+  // If this letter is present in a 'z' augmentation string, then code
+  // addresses covered by FDEs that cite this CIE are signal delivery
+  // trampolines. Return addresses of frames in trampolines should not be
+  // adjusted as described in section 6.4.4 of the DWARF 3 spec.
+  DW_Z_is_signal_trampoline = 'S'
+};
+
+// Expression opcodes
+//
+// The names and numeric values are those assigned by the DWARF
+// specification.  The list is not exhaustive: the lit, reg and breg
+// families are represented only by their first and last members
+// (e.g. DW_OP_lit0 and DW_OP_lit31), with the 30 opcodes in between
+// left unnamed.
+enum DwarfExpressionOpcodes {
+  DW_OP_addr    = 0x03,
+  DW_OP_deref   = 0x06,
+  DW_OP_const1s = 0x09,
+  DW_OP_const2u = 0x0a,
+  DW_OP_const2s = 0x0b,
+  DW_OP_const4u = 0x0c,
+  DW_OP_const4s = 0x0d,
+  DW_OP_const8u = 0x0e,
+  DW_OP_const8s = 0x0f,
+  DW_OP_constu  = 0x10,
+  DW_OP_consts  = 0x11,
+  DW_OP_dup     = 0x12,
+  DW_OP_drop    = 0x13,
+  DW_OP_over    = 0x14,
+  DW_OP_pick    = 0x15,
+  DW_OP_swap    = 0x16,
+  DW_OP_rot     = 0x17,
+  DW_OP_xderef  = 0x18,
+  DW_OP_abs     = 0x19,
+  DW_OP_and     = 0x1a,
+  DW_OP_div     = 0x1b,
+  DW_OP_minus   = 0x1c,
+  DW_OP_mod     = 0x1d,
+  DW_OP_mul     = 0x1e,
+  DW_OP_neg     = 0x1f,
+  DW_OP_not     = 0x20,
+  DW_OP_or      = 0x21,
+  DW_OP_plus    = 0x22,
+  DW_OP_plus_uconst = 0x23,
+  DW_OP_shl     = 0x24,
+  DW_OP_shr     = 0x25,
+  DW_OP_shra    = 0x26,
+  DW_OP_xor     = 0x27,
+  // DW_OP_skip is listed out of numeric order here; its value sits
+  // between DW_OP_ne (0x2e) and DW_OP_lit0 (0x30).
+  DW_OP_skip    = 0x2f,
+  DW_OP_bra     = 0x28,
+  DW_OP_eq      = 0x29,
+  DW_OP_ge      = 0x2a,
+  DW_OP_gt      = 0x2b,
+  DW_OP_le      = 0x2c,
+  DW_OP_lt      = 0x2d,
+  DW_OP_ne      = 0x2e,
+  // First and last members of three 32-opcode families.
+  DW_OP_lit0    = 0x30,
+  DW_OP_lit31   = 0x4f,
+  DW_OP_reg0    = 0x50,
+  DW_OP_reg31   = 0x6f,
+  DW_OP_breg0   = 0x70,
+  DW_OP_breg31  = 0x8f,
+  DW_OP_regx    = 0x90,
+  DW_OP_fbreg     = 0x91,
+  DW_OP_bregx     = 0x92,
+  DW_OP_piece     = 0x93,
+  DW_OP_deref_size = 0x94,
+  DW_OP_xderef_size = 0x95,
+  DW_OP_nop       = 0x96,
+  DW_OP_push_object_address = 0x97,
+  DW_OP_call2     = 0x98,
+  DW_OP_call4     = 0x99,
+  DW_OP_call_ref  = 0x9a,
+  DW_OP_form_tls_address = 0x9b,
+  DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
+  // Bounds of the range of opcode values named "user".
+  DW_OP_lo_user   = 0xe0,
+  DW_OP_hi_user   = 0xff
+};
+
+} // namespace lul
+
+#endif // LulDwarfInt_h
diff --git a/tools/profiler/lul/LulDwarfSummariser.cpp b/tools/profiler/lul/LulDwarfSummariser.cpp
new file mode 100644
index 000000000..74c2565df
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfSummariser.cpp
@@ -0,0 +1,359 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulDwarfSummariser.h"
+
+#include "mozilla/Assertions.h"
+
+// Set this to 1 for verbose logging
+#define DEBUG_SUMMARISER 0
+
+namespace lul {
+
+// Return true iff |s64| lies in the signed 32-bit range, which is
+// exactly the set of values that are reproduced by sign-extending
+// their own lowest 32 bits.
+static inline bool fitsIn32Bits(int64 s64) {
+  const int64 lowestOk  = -0x80000000LL;
+  const int64 highestOk =  0x7fffffffLL;
+  return lowestOk <= s64 && s64 <= highestOk;
+}
+
+// Validate the LExpr prefix expression that begins at
+// pfxInstrs[start] and extends up to the next PX_End instruction.
+// Two properties are checked:
+// * |start| is a valid index into |pfxInstrs|
+// * every PX_DwReg instruction names a register that is tracked on
+//   this target
+// Returns NULL if the expression is acceptable; otherwise returns a
+// pointer to constant text describing the problem.  Running off the
+// end of |pfxInstrs| without meeting a PX_End is not reported as a
+// problem.
+static const char*
+checkPfxExpr(const vector<PfxInstr>* pfxInstrs, int64_t start)
+{
+  const size_t total = pfxInstrs->size();
+  if (!(start >= 0 && start < (ssize_t)total)) {
+    return "bogus start point";
+  }
+
+  size_t ix = start;
+  while (ix < total) {
+    PfxInstr instr = (*pfxInstrs)[ix];
+    if (instr.mOpcode == PX_End) {
+      break;
+    }
+    const bool readsReg = instr.mOpcode == PX_DwReg;
+    if (readsReg && !registerIsTracked((DW_REG_NUMBER)instr.mOperand)) {
+      return "uses untracked reg";
+    }
+    ix++;
+  }
+  return nullptr; // success
+}
+
+
+// Set up a summariser that parks its results in |aSecMap|, adds
+// |aTextBias| to every incoming address, and logs through |aLog|.
+Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias,
+                       void(*aLog)(const char*))
+  : mSecMap(aSecMap)
+  , mCurrAddr(0)
+  , mMax1Addr(0) // Equal to mCurrAddr, which gives an empty range.
+  , mTextBias(aTextBias)
+  , mLog(aLog)
+{
+  // Put the running RuleSet into its "haven't got a clue" state.
+  new (&mCurrRules) RuleSet();
+}
+
+// Start summarising a new address range of |aLength| bytes whose
+// unbiased start address is |aAddress|.
+void
+Summariser::Entry(uintptr_t aAddress, uintptr_t aLength)
+{
+  const uintptr_t biasedStart = aAddress + mTextBias;
+  if (DEBUG_SUMMARISER) {
+    char buf[100];
+    SprintfLiteral(buf,
+                   "LUL Entry(%llx, %llu)\n",
+                   (unsigned long long int)biasedStart,
+                   (unsigned long long int)aLength);
+    mLog(buf);
+  }
+  // Whatever summary was in progress is dropped at this point; the
+  // assumption is that it was already completed by a call to End().
+  new (&mCurrRules) RuleSet();
+  mCurrAddr = biasedStart;
+  mMax1Addr = biasedStart + aLength;
+}
+
+// Record that, from the (unbiased) address |aAddress| onwards, the
+// value of register |aNewReg| is recovered by LExpr(how, oldReg, offset).
+//
+// If the biased address lies beyond mCurrAddr, the RuleSet built up so
+// far is first handed to mSecMap for the range [mCurrAddr, aAddress)
+// and mCurrAddr is advanced.  The new rule is then validated and, if
+// it is representable on this architecture, stored in mCurrRules.
+// Otherwise control transfers to |cant_summarise|, mCurrRules gets no
+// update for this rule, and a message is logged provided that a
+// reason string was set.
+void
+Summariser::Rule(uintptr_t aAddress, int aNewReg,
+                 LExprHow how, int16_t oldReg, int64_t offset)
+{
+  // Apply the text bias; the unbiased value is recomputed below only
+  // for the failure message.
+  aAddress += mTextBias;
+  if (DEBUG_SUMMARISER) {
+    char buf[100];
+    if (how == NODEREF || how == DEREF) {
+      bool deref = how == DEREF;
+      SprintfLiteral(buf,
+                     "LUL  0x%llx  old-r%d = %sr%d + %lld%s\n",
+                     (unsigned long long int)aAddress, aNewReg,
+                     deref ? "*(" : "", (int)oldReg, (long long int)offset,
+                     deref ? ")" : "");
+    } else if (how == PFXEXPR) {
+      SprintfLiteral(buf,
+                     "LUL  0x%llx  old-r%d = pfx-expr-at %lld\n",
+                     (unsigned long long int)aAddress, aNewReg,
+                     (long long int)offset);
+    } else {
+      SprintfLiteral(buf,
+                     "LUL  0x%llx  old-r%d = (invalid LExpr!)\n",
+                     (unsigned long long int)aAddress, aNewReg);
+    }
+    mLog(buf);
+  }
+
+  if (mCurrAddr < aAddress) {
+    // Flush the existing summary first.
+    mCurrRules.mAddr = mCurrAddr;
+    mCurrRules.mLen  = aAddress - mCurrAddr;
+    mSecMap->AddRuleSet(&mCurrRules);
+    if (DEBUG_SUMMARISER) {
+      mLog("LUL  "); mCurrRules.Print(mLog);
+      mLog("\n");
+    }
+    mCurrAddr = aAddress;
+  }
+
+  // If for some reason summarisation fails, either or both of these
+  // become non-null and point at constant text describing the
+  // problem.  Using two rather than just one avoids complications of
+  // having to concatenate two strings to produce a complete error message.
+  const char* reason1 = nullptr;
+  const char* reason2 = nullptr;
+  
+  // |offset| needs to be a 32 bit value that sign extends to 64 bits
+  // on a 64 bit target.  We will need to incorporate |offset| into
+  // any LExpr made here.  So we may as well check it right now.
+  if (!fitsIn32Bits(offset)) {
+    reason1 = "offset not in signed 32-bit range";
+    goto cant_summarise;
+  }
+
+  // FIXME: factor out common parts of the arch-dependent summarisers.
+
+#if defined(LUL_ARCH_arm)
+
+  // ----------------- arm ----------------- //
+
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+
+    case DW_REG_CFA:
+      // This is a rule that defines the CFA.  The only forms we
+      // choose to represent are: r7/11/12/13 + offset.  The offset
+      // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM,
+      // hence there is no need to check it for overflow.
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      switch (oldReg) {
+        case DW_REG_ARM_R7:  case DW_REG_ARM_R11:
+        case DW_REG_ARM_R12: case DW_REG_ARM_R13:
+          break;
+        default:
+          reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+          goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_ARM_R7:  case DW_REG_ARM_R11: case DW_REG_ARM_R12:
+    case DW_REG_ARM_R13: case DW_REG_ARM_R14: case DW_REG_ARM_R15: {
+      // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or
+      // R15 (the return address).
+      switch (how) {
+        case NODEREF: case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for R7/11/12/13/14/15: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          // For PFXEXPR, |offset| is the start index of the expression
+          // within the SecMap's PfxInstr vector.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for R7/11/12/13/14/15: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          // Any other |how| is rejected without setting a reason, so
+          // nothing is logged for it.
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_ARM_R7:  mCurrRules.mR7expr  = expr; break;
+        case DW_REG_ARM_R11: mCurrRules.mR11expr = expr; break;
+        case DW_REG_ARM_R12: mCurrRules.mR12expr = expr; break;
+        case DW_REG_ARM_R13: mCurrRules.mR13expr = expr; break;
+        case DW_REG_ARM_R14: mCurrRules.mR14expr = expr; break;
+        case DW_REG_ARM_R15: mCurrRules.mR15expr = expr; break;
+        default: MOZ_ASSERT(0);
+      }
+      break;
+    }
+
+    default:
+      // Leave |reason1| and |reason2| unset here.  This program point
+      // is reached so often that it causes a flood of "Can't
+      // summarise" messages.  In any case, we don't really care about
+      // the fact that this summary would produce a new value for a
+      // register that we're not tracking.  We do on the other hand
+      // care if the summary's expression *uses* a register that we're
+      // not tracking.  But in that case one of the above failures
+      // should tell us which.
+      goto cant_summarise;
+  }
+
+  // Mark callee-saved registers (r4 .. r11) as unchanged, if there is
+  // no other information about them.  FIXME: do this just once, at
+  // the point where the ruleset is committed.
+  if (mCurrRules.mR7expr.mHow == UNKNOWN) {
+    mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0);
+  }
+  if (mCurrRules.mR11expr.mHow == UNKNOWN) {
+    mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0);
+  }
+  if (mCurrRules.mR12expr.mHow == UNKNOWN) {
+    mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0);
+  }
+
+  // The old r13 (SP) value before the call is always the same as the
+  // CFA.
+  // Note that this assignment is unconditional, so it also replaces
+  // any R13 rule stored by the switch above.
+  mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0);
+
+  // If there's no information about R15 (the return address), say
+  // it's a copy of R14 (the link register).
+  if (mCurrRules.mR15expr.mHow == UNKNOWN) {
+    mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0);
+  }
+
+#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+
+  // ---------------- x64/x86 ---------------- //
+
+  // Now, can we add the rule to our summary?  This depends on whether
+  // the registers and the overall expression are representable.  This
+  // is the heart of the summarisation process.
+  switch (aNewReg) {
+
+    case DW_REG_CFA:
+      // This is a rule that defines the CFA.  The only forms we can
+      // represent are: = SP+offset or = FP+offset.
+      if (how != NODEREF) {
+        reason1 = "rule for DW_REG_CFA: invalid |how|";
+        goto cant_summarise;
+      }
+      if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) {
+        reason1 = "rule for DW_REG_CFA: invalid |oldReg|";
+        goto cant_summarise;
+      }
+      mCurrRules.mCfaExpr = LExpr(how, oldReg, offset);
+      break;
+
+    case DW_REG_INTEL_XSP: case DW_REG_INTEL_XBP: case DW_REG_INTEL_XIP: {
+      // This is a new rule for XSP, XBP or XIP (the return address).
+      switch (how) {
+        case NODEREF: case DEREF:
+          // Check the old register is one we're tracking.
+          if (!registerIsTracked((DW_REG_NUMBER)oldReg) &&
+              oldReg != DW_REG_CFA) {
+            reason1 = "rule for XSP/XBP/XIP: uses untracked reg";
+            goto cant_summarise;
+          }
+          break;
+        case PFXEXPR: {
+          // Check that the prefix expression only mentions tracked registers.
+          // For PFXEXPR, |offset| is the start index of the expression
+          // within the SecMap's PfxInstr vector.
+          const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs();
+          reason2 = checkPfxExpr(pfxInstrs, offset);
+          if (reason2) {
+            reason1 = "rule for XSP/XBP/XIP: ";
+            goto cant_summarise;
+          }
+          break;
+        }
+        default:
+          // Any other |how| is rejected without setting a reason, so
+          // nothing is logged for it.
+          goto cant_summarise;
+      }
+      LExpr expr = LExpr(how, oldReg, offset);
+      switch (aNewReg) {
+        case DW_REG_INTEL_XBP: mCurrRules.mXbpExpr = expr; break;
+        case DW_REG_INTEL_XSP: mCurrRules.mXspExpr = expr; break;
+        case DW_REG_INTEL_XIP: mCurrRules.mXipExpr = expr; break;
+        default: MOZ_CRASH("impossible value for aNewReg");
+      }
+      break;
+    }
+
+    default:
+      // Leave |reason1| and |reason2| unset here, for the reasons
+      // explained in the analogous point in the ARM case just above.
+      goto cant_summarise;
+
+  }
+
+  // On Intel, it seems the old SP value before the call is always the
+  // same as the CFA.  Therefore, in the absence of any other way to
+  // recover the SP, specify that the CFA should be copied.
+  if (mCurrRules.mXspExpr.mHow == UNKNOWN) {
+    mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0);
+  }
+
+  // Also, gcc says "Undef" for BP when it is unchanged.
+  if (mCurrRules.mXbpExpr.mHow == UNKNOWN) {
+    mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0);
+  }
+
+#else
+
+# error "Unsupported arch"
+#endif
+
+  // Success: the rule is now part of mCurrRules.
+  return;
+
+  // Failure exit.  A message is produced only if at least one reason
+  // was recorded; the address is printed with the bias removed again.
+ cant_summarise:
+  if (reason1 || reason2) {
+    char buf[200];
+    SprintfLiteral(buf, "LUL  can't summarise: "
+                        "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n",
+                   (unsigned long long int)(aAddress - mTextBias),
+                   reason1 ? reason1 : "", reason2 ? reason2 : "",
+                   NameOf_LExprHow(how),
+                   (unsigned int)oldReg, (long long int)offset);
+    mLog(buf);
+  }
+}
+
+// Hand |pfxi| on to the SecMap and pass back the value that the
+// SecMap's AddPfxInstr returns for it.
+uint32_t
+Summariser::AddPfxInstr(PfxInstr pfxi)
+{
+  const uint32_t result = mSecMap->AddPfxInstr(pfxi);
+  return result;
+}
+
+// Finish the current summary: if any addresses remain between
+// mCurrAddr and mMax1Addr, commit the running RuleSet for them.
+void
+Summariser::End()
+{
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL End\n");
+  }
+
+  // Nothing is left to emit when the remaining range is empty.
+  if (!(mCurrAddr < mMax1Addr)) {
+    return;
+  }
+
+  mCurrRules.mLen  = mMax1Addr - mCurrAddr;
+  mCurrRules.mAddr = mCurrAddr;
+  mSecMap->AddRuleSet(&mCurrRules);
+
+  if (DEBUG_SUMMARISER) {
+    mLog("LUL  ");
+    mCurrRules.Print(mLog);
+    mLog("\n");
+  }
+}
+
+} // namespace lul
diff --git a/tools/profiler/lul/LulDwarfSummariser.h b/tools/profiler/lul/LulDwarfSummariser.h
new file mode 100644
index 000000000..b41db1ee3
--- /dev/null
+++ b/tools/profiler/lul/LulDwarfSummariser.h
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulDwarfSummariser_h
+#define LulDwarfSummariser_h
+
+#include "LulMainInt.h"
+
+namespace lul {
+
+// Collects register-recovery rules for one address range at a time,
+// groups them into RuleSets, and parks the finished RuleSets in a
+// SecMap.  The expected call sequence per range is Entry(), any
+// number of Rule() calls, then End().
+class Summariser
+{
+public:
+  // |aSecMap| receives the finished summaries, |aTextBias| is added to
+  // every incoming address, and |aLog| is the logging sink.
+  Summariser(SecMap* aSecMap, uintptr_t aTextBias, void(*aLog)(const char*));
+
+  // The class has virtual methods, so give it a virtual destructor to
+  // make deletion through a base-class pointer well defined.
+  virtual ~Summariser() {}
+
+  // Start a new summary for the |aLength| bytes at |aAddress|,
+  // discarding any summary still in progress.
+  virtual void Entry(uintptr_t aAddress, uintptr_t aLength);
+
+  // Finish the current summary, committing whatever part of the range
+  // has not been emitted yet.
+  virtual void End();
+
+  // Tell the summariser that the value for |aNewReg| at |aAddress| is
+  // recovered using the LExpr that can be constructed using the
+  // components |how|, |oldReg| and |offset|.  The summariser will
+  // inspect the components and may reject them for various reasons,
+  // but the hope is that it will find them acceptable and record this
+  // rule permanently.
+  virtual void Rule(uintptr_t aAddress, int aNewReg,
+                    LExprHow how, int16_t oldReg, int64_t offset);
+
+  // Add |pfxi| to the SecMap and return the value the SecMap reports
+  // for it.
+  virtual uint32_t AddPfxInstr(PfxInstr pfxi);
+
+  // Send output to the logging sink, for debugging.
+  virtual void Log(const char* str) { mLog(str); }
+
+private:
+  // The SecMap in which we park the finished summaries (RuleSets) and
+  // also any PfxInstrs derived from Dwarf expressions.
+  SecMap* mSecMap;
+
+  // Running state for the current summary (RuleSet) under construction.
+  RuleSet mCurrRules;
+
+  // The start of the address range to which the RuleSet under
+  // construction applies.
+  uintptr_t mCurrAddr;
+
+  // The highest address, plus one, for which the RuleSet under
+  // construction could possibly apply.  If there are no further
+  // incoming events then mCurrRules will eventually be emitted
+  // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is
+  // nonempty.
+  uintptr_t mMax1Addr;
+
+  // The bias value (to add to the SVMAs, to get AVMAs) to be used
+  // when adding entries into mSecMap.
+  uintptr_t mTextBias;
+
+  // A logging sink, for debugging.  It is handed complete message
+  // text, not a format string.
+  void (*mLog)(const char* aMsg);
+};
+
+} // namespace lul
+
+#endif // LulDwarfSummariser_h
diff --git a/tools/profiler/lul/LulElf.cpp b/tools/profiler/lul/LulElf.cpp
new file mode 100644
index 000000000..6f90d5f13
--- /dev/null
+++ b/tools/profiler/lul/LulElf.cpp
@@ -0,0 +1,915 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// (derived from)
+// dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
+// Find all the debugging info in a file and dump it as a Breakpad symbol file.
+//
+// dump_symbols.h: Read debugging information from an ELF file, and write
+// it out as a Breakpad symbol file.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/linux/dump_symbols.cc
+//   src/common/linux/elfutils.cc
+//   src/common/linux/file_id.cc
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Sprintf.h"
+
+#include "LulPlatformMacros.h"
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+#include "LulElfInt.h"
+#include "LulMainInt.h"
+
+
+#if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
+// bionic and older glibc don't define it
+# define SHT_ARM_EXIDX (SHT_LOPROC + 1)
+#endif
+
+
+// This namespace contains helper functions.
+namespace {
+
+using lul::DwarfCFIToModule;
+using lul::FindElfSectionByName;
+using lul::GetOffset;
+using lul::IsValidElf;
+using lul::Module;
+using lul::UniqueStringUniverse;
+using lul::scoped_ptr;
+using lul::Summariser;
+using std::string;
+using std::vector;
+using std::set;
+
+//
+// FDWrapper
+//
+// Owns a file descriptor and close()s it on destruction, unless
+// ownership has been given up with release().  Copying is disabled:
+// two copies would each close() the same descriptor.
+//
+class FDWrapper {
+ public:
+  // Takes ownership of |fd|.  -1 means "no descriptor held".
+  explicit FDWrapper(int fd) :
+    fd_(fd) {}
+  ~FDWrapper() {
+    if (fd_ != -1)
+      close(fd_);
+  }
+
+  FDWrapper(const FDWrapper&) = delete;
+  FDWrapper& operator=(const FDWrapper&) = delete;
+
+  // Returns the held descriptor; ownership stays with the wrapper.
+  int get() const {
+    return fd_;
+  }
+  // Returns the held descriptor and stops owning it, so the destructor
+  // will no longer close it.
+  int release() {
+    int fd = fd_;
+    fd_ = -1;
+    return fd;
+  }
+ private:
+  int fd_;
+};
+
+//
+// MmapWrapper
+//
+// Remembers a mapped region and munmap()s it on destruction, unless
+// release() has been called first.  Copying is disabled: two copies
+// would each munmap() the same region.
+//
+class MmapWrapper {
+ public:
+  MmapWrapper() : is_set_(false), base_(NULL), size_(0){}
+  ~MmapWrapper() {
+    if (is_set_ && base_ != NULL) {
+      MOZ_ASSERT(size_ > 0);
+      munmap(base_, size_);
+    }
+  }
+
+  MmapWrapper(const MmapWrapper&) = delete;
+  MmapWrapper& operator=(const MmapWrapper&) = delete;
+
+  // Records the region to unmap later.  A region recorded by an earlier
+  // set() call is simply forgotten, not unmapped.
+  void set(void *mapped_address, size_t mapped_size) {
+    is_set_ = true;
+    base_ = mapped_address;
+    size_ = mapped_size;
+  }
+  // Forgets the recorded region without unmapping it.  Must only be
+  // called after set().
+  void release() {
+    MOZ_ASSERT(is_set_);
+    is_set_ = false;
+    base_ = NULL;
+    size_ = 0;
+  }
+
+ private:
+  bool is_set_;
+  void *base_;
+  size_t size_;
+};
+
+
+// Look up how many Dwarf register names apply to the machine
+// architecture recorded in |elf_header| and store that count in
+// |*num_dw_regnames|.  Returns true for the supported machines (x86,
+// ARM, x86_64).  For any other machine this asserts, leaves
+// |*num_dw_regnames| untouched and returns false.
+template<typename ElfClass>
+bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
+                           unsigned int* num_dw_regnames) {
+  const auto machine = elf_header->e_machine;
+
+  if (machine == EM_386) {
+    *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386();
+    return true;
+  }
+  if (machine == EM_ARM) {
+    *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
+    return true;
+  }
+  if (machine == EM_X86_64) {
+    *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
+    return true;
+  }
+
+  // Unsupported architecture.
+  MOZ_ASSERT(0);
+  return false;
+}
+
+// Parse the call frame information stored in |section| (named
+// |section_name|) of the ELF image at |elf_header|, and route it through
+// a DwarfCFIToModule handler into a Summariser that adds summaries to
+// |smap| using |text_bias|.  |eh_frame| is forwarded to the parser.
+// |got_section| and |text_section| may be null; when present their
+// addresses are given to the reader as data/text bases for encoded
+// pointers.  Returns false if the machine architecture is unsupported,
+// and true once the parser has been run.
+template<typename ElfClass>
+bool LoadDwarfCFI(const string& dwarf_filename,
+                  const typename ElfClass::Ehdr* elf_header,
+                  const char* section_name,
+                  const typename ElfClass::Shdr* section,
+                  const bool eh_frame,
+                  const typename ElfClass::Shdr* got_section,
+                  const typename ElfClass::Shdr* text_section,
+                  const bool big_endian,
+                  SecMap* smap,
+                  uintptr_t text_bias,
+                  UniqueStringUniverse* usu,
+                  void (*log)(const char*)) {
+  // The handler needs the Dwarf register count for this architecture.
+  unsigned int num_dw_regs = 0;
+  const bool known_arch =
+      DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs);
+  if (!known_arch) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
+            " cannot convert DWARF call frame information\n",
+            dwarf_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  lul::Endianness endianness = lul::ENDIANNESS_LITTLE;
+  if (big_endian) {
+    endianness = lul::ENDIANNESS_BIG;
+  }
+
+  // Where the CFI bytes live in the mapped image, and how many there are.
+  const char* cfi_bytes =
+      GetOffset<ElfClass, char>(elf_header, section->sh_offset);
+  size_t cfi_length = section->sh_size;
+
+  // Wire up summariser -> reader -> handler -> parser.
+
+  // The summariser is the final consumer: it turns what the handler
+  // reports into summaries and stores them in |smap|.
+  Summariser summariser(smap, text_bias, log);
+
+  lul::ByteReader byte_reader(endianness);
+  byte_reader.SetAddressSize(ElfClass::kAddrSize);
+
+  DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
+  DwarfCFIToModule cfi_handler(num_dw_regs, &module_reporter, &byte_reader,
+                               usu, &summariser);
+
+  // Give the reader whatever base addresses we have, for decoding
+  // .eh_frame encoded pointers.
+  byte_reader.SetCFIDataBase(section->sh_addr, cfi_bytes);
+  if (got_section) {
+    byte_reader.SetDataBase(got_section->sh_addr);
+  }
+  if (text_section) {
+    byte_reader.SetTextBase(text_section->sh_addr);
+  }
+
+  lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
+                                              section_name);
+  lul::CallFrameInfo cfi_parser(cfi_bytes, cfi_length,
+                                &byte_reader, &cfi_handler, &dwarf_reporter,
+                                eh_frame);
+  cfi_parser.Start();
+
+  return true;
+}
+
+// Open |obj_file|, map the whole file read-only and hand the mapping to
+// |map_wrapper|, which will unmap it when destroyed.  On success
+// |*elf_header| points at the start of the mapping and true is returned.
+// Returns false (after printing a diagnostic to stderr) if the file
+// cannot be opened, stat'ed or mapped, is empty, or does not start with
+// the ELF magic.  In the last case the mapping is still owned by
+// |map_wrapper| and |*elf_header| has been set.
+bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
+             void** elf_header) {
+  int obj_fd = open(obj_file.c_str(), O_RDONLY);
+  if (obj_fd < 0) {
+    fprintf(stderr, "Failed to open ELF file '%s': %s\n",
+            obj_file.c_str(), strerror(errno));
+    return false;
+  }
+  // Closes the descriptor on every return path; the mapping outlives it.
+  FDWrapper obj_fd_wrapper(obj_fd);
+  struct stat st;
+  // These are two independent failures.  Testing them with && (as this
+  // code used to) let a failed fstat() fall through and use an
+  // uninitialised |st|.
+  if (fstat(obj_fd, &st) != 0) {
+    fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
+            obj_file.c_str(), strerror(errno));
+    return false;
+  }
+  if (st.st_size <= 0) {
+    // errno is meaningless here, so don't report it.
+    fprintf(stderr, "ELF file '%s' is empty\n", obj_file.c_str());
+    return false;
+  }
+  // Mapping it read-only is good enough.  In any case, mapping it
+  // read-write confuses Valgrind's debuginfo acquire/discard
+  // heuristics, making it hard to profile the profiler.
+  void *obj_base = mmap(nullptr, st.st_size,
+                        PROT_READ, MAP_PRIVATE, obj_fd, 0);
+  if (obj_base == MAP_FAILED) {
+    fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
+            obj_file.c_str(), strerror(errno));
+    return false;
+  }
+  // From here on |map_wrapper| is responsible for unmapping.
+  map_wrapper->set(obj_base, st.st_size);
+  *elf_header = obj_base;
+  if (!IsValidElf(*elf_header)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
+    return false;
+  }
+  return true;
+}
+
+// Decode the data-encoding byte of |elf_header|.  For little- or
+// big-endian encodings, store false or true respectively in
+// |*big_endian| and return true.  For anything else, complain on stderr,
+// leave |*big_endian| alone and return false.
+template<typename ElfClass>
+bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
+                   bool* big_endian) {
+  const int encoding = elf_header->e_ident[EI_DATA];
+
+  switch (encoding) {
+    case ELFDATA2LSB:
+      *big_endian = false;
+      return true;
+    case ELFDATA2MSB:
+      *big_endian = true;
+      return true;
+    default:
+      break;
+  }
+
+  fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
+  return false;
+}
+
+//
+// LoadSymbolsInfo
+//
+// State shared by the (up to) two LoadSymbols() calls made for one
+// object: the first on the object itself and, if needed, a second on a
+// separate debug file.
+//
+template<typename ElfClass>
+class LoadSymbolsInfo {
+ public:
+  typedef typename ElfClass::Addr Addr;
+
+  // Keeps a reference to |dbg_dirs|; the vector must outlive this object.
+  explicit LoadSymbolsInfo(const vector<string>& dbg_dirs)
+    : debug_dirs_(dbg_dirs),
+      has_loading_addr_(false) {}
+
+  // Note that |section| has been loaded.  If it was already noted
+  // before, print a warning to stderr instead.
+  void LoadedSection(const string &section) {
+    const bool first_time = loaded_sections_.insert(section).second;
+    if (!first_time) {
+      fprintf(stderr, "Section %s has already been loaded.\n",
+              section.c_str());
+    }
+  }
+
+  // Returns a copy of the debug file path (empty unless something set it).
+  string debuglink_file() const {
+    return debuglink_file_;
+  }
+
+ private:
+  // Directories to search for the debug ELF file.
+  const vector<string>& debug_dirs_;
+
+  // Full path to the debug ELF file.
+  string debuglink_file_;
+
+  // Whether a loading address has been recorded.
+  bool has_loading_addr_;
+
+  // Names of the ELF sections loaded so far, across LoadSymbols() calls.
+  set<string> loaded_sections_;
+};
+
+// Return the preferred loading address of the binary: the p_vaddr of
+// the first PT_LOAD entry among the |nheader| entries of
+// |program_headers|, or 0 if there is no such entry.
+//
+// For non-PIC executables (e_type == ET_EXEC) that is the start of the
+// first loadable segment (ELF requires segments to be sorted by load
+// address).  For PIC executables and dynamic libraries
+// (e_type == ET_DYN) it will normally be zero.
+template<typename ElfClass>
+typename ElfClass::Addr GetLoadingAddress(
+    const typename ElfClass::Phdr* program_headers,
+    int nheader) {
+  int idx = 0;
+  while (idx < nheader) {
+    if (program_headers[idx].p_type == PT_LOAD) {
+      return program_headers[idx].p_vaddr;
+    }
+    ++idx;
+  }
+  return 0;
+}
+
+// Read whatever call frame information the ELF image at |elf_header|
+// contains -- first .debug_frame, then .eh_frame -- into |smap|.
+// Returns true if at least one of the two sections was found and
+// LoadDwarfCFI() succeeded on it.  Progress is reported through |log|.
+// |read_gnu_debug_link| and |rx_size| are accepted but not used here.
+template<typename ElfClass>
+bool LoadSymbols(const string& obj_file,
+                 const bool big_endian,
+                 const typename ElfClass::Ehdr* elf_header,
+                 const bool read_gnu_debug_link,
+                 LoadSymbolsInfo<ElfClass>* info,
+                 SecMap* smap,
+                 void* rx_avma, size_t rx_size,
+                 UniqueStringUniverse* usu,
+                 void (*log)(const char*)) {
+  typedef typename ElfClass::Phdr Phdr;
+  typedef typename ElfClass::Shdr Shdr;
+
+  char msg[500];
+  SprintfLiteral(msg, "LoadSymbols: BEGIN   %s\n", obj_file.c_str());
+  msg[sizeof(msg)-1] = 0;
+  log(msg);
+
+  // Text bias = where the r-x mapping actually is, minus where the
+  // object asked to be loaded.
+  const Phdr* program_headers =
+      GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff);
+  const uintptr_t loading_addr =
+      GetLoadingAddress<ElfClass>(program_headers, elf_header->e_phnum);
+  const uintptr_t text_bias =
+      reinterpret_cast<uintptr_t>(rx_avma) - loading_addr;
+  SprintfLiteral(msg,
+           "LoadSymbols:   rx_avma=%llx, text_bias=%llx",
+           (unsigned long long int)(uintptr_t)rx_avma,
+           (unsigned long long int)text_bias);
+  msg[sizeof(msg)-1] = 0;
+  log(msg);
+
+  // Locate the section header table and the section-name string table.
+  const Shdr* sections =
+      GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
+  const Shdr* shstrtab = sections + elf_header->e_shstrndx;
+  const char* names =
+      GetOffset<ElfClass, char>(elf_header, shstrtab->sh_offset);
+  const char *names_end = names + shstrtab->sh_size;
+
+  bool found_usable_info = false;
+
+  // Dwarf CFI stands on its own; it does not need the rest of the DWARF
+  // debugging information.
+  const Shdr* debug_frame_section =
+      FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
+                                     sections, names, names_end,
+                                     elf_header->e_shnum);
+  if (debug_frame_section) {
+    info->LoadedSection(".debug_frame");
+    // .debug_frame needs no GOT/text base addresses.
+    const bool loaded =
+        LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
+                               debug_frame_section, false, nullptr, nullptr,
+                               big_endian, smap, text_bias, usu, log);
+    if (loaded) {
+      found_usable_info = true;
+      log("LoadSymbols:   read CFI from .debug_frame");
+    }
+  }
+
+  // The C++ exception handling tables are a second source of unwind data.
+  const Shdr* eh_frame_section =
+      FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
+                                     sections, names, names_end,
+                                     elf_header->e_shnum);
+  if (eh_frame_section) {
+    // .eh_frame pointers can be relative to the start of .got or .text,
+    // so pass those sections along when they exist.
+    const Shdr* got_section =
+        FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
+                                       sections, names, names_end,
+                                       elf_header->e_shnum);
+    const Shdr* text_section =
+        FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
+                                       sections, names, names_end,
+                                       elf_header->e_shnum);
+    info->LoadedSection(".eh_frame");
+    const bool loaded =
+        LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
+                               eh_frame_section, true,
+                               got_section, text_section, big_endian,
+                               smap, text_bias, usu, log);
+    if (loaded) {
+      found_usable_info = true;
+      log("LoadSymbols:   read CFI from .eh_frame");
+    }
+  }
+
+  SprintfLiteral(msg, "LoadSymbols: END     %s\n", obj_file.c_str());
+  msg[sizeof(msg)-1] = 0;
+  log(msg);
+
+  return found_usable_info;
+}
+
+// Map the e_machine field of |elf_header| to the breakpad symbol file
+// architecture name, or return NULL for a machine that is not listed.
+template<typename ElfClass>
+const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
+  typedef typename ElfClass::Half Half;
+
+  static const struct {
+    int machine;
+    const char* name;
+  } kArchNames[] = {
+    { EM_386,     "x86"     },
+    { EM_ARM,     "arm"     },
+    { EM_MIPS,    "mips"    },
+    { EM_PPC64,   "ppc64"   },
+    { EM_PPC,     "ppc"     },
+    { EM_S390,    "s390"    },
+    { EM_SPARC,   "sparc"   },
+    { EM_SPARCV9, "sparcv9" },
+    { EM_X86_64,  "x86_64"  },
+  };
+
+  const Half arch = elf_header->e_machine;
+  for (size_t i = 0; i < sizeof(kArchNames) / sizeof(kArchNames[0]); ++i) {
+    if (kArchNames[i].machine == arch) {
+      return kArchNames[i].name;
+    }
+  }
+  return NULL;
+}
+
+// Render the 16-byte |identifier| as the text produced by
+// FileID::ConvertIdentifierToString() with every '-' dropped and a
+// single '0' appended.
+string FormatIdentifier(unsigned char identifier[16]) {
+  char with_dashes[40];
+  lul::FileID::ConvertIdentifierToString(
+      identifier,
+      with_dashes,
+      sizeof(with_dashes));
+
+  string compact;
+  for (const char* p = with_dashes; *p != '\0'; ++p) {
+    if (*p == '-') {
+      continue;
+    }
+    compact += *p;
+  }
+
+  // The trailing "0" mirrors the 'age' number that Windows PDB
+  // identifiers carry; other platforms don't need it, but keeping it
+  // makes the format uniform.
+  compact += '0';
+  return compact;
+}
+
+// Return the non-directory portion of FILENAME: the portion after the
+// last slash, or the whole filename if there are no slashes.  A name
+// that ends in a slash therefore yields the empty string.
+//
+// Implemented directly on std::string.  The earlier
+// strdup()/basename()/free() version did not check strdup() for
+// failure, and its result depended on which basename() variant (GNU or
+// POSIX) the included headers happened to select.
+string BaseFileName(const string &filename) {
+  const string::size_type last_slash = filename.rfind('/');
+  if (last_slash == string::npos) {
+    return filename;
+  }
+  return filename.substr(last_slash + 1);
+}
+
+// Read unwind information for the mapped ELF image at |elf_header|
+// (which came from |obj_filename|) into |smap|.
+//
+// Fails if no file identifier can be derived, or if the architecture or
+// endianness is not recognised.  If loading from the image itself finds
+// nothing usable, falls back to the debug file named by
+// info.debuglink_file(), provided that is non-empty and the file
+// matches the image in architecture and endianness.
+template<typename ElfClass>
+bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
+                            const string& obj_filename,
+                            const vector<string>& debug_dirs,
+                            SecMap* smap, void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+  typedef typename ElfClass::Ehdr Ehdr;
+
+  unsigned char identifier[16];
+  const bool have_identifier =
+      lul::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier);
+  if (!have_identifier) {
+    fprintf(stderr, "%s: unable to generate file identifier\n",
+            obj_filename.c_str());
+    return false;
+  }
+
+  const char *architecture = ElfArchitecture<ElfClass>(elf_header);
+  if (architecture == NULL) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+            obj_filename.c_str(), elf_header->e_machine);
+    return false;
+  }
+
+  // Work out the byte order of the image.
+  bool big_endian;
+  if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) {
+    return false;
+  }
+
+  string name = BaseFileName(obj_filename);
+  string os = "Linux";
+  string id = FormatIdentifier(identifier);
+
+  LoadSymbolsInfo<ElfClass> info(debug_dirs);
+
+  // First choice: the object itself.
+  const bool loaded_from_object =
+      LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
+                            !debug_dirs.empty(), &info,
+                            smap, rx_avma, rx_size, usu, log);
+  if (loaded_from_object) {
+    return true;
+  }
+
+  // Second choice: a separate debug file, if one is known.
+  const string debuglink_file = info.debuglink_file();
+  if (debuglink_file.empty()) {
+    return false;
+  }
+
+  fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
+  MmapWrapper debug_map_wrapper;
+  Ehdr* debug_elf_header = NULL;
+  if (!LoadELF(debuglink_file, &debug_map_wrapper,
+               reinterpret_cast<void**>(&debug_elf_header))) {
+    return false;
+  }
+
+  // The debug file has to describe the same kind of machine ...
+  const char *debug_architecture =
+      ElfArchitecture<ElfClass>(debug_elf_header);
+  if (debug_architecture == NULL) {
+    fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
+            debuglink_file.c_str(), debug_elf_header->e_machine);
+    return false;
+  }
+  if (strcmp(architecture, debug_architecture) != 0) {
+    fprintf(stderr, "%s with ELF machine architecture %s does not match "
+            "%s with ELF architecture %s\n",
+            debuglink_file.c_str(), debug_architecture,
+            obj_filename.c_str(), architecture);
+    return false;
+  }
+
+  // ... with the same byte order.
+  bool debug_big_endian;
+  if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) {
+    return false;
+  }
+  if (debug_big_endian != big_endian) {
+    fprintf(stderr, "%s and %s does not match in endianness\n",
+            obj_filename.c_str(), debuglink_file.c_str());
+    return false;
+  }
+
+  return LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
+                               debug_elf_header, false, &info,
+                               smap, rx_avma, rx_size, usu, log);
+}
+
+}  // namespace (anon)
+
+
+namespace lul {
+
+// Read unwind information from the ELF image already in memory at
+// |obj_file|, dispatching on its ELF class (32- or 64-bit).  Returns
+// false if the image lacks the ELF magic or has some other class.
+bool ReadSymbolDataInternal(const uint8_t* obj_file,
+                            const string& obj_filename,
+                            const vector<string>& debug_dirs,
+                            SecMap* smap, void* rx_avma, size_t rx_size,
+                            UniqueStringUniverse* usu,
+                            void (*log)(const char*)) {
+
+  if (!IsValidElf(obj_file)) {
+    fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
+    return false;
+  }
+
+  switch (ElfClass(obj_file)) {
+    case ELFCLASS32:
+      return ReadSymbolDataElfClass<ElfClass32>(
+          reinterpret_cast<const Elf32_Ehdr*>(obj_file),
+          obj_filename, debug_dirs, smap, rx_avma, rx_size, usu, log);
+    case ELFCLASS64:
+      return ReadSymbolDataElfClass<ElfClass64>(
+          reinterpret_cast<const Elf64_Ehdr*>(obj_file),
+          obj_filename, debug_dirs, smap, rx_avma, rx_size, usu, log);
+    default:
+      return false;
+  }
+}
+
+// Map the file |obj_file| and read its unwind information into |smap|.
+// The mapping lasts only for the duration of this call.
+bool ReadSymbolData(const string& obj_file,
+                    const vector<string>& debug_dirs,
+                    SecMap* smap, void* rx_avma, size_t rx_size,
+                    UniqueStringUniverse* usu,
+                    void (*log)(const char*)) {
+  MmapWrapper mapping;
+  void* image = NULL;
+  const bool mapped = LoadELF(obj_file, &mapping, &image);
+  if (!mapped) {
+    return false;
+  }
+
+  const uint8_t* image_bytes = static_cast<uint8_t*>(image);
+  return ReadSymbolDataInternal(image_bytes, obj_file, debug_dirs,
+                                smap, rx_avma, rx_size, usu, log);
+}
+
+
+namespace {
+
+// Look in the ELF image at |elf_base| for a section called
+// |section_name| of type |section_type|.  If one is found and it is not
+// empty, store its address within the image in |*section_start| and its
+// size in |*section_size|; otherwise leave both outputs untouched.
+template<typename ElfClass>
+void FindElfClassSection(const char *elf_base,
+                         const char *section_name,
+                         typename ElfClass::Word section_type,
+                         const void **section_start,
+                         int *section_size) {
+  typedef typename ElfClass::Ehdr Ehdr;
+  typedef typename ElfClass::Shdr Shdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* ehdr = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(ehdr->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  // Section header table, and the string table holding section names.
+  const Shdr* shdrs = GetOffset<ElfClass,Shdr>(ehdr, ehdr->e_shoff);
+  const Shdr* shstrtab = shdrs + ehdr->e_shstrndx;
+  const char* names = GetOffset<ElfClass,char>(ehdr, shstrtab->sh_offset);
+  const char *names_end = names + shstrtab->sh_size;
+
+  const Shdr* match =
+    FindElfSectionByName<ElfClass>(section_name, section_type,
+                                   shdrs, names, names_end,
+                                   ehdr->e_shnum);
+
+  if (match == NULL || match->sh_size <= 0) {
+    return;
+  }
+  *section_start = elf_base + match->sh_offset;
+  *section_size = match->sh_size;
+}
+
+// Look in the ELF image at |elf_base| for the first program header of
+// type |segment_type|.  If there is one, store the address of its file
+// contents within the image in |*segment_start| and its p_filesz in
+// |*segment_size|; otherwise leave both outputs untouched.
+template<typename ElfClass>
+void FindElfClassSegment(const char *elf_base,
+                         typename ElfClass::Word segment_type,
+                         const void **segment_start,
+                         int *segment_size) {
+  typedef typename ElfClass::Ehdr Ehdr;
+  typedef typename ElfClass::Phdr Phdr;
+
+  MOZ_ASSERT(elf_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+
+  MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
+
+  const Ehdr* ehdr = reinterpret_cast<const Ehdr*>(elf_base);
+  MOZ_ASSERT(ehdr->e_ident[EI_CLASS] == ElfClass::kClass);
+
+  const Phdr* first = GetOffset<ElfClass,Phdr>(ehdr, ehdr->e_phoff);
+  const int count = ehdr->e_phnum;
+
+  for (const Phdr* ph = first; ph != first + count; ++ph) {
+    if (ph->p_type != segment_type) {
+      continue;
+    }
+    *segment_start = elf_base + ph->p_offset;
+    *segment_size = ph->p_filesz;
+    return;
+  }
+}
+
+}  // namespace (anon)
+
+// Returns true if the bytes at |elf_base| begin with the ELF magic
+// number (ELFMAG).
+bool IsValidElf(const void* elf_base) {
+  const char* leading_bytes = static_cast<const char*>(elf_base);
+  const int difference = strncmp(leading_bytes, ELFMAG, SELFMAG);
+  return difference == 0;
+}
+
+// Returns the EI_CLASS byte of the ELF identification array at the
+// start of |elf_base| (e.g. ELFCLASS32 or ELFCLASS64).  The
+// identification array is the first member of the ELF header, so it is
+// read here as plain bytes.
+int ElfClass(const void* elf_base) {
+  const unsigned char* ident = static_cast<const unsigned char*>(elf_base);
+  return ident[EI_CLASS];
+}
+
+// Find the non-empty section |section_name| of type |section_type| in
+// the ELF image at |elf_mapped_base|.  On success stores its address
+// and size in |*section_start| / |*section_size| and returns true.  On
+// failure returns false with |*section_start| == NULL and
+// |*section_size| == 0.  If |elfclass| is non-null and the image has the
+// ELF magic, the image's ELF class is stored there whether or not the
+// section is found.
+bool FindElfSection(const void *elf_mapped_base,
+                    const char *section_name,
+                    uint32_t section_type,
+                    const void **section_start,
+                    int *section_size,
+                    int *elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(section_start);
+  MOZ_ASSERT(section_size);
+
+  // Start from the "not found" state.
+  *section_start = NULL;
+  *section_size = 0;
+
+  if (!IsValidElf(elf_mapped_base)) {
+    return false;
+  }
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass != NULL) {
+    *elfclass = cls;
+  }
+
+  const char* image = static_cast<const char*>(elf_mapped_base);
+
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSection<ElfClass32>(image, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSection<ElfClass64>(image, section_name, section_type,
+                                      section_start, section_size);
+      break;
+    default:
+      return false;
+  }
+
+  return *section_start != NULL;
+}
+
+// Find the first program segment of type |segment_type| in the ELF
+// image at |elf_mapped_base|.  On success stores the address of its
+// file contents and its file size in |*segment_start| /
+// |*segment_size| and returns true.  On failure returns false with
+// |*segment_start| == NULL and |*segment_size| == 0.  If |elfclass| is
+// non-null and the image has the ELF magic, the image's ELF class is
+// stored there whether or not the segment is found.
+bool FindElfSegment(const void *elf_mapped_base,
+                    uint32_t segment_type,
+                    const void **segment_start,
+                    int *segment_size,
+                    int *elfclass) {
+  MOZ_ASSERT(elf_mapped_base);
+  MOZ_ASSERT(segment_start);
+  MOZ_ASSERT(segment_size);
+
+  // Start from the "not found" state.
+  *segment_start = NULL;
+  *segment_size = 0;
+
+  if (!IsValidElf(elf_mapped_base)) {
+    return false;
+  }
+
+  const int cls = ElfClass(elf_mapped_base);
+  if (elfclass != NULL) {
+    *elfclass = cls;
+  }
+
+  const char* image = static_cast<const char*>(elf_mapped_base);
+
+  switch (cls) {
+    case ELFCLASS32:
+      FindElfClassSegment<ElfClass32>(image, segment_type,
+                                      segment_start, segment_size);
+      break;
+    case ELFCLASS64:
+      FindElfClassSegment<ElfClass64>(image, segment_type,
+                                      segment_start, segment_size);
+      break;
+    default:
+      return false;
+  }
+
+  return *segment_start != NULL;
+}
+
+
+// (derived from)
+// file_id.cc: Return a unique identifier for a file
+//
+// See file_id.h for documentation
+//
+
+// ELF note name and desc are padded to a multiple of 4 bytes (one
+// 32-bit word).  Rounds |a| up to the next multiple of 4.  The argument
+// is parenthesised so that expressions using low-precedence operators
+// (e.g. |x | y|) are rounded as a whole.
+#define NOTE_PADDING(a) (((a) + 3) & ~3)
+
+// These functions are also used inside the crashed process, so be safe
+// and use the syscall/libc wrappers instead of direct syscalls or libc.
+
+// Walk the ELF notes stored in the |length| bytes at |section| looking
+// for the first note of type NT_GNU_BUILD_ID.  If one is found with a
+// non-empty descriptor, zero |identifier| and copy as much of the build
+// ID into it as fits (kMDGUIDSize bytes), then return true.
+//
+// Returns false if there is no such note, if its descriptor is empty,
+// or if any note header, name or descriptor would extend past the end
+// of the section.  Every read is bounds-checked against |length|, so a
+// truncated or corrupt note section cannot cause an out-of-range read.
+template<typename ElfClass>
+static bool ElfClassBuildIDNoteIdentifier(const void *section, int length,
+                                          uint8_t identifier[kMDGUIDSize]) {
+  typedef typename ElfClass::Nhdr Nhdr;
+
+  if (!section || length <= 0) {
+    return false;
+  }
+
+  const char* cursor = reinterpret_cast<const char*>(section);
+  size_t remaining = static_cast<size_t>(length);
+
+  // Only look at a note if its whole header lies inside the section.
+  while (remaining >= sizeof(Nhdr)) {
+    const Nhdr* note_header = reinterpret_cast<const Nhdr*>(cursor);
+    const size_t payload_room = remaining - sizeof(Nhdr);
+
+    // Check the raw name size before padding it, so that the "+ 3" in
+    // NOTE_PADDING cannot wrap around.
+    if (note_header->n_namesz > payload_room) {
+      return false;
+    }
+    const size_t name_size =
+      NOTE_PADDING(static_cast<size_t>(note_header->n_namesz));
+    if (name_size > payload_room) {
+      return false;
+    }
+
+    const size_t desc_room = payload_room - name_size;
+    if (note_header->n_descsz > desc_room) {
+      return false;
+    }
+
+    if (note_header->n_type == NT_GNU_BUILD_ID) {
+      if (note_header->n_descsz == 0) {
+        return false;
+      }
+      const char* build_id = cursor + sizeof(Nhdr) + name_size;
+      // Copy as many bits of the build ID as will fit
+      // into the GUID space.
+      memset(identifier, 0, kMDGUIDSize);
+      memcpy(identifier, build_id,
+             std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
+      return true;
+    }
+
+    // Not the note we want: step over its padded descriptor.  If the
+    // padding runs off the end there can be no further notes.
+    const size_t desc_size =
+      NOTE_PADDING(static_cast<size_t>(note_header->n_descsz));
+    if (desc_size > desc_room) {
+      return false;
+    }
+    cursor += sizeof(Nhdr) + name_size + desc_size;
+    remaining = desc_room - desc_size;
+  }
+
+  return false;
+}
+
+// Try to obtain a build ID for the ELF image at |elf_mapped_base|.
+// The PT_NOTE segment is tried first; if it is missing or empty the
+// .note.gnu.build-id section is tried instead.  Whichever is found is
+// scanned for a build ID note, and as many bytes of it as fit are
+// copied into |identifier|.  Returns false if neither source exists or
+// no usable build ID note is found.
+static bool FindElfBuildIDNote(const void *elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* note_data = NULL;
+  int note_size = 0;
+  int elfclass = 0;
+
+  bool have_notes =
+    FindElfSegment(elf_mapped_base, PT_NOTE,
+                   &note_data, &note_size, &elfclass) &&
+    note_size != 0;
+  if (!have_notes) {
+    have_notes =
+      FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
+                     &note_data, &note_size, &elfclass) &&
+      note_size != 0;
+  }
+  if (!have_notes) {
+    return false;
+  }
+
+  switch (elfclass) {
+    case ELFCLASS32:
+      return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_data, note_size,
+                                                       identifier);
+    case ELFCLASS64:
+      return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_data, note_size,
+                                                       identifier);
+    default:
+      return false;
+  }
+}
+
+// Derive an identifier from the .text section of the ELF image at
+// |elf_mapped_base|: zero |identifier|, then XOR successive
+// kMDGUIDSize-byte chunks of the section into it, covering at most the
+// first 4096 bytes.  Returns false if there is no non-empty .text
+// section.
+//
+// NOTE(review): chunks are always kMDGUIDSize bytes, so when the number
+// of bytes hashed is not a multiple of kMDGUIDSize the last chunk reads
+// a few bytes beyond it.  Left as is, because changing it would change
+// the identifiers produced.
+static bool HashElfTextSection(const void *elf_mapped_base,
+                               uint8_t identifier[kMDGUIDSize]) {
+  const void* text_section = NULL;
+  int text_size = 0;
+  const bool found =
+    FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
+                   &text_section, &text_size, NULL);
+  if (!found || text_size == 0) {
+    return false;
+  }
+
+  memset(identifier, 0, kMDGUIDSize);
+
+  const uint8_t* const first = static_cast<const uint8_t*>(text_section);
+  const uint8_t* const limit = first + std::min(text_size, 4096);
+  for (const uint8_t* chunk = first; chunk < limit; chunk += kMDGUIDSize) {
+    for (unsigned i = 0; i < kMDGUIDSize; i++) {
+      identifier[i] ^= chunk[i];
+    }
+  }
+  return true;
+}
+
+// static
+// Fill |identifier| for the mapped ELF image at |base|.  A build ID
+// note is preferred; only if none can be found is the identifier
+// computed by hashing the start of the text section.  Returns false if
+// both methods fail.
+bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
+                                             uint8_t identifier[kMDGUIDSize]) {
+  return FindElfBuildIDNote(base, identifier) ||
+         HashElfTextSection(base, identifier);
+}
+
+// static
+// Write |identifier| into |buffer| as upper-case hexadecimal in the
+// layout XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX, followed by a NUL.  The
+// first 32-bit field and the following two 16-bit fields are converted
+// with htonl()/htons() first, to match what the dump processor expects.
+//
+// The complete string needs 37 bytes.  If |buffer_length| is smaller,
+// output stops after the last whole byte (and any dash before it) that
+// still leaves room for the terminating NUL; nothing is ever written at
+// or beyond |buffer| + |buffer_length|.  Does nothing if |buffer| is
+// null or |buffer_length| is not positive.
+void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+                                       char* buffer, int buffer_length) {
+  if (!buffer || buffer_length <= 0) {
+    return;
+  }
+
+  uint8_t identifier_swapped[kMDGUIDSize];
+  memcpy(identifier_swapped, identifier, kMDGUIDSize);
+
+  // Endian-ness swap to match dump processor expectation.  The fields
+  // are moved in and out with memcpy because the byte array is not
+  // guaranteed to be aligned for uint32_t/uint16_t access.
+  uint32_t data1;
+  memcpy(&data1, identifier_swapped, sizeof(data1));
+  data1 = htonl(data1);
+  memcpy(identifier_swapped, &data1, sizeof(data1));
+
+  uint16_t data2;
+  memcpy(&data2, identifier_swapped + 4, sizeof(data2));
+  data2 = htons(data2);
+  memcpy(identifier_swapped + 4, &data2, sizeof(data2));
+
+  uint16_t data3;
+  memcpy(&data3, identifier_swapped + 6, sizeof(data3));
+  data3 = htons(data3);
+  memcpy(identifier_swapped + 6, &data3, sizeof(data3));
+
+  static const char kHexDigits[] = "0123456789ABCDEF";
+
+  int buffer_idx = 0;
+  for (unsigned int idx = 0; idx < kMDGUIDSize; ++idx) {
+    const bool needs_dash = (idx == 4 || idx == 6 || idx == 8 || idx == 10);
+    const int needed = needs_dash ? 3 : 2;
+
+    // Stop unless the dash (if any), both hex digits and the final NUL
+    // all fit.
+    if (buffer_idx + needed >= buffer_length) {
+      break;
+    }
+
+    if (needs_dash) {
+      buffer[buffer_idx++] = '-';
+    }
+    buffer[buffer_idx++] = kHexDigits[(identifier_swapped[idx] >> 4) & 0x0F];
+    buffer[buffer_idx++] = kHexDigits[identifier_swapped[idx] & 0x0F];
+  }
+
+  // NULL terminate; buffer_idx < buffer_length is guaranteed above.
+  buffer[buffer_idx] = 0;
+}
+
+}  // namespace lul
diff --git a/tools/profiler/lul/LulElfExt.h b/tools/profiler/lul/LulElfExt.h
new file mode 100644
index 000000000..b127d96d9
--- /dev/null
+++ b/tools/profiler/lul/LulElfExt.h
@@ -0,0 +1,68 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2011, 2012 Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/linux/dump_symbols.h
+
+#ifndef LulElfExt_h
+#define LulElfExt_h
+
+// These two functions are the external interface to the
+// ELF/Dwarf/EXIDX reader.
+
+#include "LulMainInt.h"
+
+using lul::SecMap;
+
+namespace lul {
+
+// Find all the unwind information in OBJ_FILE, an ELF executable or
+// shared library, and add it to SMAP.  (Result: presumably true on success.)
+bool ReadSymbolData(const std::string& obj_file,
+                    const std::vector<std::string>& debug_dirs,
+                    SecMap* smap,
+                    void* rx_avma, size_t rx_size,  // r-x mapping? TODO confirm
+                    void (*log)(const char*));
+
+// The same as ReadSymbolData, except that OBJ_FILE is assumed to
+// point to a mapped-in image of OBJ_FILENAME.
+bool ReadSymbolDataInternal(const uint8_t* obj_file,
+                            const std::string& obj_filename,
+                            const std::vector<std::string>& debug_dirs,
+                            SecMap* smap,
+                            void* rx_avma, size_t rx_size,
+                            void (*log)(const char*));
+
+}  // namespace lul
+
+#endif // LulElfExt_h
diff --git a/tools/profiler/lul/LulElfInt.h b/tools/profiler/lul/LulElfInt.h
new file mode 100644
index 000000000..899d7d3ee
--- /dev/null
+++ b/tools/profiler/lul/LulElfInt.h
@@ -0,0 +1,234 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+
+// Copyright (c) 2006, 2012, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file is derived from the following files in
+// toolkit/crashreporter/google-breakpad:
+//   src/common/android/include/elf.h
+//   src/common/linux/elfutils.h
+//   src/common/linux/file_id.h
+//   src/common/linux/elfutils-inl.h
+
+#ifndef LulElfInt_h
+#define LulElfInt_h
+
+// This header defines functions etc internal to the ELF reader.  It
+// should not be included outside of LulElf.cpp.
+
+#include <elf.h>
+#include <stdlib.h>
+
+#include "mozilla/Assertions.h"
+
+#include "LulPlatformMacros.h"
+
+
+// (derived from)
+// elfutils.h: Utilities for dealing with ELF files.
+//
+
+#if defined(LUL_OS_android)
+
+// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h
+// The Android headers don't always define this constant.
+#ifndef EM_X86_64
+#define EM_X86_64  62
+#endif
+
+#ifndef EM_PPC64
+#define EM_PPC64   21
+#endif
+
+#ifndef EM_S390
+#define EM_S390    22
+#endif
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+#define ElfW(type)      _ElfW (Elf, ELFSIZE, type)
+#define _ElfW(e,w,t)    _ElfW_1 (e, w, _##t)
+#define _ElfW_1(e,w,t)  e##w##t
+
+//FIXME
+extern "C" {
+  extern char*  basename(const char*  path);
+};
+#else
+
+# include <link.h>
+#endif
+
+
+namespace lul {
+
+// Traits classes so consumers can write templatized code to deal
+// with specific ELF bits.
+struct ElfClass32 {  // the types and constants describing 32-bit ELF files
+  using Addr = Elf32_Addr;
+  using Ehdr = Elf32_Ehdr;
+  using Nhdr = Elf32_Nhdr;
+  using Phdr = Elf32_Phdr;
+  using Shdr = Elf32_Shdr;
+  using Half = Elf32_Half;
+  using Off  = Elf32_Off;
+  using Word = Elf32_Word;
+  static const int kClass = ELFCLASS32;
+  static const size_t kAddrSize = sizeof(Addr);
+};
+
+struct ElfClass64 {  // the types and constants describing 64-bit ELF files
+  using Addr = Elf64_Addr;
+  using Ehdr = Elf64_Ehdr;
+  using Nhdr = Elf64_Nhdr;
+  using Phdr = Elf64_Phdr;
+  using Shdr = Elf64_Shdr;
+  using Half = Elf64_Half;
+  using Off  = Elf64_Off;
+  using Word = Elf64_Word;
+  static const int kClass = ELFCLASS64;
+  static const size_t kAddrSize = sizeof(Addr);
+};
+
+bool IsValidElf(const void* elf_header);  // presumably a header sanity check
+int ElfClass(const void* elf_base);  // presumably ELFCLASS32/64 -- confirm
+
+// Attempt to find a section named |section_name| of type |section_type|
+// in the ELF binary data at |elf_mapped_base|. On success, returns true
+// and sets |*section_start| to point to the start of the section data,
+// and |*section_size| to the size of the section's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSection(const void *elf_mapped_base,
+                    const char *section_name,
+                    uint32_t section_type,
+                    const void **section_start,
+                    int *section_size,
+                    int *elfclass);
+
+// Internal helper, exposed for callers that already have the section
+// headers and the name table at hand.  Returns the match, or NULL if none.
+template<typename ElfClass>
+const typename ElfClass::Shdr*
+FindElfSectionByName(const char* name,
+                     typename ElfClass::Word section_type,
+                     const typename ElfClass::Shdr* sections,
+                     const char* section_names,  // base that sh_name indexes
+                     const char* names_end,      // end of the name data
+                     int nsection);              // entries in |sections|
+
+// Attempt to find the first segment of type |segment_type| in the ELF
+// binary data at |elf_mapped_base|. On success, returns true and sets
+// |*segment_start| to point to the start of the segment data, and
+// |*segment_size| to the size of the segment's data. If |elfclass|
+// is not NULL, set |*elfclass| to the ELF file class.
+bool FindElfSegment(const void *elf_mapped_base,
+                    uint32_t segment_type,
+                    const void **segment_start,
+                    int *segment_size,
+                    int *elfclass);
+
+// Convert an offset from an Elf header into a pointer to the mapped
+// address in the current process. Takes an extra template parameter
+// to specify the pointee type of the result, so that callers do not
+// have to cast the returned pointer themselves.
+template<typename ElfClass, typename T>
+const T*
+GetOffset(const typename ElfClass::Ehdr* elf_header,
+          typename ElfClass::Off offset);
+
+
+// (derived from)
+// file_id.h: Return a unique identifier for a file
+//
+
+static const size_t kMDGUIDSize = sizeof(MDGUID);  // identifier size, bytes
+
+class FileID {  // static helpers only; no instance is needed to use them
+ public:
+
+  // Load the identifier for the elf file mapped into memory at |base| into
+  // |identifier|.  Return false if the identifier could not be created for the
+  // file.
+  static bool ElfFileIdentifierFromMappedFile(const void* base,
+                                              uint8_t identifier[kMDGUIDSize]);
+
+  // Convert the |identifier| data to a NULL terminated string.  The string will
+  // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE).
+  // The |buffer| should be at least 37 bytes long to receive all of the data
+  // and termination.  Shorter buffers will contain truncated data.
+  static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
+                                        char* buffer, int buffer_length);
+};
+
+
+
+template<typename ElfClass, typename T>
+const T* GetOffset(const typename ElfClass::Ehdr* elf_header,
+                   typename ElfClass::Off offset) {
+  const uintptr_t headerAddr = reinterpret_cast<uintptr_t>(elf_header);
+  return reinterpret_cast<const T*>(headerAddr + offset);  // header + offset
+}
+
+template<typename ElfClass>
+const typename ElfClass::Shdr* FindElfSectionByName(
+    const char* name,
+    typename ElfClass::Word section_type,
+    const typename ElfClass::Shdr* sections,
+    const char* section_names,
+    const char* names_end,
+    int nsection) {
+  MOZ_ASSERT(name != NULL);
+  MOZ_ASSERT(sections != NULL);
+  MOZ_ASSERT(nsection > 0);
+
+  const int wanted_len = strlen(name);
+  if (wanted_len == 0)  // an empty name never matches
+    return NULL;
+
+  for (int idx = 0; idx < nsection; ++idx) {
+    const typename ElfClass::Shdr& sec = sections[idx];
+    if (sec.sh_type != section_type) continue;
+    // Compare only if strlen(name)+1 bytes remain before |names_end|.
+    const char* candidate = section_names + sec.sh_name;
+    if (names_end - candidate >= wanted_len + 1 && !strcmp(name, candidate))
+      return &sec;
+  }
+  return NULL;
+}
+
+} // namespace lul
+
+
+// And finally, the external interface, offered to LulMain.cpp
+#include "LulElfExt.h"
+
+#endif // LulElfInt_h
diff --git a/tools/profiler/lul/LulMain.cpp b/tools/profiler/lul/LulMain.cpp
new file mode 100644
index 000000000..2e78f03ec
--- /dev/null
+++ b/tools/profiler/lul/LulMain.cpp
@@ -0,0 +1,1963 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LulMain.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <algorithm>  // std::sort
+#include <string>
+
+#include "mozilla/Assertions.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryChecking.h"
+#include "mozilla/Sprintf.h"
+
+#include "LulCommonExt.h"
+#include "LulElfExt.h"
+
+#include "LulMainInt.h"
+
+#include "platform-linux-lul.h"  // for gettid()
+
+// Set this to 1 for verbose logging
+#define DEBUG_MAIN 0
+
+namespace lul {
+
+using std::string;
+using std::vector;
+using std::pair;
+using mozilla::CheckedInt;
+using mozilla::DebugOnly;
+
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+//
+// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
+// Any such function -- and, hence, the transitive closure of those
+// reachable from it -- must not do any dynamic memory allocation.
+// Doing so risks deadlock.  There is exactly one root function for
+// the transitive closure: Lul::Unwind.
+//
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+
+
+////////////////////////////////////////////////////////////////
+// RuleSet                                                    //
+////////////////////////////////////////////////////////////////
+
+static const char* NameOf_DW_REG(int16_t aReg)  // printable register name
+{
+  const char* name = "???";  // used for any register not listed below
+  switch (aReg) {
+    case DW_REG_CFA:       name = "cfa"; break;
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    case DW_REG_INTEL_XBP: name = "xbp"; break;
+    case DW_REG_INTEL_XSP: name = "xsp"; break;
+    case DW_REG_INTEL_XIP: name = "xip"; break;
+#elif defined(LUL_ARCH_arm)
+    case DW_REG_ARM_R7:    name = "r7";  break;
+    case DW_REG_ARM_R11:   name = "r11"; break;
+    case DW_REG_ARM_R12:   name = "r12"; break;
+    case DW_REG_ARM_R13:   name = "r13"; break;
+    case DW_REG_ARM_R14:   name = "r14"; break;
+    case DW_REG_ARM_R15:   name = "r15"; break;
+#else
+# error "Unsupported arch"
+#endif
+  }
+  return name;
+}
+
+string
+LExpr::ShowRule(const char* aNewReg) const
+{
+  // Produce a human-readable "<aNewReg>=<description>" string for this
+  // expression; RuleSet::Print uses it to build its log line.
+  string text(aNewReg);
+  text += "=";
+
+  char desc[64];
+  if (mHow == UNKNOWN) {
+    text += "Unknown";
+  } else if (mHow == NODEREF) {
+    // Shown as reg+offset.
+    SprintfLiteral(desc, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
+    text += desc;
+  } else if (mHow == DEREF) {
+    // Shown as *(reg+offset).
+    SprintfLiteral(desc, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
+    text += desc;
+  } else if (mHow == PFXEXPR) {
+    // Shown as PfxExpr-at-offset; no register is printed.
+    SprintfLiteral(desc, "PfxExpr-at-%d", (int)mOffset);
+    text += desc;
+  } else {
+    text += "???";
+  }
+
+  return text;
+}
+
+void
+RuleSet::Print(void(*aLog)(const char*)) const
+{
+  // Start with the covered address range, both ends inclusive.
+  const unsigned long long int first = mAddr;
+  const unsigned long long int last  = mAddr + mLen - 1;
+  char header[96];
+  SprintfLiteral(header, "[%llx .. %llx]: let ", first, last);
+  string line(header);
+  line.append(mCfaExpr.ShowRule("cfa")).append(" in");
+  // Then one recovery expression for each register we care about.
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  line += mXipExpr.ShowRule(" RA");
+  line += mXspExpr.ShowRule(" SP");
+  line += mXbpExpr.ShowRule(" BP");
+#elif defined(LUL_ARCH_arm)
+  line += mR15expr.ShowRule(" R15");
+  line += mR7expr.ShowRule(" R7");
+  line += mR11expr.ShowRule(" R11");
+  line += mR12expr.ShowRule(" R12");
+  line += mR13expr.ShowRule(" R13");
+  line += mR14expr.ShowRule(" R14");
+#else
+# error "Unsupported arch"
+#endif
+  aLog(line.c_str());
+}
+
+LExpr*
+RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
+  switch (aRegno) {  // map a register number onto its recovery expression
+#   if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    case DW_REG_INTEL_XBP: return &mXbpExpr;
+    case DW_REG_INTEL_XSP: return &mXspExpr;
+    case DW_REG_INTEL_XIP: return &mXipExpr;
+#   elif defined(LUL_ARCH_arm)
+    case DW_REG_ARM_R7:    return &mR7expr;
+    case DW_REG_ARM_R11:   return &mR11expr;
+    case DW_REG_ARM_R12:   return &mR12expr;
+    case DW_REG_ARM_R13:   return &mR13expr;
+    case DW_REG_ARM_R14:   return &mR14expr;
+    case DW_REG_ARM_R15:   return &mR15expr;
+#   else
+#     error "Unknown arch"
+#   endif
+    case DW_REG_CFA: return &mCfaExpr;
+    default: return nullptr;  // no expression is kept for this register
+  }
+}
+
+RuleSet::RuleSet()
+{
+  // Start out with a zero address and a zero length.  Every remaining
+  // field is an LExpr, which LExpr::LExpr() initialises by itself, so
+  // there is nothing more to do here.
+  mAddr = mLen = 0;
+}
+
+
+////////////////////////////////////////////////////////////////
+// SecMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// See header file LulMainInt.h for comments about invariants.
+
+SecMap::SecMap(void(*aLog)(const char*))
+  : mSummaryMinAddr(1)  // min > max is the summary range of an empty map
+  , mSummaryMaxAddr(0)
+  , mUsable(true)       // nothing added yet, so searching is allowed
+  , mLog(aLog)          // callback that receives log text
+{}
+
+SecMap::~SecMap() {
+  mRuleSets.clear();  // explicitly drop all RuleSets held by this map
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+RuleSet*
+SecMap::FindRuleSet(uintptr_t ia) {
+  // Searching is only meaningful once the array has been sorted and
+  // preened, which is what |mUsable| records.
+  MOZ_ASSERT(mUsable);
+
+  // Binary search for the RuleSet whose address range brackets |ia|.
+  // The window bounds are inclusive and deliberately signed: |last|
+  // starts at -1 for an empty array and may drop to -1 later on, and
+  // in both cases the loop condition must simply become false.
+  long int first = 0;
+  long int last  = (long int)mRuleSets.size() - 1;
+  while (first <= last) {
+    long int  probe  = first + ((last - first) / 2);
+    RuleSet*  cand   = &mRuleSets[probe];
+    uintptr_t candLo = cand->mAddr;
+    uintptr_t candHi = candLo + cand->mLen - 1;
+    if (ia < candLo) {
+      // |ia| is below this entry: continue in the lower half.
+      last = probe - 1;
+    } else if (ia > candHi) {
+      // |ia| is above this entry: continue in the upper half.
+      first = probe + 1;
+    } else {
+      return cand;
+    }
+  }
+  return nullptr;  // no RuleSet covers |ia|
+}
+
+// Append a copy of |*rs| to the collection.  The map stops being
+// searchable as a result, and is flagged accordingly.
+void
+SecMap::AddRuleSet(const RuleSet* rs) {
+  mRuleSets.push_back(*rs);
+  mUsable = false;
+}
+
+// Append |pfxi|; returns its index.  Makes the map non-searchable.
+uint32_t
+SecMap::AddPfxInstr(PfxInstr pfxi) {
+  const uint32_t newIndex = mPfxInstrs.size();  // where |pfxi| will land
+  mPfxInstrs.push_back(pfxi);
+  mUsable = false;
+  return newIndex;
+}
+
+
+static bool
+CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
+  return rs2.mAddr > rs1.mAddr;  // strictly-less-than, despite the "LE" name
+}
+
+// Prepare the map for searching: sort the RuleSets by start address,
+// discard any not entirely inside [aStart, aStart+aLen), trim overlaps.
+void
+SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen)
+{
+  if (mRuleSets.empty()) {
+    return;  // nothing to prepare; mUsable is left as it was
+  }
+
+  MOZ_ASSERT(aLen > 0);
+  if (aLen == 0) {
+    // This should never happen.  If it does, discard all RuleSets.
+    mRuleSets.clear();
+    return;
+  }
+
+  // Sort by start addresses.
+  std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
+
+  // Detect any entry not completely contained within [start, +len).
+  // Set its length to zero, so that the next pass will remove it.
+  for (size_t i = 0; i < mRuleSets.size(); ++i) {
+    RuleSet* rs = &mRuleSets[i];
+    if (rs->mLen > 0 &&
+        (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
+      rs->mLen = 0;
+    }
+  }
+
+  // Iteratively truncate any overlaps and remove any zero length
+  // entries that might result, or that may have been present
+  // initially.  Unless the input is seriously screwy, this is
+  // expected to iterate only once.
+  while (true) {
+    size_t i;
+    size_t n = mRuleSets.size();
+    size_t nZeroLen = 0;
+
+    if (n == 0) {
+      break;
+    }
+
+    for (i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i-1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr <= here->mAddr);
+      if (prev->mAddr + prev->mLen > here->mAddr) {
+        prev->mLen = here->mAddr - prev->mAddr;  // cut |prev| off at |here|
+      }
+      if (prev->mLen == 0)
+        nZeroLen++;
+    }
+
+    if (mRuleSets[n-1].mLen == 0) {  // the loop above never counts the last
+      nZeroLen++;
+    }
+
+    // At this point, the entries are in-order and non-overlapping.
+    // If none of them are zero-length, we are done.
+    if (nZeroLen == 0) {
+      break;
+    }
+
+    // Slide back the entries to remove the zero length ones.
+    size_t j = 0;  // The write-point.
+    for (i = 0; i < n; ++i) {
+      if (mRuleSets[i].mLen == 0) {
+        continue;
+      }
+      if (j != i) mRuleSets[j] = mRuleSets[i];
+      ++j;
+    }
+    MOZ_ASSERT(i == n);
+    MOZ_ASSERT(nZeroLen <= n);
+    MOZ_ASSERT(j == n - nZeroLen);
+    while (nZeroLen > 0) {  // drop the now-unused slots at the end
+      mRuleSets.pop_back();
+      nZeroLen--;
+    }
+
+    MOZ_ASSERT(mRuleSets.size() == j);
+  }
+
+  size_t n = mRuleSets.size();
+
+#ifdef DEBUG
+  // Do a final check on the rules: their address ranges must be
+  // ascending, non overlapping, non zero sized.
+  if (n > 0) {
+    MOZ_ASSERT(mRuleSets[0].mLen > 0);
+    for (size_t i = 1; i < n; ++i) {
+      RuleSet* prev = &mRuleSets[i-1];
+      RuleSet* here = &mRuleSets[i];
+      MOZ_ASSERT(prev->mAddr < here->mAddr);
+      MOZ_ASSERT(here->mLen > 0);
+      MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
+    }
+  }
+#endif
+
+  // Set the summary min and max address values (both inclusive).
+  if (n == 0) {
+    // Use the values defined in comments in the class declaration.
+    mSummaryMinAddr = 1;
+    mSummaryMaxAddr = 0;
+  } else {
+    mSummaryMinAddr = mRuleSets[0].mAddr;
+    mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1;
+  }
+  char buf[150];
+  SprintfLiteral(buf,
+                 "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
+                 (int)n, (unsigned long long int)mSummaryMinAddr,
+                         (unsigned long long int)mSummaryMaxAddr);
+  buf[sizeof(buf)-1] = 0;  // make sure the text is terminated
+  mLog(buf);
+
+  // Is now usable for binary search.
+  mUsable = true;
+
+  if (0) {  // disabled debugging aid: dump every RuleSet that is left
+    mLog("\nRulesets after preening\n");
+    for (size_t i = 0; i < mRuleSets.size(); ++i) {
+      mRuleSets[i].Print(mLog);
+      mLog("\n");
+    }
+    mLog("\n");
+  }
+}
+
+bool SecMap::IsEmpty() {
+  return mRuleSets.size() == 0;  // true when no RuleSets are held
+}
+
+
+////////////////////////////////////////////////////////////////
+// SegArray                                                   //
+////////////////////////////////////////////////////////////////
+
+// A SegArray holds a set of address ranges that together exactly
+// cover an address range, with no overlaps or holes.  Each range has
+// an associated value, which in this case has been specialised to be
+// a simple boolean.  The representation is kept to minimal canonical
+// form in which adjacent ranges with the same associated value are
+// merged together.  Each range is represented by a |struct Seg|.
+//
+// SegArrays are used to keep track of which parts of the address
+// space are known to contain instructions.
+class SegArray {
+ public:
+  // Give every address in the inclusive range [lo, hi] the value |val|.
+  // A request with |lo| > |hi| is ignored.
+  void add(uintptr_t lo, uintptr_t hi, bool val) {
+    if (lo > hi) {
+      return;
+    }
+    // Put segment boundaries at |lo| and just past |hi| (if there is one).
+    split_at(lo);
+    if (hi < UINTPTR_MAX) {
+      split_at(hi+1);
+    }
+    const std::vector<Seg>::size_type iHi = find(hi);
+    for (std::vector<Seg>::size_type i = find(lo); i <= iHi; ++i) {
+      mSegs[i].val = val;
+    }
+    preen();
+  }
+
+  // If |addr| is in a segment whose value is true, store the segment's
+  // inclusive bounds in |*rx_min|/|*rx_max| and return true, else false.
+  // RUNS IN NO-MALLOC CONTEXT
+  bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min,
+                              /*OUT*/uintptr_t* rx_max, uintptr_t addr) {
+    const Seg& seg = mSegs[find(addr)];
+    if (!seg.val) {
+      return false;
+    }
+    *rx_min = seg.lo;
+    *rx_max = seg.hi;
+    return true;
+  }
+
+  // Start with one segment spanning the whole address space, value false.
+  SegArray() {
+    mSegs.push_back(Seg(0, UINTPTR_MAX, false));
+  }
+
+ private:
+  // One inclusive address range [lo, hi] together with its value.
+  struct Seg {
+    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
+    uintptr_t lo;
+    uintptr_t hi;
+    bool val;
+  };
+
+  // Restore the canonical form by merging neighbours with equal values.
+  // Indexing (not iterators) means nothing ever steps before begin().
+  void preen() {
+    std::vector<Seg>::size_type i = 0;
+    while (i + 1 < mSegs.size()) {
+      if (mSegs[i].val != mSegs[i+1].val) {
+        ++i;
+        continue;
+      }
+      mSegs[i].hi = mSegs[i+1].hi;  // absorb the successor, stay on |i|
+      mSegs.erase(mSegs.begin() + i + 1);
+    }
+  }
+
+  // Return the index of the segment containing |a|; one always exists.
+  // RUNS IN NO-MALLOC CONTEXT
+  std::vector<Seg>::size_type find(uintptr_t a) {
+    long int lo = 0;
+    long int hi = (long int)mSegs.size() - 1;  // last valid index
+    while (lo <= hi) {  // the unsearched space is lo .. hi inclusive
+      long int mid = lo + ((hi - lo) / 2);
+      if (a < mSegs[mid].lo) {
+        hi = mid - 1;
+      } else if (a > mSegs[mid].hi) {
+        lo = mid + 1;
+      } else {
+        return (std::vector<Seg>::size_type)mid;
+      }
+    }
+    return (std::vector<Seg>::size_type)(-1);  // not found; can't happen
+  }
+
+  // Make a segment start exactly at |a|, splitting one in two if needed.
+  void split_at(uintptr_t a) {
+    std::vector<Seg>::size_type i = find(a);
+    if (mSegs[i].lo == a) {
+      return;
+    }
+    mSegs.insert( mSegs.begin()+i+1, mSegs[i] );
+    mSegs[i].hi = a-1;
+    mSegs[i+1].lo = a;
+  }
+
+  // Debugging aid: print all segments to stdout.
+  void show() {
+    printf("<< %d entries:\n", (int)mSegs.size());
+    for (std::vector<Seg>::size_type i = 0; i < mSegs.size(); ++i) {
+      printf("  %016llx  %016llx  %s\n", (unsigned long long int)mSegs[i].lo,
+             (unsigned long long int)mSegs[i].hi,
+             mSegs[i].val ? "true" : "false");
+    }
+    printf(">>\n");
+  }
+
+  std::vector<Seg> mSegs;
+};
+
+
+////////////////////////////////////////////////////////////////
+// PriMap                                                     //
+////////////////////////////////////////////////////////////////
+
+class PriMap {
+ public:
+  explicit PriMap(void (*aLog)(const char*))
+    : mLog(aLog)  // callback that receives log text (see AddSecMap)
+  {}
+
+  ~PriMap() {
+    // Every SecMap still held here is deleted, and only then are the
+    // (by that time dangling) pointers dropped from the vector.
+    for (size_t i = 0; i < mSecMaps.size(); ++i) {
+      delete mSecMaps[i];
+    }
+    mSecMaps.clear();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT.  Yields two nulls if FindSecMap(ia) fails.
+  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
+    SecMap* sm = FindSecMap(ia);
+    if (!sm) {
+      return pair<const RuleSet*, const vector<PfxInstr>*>(nullptr, nullptr);
+    }
+    return pair<const RuleSet*, const vector<PfxInstr>*>
+             (sm->FindRuleSet(ia), sm->GetPfxInstrs());
+  }
+
+  // Take |aSecMap| into the collection, keeping |mSecMaps| ordered by
+  // address.  No overlaps allowed w.r.t. existing secondary maps.
+  void AddSecMap(SecMap* aSecMap) {
+    // An empty SecMap is never stored, since nothing could ever be
+    // found in it anyway.  It is not deleted here either.
+    if (aSecMap->IsEmpty()) {
+      return;
+    }
+
+    MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
+
+    // Walk the existing SecMaps in order, up to the first one whose
+    // maximum address lies above the start of the new one: the new
+    // SecMap goes immediately before that one.  If no such entry
+    // exists, |pos| ends up equal to the size, that is, at the end.
+    // FIXME: this gives a cost that is O(N^2) in the total number of
+    // shared objects in the system.  ToDo: better.
+    const size_t numBefore = mSecMaps.size();
+    size_t pos = 0;
+    while (pos < numBefore) {
+      SecMap* existing = mSecMaps[pos];
+      MOZ_ASSERT(existing->mSummaryMinAddr <= existing->mSummaryMaxAddr);
+      if (aSecMap->mSummaryMinAddr < existing->mSummaryMaxAddr) {
+        break;
+      }
+      ++pos;
+    }
+    MOZ_ASSERT(pos <= numBefore);
+
+    // Inserting at the end position is the same as appending.
+    mSecMaps.insert(mSecMaps.begin() + pos, aSecMap);
+
+    // Log the new total.
+    char msg[100];
+    SprintfLiteral(msg, "AddSecMap: now have %d SecMaps\n",
+                   (int)mSecMaps.size());
+    // Make sure the text is terminated whatever happened above.
+    msg[sizeof(msg)-1] = 0;
+    mLog(msg);
+  }
+
+  // Remove and delete every SecMap whose summary address range
+  // intersects the inclusive range [avma_min, avma_max].
+  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
+    MOZ_ASSERT(avma_min <= avma_max);
+
+    // Visit the entries from last to first.  Erasing an entry only
+    // moves the ones above it, and those have been dealt with by
+    // then.  It also ensures that the special case where the range
+    // denotes the entire address space can be completed in time
+    // proportional to the number of elements in the map.
+    for (size_t remaining = mSecMaps.size(); remaining > 0; --remaining) {
+      const size_t ix = remaining - 1;
+      SecMap* candidate = mSecMaps[ix];
+      const bool overlaps = candidate->mSummaryMaxAddr >= avma_min &&
+                            avma_max >= candidate->mSummaryMinAddr;
+      if (!overlaps) {
+        continue;
+      }
+
+      // Take it out of the vector (sliding down everything above
+      // it), then destroy it.
+      mSecMaps.erase(mSecMaps.begin() + ix);
+      delete candidate;
+    }
+  }
+
+  // Return the number of SecMaps currently held in |mSecMaps|.
+  size_t CountSecMaps() {
+    return mSecMaps.size();
+  }
+
+  // Assess heuristically whether the given address is an instruction
+  // immediately following a call instruction.
+  // RUNS IN NO-MALLOC CONTEXT
+  bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) {
+    if (!aInstrAddr.Valid()) {
+      return false;
+    }
+
+    uintptr_t ia = aInstrAddr.Value();
+
+    // Assume that nobody would be crazy enough to put code in the
+    // first or last page.
+    if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) {
+      return false;
+    }
+
+    // See if it falls inside a known r-x mapped area.  Poking around
+    // outside such places risks segfaulting.
+    uintptr_t insns_min, insns_max;
+    bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia);
+    if (!b) {
+      // no code (that we know about) at this address
+      return false;
+    }
+
+    // |ia| falls within an r-x range.  So we can
+    // safely poke around in [insns_min, insns_max].
+
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    // Is the previous instruction recognisably a CALL?  This is
+    // common for the 32- and 64-bit versions, except for the
+    // simm32(%rip) case, which is 64-bit only.
+    //
+    // For all other cases, the 64 bit versions are either identical
+    // to the 32 bit versions, or have an optional extra leading REX.W
+    // byte (0x41).  Since the extra 0x41 is optional we have to
+    // ignore it, with the convenient result that the same matching
+    // logic works for both 32- and 64-bit cases.
+
+    uint8_t* p = (uint8_t*)ia;
+#   if defined(LUL_ARCH_x64)
+    // CALL simm32(%rip)  == FF15 simm32
+    if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) {
+      return true;
+    }
+#   endif
+    // CALL rel32  == E8 rel32  (both 32- and 64-bit)
+    if (ia - 5 >= insns_min && p[-5] == 0xE8) {
+      return true;
+    }
+    // CALL *%eax .. CALL *%edi  ==   FFD0 ..   FFD7  (32-bit)
+    // CALL *%rax .. CALL *%rdi  ==   FFD0 ..   FFD7  (64-bit)
+    // CALL *%r8  .. CALL *%r15  == 41FFD0 .. 41FFD7  (64-bit)
+    if (ia - 2 >= insns_min &&
+        p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) {
+      return true;
+    }
+    // Almost all of the remaining cases that occur in practice are
+    // of the form CALL *simm8(reg) or CALL *simm32(reg).
+    //
+    // 64 bit cases:
+    //
+    // call  *simm8(%rax)         FF50   simm8
+    // call  *simm8(%rcx)         FF51   simm8
+    // call  *simm8(%rdx)         FF52   simm8
+    // call  *simm8(%rbx)         FF53   simm8
+    // call  *simm8(%rsp)         FF5424 simm8
+    // call  *simm8(%rbp)         FF55   simm8
+    // call  *simm8(%rsi)         FF56   simm8
+    // call  *simm8(%rdi)         FF57   simm8
+    //
+    // call  *simm8(%r8)        41FF50   simm8
+    // call  *simm8(%r9)        41FF51   simm8
+    // call  *simm8(%r10)       41FF52   simm8
+    // call  *simm8(%r11)       41FF53   simm8
+    // call  *simm8(%r12)       41FF5424 simm8
+    // call  *simm8(%r13)       41FF55   simm8
+    // call  *simm8(%r14)       41FF56   simm8
+    // call  *simm8(%r15)       41FF57   simm8
+    //
+    // call  *simm32(%rax)        FF90   simm32
+    // call  *simm32(%rcx)        FF91   simm32
+    // call  *simm32(%rdx)        FF92   simm32
+    // call  *simm32(%rbx)        FF93   simm32
+    // call  *simm32(%rsp)        FF9424 simm32
+    // call  *simm32(%rbp)        FF95   simm32
+    // call  *simm32(%rsi)        FF96   simm32
+    // call  *simm32(%rdi)        FF97   simm32
+    //
+    // call  *simm32(%r8)       41FF90   simm32
+    // call  *simm32(%r9)       41FF91   simm32
+    // call  *simm32(%r10)      41FF92   simm32
+    // call  *simm32(%r11)      41FF93   simm32
+    // call  *simm32(%r12)      41FF9424 simm32
+    // call  *simm32(%r13)      41FF95   simm32
+    // call  *simm32(%r14)      41FF96   simm32
+    // call  *simm32(%r15)      41FF97   simm32
+    //
+    // 32 bit cases:
+    //
+    // call  *simm8(%eax)         FF50   simm8
+    // call  *simm8(%ecx)         FF51   simm8
+    // call  *simm8(%edx)         FF52   simm8
+    // call  *simm8(%ebx)         FF53   simm8
+    // call  *simm8(%esp)         FF5424 simm8
+    // call  *simm8(%ebp)         FF55   simm8
+    // call  *simm8(%esi)         FF56   simm8
+    // call  *simm8(%edi)         FF57   simm8
+    //
+    // call  *simm32(%eax)        FF90   simm32
+    // call  *simm32(%ecx)        FF91   simm32
+    // call  *simm32(%edx)        FF92   simm32
+    // call  *simm32(%ebx)        FF93   simm32
+    // call  *simm32(%esp)        FF9424 simm32
+    // call  *simm32(%ebp)        FF95   simm32
+    // call  *simm32(%esi)        FF96   simm32
+    // call  *simm32(%edi)        FF97   simm32
+    if (ia - 3 >= insns_min &&
+        p[-3] == 0xFF &&
+        (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) {
+      // imm8 case, not including %esp/%rsp
+      return true;
+    }
+    if (ia - 4 >= insns_min &&
+        p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) {
+      // imm8 case for %esp/%rsp
+      return true;
+    }
+    if (ia - 6 >= insns_min &&
+        p[-6] == 0xFF &&
+        (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) {
+      // imm32 case, not including %esp/%rsp
+      return true;
+    }
+    if (ia - 7 >= insns_min &&
+        p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) {
+      // imm32 case for %esp/%rsp
+      return true;
+    }
+
+#elif defined(LUL_ARCH_arm)
+    if (ia & 1) {
+      uint16_t w0 = 0, w1 = 0;
+      // The return address has its lowest bit set, indicating a return
+      // to Thumb code.
+      ia &= ~(uintptr_t)1;
+      if (ia - 2 >= insns_min && ia - 1 <= insns_max) {
+        w1 = *(uint16_t*)(ia - 2);
+      }
+      if (ia - 4 >= insns_min && ia - 1 <= insns_max) {
+        w0 = *(uint16_t*)(ia - 4);
+      }
+      // Is it a 32-bit Thumb call insn?
+      // BL  simm26 (Encoding T1)
+      if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
+        return true;
+      }
+      // BLX simm26 (Encoding T2)
+      if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
+        return true;
+      }
+      // Other possible cases:
+      // (BLX Rm, Encoding T1).
+      // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.)
+      // 0100 0111 1 Rm 000
+    } else {
+      // Returning to ARM code.
+      uint32_t a0 = 0;
+      if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) {
+        a0 = *(uint32_t*)(ia - 4);
+      }
+      // Leading E forces unconditional only -- fix.  It could be
+      // anything except F, which is the deprecated NV code.
+      // BL simm26 (Encoding A1)
+      if ((a0 & 0xFF000000) == 0xEB000000) {
+        return true;
+      }
+      // Other possible cases:
+      // BLX simm26 (Encoding A2)
+      //if ((a0 & 0xFE000000) == 0xFA000000)
+      //  return true;
+      // BLX (register) (A1): BLX <c> <Rm>
+      // cond 0001 0010 1111 1111 1111 0011 Rm
+      // again, cond can be anything except NV (0xF)
+    }
+
+#else
+# error "Unsupported arch"
+#endif
+
+    // Not an insn we recognise.
+    return false;
+  }
+
+ private:
+  // RUNS IN NO-MALLOC CONTEXT
+  // Returns the element of mSecMaps whose [mSummaryMinAddr, mSummaryMaxAddr]
+  // range (both ends inclusive) contains |ia|, or nullptr if there is none.
+  SecMap* FindSecMap(uintptr_t ia) {
+    // Bisect mSecMaps.  The bounds are deliberately signed: |last| has to
+    // be able to reach -1 (empty vector, or |ia| below element zero)
+    // without wrapping around.
+    long int first = 0;
+    long int last  = (long int)mSecMaps.size() - 1;
+    // Invariant: any match lies within [first, last], inclusive.
+    while (first <= last) {
+      const long int  probe     = first + ((last - first) / 2);
+      SecMap* const   candidate = mSecMaps[probe];
+      const uintptr_t lowest    = candidate->mSummaryMinAddr;
+      const uintptr_t highest   = candidate->mSummaryMaxAddr;
+      if (ia < lowest) {
+        // |ia| is below this map; continue in the lower half.
+        last = probe - 1;
+      } else if (ia > highest) {
+        // |ia| is above this map; continue in the upper half.
+        first = probe + 1;
+      } else {
+        MOZ_ASSERT(lowest <= ia && ia <= highest);
+        return candidate;
+      }
+    }
+    // The search space is exhausted: nothing brackets |ia|.
+    return nullptr;
+  }
+
+ private:
+  // sorted array of per-object ranges, non overlapping, non empty
+  std::vector<SecMap*> mSecMaps;
+
+  // a logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+
+////////////////////////////////////////////////////////////////
+// LUL                                                        //
+////////////////////////////////////////////////////////////////
+
+// Logging helper for LUL member functions.  Formats |_str| behind a
+// "LUL: pid <pid> tid <tid> lul-obj <this>: " prefix into a 200-byte local
+// buffer and hands the result to mLog.  The expansion refers to |this| and
+// |mLog|, so it can only be used where both of those are in scope.
+#define LUL_LOG(_str) \
+  do { \
+    char buf[200]; \
+    SprintfLiteral(buf, \
+                   "LUL: pid %d tid %d lul-obj %p: %s", \
+                   getpid(), gettid(), this, (_str));   \
+    buf[sizeof(buf)-1] = 0; \
+    mLog(buf); \
+  } while (0)
+
+// Creates a LUL object.  |aLog| is stored as the logging sink and is also
+// passed to the newly allocated PriMap.  The object starts out in admin
+// mode, and the calling thread (as reported by gettid()) is recorded as the
+// admin thread.  The PriMap, SegArray and UniqueStringUniverse allocated
+// here are released by the destructor.
+LUL::LUL(void (*aLog)(const char*))
+  : mLog(aLog)
+  , mAdminMode(true)
+  , mAdminThreadId(gettid())
+  , mPriMap(new PriMap(aLog))
+  , mSegArray(new SegArray())
+  , mUSU(new UniqueStringUniverse())
+{
+  LUL_LOG("LUL::LUL: Created object");
+}
+
+
+// Logs the destruction and then frees the PriMap, SegArray and
+// UniqueStringUniverse owned by this object.
+LUL::~LUL()
+{
+  // This has to come first: LUL_LOG uses mLog, which is cleared below.
+  LUL_LOG("LUL::~LUL: Destroyed object");
+  delete mPriMap;
+  delete mSegArray;
+  mLog = nullptr;
+  delete mUSU;
+}
+
+
+// Once at least 5000 new frames have been counted since the previous
+// report, sends a one-line summary of the new frame counts to mLog and
+// makes the current counts the new baseline.  Otherwise does nothing.
+void
+LUL::MaybeShowStats()
+{
+  // Note that this is racy: some other thread may update mStats between
+  // the read that produces |total| and the reads that produce the
+  // per-category deltas, so there is no guarantee that
+  //   total == fromContext + fromCFI + fromScan
+  // These numbers are only used for stats printing, so that's tolerable.
+  const uint32_t total = mStats - mStatsPrevious;
+  if (total < 5000) {
+    // Not enough new frames since the last report; stay quiet.
+    return;
+  }
+
+  const uint32_t fromContext = mStats.mContext - mStatsPrevious.mContext;
+  const uint32_t fromCFI     = mStats.mCFI     - mStatsPrevious.mCFI;
+  const uint32_t fromScan    = mStats.mScanned - mStatsPrevious.mScanned;
+  mStatsPrevious = mStats;
+
+  char line[200];
+  SprintfLiteral(line,
+                 "LUL frame stats: TOTAL %5u"
+                 "    CTX %4u    CFI %4u    SCAN %4u",
+                 total, fromContext, fromCFI, fromScan);
+  line[sizeof(line)-1] = 0;
+  mLog(line);
+}
+
+
+// Takes this object out of admin mode by clearing mAdminMode.  Asserts
+// that the caller is running on the admin thread (mAdminThreadId).
+void
+LUL::EnableUnwinding()
+{
+  LUL_LOG("LUL::EnableUnwinding");
+  // Don't assert for Admin mode here.  That is, tolerate a call here
+  // if we are already in Unwinding mode.
+  MOZ_ASSERT(gettid() == mAdminThreadId);
+
+  mAdminMode = false;
+}
+
+
+// Tells LUL about a new mapping of |aSize| bytes at address |aRXavma|,
+// belonging to the object named |aFileName|.  If |aMappedImage| is
+// non-null the unwind data is read from that in-memory image, otherwise
+// it is read via |aFileName|.  Must be called in admin mode, on the admin
+// thread.  Zero-sized notifications are logged and otherwise ignored.
+void
+LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
+                    const char* aFileName, const void* aMappedImage)
+{
+  MOZ_ASSERT(mAdminMode);
+  MOZ_ASSERT(gettid() == mAdminThreadId);
+
+  mLog(":\n");
+  char msg[200];
+  SprintfLiteral(msg, "NotifyMap %llx %llu %s\n",
+                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
+                 aFileName);
+  msg[sizeof(msg)-1] = 0;
+  mLog(msg);
+
+  // Ignore obviously-stupid notifications: an empty mapping has nothing
+  // in it to record.
+  if (aSize == 0) {
+    return;
+  }
+
+  // This mapping gets a secondary map of its own.
+  SecMap* secMap = new SecMap(mLog);
+
+  // Fill |secMap| with the CFI or EXIDX unwind data.  The readers' results
+  // are deliberately ignored.
+  if (aMappedImage) {
+    (void)lul::ReadSymbolDataInternal(
+            (const uint8_t*)aMappedImage,
+            string(aFileName), std::vector<string>(), secMap,
+            (void*)aRXavma, aSize, mUSU, mLog);
+  } else {
+    (void)lul::ReadSymbolData(
+            string(aFileName), std::vector<string>(), secMap,
+            (void*)aRXavma, aSize, mUSU, mLog);
+  }
+
+  mLog("NotifyMap .. preparing entries\n");
+
+  secMap->PrepareRuleSets(aRXavma, aSize);
+
+  SprintfLiteral(msg,
+                 "NotifyMap got %lld entries\n", (long long int)secMap->Size());
+  msg[sizeof(msg)-1] = 0;
+  mLog(msg);
+
+  // Hand the new map to the primary map (the top level set of mapped
+  // objects).
+  mPriMap->AddSecMap(secMap);
+
+  // Record the address range in the segment array too, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+}
+
+
+// Tells LUL that the |aSize| bytes starting at |aRXavma| contain code,
+// without supplying any unwind data for them.  Must be called in admin
+// mode, on the admin thread.  Zero-sized notifications are logged and
+// otherwise ignored.
+void
+LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize)
+{
+  MOZ_ASSERT(mAdminMode);
+  MOZ_ASSERT(gettid() == mAdminThreadId);
+
+  mLog(":\n");
+  char msg[200];
+  SprintfLiteral(msg, "NotifyExecutableArea %llx %llu\n",
+                 (unsigned long long int)aRXavma,
+                 (unsigned long long int)aSize);
+  msg[sizeof(msg)-1] = 0;
+  mLog(msg);
+
+  // Ignore obviously-stupid notifications.
+  if (aSize == 0) {
+    return;
+  }
+
+  // Record the address range in the segment array, so that the stack
+  // scan and __kernel_syscall mechanisms know where valid code is.
+  mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
+}
+
+
+// Tells LUL that the address range [aRXavmaMin, aRXavmaMax] is about to be
+// unmapped.  Must be called in admin mode, on the admin thread.
+void
+LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax)
+{
+  MOZ_ASSERT(mAdminMode);
+  MOZ_ASSERT(gettid() == mAdminThreadId);
+
+  // Log the range that is going away.
+  mLog(":\n");
+  char msg[100];
+  SprintfLiteral(msg, "NotifyUnmap %016llx-%016llx\n",
+                 (unsigned long long int)aRXavmaMin,
+                 (unsigned long long int)aRXavmaMax);
+  msg[sizeof(msg)-1] = 0;
+  mLog(msg);
+
+  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
+
+  // Drop (and delete) every secondary map that intersects the range.
+  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
+
+  // Mark the range in the segment array as no longer containing valid
+  // code.
+  mSegArray->add(aRXavmaMin, aRXavmaMax, false);
+
+  // Log how many secondary maps are left.
+  const int nSecMapsLeft = (int)mPriMap->CountSecMaps();
+  SprintfLiteral(msg, "NotifyUnmap: now have %d SecMaps\n",
+                 nSecMapsLeft);
+  msg[sizeof(msg)-1] = 0;
+  mLog(msg);
+}
+
+
+// Returns the number of secondary maps currently held by the primary map.
+// Must be called in admin mode, on the admin thread.
+size_t
+LUL::CountMappings()
+{
+  MOZ_ASSERT(mAdminMode);
+  MOZ_ASSERT(gettid() == mAdminThreadId);
+
+  return mPriMap->CountSecMaps();
+}
+
+
+// RUNS IN NO-MALLOC CONTEXT
+// Reads the word at address |aAddr| out of the stack snapshot |aStackImg|.
+// Returns an invalid TaggedUWord if |aAddr| is itself invalid, or if the
+// word does not lie entirely within the snapshot.
+static
+TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg)
+{
+  // Reject invalid addresses, and addresses below the lowest one the
+  // snapshot holds.  |aAddr.Value()| is the lowest address requested and
+  // |aStackImg->mStartAvma| the lowest one available, so a direct
+  // comparison suffices.
+  if (!aAddr.Valid() || aAddr.Value() < aStackImg->mStartAvma) {
+    return TaggedUWord();
+  }
+
+  // Now the upper end.  Both "one past the end" values are computed with
+  // overflow checking, since |aAddr| may be 0xFFF...FFF or one of the 3/7
+  // values below it, in which case plain addition would wrap.  See bug
+  // #1245477.
+  typedef CheckedInt<uintptr_t> CheckedUWord;
+  CheckedUWord requestEnd
+    = CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
+  CheckedUWord imageEnd
+    = CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
+  if (!requestEnd.isValid() || !imageEnd.isValid()) {
+    // One of the additions overflowed.
+    return TaggedUWord();
+  }
+  if (requestEnd.value() > imageEnd.value()) {
+    // The requested word extends beyond the end of the snapshot.
+    return TaggedUWord();
+  }
+
+  // In range.  Convert the address into an offset within the snapshot and
+  // fetch the word stored there.
+  const uintptr_t offset = aAddr.Value() - aStackImg->mStartAvma;
+  return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + offset));
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Returns the value of register number |aReg|: |aCFA| if |aReg| is
+// DW_REG_CFA, otherwise the matching field of |aOldRegs| for the
+// architecture being compiled for.  Any other register number asserts
+// and produces an invalid (default-constructed) TaggedUWord.
+static
+TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
+                        TaggedUWord aCFA)
+{
+  switch (aReg) {
+    case DW_REG_CFA:       return aCFA;
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    case DW_REG_INTEL_XBP: return aOldRegs->xbp;
+    case DW_REG_INTEL_XSP: return aOldRegs->xsp;
+    case DW_REG_INTEL_XIP: return aOldRegs->xip;
+#elif defined(LUL_ARCH_arm)
+    case DW_REG_ARM_R7:    return aOldRegs->r7;
+    case DW_REG_ARM_R11:   return aOldRegs->r11;
+    case DW_REG_ARM_R12:   return aOldRegs->r12;
+    case DW_REG_ARM_R13:   return aOldRegs->r13;
+    case DW_REG_ARM_R14:   return aOldRegs->r14;
+    case DW_REG_ARM_R15:   return aOldRegs->r15;
+#else
+# error "Unsupported arch"
+#endif
+    default: MOZ_ASSERT(0); return TaggedUWord();
+  }
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// See prototype for comment.
+//
+// Evaluates the PfxInstr sequence in |aPfxInstrs| that begins at index
+// |start|, using a fixed-size (N_STACK entry) evaluation stack.  The
+// element at |start| must be PX_Start; its operand says whether |aCFA| is
+// pushed before evaluation begins.  The following instructions are then
+// executed in order until PX_End is reached, and the value on top of the
+// stack at that point is returned.
+//
+// An invalid (default-constructed) TaggedUWord is returned if |start| is
+// out of range, if the sequence doesn't begin with PX_Start or runs off
+// the end of |aPfxInstrs|, if the evaluation stack over- or underflows, if
+// an unexpected opcode is seen, or if the stack is empty at PX_End.
+TaggedUWord EvaluatePfxExpr(int32_t start,
+                            const UnwindRegs* aOldRegs,
+                            TaggedUWord aCFA, const StackImage* aStackImg,
+                            const vector<PfxInstr>& aPfxInstrs)
+{
+  // A small evaluation stack, and a stack pointer, which points to
+  // the highest numbered in-use element.
+  const int N_STACK = 10;
+  TaggedUWord stack[N_STACK];
+  int stackPointer = -1;
+  for (int i = 0; i < N_STACK; i++)
+    stack[i] = TaggedUWord();
+
+  // PUSH and POP bail out to |fail| rather than ever indexing outside
+  // |stack|.
+# define PUSH(_tuw) \
+    do { \
+      if (stackPointer >= N_STACK-1) goto fail; /* overflow */ \
+      stack[++stackPointer] = (_tuw); \
+    } while (0)
+
+# define POP(_lval) \
+    do { \
+      if (stackPointer < 0) goto fail; /* underflow */ \
+     _lval = stack[stackPointer--]; \
+   } while (0)
+
+  // Cursor in the instruction sequence.  It is computed before |start|
+  // has been validated, but isn't used until the checks below have passed.
+  size_t curr = start + 1;
+
+  // Check the start point is sane.
+  size_t nInstrs = aPfxInstrs.size();
+  if (start < 0 || (size_t)start >= nInstrs)
+    goto fail;
+
+  {
+    // The instruction sequence must start with PX_Start.  If not,
+    // something is seriously wrong.
+    PfxInstr first = aPfxInstrs[start];
+    if (first.mOpcode != PX_Start)
+      goto fail;
+
+    // Push the CFA on the stack to start with (or not), as required by
+    // the original DW_OP_*expression* CFI.
+    if (first.mOperand != 0)
+      PUSH(aCFA);
+  }
+
+  while (true) {
+    if (curr >= nInstrs)
+      goto fail; // ran off the end of the sequence
+
+    PfxInstr pfxi = aPfxInstrs[curr++];
+    if (pfxi.mOpcode == PX_End)
+      break; // we're done
+
+    // For the two-operand cases below, |x| is the value popped first (the
+    // top of the stack) and is used as the right-hand operand; |y| is the
+    // value beneath it and is used as the left-hand operand.
+    switch (pfxi.mOpcode) {
+      case PX_Start:
+        // This should appear only at the start of the sequence.
+        goto fail;
+      case PX_End:
+        // We just took care of that, so we shouldn't see it again.
+        MOZ_ASSERT(0);
+        goto fail;
+      case PX_SImm32:
+        // Push the operand, converted to intptr_t.
+        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
+        break;
+      case PX_DwReg: {
+        // Push the old value of the register named by the operand.
+        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
+        MOZ_ASSERT(reg != DW_REG_CFA);
+        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
+        break;
+      }
+      case PX_Deref: {
+        // Replace the address on top of the stack by the word that
+        // |aStackImg| holds at that address.
+        TaggedUWord addr;
+        POP(addr);
+        PUSH(DerefTUW(addr, aStackImg));
+        break;
+      }
+      case PX_Add: {
+        TaggedUWord x, y;
+        POP(x); POP(y); PUSH(y + x);
+        break;
+      }
+      case PX_Sub: {
+        TaggedUWord x, y;
+        POP(x); POP(y); PUSH(y - x);
+        break;
+      }
+      case PX_And: {
+        TaggedUWord x, y;
+        POP(x); POP(y); PUSH(y & x);
+        break;
+      }
+      case PX_Or: {
+        TaggedUWord x, y;
+        POP(x); POP(y); PUSH(y | x);
+        break;
+      }
+      case PX_CmpGES: {
+        TaggedUWord x, y;
+        POP(x); POP(y); PUSH(y.CmpGEs(x));
+        break;
+      }
+      case PX_Shl: {
+        TaggedUWord x, y;
+        POP(x); POP(y); PUSH(y << x);
+        break;
+      }
+      default:
+        MOZ_ASSERT(0);
+        goto fail;
+    }
+  } // while (true)
+
+  // Evaluation finished.  The top value on the stack is the result.
+  if (stackPointer >= 0) {
+    return stack[stackPointer];
+  }
+  // Else fall through
+
+ fail:
+  return TaggedUWord();
+
+# undef PUSH
+# undef POP
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Computes the value denoted by this LExpr, given the old register values
+// |aOldRegs|, the CFA |aCFA| and the stack snapshot |aStackImg|.
+// |aPfxInstrs| is consulted only for PFXEXPR expressions.  The result is
+// invalid if the expression is UNKNOWN or cannot be evaluated.
+TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs,
+                                TaggedUWord aCFA, const StackImage* aStackImg,
+                                const vector<PfxInstr>* aPfxInstrs) const
+{
+  switch (mHow) {
+    case UNKNOWN:
+      return TaggedUWord();
+    case NODEREF:
+    case DEREF: {
+      // Both forms start from "register + offset".  NODEREF yields that
+      // sum directly; DEREF yields the word stored at that address.
+      TaggedUWord base = EvaluateReg(mReg, aOldRegs, aCFA);
+      TaggedUWord sum  = base + TaggedUWord((intptr_t)mOffset);
+      if (mHow == DEREF) {
+        return DerefTUW(sum, aStackImg);
+      }
+      return sum;
+    }
+    case PFXEXPR:
+      // Here mOffset is the index in |*aPfxInstrs| at which the
+      // expression starts.
+      MOZ_ASSERT(aPfxInstrs);
+      if (aPfxInstrs) {
+        return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg,
+                               *aPfxInstrs);
+      }
+      return TaggedUWord();
+    default:
+      MOZ_ASSERT(0);
+      return TaggedUWord();
+  }
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Applies the rule set |aRS| to |*aRegs|, replacing the register values
+// for the current frame by those of the calling frame.  Registers whose
+// new value could not be computed are left invalid.
+static
+void UseRuleSet(/*MOD*/UnwindRegs* aRegs,
+                const StackImage* aStackImg, const RuleSet* aRS,
+                const vector<PfxInstr>* aPfxInstrs)
+{
+  // The rules are expressed in terms of the registers' current values,
+  // so keep a snapshot of those to evaluate against.
+  UnwindRegs prevRegs = *aRegs;
+
+  // Invalidate every register in |*aRegs|.  That way the caller can tell,
+  // when we return, which ones were successfully recomputed.  If not even
+  // the PC gets a new value, the caller has to abandon the unwind.
+  // FIXME: Create and use instead: aRegs->SetAllInvalid();
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  aRegs->xbp = TaggedUWord();
+  aRegs->xsp = TaggedUWord();
+  aRegs->xip = TaggedUWord();
+#elif defined(LUL_ARCH_arm)
+  aRegs->r7  = TaggedUWord();
+  aRegs->r11 = TaggedUWord();
+  aRegs->r12 = TaggedUWord();
+  aRegs->r13 = TaggedUWord();
+  aRegs->r14 = TaggedUWord();
+  aRegs->r15 = TaggedUWord();
+#else
+#  error "Unsupported arch"
+#endif
+
+  // Step one: the CFA.  There is no previous CFA to supply, so an invalid
+  // value is passed in its place.
+  TaggedUWord newCfa
+    = aRS->mCfaExpr.EvaluateExpr(&prevRegs,
+                                 TaggedUWord()/*old cfa*/,
+                                 aStackImg, aPfxInstrs);
+
+  // Failing to compute the CFA is unfortunate, but carry on regardless:
+  // all is still well as long as none of the register rules refer to the
+  // CFA.  Step two: evaluate the rule for each register being tracked.
+
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  aRegs->xbp
+    = aRS->mXbpExpr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+  aRegs->xsp
+    = aRS->mXspExpr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+  aRegs->xip
+    = aRS->mXipExpr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+#elif defined(LUL_ARCH_arm)
+  aRegs->r7
+    = aRS->mR7expr .EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+  aRegs->r11
+    = aRS->mR11expr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+  aRegs->r12
+    = aRS->mR12expr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+  aRegs->r13
+    = aRS->mR13expr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+  aRegs->r14
+    = aRS->mR14expr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+  aRegs->r15
+    = aRS->mR15expr.EvaluateExpr(&prevRegs, newCfa, aStackImg, aPfxInstrs);
+#else
+# error "Unsupported arch"
+#endif
+
+  // Done.  Every register for which no new value could be computed is
+  // still marked invalid.
+}
+
+// RUNS IN NO-MALLOC CONTEXT
+// Unwinds the stack described by |aStartRegs| and |aStackImg|.
+//
+//   aFramePCs, aFrameSPs   Output arrays, each with room for |aFramesAvail|
+//                          entries.  Entry N receives the PC and SP of frame
+//                          N, frame 0 being the innermost.  For frames other
+//                          than the innermost, the PC is the return address
+//                          minus one, so that it falls inside the calling
+//                          instruction.  An SP that isn't known is written
+//                          as zero.
+//   aFramesUsed            Set to the number of entries written.
+//   aScannedFramesAcquired Incremented once for each frame that was
+//                          recovered by stack scanning.  It is not reset
+//                          here, so the caller must initialise it.
+//   aFramesAvail           Maximum number of frames to produce.
+//   aScannedFramesAllowed  Maximum number of times stack scanning may be
+//                          attempted.
+//   aStartRegs             Register values to start from.  Not modified.
+//   aStackImg              Snapshot of the stack to unwind through.
+//
+// Each frame is unwound using the RuleSet for its PC if there is one, and
+// by scanning the stack for a plausible return address otherwise.  The
+// object must not be in admin mode.
+void
+LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
+            /*OUT*/uintptr_t* aFrameSPs,
+            /*OUT*/size_t* aFramesUsed,
+            /*OUT*/size_t* aScannedFramesAcquired,
+            size_t aFramesAvail,
+            size_t aScannedFramesAllowed,
+            UnwindRegs* aStartRegs, StackImage* aStackImg)
+{
+  MOZ_ASSERT(!mAdminMode);
+
+  /////////////////////////////////////////////////////////
+  // BEGIN UNWIND
+
+  *aFramesUsed = 0;
+
+  UnwindRegs  regs          = *aStartRegs;
+  TaggedUWord last_valid_sp = TaggedUWord();
+
+  // Stack-scan control
+  unsigned int n_scanned_frames      = 0;  // # s-s frames recovered so far
+  static const int NUM_SCANNED_WORDS = 50; // max allowed scan length
+
+  while (true) {
+
+    if (DEBUG_MAIN) {
+      char buf[300];
+      mLog("\n");
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+      SprintfLiteral(buf,
+                     "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
+                     (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
+                     (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
+                     (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
+#elif defined(LUL_ARCH_arm)
+      SprintfLiteral(buf,
+                     "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
+                     "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
+                     (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
+                     (int)regs.r7.Valid(),  (unsigned long long int)regs.r7.Value(),
+                     (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
+                     (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
+                     (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
+                     (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
+#else
+# error "Unsupported arch"
+#endif
+    }
+
+    // Pick out the instruction address and stack pointer for this frame.
+    // On ARM, the innermost frame's address comes from the PC (r15) and
+    // all outer frames' addresses come from the link register (r14).
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+    TaggedUWord ia = regs.xip;
+    TaggedUWord sp = regs.xsp;
+#elif defined(LUL_ARCH_arm)
+    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
+    TaggedUWord sp = regs.r13;
+#else
+# error "Unsupported arch"
+#endif
+
+    // Stop once the output arrays are full.
+    if (*aFramesUsed >= aFramesAvail) {
+      break;
+    }
+
+    // If we don't have a valid value for the PC, give up.
+    if (!ia.Valid()) {
+      break;
+    }
+
+    // If this is the innermost frame, record the SP value, which
+    // presumably is valid.  If this isn't the innermost frame, and we
+    // have a valid SP value, check that its SP value isn't less than
+    // the one we've seen so far, so as to catch potential SP value
+    // cycles.
+    if (*aFramesUsed == 0) {
+      last_valid_sp = sp;
+    } else {
+      MOZ_ASSERT(last_valid_sp.Valid());
+      if (sp.Valid()) {
+        if (sp.Value() < last_valid_sp.Value()) {
+          // Hmm, SP going in the wrong direction.  Let's stop.
+          break;
+        }
+        // Remember where we got to.
+        last_valid_sp = sp;
+      }
+    }
+
+    // For the innermost frame, the IA value is what we need.  For all
+    // other frames, it's actually the return address, so back up one
+    // byte so as to get it into the calling instruction.
+    aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
+    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
+    (*aFramesUsed)++;
+
+    // Find the RuleSet for the current IA, if any.  This will also
+    // query the backing (secondary) maps if it isn't found in the
+    // thread-local cache.
+
+    // If this isn't the innermost frame, back up into the calling insn.
+    if (*aFramesUsed > 1) {
+      ia = ia + TaggedUWord((uintptr_t)(-1));
+    }
+
+    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs
+      = mPriMap->Lookup(ia.Value());
+    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
+    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;
+
+    if (DEBUG_MAIN) {
+      char buf[100];
+      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
+                     (unsigned long long int)ia.Value(), ruleset);
+      buf[sizeof(buf)-1] = 0;
+      mLog(buf);
+    }
+
+    /////////////////////////////////////////////
+    ////
+    // On 32 bit x86-linux, syscalls are often done via the VDSO
+    // function __kernel_vsyscall, which doesn't have a corresponding
+    // object that we can read debuginfo from.  That effectively kills
+    // off all stack traces for threads blocked in syscalls.  Hence
+    // special-case by looking at the code surrounding the program
+    // counter.
+    //
+    // 0xf7757420 <__kernel_vsyscall+0>:	push   %ecx
+    // 0xf7757421 <__kernel_vsyscall+1>:	push   %edx
+    // 0xf7757422 <__kernel_vsyscall+2>:	push   %ebp
+    // 0xf7757423 <__kernel_vsyscall+3>:	mov    %esp,%ebp
+    // 0xf7757425 <__kernel_vsyscall+5>:	sysenter
+    // 0xf7757427 <__kernel_vsyscall+7>:	nop
+    // 0xf7757428 <__kernel_vsyscall+8>:	nop
+    // 0xf7757429 <__kernel_vsyscall+9>:	nop
+    // 0xf775742a <__kernel_vsyscall+10>:	nop
+    // 0xf775742b <__kernel_vsyscall+11>:	nop
+    // 0xf775742c <__kernel_vsyscall+12>:	nop
+    // 0xf775742d <__kernel_vsyscall+13>:	nop
+    // 0xf775742e <__kernel_vsyscall+14>:	int    $0x80
+    // 0xf7757430 <__kernel_vsyscall+16>:	pop    %ebp
+    // 0xf7757431 <__kernel_vsyscall+17>:	pop    %edx
+    // 0xf7757432 <__kernel_vsyscall+18>:	pop    %ecx
+    // 0xf7757433 <__kernel_vsyscall+19>:	ret
+    //
+    // In cases where the sampled thread is blocked in a syscall, its
+    // program counter will point at "pop %ebp".  Hence we look for
+    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
+    // the corresponding register-recovery actions are:
+    //    new_ebp = *(old_esp + 0)
+    //    new eip = *(old_esp + 12)
+    //    new_esp = old_esp + 16
+    //
+    // It may also be the case that the program counter points two
+    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
+    // the case where the syscall has been restarted but the thread
+    // hasn't been rescheduled.  The code below doesn't handle that;
+    // it could easily be made to.
+    //
+#if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux)
+    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
+      uintptr_t insns_min, insns_max;
+      uintptr_t eip = ia.Value();
+      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
+      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
+        uint8_t* eipC = (uint8_t*)eip;
+        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
+            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
+          TaggedUWord sp_plus_0  = sp;
+          TaggedUWord sp_plus_12 = sp;
+          TaggedUWord sp_plus_16 = sp;
+          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
+          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
+          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
+          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
+          TaggedUWord new_esp = sp_plus_16;
+          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
+            regs.xbp = new_ebp;
+            regs.xip = new_eip;
+            regs.xsp = new_esp;
+            continue;
+          }
+        }
+      }
+    }
+#endif
+    ////
+    /////////////////////////////////////////////
+
+    // So, do we have a ruleset for this address?  If so, use it now.
+    if (ruleset) {
+
+      if (DEBUG_MAIN) {
+        ruleset->Print(mLog); mLog("\n");
+      }
+      // Use the RuleSet to compute the registers for the previous
+      // frame.  |regs| is modified in-place.
+      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
+
+    } else {
+
+      // There's no RuleSet for the specified address, so see if
+      // it's possible to get anywhere by stack-scanning.
+
+      // Use stack scanning frugally.
+      if (n_scanned_frames++ >= aScannedFramesAllowed) {
+        break;
+      }
+
+      // We can't scan the stack without a valid, aligned stack pointer.
+      if (!sp.IsAligned()) {
+        break;
+      }
+
+      // Examine up to NUM_SCANNED_WORDS consecutive words, starting at SP
+      // and moving up the stack, looking for one that appears to be a
+      // return address.
+      bool scan_succeeded = false;
+      for (int i = 0; i < NUM_SCANNED_WORDS; ++i) {
+        TaggedUWord aWord = DerefTUW(sp, aStackImg);
+        // aWord is something we fished off the stack.  It should be
+        // valid, unless we overran the stack bounds.
+        if (!aWord.Valid()) {
+          break;
+        }
+
+        // Now, does aWord point inside a text section and immediately
+        // after something that looks like a call instruction?
+        if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) {
+          // Yes it does.  Update the unwound registers heuristically,
+          // using the same schemes as Breakpad does.
+          scan_succeeded = true;
+          (*aScannedFramesAcquired)++;
+
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+          // The same logic applies for the 32- and 64-bit cases.
+          // Register names of the form xsp etc refer to (eg) esp in
+          // the 32-bit case and rsp in the 64-bit case.
+#         if defined(LUL_ARCH_x64)
+          const int wordSize = 8;
+#         else
+          const int wordSize = 4;
+#         endif
+          // The return address -- at XSP -- will have been pushed by
+          // the CALL instruction.  So the caller's XSP value
+          // immediately before and after that CALL instruction is the
+          // word above XSP.
+          regs.xsp = sp + TaggedUWord(wordSize);
+
+          // aWord points at the return point, so back up one byte
+          // to put it in the calling instruction.
+          regs.xip = aWord + TaggedUWord((uintptr_t)(-1));
+
+          // Computing a new value from the frame pointer is more tricky.
+          if (regs.xbp.Valid() &&
+              sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) {
+            // One possibility is that the callee begins with the standard
+            // preamble "push %xbp; mov %xsp, %xbp".  In which case, the
+            // (1) caller's XBP value will be at the word below XSP, and
+            // (2) the current (callee's) XBP will point at that word:
+            regs.xbp = DerefTUW(regs.xbp, aStackImg);
+          } else if (regs.xbp.Valid() &&
+                     sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) {
+            // If that didn't work out, maybe the callee didn't change
+            // XBP, so it still holds the caller's value.  For that to
+            // be plausible, XBP will need to have a value at least
+            // higher than XSP since that holds the purported return
+            // address.  In which case do nothing, since XBP already
+            // holds the "right" value.
+          } else {
+            // Mark XBP as invalid, so that subsequent unwind iterations
+            // don't assume it holds valid data.
+            regs.xbp = TaggedUWord();
+          }
+
+#elif defined(LUL_ARCH_arm)
+          // Set all registers to be undefined, except for SP(R13) and
+          // PC(R15).
+
+          // aWord points either at the return point, if returning to
+          // ARM code, or one insn past the return point if returning
+          // to Thumb code.  In both cases, aWord-2 is guaranteed to
+          // fall within the calling instruction.
+          regs.r15 = aWord + TaggedUWord((uintptr_t)(-2));
+
+          // Make SP be the word above the location where the return
+          // address was found.
+          regs.r13 = sp + TaggedUWord(4);
+
+          // All other regs are undefined.
+          regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord();
+
+#else
+# error "Unknown plat"
+#endif
+
+          // |regs| now describes the caller's frame.  |sp| is re-derived
+          // from |regs| at the top of the unwind loop, so there's no need
+          // to update it here.
+          break;
+        }
+
+        // This word doesn't look like a return address.  Move on to the
+        // next word up the stack.  Without this step every iteration
+        // would re-examine the same word and the scan could never find
+        // anything beyond the word at SP.
+        sp = sp + TaggedUWord((uintptr_t)sizeof(uintptr_t));
+
+      } // for (int i = 0; i < NUM_SCANNED_WORDS; i++)
+
+      // We tried to make progress by scanning the stack, but failed.
+      // So give up -- fall out of the top level unwind loop.
+      if (!scan_succeeded) {
+        break;
+      }
+    }
+
+  } // top level unwind loop
+
+  // END UNWIND
+  /////////////////////////////////////////////////////////
+}
+
+
+////////////////////////////////////////////////////////////////
+// LUL Unit Testing                                           //
+////////////////////////////////////////////////////////////////
+// Size, in bytes, of the innermost chunk of stack the unit tests hand to LUL.
+static const int LUL_UNIT_TEST_STACK_SIZE = 16384;
+
+// This function is innermost in the test call sequence.  It uses LUL
+// to unwind, and compares the result with the sequence specified in
+// the director string |dstring|.  These need to agree in order for the
+// test to pass.  In order not to screw up the results, this function
+// needs to have a not-very big stack frame, since we're only presenting
+// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
+// that chunk unavoidably includes the frame for this function.
+//
+// This function must not be inlined into its callers.  Doing so will
+// cause the expected-vs-actual backtrace consistency checking to
+// fail.  Prints summary results to |aLUL|'s logging sink and also
+// returns a boolean indicating whether or not the test passed.
+static __attribute__((noinline))
+bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
+{
+  // Get hold of the current unwind-start registers.
+  UnwindRegs startRegs;
+  memset(&startRegs, 0, sizeof(startRegs));
+#if defined(LUL_PLAT_x64_linux)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+    "leaq 0(%%rip), %%r15"   "\n\t"
+    "movq %%r15, 0(%0)"      "\n\t"
+    "movq %%rsp, 8(%0)"      "\n\t"
+    "movq %%rbp, 16(%0)"     "\n"
+    : : "r"(&block[0]) : "memory", "r15"
+  );
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 128;
+  uintptr_t start = block[1] - REDZONE_SIZE;  // block[1] is rsp
+#elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
+  volatile uintptr_t block[3];
+  MOZ_ASSERT(sizeof(block) == 12);
+  __asm__ __volatile__(
+    ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/  "\n\t"
+    "popl %%edi"             "\n\t"
+    "movl %%edi, 0(%0)"      "\n\t"
+    "movl %%esp, 4(%0)"      "\n\t"
+    "movl %%ebp, 8(%0)"      "\n"
+    : : "r"(&block[0]) : "memory", "edi"
+  );
+  startRegs.xip = TaggedUWord(block[0]);
+  startRegs.xsp = TaggedUWord(block[1]);
+  startRegs.xbp = TaggedUWord(block[2]);
+  const uintptr_t REDZONE_SIZE = 0;
+  uintptr_t start = block[1] - REDZONE_SIZE;  // block[1] is esp
+#elif defined(LUL_PLAT_arm_android)
+  volatile uintptr_t block[6];
+  MOZ_ASSERT(sizeof(block) == 24);
+  __asm__ __volatile__(
+    "mov r0, r15"            "\n\t"
+    "str r0,  [%0, #0]"      "\n\t"
+    "str r14, [%0, #4]"      "\n\t"
+    "str r13, [%0, #8]"      "\n\t"
+    "str r12, [%0, #12]"     "\n\t"
+    "str r11, [%0, #16]"     "\n\t"
+    "str r7,  [%0, #20]"     "\n"
+    : : "r"(&block[0]) : "memory", "r0"
+  );
+  startRegs.r15 = TaggedUWord(block[0]);
+  startRegs.r14 = TaggedUWord(block[1]);
+  startRegs.r13 = TaggedUWord(block[2]);
+  startRegs.r12 = TaggedUWord(block[3]);
+  startRegs.r11 = TaggedUWord(block[4]);
+  startRegs.r7  = TaggedUWord(block[5]);
+  const uintptr_t REDZONE_SIZE = 0;  // NB: SP (r13) is block[2]; block[1] is r14
+  uintptr_t start = block[2] - REDZONE_SIZE;
+#else
+# error "Unsupported platform"
+#endif
+
+  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
+  // stack.
+  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
+  uintptr_t ws  = sizeof(void*);
+  start &= ~(ws-1);
+  end   &= ~(ws-1);
+  uintptr_t nToCopy = end - start;
+  if (nToCopy > lul::N_STACK_BYTES) {
+    nToCopy = lul::N_STACK_BYTES;
+  }
+  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
+  StackImage* stackImg = new StackImage();
+  stackImg->mLen       = nToCopy;
+  stackImg->mStartAvma = start;
+  if (nToCopy > 0) {
+    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
+    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
+  }
+
+  // Unwind it.
+  const int MAX_TEST_FRAMES = 64;
+  uintptr_t framePCs[MAX_TEST_FRAMES];
+  uintptr_t frameSPs[MAX_TEST_FRAMES];
+  size_t framesAvail = mozilla::ArrayLength(framePCs);
+  size_t framesUsed  = 0;
+  size_t scannedFramesAllowed = 0;
+  size_t scannedFramesAcquired = 0;
+  aLUL->Unwind( &framePCs[0], &frameSPs[0],
+                &framesUsed, &scannedFramesAcquired,
+                framesAvail, scannedFramesAllowed,
+                &startRegs, stackImg );
+
+  delete stackImg;
+
+  //if (0) {
+  //  // Show what we have.
+  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
+  //  for (size_t i = 0; i < framesUsed; i++) {
+  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
+  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
+  //  }
+  //  fprintf(stderr, "\n");
+  //}
+
+  // Check to see if there's a consistent binding between digits in
+  // the director string ('1' .. '8') and the PC values acquired by
+  // the unwind.  If there isn't, the unwinding has failed somehow.
+  uintptr_t binding[8];  // binding for '1' .. binding for '8'
+  memset((void*)binding, 0, sizeof(binding));
+
+  // The general plan is to work backwards along the director string
+  // and forwards along the framePCs array.  Doing so corresponds to
+  // working outwards from the innermost frame of the recursive test set.
+  const char* cursor = dstring;
+
+  // Find the end.  This leaves |cursor| two bytes past the first
+  // character we want to look at -- see comment below.
+  while (*cursor) cursor++;
+
+  // Counts the number of consistent frames.
+  size_t nConsistent = 0;
+
+  // Iterate back to the start of the director string.  The starting
+  // points are a bit complex.  We can't use framePCs[0] because that
+  // contains the PC in this frame (above).  We can't use framePCs[1]
+  // because that will contain the PC at return point in the recursive
+  // test group (TestFn[1-8]) for their call "out" to this function,
+  // GetAndCheckStackTrace.  Although LUL will compute a correct
+  // return address, that will not be the same return address as for a
+  // recursive call out of the function to another function in the
+  // group.  Hence we can only start consistency checking at
+  // framePCs[2].
+  //
+  // To be consistent, then, we must ignore the last element in the
+  // director string as that corresponds to framePCs[1].  Hence the
+  // start points are: framePCs[2] and the director string 2 bytes
+  // before the terminating zero.
+  //
+  // Also as a result of this, the number of consistent frames counted
+  // will always be one less than the length of the director string
+  // (not including its terminating zero).
+  size_t frameIx;
+  for (cursor = cursor-2, frameIx = 2;
+       cursor >= dstring && frameIx < framesUsed;
+       cursor--, frameIx++) {
+    char      c  = *cursor;
+    uintptr_t pc = framePCs[frameIx];
+    // If this doesn't hold, the director string is ill-formed.
+    MOZ_ASSERT(c >= '1' && c <= '8');
+    int n = ((int)c) - ((int)'1');
+    if (binding[n] == 0) {
+      // There's no binding for |c| yet, so install |pc| and carry on.
+      binding[n] = pc;
+      nConsistent++;
+      continue;
+    }
+    // There's a pre-existing binding for |c|.  Check it's consistent.
+    if (binding[n] != pc) {
+      // Not consistent.  Give up now.
+      break;
+    }
+    // Consistent.  Keep going.
+    nConsistent++;
+  }
+
+  // So, did we succeed?
+  bool passed = nConsistent+1 == strlen(dstring);
+
+  // Show the results.
+  char buf[200];
+  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
+  buf[sizeof(buf)-1] = 0;
+  aLUL->mLog(buf);
+  SprintfLiteral(buf,
+                 "LULUnitTest:     %d consistent, %d in dstring: %s\n",
+                 (int)nConsistent, (int)strlen(dstring),
+                 passed ? "PASS" : "FAIL");
+  buf[sizeof(buf)-1] = 0;
+  aLUL->mLog(buf);
+
+  return passed;
+}
+
+
+// Macro magic to create a set of 8 mutually recursive functions with
+// varying frame sizes.  These will recurse amongst themselves as
+// specified by |strP|, the director string, and call
+// GetAndCheckStackTrace when the string becomes empty, passing it the
+// original value of the string.  This checks the result, printing
+// results on |aLUL|'s logging sink, and also returns a boolean
+// indicating whether or not the results are acceptable (correct).
+// Declares the prototype shared by every function in the test group.
+#define DECL_TEST_FN(NAME) \
+  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
+// Defines test function NAME with a FRAMESIZE-byte local array in its frame.
+#define GEN_TEST_FN(NAME, FRAMESIZE) \
+  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
+    volatile char space[FRAMESIZE]; \
+    memset((char*)&space[0], 0, sizeof(space)); \
+    if (*strP == '\0') { \
+      /* We've come to the end of the director string. */ \
+      /* Take a stack snapshot and check it against the whole string. */ \
+      return GetAndCheckStackTrace(aLUL, strPorig); \
+    } else { \
+      /* Recurse onwards.  This is a bit subtle.  The obvious */ \
+      /* thing to do here is call onwards directly, from within the */ \
+      /* arms of the switch statement.  That gives a problem in that */ \
+      /* there will be multiple return points inside each function when */ \
+      /* unwinding, so it will be difficult to check for consistency */ \
+      /* against the director string.  Instead, we make an indirect */ \
+      /* call, so as to guarantee that there is only one call site */ \
+      /* within each function.  This does assume that the compiler */ \
+      /* won't transform it back to the simple direct-call form. */ \
+      /* To discourage it from doing so, the call is bracketed with */ \
+      /* __asm__ __volatile__ sections so as to make it not-movable. */ \
+      bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
+      switch (*strP) { \
+        case '1': nextFn = TestFn1; break; \
+        case '2': nextFn = TestFn2; break; \
+        case '3': nextFn = TestFn3; break; \
+        case '4': nextFn = TestFn4; break; \
+        case '5': nextFn = TestFn5; break; \
+        case '6': nextFn = TestFn6; break; \
+        case '7': nextFn = TestFn7; break; \
+        case '8': nextFn = TestFn8; break; \
+        default:  nextFn = TestFn8; break; /* any other character */ \
+      } \
+      __asm__ __volatile__("":::"cc","memory"); \
+      bool passed = nextFn(aLUL, strPorig, strP+1); \
+      __asm__ __volatile__("":::"cc","memory"); \
+      return passed; \
+    } \
+  }
+
+// The test functions are mutually recursive, so it is necessary to
+// declare them before defining them.
+DECL_TEST_FN(TestFn1)
+DECL_TEST_FN(TestFn2)
+DECL_TEST_FN(TestFn3)
+DECL_TEST_FN(TestFn4)
+DECL_TEST_FN(TestFn5)
+DECL_TEST_FN(TestFn6)
+DECL_TEST_FN(TestFn7)
+DECL_TEST_FN(TestFn8)
+// Now define them; the second argument is each one's local array size.
+GEN_TEST_FN(TestFn1, 123)
+GEN_TEST_FN(TestFn2, 456)
+GEN_TEST_FN(TestFn3, 789)
+GEN_TEST_FN(TestFn4, 23)
+GEN_TEST_FN(TestFn5, 47)
+GEN_TEST_FN(TestFn6, 117)
+GEN_TEST_FN(TestFn7, 1)
+GEN_TEST_FN(TestFn8, 99)
+
+
+// This starts the test sequence going.  Call here to generate a
+// sequence of calls as directed by the string |dstring|.  The call
+// sequence will, from its innermost frame, finish by calling
+// GetAndCheckStackTrace() and passing it |dstring|.
+// GetAndCheckStackTrace() will unwind the stack, check consistency
+// of those results against |dstring|, and print a pass/fail message
+// to aLUL's logging sink.  It also updates the counters in *aNTests
+// and aNTestsPassed.
+__attribute__((noinline)) void
+TestUnw(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed,
+        LUL* aLUL, const char* dstring)
+{
+  // Reserve LUL_UNIT_TEST_STACK_SIZE bytes in this frame, so the stack
+  // is certain to hold at least that much.  That makes it safe -- in
+  // the sense that no segfault can happen -- to saw off the top
+  // LUL_UNIT_TEST_STACK_SIZE bytes of the stack and hand them to LUL.
+  // It is all somewhat dubious, because a sufficiently clever compiler
+  // (clang, for one) can work out that the array is unused and delete
+  // it from the frame.  Hence the hoop jumping: the array is filled in
+  // before the call, and at least appears to be used afterwards.
+  volatile char padding[LUL_UNIT_TEST_STACK_SIZE];
+  for (int ix = 0; ix < LUL_UNIT_TEST_STACK_SIZE; ++ix) {
+    padding[ix] = (char)(ix & 0x7F);
+  }
+
+  // Run the test proper.  The recursion always starts in TestFn1 and
+  // is then steered by |dstring|.
+  const bool testPassed = TestFn1(aLUL, dstring, dstring);
+
+  // Appear to use the array: fold its contents into a checksum, then
+  // (apparently) consume the checksum in an empty asm statement.
+  int checksum = 0;
+  for (int ix = 0; ix < LUL_UNIT_TEST_STACK_SIZE; ++ix) {
+    // This is meant to be hard for LLVM to see through.
+    checksum += padding[ix] - 3*ix;
+  }
+  __asm__ __volatile__("" : : "r"(checksum));
+
+  // Record the outcome in the caller's counters: one more test run,
+  // and one more test passed if this one did.
+  *aNTests += 1;
+  if (testPassed) {
+    *aNTestsPassed += 1;
+  }
+}
+
+
+void
+RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL)
+{
+  static const char* const kDirectors[] = {  // run in the order listed
+    "11111111", "11222211", "111222333",
+    "1212121231212331212121212121212", "31415827271828325332173258",
+    "123456781122334455667788777777777777777777777"
+  };
+  aLUL->mLog(":\n");
+  aLUL->mLog("LULUnitTest: BEGIN\n");
+  *aNTests = *aNTestsPassed = 0;
+  for (const char* director : kDirectors) {
+    TestUnw(aNTests, aNTestsPassed, aLUL, director);
+  }
+  aLUL->mLog("LULUnitTest: END\n");
+  aLUL->mLog(":\n");
+}
+
+} // namespace lul
diff --git a/tools/profiler/lul/LulMain.h b/tools/profiler/lul/LulMain.h
new file mode 100644
index 000000000..0916d1b26
--- /dev/null
+++ b/tools/profiler/lul/LulMain.h
@@ -0,0 +1,397 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMain_h
+#define LulMain_h
+
+#include "LulPlatformMacros.h"
+#include "mozilla/Atomics.h"
+
+// LUL: A Lightweight Unwind Library.
+// This file provides the end-user (external) interface for LUL.
+
+// Some comments about naming in the implementation.  These are safe
+// to ignore if you are merely using LUL, but are important if you
+// hack on its internals.
+//
+// Debuginfo readers in general have tended to use the word "address"
+// to mean several different things.  This sometimes makes them
+// difficult to understand and maintain.  LUL tries hard to avoid
+// using the word "address" and instead uses the following more
+// precise terms:
+//
+// * SVMA ("Stated Virtual Memory Address"): this is an address of a
+//   symbol (etc) as it is stated in the symbol table, or other
+//   metadata, of an object.  Such values are typically small and
+//   start from zero or thereabouts, unless the object has been
+//   prelinked.
+//
+// * AVMA ("Actual Virtual Memory Address"): this is the address of a
+//   symbol (etc) in a running process, that is, once the associated
+//   object has been mapped into a process.  Such values are typically
+//   much larger than SVMAs, since objects can get mapped arbitrarily
+//   far along the address space.
+//
+// * "Bias": the difference between AVMA and SVMA for a given symbol
+//   (specifically, AVMA - SVMA).  The bias is always an integral
+//   number of pages.  Once we know the bias for a given object's
+//   text section (for example), we can compute the AVMAs of all of
+//   its text symbols by adding the bias to their SVMAs.
+//
+// * "Image address": typically, to read debuginfo from an object we
+//   will temporarily mmap in the file so as to read symbol tables
+//   etc.  Addresses in this temporary mapping are called "Image
+//   addresses".  Note that the temporary mapping is entirely
+//   unrelated to the mappings of the file that the dynamic linker
+//   must perform merely in order to get the program to run.  Hence
+//   image addresses are unrelated to either SVMAs or AVMAs.
+
+
+namespace lul {
+
+// A machine word together with a flag saying whether it is valid.
+class TaggedUWord {
+public:
+  // RUNS IN NO-MALLOC CONTEXT
+  // Make a valid word holding |w|.
+  explicit TaggedUWord(uintptr_t w) : mValue(w), mValid(true) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Make an invalid word; its value field is zero.
+  TaggedUWord() : mValue(0), mValid(false) {}
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // For each binary operator: invalid result unless both operands are valid.
+  TaggedUWord operator+(TaggedUWord rhs) const {
+    if (!mValid || !rhs.mValid) {
+      return TaggedUWord();
+    }
+    return TaggedUWord(mValue + rhs.mValue);
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator-(TaggedUWord rhs) const {
+    if (!mValid || !rhs.mValid) {
+      return TaggedUWord();
+    }
+    return TaggedUWord(mValue - rhs.mValue);
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator&(TaggedUWord rhs) const {
+    if (!mValid || !rhs.mValid) {
+      return TaggedUWord();
+    }
+    return TaggedUWord(mValue & rhs.mValue);
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord operator|(TaggedUWord rhs) const {
+    if (!mValid || !rhs.mValid) {
+      return TaggedUWord();
+    }
+    return TaggedUWord(mValue | rhs.mValue);
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Signed ">=" comparison; a valid result holds 1 (true) or 0 (false).
+  TaggedUWord CmpGEs(TaggedUWord rhs) const {
+    if (!mValid || !rhs.mValid) {
+      return TaggedUWord();
+    }
+    const bool isGE = (intptr_t)mValue >= (intptr_t)rhs.mValue;
+    return TaggedUWord(isGE ? 1 : 0);
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Left shift.  The result is also invalid if the shift amount is not
+  // smaller than the number of bits in a word.
+  TaggedUWord operator<<(TaggedUWord rhs) const {
+    const bool ok = mValid && rhs.mValid &&
+                    rhs.mValue < 8 * sizeof(uintptr_t);
+    return ok ? TaggedUWord(mValue << rhs.mValue) : TaggedUWord();
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // Equality test.  An invalid word on either side compares unequal.
+  bool operator==(TaggedUWord other) const {
+    return mValid && other.mValid && mValue == other.mValue;
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT
+  // True iff the word is valid and its value is word-aligned.
+  bool IsAligned() const {
+    return mValid && (mValue % sizeof(uintptr_t)) == 0;
+  }
+
+  // RUNS IN NO-MALLOC CONTEXT (both accessors)
+  uintptr_t Value() const { return mValue; }
+  bool      Valid() const { return mValid; }
+
+private:
+  uintptr_t mValue;
+  bool mValid;
+};
+
+
+// The registers, with validity tags, that will be unwound.
+// Which registers are tracked depends on the target architecture.
+struct UnwindRegs {
+#if defined(LUL_ARCH_arm)
+  TaggedUWord r7;
+  TaggedUWord r11;
+  TaggedUWord r12;
+  TaggedUWord r13;  // SP
+  TaggedUWord r14;
+  TaggedUWord r15;  // PC
+#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  TaggedUWord xbp;  // rbp on x64, ebp on x86
+  TaggedUWord xsp;  // rsp on x64, esp on x86
+  TaggedUWord xip;  // address of the instruction being executed
+#else
+# error "Unknown plat"
+#endif
+};
+
+// The maximum number of bytes in a stack snapshot.  This can be
+// increased if necessary, but larger values cost performance, since a
+// stack snapshot needs to be copied between sampling and worker
+// threads for each snapshot.  In practice 32k seems to be enough
+// to get good backtraces.
+static const size_t N_STACK_BYTES = 32768;  // sizes StackImage::mContents
+
+// The stack chunk image that will be unwound.
+struct StackImage {
+  // [mStartAvma, +mLen) is the address range that mContents holds a
+  // copy of.  Obviously we require 0 <= mLen <= N_STACK_BYTES.
+  uintptr_t mStartAvma;
+  size_t    mLen;
+  uint8_t   mContents[N_STACK_BYTES];
+};
+
+
+// Counters recording how the unwinder recovered each frame.
+template<typename T>
+class LULStats {
+public:
+  // Start with every counter at zero.
+  LULStats() : mContext(0), mCFI(0), mScanned(0) {}
+
+  // Take the counter values of a LULStats of some other counter type.
+  template <typename S>
+  explicit LULStats(const LULStats<S>& aOther)
+    : mContext(aOther.mContext), mCFI(aOther.mCFI), mScanned(aOther.mScanned)
+  {}
+
+  // Overwrite the counters with those of |aOther|.
+  template <typename S>
+  LULStats<T>& operator=(const LULStats<S>& aOther)
+  {
+    mContext = aOther.mContext;
+    mCFI     = aOther.mCFI;
+    mScanned = aOther.mScanned;
+    return *this;
+  }
+
+  // Sum, over the three counters, of (this counter - |aOther|'s counter).
+  template <typename S>
+  uint32_t operator-(const LULStats<S>& aOther) {
+    uint32_t total = mContext - aOther.mContext;
+    total += mCFI - aOther.mCFI;
+    total += mScanned - aOther.mScanned;
+    return total;
+  }
+
+  T mContext; // Number of context frames
+  T mCFI;     // Number of CFI/EXIDX frames
+  T mScanned; // Number of scanned frames
+};
+
+
+// The core unwinder library class.  Just one of these is needed, and
+// it can be shared by multiple unwinder threads.
+//
+// The library operates in one of two modes.
+//
+// * Admin mode.  The library is in this state after creation.  In Admin
+//   mode, no unwinding may be performed.  It is however allowable to
+//   perform administrative tasks -- primarily, loading of unwind info
+//   -- in this mode.  In particular, it is safe for the library to
+//   perform dynamic memory allocation in this mode.  Safe in the
+//   sense that there is no risk of deadlock against unwinding threads
+//   that might -- because of where they have been sampled -- hold the
+//   system's malloc lock.
+//
+// * Unwind mode.  In this mode, calls to ::Unwind may be made, but
+//   nothing else.  ::Unwind guarantees not to make any dynamic memory
+//   requests, so as to guarantee that the calling thread won't
+//   deadlock in the case where it already holds the system's malloc lock.
+//
+// The library is created in Admin mode.  After debuginfo is loaded,
+// the caller must switch it into Unwind mode by calling
+// ::EnableUnwinding.  There is no way to switch it back to Admin mode
+// after that.  To safely switch back to Admin mode would require the
+// caller (or other external agent) to guarantee that there are no
+// pending ::Unwind calls.
+
+class PriMap;
+class SegArray;
+class UniqueStringUniverse;
+
+class LUL {
+public:
+  // Create; supply a logging sink.  Sets the object in Admin mode.
+  explicit LUL(void (*aLog)(const char*));
+
+  // Destroy.  Caller is responsible for ensuring that no other
+  // threads are in Unwind calls.  All resources are freed and all
+  // registered unwinder threads are deregistered.  Can be called
+  // either in Admin or Unwind mode.
+  ~LUL();
+
+  // Notify the library that unwinding is now allowed and so
+  // admin-mode calls are no longer allowed.  The object is initially
+  // created in admin mode.  The only possible transition is
+  // admin->unwinding, therefore.
+  void EnableUnwinding();
+
+  // Notify of a new r-x mapping, and load the associated unwind info.
+  // The filename is strdup'd and used for debug printing.  If
+  // aMappedImage is NULL, this function will mmap/munmap the file
+  // itself, so as to be able to read the unwind info.  If
+  // aMappedImage is non-NULL then it is assumed to point to a
+  // caller-supplied and caller-managed mapped image of the file.
+  // May only be called in Admin mode.
+  void NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
+                      const char* aFileName, const void* aMappedImage);
+
+  // In rare cases we know an executable area exists but don't know
+  // what the associated file is.  This call notifies LUL of such
+  // areas.  This is important for correct functioning of stack
+  // scanning and of the x86-{linux,android} special-case
+  // __kernel_syscall function handling.
+  // This must be called only after the code area in
+  // question really has been mapped.
+  // May only be called in Admin mode.
+  void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize);
+
+  // Notify that a mapped area is about to be unmapped; discard any
+  // associated unwind info.  (NOTE(review): this comment used to say
+  // "Acquires mRWlock for writing", but no such member is declared in
+  // this class -- confirm.)  To avoid segfaulting the stack-scan
+  // unwinder, which inspects code areas, this must be called before
+  // the code area in question is really unmapped.  Unlike
+  // NotifyAfterMap(), this function takes the start and end addresses
+  // of the range to be unmapped, rather than a start and a length.
+  // This makes it possible to notify an unmap for the entire address
+  // space using a single call.  May only be called in Admin mode.
+  void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax);
+
+  // Apply NotifyBeforeUnmap to the entire address space.  This causes
+  // LUL to discard all unwind and executable-area information for the
+  // entire address space.
+  // May only be called in Admin mode.
+  void NotifyBeforeUnmapAll() {
+    NotifyBeforeUnmap(0, UINTPTR_MAX);
+  }
+
+  // Returns the number of mappings currently registered.
+  // May only be called in Admin mode.
+  size_t CountMappings();
+
+  // Unwind |aStackImg| starting with the context in |aStartRegs|.
+  // Write the number of frames recovered in *aFramesUsed.  Put
+  // the PC values in aFramePCs[0 .. *aFramesUsed-1] and
+  // the SP values in aFrameSPs[0 .. *aFramesUsed-1].
+  // |aFramesAvail| is the size of the two output arrays and hence the
+  // largest possible value of *aFramesUsed.  PC values are always
+  // valid, and the unwind will stop when the PC becomes invalid, but
+  // the SP values might be invalid, in which case the value zero will
+  // be written in the relevant frameSPs[] slot.
+  //
+  // Unwinding may optionally use stack scanning.  The maximum number
+  // of frames that may be recovered by stack scanning is
+  // |aScannedFramesAllowed| and the actual number recovered is
+  // written into *aScannedFramesAcquired.  |aScannedFramesAllowed|
+  // must be less than or equal to |aFramesAvail|.
+  //
+  // This function assumes that the SP values increase as it unwinds
+  // away from the innermost frame -- that is, that the stack grows
+  // down.  It monitors SP values as it unwinds to check that this
+  // holds, so as to avoid looping on corrupted stacks.
+  //
+  // May only be called in Unwind mode.  Multiple threads may unwind
+  // at once.  LUL user is responsible for ensuring that no thread makes
+  // any Admin calls whilst in Unwind mode.
+  // MOZ_CRASHes if the calling thread is not registered for unwinding.
+  //
+  // Up to aScannedFramesAllowed stack-scanned frames may be recovered.
+  //
+  // The calling thread must previously have been registered via a call to
+  // RegisterSampledThread.
+  void Unwind(/*OUT*/uintptr_t* aFramePCs,
+              /*OUT*/uintptr_t* aFrameSPs,
+              /*OUT*/size_t* aFramesUsed,
+              /*OUT*/size_t* aScannedFramesAcquired,
+              size_t aFramesAvail,
+              size_t aScannedFramesAllowed,
+              UnwindRegs* aStartRegs, StackImage* aStackImg);
+
+  // The logging sink.  Call to send debug strings to the caller-
+  // specified destination.  Can only be called by the Admin thread.
+  void (*mLog)(const char*);
+
+  // Statistics relating to unwinding.  These have to be atomic since
+  // unwinding can occur on different threads simultaneously.
+  LULStats<mozilla::Atomic<uint32_t>> mStats;
+
+  // Possibly show the statistics.  This may not be called from any
+  // registered sampling thread, since it involves I/O.
+  void MaybeShowStats();
+
+private:
+  // The statistics counters at the point where they were last printed.
+  LULStats<uint32_t> mStatsPrevious;
+
+  // Are we in admin mode?  Initially |true| but changes to |false|
+  // once unwinding begins.
+  bool mAdminMode;
+
+  // The thread ID associated with admin mode.  This is the only thread
+  // that is allowed to perform non-Unwind calls on this object.  Conversely,
+  // no registered Unwinding thread may be the admin thread.  This is so
+  // as to clearly partition the one thread that may do dynamic memory
+  // allocation from the threads that are being sampled, since the latter
+  // absolutely may not do dynamic memory allocation.
+  int mAdminThreadId;
+
+  // The top level mapping from code address ranges to postprocessed
+  // unwind info.  Basically a sorted array of (addr, len, info)
+  // records.  This field is updated by NotifyAfterMap and NotifyBeforeUnmap.
+  PriMap* mPriMap;
+
+  // An auxiliary structure that records which address ranges are
+  // mapped r-x, for the benefit of the stack scanner.
+  SegArray* mSegArray;
+
+  // A UniqueStringUniverse that holds all the strdup'd strings created
+  // whilst reading unwind information.  This is included so as to make
+  // it possible to free them in ~LUL.
+  UniqueStringUniverse* mUSU;
+};
+
+
+// Run unit tests on an initialised, loaded-up LUL instance, and print
+// summary results on |aLUL|'s logging sink.  *aNTests and
+// *aNTestsPassed are first zeroed, then set to the number of tests
+// run and the number of tests that passed, respectively.
+void
+RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL);
+
+} // namespace lul
+
+#endif // LulMain_h
diff --git a/tools/profiler/lul/LulMainInt.h b/tools/profiler/lul/LulMainInt.h
new file mode 100644
index 000000000..54bd76c88
--- /dev/null
+++ b/tools/profiler/lul/LulMainInt.h
@@ -0,0 +1,393 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulMainInt_h
+#define LulMainInt_h
+
+#include "LulPlatformMacros.h"
+#include "LulMain.h" // for TaggedUWord
+
+#include <vector>
+
+#include "mozilla/Assertions.h"
+
+// This file provides the internal interface inside LUL.  If you are an
+// end-user of LUL, do not include it in your code.  The end-user
+// interface is in LulMain.h.
+
+
+namespace lul {
+
+using std::vector;
+
+////////////////////////////////////////////////////////////////
+// DW_REG_ constants                                          //
+////////////////////////////////////////////////////////////////
+
+// These are the Dwarf CFI register numbers, as (presumably) defined
+// in the ELF ABI supplements for each architecture.
+
+enum DW_REG_NUMBER {
+  // No real register has this number.  It's convenient to be able to
+  // treat the CFA (Canonical Frame Address) as "just another
+  // register", though.
+  DW_REG_CFA = -1,
+#if defined(LUL_ARCH_arm)
+  // ARM registers.  The constant for rN has the value N.
+  DW_REG_ARM_R7  = 7,
+  DW_REG_ARM_R11 = 11,
+  DW_REG_ARM_R12 = 12,
+  DW_REG_ARM_R13 = 13,
+  DW_REG_ARM_R14 = 14,
+  DW_REG_ARM_R15 = 15,
+#elif defined(LUL_ARCH_x64)
+  // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are combined,
+  // both use one set of names (XBP, XSP, XIP); only the numbers differ.
+  DW_REG_INTEL_XBP = 6,
+  DW_REG_INTEL_XSP = 7,
+  DW_REG_INTEL_XIP = 16,
+#elif defined(LUL_ARCH_x86)
+  DW_REG_INTEL_XBP = 5,  // Same names as for x64, 32-bit x86 numbering.
+  DW_REG_INTEL_XSP = 4,
+  DW_REG_INTEL_XIP = 8,
+#else
+# error "Unknown arch"
+#endif
+};
+
+
+////////////////////////////////////////////////////////////////
+// PfxExpr                                                    //
+////////////////////////////////////////////////////////////////
+
+enum PfxExprOp {
+  // opcode      meaning of mOperand     effect on stack
+  PX_Start,   // bool start-with-CFA?    start, with CFA on stack, or not
+  PX_End,     // none                    stop; result is at top of stack
+  PX_SImm32,  // int32                   push signed int32
+  PX_DwReg,   // DW_REG_NUMBER           push value of the specified reg
+  PX_Deref,   // none                    pop X ; push *X
+  PX_Add,     // none                    pop X ; pop Y ; push Y + X
+  PX_Sub,     // none                    pop X ; pop Y ; push Y - X
+  PX_And,     // none                    pop X ; pop Y ; push Y & X
+  PX_Or,      // none                    pop X ; pop Y ; push Y | X
+  PX_CmpGES,  // none                    pop X ; pop Y ; push (Y >=s X) ? 1 : 0
+  PX_Shl      // none                    pop X ; pop Y ; push Y << X
+};
+
+// An opcode plus a 32-bit operand whose meaning depends on the opcode.
+struct PfxInstr {
+  PfxInstr(PfxExprOp opcode, int32_t operand)
+    : mOpcode(opcode), mOperand(operand)
+  {}
+  explicit PfxInstr(PfxExprOp opcode)
+    : mOpcode(opcode), mOperand(0)
+  {}
+  // Const-qualified so that const PfxInstrs can be compared too.
+  bool operator==(const PfxInstr& other) const {
+    return mOpcode == other.mOpcode && mOperand == other.mOperand;
+  }
+  PfxExprOp mOpcode;
+  int32_t   mOperand;
+};
+
+static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly");
+
+// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start].
+// In the case of any mishap (stack over/underflow, running off the end of
+// the instruction vector, obviously malformed sequences),
+// return an invalid TaggedUWord.
+// RUNS IN NO-MALLOC CONTEXT
+TaggedUWord EvaluatePfxExpr(int32_t start,
+                            const UnwindRegs* aOldRegs,
+                            TaggedUWord aCFA, const StackImage* aStackImg,
+                            const vector<PfxInstr>& aPfxInstrs);
+
+
+////////////////////////////////////////////////////////////////
+// LExpr                                                      //
+////////////////////////////////////////////////////////////////
+
+// An expression -- very primitive.  Denotes either "register +
+// offset", a dereferenced version of the same, or a reference to a
+// prefix expression stored elsewhere.  So as to allow convenient
+// handling of Dwarf-derived unwind info, the register may also denote
+// the CFA.  A large number of these need to be stored, so we ensure
+// it fits into 8 bytes.  See comment below on RuleSet to see how
+// expressions fit into the bigger picture.
+
+enum LExprHow {
+  UNKNOWN=0, // This LExpr denotes no value (what LExpr() constructs).
+  NODEREF,   // Value is  (mReg + mOffset).
+  DEREF,     // Value is *(mReg + mOffset).
+  PFXEXPR    // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset]); mReg is 0
+};
+
+inline static const char* NameOf_LExprHow(LExprHow how) {
+  // Map each LExprHow tag to a printable name.
+  if (how == UNKNOWN) return "UNKNOWN";
+  if (how == NODEREF) return "NODEREF";
+  if (how == DEREF)   return "DEREF";
+  if (how == PFXEXPR) return "PFXEXPR";
+  // Not one of the enumerators above.
+  return "LExpr-??";
+}
+
+
+struct LExpr {
+  // Denotes an expression with no value.
+  LExpr()
+    : mHow(UNKNOWN)
+    , mReg(0)
+    , mOffset(0)
+  {}
+
+  // Denotes any expressible expression.
+  LExpr(LExprHow how, int16_t reg, int32_t offset)
+    : mHow(how)
+    , mReg(reg)
+    , mOffset(offset)
+  {
+    switch (how) {
+      case UNKNOWN: MOZ_ASSERT(reg == 0 && offset == 0); break;
+      case NODEREF: break;
+      case DEREF:   break;
+      case PFXEXPR: MOZ_ASSERT(reg == 0 && offset >= 0); break;
+      default:      MOZ_ASSERT(0, "LExpr::LExpr: invalid how");
+    }
+  }
+
+  // Return this NODEREF expression with |delta| added to its offset.
+  LExpr add_delta(long delta) const
+  {
+    MOZ_ASSERT(mHow == NODEREF);
+    // In non-debug builds, where the assertion is compiled out, return
+    // LExpr() on misuse, or if the new offset doesn't fit in mOffset.
+    int64_t sum = int64_t(mOffset) + int64_t(delta);
+    bool ok = mHow == NODEREF && sum == int64_t(int32_t(sum));
+    return ok ? LExpr(mHow, mReg, int32_t(sum)) : LExpr(); // Gone bad
+  }
+
+  // Dereference an expression that denotes a memory address.
+  LExpr deref() const
+  {
+    MOZ_ASSERT(mHow == NODEREF);
+    // Same rationale as for add_delta().
+    return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset)
+                             : LExpr(); // Gone bad
+  }
+
+  // Print a rule for recovery of |aNewReg| whose recovered value
+  // is this LExpr.
+  string ShowRule(const char* aNewReg) const;
+
+  // Evaluate this expression, producing a TaggedUWord.  |aOldRegs|
+  // holds register values that may be referred to by the expression.
+  // |aCFA| holds the CFA value, if any, that applies.  |aStackImg|
+  // contains a chunk of stack that will be consulted if the expression
+  // references memory.  |aPfxInstrs| holds the vector of PfxInstrs
+  // that will be consulted if this is a PFXEXPR.
+  // RUNS IN NO-MALLOC CONTEXT
+  TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs,
+                           TaggedUWord aCFA, const StackImage* aStackImg,
+                           const vector<PfxInstr>* aPfxInstrs) const;
+
+  // Representation of expressions.  If |mReg| is DW_REG_CFA (-1) then
+  // it denotes the CFA.  All other allowed values for |mReg| are
+  // nonnegative and are DW_REG_ values.
+  LExprHow mHow:8;
+  int16_t  mReg;    // A DW_REG_ value
+  int32_t  mOffset; // 32-bit signed offset should be more than enough.
+};
+
+static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly");
+
+
+////////////////////////////////////////////////////////////////
+// RuleSet                                                    //
+////////////////////////////////////////////////////////////////
+
+// This is platform-dependent.  For some address range, describes how
+// to recover the CFA and then how to recover the registers for the
+// previous frame.
+//
+// The set of LExprs contained in a given RuleSet describe a DAG which
+// says how to compute the caller's registers ("new registers") from
+// the callee's registers ("old registers").  The DAG can contain a
+// single internal node, which is the value of the CFA for the callee.
+// It would be possible to construct a DAG that omits the CFA, but
+// including it makes the summarisers simpler, and the Dwarf CFI spec
+// has the CFA as a central concept.
+//
+// For this to make sense, |mCfaExpr| can't have
+// |mReg| == DW_REG_CFA since we have no previous value for the CFA.
+// All of the other |Expr| fields can -- and usually do -- specify
+// |mReg| == DW_REG_CFA.
+//
+// With that in place, the unwind algorithm proceeds as follows.
+//
+// (0) Initially: we have values for the old registers, and a memory
+//     image.
+//
+// (1) Compute the CFA by evaluating |mCfaExpr|.  Add the computed
+//     value to the set of "old registers".
+//
+// (2) Compute values for the registers by evaluating all of the other
+//     |Expr| fields in the RuleSet.  These can depend on both the old
+//     register values and the just-computed CFA.
+//
+// If we are unwinding without computing a CFA, perhaps because the
+// RuleSets are derived from EXIDX instead of Dwarf, then
+// |mCfaExpr.mHow| will be UNKNOWN, so the computed value will
+// be invalid -- that is, TaggedUWord() -- and so any attempt to use
+// that will result in the same value.  But that's OK because the
+// RuleSet would make no sense if it depended on the CFA but specified
+// no way to compute it.
+//
+// A RuleSet is not allowed to cover zero address range.  Having zero
+// length would break binary searching in SecMaps and PriMaps.
+
+class RuleSet {
+public:
+  RuleSet();
+  void   Print(void(*aLog)(const char*)) const;
+
+  // Find the LExpr* for a given DW_REG_ value in this class.
+  LExpr* ExprForRegno(DW_REG_NUMBER aRegno);
+
+  uintptr_t mAddr; // Start of the code address range this RuleSet covers.
+  uintptr_t mLen;  // Length of that range; a RuleSet may not have zero length.
+  // How to compute the CFA.
+  LExpr  mCfaExpr;
+  // How to compute caller register values.  These may reference the
+  // value defined by |mCfaExpr|.
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  LExpr  mXipExpr; // return address
+  LExpr  mXspExpr;
+  LExpr  mXbpExpr;
+#elif defined(LUL_ARCH_arm)
+  LExpr  mR15expr; // return address
+  LExpr  mR14expr;
+  LExpr  mR13expr;
+  LExpr  mR12expr;
+  LExpr  mR11expr;
+  LExpr  mR7expr;
+#else
+#   error "Unknown arch"
+#endif
+};
+
+// Reports whether |reg| is one of the Dwarf register numbers that belong
+// to the set of registers LUL unwinds on the target being compiled for.
+static inline bool registerIsTracked(DW_REG_NUMBER reg) {
+#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+  return reg == DW_REG_INTEL_XBP ||
+         reg == DW_REG_INTEL_XSP ||
+         reg == DW_REG_INTEL_XIP;
+#elif defined(LUL_ARCH_arm)
+  return reg == DW_REG_ARM_R7  ||
+         reg == DW_REG_ARM_R11 ||
+         reg == DW_REG_ARM_R12 ||
+         reg == DW_REG_ARM_R13 ||
+         reg == DW_REG_ARM_R14 ||
+         reg == DW_REG_ARM_R15;
+#else
+# error "Unknown arch"
+#endif
+}
+
+
+////////////////////////////////////////////////////////////////
+// SecMap                                                     //
+////////////////////////////////////////////////////////////////
+
+// A SecMap may have zero address range, temporarily, whilst RuleSets
+// are being added to it.  But adding a zero-range SecMap to a PriMap
+// will make it impossible to maintain the total order of the PriMap
+// entries, and so that can't be allowed to happen.
+
+class SecMap {
+public:
+  // These summarise the contained mRuleSets, in that they give
+  // exactly the lowest and highest addresses that any of the entries
+  // in this SecMap cover.  Hence invariants:
+  //
+  // mRuleSets is nonempty
+  //    <=> mSummaryMinAddr <= mSummaryMaxAddr
+  //        && mSummaryMinAddr == mRuleSets[0].mAddr
+  //        && mSummaryMaxAddr == mRuleSets[#rulesets-1].mAddr
+  //                              + mRuleSets[#rulesets-1].mLen - 1;
+  //
+  // This requires that no RuleSet has zero length.
+  //
+  // mRuleSets is empty
+  //    <=> mSummaryMinAddr > mSummaryMaxAddr
+  //
+  // This doesn't constrain mSummaryMinAddr and mSummaryMaxAddr uniquely,
+  // so let's use mSummaryMinAddr == 1 and mSummaryMaxAddr == 0 to denote
+  // this case.
+
+  explicit SecMap(void(*aLog)(const char*));
+  ~SecMap();
+
+  // Binary search mRuleSets to find one that brackets |ia|, or nullptr
+  // if none is found.  It's not allowable to do this until PrepareRuleSets
+  // has been called first.
+  RuleSet* FindRuleSet(uintptr_t ia);
+
+  // Add a RuleSet to the collection.  The rule is copied in.  Calling
+  // this makes the map non-searchable.
+  void AddRuleSet(const RuleSet* rs);
+
+  // Add a PfxInstr to the vector of such instrs, and return the index
+  // in the vector.  Calling this makes the map non-searchable.
+  uint32_t AddPfxInstr(PfxInstr pfxi);
+
+  // Returns the entire vector of PfxInstrs.
+  const vector<PfxInstr>* GetPfxInstrs() { return &mPfxInstrs; }
+
+  // Prepare the map for searching.  Also, remove any rules for code
+  // address ranges which don't fall inside [start, +len).  |len| may
+  // not be zero.
+  void PrepareRuleSets(uintptr_t start, size_t len);
+
+  bool IsEmpty();
+
+  size_t Size() { return mRuleSets.size(); }
+
+  // The min and max addresses of the addresses in the contained
+  // RuleSets.  See comment above for invariants.
+  uintptr_t mSummaryMinAddr;
+  uintptr_t mSummaryMaxAddr;
+
+private:
+  // False whilst adding entries; true once it is safe to call FindRuleSet.
+  // Transition (false->true) is caused by calling PrepareRuleSets().
+  bool mUsable;
+
+  // A vector of RuleSets, sorted, nonoverlapping (post PrepareRuleSets()).
+  vector<RuleSet> mRuleSets;
+
+  // A vector of PfxInstrs, which are referred to by the RuleSets.
+  // These are provided as a representation of Dwarf expressions
+  // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression),
+  // are relatively expensive to evaluate, and are therefore
+  // expected to be used only occasionally.
+  //
+  // The vector holds a bunch of separate PfxInstr programs, each one
+  // starting with a PX_Start and terminated by a PX_End, all
+  // concatenated together.  When a RuleSet can't recover a value
+  // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is
+  // the index in this vector of start of the necessary PfxInstr program.
+  vector<PfxInstr> mPfxInstrs;
+
+  // A logging sink, for debugging.
+  void (*mLog)(const char*);
+};
+
+} // namespace lul
+
+#endif // ndef LulMainInt_h
diff --git a/tools/profiler/lul/LulPlatformMacros.h b/tools/profiler/lul/LulPlatformMacros.h
new file mode 100644
index 000000000..8659a8fbe
--- /dev/null
+++ b/tools/profiler/lul/LulPlatformMacros.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef LulPlatformMacros_h
+#define LulPlatformMacros_h
+
+#include <stdint.h>
+#include <stdlib.h>
+
+// Define platform selection macros in a consistent way.  The primary
+// factorisation is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_
+// macros are defined too, since they are sometimes convenient.
+
+#undef LUL_PLAT_x64_linux
+#undef LUL_PLAT_x86_linux
+#undef LUL_PLAT_arm_android
+#undef LUL_PLAT_x86_android
+
+#undef LUL_ARCH_arm
+#undef LUL_ARCH_x86
+#undef LUL_ARCH_x64
+
+#undef LUL_OS_android
+#undef LUL_OS_linux
+
+#if defined(__linux__) && defined(__x86_64__)
+# define LUL_PLAT_x64_linux 1
+# define LUL_ARCH_x64 1
+# define LUL_OS_linux 1
+
+#elif defined(__linux__) && defined(__i386__) && !defined(__ANDROID__)
+# define LUL_PLAT_x86_linux 1
+# define LUL_ARCH_x86 1
+# define LUL_OS_linux 1
+
+#elif defined(__ANDROID__) && defined(__arm__)
+# define LUL_PLAT_arm_android 1
+# define LUL_ARCH_arm 1
+# define LUL_OS_android 1
+
+#elif defined(__ANDROID__) && defined(__i386__)
+# define LUL_PLAT_x86_android 1
+# define LUL_ARCH_x86 1
+# define LUL_OS_android 1
+
+#else
+# error "Unsupported platform"
+#endif
+
+#endif // LulPlatformMacros_h
diff --git a/tools/profiler/lul/platform-linux-lul.cpp b/tools/profiler/lul/platform-linux-lul.cpp
new file mode 100644
index 000000000..9541534a1
--- /dev/null
+++ b/tools/profiler/lul/platform-linux-lul.cpp
@@ -0,0 +1,88 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "platform.h"
+#include "PlatformMacros.h"
+#include "LulMain.h"
+#include "shared-libraries.h"
+#include "AutoObjectMapper.h"
+
+// Contains miscellaneous helpers that are used to connect SPS and LUL.
+
+
+// Discover, in a platform-dependent way, which code modules are mapped
+// into this process' virtual address space, and have |aLUL| load unwind
+// info for each of them.
+void
+read_procmaps(lul::LUL* aLUL)
+{
+  MOZ_ASSERT(aLUL->CountMappings() == 0);
+
+# if defined(SPS_OS_linux) || defined(SPS_OS_android) || defined(SPS_OS_darwin)
+  SharedLibraryInfo libs = SharedLibraryInfo::GetInfoForSelf();
+
+  for (size_t ix = 0; ix < libs.GetSize(); ++ix) {
+    const SharedLibrary& lib = libs.GetEntry(ix);
+
+    // Pick the object mapper.  It lives for exactly one loop iteration,
+    // so its destructor unmaps the object again on every path below.
+#   if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK)
+    AutoObjectMapperFaultyLib mapper(aLUL->mLog);  // we're using faulty.lib
+#   else
+    AutoObjectMapperPOSIX mapper(aLUL->mLog);      // standard POSIX mapper
+#   endif
+
+    void*  mappedImage = nullptr;
+    size_t mappedSize  = 0;
+    if (!mapper.Map(&mappedImage, &mappedSize, lib.GetName())) {
+      // Mapping failed.  For an object with no name that is expected: on
+      // Linux, GetInfoForSelf() produces two such mappings, one for the
+      // executable and one for the VDSO.  The executable one isn't a big
+      // deal since there's not much interesting code in there, but the
+      // VDSO one is a problem on x86-{linux,android} because lack of
+      // knowledge about the mapped area inhibits LUL's special
+      // __kernel_syscall handling.  Hence notify |aLUL| at least of the
+      // mapping, even though it can't read any unwind information for it.
+      if (lib.GetName() == "") {
+        aLUL->NotifyExecutableArea(lib.GetStart(),
+                                   lib.GetEnd()-lib.GetStart());
+      }
+      continue;
+    }
+
+    // The object is mapped; hand its mapped address to NotifyAfterMap(),
+    // provided the mapper actually produced a non-empty image.
+    if (mappedImage && mappedSize > 0) {
+      aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd()-lib.GetStart(),
+                           lib.GetName().c_str(), mappedImage);
+    }
+  }
+
+# else
+#  error "Unknown platform"
+# endif
+}
+
+
+// Logging sink handed to LUL: forwards |str| to LOG, minus one trailing \n
+// (LOG adds its own).  If the copy can't be allocated, |str| is logged as-is.
+void
+logging_sink_for_LUL(const char* str) {
+  size_t n = strlen(str);
+  char* tmp = (n > 0 && str[n-1] == '\n') ? strdup(str) : nullptr;
+  if (tmp) {
+    tmp[n-1] = 0;
+    LOG(tmp);
+    free(tmp);
+  } else {
+    LOG(str);
+  }
+}
diff --git a/tools/profiler/lul/platform-linux-lul.h b/tools/profiler/lul/platform-linux-lul.h
new file mode 100644
index 000000000..4698cd388
--- /dev/null
+++ b/tools/profiler/lul/platform-linux-lul.h
@@ -0,0 +1,24 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PLATFORM_LINUX_LUL_H
+#define MOZ_PLATFORM_LINUX_LUL_H
+
+#include "platform.h"
+
+// Find out, in a platform-dependent way, where the code modules got
+// mapped in the process' virtual address space, and get |aLUL| to
+// load unwind info for them.
+void
+read_procmaps(lul::LUL* aLUL);
+
+// LUL needs a callback for its logging sink.
+void
+logging_sink_for_LUL(const char* str);
+
+// A singleton instance of the library.
+extern lul::LUL* sLUL;
+
+#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */
diff --git a/tools/profiler/merge-profiles.py b/tools/profiler/merge-profiles.py
new file mode 100755
index 000000000..0c10c60e1
--- /dev/null
+++ b/tools/profiler/merge-profiles.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python 
+#
+# This script takes b2g process profiles and merges them into a single profile.
+# The meta data is taken from the first profile. The startTime for each profile
+# is used to synchronize the samples. Each thread is moved into the merged
+# profile.
+#
+import json
+import re
+import sys
+
+def MergeProfiles(files):
+    threads = []
+    fileData = []
+    symTable = dict()
+    meta = None
+    libs = None
+    videoUrl = None
+    minStartTime = None
+
+    for fname in files:
+        if fname.startswith("--video="):
+            videoUrl = fname[8:]
+            continue
+
+        match = re.match('profile_([0-9]+)_(.+)\.sym', fname)
+        if match is None:
+            raise Exception("Filename '" + fname + "' doesn't match expected pattern")
+        pid = match.groups(0)[0]
+        pname = match.groups(0)[1]
+
+        fp = open(fname, "r")
+        fileData = json.load(fp)
+        fp.close()
+
+        if meta is None:
+            meta = fileData['profileJSON']['meta'].copy()
+            libs = fileData['profileJSON']['libs']
+            minStartTime = meta['startTime']
+        else:
+            minStartTime = min(minStartTime, fileData['profileJSON']['meta']['startTime'])
+            meta['startTime'] = minStartTime
+
+        for thread in fileData['profileJSON']['threads']:
+            thread['name'] = thread['name'] + " (" + pname + ":" + pid + ")"
+            threads.append(thread)
+
+            # Note that pid + sym, pid + location could be ambigious
+            # if we had pid=11 sym=1 && pid=1 sym=11.
+            pidStr = pid + ":"
+
+            thread['startTime'] = fileData['profileJSON']['meta']['startTime']
+            if meta['version'] >= 3:
+                stringTable = thread['stringTable']
+                for i, str in enumerate(stringTable):
+                    if str[:2] == '0x':
+                        newLoc = pidStr + str
+                        stringTable[i] = newLoc
+                        symTable[newLoc] = str
+            else:
+                samples = thread['samples']
+                for sample in thread['samples']:
+                    for frame in sample['frames']:
+                        if "location" in frame and frame['location'][0:2] == '0x':
+                            oldLoc = frame['location']
+                            newLoc = pidStr + oldLoc
+                            frame['location'] = newLoc
+                            # Default to the unprefixed symbol if no translation is
+                            symTable[newLoc] = oldLoc
+
+        filesyms = fileData['symbolicationTable']
+        for sym in filesyms.keys():
+            symTable[pidStr + sym] = filesyms[sym]
+
+    # For each thread, make the time offsets line up based on the
+    # earliest start
+    for thread in threads:
+        delta = thread['startTime'] - minStartTime
+        if meta['version'] >= 3:
+            idxTime = thread['samples']['schema']['time']
+            for sample in thread['samples']['data']:
+                sample[idxTime] += delta
+            idxTime = thread['markers']['schema']['time']
+            for marker in thread['markers']['data']:
+                marker[idxTime] += delta
+        else:
+            for sample in thread['samples']:
+                if "time" in sample:
+                    sample['time'] += delta
+            for marker in thread['markers']:
+                marker['time'] += delta
+
+    result = dict()
+    result['profileJSON'] = dict()
+    result['profileJSON']['meta'] = meta
+    result['profileJSON']['libs'] = libs
+    result['profileJSON']['threads'] = threads
+    result['symbolicationTable'] = symTable
+    result['format'] = "profileJSONWithSymbolicationTable,1"
+    if videoUrl:
+        result['profileJSON']['meta']['videoCapture'] = {"src": videoUrl}
+
+    json.dump(result, sys.stdout)
+
+
+if len(sys.argv) > 1:
+    MergeProfiles(sys.argv[1:])
+    sys.exit(0)
+
+print "Usage: merge-profile.py profile_<pid1>_<pname1>.sym profile_<pid2>_<pname2>.sym > merged.sym"
+
+
+
diff --git a/tools/profiler/moz.build b/tools/profiler/moz.build
new file mode 100644
index 000000000..e48ae8f94
--- /dev/null
+++ b/tools/profiler/moz.build
@@ -0,0 +1,147 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG['MOZ_ENABLE_PROFILER_SPS']:
+    XPIDL_MODULE = 'profiler'
+    XPIDL_SOURCES += [
+        'gecko/nsIProfiler.idl',
+        'gecko/nsIProfileSaveEvent.idl',
+    ]
+    EXPORTS += [
+        'public/GeckoProfilerFunc.h',
+        'public/GeckoProfilerImpl.h',
+        'public/ProfilerBacktrace.h',
+        'public/ProfilerMarkers.h',
+        'public/PseudoStack.h',
+        'public/shared-libraries.h',
+    ]
+    EXPORTS.mozilla += [
+        'public/ProfileGatherer.h',
+    ]
+    EXTRA_JS_MODULES += [
+        'gecko/Profiler.jsm',
+    ]
+    UNIFIED_SOURCES += [
+        'core/GeckoSampler.cpp',
+        'core/platform.cpp',
+        'core/ProfileBuffer.cpp',
+        'core/ProfileEntry.cpp',
+        'core/ProfileJSONWriter.cpp',
+        'core/ProfilerBacktrace.cpp',
+        'core/ProfilerMarkers.cpp',
+        'core/StackTop.cpp',
+        'core/SyncProfile.cpp',
+        'core/ThreadInfo.cpp',
+        'core/ThreadProfile.cpp',
+        'gecko/nsProfiler.cpp',
+        'gecko/nsProfilerFactory.cpp',
+        'gecko/nsProfilerStartParams.cpp',
+        'gecko/ProfileGatherer.cpp',
+        'gecko/ProfilerIOInterposeObserver.cpp',
+        'gecko/SaveProfileTask.cpp',
+        'gecko/ThreadResponsiveness.cpp',
+    ]
+
+    if CONFIG['OS_TARGET'] in ('Android', 'Linux'):
+        UNIFIED_SOURCES += [
+            'lul/AutoObjectMapper.cpp',
+            'lul/LulCommon.cpp',
+            'lul/LulDwarf.cpp',
+            'lul/LulDwarfSummariser.cpp',
+            'lul/LulElf.cpp',
+            'lul/LulMain.cpp',
+            'lul/platform-linux-lul.cpp',
+        ]
+        # These files cannot be built in unified mode because of name clashes with mozglue headers on Android.
+        SOURCES += [
+            'core/platform-linux.cc',
+            'core/shared-libraries-linux.cc',
+        ]
+        if not CONFIG['MOZ_CRASHREPORTER']:
+            SOURCES += [
+                '/toolkit/crashreporter/google-breakpad/src/common/linux/elfutils.cc',
+                '/toolkit/crashreporter/google-breakpad/src/common/linux/file_id.cc',
+                '/toolkit/crashreporter/google-breakpad/src/common/linux/guid_creator.cc',
+                '/toolkit/crashreporter/google-breakpad/src/common/linux/linux_libc_support.cc',
+                '/toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.cc',
+            ]
+        if CONFIG['CPU_ARCH'] == 'arm':
+            SOURCES += [
+                'core/EHABIStackWalk.cpp',
+            ]
+    elif CONFIG['OS_TARGET'] == 'Darwin':
+        UNIFIED_SOURCES += [
+            'core/platform-macos.cc',
+            'core/shared-libraries-macos.cc',
+        ]
+    elif CONFIG['OS_TARGET'] == 'WINNT':
+        SOURCES += [
+            'core/IntelPowerGadget.cpp',
+            'core/platform-win32.cc',
+            'core/shared-libraries-win32.cc',
+        ]
+
+    LOCAL_INCLUDES += [
+        '/docshell/base',
+        '/ipc/chromium/src',
+        '/mozglue/linker',
+        '/toolkit/crashreporter/google-breakpad/src',
+        '/tools/profiler/core/',
+        '/tools/profiler/gecko/',
+        '/xpcom/base',
+    ]
+
+    if CONFIG['OS_TARGET'] == 'Android':
+        LOCAL_INCLUDES += [
+            # We need access to Breakpad's getcontext(3) which is suitable for Android
+            '/toolkit/crashreporter/google-breakpad/src/common/android/include',
+        ]
+
+    if not CONFIG['MOZ_CRASHREPORTER'] and CONFIG['OS_TARGET'] == 'Android':
+        SOURCES += ['/toolkit/crashreporter/google-breakpad/src/common/android/breakpad_getcontext.S']
+
+    if CONFIG['ANDROID_CPU_ARCH'] == 'armeabi':
+        DEFINES['ARCH_ARMV6'] = True
+
+    if CONFIG['ENABLE_TESTS']:
+        DIRS += ['tests/gtest']
+
+    if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk' and (CONFIG['ANDROID_VERSION'] <= '17' or CONFIG['ANDROID_VERSION'] >= '21'):
+        DEFINES['ELFSIZE'] = 32
+
+    FINAL_LIBRARY = 'xul'
+
+IPDL_SOURCES += [
+    'gecko/ProfilerTypes.ipdlh',
+]
+
+include('/ipc/chromium/chromium-config.mozbuild')
+
+EXPORTS += [
+    'public/GeckoProfiler.h',
+]
+
+if CONFIG['MOZ_TASK_TRACER']:
+    EXPORTS += [
+        'tasktracer/GeckoTaskTracer.h',
+        'tasktracer/GeckoTaskTracerImpl.h',
+        'tasktracer/TracedTaskCommon.h',
+    ]
+    UNIFIED_SOURCES += [
+        'tasktracer/GeckoTaskTracer.cpp',
+        'tasktracer/TracedTaskCommon.cpp',
+    ]
+
+XPCSHELL_TESTS_MANIFESTS += ['tests/xpcshell.ini']
+
+if CONFIG['GNU_CXX']:
+    CXXFLAGS += [
+        '-Wno-error=shadow',
+        '-Wno-ignored-qualifiers', # due to use of breakpad headers
+    ]
+
+with Files('**'):
+    BUG_COMPONENT = ('Core', 'Gecko Profiler')
diff --git a/tools/profiler/nm-symbolicate.py b/tools/profiler/nm-symbolicate.py
new file mode 100755
index 000000000..f51d7f75f
--- /dev/null
+++ b/tools/profiler/nm-symbolicate.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys, subprocess, os
+
+def NMSymbolicate(library, addresses):
+  target_tools_prefix = os.environ.get("TARGET_TOOLS_PREFIX", "")
+  args = [
+    target_tools_prefix + "nm", "-D", "-S", library
+  ]
+  nm_lines = subprocess.check_output(args).split("\n")
+  symbol_table = []
+  for line in nm_lines:
+    pieces = line.split(" ", 4)
+    if len(pieces) != 4 or pieces[2] != "T":
+      continue
+    start = int(pieces[0], 16)
+    end = int(pieces[1], 16)
+    symbol = pieces[3]
+    symbol_table.append({
+      "start": int(pieces[0], 16),
+      "end": int(pieces[0], 16) + int(pieces[1], 16),
+      "funcName": pieces[3]
+    });
+
+  for addressStr in addresses:
+    address = int(addressStr, 16)
+    symbolForAddress = None
+    for symbol in symbol_table:
+      if address >= symbol["start"] and address <= symbol["end"]:
+        symbolForAddress = symbol
+        break
+    if symbolForAddress:
+      print symbolForAddress["funcName"]
+    else:
+      print "??" # match addr2line
+    print ":0" # no line information from nm
+
+if len(sys.argv) > 1:
+    NMSymbolicate(sys.argv[1], sys.argv[2:])
+    sys.exit(0)
+
+print "Usage: nm-symbolicate.py <library> <addresses> > merged.sym"
+
+
diff --git a/tools/profiler/public/GeckoProfiler.h b/tools/profiler/public/GeckoProfiler.h
new file mode 100644
index 000000000..bef017d11
--- /dev/null
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -0,0 +1,300 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* *************** SPS Sampler Information ****************
+ *
+ * SPS is an always-on profiler that takes fast, low-overhead samples
+ * of the program execution using only userspace functionality for portability.
+ * The goal of this module is to provide performance data in a generic
+ * cross platform way without requiring custom tools or kernel support.
+ *
+ * Non goals: Support features that are platform specific or replace
+ *            platform specific profilers.
+ *
+ * Samples are collected to form a timeline with optional timeline event (markers)
+ * used for filtering.
+ *
+ * SPS collects samples in a platform-independent way by using a pseudo stack abstraction
+ * of the real program stack, using 'sample stack frames'. When a sample is collected
+ * all active sample stack frames and the program counter are recorded.
+ */
+
+/* *************** SPS Sampler File Format ****************
+ *
+ * Simple newline-separated tag format:
+ * S      -> BOF tags EOF
+ * tags   -> tag tags
+ * tag    -> CHAR - STRING
+ *
+ * Tags:
+ * 's' - Sample tag followed by the first stack frame followed by 0 or more 'c' tags.
+ * 'c' - Continue Sample tag gives remaining tag element. If a 'c' tag is seen without
+ *         a preceding 's' tag it should be ignored. This is to support the behavior
+ *         of circular buffers.
+ *         If the 'stackwalk' feature is enabled this tag will have the format
+ *         'l-<library name>@<hex address>' and will expect an external tool to translate
+ *         the tag into something readable through a symbolication processing step.
+ * 'm' - Timeline marker. Zero or more may appear before a 's' tag.
+ * 'l' - Information about the program counter library and address. Post processing
+ *         can include function and source line. If built with leaf data enabled
+ *         this tag will describe the last 'c' tag.
+ * 'r' - Responsiveness tag following an 's' tag. Gives an indication on how well the
+ *          application is responding to the event loop. Lower is better.
+ * 't' - Elapsed time since recording started.
+ *
+ */
+
+#ifndef SAMPLER_H
+#define SAMPLER_H
+
+#include "mozilla/Assertions.h"
+#include "mozilla/Attributes.h"
+#ifndef SPS_STANDALONE
+#include "js/TypeDecls.h"
+#endif
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+
+namespace mozilla {
+class TimeStamp;
+
+namespace dom {
+class Promise;
+} // namespace dom
+
+} // namespace mozilla
+
+#ifndef SPS_STANDALONE
+class nsIProfilerStartParams;
+#endif
+
+enum TracingMetadata {
+  TRACING_DEFAULT,
+  TRACING_INTERVAL_START,
+  TRACING_INTERVAL_END,
+  TRACING_EVENT,
+  TRACING_EVENT_BACKTRACE,
+  TRACING_TIMESTAMP
+};
+
+#if !defined(MOZ_ENABLE_PROFILER_SPS)
+
+#include <stdint.h>
+#include <stdarg.h>
+
+// Insert a RAII in this scope to activate a pseudo label. Any samples collected
+// in this scope will contain this annotation. For dynamic strings use
+// PROFILER_LABEL_PRINTF. Arguments must be string literals.
+#define PROFILER_LABEL(name_space, info, category) do {} while (0)
+
+// Similar to PROFILER_LABEL, PROFILER_LABEL_FUNC will push/pop the enclosing
+// function name as the pseudostack label.
+#define PROFILER_LABEL_FUNC(category) do {} while (0)
+
+// Format a dynamic string as a pseudo label. These labels will use a considerable
+// storage size in the circular buffer compared to regular labels. This function
+// can be used to annotate custom information such as URL for the resource being
+// decoded or the size of the paint.
+#define PROFILER_LABEL_PRINTF(name_space, info, category, ...) do {} while (0)
+
+// Insert a marker in the profile timeline. This is useful to delimit something
+// important happening such as the first paint. Unlike profiler_label that are
+// only recorded if a sample is collected while it is active, marker will always
+// be collected.
+#define PROFILER_MARKER(info) do {} while (0)
+#define PROFILER_MARKER_PAYLOAD(info, payload) do { mozilla::UniquePtr<ProfilerMarkerPayload> payloadDeletor(payload); } while (0)
+// Main thread specialization to avoid TLS lookup for performance critical use.
+#define PROFILER_MAIN_THREAD_MARKER(info) do {} while (0)
+#define PROFILER_MAIN_THREAD_LABEL(name_space, info, category) do {} while (0)
+#define PROFILER_MAIN_THREAD_LABEL_PRINTF(name_space, info, category, ...) do {} while (0)
+
+static inline void profiler_tracing(const char* aCategory, const char* aInfo,
+                                    TracingMetadata metaData = TRACING_DEFAULT) {}
+class ProfilerBacktrace;
+
+static inline void profiler_tracing(const char* aCategory, const char* aInfo,
+                                    ProfilerBacktrace* aCause,
+                                    TracingMetadata metaData = TRACING_DEFAULT) {}
+
+// Initialize the profiler TLS, signal handlers on linux. If MOZ_PROFILER_STARTUP
+// is set the profiler will be started. This call must happen before any other
+// sampler calls. Particularly sampler_label/sampler_marker.
+static inline void profiler_init(void* stackTop) {}
+
+// Clean up the profiler module, stopping it if required. This function may
+// also save a shutdown profile if requested. No profiler calls should happen
+// after this point and all pseudo labels should have been popped.
+static inline void profiler_shutdown() {}
+
+// Start the profiler with the selected options. The samples will be
+// recorded in a circular buffer.
+//   "aProfileEntries" is an abstract size indication of how big
+//       the profile's circular buffer should be. Multiply by 4
+//       words to get the cost.
+//   "aInterval" the sampling interval. The profiler will do its
+//       best to sample at this interval. The profiler visualization
+//       should represent the actual sampling accuracy.
+static inline void profiler_start(int aProfileEntries, double aInterval,
+                              const char** aFeatures, uint32_t aFeatureCount,
+                              const char** aThreadNameFilters, uint32_t aFilterCount) {}
+
+// Stop the profiler and discard the profile. Call 'profiler_save' before this
+// to retrieve the profile.
+static inline void profiler_stop() {}
+
+// These functions pause and resume the profiler. While paused the profile will not
+// take any samples and will not record any data into its buffers. The profiler
+// remains fully initialized in this state. Timeline markers will still be stored.
+// This feature will keep javascript profiling enabled, thus allowing toggling the
+// profiler without invalidating the JIT.
+static inline bool profiler_is_paused() { return false; }
+static inline void profiler_pause() {}
+static inline void profiler_resume() {}
+
+
+// Immediately capture the current thread's call stack and return it
+static inline ProfilerBacktrace* profiler_get_backtrace() { return nullptr; }
+static inline void profiler_get_backtrace_noalloc(char *output, size_t outputSize) { return; }
+
+// Free a ProfilerBacktrace returned by profiler_get_backtrace()
+static inline void profiler_free_backtrace(ProfilerBacktrace* aBacktrace) {}
+
+static inline bool profiler_is_active() { return false; }
+
+// Check if an external profiler feature is active.
+// Supported:
+//  * gpu
+static inline bool profiler_feature_active(const char*) { return false; }
+
+// Internal-only. Used by the event tracer.
+static inline void profiler_responsiveness(const mozilla::TimeStamp& aTime) {}
+
+// Internal-only.
+static inline void profiler_set_frame_number(int frameNumber) {}
+
+// Get the profile encoded as a JSON string.
+static inline mozilla::UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0) {
+  return nullptr;
+}
+
+#ifndef SPS_STANDALONE
+// Get the profile encoded as a JSON object (needs the js/TypeDecls.h types).
+static inline JSObject* profiler_get_profile_jsobject(JSContext* aCx,
+                                                      double aSinceTime = 0) {
+  return nullptr;
+}
+
+// Get the profile encoded as a JSON object.
+static inline void profiler_get_profile_jsobject_async(double aSinceTime = 0,
+                                                       mozilla::dom::Promise* = nullptr) {}
+static inline void profiler_get_start_params(int* aEntrySize,
+                                             double* aInterval,
+                                             mozilla::Vector<const char*>* aFilters,
+                                             mozilla::Vector<const char*>* aFeatures) {}
+#endif
+
+// Get the profile and write it into a file
+static inline void profiler_save_profile_to_file(const char* aFilename) { }
+
+// Get the features supported by the profiler that are accepted by profiler_start.
+// Returns a null terminated char* array.
+static inline const char** profiler_get_features() { return nullptr; }
+
+// Report the current buffer status through the three out-params: the current
+// write position in the buffer, the total size of the buffer, and the
+// generation of the buffer. A user interface displaying the profiler status
+// can use these to give the user a sense for how fast the buffer is being
+// written to, and how much data is visible.
+// This variant is the one compiled when MOZ_ENABLE_PROFILER_SPS is not
+// defined; it reports zero for all three values.
+static inline void profiler_get_buffer_info(uint32_t *aCurrentPosition,
+                                            uint32_t *aTotalSize,
+                                            uint32_t *aGeneration)
+{
+  // All three pointers are dereferenced unconditionally.
+  uint32_t* const outParams[] = { aCurrentPosition, aTotalSize, aGeneration };
+  for (uint32_t* out : outParams) { *out = 0; }
+}
+
+// Discard the profile, throw away the profile and notify 'profiler-locked'.
+// This function is to be used when entering private browsing to prevent
+// the profiler from collecting sensitive data.
+static inline void profiler_lock() {}
+
+// Re-enable the profiler and notify 'profiler-unlocked'.
+static inline void profiler_unlock() {}
+
+static inline void profiler_register_thread(const char* name, void* guessStackTop) {}
+static inline void profiler_unregister_thread() {}
+
+// These functions tell the profiler that a thread went to sleep so that we can avoid
+// sampling it while it's sleeping. Calling profiler_sleep_start() twice without
+// profiler_sleep_end() is an error.
+static inline void profiler_sleep_start() {}
+static inline void profiler_sleep_end() {}
+static inline bool profiler_is_sleeping() { return false; }
+
+// Called by the JSRuntime's operation callback. This is used to enable
+// profiling on auxiliary threads.
+static inline void profiler_js_operation_callback() {}
+
+static inline double profiler_time() { return 0; }
+static inline double profiler_time(const mozilla::TimeStamp& aTime) { return 0; }
+
+static inline bool profiler_in_privacy_mode() { return false; }
+
+static inline void profiler_log(const char *str) {}
+static inline void profiler_log(const char *fmt, va_list args) {}
+
+#else
+
+#include "GeckoProfilerImpl.h"
+
+#endif
+
+// Brackets a scope with profiler start-up and tear-down: the constructor
+// forwards stackTop to profiler_init() and the destructor calls
+// profiler_shutdown().
+class MOZ_RAII GeckoProfilerInitRAII {
+public:
+  explicit GeckoProfilerInitRAII(void* stackTop) { profiler_init(stackTop); }
+
+  ~GeckoProfilerInitRAII() { profiler_shutdown(); }
+};
+
+// Tells the profiler the thread is sleeping for as long as the object is
+// alive: profiler_sleep_start() in the constructor, profiler_sleep_end() in
+// the destructor.
+class MOZ_RAII GeckoProfilerSleepRAII {
+public:
+  GeckoProfilerSleepRAII() { profiler_sleep_start(); }
+
+  ~GeckoProfilerSleepRAII() { profiler_sleep_end(); }
+};
+
+/**
+ * Temporarily wakes the profiler (when profiler_is_sleeping() reports sleep)
+ * while servicing events such as Asynchronous Procedure Calls (APCs).
+ */
+class MOZ_RAII GeckoProfilerWakeRAII {
+public:
+  GeckoProfilerWakeRAII() : mIssuedWake(profiler_is_sleeping()) {
+    if (!mIssuedWake) {
+      return;
+    }
+    profiler_sleep_end();
+  }
+  ~GeckoProfilerWakeRAII() {
+    if (!mIssuedWake) {
+      return;
+    }
+    MOZ_ASSERT(!profiler_is_sleeping());
+    profiler_sleep_start();
+  }
+private:
+  bool mIssuedWake;  // true when the constructor called profiler_sleep_end()
+};
+
+#endif // ifndef SAMPLER_H
diff --git a/tools/profiler/public/GeckoProfilerFunc.h b/tools/profiler/public/GeckoProfilerFunc.h
new file mode 100644
index 000000000..e0d27f593
--- /dev/null
+++ b/tools/profiler/public/GeckoProfilerFunc.h
@@ -0,0 +1,125 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILER_FUNCS_H
+#define PROFILER_FUNCS_H
+
+#ifndef SPS_STANDALONE
+#include "js/TypeDecls.h"
+#endif
+#include "js/ProfilingStack.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Vector.h"
+#include <stdint.h>
+
+class nsISupports;
+
+namespace mozilla {
+class TimeStamp;
+
+namespace dom {
+class Promise;
+} // namespace dom
+
+} // namespace mozilla
+
+class ProfilerBacktrace;
+class ProfilerMarkerPayload;
+
+// Returns a handle to pass on exit. This can check that we are popping the
+// correct callstack.
+inline void* mozilla_sampler_call_enter(const char *aInfo, js::ProfileEntry::Category aCategory,
+                                        void *aFrameAddress = nullptr, bool aCopy = false,
+                                        uint32_t line = 0);
+
+inline void  mozilla_sampler_call_exit(void* handle);
+
+void  mozilla_sampler_add_marker(const char *aInfo,
+                                 ProfilerMarkerPayload *aPayload = nullptr);
+
+void mozilla_sampler_start(int aEntries, double aInterval,
+                           const char** aFeatures, uint32_t aFeatureCount,
+                           const char** aThreadNameFilters, uint32_t aFilterCount);
+
+void mozilla_sampler_stop();
+
+bool mozilla_sampler_is_paused();
+void mozilla_sampler_pause();
+void mozilla_sampler_resume();
+
+ProfilerBacktrace* mozilla_sampler_get_backtrace();
+void mozilla_sampler_free_backtrace(ProfilerBacktrace* aBacktrace);
+void mozilla_sampler_get_backtrace_noalloc(char *output, size_t outputSize);
+
+bool mozilla_sampler_is_active();
+
+bool mozilla_sampler_feature_active(const char* aName);
+
+void mozilla_sampler_responsiveness(const mozilla::TimeStamp& time);
+
+void mozilla_sampler_frame_number(int frameNumber);
+
+const double* mozilla_sampler_get_responsiveness();
+
+void mozilla_sampler_save();
+
+mozilla::UniquePtr<char[]> mozilla_sampler_get_profile(double aSinceTime);
+
+#ifndef SPS_STANDALONE
+JSObject *mozilla_sampler_get_profile_data(JSContext* aCx, double aSinceTime);
+void mozilla_sampler_get_profile_data_async(double aSinceTime,
+                                            mozilla::dom::Promise* aPromise);
+void mozilla_sampler_get_profiler_start_params(int* aEntrySize,
+                                               double* aInterval,
+                                               mozilla::Vector<const char*>* aFilters,
+                                               mozilla::Vector<const char*>* aFeatures);
+void mozilla_sampler_get_gatherer(nsISupports** aRetVal);
+#endif
+
+// Make this function easily callable from a debugger in a build without
+// debugging information (work around http://llvm.org/bugs/show_bug.cgi?id=22211)
+extern "C" {
+  void mozilla_sampler_save_profile_to_file(const char* aFilename);
+}
+
+const char** mozilla_sampler_get_features();
+
+void mozilla_sampler_get_buffer_info(uint32_t *aCurrentPosition, uint32_t *aTotalSize,
+                                     uint32_t *aGeneration);
+
+void mozilla_sampler_init(void* stackTop);
+
+void mozilla_sampler_shutdown();
+
+// Lock the profiler. When locked the profiler is (1) stopped,
+// (2) profile data is cleared, (3) profiler-locked is fired.
+// This is used to lock down the profiler during private browsing
+void mozilla_sampler_lock();
+
+// Unlock the profiler, leaving it stopped and fires profiler-unlocked.
+void mozilla_sampler_unlock();
+
+// Register/unregister threads with the profiler
+bool mozilla_sampler_register_thread(const char* name, void* stackTop);
+void mozilla_sampler_unregister_thread();
+
+void mozilla_sampler_sleep_start();
+void mozilla_sampler_sleep_end();
+bool mozilla_sampler_is_sleeping();
+
+double mozilla_sampler_time();
+double mozilla_sampler_time(const mozilla::TimeStamp& aTime);
+
+void mozilla_sampler_tracing(const char* aCategory, const char* aInfo,
+                             TracingMetadata aMetaData);
+
+void mozilla_sampler_tracing(const char* aCategory, const char* aInfo,
+                             ProfilerBacktrace* aCause,
+                             TracingMetadata aMetaData);
+
+void mozilla_sampler_log(const char *fmt, va_list args);
+
+#endif
+
diff --git a/tools/profiler/public/GeckoProfilerImpl.h b/tools/profiler/public/GeckoProfilerImpl.h
new file mode 100644
index 000000000..a32096b94
--- /dev/null
+++ b/tools/profiler/public/GeckoProfilerImpl.h
@@ -0,0 +1,522 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+// IWYU pragma: private, include "GeckoProfiler.h"
+
+#ifndef TOOLS_SPS_SAMPLER_H_
+#define TOOLS_SPS_SAMPLER_H_
+
+#include <stdlib.h>
+#include <signal.h>
+#include <stdarg.h>
+#include "mozilla/Assertions.h"
+#include "mozilla/GuardObjects.h"
+#include "mozilla/Sprintf.h"
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/UniquePtr.h"
+#ifndef SPS_STANDALONE
+#include "nscore.h"
+#include "nsISupports.h"
+#endif
+#include "GeckoProfilerFunc.h"
+#include "PseudoStack.h"
+#include "ProfilerBacktrace.h"
+
+// Make sure that we can use std::min here without the Windows headers messing with us.
+#ifdef min
+#undef min
+#endif
+
+class GeckoSampler;
+
+namespace mozilla {
+class TimeStamp;
+} // namespace mozilla
+
+extern MOZ_THREAD_LOCAL(PseudoStack *) tlsPseudoStack;
+extern MOZ_THREAD_LOCAL(GeckoSampler *) tlsTicker;
+extern MOZ_THREAD_LOCAL(void *) tlsStackTop;
+extern bool stack_key_initialized;
+
+#ifndef SAMPLE_FUNCTION_NAME
+# ifdef __GNUC__
+#  define SAMPLE_FUNCTION_NAME __FUNCTION__
+# elif defined(_MSC_VER)
+#  define SAMPLE_FUNCTION_NAME __FUNCTION__
+# else
+#  define SAMPLE_FUNCTION_NAME __func__  // defined in C99, supported in various C++ compilers. Just raw function name.
+# endif
+#endif
+
+static inline
+void profiler_init(void* stackTop)
+{
+  mozilla_sampler_init(stackTop);
+}
+
+static inline
+void profiler_shutdown()
+{
+  mozilla_sampler_shutdown();
+}
+
+static inline
+void profiler_start(int aProfileEntries, double aInterval,
+                    const char** aFeatures, uint32_t aFeatureCount,
+                    const char** aThreadNameFilters, uint32_t aFilterCount)
+{
+  mozilla_sampler_start(aProfileEntries, aInterval, aFeatures, aFeatureCount, aThreadNameFilters, aFilterCount);
+}
+
+static inline
+void profiler_stop()
+{
+  mozilla_sampler_stop();
+}
+
+static inline
+bool profiler_is_paused()
+{
+  return mozilla_sampler_is_paused();
+}
+
+static inline
+void profiler_pause()
+{
+  mozilla_sampler_pause();
+}
+
+static inline
+void profiler_resume()
+{
+  mozilla_sampler_resume();
+}
+
+static inline
+ProfilerBacktrace* profiler_get_backtrace()
+{
+  return mozilla_sampler_get_backtrace();
+}
+
+static inline
+void profiler_free_backtrace(ProfilerBacktrace* aBacktrace)
+{
+  mozilla_sampler_free_backtrace(aBacktrace);
+}
+
+static inline
+void profiler_get_backtrace_noalloc(char *output, size_t outputSize)
+{
+  return mozilla_sampler_get_backtrace_noalloc(output, outputSize);
+}
+
+static inline
+bool profiler_is_active()
+{
+  return mozilla_sampler_is_active();
+}
+
+static inline
+bool profiler_feature_active(const char* aName)
+{
+  return mozilla_sampler_feature_active(aName);
+}
+
+static inline
+void profiler_responsiveness(const mozilla::TimeStamp& aTime)
+{
+  mozilla_sampler_responsiveness(aTime);
+}
+
+static inline
+void profiler_set_frame_number(int frameNumber)
+{
+  return mozilla_sampler_frame_number(frameNumber);
+}
+
+static inline
+mozilla::UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0)
+{
+  return mozilla_sampler_get_profile(aSinceTime);
+}
+
+#ifndef SPS_STANDALONE
+static inline
+JSObject* profiler_get_profile_jsobject(JSContext* aCx, double aSinceTime = 0)
+{
+  return mozilla_sampler_get_profile_data(aCx, aSinceTime);
+}
+
+static inline
+void profiler_get_profile_jsobject_async(double aSinceTime = 0,
+                                         mozilla::dom::Promise* aPromise = 0)
+{
+  mozilla_sampler_get_profile_data_async(aSinceTime, aPromise);
+}
+
+static inline
+void profiler_get_start_params(int* aEntrySize,
+                               double* aInterval,
+                               mozilla::Vector<const char*>* aFilters,
+                               mozilla::Vector<const char*>* aFeatures)
+{
+  mozilla_sampler_get_profiler_start_params(aEntrySize, aInterval, aFilters, aFeatures);
+}
+
+static inline
+void profiler_get_gatherer(nsISupports** aRetVal)
+{
+  mozilla_sampler_get_gatherer(aRetVal);
+}
+
+#endif
+
+static inline
+void profiler_save_profile_to_file(const char* aFilename)
+{
+  return mozilla_sampler_save_profile_to_file(aFilename);
+}
+
+static inline
+const char** profiler_get_features()
+{
+  return mozilla_sampler_get_features();
+}
+
+static inline
+void profiler_get_buffer_info(uint32_t *aCurrentPosition, uint32_t *aTotalSize,
+                              uint32_t *aGeneration)
+{
+  return mozilla_sampler_get_buffer_info(aCurrentPosition, aTotalSize, aGeneration);
+}
+
+static inline
+void profiler_lock()
+{
+  return mozilla_sampler_lock();
+}
+
+static inline
+void profiler_unlock()
+{
+  return mozilla_sampler_unlock();
+}
+
+static inline
+void profiler_register_thread(const char* name, void* guessStackTop)
+{
+  mozilla_sampler_register_thread(name, guessStackTop);
+}
+
+static inline
+void profiler_unregister_thread()
+{
+  mozilla_sampler_unregister_thread();
+}
+
+static inline
+void profiler_sleep_start()
+{
+  mozilla_sampler_sleep_start();
+}
+
+static inline
+void profiler_sleep_end()
+{
+  mozilla_sampler_sleep_end();
+}
+
+static inline
+bool profiler_is_sleeping()
+{
+  return mozilla_sampler_is_sleeping();
+}
+
+#ifndef SPS_STANDALONE
+static inline
+void profiler_js_operation_callback()
+{
+  PseudoStack *stack = tlsPseudoStack.get();
+  if (!stack) {
+    return;
+  }
+
+  stack->jsOperationCallback();
+}
+#endif
+
+static inline
+double profiler_time()
+{
+  return mozilla_sampler_time();
+}
+
+static inline
+double profiler_time(const mozilla::TimeStamp& aTime)
+{
+  return mozilla_sampler_time(aTime);
+}
+
+static inline
+bool profiler_in_privacy_mode()
+{
+  PseudoStack *stack = tlsPseudoStack.get();
+  if (!stack) {
+    return false;
+  }
+  return stack->mPrivacyMode;
+}
+
+static inline void profiler_tracing(const char* aCategory, const char* aInfo,
+                                    ProfilerBacktrace* aCause,
+                                    TracingMetadata aMetaData = TRACING_DEFAULT)
+{
+  // Don't insert a marker if we're not profiling to avoid
+  // the heap copy (malloc).
+  if (!stack_key_initialized || !profiler_is_active()) {
+    delete aCause;
+    return;
+  }
+
+  mozilla_sampler_tracing(aCategory, aInfo, aCause, aMetaData);
+}
+
+// Records a tracing marker without a backtrace. Does nothing when
+// stack_key_initialized is false or the profiler is not active.
+static inline void profiler_tracing(const char* aCategory, const char* aInfo,
+                                    TracingMetadata aMetaData = TRACING_DEFAULT)
+{
+  // Don't insert a marker if we're not profiling to avoid
+  // the heap copy (malloc).
+  const bool canRecord = stack_key_initialized && profiler_is_active();
+  if (!canRecord) {
+    return;
+  }
+
+  mozilla_sampler_tracing(aCategory, aInfo, aMetaData);
+}
+
+#define SAMPLER_APPEND_LINE_NUMBER_PASTE(id, line) id ## line
+#define SAMPLER_APPEND_LINE_NUMBER_EXPAND(id, line) SAMPLER_APPEND_LINE_NUMBER_PASTE(id, line)
+#define SAMPLER_APPEND_LINE_NUMBER(id) SAMPLER_APPEND_LINE_NUMBER_EXPAND(id, __LINE__)
+
+// Uncomment this to turn on systrace or build with
+// ac_add_options --enable-systrace
+//#define MOZ_USE_SYSTRACE
+#ifdef MOZ_USE_SYSTRACE
+#ifndef ATRACE_TAG
+# define ATRACE_TAG ATRACE_TAG_ALWAYS
+#endif
+// We need HAVE_ANDROID_OS to be defined for Trace.h.
+// If its not set we will set it temporary and remove it.
+# ifndef HAVE_ANDROID_OS
+#   define HAVE_ANDROID_OS
+#   define REMOVE_HAVE_ANDROID_OS
+# endif
+// Android source code will include <cutils/trace.h> before this. There is no
+// HAVE_ANDROID_OS defined in Firefox OS build at that time. Enabled it globally
+// will cause other build break. So atrace_begin and atrace_end are not defined.
+// It will cause a build-break when we include <utils/Trace.h>. Use undef
+// _LIBS_CUTILS_TRACE_H will force <cutils/trace.h> to define atrace_begin and
+// atrace_end with defined HAVE_ANDROID_OS again. Then there is no build-break.
+# undef _LIBS_CUTILS_TRACE_H
+# include <utils/Trace.h>
+# define MOZ_PLATFORM_TRACING(name) android::ScopedTrace SAMPLER_APPEND_LINE_NUMBER(scopedTrace)(ATRACE_TAG, name);
+# ifdef REMOVE_HAVE_ANDROID_OS
+#  undef HAVE_ANDROID_OS
+#  undef REMOVE_HAVE_ANDROID_OS
+# endif
+#else
+# define MOZ_PLATFORM_TRACING(name)
+#endif
+
+// we want the class and function name but can't easily get that using preprocessor macros
+// __func__ doesn't have the class name and __PRETTY_FUNCTION__ has the parameters
+
+#define PROFILER_LABEL(name_space, info, category) MOZ_PLATFORM_TRACING(name_space "::" info) mozilla::SamplerStackFrameRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__)
+#define PROFILER_LABEL_FUNC(category) MOZ_PLATFORM_TRACING(SAMPLE_FUNCTION_NAME) mozilla::SamplerStackFrameRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(SAMPLE_FUNCTION_NAME, category, __LINE__)
+#define PROFILER_LABEL_PRINTF(name_space, info, category, ...) MOZ_PLATFORM_TRACING(name_space "::" info) mozilla::SamplerStackFramePrintfRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__, __VA_ARGS__)
+
+#define PROFILER_MARKER(info) mozilla_sampler_add_marker(info)
+#define PROFILER_MARKER_PAYLOAD(info, payload) mozilla_sampler_add_marker(info, payload)
+#define PROFILER_MAIN_THREAD_MARKER(info)  MOZ_ASSERT(NS_IsMainThread(), "This can only be called on the main thread"); mozilla_sampler_add_marker(info)
+
+#define PROFILER_MAIN_THREAD_LABEL(name_space, info, category)  MOZ_ASSERT(NS_IsMainThread(), "This can only be called on the main thread"); mozilla::SamplerStackFrameRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__)
+#define PROFILER_MAIN_THREAD_LABEL_PRINTF(name_space, info, category, ...)  MOZ_ASSERT(NS_IsMainThread(), "This can only be called on the main thread"); mozilla::SamplerStackFramePrintfRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__, __VA_ARGS__)
+
+
+/* FIXME/bug 789667: memory constraints wouldn't be much of a problem for
+ * this small a sample buffer size, except that serializing the
+ * profile data is extremely, unnecessarily memory intensive. */
+#ifdef MOZ_WIDGET_GONK
+# define PLATFORM_LIKELY_MEMORY_CONSTRAINED
+#endif
+
+#if !defined(PLATFORM_LIKELY_MEMORY_CONSTRAINED) && !defined(ARCH_ARMV6)
+# define PROFILE_DEFAULT_ENTRY 1000000
+#else
+# define PROFILE_DEFAULT_ENTRY 100000
+#endif
+
+// In the case of profiler_get_backtrace we know that we only need enough space
+// for a single backtrace.
+#define GET_BACKTRACE_DEFAULT_ENTRY 1000
+
+#if defined(PLATFORM_LIKELY_MEMORY_CONSTRAINED)
+/* A 1ms sampling interval has been shown to be a large perf hit
+ * (10fps) on memory-constrained (low-end) platforms, and additionally
+ * to yield different results from the profiler.  Where this is the
+ * important case, b2g, there are also many gecko processes which
+ * magnify these effects. */
+# define PROFILE_DEFAULT_INTERVAL 10
+#elif defined(ANDROID)
+// We use a lower frequency on Android, in order to make things work
+// more smoothly on phones.  This value can be adjusted later with
+// some libunwind optimizations.
+// In one sample measurement on Galaxy Nexus, out of about 700 backtraces,
+// 60 of them took more than 25ms, and the average and standard deviation
+// were 6.17ms and 9.71ms respectively.
+
+// For now since we don't support stackwalking let's use 1ms since it's fast
+// enough.
+#define PROFILE_DEFAULT_INTERVAL 1
+#else
+#define PROFILE_DEFAULT_INTERVAL 1
+#endif
+#define PROFILE_DEFAULT_FEATURES NULL
+#define PROFILE_DEFAULT_FEATURE_COUNT 0
+
+namespace mozilla {
+
+class MOZ_RAII GeckoProfilerTracingRAII {
+public:
+  GeckoProfilerTracingRAII(const char* aCategory, const char* aInfo,
+                           mozilla::UniquePtr<ProfilerBacktrace> aBacktrace
+                           MOZ_GUARD_OBJECT_NOTIFIER_PARAM)
+    : mCategory(aCategory)
+    , mInfo(aInfo)
+  {
+    MOZ_GUARD_OBJECT_NOTIFIER_INIT;
+    profiler_tracing(mCategory, mInfo, aBacktrace.release(), TRACING_INTERVAL_START);
+  }
+
+  ~GeckoProfilerTracingRAII() {
+    profiler_tracing(mCategory, mInfo, TRACING_INTERVAL_END);
+  }
+
+protected:
+  MOZ_DECL_USE_GUARD_OBJECT_NOTIFIER
+  const char* mCategory;
+  const char* mInfo;
+};
+
+class MOZ_RAII SamplerStackFrameRAII {
+public:
+  // we only copy the strings at save time, so to take multiple parameters we'd need to copy them then.
+  SamplerStackFrameRAII(const char *aInfo,
+    js::ProfileEntry::Category aCategory, uint32_t line
+    MOZ_GUARD_OBJECT_NOTIFIER_PARAM)
+  {
+    MOZ_GUARD_OBJECT_NOTIFIER_INIT;
+    mHandle = mozilla_sampler_call_enter(aInfo, aCategory, this, false, line);
+  }
+  ~SamplerStackFrameRAII() {
+    mozilla_sampler_call_exit(mHandle);
+  }
+private:
+  MOZ_DECL_USE_GUARD_OBJECT_NOTIFIER
+  void* mHandle;
+};
+
+static const int SAMPLER_MAX_STRING = 128;
+class MOZ_RAII SamplerStackFramePrintfRAII {
+public:
+  // Pushes a frame labelled "<aInfo> <formatted text>" while the profiler is
+  // active outside privacy mode; otherwise pushes aInfo alone (aCopy=false).
+  SamplerStackFramePrintfRAII(const char *aInfo,
+    js::ProfileEntry::Category aCategory, uint32_t line, const char *aFormat, ...)
+    : mHandle(nullptr)
+  {
+    const bool wantFormatted = profiler_is_active() && !profiler_in_privacy_mode();
+    if (!wantFormatted) {
+      mHandle = mozilla_sampler_call_enter(aInfo, aCategory, this, false, line);
+      return;
+    }
+
+    // We have to use separate printf's because we're using the varargs.
+    char formatted[SAMPLER_MAX_STRING];
+    va_list ap;
+    va_start(ap, aFormat);
+    VsprintfLiteral(formatted, aFormat, ap);
+    SprintfLiteral(mDest, "%s %s", aInfo, formatted);
+    mHandle = mozilla_sampler_call_enter(mDest, aCategory, this, true, line);
+    va_end(ap);
+  }
+  ~SamplerStackFramePrintfRAII() {
+    mozilla_sampler_call_exit(mHandle);
+  }
+private:
+  char mDest[SAMPLER_MAX_STRING];
+  void* mHandle;
+};
+
+} // namespace mozilla
+
+// Returns the value stored in the thread-local tlsPseudoStack, or nullptr
+// while stack_key_initialized is false.
+inline PseudoStack* mozilla_get_pseudo_stack(void)
+{
+  return stack_key_initialized ? tlsPseudoStack.get() : nullptr;
+}
+
+// Pushes a frame onto the calling thread's PseudoStack. Returns that stack as
+// an opaque handle to hand to mozilla_sampler_call_exit, or nullptr when no
+// frame was pushed.
+inline void* mozilla_sampler_call_enter(const char *aInfo,
+  js::ProfileEntry::Category aCategory, void *aFrameAddress, bool aCopy, uint32_t line)
+{
+  // check if we've been initialized to avoid calling pthread_getspecific
+  // with a null tlsStack which will return undefined results.
+  if (stack_key_initialized) {
+    // we can't infer whether the thread's stack has been initialized
+    // based on the value of stack_key_initialized because
+    // it is only initialized when a thread is being profiled.
+    if (PseudoStack *pseudoStack = tlsPseudoStack.get()) {
+      pseudoStack->push(aInfo, aCategory, aFrameAddress, aCopy, line);
+
+      // The handle is meant to support future changes
+      // but for now it is simply used to save a call to
+      // pthread_getspecific on exit. It also supports the
+      // case where the sampler is initialized between
+      // enter and exit.
+      return pseudoStack;
+    }
+  }
+  return nullptr;
+}
+
+// Pops the frame pushed by the matching mozilla_sampler_call_enter. aHandle is
+// the value that call returned; a null handle makes this a no-op.
+inline void mozilla_sampler_call_exit(void *aHandle)
+{
+  if (aHandle) {
+    static_cast<PseudoStack*>(aHandle)->popAndMaybeDelete();
+  }
+}
+
+void mozilla_sampler_add_marker(const char *aMarker, ProfilerMarkerPayload *aPayload);
+
+static inline
+void profiler_log(const char *str)
+{
+  profiler_tracing("log", str, TRACING_EVENT);
+}
+
+static inline
+void profiler_log(const char *fmt, va_list args)
+{
+  mozilla_sampler_log(fmt, args);
+}
+
+#endif /* ndef TOOLS_SPS_SAMPLER_H_ */
diff --git a/tools/profiler/public/ProfileGatherer.h b/tools/profiler/public/ProfileGatherer.h
new file mode 100644
index 000000000..4e39a4f5c
--- /dev/null
+++ b/tools/profiler/public/ProfileGatherer.h
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZ_PROFILE_GATHERER_H
+#define MOZ_PROFILE_GATHERER_H
+
+#include "mozilla/dom/Promise.h"
+
+class GeckoSampler;
+
+namespace mozilla {
+
+class ProfileGatherer final : public nsIObserver
+{
+public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSIOBSERVER
+
+  explicit ProfileGatherer(GeckoSampler* aTicker);
+  void WillGatherOOPProfile();
+  void GatheredOOPProfile();
+  void Start(double aSinceTime, mozilla::dom::Promise* aPromise);
+  void Cancel();
+  void OOPExitProfile(const nsCString& aProfile);
+
+private:
+  ~ProfileGatherer() {}  // private: not destructible from outside the class
+  void Finish();
+  void Reset();
+
+  nsTArray<nsCString> mExitProfiles;
+  RefPtr<mozilla::dom::Promise> mPromise;
+  GeckoSampler* mTicker;  // raw pointer received by the constructor
+  double mSinceTime;
+  uint32_t mPendingProfiles;
+  bool mGathering;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/tools/profiler/public/ProfilerBacktrace.h b/tools/profiler/public/ProfilerBacktrace.h
new file mode 100644
index 000000000..bcaab3563
--- /dev/null
+++ b/tools/profiler/public/ProfilerBacktrace.h
@@ -0,0 +1,36 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __PROFILER_BACKTRACE_H
+#define __PROFILER_BACKTRACE_H
+
+class SyncProfile;
+class SpliceableJSONWriter;
+class UniqueStacks;
+
+class ProfilerBacktrace
+{
+public:
+  explicit ProfilerBacktrace(SyncProfile* aProfile);
+  ~ProfilerBacktrace();
+
+  // ProfilerBacktraces' stacks are deduplicated in the context of the
+  // profile that contains the backtrace as a marker payload.
+  //
+  // That is, markers that contain backtraces should not need their own stack,
+  // frame, and string tables. They should instead reuse their parent
+  // profile's tables.
+  void StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks);
+
+private:
+  ProfilerBacktrace(const ProfilerBacktrace&) = delete;  // not copyable
+  ProfilerBacktrace& operator=(const ProfilerBacktrace&) = delete;
+
+  SyncProfile*  mProfile;
+};
+
+#endif // __PROFILER_BACKTRACE_H
+
diff --git a/tools/profiler/public/ProfilerMarkers.h b/tools/profiler/public/ProfilerMarkers.h
new file mode 100644
index 000000000..29711f210
--- /dev/null
+++ b/tools/profiler/public/ProfilerMarkers.h
@@ -0,0 +1,193 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILER_MARKERS_H
+#define PROFILER_MARKERS_H
+
+#include "mozilla/TimeStamp.h"
+#include "mozilla/Attributes.h"
+
+namespace mozilla {
+namespace layers {
+class Layer;
+} // namespace layers
+} // namespace mozilla
+
+class SpliceableJSONWriter;
+class UniqueStacks;
+
+/**
+ * This is an abstract object that can be implemented to supply
+ * data to be attached to a profiler marker. Most data inserted
+ * into a profile is stored in a circular buffer. This buffer
+ * typically wraps around and overwrites most entries. Because
+ * of this, this structure is designed to defer the work of
+ * preparing the payload until 'preparePayload' is called.
+ *
+ * Note when implementing that this object is typically constructed
+ * on a particular thread but 'preparePayload' and the destructor
+ * are called from the main thread.
+ */
+class ProfilerMarkerPayload
+{
+public:
+  /**
+   * ProfilerMarkerPayload takes ownership of aStack
+   */
+  explicit ProfilerMarkerPayload(ProfilerBacktrace* aStack = nullptr);
+  ProfilerMarkerPayload(const mozilla::TimeStamp& aStartTime,
+                        const mozilla::TimeStamp& aEndTime,
+                        ProfilerBacktrace* aStack = nullptr);
+
+  /**
+   * Called from the main thread
+   */
+  virtual ~ProfilerMarkerPayload();
+
+  /**
+   * Called from the main thread
+   */
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) = 0;
+
+  mozilla::TimeStamp GetStartTime() const { return mStartTime; }
+
+protected:
+  /**
+   * Called from the main thread
+   */
+  void streamCommonProps(const char* aMarkerType, SpliceableJSONWriter& aWriter,
+                         UniqueStacks& aUniqueStacks);
+
+  void SetStack(ProfilerBacktrace* aStack) { mStack = aStack; }
+
+private:
+  mozilla::TimeStamp  mStartTime;
+  mozilla::TimeStamp  mEndTime;
+  ProfilerBacktrace*  mStack;
+};
+
+class ProfilerMarkerTracing : public ProfilerMarkerPayload
+{
+public:
+  ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData);
+  ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData, ProfilerBacktrace* aCause);
+
+  const char *GetCategory() const { return mCategory; }
+  TracingMetadata GetMetaData() const { return mMetaData; }
+
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) override;
+
+private:
+  const char *mCategory;
+  TracingMetadata mMetaData;
+};
+
+
+#ifndef SPS_STANDALONE
+#include "gfxASurface.h"
+class ProfilerMarkerImagePayload : public ProfilerMarkerPayload
+{
+public:
+  explicit ProfilerMarkerImagePayload(gfxASurface *aImg);
+
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) override;
+
+private:
+  RefPtr<gfxASurface> mImg;
+};
+
+class IOMarkerPayload : public ProfilerMarkerPayload
+{
+public:
+  IOMarkerPayload(const char* aSource, const char* aFilename, const mozilla::TimeStamp& aStartTime,
+                  const mozilla::TimeStamp& aEndTime,
+                  ProfilerBacktrace* aStack);
+  ~IOMarkerPayload();
+
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) override;
+
+private:
+  const char* mSource;
+  char* mFilename;
+};
+
+/**
+ * Contains the translation applied to a 2d layer so we can
+ * track the layer position at each frame.
+ */
+class LayerTranslationPayload : public ProfilerMarkerPayload
+{
+public:
+  LayerTranslationPayload(mozilla::layers::Layer* aLayer,
+                          mozilla::gfx::Point aPoint);
+
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) override;
+
+private:
+  mozilla::layers::Layer* mLayer;
+  mozilla::gfx::Point mPoint;
+};
+
+#include "Units.h"    // For ScreenIntPoint
+
+/**
+ * Tracks when touch events are processed by gecko, not when
+ * the touch actually occurred in gonk/android.
+ */
+class TouchDataPayload : public ProfilerMarkerPayload
+{
+public:
+  explicit TouchDataPayload(const mozilla::ScreenIntPoint& aPoint);
+  virtual ~TouchDataPayload() {}
+
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) override;
+
+private:
+  mozilla::ScreenIntPoint mPoint;
+};
+
+/**
+ * Tracks when a vsync occurs according to the HardwareComposer.
+ */
+class VsyncPayload : public ProfilerMarkerPayload
+{
+public:
+  explicit VsyncPayload(mozilla::TimeStamp aVsyncTimestamp);
+  virtual ~VsyncPayload() {}
+
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) override;
+
+private:
+  mozilla::TimeStamp mVsyncTimestamp;
+};
+
+class GPUMarkerPayload : public ProfilerMarkerPayload
+{
+public:
+  GPUMarkerPayload(const mozilla::TimeStamp& aCpuTimeStart,
+                   const mozilla::TimeStamp& aCpuTimeEnd,
+                   uint64_t aGpuTimeStart,
+                   uint64_t aGpuTimeEnd);
+  ~GPUMarkerPayload() {}
+
+  virtual void StreamPayload(SpliceableJSONWriter& aWriter,
+                             UniqueStacks& aUniqueStacks) override;
+
+private:
+  mozilla::TimeStamp mCpuTimeStart;
+  mozilla::TimeStamp mCpuTimeEnd;
+  uint64_t mGpuTimeStart;
+  uint64_t mGpuTimeEnd;
+};
+#endif
+
+#endif // PROFILER_MARKERS_H
diff --git a/tools/profiler/public/PseudoStack.h b/tools/profiler/public/PseudoStack.h
new file mode 100644
index 000000000..f9e3836ea
--- /dev/null
+++ b/tools/profiler/public/PseudoStack.h
@@ -0,0 +1,469 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PROFILER_PSEUDO_STACK_H_
+#define PROFILER_PSEUDO_STACK_H_
+
+#include "mozilla/ArrayUtils.h"
+#include <stdint.h>
+#include "js/ProfilingStack.h"
+#include <stdlib.h>
+#include "mozilla/Atomics.h"
+#ifndef SPS_STANDALONE
+#include "nsISupportsImpl.h"
+#endif
+
+/* we duplicate this code here to avoid header dependencies
+ * which make it more difficult to include in other places */
+#if defined(_M_X64) || defined(__x86_64__)
+#define V8_HOST_ARCH_X64 1
+#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
+#define V8_HOST_ARCH_IA32 1
+#elif defined(__ARMEL__)
+#define V8_HOST_ARCH_ARM 1
+#else
+#warning Please add support for your architecture in chromium_types.h
+#endif
+
+// STORE_SEQUENCER: Because signals can interrupt our profile modification
+//                  we need to make sure stores are not re-ordered by the
+//                  compiler or hardware, so that the profile is consistent at
+//                  every point the signal can fire.
+#ifdef V8_HOST_ARCH_ARM
+// TODO Is there something cheaper that will prevent
+//      memory stores from being reordered
+
+typedef void (*LinuxKernelMemoryBarrierFunc)(void);
+LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) =
+    (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
+
+# define STORE_SEQUENCER() pLinuxKernelMemoryBarrier()
+#elif defined(V8_HOST_ARCH_IA32) || defined(V8_HOST_ARCH_X64)
+# if defined(_MSC_VER)
+#  include <intrin.h>
+#  define STORE_SEQUENCER() _ReadWriteBarrier();
+# elif defined(__INTEL_COMPILER)
+#  define STORE_SEQUENCER() __memory_barrier();
+# elif __GNUC__
+#  define STORE_SEQUENCER() asm volatile("" ::: "memory");
+# else
+#  error "Memory clobber not supported for your compiler."
+# endif
+#else
+# error "Memory clobber not supported for your platform."
+#endif
+
+// We can't include <algorithm> because it causes issues on OS X, so we use
+// our own min function.
+static inline uint32_t sMin(uint32_t l, uint32_t r) {
+  return (r < l) ? r : l;
+}
+
+// A stack entry exists to allow the JS engine to inform SPS of the current
+// backtrace, but also to instrument particular points in C++ in case stack
+// walking is not available on the platform we are running on.
+//
+// Each entry has a descriptive string, a relevant stack address, and some extra
+// information the JS engine might want to inform SPS of. This class inherits
+// from the JS engine's version of the entry to ensure that the size and layout
+// of the two representations are consistent.
+class StackEntry : public js::ProfileEntry
+{
+};
+
+class ProfilerMarkerPayload;
+template<typename T>
+class ProfilerLinkedList;
+class SpliceableJSONWriter;
+class UniqueStacks;
+
+class ProfilerMarker {
+  friend class ProfilerLinkedList<ProfilerMarker>;
+public:
+  explicit ProfilerMarker(const char* aMarkerName,
+                          ProfilerMarkerPayload* aPayload = nullptr,
+                          double aTime = 0);
+
+  ~ProfilerMarker();
+
+  const char* GetMarkerName() const {
+    return mMarkerName;
+  }
+
+  void StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) const;
+
+  void SetGeneration(uint32_t aGenID);
+
+  bool HasExpired(uint32_t aGenID) const {
+    return mGenID + 2 <= aGenID;
+  }
+
+  double GetTime() const;
+
+private:
+  char* mMarkerName;
+  ProfilerMarkerPayload* mPayload;
+  ProfilerMarker* mNext;
+  double mTime;
+  uint32_t mGenID;
+};
+
+template<typename T>
+class ProfilerLinkedList {
+public:
+  // Starts out empty: no head, no tail.
+  ProfilerLinkedList()
+    : mHead(nullptr), mTail(nullptr)
+  {}
+
+  void insert(T* elem)
+  {
+    // Link the new node behind the current tail, or make it the head when
+    // the list is empty; either way it becomes the tail and ends the chain.
+    if (mTail) {
+      mTail->mNext = elem;
+    } else {
+      mHead = elem;
+    }
+    mTail = elem;
+    elem->mNext = nullptr;
+  }
+
+  T* popHead()
+  {
+    T* const first = mHead;
+    if (first == nullptr) {
+      // Popping an empty list: flag it via MOZ_ASSERT and return null.
+      MOZ_ASSERT(false);
+      return nullptr;
+    }
+
+    // Advance the head; when the list drains, the tail is cleared as well.
+    mHead = first->mNext;
+    if (mHead == nullptr) {
+      mTail = nullptr;
+    }
+    return first;
+  }
+
+  const T* peek() {
+    return mHead;  // null when the list is empty
+  }
+
+private:
+  T* mHead;
+  T* mTail;
+};
+
+typedef ProfilerLinkedList<ProfilerMarker> ProfilerMarkerLinkedList;
+
+template<typename T>
+class ProfilerSignalSafeLinkedList {
+public:
+  ProfilerSignalSafeLinkedList()
+    : mSignalLock(false)
+  {}
+
+  ~ProfilerSignalSafeLinkedList()
+  {
+    if (mSignalLock) {
+      // Some thread is modifying the list. We should only be released on that
+      // thread.
+      abort();
+    }
+
+    while (mList.peek()) {
+      delete mList.popHead();
+    }
+  }
+
+  // Insert an item into the list.
+  // Must only be called from the owning thread.
+  // Must not be called while the list from accessList() is being accessed.
+  // In the profiler, we ensure that by interrupting the profiled thread
+  // (which is the one that owns this list and calls insert() on it) until
+  // we're done reading the list from the signal handler.
+  void insert(T* aElement) {
+    MOZ_ASSERT(aElement);
+
+    mSignalLock = true;
+    STORE_SEQUENCER();
+
+    mList.insert(aElement);
+
+    STORE_SEQUENCER();
+    mSignalLock = false;
+  }
+
+  // Called within signal, from any thread, possibly while insert() is in the
+  // middle of modifying the list (on the owning thread). Will return null if
+  // that is the case.
+  // Function must be reentrant.
+  ProfilerLinkedList<T>* accessList()
+  {
+    if (mSignalLock) {
+      return nullptr;
+    }
+    return &mList;
+  }
+
+private:
+  ProfilerLinkedList<T> mList;
+
+  // If this is set, then it's not safe to read the list because its contents
+  // are being changed.
+  volatile bool mSignalLock;
+};
+
+// Stub eventMarker function for js-engine event generation.
+void ProfilerJSEventMarker(const char *event);
+
+// the PseudoStack members are read by signal
+// handlers, so the mutation of them needs to be signal-safe.
+struct PseudoStack
+{
+public:
+  // Create a new PseudoStack and acquire a reference to it.
+  static PseudoStack *create()
+  {
+    return new PseudoStack();
+  }
+
+  // This is called on every profiler restart. Put things that should happen at that time here.
+  void reinitializeOnResume() {
+    // This is needed to cause an initial sample to be taken from sleeping threads. Otherwise sleeping
+    // threads would not have any samples to copy forward while sleeping.
+    mSleepId++;
+  }
+
+  void addMarker(const char* aMarkerStr, ProfilerMarkerPayload* aPayload, double aTime)
+  {
+    ProfilerMarker* marker = new ProfilerMarker(aMarkerStr, aPayload, aTime);
+    mPendingMarkers.insert(marker);
+  }
+
+  // called within signal. Function must be reentrant
+  ProfilerMarkerLinkedList* getPendingMarkers()
+  {
+    // The profiled thread is interrupted, so we can access the list safely.
+    // Unless the profiled thread was in the middle of changing the list when
+    // we interrupted it - in that case, accessList() will return null.
+    return mPendingMarkers.accessList();
+  }
+
+  void push(const char *aName, js::ProfileEntry::Category aCategory, uint32_t line)
+  {
+    push(aName, aCategory, nullptr, false, line);
+  }
+
+  void push(const char *aName, js::ProfileEntry::Category aCategory,
+    void *aStackAddress, bool aCopy, uint32_t line)
+  {
+    if (size_t(mStackPointer) >= mozilla::ArrayLength(mStack)) {
+      mStackPointer++;
+      return;
+    }
+
+    // In order to ensure this object is kept alive while it is
+    // active, we acquire a reference at the outermost push.  This is
+    // released by the corresponding pop.
+    if (mStackPointer == 0) {
+      ref();
+    }
+
+    volatile StackEntry &entry = mStack[mStackPointer];
+
+    // Make sure we increment the pointer after the name has
+    // been written such that mStack is always consistent.
+    entry.initCppFrame(aStackAddress, line);
+    entry.setLabel(aName);
+    MOZ_ASSERT(entry.flags() == js::ProfileEntry::IS_CPP_ENTRY);
+    entry.setCategory(aCategory);
+
+    // Track if mLabel needs a copy.
+    if (aCopy)
+      entry.setFlag(js::ProfileEntry::FRAME_LABEL_COPY);
+    else
+      entry.unsetFlag(js::ProfileEntry::FRAME_LABEL_COPY);
+
+    // Prevent the optimizer from re-ordering these instructions
+    STORE_SEQUENCER();
+    mStackPointer++;
+  }
+
+  // Pop the stack.  When the stack becomes empty, the self-owned reference
+  // taken in 'push' is released (deleting the PseudoStack if that was the
+  // last reference) and "false" is returned.  Otherwise "true" is returned.
+  bool popAndMaybeDelete()
+  {
+    mStackPointer--;
+    if (mStackPointer == 0) {
+      // Release our self-owned reference count.  See 'push'.
+      deref();
+      return false;
+    } else {
+      return true;
+    }
+  }
+  bool isEmpty()
+  {
+    return mStackPointer == 0;
+  }
+  uint32_t stackSize() const
+  {
+    return sMin(mStackPointer, mozilla::sig_safe_t(mozilla::ArrayLength(mStack)));
+  }
+
+  void sampleContext(JSContext* context) {
+#ifndef SPS_STANDALONE
+    if (mContext && !context) {
+      // On JS shut down, flush the current buffer as stringifying JIT samples
+      // requires a live JSContext.
+      flushSamplerOnJSShutdown();
+    }
+
+    mContext = context;
+
+    if (!context) {
+      return;
+    }
+
+    static_assert(sizeof(mStack[0]) == sizeof(js::ProfileEntry),
+                  "mStack must be binary compatible with js::ProfileEntry.");
+    js::SetContextProfilingStack(context,
+                                 (js::ProfileEntry*) mStack,
+                                 (uint32_t*) &mStackPointer,
+                                 (uint32_t) mozilla::ArrayLength(mStack));
+    if (mStartJSSampling)
+      enableJSSampling();
+#endif
+  }
+#ifndef SPS_STANDALONE
+  void enableJSSampling() {
+    if (mContext) {
+      js::EnableContextProfilingStack(mContext, true);
+      js::RegisterContextProfilingEventMarker(mContext, &ProfilerJSEventMarker);
+      mStartJSSampling = false;
+    } else {
+      mStartJSSampling = true;
+    }
+  }
+  void jsOperationCallback() {
+    if (mStartJSSampling)
+      enableJSSampling();
+  }
+  void disableJSSampling() {
+    mStartJSSampling = false;
+    if (mContext)
+      js::EnableContextProfilingStack(mContext, false);
+  }
+#endif
+
+  // Keep a list of active checkpoints
+  StackEntry volatile mStack[1024];
+ private:
+
+  // A PseudoStack can only be created via the "create" method.
+  PseudoStack()
+    : mStackPointer(0)
+    , mSleepId(0)
+    , mSleepIdObserved(0)
+    , mSleeping(false)
+    , mRefCnt(1)
+#ifndef SPS_STANDALONE
+    , mContext(nullptr)
+#endif
+    , mStartJSSampling(false)
+    , mPrivacyMode(false)
+  {
+    MOZ_COUNT_CTOR(PseudoStack);
+  }
+
+  // A PseudoStack can only be deleted via deref.
+  ~PseudoStack() {
+    MOZ_COUNT_DTOR(PseudoStack);
+    if (mStackPointer != 0) {
+      // We're releasing the pseudostack while it's still in use.
+      // The label macros keep a non ref counted reference to the
+      // stack to avoid a TLS. If these are not all cleared we will
+      // get a use-after-free so better to crash now.
+      abort();
+    }
+  }
+
+  // No copying.
+  PseudoStack(const PseudoStack&) = delete;
+  void operator=(const PseudoStack&) = delete;
+
+  void flushSamplerOnJSShutdown();
+
+  // Keep a list of pending markers that must be moved
+  // to the circular buffer
+  ProfilerSignalSafeLinkedList<ProfilerMarker> mPendingMarkers;
+  // This may exceed the length of mStack, so instead use the stackSize() method
+  // to determine the number of valid samples in mStack
+  mozilla::sig_safe_t mStackPointer;
+  // Incremented at every sleep/wake up of the thread
+  int mSleepId;
+  // Previous id observed. If this is not the same as mSleepId, this thread is not sleeping in the same place any more
+  mozilla::Atomic<int> mSleepIdObserved;
+  // Keeps track of whether the thread is sleeping or not (1 when sleeping, 0 when awake)
+  mozilla::Atomic<int> mSleeping;
+  // This class is reference counted because it must be kept alive by
+  // the ThreadInfo, by the reference from tlsPseudoStack, and by the
+  // current thread when callbacks are in progress.
+  mozilla::Atomic<int> mRefCnt;
+
+ public:
+#ifndef SPS_STANDALONE
+  // The context which is being sampled
+  JSContext *mContext;
+#endif
+  // Start JS Profiling when possible
+  bool mStartJSSampling;
+  bool mPrivacyMode;
+
+  enum SleepState {NOT_SLEEPING, SLEEPING_FIRST, SLEEPING_AGAIN};
+
+  // The first time this is called per sleep cycle we return SLEEPING_FIRST
+  // and any other subsequent call within the same sleep cycle we return SLEEPING_AGAIN
+  SleepState observeSleeping() {
+    if (mSleeping != 0) {
+      if (mSleepIdObserved == mSleepId) {
+        return SLEEPING_AGAIN;
+      } else {
+        mSleepIdObserved = mSleepId;
+        return SLEEPING_FIRST;
+      }
+    } else {
+      return NOT_SLEEPING;
+    }
+  }
+
+
+  // Call this whenever the current thread sleeps or wakes up
+  // Calling setSleeping with the same value twice in a row is an error
+  void setSleeping(int sleeping) {
+    MOZ_ASSERT(mSleeping != sleeping);
+    mSleepId++;
+    mSleeping = sleeping;
+  }
+
+  bool isSleeping() {
+    return !!mSleeping;
+  }
+
+  void ref() {
+    ++mRefCnt;
+  }
+
+  void deref() {
+    int newValue = --mRefCnt;
+    if (newValue == 0) {
+      delete this;
+    }
+  }
+};
+
+#endif
diff --git a/tools/profiler/public/shared-libraries.h b/tools/profiler/public/shared-libraries.h
new file mode 100644
index 000000000..b02a1fb08
--- /dev/null
+++ b/tools/profiler/public/shared-libraries.h
@@ -0,0 +1,137 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef SHARED_LIBRARIES_H_
+#define SHARED_LIBRARIES_H_
+
+#ifndef MOZ_ENABLE_PROFILER_SPS
+#error This header does not have a useful implementation on your platform!
+#endif
+
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <stdlib.h>
+#include <stdint.h>
+#ifndef SPS_STANDALONE
+#include <nsID.h>
+#endif
+
+class SharedLibrary {
+public:
+
+  // Value type describing one shared library by its start and end addresses,
+  // an offset, a breakpad id string and a name. Every argument is stored
+  // as given; no validation is performed.
+  SharedLibrary(uintptr_t aStart,
+                uintptr_t aEnd,
+                uintptr_t aOffset,
+                const std::string& aBreakpadId,
+                const std::string& aName)
+    : mStart(aStart)
+    , mEnd(aEnd)
+    , mOffset(aOffset)
+    , mBreakpadId(aBreakpadId)
+    , mName(aName)
+  {}
+
+  // Copying is purely member-wise, so the compiler-generated operations are
+  // used: they copy each of the five fields and are safe under
+  // self-assignment.
+  SharedLibrary(const SharedLibrary& aEntry) = default;
+  SharedLibrary& operator=(const SharedLibrary& aEntry) = default;
+
+  // Two entries are equal only when every field matches. The integer
+  // comparisons come first, so && short-circuits before any string is
+  // compared when the addresses or offset differ.
+  bool operator==(const SharedLibrary& other) const
+  {
+    return (mStart == other.mStart) &&
+           (mEnd == other.mEnd) &&
+           (mOffset == other.mOffset) &&
+           (mName == other.mName) &&
+           (mBreakpadId == other.mBreakpadId);
+  }
+
+  // Read-only accessors for the stored fields.
+  uintptr_t GetStart() const { return mStart; }
+  uintptr_t GetEnd() const { return mEnd; }
+  uintptr_t GetOffset() const { return mOffset; }
+  const std::string &GetBreakpadId() const { return mBreakpadId; }
+  const std::string &GetName() const { return mName; }
+
+private:
+  // Private, so only the class itself can default-construct. The integral
+  // fields are zeroed so that such an object never carries indeterminate
+  // values (the strings default to empty on their own).
+  SharedLibrary()
+    : mStart(0)
+    , mEnd(0)
+    , mOffset(0)
+  {}
+
+  // Fields, in constructor-argument order.
+  uintptr_t mStart;
+  uintptr_t mEnd;
+  uintptr_t mOffset;
+  std::string mBreakpadId;
+  std::string mName;
+};
+
+static bool
+CompareAddresses(const SharedLibrary& first, const SharedLibrary& second)
+{
+  return second.GetStart() > first.GetStart();  // strict "starts before"
+}
+
+class SharedLibraryInfo {
+public:
+  // Collects SharedLibrary entries; GetInfoForSelf() is only declared here.
+  static SharedLibraryInfo GetInfoForSelf();
+  SharedLibraryInfo() {}
+
+  // Appends a copy of |entry| after the entries already stored
+  // (the argument itself is taken by value).
+  void AddSharedLibrary(SharedLibrary entry) { mEntries.push_back(entry); }
+
+  // Returns the entry at index |i|. The index is not range-checked here.
+  const SharedLibrary& GetEntry(size_t i) const { return mEntries[i]; }
+
+  // Removes items in the range [first, last)
+  // i.e. element at the "last" index is not removed
+  void RemoveEntries(size_t first, size_t last)
+  {
+    const std::vector<SharedLibrary>::iterator base = mEntries.begin();
+    mEntries.erase(base + first, base + last);
+  }
+
+  // True when a stored entry compares equal (operator==) to |searchItem|.
+  bool Contains(const SharedLibrary& searchItem) const
+  {
+    std::vector<SharedLibrary>::const_iterator hit =
+      std::find(mEntries.begin(), mEntries.end(), searchItem);
+    return hit != mEntries.end();
+  }
+
+  // Number of entries currently stored.
+  size_t GetSize() const { return mEntries.size(); }
+
+  // Sorts the entries with CompareAddresses, i.e. by ascending start
+  // address.
+  void SortByAddress()
+  {
+    std::sort(mEntries.begin(), mEntries.end(), CompareAddresses);
+  }
+
+  // Removes every entry.
+  void Clear() { mEntries.clear(); }
+
+private:
+  // The libraries, in insertion order unless SortByAddress() was called.
+  std::vector<SharedLibrary> mEntries;
+};
+
+#endif
diff --git a/tools/profiler/tasktracer/GeckoTaskTracer.cpp b/tools/profiler/tasktracer/GeckoTaskTracer.cpp
new file mode 100644
index 000000000..ada695614
--- /dev/null
+++ b/tools/profiler/tasktracer/GeckoTaskTracer.cpp
@@ -0,0 +1,472 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "GeckoTaskTracer.h"
+#include "GeckoTaskTracerImpl.h"
+
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/StaticMutex.h"
+#include "mozilla/ThreadLocal.h"
+#include "mozilla/TimeStamp.h"
+#include "mozilla/UniquePtr.h"
+#include "mozilla/Unused.h"
+
+#include "nsString.h"
+#include "nsThreadUtils.h"
+#include "prtime.h"
+
+#include <stdarg.h>
+
+// We need a definition of gettid(), but glibc doesn't provide a
+// wrapper for it.
+#if defined(__GLIBC__)
+#include <unistd.h>
+#include <sys/syscall.h>
+static inline pid_t gettid()
+{
+  return (pid_t) syscall(SYS_gettid);
+}
+#elif defined(XP_MACOSX)
+#include <unistd.h>
+#include <sys/syscall.h>
+static inline pid_t gettid()
+{
+  return (pid_t) syscall(SYS_thread_selfid);
+}
+#elif defined(LINUX)
+#include <sys/types.h>
+pid_t gettid();
+#endif
+
+// NS_ENSURE_TRUE_VOID() without the warning on the debug build.
+#define ENSURE_TRUE_VOID(x)   \
+  do {                        \
+    if (MOZ_UNLIKELY(!(x))) { \
+       return;                \
+    }                         \
+  } while(0)
+
+// NS_ENSURE_TRUE() without the warning on the debug build.
+#define ENSURE_TRUE(x, ret)   \
+  do {                        \
+    if (MOZ_UNLIKELY(!(x))) { \
+       return ret;            \
+    }                         \
+  } while(0)
+
+namespace mozilla {
+namespace tasktracer {
+
+static MOZ_THREAD_LOCAL(TraceInfo*) sTraceInfoTLS;
+static mozilla::StaticMutex sMutex;
+
+// Whether the Task Tracer is currently started. Written by SetLogStarted()
+// and read through IsStartLogging().
+static mozilla::Atomic<bool> sStarted;
+static nsTArray<UniquePtr<TraceInfo>>* sTraceInfos = nullptr;
+static PRTime sStartTime;
+
+static const char sJSLabelPrefix[] = "#tt#";
+
+namespace {
+
+static PRTime
+GetTimestamp()
+{
+  return PR_Now() / 1000;
+}
+
+static TraceInfo*
+AllocTraceInfo(int aTid)
+{
+  StaticMutexAutoLock lock(sMutex);
+
+  auto* info = sTraceInfos->AppendElement(MakeUnique<TraceInfo>(aTid));
+
+  return info->get();
+}
+
+static void
+SaveCurTraceInfo()
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+
+  info->mSavedCurTraceSourceId = info->mCurTraceSourceId;
+  info->mSavedCurTraceSourceType = info->mCurTraceSourceType;
+  info->mSavedCurTaskId = info->mCurTaskId;
+}
+
+static void
+RestoreCurTraceInfo()
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+
+  info->mCurTraceSourceId = info->mSavedCurTraceSourceId;
+  info->mCurTraceSourceType = info->mSavedCurTraceSourceType;
+  info->mCurTaskId = info->mSavedCurTaskId;
+}
+
+static void
+CreateSourceEvent(SourceEventType aType)
+{
+  // Save the current trace info (DestroySourceEvent() restores it).
+  SaveCurTraceInfo();
+
+  // Create a new unique task id.
+  uint64_t newId = GenNewUniqueTaskId();
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+
+  info->mCurTraceSourceId = newId;
+  info->mCurTraceSourceType = aType;
+  info->mCurTaskId = newId;
+
+  uintptr_t* namePtr;
+#define SOURCE_EVENT_NAME(type)         \
+  case SourceEventType::type:           \
+  {                                     \
+    static int CreateSourceEvent##type; \
+    namePtr = (uintptr_t*)&CreateSourceEvent##type; \
+    break;                              \
+  }
+  // Each generated case points namePtr at its own function-local static int.
+  switch (aType) {
+#include "SourceEventTypeMap.h"
+    default:
+      MOZ_CRASH("Unknown SourceEvent.");
+  }
+#undef SOURCE_EVENT_NAME
+
+  // Log a fake dispatch and start for this source event.
+  LogDispatch(newId, newId, newId, aType);
+  LogVirtualTablePtr(newId, newId, namePtr);
+  LogBegin(newId, newId);
+}
+
+static void
+DestroySourceEvent()
+{
+  // Log a fake end for this source event.
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+
+  LogEnd(info->mCurTraceSourceId, info->mCurTraceSourceId);
+
+  // Restore the previously saved source event info.
+  RestoreCurTraceInfo();
+}
+
+inline static bool
+IsStartLogging()
+{
+  return sStarted;
+}
+
+static void
+SetLogStarted(bool aIsStartLogging)
+{
+  MOZ_ASSERT(aIsStartLogging != IsStartLogging());
+  sStarted = aIsStartLogging;
+  // On stop, mark every TraceInfo obsolete; sTraceInfos may still be null.
+  StaticMutexAutoLock lock(sMutex);
+  if (!aIsStartLogging && sTraceInfos) {
+    for (uint32_t i = 0; i < sTraceInfos->Length(); ++i) {
+      (*sTraceInfos)[i]->mObsolete = true;
+    }
+  }
+}
+
+static void
+CleanUp()
+{
+  // Stop logging first; SetLogStarted() takes and releases sMutex itself,
+  // so the lock below is only acquired once that call has returned.
+  SetLogStarted(false);
+  StaticMutexAutoLock lock(sMutex);
+
+  delete sTraceInfos;  // deleting a null pointer is a no-op
+  sTraceInfos = nullptr;
+}
+
+inline static void
+ObsoleteCurrentTraceInfos()
+{
+  // Note that we can't and don't need to acquire sMutex here because this
+  // function is called before the other threads are recreated.
+  for (uint32_t i = 0; i < sTraceInfos->Length(); ++i) {
+    (*sTraceInfos)[i]->mObsolete = true;
+  }
+}
+
+} // namespace anonymous
+
+nsCString*
+TraceInfo::AppendLog()
+{
+  MutexAutoLock lock(mLogsMutex);
+  return mLogs.AppendElement();
+}
+
+void
+TraceInfo::MoveLogsInto(TraceInfoLogsType& aResult)
+{
+  MutexAutoLock lock(mLogsMutex);
+  aResult.AppendElements(Move(mLogs));
+}
+
+void
+InitTaskTracer(uint32_t aFlags)
+{
+  if (aFlags & FORKED_AFTER_NUWA) {
+    ObsoleteCurrentTraceInfos();
+    return;
+  }
+
+  MOZ_ASSERT(!sTraceInfos);
+  sTraceInfos = new nsTArray<UniquePtr<TraceInfo>>();
+
+  if (!sTraceInfoTLS.initialized()) {
+    Unused << sTraceInfoTLS.init();
+  }
+}
+
+void
+ShutdownTaskTracer()
+{
+  CleanUp();
+}
+
+static void
+FreeTraceInfo(TraceInfo* aTraceInfo)
+{
+  StaticMutexAutoLock lock(sMutex);
+  if (aTraceInfo) {
+    sTraceInfos->RemoveElement(aTraceInfo);
+  }
+}
+
+void FreeTraceInfo()
+{
+  FreeTraceInfo(sTraceInfoTLS.get());  // The calling thread's TraceInfo, if any.
+}
+
+// Returns the calling thread's TraceInfo, or nullptr when the TLS slot is not
+// initialized or logging is off. A cached entry flagged obsolete is handed to
+// FreeTraceInfo() and replaced by a newly allocated one.
+TraceInfo* GetOrCreateTraceInfo()
+{
+  ENSURE_TRUE(sTraceInfoTLS.initialized(), nullptr);
+  ENSURE_TRUE(IsStartLogging(), nullptr);
+  TraceInfo* cached = sTraceInfoTLS.get();
+  if (cached && !cached->mObsolete) {
+    return cached;
+  }
+  if (cached) {
+    // TraceInfo is obsolete: remove it.
+    FreeTraceInfo(cached);
+  }
+
+  TraceInfo* fresh = AllocTraceInfo(gettid());
+  sTraceInfoTLS.set(fresh);
+  return fresh;
+}
+
+// Builds a task id with the calling thread id in the high 32 bits and this
+// thread's pre-incremented counter in the low bits; 0 if there is no TraceInfo.
+uint64_t GenNewUniqueTaskId()
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE(info, 0);
+
+  const uint64_t threadBits = static_cast<uint64_t>(gettid()) << 32;
+  return threadBits | ++info->mLastUniqueTaskId;
+}
+
+AutoSaveCurTraceInfo::AutoSaveCurTraceInfo()
+{
+  SaveCurTraceInfo();  // Pairs with the destructor's RestoreCurTraceInfo().
+}
+
+AutoSaveCurTraceInfo::~AutoSaveCurTraceInfo()
+{
+  RestoreCurTraceInfo();  // Pairs with the constructor's SaveCurTraceInfo().
+}
+
+// Overwrites the calling thread's current source-event id/type and task id.
+void SetCurTraceInfo(uint64_t aSourceEventId, uint64_t aParentTaskId,
+                     SourceEventType aSourceEventType)
+{
+  TraceInfo* const traceInfo = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(traceInfo);
+
+  traceInfo->mCurTraceSourceType = aSourceEventType;
+  traceInfo->mCurTraceSourceId = aSourceEventId;
+  traceInfo->mCurTaskId = aParentTaskId;
+}
+
+// Copies the thread's current trace state out; no-op if there is none.
+void GetCurTraceInfo(uint64_t* aOutSourceEventId, uint64_t* aOutParentTaskId,
+                     SourceEventType* aOutSourceEventType)
+{
+  TraceInfo* const traceInfo = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(traceInfo);
+
+  *aOutSourceEventType = traceInfo->mCurTraceSourceType;
+  *aOutSourceEventId = traceInfo->mCurTraceSourceId;
+  *aOutParentTaskId = traceInfo->mCurTaskId;
+}
+
+// Convenience overload: logs the dispatch with a delay of 0 ms.
+void LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId,
+                 uint64_t aSourceEventId, SourceEventType aSourceEventType)
+{
+  LogDispatch(aTaskId, aParentTaskId, aSourceEventId, aSourceEventType, 0);
+}
+
+// Appends an ACTION_DISPATCH record for aTaskId to the calling thread's log.
+void LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId,
+                 uint64_t aSourceEventId, SourceEventType aSourceEventType,
+                 int aDelayTimeMs)
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+
+  // aDelayTimeMs is the expected delay time in milliseconds, thus the dispatch
+  // time calculated of it might be slightly off in the real world.
+  uint64_t dispatchTime = (aDelayTimeMs > 0) ? GetTimestamp() + aDelayTimeMs
+                                             : GetTimestamp();
+  // Log format:
+  // [0 taskId dispatchTime sourceEventId sourceEventType parentTaskId]
+  nsCString* entry = info->AppendLog();
+  if (entry) {
+    entry->AppendPrintf("%d %lld %lld %lld %d %lld",
+                        ACTION_DISPATCH, aTaskId, dispatchTime, aSourceEventId,
+                        aSourceEventType, aParentTaskId);
+  }
+}
+
+// Appends an ACTION_BEGIN record for aTaskId; aSourceEventId is not used.
+void LogBegin(uint64_t aTaskId, uint64_t aSourceEventId)
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+
+  // Log format:
+  // [1 taskId beginTime processId threadId]
+  nsCString* entry = info->AppendLog();
+  if (entry) {
+    entry->AppendPrintf("%d %lld %lld %d %d", ACTION_BEGIN, aTaskId,
+                        GetTimestamp(), getpid(), gettid());
+  }
+}
+
+// Appends an ACTION_END record for aTaskId; aSourceEventId is not used.
+void LogEnd(uint64_t aTaskId, uint64_t aSourceEventId)
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+
+  // Log format:
+  // [2 taskId endTime]
+  nsCString* entry = info->AppendLog();
+  if (entry) {
+    entry->AppendPrintf("%d %lld %lld", ACTION_END, aTaskId, GetTimestamp());
+  }
+}
+
+// Appends an ACTION_GET_VTABLE record. NOTE(review): aVptr itself is printed
+// with %p, never dereferenced -- confirm callers pass the intended address.
+void
+LogVirtualTablePtr(uint64_t aTaskId, uint64_t aSourceEventId, uintptr_t* aVptr)
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);
+  // Log format: [4 taskId address]
+  nsCString* entry = info->AppendLog();
+  if (entry) {
+    entry->AppendPrintf("%d %lld %p", ACTION_GET_VTABLE, aTaskId, aVptr);
+  }
+}
+
+AutoSourceEvent::AutoSourceEvent(SourceEventType aType)
+{
+  CreateSourceEvent(aType);  // Pairs with the destructor's DestroySourceEvent().
+}
+
+AutoSourceEvent::~AutoSourceEvent()
+{
+  DestroySourceEvent();  // Pairs with the constructor's CreateSourceEvent().
+}
+
+void AddLabel(const char* aFormat, ...)
+{
+  TraceInfo* info = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(info);  // Nothing is logged when no TraceInfo is available.
+
+  va_list args;
+  va_start(args, aFormat);
+  nsAutoCString buffer;
+  buffer.AppendPrintf(aFormat, args);  // NOTE(review): needs a va_list overload.
+  va_end(args);
+
+  // Log format:
+  // [3 taskId curTime "label"]
+  nsCString* log = info->AppendLog();
+  if (log) {
+    log->AppendPrintf("%d %lld %lld \"%s\"", ACTION_ADD_LABEL, info->mCurTaskId,
+                      GetTimestamp(), buffer.get());
+  }
+}
+
+// Functions used by GeckoProfiler.
+
+// Records the start timestamp, then switches logging on.
+void StartLogging()
+{
+  sStartTime = GetTimestamp();
+  SetLogStarted(true);
+}
+
+// Switches logging off through SetLogStarted(false).
+void StopLogging()
+{
+  SetLogStarted(false);
+}
+
+// Drains every thread's buffered logs into one new array (aTimeStamp unused).
+// Yields an empty array if the TraceInfo registry is missing or already freed.
+UniquePtr<TraceInfoLogsType> GetLoggedData(TimeStamp aTimeStamp)
+{
+  auto result = MakeUnique<TraceInfoLogsType>();
+  // TODO: This is called from a signal handler. Use semaphore instead.
+  StaticMutexAutoLock lock(sMutex);
+  if (sTraceInfos) {
+    for (uint32_t i = 0; i < sTraceInfos->Length(); ++i) {
+      (*sTraceInfos)[i]->MoveLogsInto(*result);
+    }
+  }
+  return result;
+}
+
+// Returns sStartTime, the timestamp recorded by StartLogging().
+const PRTime GetStartTime()
+{
+  return sStartTime;
+}
+
+// Returns the sJSLabelPrefix string.
+const char* GetJSLabelPrefix()
+{
+  return sJSLabelPrefix;
+}
+
+#undef ENSURE_TRUE_VOID
+#undef ENSURE_TRUE
+
+} // namespace tasktracer
+} // namespace mozilla
diff --git a/tools/profiler/tasktracer/GeckoTaskTracer.h b/tools/profiler/tasktracer/GeckoTaskTracer.h
new file mode 100644
index 000000000..9e36b3f0b
--- /dev/null
+++ b/tools/profiler/tasktracer/GeckoTaskTracer.h
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef GECKO_TASK_TRACER_H
+#define GECKO_TASK_TRACER_H
+
+#include "mozilla/UniquePtr.h"
+#include "nsCOMPtr.h"
+#include "nsTArrayForwardDeclare.h"
+
+/**
+ * TaskTracer provides a way to trace the correlation between different tasks
+ * across threads and processes. Unlike sampling based profilers, TaskTracer can
+ * tell you where a task is dispatched from, what its original source was, how
+ * long it waited in the event queue, and how long it took to execute.
+ *
+ * Source Events are usually some kinds of I/O events we're interested in, such
+ * as touch events, timer events, network events, etc. When a source event is
+ * created, TaskTracer records the entire chain of Tasks and nsRunnables as they
+ * are dispatched to different threads and processes. It records latency,
+ * execution time, etc. for each Task and nsRunnable that chains back to the
+ * original source event.
+ */
+
+class Task;
+class nsIRunnable;
+class nsCString;
+
+namespace mozilla {
+
+class TimeStamp;
+
+namespace tasktracer {
+
+enum {
+  FORKED_AFTER_NUWA = 1 << 0  // InitTaskTracer() flag: only obsolete old TraceInfos.
+};
+
+enum SourceEventType {  // Names and order match SourceEventTypeMap.h.
+  Unknown = 0,  // Initial and cleared value of a thread's current source type.
+  Touch,
+  Mouse,
+  Key,
+  Bluetooth,
+  Unixsocket,
+  Wifi
+};
+
+class AutoSourceEvent
+{
+public:
+  AutoSourceEvent(SourceEventType aType);  // Calls CreateSourceEvent(aType).
+  ~AutoSourceEvent();                      // Calls DestroySourceEvent().
+};
+
+void InitTaskTracer(uint32_t aFlags = 0);
+void ShutdownTaskTracer();
+
+// Add a label to the currently running task, aFormat is the message to log,
+// followed by corresponding parameters.
+void AddLabel(const char* aFormat, ...);
+
+void StartLogging();
+void StopLogging();
+UniquePtr<nsTArray<nsCString>> GetLoggedData(TimeStamp aStartTime);
+
+// Returns the timestamp when Task Tracer is enabled in this process.
+const PRTime GetStartTime();
+
+/**
+ * Internal functions.
+ */
+
+Task* CreateTracedTask(Task* aTask);
+
+already_AddRefed<nsIRunnable>
+CreateTracedRunnable(already_AddRefed<nsIRunnable>&& aRunnable);
+
+// Free the TraceInfo allocated on a thread's TLS. Currently we are wrapping
+// tasks running on nsThreads and base::thread, so FreeTraceInfo is called
+// where nsThread and base::thread release themselves.
+void FreeTraceInfo();
+
+const char* GetJSLabelPrefix();
+
+} // namespace tasktracer
+} // namespace mozilla.
+
+#endif
diff --git a/tools/profiler/tasktracer/GeckoTaskTracerImpl.h b/tools/profiler/tasktracer/GeckoTaskTracerImpl.h
new file mode 100644
index 000000000..5b748fb96
--- /dev/null
+++ b/tools/profiler/tasktracer/GeckoTaskTracerImpl.h
@@ -0,0 +1,102 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef GECKO_TASK_TRACER_IMPL_H
+#define GECKO_TASK_TRACER_IMPL_H
+
+#include "GeckoTaskTracer.h"
+#include "mozilla/Mutex.h"
+#include "nsTArray.h"
+
+namespace mozilla {
+namespace tasktracer {
+
+typedef nsTArray<nsCString> TraceInfoLogsType;
+
+struct TraceInfo  // Per-thread tracing state, reachable through sTraceInfoTLS.
+{
+  TraceInfo(uint32_t aThreadId)
+    : mCurTraceSourceId(0)
+    , mCurTaskId(0)
+    , mSavedCurTraceSourceId(0)
+    , mSavedCurTaskId(0)
+    , mCurTraceSourceType(Unknown)
+    , mSavedCurTraceSourceType(Unknown)
+    , mThreadId(aThreadId)
+    , mLastUniqueTaskId(0)
+    , mObsolete(false)
+    , mLogsMutex("TraceInfoMutex")
+  {
+    MOZ_COUNT_CTOR(TraceInfo);
+  }
+
+  ~TraceInfo() { MOZ_COUNT_DTOR(TraceInfo); }
+
+  nsCString* AppendLog();  // Adds an empty entry to mLogs and returns it.
+  void MoveLogsInto(TraceInfoLogsType& aResult);  // Moves mLogs into aResult.
+
+  uint64_t mCurTraceSourceId;  // Source event currently attributed to the thread.
+  uint64_t mCurTaskId;         // Task currently attributed to the thread.
+  uint64_t mSavedCurTraceSourceId;  // Presumably set by SaveCurTraceInfo() -- confirm.
+  uint64_t mSavedCurTaskId;         // Presumably set by SaveCurTraceInfo() -- confirm.
+  SourceEventType mCurTraceSourceType;
+  SourceEventType mSavedCurTraceSourceType;
+  uint32_t mThreadId;
+  uint32_t mLastUniqueTaskId;  // Pre-incremented by GenNewUniqueTaskId().
+  mozilla::Atomic<bool> mObsolete;  // When true, GetOrCreateTraceInfo() replaces this.
+
+  // This mutex protects the following log array because MoveLogsInto() might
+  // be called on another thread.
+  mozilla::Mutex mLogsMutex;
+  TraceInfoLogsType mLogs;
+};
+
+// Return the TraceInfo of the current thread, allocating one if none exists.
+TraceInfo* GetOrCreateTraceInfo();
+
+uint64_t GenNewUniqueTaskId();
+
+class AutoSaveCurTraceInfo
+{
+public:
+  AutoSaveCurTraceInfo();   // Calls SaveCurTraceInfo().
+  ~AutoSaveCurTraceInfo();  // Calls RestoreCurTraceInfo().
+};
+
+void SetCurTraceInfo(uint64_t aSourceEventId, uint64_t aParentTaskId,
+                     SourceEventType aSourceEventType);
+
+void GetCurTraceInfo(uint64_t* aOutSourceEventId, uint64_t* aOutParentTaskId,
+                     SourceEventType* aOutSourceEventType);
+
+/**
+ * Logging functions of different trace actions.
+ */
+enum ActionType {  // Written as the first field of every log record.
+  ACTION_DISPATCH = 0,  // Emitted by LogDispatch().
+  ACTION_BEGIN,         // Emitted by LogBegin().
+  ACTION_END,           // Emitted by LogEnd().
+  ACTION_ADD_LABEL,     // Emitted by AddLabel().
+  ACTION_GET_VTABLE     // Emitted by LogVirtualTablePtr().
+};
+
+void LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId,
+                 uint64_t aSourceEventId, SourceEventType aSourceEventType);
+
+void LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId,
+                 uint64_t aSourceEventId, SourceEventType aSourceEventType,
+                 int aDelayTimeMs);
+
+void LogBegin(uint64_t aTaskId, uint64_t aSourceEventId);
+
+void LogEnd(uint64_t aTaskId, uint64_t aSourceEventId);
+
+void LogVirtualTablePtr(uint64_t aTaskId, uint64_t aSourceEventId, uintptr_t* aVptr);
+
+} // namespace tasktracer
+} // namespace mozilla
+
+#endif
diff --git a/tools/profiler/tasktracer/SourceEventTypeMap.h b/tools/profiler/tasktracer/SourceEventTypeMap.h
new file mode 100644
index 000000000..77dbc8330
--- /dev/null
+++ b/tools/profiler/tasktracer/SourceEventTypeMap.h
@@ -0,0 +1,11 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+SOURCE_EVENT_NAME(Unknown)  // Same names and order as enum SourceEventType.
+SOURCE_EVENT_NAME(Touch)
+SOURCE_EVENT_NAME(Mouse)
+SOURCE_EVENT_NAME(Key)
+SOURCE_EVENT_NAME(Bluetooth)
+SOURCE_EVENT_NAME(Unixsocket)
+SOURCE_EVENT_NAME(Wifi)
diff --git a/tools/profiler/tasktracer/TracedTaskCommon.cpp b/tools/profiler/tasktracer/TracedTaskCommon.cpp
new file mode 100644
index 000000000..770eb202c
--- /dev/null
+++ b/tools/profiler/tasktracer/TracedTaskCommon.cpp
@@ -0,0 +1,169 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "GeckoTaskTracerImpl.h"
+#include "TracedTaskCommon.h"
+
+// Like NS_ENSURE_TRUE_VOID(), but returns silently (no debug-build warning).
+#define ENSURE_TRUE_VOID(x)   \
+  do {                        \
+    if (MOZ_UNLIKELY(!(x))) { \
+       return;                \
+    }                         \
+  } while(0)
+
+namespace mozilla {
+namespace tasktracer {
+
+TracedTaskCommon::TracedTaskCommon()
+  : mSourceEventType(SourceEventType::Unknown)
+  , mSourceEventId(0)
+  , mParentTaskId(0)
+  , mTaskId(0)
+  , mIsTraceInfoInit(false)  // Becomes true in Init() or GetTLSTraceInfo().
+{
+}
+
+TracedTaskCommon::~TracedTaskCommon()
+{  // Intentionally empty.
+}
+
+// Assigns a fresh task id and snapshots the thread's current trace state.
+void TracedTaskCommon::Init()
+{
+  TraceInfo* const tls = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(tls);
+
+  mTaskId = GenNewUniqueTaskId();
+  mParentTaskId = tls->mCurTaskId;
+  mSourceEventType = tls->mCurTraceSourceType;
+  mSourceEventId = tls->mCurTraceSourceId;
+  mIsTraceInfoInit = true;
+}
+
+// Logs this task's dispatch; aDelayTimeMs is forwarded to LogDispatch().
+void TracedTaskCommon::DispatchTask(int aDelayTimeMs)
+{
+  LogDispatch(mTaskId, mParentTaskId, mSourceEventId, mSourceEventType,
+              aDelayTimeMs);
+}
+
+// Adopts the thread's current source event and task id as this task's own.
+void TracedTaskCommon::GetTLSTraceInfo()
+{
+  TraceInfo* tls = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(tls);
+
+  mTaskId = tls->mCurTaskId;
+  mSourceEventId = tls->mCurTraceSourceId;
+  mSourceEventType = tls->mCurTraceSourceType;
+  mIsTraceInfoInit = true;
+}
+
+// Publishes this task's ids to the thread's TraceInfo once they are set up.
+void TracedTaskCommon::SetTLSTraceInfo()
+{
+  TraceInfo* tls = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(tls);
+  if (!mIsTraceInfoInit) {
+    return;
+  }
+  tls->mCurTaskId = mTaskId;
+  tls->mCurTraceSourceType = mSourceEventType;
+  tls->mCurTraceSourceId = mSourceEventId;
+}
+
+// Resets the thread's current source event and task id to 0 / Unknown.
+void TracedTaskCommon::ClearTLSTraceInfo()
+{
+  TraceInfo* tls = GetOrCreateTraceInfo();
+  ENSURE_TRUE_VOID(tls);
+
+  tls->mCurTaskId = 0;
+  tls->mCurTraceSourceType = SourceEventType::Unknown;
+  tls->mCurTraceSourceId = 0;
+}
+
+/**
+ * Implementation of class TracedRunnable.
+ */
+TracedRunnable::TracedRunnable(already_AddRefed<nsIRunnable>&& aOriginalObj)
+  : TracedTaskCommon()
+  , mOriginalObj(Move(aOriginalObj))
+{
+  Init();  // Assigns mTaskId and mSourceEventId, which are logged just below.
+  LogVirtualTablePtr(mTaskId, mSourceEventId, reinterpret_cast<uintptr_t*>(mOriginalObj.get()));
+}
+
+TracedRunnable::~TracedRunnable()
+{  // Intentionally empty.
+}
+
+NS_IMETHODIMP
+TracedRunnable::Run()
+{
+  SetTLSTraceInfo();  // Make this task's ids the thread's current trace state.
+  LogBegin(mTaskId, mSourceEventId);
+  nsresult rv = mOriginalObj->Run();
+  LogEnd(mTaskId, mSourceEventId);
+  ClearTLSTraceInfo();  // Reset the thread's current ids to 0 / Unknown.
+
+  return rv;  // The wrapped runnable's result is passed through unchanged.
+}
+
+/**
+ * Implementation of class TracedTask.
+ */
+TracedTask::TracedTask(Task* aOriginalObj)
+  : TracedTaskCommon()
+  , mOriginalObj(aOriginalObj)  // Deleted in ~TracedTask().
+{
+  Init();  // Assigns mTaskId and mSourceEventId, which are logged just below.
+  LogVirtualTablePtr(mTaskId, mSourceEventId, reinterpret_cast<uintptr_t*>(aOriginalObj));
+}
+
+// Owns the wrapped task: destroys it together with the wrapper.
+TracedTask::~TracedTask()
+{
+  // delete on a null pointer is a no-op, so no explicit check is needed.
+  delete mOriginalObj;
+  mOriginalObj = nullptr;
+}
+
+void
+TracedTask::Run()
+{
+  SetTLSTraceInfo();  // Make this task's ids the thread's current trace state.
+  LogBegin(mTaskId, mSourceEventId);
+  mOriginalObj->Run();
+  LogEnd(mTaskId, mSourceEventId);
+  ClearTLSTraceInfo();  // Reset the thread's current ids to 0 / Unknown.
+}
+
+/**
+ * CreateTracedRunnable() wraps the original nsIRunnable, aRunnable, in a new
+ * TracedRunnable and hands the wrapper back to the caller.
+ */
+already_AddRefed<nsIRunnable>
+CreateTracedRunnable(already_AddRefed<nsIRunnable>&& aRunnable)
+{
+  nsCOMPtr<nsIRunnable> traced(new TracedRunnable(Move(aRunnable)));
+  return traced.forget();
+}
+
+/**
+ * CreateTracedTask() wraps the original Task object, aTask, in a newly
+ * allocated TracedTask and returns the wrapper.
+ */
+Task*
+CreateTracedTask(Task* aTask)
+{
+  // The TracedTask* converts implicitly to its Task base on return.
+  return new TracedTask(aTask);
+}
+
+} // namespace tasktracer
+} // namespace mozilla
diff --git a/tools/profiler/tasktracer/TracedTaskCommon.h b/tools/profiler/tasktracer/TracedTaskCommon.h
new file mode 100644
index 000000000..3594b8e9e
--- /dev/null
+++ b/tools/profiler/tasktracer/TracedTaskCommon.h
@@ -0,0 +1,73 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef TRACED_TASK_COMMON_H
+#define TRACED_TASK_COMMON_H
+
+#include "base/task.h"
+#include "GeckoTaskTracer.h"
+#include "nsCOMPtr.h"
+#include "nsThreadUtils.h"
+
+namespace mozilla {
+namespace tasktracer {
+
+class TracedTaskCommon  // State and TLS helpers shared by both traced wrappers.
+{
+public:
+  TracedTaskCommon();
+  virtual ~TracedTaskCommon();
+
+  void DispatchTask(int aDelayTimeMs = 0);  // Logs the dispatch via LogDispatch().
+
+  void SetTLSTraceInfo();    // Copies this task's ids into the thread's TraceInfo.
+  void GetTLSTraceInfo();    // Loads ids from the thread's TraceInfo into this task.
+  void ClearTLSTraceInfo();  // Resets the thread's current ids to 0 / Unknown.
+
+protected:
+  void Init();  // Generates mTaskId and snapshots the thread's current ids.
+
+  // TraceInfo of TLS will be set by the following parameters, including source
+  // event type, source event ID, parent task ID, and task ID of this traced
+  // task/runnable.
+  SourceEventType mSourceEventType;
+  uint64_t mSourceEventId;
+  uint64_t mParentTaskId;
+  uint64_t mTaskId;
+  bool mIsTraceInfoInit;  // SetTLSTraceInfo() publishes nothing while false.
+};
+
+class TracedRunnable : public TracedTaskCommon
+                     , public nsRunnable
+{
+public:
+  NS_DECL_NSIRUNNABLE
+
+  TracedRunnable(already_AddRefed<nsIRunnable>&& aOriginalObj);
+
+private:
+  virtual ~TracedRunnable();
+
+  nsCOMPtr<nsIRunnable> mOriginalObj;  // Wrapped runnable; Run() forwards to it.
+};
+
+class TracedTask : public TracedTaskCommon
+                 , public Task
+{
+public:
+  TracedTask(Task* aOriginalObj);
+  ~TracedTask();
+
+  virtual void Run();  // Logs begin/end around mOriginalObj->Run().
+
+private:
+  Task* mOriginalObj;  // Owned: deleted in ~TracedTask().
+};
+
+} // namespace tasktracer
+} // namespace mozilla
+
+#endif
diff --git a/tools/profiler/tests/gtest/LulTest.cpp b/tools/profiler/tests/gtest/LulTest.cpp
new file mode 100644
index 000000000..8a165ab34
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTest.cpp
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "mozilla/Atomics.h"
+#include "LulMain.h"
+#include "GeckoProfiler.h"       // for TracingMetadata
+#include "platform-linux-lul.h"  // for read_procmaps
+
+// Set this to 0 to make LUL be completely silent during tests.
+// Set it to 1 to get logging output from LUL, presumably for
+// the purpose of debugging it.
+#define DEBUG_LUL_TEST 0
+
+// LUL needs a callback for its logging sink. This one writes |str| to stderr
+// behind a "LUL-in-gtest: " prefix, and only when DEBUG_LUL_TEST is non-zero.
+static void
+gtest_logging_sink_for_LulIntegration(const char* str) {
+  if (DEBUG_LUL_TEST == 0) {
+    return;
+  }
+  // Ignore any trailing \n, since the format string adds one anyway. The
+  // newline is trimmed through the %.*s precision, so no strdup() copy (and
+  // no unchecked allocation result) is needed.
+  size_t n = strlen(str);
+  if (n > 0 && str[n-1] == '\n') {
+    n--;
+  }
+  // The precision argument of %.*s must be an int.
+  fprintf(stderr, "LUL-in-gtest: %.*s\n", static_cast<int>(n), str);
+}
+
+TEST(LulIntegration, unwind_consistency) {
+  // Set up LUL and get it to read unwind info for libxul.so, which is
+  // all we care about here, plus (incidentally) practically every
+  // other object in the process too.
+  lul::LUL* lul = new lul::LUL(gtest_logging_sink_for_LulIntegration);  // Deleted below.
+  read_procmaps(lul);
+
+  // Run unwind tests and receive information about how many there
+  // were and how many were successful.
+  lul->EnableUnwinding();
+  int nTests = 0, nTestsPassed = 0;  // Filled in by RunLulUnitTests().
+  RunLulUnitTests(&nTests, &nTestsPassed, lul);
+  EXPECT_TRUE(nTests == 6) << "Unexpected number of tests";
+  EXPECT_TRUE(nTestsPassed == nTests) << "Not all tests passed";
+
+  delete lul;
+}
diff --git a/tools/profiler/tests/gtest/LulTestDwarf.cpp b/tools/profiler/tests/gtest/LulTestDwarf.cpp
new file mode 100644
index 000000000..5cfd71fd4
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTestDwarf.cpp
@@ -0,0 +1,2597 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+#include "LulCommonExt.h"
+#include "LulDwarfExt.h"
+#include "LulDwarfInt.h"
+#include "LulTestInfrastructure.h"
+
+using testing::Test;
+using testing::Return;
+using testing::Sequence;
+using testing::InSequence;
+using testing::_;
+using lul_test::CFISection;
+using lul_test::test_assembler::kBigEndian;
+using lul_test::test_assembler::kLittleEndian;
+using lul_test::test_assembler::Label;
+
+#define PERHAPS_WRITE_DEBUG_FRAME_FILE(name, section) /**/
+#define PERHAPS_WRITE_EH_FRAME_FILE(name, section)    /**/
+
+// Set this to 0 to make LUL be completely silent during tests.
+// Set it to 1 to get logging output from LUL, presumably for
+// the purpose of debugging it.
+#define DEBUG_LUL_TEST_DWARF 0
+
+// LUL needs a callback for its logging sink. This one writes |str| to stderr
+// behind a "LUL-in-gtest: " prefix, and only when DEBUG_LUL_TEST_DWARF != 0.
+static void
+gtest_logging_sink_for_LulTestDwarf(const char* str) {
+  if (DEBUG_LUL_TEST_DWARF == 0) {
+    return;
+  }
+  // Ignore any trailing \n, since the format string adds one anyway. The
+  // newline is trimmed through the %.*s precision, so no strdup() copy (and
+  // no unchecked allocation result) is needed.
+  size_t n = strlen(str);
+  if (n > 0 && str[n-1] == '\n') {
+    n--;
+  }
+  // The precision argument of %.*s must be an int.
+  fprintf(stderr, "LUL-in-gtest: %.*s\n", static_cast<int>(n), str);
+}
+
+namespace lul {
+
+class MockCallFrameInfoHandler : public CallFrameInfo::Handler {  // gmock Handler.
+ public:
+  MOCK_METHOD6(Entry, bool(size_t offset, uint64 address, uint64 length,
+                           uint8 version, const std::string &augmentation,
+                           unsigned return_address));
+  MOCK_METHOD2(UndefinedRule, bool(uint64 address, int reg));
+  MOCK_METHOD2(SameValueRule, bool(uint64 address, int reg));
+  MOCK_METHOD4(OffsetRule, bool(uint64 address, int reg, int base_register,
+                                long offset));
+  MOCK_METHOD4(ValOffsetRule, bool(uint64 address, int reg, int base_register,
+                                   long offset));
+  MOCK_METHOD3(RegisterRule, bool(uint64 address, int reg, int base_register));
+  MOCK_METHOD3(ExpressionRule, bool(uint64 address, int reg,
+                                    const std::string &expression));
+  MOCK_METHOD3(ValExpressionRule, bool(uint64 address, int reg,
+                                       const std::string &expression));
+  MOCK_METHOD0(End, bool());
+  MOCK_METHOD2(PersonalityRoutine, bool(uint64 address, bool indirect));
+  MOCK_METHOD2(LanguageSpecificDataArea, bool(uint64 address, bool indirect));
+  MOCK_METHOD0(SignalHandler, bool());
+};
+
+class MockCallFrameErrorReporter : public CallFrameInfo::Reporter {  // gmock Reporter.
+ public:
+  MockCallFrameErrorReporter()
+     : Reporter(gtest_logging_sink_for_LulTestDwarf,  // Silent unless debugging is on.
+                "mock filename", "mock section")
+  { }
+  MOCK_METHOD2(Incomplete, void(uint64, CallFrameInfo::EntryKind));
+  MOCK_METHOD1(EarlyEHTerminator, void(uint64));
+  MOCK_METHOD2(CIEPointerOutOfRange, void(uint64, uint64));
+  MOCK_METHOD2(BadCIEId, void(uint64, uint64));
+  MOCK_METHOD2(UnrecognizedVersion, void(uint64, int version));
+  MOCK_METHOD2(UnrecognizedAugmentation, void(uint64, const string &));
+  MOCK_METHOD2(InvalidPointerEncoding, void(uint64, uint8));
+  MOCK_METHOD2(UnusablePointerEncoding, void(uint64, uint8));
+  MOCK_METHOD2(RestoreInCIE, void(uint64, uint64));
+  MOCK_METHOD3(BadInstruction, void(uint64, CallFrameInfo::EntryKind, uint64));
+  MOCK_METHOD3(NoCFARule, void(uint64, CallFrameInfo::EntryKind, uint64));
+  MOCK_METHOD3(EmptyStateStack, void(uint64, CallFrameInfo::EntryKind, uint64));
+  MOCK_METHOD3(ClearingCFARule, void(uint64, CallFrameInfo::EntryKind, uint64));
+};
+
+struct CFIFixture {
+
+  enum { kCFARegister = CallFrameInfo::Handler::kCFARegister };
+
+  CFIFixture() {
+    // Default expectations for the data handler.
+    //
+    // - Leave Entry and End without expectations, as it's probably a
+    //   good idea to set those explicitly in each test.
+    //
+    // - Expect the *Rule functions to not be called,
+    //   so that each test can simply list the calls they expect.
+    //
+    // I gather I could use StrictMock for this, but the manual seems
+    // to suggest using that only as a last resort, and this isn't so
+    // bad.
+    EXPECT_CALL(handler, UndefinedRule(_, _)).Times(0);
+    EXPECT_CALL(handler, SameValueRule(_, _)).Times(0);
+    EXPECT_CALL(handler, OffsetRule(_, _, _, _)).Times(0);
+    EXPECT_CALL(handler, ValOffsetRule(_, _, _, _)).Times(0);
+    EXPECT_CALL(handler, RegisterRule(_, _, _)).Times(0);
+    EXPECT_CALL(handler, ExpressionRule(_, _, _)).Times(0);
+    EXPECT_CALL(handler, ValExpressionRule(_, _, _)).Times(0);
+    EXPECT_CALL(handler, PersonalityRoutine(_, _)).Times(0);
+    EXPECT_CALL(handler, LanguageSpecificDataArea(_, _)).Times(0);
+    EXPECT_CALL(handler, SignalHandler()).Times(0);
+
+    // Default expectations for the error/warning reporter: no report at all.
+    EXPECT_CALL(reporter, Incomplete(_, _)).Times(0);
+    EXPECT_CALL(reporter, EarlyEHTerminator(_)).Times(0);
+    EXPECT_CALL(reporter, CIEPointerOutOfRange(_, _)).Times(0);
+    EXPECT_CALL(reporter, BadCIEId(_, _)).Times(0);
+    EXPECT_CALL(reporter, UnrecognizedVersion(_, _)).Times(0);
+    EXPECT_CALL(reporter, UnrecognizedAugmentation(_, _)).Times(0);
+    EXPECT_CALL(reporter, InvalidPointerEncoding(_, _)).Times(0);
+    EXPECT_CALL(reporter, UnusablePointerEncoding(_, _)).Times(0);
+    EXPECT_CALL(reporter, RestoreInCIE(_, _)).Times(0);
+    EXPECT_CALL(reporter, BadInstruction(_, _, _)).Times(0);
+    EXPECT_CALL(reporter, NoCFARule(_, _, _)).Times(0);
+    EXPECT_CALL(reporter, EmptyStateStack(_, _, _)).Times(0);
+    EXPECT_CALL(reporter, ClearingCFARule(_, _, _)).Times(0);
+  }
+
+  MockCallFrameInfoHandler handler;
+  MockCallFrameErrorReporter reporter;
+};
+
+class LulDwarfCFI: public CFIFixture, public Test { };  // Fixture for the TEST_Fs.
+
+TEST_F(LulDwarfCFI, EmptyRegion) {
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+  static const char data[1] = { 42 };  // Placeholder; a size of 0 is passed below.
+
+  ByteReader reader(ENDIANNESS_BIG);
+  CallFrameInfo parser(data, 0, &reader, &handler, &reporter);
+  EXPECT_TRUE(parser.Start());  // A zero-length region must parse successfully.
+}
+
+TEST_F(LulDwarfCFI, IncompleteLength32) {
+  CFISection section(kBigEndian, 8);
+  section
+      // Not even long enough for an initial length.
+      .D16(0xa0f)
+      // Padding to keep valgrind happy. We subtract these off when we
+      // construct the parser.
+      .D16(0);
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown))
+      .WillOnce(Return());  // Exactly one Incomplete report is expected.
+
+  string contents;
+  ASSERT_TRUE(section.GetContents(&contents));
+
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(8);
+  CallFrameInfo parser(contents.data(), contents.size() - 2,  // Drop the padding.
+                       &reader, &handler, &reporter);
+  EXPECT_FALSE(parser.Start());
+}
+
+TEST_F(LulDwarfCFI, IncompleteLength64) {
+  CFISection section(kLittleEndian, 4);
+  section
+      // An incomplete 64-bit DWARF initial length.
+      .D32(0xffffffff).D32(0x71fbaec2)
+      // Padding to keep valgrind happy. We subtract these off when we
+      // construct the parser.
+      .D32(0);
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown))
+      .WillOnce(Return());  // Exactly one Incomplete report is expected.
+
+  string contents;
+  ASSERT_TRUE(section.GetContents(&contents));
+
+  ByteReader reader(ENDIANNESS_LITTLE);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size() - 4,  // Drop the padding.
+                       &reader, &handler, &reporter);
+  EXPECT_FALSE(parser.Start());
+}
+
+TEST_F(LulDwarfCFI, IncompleteId32) {
+  CFISection section(kBigEndian, 8);
+  section
+      .D32(3)                      // Initial length, not long enough for id
+      .D8(0xd7).D8(0xe5).D8(0xf1)  // incomplete id
+      .CIEHeader(8727, 3983, 8889, 3, "")
+      .FinishEntry();
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown))
+      .WillOnce(Return());  // Exactly one Incomplete report is expected.
+
+  string contents;
+  ASSERT_TRUE(section.GetContents(&contents));
+
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(8);
+  CallFrameInfo parser(contents.data(), contents.size(),
+                       &reader, &handler, &reporter);
+  EXPECT_FALSE(parser.Start());  // The truncated entry makes parsing fail.
+}
+
+TEST_F(LulDwarfCFI, BadId32) {
+  CFISection section(kBigEndian, 8);
+  section
+      .D32(0x100)                       // Initial length
+      .D32(0xe802fade)                  // bogus ID
+      .Append(0x100 - 4, 0x42);         // make the length true
+  section
+      .CIEHeader(1672, 9872, 8529, 3, "")
+      .FinishEntry();
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  EXPECT_CALL(reporter, CIEPointerOutOfRange(_, 0xe802fade))  // The bogus ID above.
+      .WillOnce(Return());
+
+  string contents;
+  ASSERT_TRUE(section.GetContents(&contents));
+
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(8);
+  CallFrameInfo parser(contents.data(), contents.size(),
+                       &reader, &handler, &reporter);
+  EXPECT_FALSE(parser.Start());
+}
+
+// A lone CIE shouldn't cause any handler calls, and parsing should succeed.
+TEST_F(LulDwarfCFI, SingleCIE) {
+  CFISection section(kLittleEndian, 4);
+  section.CIEHeader(0xffe799a8, 0x3398dcdd, 0x6e9683de, 3, "");
+  section.Append(10, lul::DW_CFA_nop);  // Ten DW_CFA_nop bytes as the CIE body.
+  section.FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("SingleCIE", section);
+
+  EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0);
+  EXPECT_CALL(handler, End()).Times(0);
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_LITTLE);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size(),
+                       &reader, &handler, &reporter);
+  EXPECT_TRUE(parser.Start());
+}
+
+// One FDE, one CIE: expect a single Entry/End pair, in that order.
+TEST_F(LulDwarfCFI, OneFDE) {
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section
+      .Mark(&cie)
+      .CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "")
+      .FinishEntry()
+      .FDEHeader(cie, 0x7714740d, 0x3d5a10cd)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("OneFDE", section);
+
+  {
+    InSequence s;
+    EXPECT_CALL(handler,
+                Entry(_, 0x7714740d, 0x3d5a10cd, 3, "", 0x6b6efb87))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size(),
+                       &reader, &handler, &reporter);
+  EXPECT_TRUE(parser.Start());
+}
+
+// Two FDEs share a CIE; each FDE must produce its own Entry/End pair.
+TEST_F(LulDwarfCFI, TwoFDEsOneCIE) {
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section
+      // First FDE. readelf complains about this one because it makes
+      // a forward reference to its CIE.
+      .FDEHeader(cie, 0xa42744df, 0xa3b42121)
+      .FinishEntry()
+      // CIE.
+      .Mark(&cie)
+      .CIEHeader(0x04f7dc7b, 0x3d00c05f, 0xbd43cb59, 3, "")
+      .FinishEntry()
+      // Second FDE.
+      .FDEHeader(cie, 0x6057d391, 0x700f608d)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsOneCIE", section);
+
+  {
+    InSequence s;  // Expectations for the first FDE.
+    EXPECT_CALL(handler,
+                Entry(_, 0xa42744df, 0xa3b42121, 3, "", 0xbd43cb59))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+  {
+    InSequence s;  // Expectations for the second FDE.
+    EXPECT_CALL(handler,
+                Entry(_, 0x6057d391, 0x700f608d, 3, "", 0xbd43cb59))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End()).WillOnce(Return(true));
+  }
+
+  string contents;
+  EXPECT_TRUE(section.GetContents(&contents));
+  ByteReader reader(ENDIANNESS_BIG);
+  reader.SetAddressSize(4);
+  CallFrameInfo parser(contents.data(), contents.size(),
+                       &reader, &handler, &reporter);
+  EXPECT_TRUE(parser.Start());
+}
+
+// Two CIEs and two FDEs, cross-wired: the first FDE cites the CIE at the
+// end of the section and the second FDE cites the CIE at the start.  Each
+// Entry must carry the version and return register of the CIE it cites.
+TEST_F(LulDwarfCFI, TwoFDEsTwoCIEs) {
+  CFISection section(kLittleEndian, 8);
+  Label leading_cie, trailing_cie;
+  section
+      // Leading CIE (version 3).
+      .Mark(&leading_cie)
+      .CIEHeader(0x694d5d45, 0x4233221b, 0xbf45e65a, 3, "")
+      .FinishEntry()
+      // First FDE, citing the trailing CIE.  This is a forward reference,
+      // which readelf complains about.
+      .FDEHeader(trailing_cie, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL)
+      .FinishEntry()
+      // Second FDE, citing the leading CIE.
+      .FDEHeader(leading_cie, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL)
+      .FinishEntry()
+      // Trailing CIE (version 2).
+      .Mark(&trailing_cie)
+      .CIEHeader(0xfba3fad7, 0x6287e1fd, 0x61d2c581, 2, "")
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsTwoCIEs", section);
+
+  // First FDE: reported with the trailing CIE's parameters.
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL, 2,
+                      "", 0x61d2c581))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+  // Second FDE: reported with the leading CIE's parameters.
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL, 3,
+                      "", 0xbf45e65a))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ByteReader byte_reader(ENDIANNESS_LITTLE);
+  byte_reader.SetAddressSize(8);
+
+  string image;
+  EXPECT_TRUE(section.GetContents(&image));
+  CallFrameInfo cfi_parser(image.data(), image.size(),
+                           &byte_reader, &handler, &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// A CIE with an unknown version number (0x52).  The FDE citing it must be
+// skipped and the problem reported, the following well-formed CIE/FDE pair
+// must still be delivered, and Start() must return false overall.
+TEST_F(LulDwarfCFI, BadVersion) {
+  CFISection section(kBigEndian, 4);
+  Label bad_cie, good_cie;
+  section
+      .Mark(&bad_cie)
+      .CIEHeader(0xca878cf0, 0x7698ec04, 0x7b616f54, 0x52, "")
+      .FinishEntry()
+      // This FDE cites the CIE with the unrecognized version, so the parser
+      // should pass over it.
+      .FDEHeader(bad_cie, 0x08852292, 0x2204004a)
+      .FinishEntry()
+      // This pair is fine and should be visited regardless of the above.
+      .Mark(&good_cie)
+      .CIEHeader(0x7c3ae7c9, 0xb9b9a512, 0x96cb3264, 3, "")
+      .FinishEntry()
+      .FDEHeader(good_cie, 0x2094735a, 0x6e875501)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("BadVersion", section);
+
+  EXPECT_CALL(reporter, UnrecognizedVersion(_, 0x52))
+      .WillOnce(Return());
+
+  {
+    InSequence ordered;
+    // Nothing at all for the first FDE; only the second one reaches the
+    // handler.
+    EXPECT_CALL(handler,
+                Entry(_, 0x2094735a, 0x6e875501, 3, "", 0x96cb3264))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  string image;
+  EXPECT_TRUE(section.GetContents(&image));
+  CallFrameInfo cfi_parser(image.data(), image.size(),
+                           &byte_reader, &handler, &reporter);
+  EXPECT_FALSE(cfi_parser.Start());
+}
+
+// A CIE with an unknown augmentation string ("spaniels!").  The FDE citing
+// it must be skipped and the problem reported, the following well-formed
+// CIE/FDE pair must still be delivered, and Start() must return false
+// overall.
+TEST_F(LulDwarfCFI, BadAugmentation) {
+  CFISection section(kBigEndian, 4);
+  Label bad_cie, good_cie;
+  section
+      .Mark(&bad_cie)
+      .CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "spaniels!")
+      .FinishEntry()
+      // This FDE cites the CIE with the unrecognized augmentation, so the
+      // parser should pass over it.
+      .FDEHeader(bad_cie, 0x7714740d, 0x3d5a10cd)
+      .FinishEntry()
+      // This pair is fine and should be visited regardless of the above.
+      .Mark(&good_cie)
+      .CIEHeader(0xf8bc4399, 0x8cf09931, 0xf2f519b2, 3, "")
+      .FinishEntry()
+      .FDEHeader(good_cie, 0x7bf0fda0, 0xcbcd28d8)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("BadAugmentation", section);
+
+  EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "spaniels!"))
+      .WillOnce(Return());
+
+  {
+    InSequence ordered;
+    // Nothing at all for the first FDE; only the second one reaches the
+    // handler.
+    EXPECT_CALL(handler,
+                Entry(_, 0x7bf0fda0, 0xcbcd28d8, 3, "", 0xf2f519b2))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  string image;
+  EXPECT_TRUE(section.GetContents(&image));
+  CallFrameInfo cfi_parser(image.data(), image.size(),
+                           &byte_reader, &handler, &reporter);
+  EXPECT_FALSE(cfi_parser.Start());
+}
+
+// In CFI version 1 (DWARF2) the return address column is a single byte,
+// whereas version 3 (DWARF3) encodes it as a ULEB128.  This test covers the
+// version 1 encoding.
+TEST_F(LulDwarfCFI, CIEVersion1ReturnColumn) {
+  CFISection section(kBigEndian, 4);
+  Label cie_mark;
+  section
+      // Version 1 CIE.  The return column, 0x9f, is deliberately a value
+      // that reads differently as a ubyte than as a ULEB128.
+      .Mark(&cie_mark)
+      .CIEHeader(0xbcdea24f, 0x5be28286, 0x9f, 1, "")
+      .FinishEntry()
+      // An FDE that cites the CIE above.
+      .FDEHeader(cie_mark, 0xb8d347b5, 0x825e55dc)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion1ReturnColumn", section);
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler, Entry(_, 0xb8d347b5, 0x825e55dc, 1, "", 0x9f))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  string image;
+  EXPECT_TRUE(section.GetContents(&image));
+  CallFrameInfo cfi_parser(image.data(), image.size(),
+                           &byte_reader, &handler, &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// In CFI version 1 (DWARF2) the return address column is a single byte,
+// whereas version 3 (DWARF3) encodes it as a ULEB128.  This test covers the
+// version 3 encoding.
+TEST_F(LulDwarfCFI, CIEVersion3ReturnColumn) {
+  CFISection section(kBigEndian, 4);
+  Label cie_mark;
+  section
+      // Version 3 CIE.  The return column, 0x89, is deliberately a value
+      // that reads differently as a ubyte than as a ULEB128.
+      .Mark(&cie_mark)
+      .CIEHeader(0x0ab4758d, 0xc010fdf7, 0x89, 3, "")
+      .FinishEntry()
+      // An FDE that cites the CIE above.
+      .FDEHeader(cie_mark, 0x86763f2b, 0x2a66dc23)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion3ReturnColumn", section);
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler, Entry(_, 0x86763f2b, 0x2a66dc23, 3, "", 0x89))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ByteReader byte_reader(ENDIANNESS_BIG);
+  byte_reader.SetAddressSize(4);
+
+  string image;
+  EXPECT_TRUE(section.GetContents(&image));
+  CallFrameInfo cfi_parser(image.data(), image.size(),
+                           &byte_reader, &handler, &reporter);
+  EXPECT_TRUE(cfi_parser.Start());
+}
+
+// Fixture shared by the per-instruction tests.  It holds the parameters of
+// a stock CIE/FDE pair, can emit that pair into a section together with the
+// matching handler expectations (StockCIEAndFDE), and can run a finished
+// section through a CallFrameInfo parser (ParseSection).
+struct CFIInsnFixture: public CFIFixture {
+  CFIInsnFixture() : CFIFixture() {
+    // code_factor, fde_start and fde_size are normally assigned by
+    // StockCIEAndFDE or by the individual test.  Give them a defined value
+    // here so that no test can ever read an indeterminate one.
+    code_factor = 0;
+    data_factor = 0xb6f;
+    return_register = 0x9be1ed9f;
+    version = 3;
+    cfa_base_register = 0x383a3aa;
+    cfa_offset = 0xf748;
+    fde_start = 0;
+    fde_size = 0;
+  }
+
+  // Prepare SECTION to receive FDE instructions.
+  //
+  // - Choose fde_start, fde_size and code_factor appropriately for the
+  //   section's address size (4 or 8 bytes), overwriting any previous
+  //   values of those members.
+  // - Append a stock CIE header that establishes the fixture's
+  //   code_factor, data_factor, return_register and version, with an
+  //   empty augmentation string.
+  // - Have the CIE set up a CFA rule using cfa_base_register and
+  //   cfa_offset.
+  // - Append a stock FDE header, referring to the above CIE, for the
+  //   fde_size bytes at fde_start.
+  // - Set appropriate expectations on handler in sequence s for the
+  //   frame description entry and the CIE's CFA rule.
+  //
+  // On return, SECTION is ready to have FDE instructions appended to
+  // it, and its FinishEntry member called.
+  void StockCIEAndFDE(CFISection *section) {
+    // Choose appropriate constants for our address size.
+    if (section->AddressSize() == 4) {
+      fde_start = 0xc628ecfbU;
+      fde_size = 0x5dee04a2;
+      code_factor = 0x60b;
+    } else {
+      assert(section->AddressSize() == 8);
+      fde_start = 0x0005c57ce7806bd3ULL;
+      fde_size = 0x2699521b5e333100ULL;
+      code_factor = 0x01008e32855274a8ULL;
+    }
+
+    // Create the CIE.
+    (*section)
+        .Mark(&cie_label)
+        .CIEHeader(code_factor, data_factor, return_register, version,
+                   "")
+        .D8(lul::DW_CFA_def_cfa)
+        .ULEB128(cfa_base_register)
+        .ULEB128(cfa_offset)
+        .FinishEntry();
+
+    // Create the FDE.
+    section->FDEHeader(cie_label, fde_start, fde_size);
+
+    // Expect an Entry call for the FDE and a ValOffsetRule call for the
+    // CIE's CFA rule.
+    EXPECT_CALL(handler, Entry(_, fde_start, fde_size, version, "",
+                               return_register))
+        .InSequence(s)
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister,
+                                       cfa_base_register, cfa_offset))
+        .InSequence(s)
+        .WillOnce(Return(true));
+  }
+
+  // Run the contents of SECTION through a CallFrameInfo parser,
+  // expecting parser.Start to return SUCCEEDS.  Caller may optionally
+  // supply, via READER, its own ByteReader.  If that's absent, a
+  // local one matching the section's endianness is used.  Either way,
+  // the reader's address size is set from the section.
+  void ParseSection(CFISection *section,
+                    bool succeeds = true, ByteReader* reader = nullptr) {
+    string contents;
+    EXPECT_TRUE(section->GetContents(&contents));
+
+    // Map the section's endianness onto the reader's enum.  The variable
+    // is initialized up front so it is never read while indeterminate.
+    lul::Endianness endianness = ENDIANNESS_LITTLE;
+    if (section->endianness() == kBigEndian) {
+      endianness = ENDIANNESS_BIG;
+    } else {
+      assert(section->endianness() == kLittleEndian);
+    }
+
+    ByteReader local_reader(endianness);
+    ByteReader* reader_to_use = reader ? reader : &local_reader;
+    reader_to_use->SetAddressSize(section->AddressSize());
+    CallFrameInfo parser(contents.data(), contents.size(),
+                         reader_to_use, &handler, &reporter);
+    if (succeeds)
+      EXPECT_TRUE(parser.Start());
+    else
+      EXPECT_FALSE(parser.Start());
+  }
+
+  Label cie_label;             // Marks the start of the stock CIE.
+  Sequence s;                  // Orders the handler expectations.
+  uint64 code_factor;          // CIE code alignment factor.
+  int data_factor;             // CIE data alignment factor.
+  unsigned return_register;    // CIE return address register.
+  unsigned version;            // CIE version.
+  unsigned cfa_base_register;  // Base register of the stock CFA rule.
+  int cfa_offset;              // Offset of the stock CFA rule.
+  uint64 fde_start, fde_size;  // Address range covered by the stock FDE.
+};
+
+// gtest fixture class for the call frame instruction tests: CFIInsnFixture
+// combined with gtest's Test base.
+class LulDwarfCFIInsn : public CFIInsnFixture, public Test {};
+
+// DW_CFA_set_loc: a rule issued after the instruction must be reported at
+// the address given as its operand.
+TEST_F(LulDwarfCFIInsn, DW_CFA_set_loc) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // The DW_CFA_def_cfa below exists only to provoke a handler call whose
+  // address shows what DW_CFA_set_loc did.
+  EXPECT_CALL(handler,
+              ValOffsetRule(0xb1ee3e7a, kCFARegister, 0x4defb431, 0x6d17b0ee))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_set_loc).D32(0xb1ee3e7a)
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x4defb431).ULEB128(0x6d17b0ee)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_set_loc", section);
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc: the delta is packed into the opcode byte and the
+// address moves forward by delta * code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // The DW_CFA_def_cfa below exists only to provoke a handler call whose
+  // address shows what DW_CFA_advance_loc did.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start + 0x2a * code_factor,
+                            kCFARegister, 0x5bbb3715, 0x0186c7bf))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_advance_loc | 0x2a)
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x5bbb3715).ULEB128(0x0186c7bf)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc", section);
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc1: one-byte delta operand, scaled by code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc1) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // The trailing DW_CFA_def_cfa makes the new address observable.
+  EXPECT_CALL(handler,
+              ValOffsetRule((fde_start + 0xd8 * code_factor),
+                            kCFARegister, 0x69d5696a, 0x1eb7fc93))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_advance_loc1).D8(0xd8)
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x69d5696a).ULEB128(0x1eb7fc93)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc1", section);
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc2: two-byte delta operand, scaled by code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc2) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // The trailing DW_CFA_def_cfa makes the new address observable.
+  EXPECT_CALL(handler,
+              ValOffsetRule((fde_start + 0x3adb * code_factor),
+                            kCFARegister, 0x3a368bed, 0x3194ee37))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_advance_loc2).D16(0x3adb)
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x3a368bed).ULEB128(0x3194ee37)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc2", section);
+
+  ParseSection(&section);
+}
+
+// DW_CFA_advance_loc4: four-byte delta operand, scaled by code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc4) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // The trailing DW_CFA_def_cfa makes the new address observable.
+  EXPECT_CALL(handler,
+              ValOffsetRule((fde_start + 0x15813c88ULL * code_factor),
+                            kCFARegister, 0x135270c5, 0x24bad7cb))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_advance_loc4).D32(0x15813c88)
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x135270c5).ULEB128(0x24bad7cb)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc4", section);
+
+  ParseSection(&section);
+}
+
+// DW_CFA_MIPS_advance_loc8: eight-byte delta operand, scaled by
+// code_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_MIPS_advance_loc8) {
+  // code_factor is deliberately not assigned here: StockCIEAndFDE always
+  // picks it from the section's address size, so a value set beforehand
+  // would be discarded without ever being used.
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_MIPS_advance_loc8).D64(0x3c4f3945b92c14ULL)
+      // The DW_CFA_def_cfa exists only to provoke a handler call whose
+      // address shows the effect of the advance.
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0xe17ed602).ULEB128(0x3d162e7f)
+      .FinishEntry();
+
+  // The label passed here omits the MIPS_ prefix of the test name; it is
+  // kept unchanged.
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc8", section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule((fde_start + 0x3c4f3945b92c14ULL * code_factor),
+                            kCFARegister, 0xe17ed602, 0x3d162e7f))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa: register and offset are both taken from the operands and
+// reported as a ValOffsetRule for the CFA.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x4e363a85, 0x815f9aa7))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x4e363a85).ULEB128(0x815f9aa7)
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_def_cfa", section);
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_sf: like DW_CFA_def_cfa, but the offset operand is signed
+// and is multiplied by data_factor.  Both a positive and a negative offset
+// are exercised.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_sf) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Positive offset.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x8ccb32b7,
+                            0x9ea * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // Negative offset.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x9b40f5da,
+                            -0x40a2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa_sf).ULEB128(0x8ccb32b7).LEB128(0x9ea)
+      .D8(lul::DW_CFA_def_cfa_sf).ULEB128(0x9b40f5da).LEB128(-0x40a2)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_register: replaces only the CFA's base register; the
+// offset established by the stock CIE (cfa_offset) is retained.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_register) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x3e7e9363, cfa_offset))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa_register).ULEB128(0x3e7e9363)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// When the current CFA rule is not a base/offset rule (here it is an
+// expression), DW_CFA_def_cfa_register should leave it untouched.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_registerBadRule) {
+  ByteReader supplied_reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Only the expression rule may be reported, however many times; this
+  // expectation is intentionally not part of sequence s.
+  EXPECT_CALL(handler,
+              ValExpressionRule(fde_start, kCFARegister,
+                                "needle in a haystack"))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa_expression).Block("needle in a haystack")
+      .D8(lul::DW_CFA_def_cfa_register).ULEB128(0xf1b49e49)
+      .FinishEntry();
+
+  ParseSection(&section, true, &supplied_reader);
+}
+
+// DW_CFA_def_cfa_offset: replaces only the CFA's offset; the base register
+// established by the stock CIE (cfa_base_register) is retained.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                            0x1e8e3b9b))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa_offset).ULEB128(0x1e8e3b9b)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_def_cfa_offset_sf: like DW_CFA_def_cfa_offset, but the operand is
+// signed and is multiplied by data_factor.  Both a positive and a negative
+// offset are exercised.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset_sf) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Positive offset.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                            0x970 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // Negative offset.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, cfa_base_register,
+                            -0x2cd * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa_offset_sf).LEB128(0x970)
+      .D8(lul::DW_CFA_def_cfa_offset_sf).LEB128(-0x2cd)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// When the current CFA rule is not a base/offset rule (here it is an
+// expression), DW_CFA_def_cfa_offset should leave it untouched.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offsetBadRule) {
+  ByteReader supplied_reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Only the expression rule may be reported, however many times; this
+  // expectation is intentionally not part of sequence s.
+  EXPECT_CALL(handler,
+              ValExpressionRule(fde_start, kCFARegister,
+                                "six ways to Sunday"))
+      .WillRepeatedly(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa_expression).Block("six ways to Sunday")
+      .D8(lul::DW_CFA_def_cfa_offset).ULEB128(0x1e8e3b9b)
+      .FinishEntry();
+
+  ParseSection(&section, true, &supplied_reader);
+}
+
+
+// DW_CFA_def_cfa_expression: the block operand is handed to the handler as
+// a ValExpressionRule for the CFA.
+TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_expression) {
+  ByteReader supplied_reader(ENDIANNESS_LITTLE);
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              ValExpressionRule(fde_start, kCFARegister, "eating crow"))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_def_cfa_expression).Block("eating crow")
+      .FinishEntry();
+
+  ParseSection(&section, true, &supplied_reader);
+}
+
+// DW_CFA_undefined: the named register is reported via UndefinedRule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_undefined) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler, UndefinedRule(fde_start, 0x300ce45d))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_undefined).ULEB128(0x300ce45d)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_same_value: the named register is reported via SameValueRule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_same_value) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler, SameValueRule(fde_start, 0x3865a760))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_same_value).ULEB128(0x3865a760)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset: the register number is packed into the opcode byte and
+// the unsigned offset operand is multiplied by data_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x2c, kCFARegister, 0x9f6 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_offset | 0x2c).ULEB128(0x9f6)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset_extended: like DW_CFA_offset, but the register number is a
+// separate ULEB128 operand.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start,
+                         0x402b, kCFARegister, 0xb48 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_offset_extended).ULEB128(0x402b).ULEB128(0xb48)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_offset_extended_sf: like DW_CFA_offset_extended, but the offset
+// operand is signed.  Both a positive and a negative offset are exercised.
+TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended_sf) {
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // Positive offset.
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x997c23ee,
+                         kCFARegister, 0x2d00 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // Negative offset.
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x9519eb82,
+                         kCFARegister, -0xa77 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_offset_extended_sf)
+          .ULEB128(0x997c23ee).LEB128(0x2d00)
+      .D8(lul::DW_CFA_offset_extended_sf)
+          .ULEB128(0x9519eb82).LEB128(-0xa77)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_val_offset: reported as a ValOffsetRule for the named register,
+// with the unsigned offset operand multiplied by data_factor.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, 0x623562fe,
+                            kCFARegister, 0x673 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_val_offset).ULEB128(0x623562fe).ULEB128(0x673)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_val_offset_sf: like DW_CFA_val_offset, but the offset operand is
+// signed.  Both a positive and a negative offset are exercised.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset_sf) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  // Positive offset.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, 0x6f4f,
+                            kCFARegister, 0xaab * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  // Negative offset.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, 0x2483,
+                            kCFARegister, -0x8a2 * data_factor))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_val_offset_sf).ULEB128(0x6f4f).LEB128(0xaab)
+      .D8(lul::DW_CFA_val_offset_sf).ULEB128(0x2483).LEB128(-0x8a2)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_register: both register operands are passed on to RegisterRule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_register) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0x278d18f9, 0x1a684414))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_register).ULEB128(0x278d18f9).ULEB128(0x1a684414)
+      .FinishEntry();
+
+  ParseSection(&section);
+}
+
+// DW_CFA_expression: the block operand is handed to the handler as an
+// ExpressionRule for the named register.
+TEST_F(LulDwarfCFIInsn, DW_CFA_expression) {
+  ByteReader supplied_reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 8);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              ExpressionRule(fde_start, 0xa1619fb2,
+                             "plus ça change, plus c'est la même chose"))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_expression).ULEB128(0xa1619fb2)
+      .Block("plus ça change, plus c'est la même chose")
+      .FinishEntry();
+
+  ParseSection(&section, true, &supplied_reader);
+}
+
+// DW_CFA_val_expression: the block operand is handed to the handler as a
+// ValExpressionRule for the named register.
+TEST_F(LulDwarfCFIInsn, DW_CFA_val_expression) {
+  ByteReader supplied_reader(ENDIANNESS_BIG);
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  EXPECT_CALL(handler,
+              ValExpressionRule(fde_start, 0xc5e4a9e3,
+                                "he who has the gold makes the rules"))
+      .InSequence(s)
+      .WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s)
+      .WillOnce(Return(true));
+
+  section
+      .D8(lul::DW_CFA_val_expression).ULEB128(0xc5e4a9e3)
+      .Block("he who has the gold makes the rules")
+      .FinishEntry();
+
+  ParseSection(&section, true, &supplied_reader);
+}
+
+// DW_CFA_restore: after the FDE overrides a register's rule, restoring
+// that register must bring back the rule the CIE gave it.  This test
+// builds its own CIE and FDE instead of using StockCIEAndFDE.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restore) {
+  code_factor = 0x01bd188a9b1fa083ULL;
+  data_factor = -0x1ac8;
+  return_register = 0x8c35b049;
+  version = 2;
+  fde_start = 0x2d70fe998298bbb1ULL;
+  fde_size = 0x46ccc2e63cf0b108ULL;
+
+  CFISection section(kLittleEndian, 8);
+  Label own_cie;
+  section
+      .Mark(&own_cie)
+      .CIEHeader(code_factor, data_factor, return_register, version,
+                 "")
+      // Register rules need a CFA rule to refer to, so supply one.
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x6ca1d50e).ULEB128(0x372e38e8)
+      // The CIE's offset(N) rule for register 0x3c.
+      .D8(lul::DW_CFA_offset | 0x3c).ULEB128(0xb348)
+      .FinishEntry()
+      // The FDE starts here.
+      .FDEHeader(own_cie, fde_start, fde_size)
+      // Second address: give register 0x3c a different offset(N) rule.
+      .D8(lul::DW_CFA_advance_loc | 0x13)
+      .D8(lul::DW_CFA_offset | 0x3c).ULEB128(0x9a50)
+      // Third address: put back the CIE's rule for register 0x3c.
+      .D8(lul::DW_CFA_advance_loc | 0x01)
+      .D8(lul::DW_CFA_restore | 0x3c)
+      .FinishEntry();
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CFA rule from the CIE.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start,
+                              kCFARegister, 0x6ca1d50e, 0x372e38e8))
+        .WillOnce(Return(true));
+    // Register 0x3c as set up by the CIE.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start, 0x3c,
+                           kCFARegister, 0xb348 * data_factor))
+        .WillOnce(Return(true));
+    // Register 0x3c as overridden by the FDE.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start + 0x13 * code_factor, 0x3c,
+                           kCFARegister, 0x9a50 * data_factor))
+        .WillOnce(Return(true));
+    // Register 0x3c back to the CIE's rule.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start + (0x13 + 0x01) * code_factor, 0x3c,
+                           kCFARegister, 0xb348 * data_factor))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// DW_CFA_restore for a register the CIE never mentioned: the register is
+// expected to be reported with a "same value" rule.  This test builds its
+// own CIE and FDE instead of using StockCIEAndFDE.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restoreNoRule) {
+  code_factor = 0x005f78143c1c3b82ULL;
+  data_factor = 0x25d0;
+  return_register = 0xe8;
+  version = 1;
+  fde_start = 0x4062e30f;
+  fde_size = 0x5302a389;
+
+  CFISection section(kBigEndian, 4);
+  Label own_cie;
+  section
+      .Mark(&own_cie)
+      .CIEHeader(code_factor, data_factor, return_register, version, "")
+      // Register rules need a CFA rule to refer to, so supply one.
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x470aa334).ULEB128(0x099ef127)
+      .FinishEntry()
+      // The FDE starts here.
+      .FDEHeader(own_cie, fde_start, fde_size)
+      // Second address: give register 0x2c an offset(N) rule.
+      .D8(lul::DW_CFA_advance_loc | 0x7)
+      .D8(lul::DW_CFA_offset | 0x2c).ULEB128(0x1f47)
+      // Third address: restore register 0x2c, for which the CIE has no rule.
+      .D8(lul::DW_CFA_advance_loc | 0xb)
+      .D8(lul::DW_CFA_restore | 0x2c)
+      .FinishEntry();
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CFA rule from the CIE.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start,
+                              kCFARegister, 0x470aa334, 0x099ef127))
+        .WillOnce(Return(true));
+    // Register 0x2c as set by the FDE.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start + 0x7 * code_factor, 0x2c,
+                           kCFARegister, 0x1f47 * data_factor))
+        .WillOnce(Return(true));
+    // Register 0x2c restored to the CIE's (absent) rule.
+    EXPECT_CALL(handler,
+                SameValueRule(fde_start + (0x7 + 0xb) * code_factor, 0x2c))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// DW_CFA_restore_extended: like DW_CFA_restore, but the register number is
+// a ULEB128 operand.  This test builds its own CIE (passing the dwarf64
+// flag to CIEHeader) and FDE instead of using StockCIEAndFDE.
+TEST_F(LulDwarfCFIInsn, DW_CFA_restore_extended) {
+  code_factor = 0x126e;
+  data_factor = -0xd8b;
+  return_register = 0x77711787;
+  version = 3;
+  fde_start = 0x01f55a45;
+  fde_size = 0x452adb80;
+
+  CFISection section(kBigEndian, 4);
+  Label own_cie;
+  section
+      .Mark(&own_cie)
+      .CIEHeader(code_factor, data_factor, return_register, version,
+                 "", true /* dwarf64 */ )
+      // Register rules need a CFA rule to refer to, so supply one.
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x56fa0edd).ULEB128(0x097f78a5)
+      // The CIE's offset(N) rule for register 0x0f9b8a1c.
+      .D8(lul::DW_CFA_offset_extended)
+          .ULEB128(0x0f9b8a1c).ULEB128(0xc979)
+      .FinishEntry()
+      // The FDE starts here.
+      .FDEHeader(own_cie, fde_start, fde_size)
+      // Second address: give register 0x0f9b8a1c a different offset(N) rule.
+      .D8(lul::DW_CFA_advance_loc | 0x3)
+      .D8(lul::DW_CFA_offset_extended)
+          .ULEB128(0x0f9b8a1c).ULEB128(0x3b7b)
+      // Third address: put back the CIE's rule for register 0x0f9b8a1c.
+      .D8(lul::DW_CFA_advance_loc | 0x04)
+      .D8(lul::DW_CFA_restore_extended).ULEB128(0x0f9b8a1c)
+      .FinishEntry();
+
+  {
+    InSequence ordered;
+    EXPECT_CALL(handler,
+                Entry(_, fde_start, fde_size, version, "", return_register))
+        .WillOnce(Return(true));
+    // CFA rule from the CIE.
+    EXPECT_CALL(handler,
+                ValOffsetRule(fde_start, kCFARegister, 0x56fa0edd, 0x097f78a5))
+        .WillOnce(Return(true));
+    // Register 0x0f9b8a1c as set up by the CIE.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start, 0x0f9b8a1c, kCFARegister,
+                           0xc979 * data_factor))
+        .WillOnce(Return(true));
+    // Register 0x0f9b8a1c as overridden by the FDE.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start + 0x3 * code_factor, 0x0f9b8a1c,
+                           kCFARegister, 0x3b7b * data_factor))
+        .WillOnce(Return(true));
+    // Register 0x0f9b8a1c back to the CIE's rule.
+    EXPECT_CALL(handler,
+                OffsetRule(fde_start + (0x3 + 0x4) * code_factor, 0x0f9b8a1c,
+                           kCFARegister, 0xc979 * data_factor))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_state) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+
+  // We create a state, save it, modify it, and then restore it. The
+  // state that the restore overrides is the "outgoing" state, and the
+  // state that it reinstates is the "incoming" state.
+  //
+  // Register         outgoing        incoming        expect
+  // 1                offset(N)       no rule         new "same value" rule
+  // 2                register(R)     offset(N)       report changed rule
+  // 3                offset(N)       offset(M)       report changed offset
+  // 4                offset(N)       offset(N)       no report
+  // 5                offset(N)       no rule         new "same value" rule
+  section
+      // Create the "incoming" state, which we will save and later restore.
+      .D8(lul::DW_CFA_offset | 2).ULEB128(0x9806)
+      .D8(lul::DW_CFA_offset | 3).ULEB128(0x995d)
+      .D8(lul::DW_CFA_offset | 4).ULEB128(0x7055)
+      .D8(lul::DW_CFA_remember_state)
+      // Advance to a new instruction; an implementation could legitimately
+      // ignore all but the final rule for a given register at a given address.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      // Create the "outgoing" state, which we will discard.
+      .D8(lul::DW_CFA_offset | 1).ULEB128(0xea1a)
+      .D8(lul::DW_CFA_register).ULEB128(2).ULEB128(0x1d2a3767)
+      .D8(lul::DW_CFA_offset | 3).ULEB128(0xdd29)
+      .D8(lul::DW_CFA_offset | 5).ULEB128(0xf1ce)
+      // At a third address, restore the incoming state.
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)
+      .FinishEntry();
+
+  uint64 addr = fde_start;
+
+  // Expect the incoming rules to be reported.
+  EXPECT_CALL(handler, OffsetRule(addr, 2, kCFARegister, 0x9806 * data_factor))
+    .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0x995d * data_factor))
+    .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 4, kCFARegister, 0x7055 * data_factor))
+    .InSequence(s).WillOnce(Return(true));
+
+  addr += code_factor;  // matches the first DW_CFA_advance_loc | 1
+
+  // After the save, we establish the outgoing rule set.
+  EXPECT_CALL(handler, OffsetRule(addr, 1, kCFARegister, 0xea1a * data_factor))
+    .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, RegisterRule(addr, 2, 0x1d2a3767))
+    .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0xdd29 * data_factor))
+    .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 5, kCFARegister, 0xf1ce * data_factor))
+    .InSequence(s).WillOnce(Return(true));
+
+  addr += code_factor;  // matches the second DW_CFA_advance_loc | 1
+
+  // Finally, after the restore, expect only the rules that differ between
+  // the outgoing and incoming states to be reported (so not register 4).
+  EXPECT_CALL(handler, SameValueRule(addr, 1))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 2, kCFARegister, 0x9806 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0x995d * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, SameValueRule(addr, 5))
+      .InSequence(s).WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// Check that restoring a rule set reports changes to the CFA rule.
+TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_stateCFA) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+
+  section
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_def_cfa_offset).ULEB128(0x90481102)  // new CFA offset
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the new offset first, then the fixture's cfa_offset after restore.
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, kCFARegister,
+                                     cfa_base_register, 0x90481102))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor * 2, kCFARegister,
+                                     cfa_base_register, cfa_offset))
+      .InSequence(s).WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIInsn, DW_CFA_nop) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_nop)
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x3fb8d4f1).ULEB128(0x078dc67b)
+      .D8(lul::DW_CFA_nop)
+      .FinishEntry();
+  // No expectation is set for the nops; only the def_cfa rule is expected.
+  EXPECT_CALL(handler,
+              ValOffsetRule(fde_start, kCFARegister, 0x3fb8d4f1, 0x078dc67b))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_window_save) {
+  CFISection section(kBigEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_GNU_window_save)
+      .FinishEntry();
+
+  // Don't put the rule expectations in any particular sequence.
+
+  // The caller's %o0-%o7 have become the callee's %i0-%i7. This is
+  // the GCC register numbering.
+  for (int i = 8; i < 16; i++)
+    EXPECT_CALL(handler, RegisterRule(fde_start, i, i + 16))
+        .WillOnce(Return(true));
+  // The caller's %l0-%l7 and %i0-%i7 have been saved at the top of
+  // its frame: registers 16-31 at CFA offsets 0, 4, ..., 60.
+  for (int i = 16; i < 32; i++)
+    EXPECT_CALL(handler, OffsetRule(fde_start, i, kCFARegister, (i-16) * 4))
+        .WillOnce(Return(true));
+
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_args_size) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_GNU_args_size).ULEB128(0xeddfa520)
+      // Verify that we see this, meaning we parsed the above properly.
+      .D8(lul::DW_CFA_offset | 0x23).ULEB128(0x269)
+      .FinishEntry();
+  // No expectation is set for DW_CFA_GNU_args_size; only the offset rule.
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x23, kCFARegister, 0x269 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_negative_offset_extended) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_GNU_negative_offset_extended)
+      .ULEB128(0x430cc87a).ULEB128(0x613)
+      .FinishEntry();
+  // The offset operand is expected negated and scaled by data_factor.
+  EXPECT_CALL(handler,
+              OffsetRule(fde_start, 0x430cc87a,
+                         kCFARegister, -0x613 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+// Three FDEs: skip the second by having the handler's Entry return false.
+TEST_F(LulDwarfCFIInsn, SkipFDE) {
+  CFISection section(kBigEndian, 4);
+  Label cie;
+  section
+      // CIE, used by all FDEs.
+      .Mark(&cie)
+      .CIEHeader(0x010269f2, 0x9177, 0xedca5849, 2, "")
+      .D8(lul::DW_CFA_def_cfa).ULEB128(0x42ed390b).ULEB128(0x98f43aad)
+      .FinishEntry()
+      // First FDE.
+      .FDEHeader(cie, 0xa870ebdd, 0x60f6aa4)
+      .D8(lul::DW_CFA_register).ULEB128(0x3a860351).ULEB128(0x6c9a6bcf)
+      .FinishEntry()
+      // Second FDE.
+      .FDEHeader(cie, 0xc534f7c0, 0xf6552e9, true /* dwarf64 */)
+      .D8(lul::DW_CFA_register).ULEB128(0x1b62c234).ULEB128(0x26586b18)
+      .FinishEntry()
+      // Third FDE.
+      .FDEHeader(cie, 0xf681cfc8, 0x7e4594e)
+      .D8(lul::DW_CFA_register).ULEB128(0x26c53934).ULEB128(0x18eeb8a4)
+      .FinishEntry();
+
+  {
+    InSequence s;
+
+    // Process the first FDE: the CIE's CFA rule, then the FDE's own rule.
+    EXPECT_CALL(handler, Entry(_, 0xa870ebdd, 0x60f6aa4, 2, "", 0xedca5849))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, ValOffsetRule(0xa870ebdd, kCFARegister,
+                                       0x42ed390b, 0x98f43aad))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, RegisterRule(0xa870ebdd, 0x3a860351, 0x6c9a6bcf))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+
+    // Skip the second FDE: no rule or End calls are expected for it.
+    EXPECT_CALL(handler, Entry(_, 0xc534f7c0, 0xf6552e9, 2, "", 0xedca5849))
+        .WillOnce(Return(false));
+
+    // Process the third FDE, in the same way as the first.
+    EXPECT_CALL(handler, Entry(_, 0xf681cfc8, 0x7e4594e, 2, "", 0xedca5849))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, ValOffsetRule(0xf681cfc8, kCFARegister,
+                                       0x42ed390b, 0x98f43aad))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, RegisterRule(0xf681cfc8, 0x26c53934, 0x18eeb8a4))
+        .WillOnce(Return(true));
+    EXPECT_CALL(handler, End())
+        .WillOnce(Return(true));
+  }
+
+  ParseSection(&section);
+}
+
+// Quit processing in the middle of an entry's instructions.
+TEST_F(LulDwarfCFIInsn, QuitMidentry) {
+  CFISection section(kLittleEndian, 8);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_register).ULEB128(0xe0cf850d).ULEB128(0x15aab431)
+      .D8(lul::DW_CFA_expression).ULEB128(0x46750aa5).Block("meat")
+      .FinishEntry();
+  // RegisterRule returns false, so no ExpressionRule call is expected.
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0xe0cf850d, 0x15aab431))
+      .InSequence(s).WillOnce(Return(false));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+  // NOTE(review): false presumably means "expect the parse to fail" - confirm.
+  ParseSection(&section, false);
+}
+// Test fixture for the rule-restoration (DW_CFA_restore_state) tests below.
+class LulDwarfCFIRestore: public CFIInsnFixture, public Test { };
+
+TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleUnchanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_undefined).ULEB128(0x0bac878e)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // The rule is the same before and after the restore: expect it only once.
+  EXPECT_CALL(handler, UndefinedRule(fde_start, 0x0bac878e))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_undefined).ULEB128(0x7dedff5f)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_same_value).ULEB128(0x7dedff5f)  // replace the rule
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the original rule, then the replacement, then the original again.
+  EXPECT_CALL(handler, UndefinedRule(fde_start, 0x7dedff5f))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, SameValueRule(fde_start + code_factor, 0x7dedff5f))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + 2 * code_factor, 0x7dedff5f))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleUnchanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_same_value).ULEB128(0xadbc9b3a)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // The rule is the same before and after the restore: expect it only once.
+  EXPECT_CALL(handler, SameValueRule(fde_start, 0xadbc9b3a))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_same_value).ULEB128(0x3d90dcb5)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined).ULEB128(0x3d90dcb5)  // replace the rule
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the original rule, then the replacement, then the original again.
+  EXPECT_CALL(handler, SameValueRule(fde_start, 0x3d90dcb5))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x3d90dcb5))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, SameValueRule(fde_start + 2 * code_factor, 0x3d90dcb5))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleUnchanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_offset | 0x14).ULEB128(0xb6f)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // The rule is the same before and after the restore: expect it only once.
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x14,
+                                  kCFARegister, 0xb6f * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_offset | 0x21).ULEB128(0xeb7)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined).ULEB128(0x21)  // replace the rule
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the original rule, then the replacement, then the original again.
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x21,
+                                  kCFARegister, 0xeb7 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x21))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, 0x21,
+                                  kCFARegister, 0xeb7 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChangedOffset) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_offset | 0x21).ULEB128(0x134)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_offset | 0x21).ULEB128(0xf4f)  // same kind, new offset
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the first offset, then the second, then the first again.
+  EXPECT_CALL(handler, OffsetRule(fde_start, 0x21,
+                                  kCFARegister, 0x134 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start + code_factor, 0x21,
+                                  kCFARegister, 0xf4f * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, 0x21,
+                                  kCFARegister, 0x134 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleUnchanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_offset).ULEB128(0x829caee6).ULEB128(0xe4c)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // The rule is the same before and after the restore: expect it only once.
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x829caee6,
+                                  kCFARegister, 0xe4c * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_offset).ULEB128(0xf17c36d6).ULEB128(0xeb7)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined).ULEB128(0xf17c36d6)  // replace the rule
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the original rule, then the replacement, then the original again.
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0xf17c36d6,
+                                     kCFARegister, 0xeb7 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xf17c36d6))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, 0xf17c36d6,
+                                  kCFARegister, 0xeb7 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChangedValOffset) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_offset).ULEB128(0x2cf0ab1b).ULEB128(0x562)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_val_offset).ULEB128(0x2cf0ab1b).ULEB128(0xe88)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the first offset, then the second, then the first again.
+  EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x2cf0ab1b,
+                                  kCFARegister, 0x562 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, 0x2cf0ab1b,
+                                  kCFARegister, 0xe88 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, 0x2cf0ab1b,
+                                  kCFARegister, 0x562 * data_factor))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleUnchanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_register).ULEB128(0x77514acc).ULEB128(0x464de4ce)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // The rule is the same before and after the restore: expect it only once.
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0x77514acc, 0x464de4ce))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChanged) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_register).ULEB128(0xe39acce5).ULEB128(0x095f1559)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined).ULEB128(0xe39acce5)  // replace the rule
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the original rule, then the replacement, then the original again.
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0xe39acce5, 0x095f1559))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xe39acce5))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, RegisterRule(fde_start + 2 * code_factor, 0xe39acce5,
+                                    0x095f1559))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChangedRegister) {
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_register).ULEB128(0xd40e21b1).ULEB128(0x16607d6a)
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_register).ULEB128(0xd40e21b1).ULEB128(0xbabb4742)
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the first source register, then the second, then the first again.
+  EXPECT_CALL(handler, RegisterRule(fde_start, 0xd40e21b1, 0x16607d6a))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, RegisterRule(fde_start + code_factor, 0xd40e21b1,
+                                    0xbabb4742))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, RegisterRule(fde_start + 2 * code_factor, 0xd40e21b1,
+                                    0x16607d6a))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleUnchanged) {
+  ByteReader reader(ENDIANNESS_LITTLE);  // passed to ParseSection below
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_expression).ULEB128(0x666ae152).Block("dwarf")
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // The rule is the same before and after the restore: expect it only once.
+  EXPECT_CALL(handler, ExpressionRule(fde_start, 0x666ae152, "dwarf"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChanged) {
+  ByteReader reader(ENDIANNESS_LITTLE);  // passed to ParseSection below
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_expression).ULEB128(0xb5ca5c46).Block("elf")
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined).ULEB128(0xb5ca5c46)  // replace the rule
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Expect the original rule, then the replacement, then the original again.
+  EXPECT_CALL(handler, ExpressionRule(fde_start, 0xb5ca5c46, "elf"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ExpressionRule(fde_start + 2 * code_factor, 0xb5ca5c46,
+                                      "elf"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChangedExpression) {
+  ByteReader reader(ENDIANNESS_LITTLE);  // passed to ParseSection below
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_expression).ULEB128(0x500f5739).Block("smurf")
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_expression).ULEB128(0x500f5739).Block("orc")
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // Same rule kind and register throughout; only the expression block differs.
+  EXPECT_CALL(handler, ExpressionRule(fde_start, 0x500f5739, "smurf"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ExpressionRule(fde_start + code_factor, 0x500f5739,
+                                      "orc"))
+      .InSequence(s).WillOnce(Return(true));
+  // After the restore, the original expression must be reported again.
+  EXPECT_CALL(handler, ExpressionRule(fde_start + 2 * code_factor, 0x500f5739,
+                                      "smurf"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleUnchanged) {
+  ByteReader reader(ENDIANNESS_LITTLE);  // passed to ParseSection below
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_expression).ULEB128(0x666ae152)
+      .Block("hideous")
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+  // The rule is the same before and after the restore: expect it only once.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, 0x666ae152, "hideous"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChanged) {
+  ByteReader reader(ENDIANNESS_LITTLE);  // passed to ParseSection below
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_expression).ULEB128(0xb5ca5c46)
+      .Block("revolting")
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_undefined).ULEB128(0xb5ca5c46)  // replace the rule
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChanged", section);
+  // Expect the original rule, then the replacement, then the original again.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, 0xb5ca5c46, "revolting"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor, 0xb5ca5c46,
+                                         "revolting"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChangedValExpression) {
+  ByteReader reader(ENDIANNESS_LITTLE);  // passed to ParseSection below
+  CFISection section(kLittleEndian, 4);
+  StockCIEAndFDE(&section);
+  section
+      .D8(lul::DW_CFA_val_expression).ULEB128(0x500f5739)
+      .Block("repulsive")
+      .D8(lul::DW_CFA_remember_state)  // save the rule set
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_val_expression).ULEB128(0x500f5739)
+      .Block("nauseous")
+      .D8(lul::DW_CFA_advance_loc | 1)
+      .D8(lul::DW_CFA_restore_state)  // reinstate the saved rule set
+      .FinishEntry();
+
+  PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChangedValExpression",
+                                 section);
+  // Same rule kind and register throughout; only the expression block differs.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start, 0x500f5739, "repulsive"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValExpressionRule(fde_start + code_factor, 0x500f5739,
+                                         "nauseous"))
+      .InSequence(s).WillOnce(Return(true));
+  // After the restore, the original expression must be reported again.
+  EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor, 0x500f5739,
+                                         "repulsive"))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End()).WillOnce(Return(true));
+
+  ParseSection(&section, true, &reader);
+}
+
+struct EHFrameFixture: public CFIInsnFixture {  // .eh_frame test fixture
+  EHFrameFixture()  // installs stock pointer bases on the section
+      : CFIInsnFixture(), section(kBigEndian, 4, true) {
+    encoded_pointer_bases.cfi  = 0x7f496cb2;
+    encoded_pointer_bases.text = 0x540f67b6;
+    encoded_pointer_bases.data = 0xe3eab768;
+    section.SetEncodedPointerBases(encoded_pointer_bases);
+  }
+  CFISection section;  // the section each test fills in
+  CFISection::EncodedPointerBases encoded_pointer_bases;  // cfi/text/data
+
+  // Like CFIInsnFixture::ParseSection, but parse the section as .eh_frame
+  // data with the stock base addresses; |succeeds| is the expected result.
+  void ParseEHFrameSection(CFISection *section, bool succeeds = true) {
+    EXPECT_TRUE(section->ContainsEHFrame());
+    string contents;
+    EXPECT_TRUE(section->GetContents(&contents));
+    lul::Endianness endianness;  // reader endianness, mirrors the section's
+    if (section->endianness() == kBigEndian)
+      endianness = ENDIANNESS_BIG;
+    else {
+      assert(section->endianness() == kLittleEndian);
+      endianness = ENDIANNESS_LITTLE;
+    }
+    ByteReader reader(endianness);
+    reader.SetAddressSize(section->AddressSize());
+    reader.SetCFIDataBase(encoded_pointer_bases.cfi, contents.data());
+    reader.SetTextBase(encoded_pointer_bases.text);
+    reader.SetDataBase(encoded_pointer_bases.data);
+    CallFrameInfo parser(contents.data(), contents.size(),
+                         &reader, &handler, &reporter, true);
+    if (succeeds)  // assert that Start() agrees with the expected outcome
+      EXPECT_TRUE(parser.Start());
+    else
+      EXPECT_FALSE(parser.Start());
+  }
+
+};
+// Test fixture for the .eh_frame parsing tests below.
+class LulDwarfEHFrame: public EHFrameFixture, public Test { };
+
+// A simple CIE, an FDE, and a terminator; an FDE after it must be ignored.
+TEST_F(LulDwarfEHFrame, Terminator) {
+  Label cie;
+  section
+      .Mark(&cie)
+      .CIEHeader(9968, 2466, 67, 1, "")
+      .D8(lul::DW_CFA_def_cfa).ULEB128(3772).ULEB128(1372)
+      .FinishEntry()
+      .FDEHeader(cie, 0x848037a1, 0x7b30475e)
+      .D8(lul::DW_CFA_set_loc).D32(0x17713850)  // later rules use this address
+      .D8(lul::DW_CFA_undefined).ULEB128(5721)
+      .FinishEntry()
+      .D32(0)                           // Terminate the sequence.
+      // This FDE should be ignored.
+      .FDEHeader(cie, 0xf19629fe, 0x439fb09b)
+      .FinishEntry();
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.Terminator", section);
+  // Only the first FDE is reported; then the early terminator is reported.
+  EXPECT_CALL(handler, Entry(_, 0x848037a1, 0x7b30475e, 1, "", 67))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(0x848037a1, kCFARegister, 3772, 1372))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(0x17713850, 5721))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(reporter, EarlyEHTerminator(_))
+      .InSequence(s).WillOnce(Return());
+
+  ParseEHFrameSection(&section);
+}
+
+// The parser should recognize the Linux Standards Base 'z' augmentations.
+TEST_F(LulDwarfEHFrame, SimpleFDE) {
+   lul::DwarfPointerEncoding lsda_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_indirect
+                                | lul::DW_EH_PE_datarel
+                                | lul::DW_EH_PE_sdata2);
+  lul::DwarfPointerEncoding fde_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel
+                                | lul::DW_EH_PE_udata2);
+  // Use the text-relative, unsigned 2-byte encoding for the section's pointers.
+  section.SetPointerEncoding(fde_encoding);
+  section.SetEncodedPointerBases(encoded_pointer_bases);
+  Label cie;
+  section
+      .Mark(&cie)
+      .CIEHeader(4873, 7012, 100, 1, "zSLPR")
+      .ULEB128(7)                                // Augmentation data length
+      .D8(lsda_encoding)                         // LSDA pointer format
+      .D8(lul::DW_EH_PE_pcrel)                   // personality pointer format
+      .EncodedPointer(0x97baa00, lul::DW_EH_PE_pcrel) // and value
+      .D8(fde_encoding)                          // FDE pointer format
+      .D8(lul::DW_CFA_def_cfa).ULEB128(6706).ULEB128(31)
+      .FinishEntry()
+      .FDEHeader(cie, 0x540f6b56, 0xf686)
+      .ULEB128(2)                                // Augmentation data length
+      .EncodedPointer(0xe3eab475, lsda_encoding) // LSDA pointer, signed
+      .D8(lul::DW_CFA_set_loc)
+      .EncodedPointer(0x540fa4ce, fde_encoding)
+      .D8(lul::DW_CFA_undefined).ULEB128(0x675e)
+      .FinishEntry()
+      .D32(0);                                   // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.SimpleFDE", section);
+  // The bool arguments mirror each encoding's DW_EH_PE_indirect bit.
+  EXPECT_CALL(handler, Entry(_, 0x540f6b56, 0xf686, 1, "zSLPR", 100))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, PersonalityRoutine(0x97baa00, false))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, LanguageSpecificDataArea(0xe3eab475, true))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, SignalHandler())
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(0x540f6b56, kCFARegister, 6706, 31))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(0x540fa4ce, 0x675e))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// Check that we can handle an empty 'z' augmentation.
+TEST_F(LulDwarfEHFrame, EmptyZ) {
+  Label cie;
+  section
+      .Mark(&cie)
+      .CIEHeader(5955, 5805, 228, 1, "z")
+      .ULEB128(0)                                // Augmentation data length
+      .D8(lul::DW_CFA_def_cfa).ULEB128(3629).ULEB128(247)
+      .FinishEntry()
+      .FDEHeader(cie, 0xda007738, 0xfb55c641)
+      .ULEB128(0)                                // Augmentation data length
+      .D8(lul::DW_CFA_advance_loc1).D8(11)
+      .D8(lul::DW_CFA_undefined).ULEB128(3769)
+      .FinishEntry();
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.EmptyZ", section);
+  // advance_loc1's delta of 11 is scaled by the CIE's code factor, 5955.
+  EXPECT_CALL(handler, Entry(_, 0xda007738, 0xfb55c641, 1, "z", 228))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, ValOffsetRule(0xda007738, kCFARegister, 3629, 247))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, UndefinedRule(0xda007738 + 11 * 5955, 3769))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// Check that we recognize bad 'z' augmentation characters: the unknown 'Q' must be reported via UnrecognizedAugmentation.
+TEST_F(LulDwarfEHFrame, BadZ) {
+  Label cie;
+  section
+      .Mark(&cie)
+      .CIEHeader(6937, 1045, 142, 1, "zQ")       // code align, data align, RA reg, version, augmentation
+      .ULEB128(0)                                // Augmentation data length
+      .D8(lul::DW_CFA_def_cfa).ULEB128(9006).ULEB128(7725)
+      .FinishEntry()
+      .FDEHeader(cie, 0x1293efa8, 0x236f53f2)    // initial location, address range
+      .ULEB128(0)                                // Augmentation data length
+      .D8(lul::DW_CFA_advance_loc | 12)
+      .D8(lul::DW_CFA_register).ULEB128(5667).ULEB128(3462)
+      .FinishEntry();
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.BadZ", section);
+
+  EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "zQ"))  // no handler callbacks are expected at all
+      .WillOnce(Return());
+
+  ParseEHFrameSection(&section, false);  // NOTE(review): 'false' presumably means the parse is expected to fail -- confirm
+}
+
+TEST_F(LulDwarfEHFrame, zL) {  // 'L': the CIE names the encoding of the LSDA pointer that each FDE carries
+  Label cie;
+  lul::DwarfPointerEncoding lsda_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_funcrel | lul::DW_EH_PE_udata2);
+  section
+      .Mark(&cie)
+      .CIEHeader(9285, 9959, 54, 1, "zL")
+      .ULEB128(1)                       // Augmentation data length
+      .D8(lsda_encoding)                // encoding for LSDA pointer in FDE
+
+      .FinishEntry()
+      .FDEHeader(cie, 0xd40091aa, 0x9aa6e746)
+      .ULEB128(2)                       // Augmentation data length (the two-byte LSDA pointer)
+      .EncodedPointer(0xd40099cd, lsda_encoding) // LSDA pointer, written relative to the FDE start address
+      .FinishEntry()
+      .D32(0);                                   // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zL", section);
+
+  EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zL", 54))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, LanguageSpecificDataArea(0xd40099cd, false))  // the absolute address is recovered
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+TEST_F(LulDwarfEHFrame, zP) {  // 'P': the CIE augmentation data holds an encoded personality routine pointer
+  Label cie;
+  lul::DwarfPointerEncoding personality_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_datarel | lul::DW_EH_PE_udata2);
+  section
+      .Mark(&cie)
+      .CIEHeader(1097, 6313, 17, 1, "zP")
+      .ULEB128(3)                  // Augmentation data length (1 encoding byte + 2 pointer bytes)
+      .D8(personality_encoding)    // encoding for personality routine
+      .EncodedPointer(0xe3eaccac, personality_encoding) // value
+      .FinishEntry()
+      .FDEHeader(cie, 0x0c8350c9, 0xbef11087)
+      .ULEB128(0)                       // Augmentation data length
+      .FinishEntry()
+      .D32(0);                                   // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zP", section);
+
+  EXPECT_CALL(handler, Entry(_, 0x0c8350c9, 0xbef11087, 1, "zP", 17))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, PersonalityRoutine(0xe3eaccac, false))  // reported right after Entry
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+TEST_F(LulDwarfEHFrame, zR) {  // 'R': the CIE names the pointer encoding used for the FDE's addresses
+  Label cie;
+  lul::DwarfPointerEncoding pointer_encoding =
+      lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel | lul::DW_EH_PE_sdata2);
+  section.SetPointerEncoding(pointer_encoding);  // so that FDEHeader writes its addresses with this encoding
+  section
+      .Mark(&cie)
+      .CIEHeader(8011, 5496, 75, 1, "zR")
+      .ULEB128(1)                       // Augmentation data length
+      .D8(pointer_encoding)             // encoding for FDE addresses
+      .FinishEntry()
+      .FDEHeader(cie, 0x540f9431, 0xbd0)
+      .ULEB128(0)                       // Augmentation data length
+      .FinishEntry()
+      .D32(0);                          // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zR", section);
+
+  EXPECT_CALL(handler, Entry(_, 0x540f9431, 0xbd0, 1, "zR", 75))  // addresses decode back to the originals
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+TEST_F(LulDwarfEHFrame, zS) {  // 'S': has no augmentation data; the handler must get a SignalHandler() call
+  Label cie;
+  section
+      .Mark(&cie)
+      .CIEHeader(9217, 7694, 57, 1, "zS")
+      .ULEB128(0)                                // Augmentation data length
+      .FinishEntry()
+      .FDEHeader(cie, 0xd40091aa, 0x9aa6e746)
+      .ULEB128(0)                                // Augmentation data length
+      .FinishEntry()
+      .D32(0);                                   // terminator
+
+  PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zS", section);
+
+  EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zS", 57))
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, SignalHandler())  // expected between Entry and End
+      .InSequence(s).WillOnce(Return(true));
+  EXPECT_CALL(handler, End())
+      .InSequence(s).WillOnce(Return(true));
+
+  ParseEHFrameSection(&section);
+}
+
+// These tests require manual inspection of the test output: each one only calls a single Reporter method and asserts nothing.
+struct CFIReporterFixture {
+  CFIReporterFixture() : reporter(gtest_logging_sink_for_LulTestDwarf,
+                                  "test file name", "test section name") { }
+  CallFrameInfo::Reporter reporter;  // a concrete CallFrameInfo::Reporter, not a mock
+};
+
+class LulDwarfCFIReporter: public CFIReporterFixture, public Test { };
+
+TEST_F(LulDwarfCFIReporter, Incomplete) {
+  reporter.Incomplete(0x0102030405060708ULL, CallFrameInfo::kUnknown);
+}
+
+TEST_F(LulDwarfCFIReporter, EarlyEHTerminator) {
+  reporter.EarlyEHTerminator(0x0102030405060708ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, CIEPointerOutOfRange) {
+  reporter.CIEPointerOutOfRange(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, BadCIEId) {
+  reporter.BadCIEId(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, UnrecognizedVersion) {
+  reporter.UnrecognizedVersion(0x0123456789abcdefULL, 43);
+}
+
+TEST_F(LulDwarfCFIReporter, UnrecognizedAugmentation) {
+  reporter.UnrecognizedAugmentation(0x0123456789abcdefULL, "poodles");
+}
+
+TEST_F(LulDwarfCFIReporter, InvalidPointerEncoding) {
+  reporter.InvalidPointerEncoding(0x0123456789abcdefULL, 0x42);
+}
+
+TEST_F(LulDwarfCFIReporter, UnusablePointerEncoding) {
+  reporter.UnusablePointerEncoding(0x0123456789abcdefULL, 0x42);
+}
+
+TEST_F(LulDwarfCFIReporter, RestoreInCIE) {
+  reporter.RestoreInCIE(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, BadInstruction) {
+  reporter.BadInstruction(0x0123456789abcdefULL, CallFrameInfo::kFDE,
+                          0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, NoCFARule) {
+  reporter.NoCFARule(0x0123456789abcdefULL, CallFrameInfo::kCIE,
+                     0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, EmptyStateStack) {
+  reporter.EmptyStateStack(0x0123456789abcdefULL, CallFrameInfo::kTerminator,
+                           0xfedcba9876543210ULL);
+}
+
+TEST_F(LulDwarfCFIReporter, ClearingCFARule) {
+  reporter.ClearingCFARule(0x0123456789abcdefULL, CallFrameInfo::kFDE,
+                           0xfedcba9876543210ULL);
+}
+class LulDwarfExpr : public Test { };  // stateless suite for the parseDwarfExpr tests
+
+class MockSummariser : public Summariser {
+public:
+  MockSummariser() : Summariser(nullptr, 0, nullptr) {}  // NOTE(review): base gets null/zero arguments; presumably unused by these tests -- confirm
+  MOCK_METHOD2(Entry, void(uintptr_t, uintptr_t));
+  MOCK_METHOD0(End, void());
+  MOCK_METHOD5(Rule, void(uintptr_t, int, LExprHow, int16_t, int64_t));
+  MOCK_METHOD1(AddPfxInstr, uint32_t(PfxInstr));  // the only method the LulDwarfExpr tests set expectations on
+};
+
+TEST_F(LulDwarfExpr, SimpleTransliteration) {  // each DW_OP_* below must map to the PfxInstr(s) listed, in order
+  MockSummariser summ;
+  ByteReader reader(ENDIANNESS_LITTLE);
+
+  CFISection section(kLittleEndian, 8);
+  section
+     .D8(DW_OP_lit0)
+     .D8(DW_OP_lit31)
+     .D8(DW_OP_breg0 + 17).LEB128(-1234)  // i.e. DW_OP_breg17 with offset -1234
+     .D8(DW_OP_const4s).D32(0xFEDC9876)
+     .D8(DW_OP_deref)
+     .D8(DW_OP_and)
+     .D8(DW_OP_plus)
+     .D8(DW_OP_minus)
+     .D8(DW_OP_shl)
+     .D8(DW_OP_ge);
+  string expr;
+  bool ok = section.GetContents(&expr);
+  EXPECT_TRUE(ok);
+
+  {
+    InSequence s;
+    // required start marker
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0)));
+    // DW_OP_lit0
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0)));
+    // DW_OP_lit31
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 31)));
+    // DW_OP_breg17 -1234: expands to three instructions (register, offset, add)
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_DwReg, 17)));
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, -1234)));
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add)));
+    // DW_OP_const4s 0xFEDC9876
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0xFEDC9876)));
+    // DW_OP_deref
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Deref)));
+    // DW_OP_and
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_And)));
+    // DW_OP_plus
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add)));
+    // DW_OP_minus
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Sub)));
+    // DW_OP_shl
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Shl)));
+    // DW_OP_ge
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_CmpGES)));
+    // required end marker
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_End)));
+  }
+
+  int32_t ix = parseDwarfExpr(&summ, &reader, expr, false, false, false);
+  EXPECT_TRUE(ix >= 0);  // success is a non-negative value; the failing tests expect -1
+}
+
+TEST_F(LulDwarfExpr, UnknownOpcode) {  // an opcode the parser does not handle must make parsing fail
+  MockSummariser summ;
+  ByteReader reader(ENDIANNESS_LITTLE);
+
+  CFISection section(kLittleEndian, 8);
+  section
+     .D8(DW_OP_lo_user - 1);  // the opcode just below DW_OP_lo_user
+  string expr;
+  bool ok = section.GetContents(&expr);
+  EXPECT_TRUE(ok);
+
+  {
+    InSequence s;
+    // required start marker; nothing else is expected to be emitted
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0)));
+  }
+
+  int32_t ix = parseDwarfExpr(&summ, &reader, expr, false, false, false);
+  EXPECT_TRUE(ix == -1);
+}
+
+TEST_F(LulDwarfExpr, ExpressionOverrun) {  // a truncated operand must make parsing fail
+  MockSummariser summ;
+  ByteReader reader(ENDIANNESS_LITTLE);
+
+  CFISection section(kLittleEndian, 8);
+  section
+     .D8(DW_OP_const4s).D8(0x12).D8(0x34).D8(0x56);  // the 4-byte operand is one byte short
+  string expr;
+  bool ok = section.GetContents(&expr);
+  EXPECT_TRUE(ok);
+
+  {
+    InSequence s;
+    // required start marker
+    EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0)));
+    // DW_OP_const4s followed by 3 (a.k.a. not enough) bytes
+    // We expect PfxInstr(PX_SImm32, not-known-for-sure-32-bit-immediate)
+    // Hence must use _ as the argument.
+    EXPECT_CALL(summ, AddPfxInstr(_));
+  }
+
+  int32_t ix = parseDwarfExpr(&summ, &reader, expr, false, false, false);
+  EXPECT_TRUE(ix == -1);
+}
+
+// We'll need to mention one specific Dwarf register in the EvaluatePfxExpr tests (r11 on ARM, xbp on x86/x64),
+// and those names are arch-specific, so a bit of macro magic is helpful: one name for the UnwindRegs field, one for the Dwarf number.
+#if defined(LUL_ARCH_arm)
+# define TESTED_REG_STRUCT_NAME  r11
+# define TESTED_REG_DWARF_NAME   DW_REG_ARM_R11
+#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
+# define TESTED_REG_STRUCT_NAME  xbp
+# define TESTED_REG_DWARF_NAME   DW_REG_INTEL_XBP
+#else
+# error "Unknown plat"
+#endif
+
+struct EvaluatePfxExprFixture {
+  // Creates:
+  // initial stack, AVMA 0x12345678, holding a pointer-sized little-endian 0xdeadbeef at offset 4 bytes
+  // initial regs, with the tested register (R11 on ARM, XBP on x86/x64) = 0x14141356
+  // initial CFA = 0x5432ABCD
+  EvaluatePfxExprFixture() {
+    // The test stack.
+    si.mStartAvma = 0x12345678;
+    si.mLen = 0;
+#   define XX(_byte) do { si.mContents[si.mLen++] = (_byte); } while (0)
+    XX(0x55); XX(0x55); XX(0x55); XX(0x55);  // four filler bytes below the word
+    if (sizeof(void*) == 8) {
+      // le64
+      XX(0xEF); XX(0xBE); XX(0xAD); XX(0xDE); XX(0); XX(0); XX(0); XX(0);
+    } else {
+      // le32
+      XX(0xEF); XX(0xBE); XX(0xAD); XX(0xDE);
+    }
+    XX(0xAA); XX(0xAA); XX(0xAA); XX(0xAA);  // four filler bytes above the word
+#   undef XX
+    // The initial CFA.
+    initialCFA = TaggedUWord(0x5432ABCD);
+    // The initial register state: everything zeroed except the tested register.
+    memset(&regs, 0, sizeof(regs));
+    regs.TESTED_REG_STRUCT_NAME = TaggedUWord(0x14141356);
+  }
+
+  StackImage  si;
+  TaggedUWord initialCFA;
+  UnwindRegs  regs;
+};
+
+class LulDwarfEvaluatePfxExpr : public EvaluatePfxExprFixture, public Test { };
+
+TEST_F(LulDwarfEvaluatePfxExpr, NormalEvaluation) {
+  vector<PfxInstr> instrs;
+  // Put some junk at the start of the insn sequence, so evaluation must honour the start offset.
+  instrs.push_back(PfxInstr(PX_End));
+  instrs.push_back(PfxInstr(PX_End));
+
+  // Now the real sequence. Each comment shows the stack after the instruction pushed just above it.
+  // stack is empty
+  instrs.push_back(PfxInstr(PX_Start, 1));
+  // 0x5432ABCD
+  instrs.push_back(PfxInstr(PX_SImm32, 0x31415927));
+  // 0x5432ABCD 0x31415927
+  instrs.push_back(PfxInstr(PX_DwReg, TESTED_REG_DWARF_NAME));
+  // 0x5432ABCD 0x31415927 0x14141356
+  instrs.push_back(PfxInstr(PX_SImm32, 42));
+  // 0x5432ABCD 0x31415927 0x14141356 42
+  instrs.push_back(PfxInstr(PX_Sub));
+  // 0x5432ABCD 0x31415927 0x1414132c
+  instrs.push_back(PfxInstr(PX_Add));
+  // 0x5432ABCD 0x45556c53
+  instrs.push_back(PfxInstr(PX_SImm32, si.mStartAvma + 4));
+  // 0x5432ABCD 0x45556c53 0x1234567c
+  instrs.push_back(PfxInstr(PX_Deref));
+  // 0x5432ABCD 0x45556c53 0xdeadbeef
+  instrs.push_back(PfxInstr(PX_SImm32, 0xFE01DC23));
+  // 0x5432ABCD 0x45556c53 0xdeadbeef 0xFE01DC23
+  instrs.push_back(PfxInstr(PX_And));
+  // 0x5432ABCD 0x45556c53 0xde019c23
+  instrs.push_back(PfxInstr(PX_SImm32, 7));
+  // 0x5432ABCD 0x45556c53 0xde019c23 7
+  instrs.push_back(PfxInstr(PX_Shl));
+  // 0x5432ABCD 0x45556c53 0x6f00ce1180 (64-bit view; a 32-bit word keeps only 0x00ce1180)
+  instrs.push_back(PfxInstr(PX_SImm32, 0x7fffffff));
+  // 0x5432ABCD 0x45556c53 0x6f00ce1180 0x7fffffff
+  instrs.push_back(PfxInstr(PX_And));
+  // 0x5432ABCD 0x45556c53 0x00ce1180
+  instrs.push_back(PfxInstr(PX_Add));
+  // 0x5432ABCD 0x46237dd3
+  instrs.push_back(PfxInstr(PX_Sub));
+  // 0xe0f2dfa
+
+  instrs.push_back(PfxInstr(PX_End));
+
+  TaggedUWord res = EvaluatePfxExpr(2/*offset of start insn*/,
+                                    &regs, initialCFA, &si, instrs);
+  EXPECT_TRUE(res.Valid());
+  EXPECT_TRUE(res.Value() == 0xe0f2dfa);
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, EmptySequence) {
+  vector<PfxInstr> instrs;  // deliberately left empty: there is nothing at offset 0
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_FALSE(res.Valid());
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, BogusStartPoint) {
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_SImm32, 42));
+  instrs.push_back(PfxInstr(PX_SImm32, 24));  // offset 1: a PX_SImm32, not a PX_Start
+  instrs.push_back(PfxInstr(PX_SImm32, 4224));
+  TaggedUWord res = EvaluatePfxExpr(1, &regs, initialCFA, &si, instrs);
+  EXPECT_FALSE(res.Valid());
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, MissingEndMarker) {
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_Start, 0));
+  instrs.push_back(PfxInstr(PX_SImm32, 24));  // last instruction: the program has no PX_End
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_FALSE(res.Valid());
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, StackUnderflow) {
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_Start, 0));  // 0: the initial CFA is not pushed, so the stack stays empty
+  instrs.push_back(PfxInstr(PX_End));       // nothing on the stack to return
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_FALSE(res.Valid());
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, StackNoUnderflow) {
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_Start, 1/*push the initial CFA*/));
+  instrs.push_back(PfxInstr(PX_End));  // the single stack entry becomes the result
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_TRUE(res.Valid());
+  EXPECT_TRUE(res == initialCFA);
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, StackOverflow) {
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_Start, 0));
+  for (int i = 0; i < 10+1; i++) {  // 11 pushes, one more than StackNoOverflow; presumably the stack holds 10 -- confirm
+     instrs.push_back(PfxInstr(PX_SImm32, i + 100));
+  }
+  instrs.push_back(PfxInstr(PX_End));
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_FALSE(res.Valid());
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, StackNoOverflow) {
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_Start, 0));
+  for (int i = 0; i < 10+0; i++) {  // exactly 10 pushes must still fit
+     instrs.push_back(PfxInstr(PX_SImm32, i + 100));
+  }
+  instrs.push_back(PfxInstr(PX_End));
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_TRUE(res.Valid());
+  EXPECT_TRUE(res == TaggedUWord(109));  // the last value pushed: 9 + 100
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, OutOfRangeShl) {
+  vector<PfxInstr> instrs(1, PfxInstr(PX_Start, 0));  // the program begins with the start marker
+  instrs.push_back(PfxInstr(PX_SImm32, 1234));
+  instrs.push_back(PfxInstr(PX_SImm32, 5678));        // shift count far beyond any word width
+  instrs.push_back(PfxInstr(PX_Shl));
+  instrs.push_back(PfxInstr(PX_End));  // terminate properly, so only the bad shift can make the result invalid
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_TRUE(!res.Valid());
+}
+
+TEST_F(LulDwarfEvaluatePfxExpr, TestCmpGES) {
+  const int32_t argsL[6] = { 0, 0, 1, -2, -1, -2 };
+  const int32_t argsR[6] = { 0, 1, 0, -2, -2, -1 };
+  // expecting (argsL[i] >= argsR[i], signed):  t f t t t f  = 101110 = 0x2E
+  vector<PfxInstr> instrs;
+  instrs.push_back(PfxInstr(PX_Start, 0));
+  // The "running total": one result bit per comparison, first comparison ending up most significant
+  instrs.push_back(PfxInstr(PX_SImm32, 0));
+  for (unsigned int i = 0; i < sizeof(argsL)/sizeof(argsL[0]); i++) {
+     // Shift the "running total" at the bottom of the stack left by one bit
+     instrs.push_back(PfxInstr(PX_SImm32, 1));
+     instrs.push_back(PfxInstr(PX_Shl));
+     // Push both test args and do the comparison
+     instrs.push_back(PfxInstr(PX_SImm32, argsL[i]));
+     instrs.push_back(PfxInstr(PX_SImm32, argsR[i]));
+     instrs.push_back(PfxInstr(PX_CmpGES));
+     // Or the result into the running total
+     instrs.push_back(PfxInstr(PX_Or));
+  }
+  instrs.push_back(PfxInstr(PX_End));
+  TaggedUWord res = EvaluatePfxExpr(0, &regs, initialCFA, &si, instrs);
+  EXPECT_TRUE(res.Valid());
+  EXPECT_TRUE(res == TaggedUWord(0x2E));
+}
+
+} // namespace lul
diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.cpp b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp
new file mode 100644
index 000000000..ba8e2e41e
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp
@@ -0,0 +1,491 @@
+// Copyright (c) 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Derived from:
+// test_assembler.cc: Implementation of google_breakpad::TestAssembler.
+// See test_assembler.h for details.
+
+// Derived from:
+// cfi_assembler.cc: Implementation of google_breakpad::CFISection class.
+// See cfi_assembler.h for details.
+
+#include "LulTestInfrastructure.h"
+
+namespace lul_test {
+namespace test_assembler {
+
+using std::back_insert_iterator;
+
+Label::Label() : value_(new Binding()) { }                     // a fresh binding of its own
+Label::Label(uint64_t value) : value_(new Binding(value)) { }  // a binding holding the constant VALUE
+Label::Label(const Label &label)
+    : value_(label.value_) {
+  value_->Acquire();  // share LABEL's binding, holding our own reference to it
+}
+Label::~Label() {
+  if (value_->Release()) delete value_;  // drop our reference; free the binding when Release says so
+}
+
+Label &Label::operator=(uint64_t value) {
+  value_->Set(NULL, value);  // a NULL base means "the constant VALUE"
+  return *this;
+}
+
+Label &Label::operator=(const Label &label) {
+  value_->Set(label.value_, 0);  // equate with LABEL: its binding plus zero; the binding object itself is kept
+  return *this;
+}
+
+Label Label::operator+(uint64_t addend) const {
+  Label sum;                        // new label with a binding of its own
+  sum.value_->Set(value_, addend);  // constrain it to *this + ADDEND
+  return sum;
+}
+
+Label Label::operator-(uint64_t subtrahend) const {
+  Label difference;  // new label with a binding of its own
+  difference.value_->Set(value_, uint64_t(0) - subtrahend);  // adding the modular negation subtracts
+  return difference;
+}
+
+// When NDEBUG is #defined, assert doesn't evaluate its argument. This
+// means you can't simply use assert to check the return value of a
+// function with necessary side effects.
+//
+// ALWAYS_EVALUATE_AND_ASSERT(x) evaluates x regardless of whether
+// NDEBUG is #defined; when NDEBUG is not #defined, it further asserts
+// that x is true. Either way the expansion is an expression of type void.
+#ifdef NDEBUG
+#define ALWAYS_EVALUATE_AND_ASSERT(x) ((void)(x))
+#else
+#define ALWAYS_EVALUATE_AND_ASSERT(x) assert(x)
+#endif
+
+uint64_t Label::operator-(const Label &label) const {
+  uint64_t offset = 0;  // IsKnownOffsetFrom leaves this untouched on failure; keep NDEBUG builds deterministic
+  ALWAYS_EVALUATE_AND_ASSERT(IsKnownOffsetFrom(label, &offset));
+  return offset;
+}
+
+bool Label::IsKnownConstant(uint64_t *value_p) const {
+  Binding *root = NULL;
+  uint64_t constant = 0;
+  value_->Get(&root, &constant);            // resolve to (final base, addend)
+  const bool is_constant = (root == NULL);  // a NULL final base means the addend is the value
+  if (is_constant && value_p) *value_p = constant;
+  return is_constant;
+}
+
+bool Label::IsKnownOffsetFrom(const Label &label, uint64_t *offset_p) const
+{
+  // Resolve both labels to (final base, addend) pairs. Get follows each
+  // chain to its end, so labels that are related, however indirectly, end
+  // up with the same final base; two constants both end up with NULL.
+  Binding *other_root, *own_root;
+  uint64_t other_addend, own_addend;
+  label.value_->Get(&other_root, &other_addend);
+  value_->Get(&own_root, &own_addend);
+  const bool related = (own_root == other_root);
+  if (related && offset_p) *offset_p = own_addend - other_addend;
+  return related;
+}
+
+Label::Binding::Binding() : base_(this), addend_(), reference_count_(1) { }  // base_ == this: not yet constrained
+
+Label::Binding::Binding(uint64_t addend)
+    : base_(NULL), addend_(addend), reference_count_(1) { }  // base_ == NULL: a known constant
+
+Label::Binding::~Binding() {
+  assert(reference_count_ == 0);  // must only be destroyed once nothing refers to it
+  if (base_ && base_ != this && base_->Release())  // drop the reference we hold on another binding, if any
+    delete base_;
+}
+
+void Label::Binding::Set(Binding *binding, uint64_t addend) {  // constrain: this == BINDING + ADDEND (NULL BINDING: the constant ADDEND)
+  if (!base_ && !binding) {
+    // We're equating two constants. This could be okay.
+    assert(addend_ == addend);
+  } else if (!base_) {
+    // We are a known constant, but BINDING may not be, so turn the
+    // tables and try to set BINDING's value instead.
+    binding->Set(NULL, addend_ - addend);  // this == binding + addend, hence binding == addend_ - addend
+  } else {
+    if (binding) {
+      // Find binding's final value. Since the final value is always either
+      // completely unconstrained or a constant, never a reference to
+      // another variable (otherwise, it wouldn't be final), this
+      // guarantees we won't create cycles here, even for code like this:
+      //   l = m, m = n, n = l;
+      uint64_t binding_addend;
+      binding->Get(&binding, &binding_addend);  // BINDING now names the final base (NULL for a constant)
+      addend += binding_addend;
+    }
+
+    // It seems likely that setting a binding to itself is a bug
+    // (although I can imagine this might turn out to be helpful to
+    // permit).
+    assert(binding != this);
+
+    if (base_ != this) {
+      // Set the other bindings on our chain as well. Note that this
+      // is sufficient even though binding relationships form trees:
+      // All binding operations traverse their chains to the end, and
+      // all bindings related to us share some tail of our chain, so
+      // they will see the changes we make here.
+      base_->Set(binding, addend - addend_);  // this == base_ + addend_, hence base_ == binding + addend - addend_
+      // We're not going to use base_ any more.
+      if (base_->Release()) delete base_;
+    }
+
+    // Adopt BINDING as our base. Note that it should be correct to
+    // acquire here, after the release above, even though the usual
+    // reference-counting rules call for acquiring first, and then
+    // releasing: the self-reference assertion above should have
+    // complained if BINDING were 'this' or anywhere along our chain,
+    // so we didn't release BINDING.
+    if (binding) binding->Acquire();
+    base_ = binding;
+    addend_ = addend;
+  }
+}
+
+void Label::Binding::Get(Binding **base, uint64_t *addend) {  // reports the final base (NULL for a constant) and total addend
+  if (base_ && base_ != this) {
+    // Recurse to find the end of our reference chain (the root of our
+    // tree), and then rewrite every binding along the chain to refer
+    // to it directly, adjusting addends appropriately. (This is why
+    // this member function isn't this-const.)
+    Binding *final_base;
+    uint64_t final_addend;
+    base_->Get(&final_base, &final_addend);
+    if (final_base) final_base->Acquire();  // acquire the new base before releasing the old one
+    if (base_->Release()) delete base_;
+    base_ = final_base;
+    addend_ += final_addend;  // our addend is now relative to the final base
+  }
+  *base = base_;
+  *addend = addend_;
+}
+
+template<typename Inserter>
+static inline void InsertEndian(test_assembler::Endianness endianness,
+                                size_t size, uint64_t number, Inserter dest) {
+  // Writes the low SIZE bytes of NUMBER through DEST in the requested order.
+  assert(size > 0);
+  if (endianness != kLittleEndian) {
+    assert(endianness == kBigEndian);
+    // Most significant byte first: count the remaining bytes down to zero.
+    for (size_t remaining = size; remaining > 0; remaining--)
+      *dest++ = static_cast<char>((number >> ((remaining - 1) * 8)) & 0xff);
+    return;
+  }
+  // Least significant byte first: peel bytes off the bottom of NUMBER.
+  for (size_t emitted = 0; emitted < size; emitted++, number >>= 8)
+    *dest++ = static_cast<char>(number & 0xff);
+}
+
+Section &Section::Append(Endianness endianness, size_t size, uint64_t number) {
+  InsertEndian(endianness, size, number,  // appends SIZE bytes of NUMBER to the end of contents_
+               back_insert_iterator<string>(contents_));
+  return *this;
+}
+
+Section &Section::Append(Endianness endianness, size_t size,
+                         const Label &label) {
+  // If this label's value is known, there's no reason to waste an
+  // entry in references_ on it.
+  uint64_t value;
+  if (label.IsKnownConstant(&value))
+    return Append(endianness, size, value);
+
+  // This will get caught when the references are resolved, but it's
+  // nicer to find out earlier.
+  assert(endianness != kUnsetEndian);
+
+  references_.push_back(Reference(contents_.size(), endianness, size, label));  // remember where to patch
+  contents_.append(size, 0);  // zero placeholder bytes; GetContents overwrites them with the label's value
+  return *this;
+}
+
+#define ENDIANNESS_L kLittleEndian
+#define ENDIANNESS_B kBigEndian
+#define ENDIANNESS(e) ENDIANNESS_ ## e
+
+#define DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits)                      \
+  Section &Section::e ## bits(uint ## bits ## _t v) {                  \
+    InsertEndian(ENDIANNESS(e), bits / 8, v,                            \
+                 back_insert_iterator<string>(contents_));              \
+    return *this;                                                       \
+  }
+
+#define DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits)                       \
+  Section &Section::e ## bits(const Label &v) {                         \
+    return Append(ENDIANNESS(e), bits / 8, v);                          \
+  }
+
+// Define L16, B32, and friends: for each endianness letter and width, one overload taking a number and one taking a Label.
+#define DEFINE_SHORT_APPEND_ENDIAN(e, bits)                             \
+  DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits)                            \
+  DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits)
+
+DEFINE_SHORT_APPEND_LABEL_ENDIAN(L, 8);  // 8-bit: Label overload only here; NOTE(review): numeric L8/B8 presumably live in the header -- confirm
+DEFINE_SHORT_APPEND_LABEL_ENDIAN(B, 8);
+DEFINE_SHORT_APPEND_ENDIAN(L, 16);
+DEFINE_SHORT_APPEND_ENDIAN(L, 32);
+DEFINE_SHORT_APPEND_ENDIAN(L, 64);
+DEFINE_SHORT_APPEND_ENDIAN(B, 16);
+DEFINE_SHORT_APPEND_ENDIAN(B, 32);
+DEFINE_SHORT_APPEND_ENDIAN(B, 64);
+
+#define DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits)                        \
+  Section &Section::D ## bits(uint ## bits ## _t v) {                  \
+    InsertEndian(endianness_, bits / 8, v,                              \
+                 back_insert_iterator<string>(contents_));              \
+    return *this;                                                       \
+  }
+#define DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits)                         \
+  Section &Section::D ## bits(const Label &v) {                         \
+    return Append(endianness_, bits / 8, v);                            \
+  }
+#define DEFINE_SHORT_APPEND_DEFAULT(bits)                               \
+  DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits)                              \
+  DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits)
+
+DEFINE_SHORT_APPEND_LABEL_DEFAULT(8)  // the D* variants use the section's own endianness_; 8-bit again gets only the Label overload
+DEFINE_SHORT_APPEND_DEFAULT(16);
+DEFINE_SHORT_APPEND_DEFAULT(32);
+DEFINE_SHORT_APPEND_DEFAULT(64);
+
+Section &Section::LEB128(long long value) {
+  // Top seven bits set: OR-ed in after shifting a negative value so the
+  // result is sign-extended even where >> on a negative number is logical.
+  const unsigned long long sign_fill = ~(~0ULL >> 7);
+  while (value < -0x40 || value > 0x3f) {
+    contents_.push_back(static_cast<char>((value & 0x7f) | 0x80));
+    value = (value < 0) ? ((value >> 7) | sign_fill) : (value >> 7);
+  }
+  contents_.push_back(static_cast<char>(value & 0x7f));
+  return *this;
+}
+
+Section &Section::ULEB128(uint64_t value) {
+  // Emit seven bits per byte, least significant group first; every byte
+  // except the last carries the 0x80 continuation flag.
+  for (; value > 0x7f; value >>= 7)
+    contents_.push_back(static_cast<char>((value & 0x7f) | 0x80));
+  contents_.push_back(static_cast<char>(value));
+  return *this;
+}
+
+Section &Section::Align(size_t alignment, uint8_t pad_byte) {
+  // ALIGNMENT must be a power of two. NOTE(review): an ALIGNMENT of zero also passes this check.
+  assert(((alignment - 1) & alignment) == 0);
+  size_t new_size = (contents_.size() + alignment - 1) & ~(alignment - 1);  // round the size up to a multiple of ALIGNMENT
+  contents_.append(new_size - contents_.size(), pad_byte);
+  assert((contents_.size() & (alignment - 1)) == 0);
+  return *this;
+}
+
+bool Section::GetContents(string *contents) {
+  // For each label reference, find the label's value, and patch it into
+  // the section's contents.
+  for (size_t i = 0; i < references_.size(); i++) {
+    Reference &r = references_[i];
+    uint64_t value;
+    if (!r.label.IsKnownConstant(&value)) {
+      fprintf(stderr, "Undefined label #%zu at offset 0x%zx\n", i, r.offset);
+      return false;  // *contents is untouched; references before this one are already patched into contents_
+    }
+    assert(r.offset < contents_.size());
+    assert(contents_.size() - r.offset >= r.size);
+    InsertEndian(r.endianness, r.size, value, contents_.begin() + r.offset);  // overwrite the placeholder bytes in place
+  }
+  contents->clear();
+  std::swap(contents_, *contents);  // hand the bytes to the caller; this section is left empty
+  references_.clear();
+  return true;
+}
+
+}  // namespace test_assembler
+}  // namespace lul_test
+
+
+namespace lul_test {
+
+CFISection &CFISection::CIEHeader(uint64_t code_alignment_factor,
+                                  int data_alignment_factor,
+                                  unsigned return_address_register,
+                                  uint8_t version,
+                                  const string &augmentation,
+                                  bool dwarf64) {
+  assert(!entry_length_);  // no entry may be open; FinishEntry resets this to NULL
+  entry_length_ = new PendingLength();
+  in_fde_ = false;
+
+  if (dwarf64) {
+    D32(kDwarf64InitialLengthMarker);
+    D64(entry_length_->length);  // placeholder: the length label is only given a value in FinishEntry
+    entry_length_->start = Here();  // the entry length is measured from just after the length field
+    D64(eh_frame_ ? kEHFrame64CIEIdentifier : kDwarf64CIEIdentifier);
+  } else {
+    D32(entry_length_->length);  // placeholder, as above
+    entry_length_->start = Here();
+    D32(eh_frame_ ? kEHFrame32CIEIdentifier : kDwarf32CIEIdentifier);
+  }
+  D8(version);
+  AppendCString(augmentation);
+  ULEB128(code_alignment_factor);
+  LEB128(data_alignment_factor);
+  if (version == 1)
+    D8(return_address_register);  // version 1 writes the return address register as a single byte
+  else
+    ULEB128(return_address_register);
+  return *this;
+}
+
+CFISection &CFISection::FDEHeader(Label cie_pointer,
+                                  uint64_t initial_location,
+                                  uint64_t address_range,
+                                  bool dwarf64) {
+  assert(!entry_length_);  // no entry may be open; FinishEntry resets this to NULL
+  entry_length_ = new PendingLength();
+  in_fde_ = true;
+  fde_start_address_ = initial_location;  // base that EncodedPointer uses for DW_EH_PE_funcrel
+
+  if (dwarf64) {
+    D32(kDwarf64InitialLengthMarker);  // same named marker that CIEHeader writes
+    D64(entry_length_->length);  // placeholder: the length label is only given a value in FinishEntry
+    entry_length_->start = Here();
+    if (eh_frame_)
+      D64(Here() - cie_pointer);  // .eh_frame: distance back from this field to the CIE
+    else
+      D64(cie_pointer);
+  } else {
+    D32(entry_length_->length);  // placeholder, as above
+    entry_length_->start = Here();
+    if (eh_frame_)
+      D32(Here() - cie_pointer);  // .eh_frame: distance back from this field to the CIE
+    else
+      D32(cie_pointer);
+  }
+  EncodedPointer(initial_location);
+  // The FDE length in an .eh_frame section uses the same encoding as the
+  // initial location, but ignores the base address (selected by the upper
+  // nybble of the encoding), as it's a length, not an address that can be
+  // made relative.
+  EncodedPointer(address_range,
+                 DwarfPointerEncoding(pointer_encoding_ & 0x0f));
+  return *this;
+}
+
+CFISection &CFISection::FinishEntry() {
+  assert(entry_length_);  // a CIEHeader or FDEHeader must have opened an entry
+  Align(address_size_, lul::DW_CFA_nop);  // pad the entry with DW_CFA_nop bytes up to the address size
+  entry_length_->length = Here() - entry_length_->start;  // gives the header's length label its value
+  delete entry_length_;
+  entry_length_ = NULL;
+  in_fde_ = false;
+  return *this;
+}
+
+CFISection &CFISection::EncodedPointer(uint64_t address,
+                                       DwarfPointerEncoding encoding,
+                                       const EncodedPointerBases &bases) {
+  // Omitted data is extremely easy to emit.
+  if (encoding == lul::DW_EH_PE_omit)
+    return *this;
+
+  // If (encoding & lul::DW_EH_PE_indirect) != 0, then we assume
+  // that ADDRESS is the address at which the pointer is stored --- in
+  // other words, that bit has no effect on how we write the pointer.
+  encoding = DwarfPointerEncoding(encoding & ~lul::DW_EH_PE_indirect);
+
+  // Find the base address to which this pointer is relative. The upper
+  // nybble of the encoding specifies this.
+  uint64_t base;
+  switch (encoding & 0xf0) {
+    case lul::DW_EH_PE_absptr:  base = 0;                  break;
+    case lul::DW_EH_PE_pcrel:   base = bases.cfi + Size(); break;
+    case lul::DW_EH_PE_textrel: base = bases.text;         break;
+    case lul::DW_EH_PE_datarel: base = bases.data;         break;
+    case lul::DW_EH_PE_funcrel: base = fde_start_address_; break;
+    case lul::DW_EH_PE_aligned: base = 0;                  break;
+    default: abort();  // unknown base selector: a bug in the calling test
+  }
+
+  // Make ADDRESS relative. Yes, this is appropriate even for "absptr"
+  // values; see gcc/unwind-pe.h.
+  address -= base;
+
+  // Align the pointer, if required.
+  if ((encoding & 0xf0) == lul::DW_EH_PE_aligned)
+    Align(AddressSize());
+
+  // Append ADDRESS to this section in the appropriate form. For the
+  // fixed-width forms, we don't need to differentiate between signed and
+  // unsigned encodings, because ADDRESS has already been extended to 64
+  // bits before it was passed to us.
+  switch (encoding & 0x0f) {
+    case lul::DW_EH_PE_absptr:
+      Address(address);
+      break;
+
+    case lul::DW_EH_PE_uleb128:
+      ULEB128(address);
+      break;
+
+    case lul::DW_EH_PE_sleb128:
+      LEB128(address);
+      break;
+
+    case lul::DW_EH_PE_udata2:
+    case lul::DW_EH_PE_sdata2:
+      D16(address);
+      break;
+
+    case lul::DW_EH_PE_udata4:
+    case lul::DW_EH_PE_sdata4:
+      D32(address);
+      break;
+
+    case lul::DW_EH_PE_udata8:
+    case lul::DW_EH_PE_sdata8:
+      D64(address);
+      break;
+
+    default:
+      abort();  // unknown value format: a bug in the calling test
+  }
+
+  return *this;
+}
+
+} // namespace lul_test
diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.h b/tools/profiler/tests/gtest/LulTestInfrastructure.h
new file mode 100644
index 000000000..37b1b7d49
--- /dev/null
+++ b/tools/profiler/tests/gtest/LulTestInfrastructure.h
@@ -0,0 +1,666 @@
+// -*- mode: C++ -*-
+
+// Copyright (c) 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
+
+// Derived from:
+// cfi_assembler.h: Define CFISection, a class for creating properly
+// (and improperly) formatted DWARF CFI data for unit tests.
+
+// Derived from:
+// test-assembler.h: interface to class for building complex binary streams.
+
+// To test the Breakpad symbol dumper and processor thoroughly, for
+// all combinations of host system and minidump processor
+// architecture, we need to be able to easily generate complex test
+// data like debugging information and minidump files.
+// 
+// For example, if we want our unit tests to provide full code
+// coverage for stack walking, it may be difficult to persuade the
+// compiler to generate every possible sort of stack walking
+// information that we want to support; there are probably DWARF CFI
+// opcodes that GCC never emits. Similarly, if we want to test our
+// error handling, we will need to generate damaged minidumps or
+// debugging information that (we hope) the client or compiler will
+// never produce on its own.
+//
+// google_breakpad::TestAssembler provides a predictable and
+// (relatively) simple way to generate complex formatted data streams
+// like minidumps and CFI. Furthermore, because TestAssembler is
+// portable, developers without access to (say) Visual Studio or a
+// SPARC assembler can still work on test data for those targets.
+
+#ifndef LUL_TEST_INFRASTRUCTURE_H
+#define LUL_TEST_INFRASTRUCTURE_H
+
+#include <string>
+#include <vector>
+
+using std::string;
+using std::vector;
+
+namespace lul_test {
+namespace test_assembler {
+
+// A Label represents a value not yet known that we need to store in a
+// section. As long as all the labels a section refers to are defined
+// by the time we retrieve its contents as bytes, we can use undefined
+// labels freely in that section's construction.
+//
+// A label can be in one of three states:
+// - undefined,
+// - defined as the sum of some other label and a constant, or
+// - a constant.
+// 
+// A label's value never changes, but it can accumulate constraints.
+// Adding labels and integers is permitted, and yields a label.
+// Subtracting a constant from a label is permitted, and also yields a
+// label. Subtracting two labels that have some relationship to each
+// other is permitted, and yields a constant.
+//
+// For example:
+//
+//   Label a;               // a's value is undefined
+//   Label b;               // b's value is undefined
+//   {
+//     Label c = a + 4;     // okay, even though a's value is unknown
+//     b = c + 4;           // also okay; b is now a+8
+//   }
+//   Label d = b - 2;       // okay; d == a+6, even though c is gone
+//   d.Value();             // error: d's value is not yet known
+//   d - a;                 // is 6, even though their values are not known
+//   a = 12;                // now b == 20, and d == 18
+//   d.Value();             // 18: no longer an error
+//   b.Value();             // 20
+//   d = 10;                // error: d is already defined.
+//
+// Label objects' lifetimes are unconstrained: notice that, in the
+// above example, even though a and b are only related through c, and
+// c goes out of scope, the assignment to a sets b's value as well. In
+// particular, it's not necessary to ensure that a Label lives beyond
+// Sections that refer to it.
+class Label {
+ public:
+  Label();                               // An undefined label.
+  explicit Label(uint64_t value);        // A label with a fixed value.
+  Label(const Label &value);             // A label equal to another.
+  ~Label();
+
+  Label &operator=(uint64_t value);
+  Label &operator=(const Label &value);
+  Label operator+(uint64_t addend) const;
+  Label operator-(uint64_t subtrahend) const;
+  uint64_t operator-(const Label &subtrahend) const;
+
+  // Operators == and != that also work on undefined, but related,
+  // labels could be added here; they are not provided.
+
+  // Report whether this label's value is already known. When it is and
+  // VALUE_P is non-NULL, the known value is stored in *VALUE_P.
+  bool IsKnownConstant(uint64_t *value_p = NULL) const;
+
+  // Report whether the offset from LABEL to this label is known. When
+  // it is and OFFSET_P is non-NULL, the offset is stored in *OFFSET_P.
+  //
+  // l.IsKnownOffsetFrom(m, &d) behaves like 'd = l-m', except that its
+  // return value also says whether the subtraction can be carried out
+  // with what is currently known about l and m. That can be the case
+  // even while the values of l and m themselves are still unknown. For
+  // example:
+  //
+  //   Label l, m;
+  //   m = l + 10;
+  //   l.IsKnownConstant();             // false
+  //   m.IsKnownConstant();             // false
+  //   uint64_t d;
+  //   l.IsKnownOffsetFrom(m, &d);      // true, and sets d to -10.
+  //   l-m                              // -10
+  //   m-l                              // 10
+  //   m.IsKnownConstant(&d);           // still false: m is not known
+  bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const;
+
+ private:
+  // What is known about a label's value: either the value itself or
+  // its relationship to other labels' values. A binding is one of:
+  // - a known constant,
+  // - some other binding plus a constant, or
+  // - unconstrained, i.e. free to take on any value.
+  //
+  // Several labels may share one binding, and a binding may refer to
+  // another binding. Labels and bindings therefore form trees: labels
+  // are the leaves, bindings are the interior nodes and roots, and
+  // every link points from a child to its parent. Reference counting
+  // of bindings is what lets labels be lightweight, copyable,
+  // assignable, storable in containers, and so on.
+  class Binding {
+   public:
+    Binding();
+    explicit Binding(uint64_t addend);
+    ~Binding();
+
+    // Add one reference to this binding.
+    void Acquire() { ++reference_count_; }
+    // Drop one reference; return true if none are left afterwards.
+    bool Release() { return 0 == --reference_count_; }
+
+    // Make this binding equal to BINDING + ADDEND. A NULL BINDING
+    // makes this binding the known constant ADDEND instead. Every
+    // binding on this binding's chain is updated to point directly
+    // at BINDING, or to become a constant, with its addend adjusted
+    // to match.
+    void Set(Binding *binding, uint64_t addend);
+
+    // Report what is known about this binding's value.
+    // - Known constant: BASE is set to NULL and ADDEND to the
+    //   value.
+    // - Not a known constant, but related to other bindings: BASE
+    //   is set to the binding at the end of the relation chain
+    //   (always an unconstrained one) and ADDEND to the amount to
+    //   add to that binding's value to obtain this binding's
+    //   value.
+    // - Unconstrained: BASE is set to this, and ADDEND is left
+    //   unchanged.
+    void Get(Binding **base, uint64_t *addend);
+
+   private:
+    // The representation distinguishes three cases:
+    //
+    // - Known constant value: base_ is NULL and addend_ holds the
+    //   value.
+    //
+    // - Completely unconstrained value: base_ points to this
+    //   binding itself, and addend_ is unused.
+    //
+    // - Value related to another binding's value: base_ points to
+    //   that other binding, and addend_ is the amount to add to
+    //   that binding's value to obtain this binding's value. Only
+    //   relationships of the form x = y+c are representable.
+    //
+    // Following base_ links therefore walks a chain that ends in
+    // either a known constant or a completely unconstrained value.
+    // Most operations on bindings compress that path: every binding
+    // on the chain is changed to point directly at the final value,
+    // and the addends are adjusted accordingly.
+    Binding *base_;
+    uint64_t addend_;
+
+    // How many Labels and Bindings point to this binding. The
+    // self-link of a completely unconstrained binding is not
+    // counted as a reference.
+    int reference_count_;
+  };
+
+  // The binding that holds this label's value.
+  Binding *value_;
+};
+
+// Conventions for representing multi-byte numbers as sequences of bytes.
+enum Endianness {
+  kBigEndian,        // Big-endian: the most significant byte comes first.
+  kLittleEndian,     // Little-endian: the least significant byte comes first.
+  kUnsetEndian,      // No byte order chosen; the Section constructor's default.
+};
+ 
+// A section is a sequence of bytes, constructed by appending bytes
+// to the end. Sections have a convenient and flexible set of member
+// functions for appending data in various formats: big-endian and
+// little-endian signed and unsigned values of different sizes;
+// LEB128 and ULEB128 values (see below), and raw blocks of bytes.
+//
+// If you need to append a value to a section that is not convenient
+// to compute immediately, you can create a label, append the
+// label's value to the section, and then set the label's value
+// later, when it's convenient to do so. Once a label's value is
+// known, the section class takes care of updating all previously
+// appended references to it.
+//
+// Once all the labels to which a section refers have had their
+// values determined, you can get a copy of the section's contents
+// as a string.
+//
+// Note that there is no specified "start of section" label. This is
+// because there are typically several different meanings for "the
+// start of a section": the offset of the section within an object
+// file, the address in memory at which the section's content appear,
+// and so on. It's up to the code that uses the Section class to 
+// keep track of these explicitly, as they depend on the application.
+class Section {
+ public:
+  explicit Section(Endianness endianness = kUnsetEndian)
+      : endianness_(endianness) {}
+
+  // This class is used as a base class, so its destructor is public
+  // and virtual.
+  virtual ~Section() {}
+
+  // Return the default endianness of this section.
+  Endianness endianness() const { return endianness_; }
+
+  // Append the bytes of DATA to the end of this section. Return a
+  // reference to this section.
+  Section &Append(const string &data) {
+    contents_ += data;
+    return *this;
+  }
+
+  // Append SIZE copies of BYTE to the end of this section. Return a
+  // reference to this section.
+  Section &Append(size_t size, uint8_t byte) {
+    contents_.append(size, static_cast<char>(byte));
+    return *this;
+  }
+
+  // Append NUMBER (or the value of LABEL) to this section, written in
+  // ENDIANNESS byte order and occupying SIZE bytes. Return a reference
+  // to this section.
+  Section &Append(Endianness endianness, size_t size, uint64_t number);
+  Section &Append(Endianness endianness, size_t size, const Label &label);
+
+  // Append SECTION to the end of this section. The labels SECTION
+  // refers to need not be defined yet.
+  //
+  // Doing so changes neither any Label's value nor SECTION itself. If
+  // placing SECTION inside 'this' implies new constraints on existing
+  // labels' values, the caller is responsible for adjusting those
+  // labels as needed.
+  Section &Append(const Section &section);
+
+  // Append the bytes of DATA followed by a terminating NUL character.
+  // Return a reference to this section.
+  Section &AppendCString(const string &data) {
+    contents_.append(data);
+    contents_.push_back('\0');
+    return *this;
+  }
+
+  // Append VALUE or LABEL to this section, with the given bit width and
+  // endianness. Return a reference to this section.
+  //
+  // The names of these functions have the form <ENDIANNESS><BITWIDTH>:
+  // <ENDIANNESS> is either 'L' (little-endian, least significant byte first),
+  //                        'B' (big-endian, most significant byte first), or
+  //                        'D' (default, the section's default endianness)
+  // <BITWIDTH> is 8, 16, 32, or 64.
+  //
+  // Endianness is irrelevant for a single byte, so the three
+  // <BITWIDTH>=8 functions taking a value all do the same thing.
+  //
+  // Signed and unsigned values can both be written with these: the
+  // compiler sign-extends a signed argument before the call, and from
+  // that point on the function behaves the same for either kind of
+  // value.
+  Section &L8(uint8_t value) { return Append(1, value); }
+  Section &B8(uint8_t value) { return Append(1, value); }
+  Section &D8(uint8_t value) { return Append(1, value); }
+  Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t);
+  Section &B16(uint16_t), &B32(uint32_t), &B64(uint64_t);
+  Section &D16(uint16_t), &D32(uint32_t), &D64(uint64_t);
+  Section &L8(const Label &label),  &L16(const Label &label),
+          &L32(const Label &label), &L64(const Label &label);
+  Section &B8(const Label &label),  &B16(const Label &label),
+          &B32(const Label &label), &B64(const Label &label);
+  Section &D8(const Label &label),  &D16(const Label &label),
+          &D32(const Label &label), &D64(const Label &label);
+
+  // Append VALUE in a signed LEB128 (Little-Endian Base 128) form.
+  //
+  // The signed LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between -0x40 and 0x3f, then its signed LEB128
+  //   representation is a single byte whose value is N.
+  //
+  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the signed LEB128 representation of N / 128,
+  //   rounded towards negative infinity.
+  //
+  // Put differently: VALUE is split into groups of seven bits, the
+  // groups are emitted least significant first, and each is written
+  // as an eight-bit byte with the high bit set on all but the last.
+  //
+  // VALUE cannot be a Label, because that would require implementing
+  // relaxation.
+  Section &LEB128(long long value);
+
+  // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form.
+  //
+  // The unsigned LEB128 representation of an integer N is a variable
+  // number of bytes:
+  //
+  // - If N is between 0 and 0x7f, then its unsigned LEB128
+  //   representation is a single byte whose value is N.
+  //
+  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
+  //   0x80, followed by the unsigned LEB128 representation of N /
+  //   128, rounded towards negative infinity.
+  //
+  // VALUE cannot be a Label, because that would require implementing
+  // relaxation.
+  Section &ULEB128(uint64_t value);
+
+  // Advance to the next position that is a multiple of ALIGNMENT
+  // bytes from the start of the section, filling the gap with
+  // PAD_BYTE. ALIGNMENT must be a power of two. Return a reference to
+  // this section.
+  Section &Align(size_t alignment, uint8_t pad_byte = 0);
+
+  // Return the number of bytes appended to the section so far.
+  size_t Size() const { return contents_.length(); }
+
+  // Return a label representing the start of the section.
+  //
+  // The user decides what this label stands for: the section's
+  // position in an object file, its address in memory, or something
+  // else. Applications that need more than one of these will find
+  // this simple-minded interface insufficient. This class only
+  // provides a single start label, for use with the Here and Mark
+  // member functions.
+  //
+  // Ideally a subclass that knows more about the application at hand
+  // would supply an appropriate collection of start labels. But then
+  // appending member functions such as Append and D32 would return a
+  // reference to the base class rather than the derived class, and
+  // chaining would stop working. Pretty notation is the whole point
+  // of this interface, so that would be a fatal flaw.
+  Label start() const { return start_; }
+
+  // Return a label for the position, relative to start(), at which
+  // the next appended item will be placed.
+  Label Here() const { return start_ + contents_.size(); }
+
+  // Set *LABEL to Here, and return a reference to this section.
+  Section &Mark(Label *label) { *label = start_ + Size(); return *this; }
+
+  // If this section no longer refers to any undefined label, store
+  // its contents in CONTENTS as a string and clear this section.
+  // Return true on success, or false if undefined labels were still
+  // present.
+  bool GetContents(string *contents);
+
+ private:
+  // Used internally. A reference to a label's value.
+  struct Reference {
+    Reference(size_t ref_offset, Endianness ref_endianness, size_t ref_size,
+              const Label &ref_label)
+        : offset(ref_offset), endianness(ref_endianness), size(ref_size),
+          label(ref_label) {}
+
+    // Where in the section the reference is located.
+    size_t offset;
+
+    // The byte order of the reference.
+    Endianness endianness;
+
+    // The size of the reference.
+    size_t size;
+
+    // The label being referred to.
+    Label label;
+  };
+
+  // The byte order this section uses by default.
+  Endianness endianness_;
+
+  // The bytes appended to the section so far.
+  string contents_;
+
+  // References to labels within those contents.
+  vector<Reference> references_;
+
+  // A label referring to the beginning of the section.
+  Label start_;
+};
+
+}  // namespace test_assembler
+}  // namespace lul_test
+
+
+namespace lul_test {
+
+using lul::DwarfPointerEncoding;
+using lul_test::test_assembler::Endianness;
+using lul_test::test_assembler::Label;
+using lul_test::test_assembler::Section;
+
+class CFISection: public Section {
+ public:
+  // CFI augmentation strings beginning with 'z', defined by the
+  // Linux/IA-64 C++ ABI, can specify interesting encodings for
+  // addresses appearing in FDE headers and call frame instructions (and
+  // for additional fields whose presence the augmentation string
+  // specifies). In particular, pointers can be specified to be relative
+  // to various base addresses: the start of the .text section, the
+  // location holding the address itself, and so on. These allow the
+  // frame data to be position-independent even when they live in
+  // write-protected pages. These variants are specified at the
+  // following two URLs:
+  //
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
+  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
+  //
+  // CFISection leaves the production of well-formed 'z'-augmented CIEs and
+  // FDEs to the user, but does provide EncodedPointer, to emit
+  // properly-encoded addresses for a given pointer encoding.
+  // EncodedPointer uses an instance of this structure to find the base
+  // addresses it should use; you can establish a default for all encoded
+  // pointers appended to this section with SetEncodedPointerBases.
+  struct EncodedPointerBases {
+    EncodedPointerBases() : cfi(), text(), data() { }
+
+    // The starting address of this CFI section in memory, for
+    // DW_EH_PE_pcrel. DW_EH_PE_pcrel pointers may only be used in data
+    // that is loaded into the program's address space.
+    uint64_t cfi;
+
+    // The starting address of this file's .text section, for DW_EH_PE_textrel.
+    uint64_t text;
+
+    // The starting address of this file's .got or .eh_frame_hdr section,
+    // for DW_EH_PE_datarel.
+    uint64_t data;
+  };
+
+  // Create a CFISection whose endianness is ENDIANNESS, and where
+  // machine addresses are ADDRESS_SIZE bytes long. If EH_FRAME is
+  // true, use the .eh_frame format, as described by the Linux
+  // Standards Base Core Specification, instead of the DWARF CFI
+  // format.
+  CFISection(Endianness endianness, size_t address_size,
+             bool eh_frame = false)
+      : Section(endianness), address_size_(address_size), eh_frame_(eh_frame),
+        pointer_encoding_(lul::DW_EH_PE_absptr), encoded_pointer_bases_(),
+        entry_length_(NULL), in_fde_(false), fde_start_address_(0) {
+    // The 'start', 'Here', and 'Mark' members of a CFISection all refer
+    // to section offsets.
+    start() = 0;
+  }
+
+  // Return this CFISection's address size.
+  size_t AddressSize() const { return address_size_; }
+
+  // Return true if this CFISection uses the .eh_frame format, or
+  // false if it contains ordinary DWARF CFI data.
+  bool ContainsEHFrame() const { return eh_frame_; }
+
+  // Use ENCODING for pointers in calls to FDEHeader and EncodedPointer.
+  void SetPointerEncoding(DwarfPointerEncoding encoding) {
+    pointer_encoding_ = encoding;
+  }
+
+  // Use the addresses in BASES as the base addresses for encoded
+  // pointers in subsequent calls to FDEHeader or EncodedPointer.
+  // This function makes a copy of BASES.
+  void SetEncodedPointerBases(const EncodedPointerBases &bases) {
+    encoded_pointer_bases_ = bases;
+  }
+
+  // Append a Common Information Entry header to this section with the
+  // given values. If dwarf64 is true, use the 64-bit DWARF initial
+  // length format for the CIE's initial length. Return a reference to
+  // this section. You should call FinishEntry after writing the last
+  // instruction for the CIE.
+  //
+  // Before calling this function, you will typically want to use Mark
+  // or Here to make a label to pass to FDEHeader that refers to this
+  // CIE's position in the section.
+  CFISection &CIEHeader(uint64_t code_alignment_factor,
+                        int data_alignment_factor,
+                        unsigned return_address_register,
+                        uint8_t version = 3,
+                        const string &augmentation = "",
+                        bool dwarf64 = false);
+
+  // Append a Frame Description Entry header to this section with the
+  // given values. If dwarf64 is true, use the 64-bit DWARF initial
+  // length format for the FDE's initial length. Return a reference to
+  // this section. You should call FinishEntry after writing the last
+  // instruction for the FDE.
+  //
+  // This function doesn't support entries that are longer than
+  // 0xffffff00 bytes. (The "initial length" is always a 32-bit
+  // value.) Nor does it support .debug_frame sections longer than
+  // 0xffffff00 bytes.
+  CFISection &FDEHeader(Label cie_pointer,
+                        uint64_t initial_location,
+                        uint64_t address_range,
+                        bool dwarf64 = false);
+
+  // Note the current position as the end of the last CIE or FDE we
+  // started, after padding with DW_CFA_nops for alignment. This
+  // defines the label representing the entry's length, cited in the
+  // entry's header. Return a reference to this section.
+  CFISection &FinishEntry();
+
+  // Append the contents of BLOCK as a DW_FORM_block value: an
+  // unsigned LEB128 length, followed by that many bytes of data.
+  CFISection &Block(const string &block) {
+    ULEB128(block.size());
+    Append(block);
+    return *this;
+  }
+
+  // Append ADDRESS to this section, in the appropriate size and
+  // endianness. Return a reference to this section.
+  CFISection &Address(uint64_t address) {
+    Section::Append(endianness(), address_size_, address);
+    return *this;
+  }
+
+  // Append ADDRESS to this section, using ENCODING and BASES. ENCODING
+  // defaults to this section's default encoding, established by
+  // SetPointerEncoding. BASES defaults to this section's bases, set by
+  // SetEncodedPointerBases. If the DW_EH_PE_indirect bit is set in the
+  // encoding, assume that ADDRESS is where the true address is stored.
+  // Return a reference to this section.
+  //
+  // (C++ doesn't let me use default arguments here, because I want to
+  // refer to members of *this in the default argument expression.)
+  CFISection &EncodedPointer(uint64_t address) {
+    return EncodedPointer(address, pointer_encoding_, encoded_pointer_bases_);
+  }
+  CFISection &EncodedPointer(uint64_t address, DwarfPointerEncoding encoding) {
+    return EncodedPointer(address, encoding, encoded_pointer_bases_);
+  }
+  CFISection &EncodedPointer(uint64_t address, DwarfPointerEncoding encoding,
+                             const EncodedPointerBases &bases);
+
+  // Restate some member functions, to keep chaining working nicely.
+  CFISection &Mark(Label *label)   { Section::Mark(label); return *this; }
+  CFISection &D8(uint8_t v)        { Section::D8(v);       return *this; }
+  CFISection &D16(uint16_t v)      { Section::D16(v);      return *this; }
+  CFISection &D16(const Label &v)  { Section::D16(v);      return *this; }
+  CFISection &D32(uint32_t v)      { Section::D32(v);      return *this; }
+  CFISection &D32(const Label &v)  { Section::D32(v);      return *this; }
+  CFISection &D64(uint64_t v)      { Section::D64(v);      return *this; }
+  CFISection &D64(const Label &v)  { Section::D64(v);      return *this; }
+  CFISection &LEB128(long long v)  { Section::LEB128(v);   return *this; }
+  CFISection &ULEB128(uint64_t v)  { Section::ULEB128(v);  return *this; }
+
+ private:
+  // A length value that we've appended to the section, but is not yet
+  // known. LENGTH is the appended value; START is a label referring
+  // to the start of the data whose length was cited.
+  struct PendingLength {
+    Label length;
+    Label start;
+  };
+
+  // Constants used in CFI/.eh_frame data:
+
+  // If the first four bytes of an "initial length" are this constant, then
+  // the data uses the 64-bit DWARF format, and the length itself is the
+  // subsequent eight bytes.
+  static const uint32_t kDwarf64InitialLengthMarker = 0xffffffffU;
+
+  // The CIE identifier for 32- and 64-bit DWARF CFI and .eh_frame data.
+  static const uint32_t kDwarf32CIEIdentifier = ~(uint32_t)0;
+  static const uint64_t kDwarf64CIEIdentifier = ~(uint64_t)0;
+  static const uint32_t kEHFrame32CIEIdentifier = 0;
+  static const uint64_t kEHFrame64CIEIdentifier = 0;
+
+  // The size of a machine address for the data in this section.
+  size_t address_size_;
+
+  // If true, we are generating a Linux .eh_frame section, instead of
+  // a standard DWARF .debug_frame section.
+  bool eh_frame_;
+
+  // The encoding to use for FDE pointers.
+  DwarfPointerEncoding pointer_encoding_;
+
+  // The base addresses to use when emitting encoded pointers.
+  EncodedPointerBases encoded_pointer_bases_;
+
+  // The length value for the current entry.
+  //
+  // Oddly, this must be dynamically allocated. Labels never get new
+  // values; they only acquire constraints on the value they already
+  // have, or assert if you assign them something incompatible. So
+  // each header needs truly fresh Label objects to cite in their
+  // headers and track their positions. The alternative is explicit
+  // destructor invocation and a placement new. Ick.
+  PendingLength *entry_length_;
+
+  // True if we are currently emitting an FDE --- that is, we have
+  // called FDEHeader but have not yet called FinishEntry.
+  bool in_fde_;
+
+  // If in_fde_ is true, this is its starting address. We use this for
+  // emitting DW_EH_PE_funcrel pointers. The constructor sets it to zero
+  // so that it never holds an indeterminate value.
+  uint64_t fde_start_address_;
+};
+
+}  // namespace lul_test
+
+#endif // LUL_TEST_INFRASTRUCTURE_H
diff --git a/tools/profiler/tests/gtest/ThreadProfileTest.cpp b/tools/profiler/tests/gtest/ThreadProfileTest.cpp
new file mode 100644
index 000000000..4399a5bc2
--- /dev/null
+++ b/tools/profiler/tests/gtest/ThreadProfileTest.cpp
@@ -0,0 +1,75 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+
+#include "ProfileEntry.h"
+#include "ThreadProfile.h"
+
+// Smoke test: build a ThreadProfile from a ThreadInfo and a ProfileBuffer.
+TEST(ThreadProfile, Initialization) {
+  PseudoStack* pseudoStack = PseudoStack::create();
+  Thread::tid_t threadId = 1000;
+  ThreadInfo threadInfo("testThread", threadId, true, pseudoStack, nullptr);
+  RefPtr<ProfileBuffer> buffer = new ProfileBuffer(10);
+  ThreadProfile profile(&threadInfo, buffer);
+}
+
+// Add a single tag and check that it is the entry at the read position.
+TEST(ThreadProfile, InsertOneTag) {
+  PseudoStack* pseudoStack = PseudoStack::create();
+  Thread::tid_t threadId = 1000;
+  ThreadInfo threadInfo("testThread", threadId, true, pseudoStack, nullptr);
+  RefPtr<ProfileBuffer> buffer = new ProfileBuffer(10);
+  buffer->addTag(ProfileEntry('t', 123.1));
+  ASSERT_TRUE(buffer->mEntries != nullptr);
+  ASSERT_TRUE(buffer->mEntries[buffer->mReadPos].mTagName == 't');
+  ASSERT_TRUE(buffer->mEntries[buffer->mReadPos].mTagDouble == 123.1);
+}
+
+// Add fewer tags than the buffer holds and read them all back in order.
+TEST(ThreadProfile, InsertTagsNoWrap) {
+  PseudoStack* pseudoStack = PseudoStack::create();
+  Thread::tid_t threadId = 1000;
+  ThreadInfo threadInfo("testThread", threadId, true, pseudoStack, nullptr);
+  RefPtr<ProfileBuffer> buffer = new ProfileBuffer(100);
+  const int kTagCount = 50;
+  for (int tag = 0; tag < kTagCount; ++tag) {
+    buffer->addTag(ProfileEntry('t', tag));
+  }
+  ASSERT_TRUE(buffer->mEntries != nullptr);
+  for (int pos = buffer->mReadPos; pos != buffer->mWritePos;
+       pos = (pos + 1) % buffer->mEntrySize) {
+    // The test expects each entry's payload to equal its slot index.
+    ASSERT_TRUE(buffer->mEntries[pos].mTagName == 't');
+    ASSERT_TRUE(buffer->mEntries[pos].mTagInt == pos);
+  }
+}
+
+// Add more tags than the buffer holds and check the basic wrapping case.
+TEST(ThreadProfile, InsertTagsWrap) {
+  PseudoStack* pseudoStack = PseudoStack::create();
+  Thread::tid_t threadId = 1000;
+  ThreadInfo threadInfo("testThread", threadId, true, pseudoStack, nullptr);
+  // One slot always stays empty, so a buffer with kCapacity + 1 entries
+  // can hold only kCapacity tags at a time.
+  const int kCapacity = 24;
+  const int kInserted = 43;
+  RefPtr<ProfileBuffer> buffer = new ProfileBuffer(kCapacity + 1);
+  for (int tag = 0; tag < kInserted; ++tag) {
+    buffer->addTag(ProfileEntry('t', tag));
+  }
+  ASSERT_TRUE(buffer->mEntries != nullptr);
+  // The first (kInserted - kCapacity) tags were discarded when we
+  // wrapped, so the oldest surviving tag carries that value.
+  int expected = kInserted - kCapacity;
+  for (int pos = buffer->mReadPos; pos != buffer->mWritePos;
+       pos = (pos + 1) % buffer->mEntrySize) {
+    ASSERT_TRUE(buffer->mEntries[pos].mTagName == 't');
+    ASSERT_TRUE(buffer->mEntries[pos].mTagInt == expected);
+    ++expected;
+  }
+}
+
diff --git a/tools/profiler/tests/gtest/moz.build b/tools/profiler/tests/gtest/moz.build
new file mode 100644
index 000000000..33aded164
--- /dev/null
+++ b/tools/profiler/tests/gtest/moz.build
@@ -0,0 +1,30 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, you can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG['OS_TARGET'] in ('Android', 'Linux'):  # Lul* test sources: Android/Linux targets only
+    UNIFIED_SOURCES += [
+        'LulTestDwarf.cpp',
+        'LulTestInfrastructure.cpp',
+    ]
+    if CONFIG['CPU_ARCH'] != 'x86':  # LulTest.cpp is left out when CPU_ARCH is 'x86'
+        UNIFIED_SOURCES += [
+            'LulTest.cpp',
+        ]
+
+LOCAL_INCLUDES += [  # profiler directories added to the include search path
+    '/tools/profiler/core',
+    '/tools/profiler/gecko',
+    '/tools/profiler/lul',
+]
+
+UNIFIED_SOURCES += [  # added regardless of OS_TARGET / CPU_ARCH
+    'ThreadProfileTest.cpp',
+]
+
+FINAL_LIBRARY = 'xul-gtest'
+
+if CONFIG['GNU_CXX']:  # GNU C++ only: -Wshadow diagnostics are not promoted to errors
+    CXXFLAGS += ['-Wno-error=shadow']
diff --git a/tools/profiler/tests/head_profiler.js b/tools/profiler/tests/head_profiler.js
new file mode 100644
index 000000000..a3821f51f
--- /dev/null
+++ b/tools/profiler/tests/head_profiler.js
@@ -0,0 +1,31 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+const Cu = Components.utils;
+
+function getInflatedStackLocations(thread, sample) {
+  let stackTable = thread.stackTable;
+  let frameTable = thread.frameTable;
+  let stringTable = thread.stringTable;
+  let SAMPLE_STACK_SLOT = thread.samples.schema.stack;
+  let STACK_PREFIX_SLOT = stackTable.schema.prefix;
+  let STACK_FRAME_SLOT = stackTable.schema.frame;
+  let FRAME_LOCATION_SLOT = frameTable.schema.location;
+
+  // Build the stack from the raw data and accumulate the locations in
+  // an array.
+  let stackIndex = sample[SAMPLE_STACK_SLOT];
+  let locations = [];
+  while (stackIndex !== null) {
+    let stackEntry = stackTable.data[stackIndex];
+    let frame = frameTable.data[stackEntry[STACK_FRAME_SLOT]];
+    locations.push(stringTable[frame[FRAME_LOCATION_SLOT]]);
+    stackIndex = stackEntry[STACK_PREFIX_SLOT];
+  }
+
+  // The profiler tree is inverted, so reverse the array.
+  return locations.reverse();
+}
diff --git a/tools/profiler/tests/test_asm.js b/tools/profiler/tests/test_asm.js
new file mode 100644
index 000000000..4d273a559
--- /dev/null
+++ b/tools/profiler/tests/test_asm.js
@@ -0,0 +1,79 @@
+// Check that asm.js code shows up on the stack.
+function run_test() {
+    let p = Cc["@mozilla.org/tools/profiler;1"];
+
+    // Just skip the test if the profiler component isn't present.
+    if (!p)
+        return;
+    p = p.getService(Ci.nsIProfiler);
+    if (!p)
+        return;
+
+    // This test assumes that it's starting on an empty SPS stack.
+    // (Note that the other profiler tests also assume the profiler
+    // isn't already started.)
+    do_check_true(!p.IsActive());
+
+    let jsFuns = Cu.getJSTestingFunctions();
+    if (!jsFuns.isAsmJSCompilationAvailable())  // nothing to test without asm.js compilation
+        return;
+
+    const ms = 10;  // NOTE(review): presumably the sampling interval in ms - confirm against nsIProfiler
+    p.StartProfiler(10000, ms, ["js"], 1);
+
+    let stack = null;  // set by ffi_function() to the last sample's locations, as one string
+    function ffi_function(){  // invoked through the asm.js module's |ffi| import
+        var delayMS = 5;  // busy-wait length; doubled after every unsuccessful attempt
+        while (1) {
+            let then = Date.now();
+            do {} while (Date.now() - then < delayMS);
+
+            var thread0 = p.getProfileData().threads[0];
+
+            if (delayMS > 30000)  // give up once the wait has grown beyond 30000 ms
+                return;
+
+            delayMS *= 2;
+
+            if (thread0.samples.data.length == 0)  // no samples yet; spin again
+                continue;
+
+            var lastSample = thread0.samples.data[thread0.samples.data.length - 1];
+            stack = String(getInflatedStackLocations(thread0, lastSample));
+            if (stack.indexOf("trampoline") !== -1)  // done once a trampoline location was sampled
+                return;
+        }
+    }
+
+    function asmjs_module(global, ffis) {
+        "use asm";
+        var ffi = ffis.ffi;
+        function asmjs_function() {
+            ffi();
+        }
+        return asmjs_function;
+    }
+
+    do_check_true(jsFuns.isAsmJSModule(asmjs_module));
+
+    var asmjs_function = asmjs_module(null, {ffi:ffi_function});
+    do_check_true(jsFuns.isAsmJSFunction(asmjs_function));
+
+    asmjs_function();  // runs ffi_function() via the module, which fills in |stack|
+
+    do_check_neq(stack, null);
+
+    var i1 = stack.indexOf("entry trampoline");
+    do_check_true(i1 !== -1);
+    var i2 = stack.indexOf("asmjs_function");
+    do_check_true(i2 !== -1);
+    var i3 = stack.indexOf("FFI trampoline");
+    do_check_true(i3 !== -1);
+    var i4 = stack.indexOf("ffi_function");
+    do_check_true(i4 !== -1);
+    do_check_true(i1 < i2);  // the four locations must appear in this order in |stack|
+    do_check_true(i2 < i3);
+    do_check_true(i3 < i4);
+
+    p.StopProfiler();
+}
diff --git a/tools/profiler/tests/test_enterjit_osr.js b/tools/profiler/tests/test_enterjit_osr.js
new file mode 100644
index 000000000..a4bca590f
--- /dev/null
+++ b/tools/profiler/tests/test_enterjit_osr.js
@@ -0,0 +1,59 @@
+// Check that the EnterJIT frame, added by the JIT trampoline and
+// usable by a native unwinder to resume unwinding after encountering
+// JIT code, is pushed as expected.
+function run_test() {
+    let p = Cc["@mozilla.org/tools/profiler;1"];
+    // Just skip the test if the profiler component isn't present.
+    if (!p)
+        return;
+    p = p.getService(Ci.nsIProfiler);
+    if (!p)
+        return;
+
+    // This test assumes that it's starting on an empty SPS stack.
+    // (Note that the other profiler tests also assume the profiler
+    // isn't already started.)
+    do_check_true(!p.IsActive());
+
+    const ms = 5;  // NOTE(review): presumably the sampling interval in ms - confirm against nsIProfiler
+    p.StartProfiler(100, ms, ["js"], 1);
+
+    function arbitrary_name(){
+        // A frame for |arbitrary_name| has been pushed.  Do a sequence of
+        // increasingly long spins until we get a sample.
+        var delayMS = 5;
+        while (1) {
+            do_print("loop: ms = " + delayMS);
+            let then = Date.now();
+            do {
+                let n = 10000;
+                while (--n); // OSR happens here
+                // Spin in the hope of getting a sample.
+            } while (Date.now() - then < delayMS);
+            let pr = p.getProfileData().threads[0];
+            if (pr.samples.data.length > 0 || delayMS > 30000)  // got a sample, or waited long enough to give up
+                return pr;
+            delayMS *= 2;
+        }
+    };
+
+    var profile = arbitrary_name();  // despite the name, this is threads[0] of the profile data
+
+    do_check_neq(profile.samples.data.length, 0);
+    var lastSample = profile.samples.data[profile.samples.data.length - 1];
+    var stack = getInflatedStackLocations(profile, lastSample);
+    do_print(stack);
+
+    // All we can really check here is ensure that there is exactly
+    // one arbitrary_name frame in the list.
+    var gotName = false;
+    for (var i = 0; i < stack.length; i++) {
+        if (stack[i].match(/arbitrary_name/)) {
+            do_check_eq(gotName, false);  // fails if a second matching location is found
+            gotName = true;
+        }
+    }
+    do_check_eq(gotName, true);
+
+    p.StopProfiler();
+}
diff --git a/tools/profiler/tests/test_enterjit_osr_disabling.js b/tools/profiler/tests/test_enterjit_osr_disabling.js
new file mode 100644
index 000000000..dbf74c93a
--- /dev/null
+++ b/tools/profiler/tests/test_enterjit_osr_disabling.js
@@ -0,0 +1,21 @@
+function run_test() {
+    let p = Cc["@mozilla.org/tools/profiler;1"];
+    // Just skip the test if the profiler component isn't present.
+    if (!p)
+	return;
+    p = p.getService(Ci.nsIProfiler);
+    if (!p)
+	return;
+
+    do_check_true(!p.IsActive());  // the test expects to begin with the profiler stopped
+
+    p.StartProfiler(100, 10, ["js"], 1);
+    // The function is entered with the profiler enabled
+    (function (){
+	p.StopProfiler();  // turned off while this frame is still live
+	let n = 10000;
+	while (--n);  // OSR happens here with the profiler disabled.
+	// An assertion will fail when this function returns, if the
+	// SPS stack was misbalanced.
+    })();  // no explicit checks follow; returning without an assertion is the pass condition
+}
diff --git a/tools/profiler/tests/test_enterjit_osr_enabling.js b/tools/profiler/tests/test_enterjit_osr_enabling.js
new file mode 100644
index 000000000..ae696057b
--- /dev/null
+++ b/tools/profiler/tests/test_enterjit_osr_enabling.js
@@ -0,0 +1,21 @@
+function run_test() {
+    let p = Cc["@mozilla.org/tools/profiler;1"];
+    // Just skip the test if the profiler component isn't present.
+    if (!p)
+	return;
+    p = p.getService(Ci.nsIProfiler);
+    if (!p)
+	return;
+
+    do_check_true(!p.IsActive());  // the test expects to begin with the profiler stopped
+
+    // The function is entered with the profiler disabled.
+    (function (){
+	p.StartProfiler(100, 10, ["js"], 1);  // turned on while this frame is already live
+	let n = 10000;
+	while (--n); // OSR happens here with the profiler enabled.
+	// An assertion will fail when this function returns, if the
+	// SPS stack was misbalanced.
+    })();  // no explicit checks follow; returning without an assertion is the pass condition
+    p.StopProfiler();
+}
diff --git a/tools/profiler/tests/test_get_features.js b/tools/profiler/tests/test_get_features.js
new file mode 100644
index 000000000..4fbd5891c
--- /dev/null
+++ b/tools/profiler/tests/test_get_features.js
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+  
+function run_test() {
+  // If we can't get the profiler component then assume gecko was
+  // built without it and pass all the tests
+  var profilerCc = Cc["@mozilla.org/tools/profiler;1"];
+  if (!profilerCc)
+    return;
+
+  var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler);
+  if (!profiler)
+    return;
+
+  var profilerFeatures = profiler.GetFeatures([]);
+  do_check_true(profilerFeatures != null);
+}
diff --git a/tools/profiler/tests/test_pause.js b/tools/profiler/tests/test_pause.js
new file mode 100644
index 000000000..fedff70c4
--- /dev/null
+++ b/tools/profiler/tests/test_pause.js
@@ -0,0 +1,35 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function run_test() {
+  // If we can't get the profiler component then assume gecko was
+  // built without it and pass all the tests
+  var profilerCc = Cc["@mozilla.org/tools/profiler;1"];
+  if (!profilerCc)
+    return;
+
+  var profiler = profilerCc.getService(Ci.nsIProfiler);
+  if (!profiler)
+    return;
+
+  do_check_true(!profiler.IsActive());
+  do_check_true(!profiler.IsPaused());
+
+  profiler.StartProfiler(1000, 10, [], 0);
+
+  do_check_true(profiler.IsActive());
+
+  profiler.PauseSampling();
+
+  do_check_true(profiler.IsPaused());
+
+  profiler.ResumeSampling();
+
+  do_check_true(!profiler.IsPaused());
+
+  profiler.StopProfiler();
+  do_check_true(!profiler.IsActive());
+  do_check_true(!profiler.IsPaused());
+  do_test_finished();
+}
diff --git a/tools/profiler/tests/test_run.js b/tools/profiler/tests/test_run.js
new file mode 100644
index 000000000..fef03a07d
--- /dev/null
+++ b/tools/profiler/tests/test_run.js
@@ -0,0 +1,44 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function run_test() {
+  // If we can't get the profiler component then assume gecko was
+  // built without it and pass all the tests
+  var profilerCc = Cc["@mozilla.org/tools/profiler;1"];
+  if (!profilerCc)
+    return;
+
+  var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler);
+  if (!profiler)
+    return;
+
+  do_check_true(!profiler.IsActive());
+
+  profiler.StartProfiler(1000, 10, [], 0);
+
+  do_check_true(profiler.IsActive());
+
+  do_test_pending();
+
+  do_timeout(1000, function wait() {
+    // Check text profile format
+    var profileStr = profiler.GetProfile();
+    do_check_true(profileStr.length > 10);
+
+    // check json profile format
+    var profileObj = profiler.getProfileData();
+    do_check_neq(profileObj, null);
+    do_check_neq(profileObj.threads, null);
+    do_check_true(profileObj.threads.length >= 1);
+    do_check_neq(profileObj.threads[0].samples, null);
+    // NOTE: The number of samples will be empty since we
+    //       don't have any labels in the xpcshell code
+
+    profiler.StopProfiler();
+    do_check_true(!profiler.IsActive());
+    do_test_finished();
+  });
+
+
+}
diff --git a/tools/profiler/tests/test_shared_library.js b/tools/profiler/tests/test_shared_library.js
new file mode 100644
index 000000000..2bdbc0109
--- /dev/null
+++ b/tools/profiler/tests/test_shared_library.js
@@ -0,0 +1,23 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+function run_test() {
+  // If we can't get the profiler component then assume gecko was
+  // built without it and pass all the tests
+  var profilerCc = Cc["@mozilla.org/tools/profiler;1"];
+  if (!profilerCc)
+    return;
+
+  var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler);
+  if (!profiler)
+    return;
+
+  var sharedStr = profiler.getSharedLibraryInformation();
+  sharedStr = sharedStr.toLowerCase();
+
+  // Let's not hardcode anything too specific
+  // just some sanity checks.
+  do_check_neq(sharedStr, null);
+  do_check_neq(sharedStr, "");
+}
diff --git a/tools/profiler/tests/test_start.js b/tools/profiler/tests/test_start.js
new file mode 100644
index 000000000..b04b130ff
--- /dev/null
+++ b/tools/profiler/tests/test_start.js
@@ -0,0 +1,25 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+  
+function run_test() {
+  // If we can't get the profiler component then assume gecko was
+  // built without it and pass all the tests
+  var profilerCc = Cc["@mozilla.org/tools/profiler;1"];
+  if (!profilerCc)
+    return;
+
+  var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler);
+  if (!profiler)
+    return;
+
+  do_check_true(!profiler.IsActive());
+
+  profiler.StartProfiler(10, 100, [], 0);
+
+  do_check_true(profiler.IsActive());
+
+  profiler.StopProfiler();
+
+  do_check_true(!profiler.IsActive());
+}
diff --git a/tools/profiler/tests/xpcshell.ini b/tools/profiler/tests/xpcshell.ini
new file mode 100644
index 000000000..997a7c142
--- /dev/null
+++ b/tools/profiler/tests/xpcshell.ini
@@ -0,0 +1,18 @@
+[DEFAULT]
+head = head_profiler.js
+tail =
+skip-if = toolkit == 'android'
+
+[test_start.js]
+skip-if = true
+[test_get_features.js]
+[test_shared_library.js]
+[test_run.js]
+skip-if = true
+[test_pause.js]
+[test_enterjit_osr.js]
+[test_enterjit_osr_disabling.js]
+skip-if = !debug
+[test_enterjit_osr_enabling.js]
+skip-if = !debug
+[test_asm.js]
+\ No newline at end of file