diff options
Diffstat (limited to 'tools/profiler')
101 files changed, 26801 insertions, 0 deletions
diff --git a/tools/profiler/core/EHABIStackWalk.cpp b/tools/profiler/core/EHABIStackWalk.cpp new file mode 100644 index 000000000..76068cdea --- /dev/null +++ b/tools/profiler/core/EHABIStackWalk.cpp @@ -0,0 +1,678 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI, as described in: + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf + * + * This handles only the ARM-defined "personality routines" (chapter + * 9), and don't track the value of FP registers, because profiling + * needs only chain of PC/SP values. + * + * Because the exception handling info may not be accurate for all + * possible places where an async signal could occur (e.g., in a + * prologue or epilogue), this bounds-checks all stack accesses. + * + * This file uses "struct" for structures in the exception tables and + * "class" otherwise. We should avoid violating the C++11 + * standard-layout rules in the former. + */ + +#include "EHABIStackWalk.h" + +#include "shared-libraries.h" +#include "platform.h" + +#include "mozilla/Atomics.h" +#include "mozilla/Attributes.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/EndianUtils.h" + +#include <algorithm> +#include <elf.h> +#include <stdint.h> +#include <vector> +#include <string> + +#ifndef PT_ARM_EXIDX +#define PT_ARM_EXIDX 0x70000001 +#endif + +// Bug 1082817: ICS B2G has a buggy linker that doesn't always ensure +// that the EXIDX is sorted by address, as the spec requires. So in +// that case we build and sort an array of pointers into the index, +// and binary-search that; otherwise, we search the index in place +// (avoiding the time and space overhead of the indirection). +#if defined(ANDROID_VERSION) && ANDROID_VERSION < 16 +#define HAVE_UNSORTED_EXIDX +#endif + +namespace mozilla { + +struct PRel31 { + uint32_t mBits; + bool topBit() const { return mBits & 0x80000000; } + uint32_t value() const { return mBits & 0x7fffffff; } + int32_t offset() const { return (static_cast<int32_t>(mBits) << 1) >> 1; } + const void *compute() const { + return reinterpret_cast<const char *>(this) + offset(); + } +private: + PRel31(const PRel31 &copied) = delete; + PRel31() = delete; +}; + +struct EHEntry { + PRel31 startPC; + PRel31 exidx; +private: + EHEntry(const EHEntry &copied) = delete; + EHEntry() = delete; +}; + +class EHState { + // Note that any core register can be used as a "frame pointer" to + // influence the unwinding process, so this must track all of them. + uint32_t mRegs[16]; +public: + bool unwind(const EHEntry *aEntry, const void *stackBase); + uint32_t &operator[](int i) { return mRegs[i]; } + const uint32_t &operator[](int i) const { return mRegs[i]; } + EHState(const mcontext_t &); +}; + +enum { + R_SP = 13, + R_LR = 14, + R_PC = 15 +}; + +#ifdef HAVE_UNSORTED_EXIDX +class EHEntryHandle { + const EHEntry *mValue; +public: + EHEntryHandle(const EHEntry *aEntry) : mValue(aEntry) { } + const EHEntry *value() const { return mValue; } +}; + +bool operator<(const EHEntryHandle &lhs, const EHEntryHandle &rhs) { + return lhs.value()->startPC.compute() < rhs.value()->startPC.compute(); +} +#endif + +class EHTable { + uint32_t mStartPC; + uint32_t mEndPC; + uint32_t mLoadOffset; +#ifdef HAVE_UNSORTED_EXIDX + // In principle we should be able to binary-search the index section in + // place, but the ICS toolchain's linker is noncompliant and produces + // indices that aren't entirely sorted (e.g., libc). So we have this: + std::vector<EHEntryHandle> mEntries; + typedef std::vector<EHEntryHandle>::const_iterator EntryIterator; + EntryIterator entriesBegin() const { return mEntries.begin(); } + EntryIterator entriesEnd() const { return mEntries.end(); } + static const EHEntry* entryGet(EntryIterator aEntry) { + return aEntry->value(); + } +#else + typedef const EHEntry *EntryIterator; + EntryIterator mEntriesBegin, mEntriesEnd; + EntryIterator entriesBegin() const { return mEntriesBegin; } + EntryIterator entriesEnd() const { return mEntriesEnd; } + static const EHEntry* entryGet(EntryIterator aEntry) { return aEntry; } +#endif + std::string mName; +public: + EHTable(const void *aELF, size_t aSize, const std::string &aName); + const EHEntry *lookup(uint32_t aPC) const; + bool isValid() const { return entriesEnd() != entriesBegin(); } + const std::string &name() const { return mName; } + uint32_t startPC() const { return mStartPC; } + uint32_t endPC() const { return mEndPC; } + uint32_t loadOffset() const { return mLoadOffset; } +}; + +class EHAddrSpace { + std::vector<uint32_t> mStarts; + std::vector<EHTable> mTables; + static mozilla::Atomic<const EHAddrSpace*> sCurrent; +public: + explicit EHAddrSpace(const std::vector<EHTable>& aTables); + const EHTable *lookup(uint32_t aPC) const; + static void Update(); + static const EHAddrSpace *Get(); +}; + + +void EHABIStackWalkInit() +{ + EHAddrSpace::Update(); +} + +size_t EHABIStackWalk(const mcontext_t &aContext, void *stackBase, + void **aSPs, void **aPCs, const size_t aNumFrames) +{ + const EHAddrSpace *space = EHAddrSpace::Get(); + EHState state(aContext); + size_t count = 0; + + while (count < aNumFrames) { + uint32_t pc = state[R_PC], sp = state[R_SP]; + aPCs[count] = reinterpret_cast<void *>(pc); + aSPs[count] = reinterpret_cast<void *>(sp); + count++; + + if (!space) + break; + // TODO: cache these lookups. Binary-searching libxul is + // expensive (possibly more expensive than doing the actual + // unwind), and even a small cache should help. + const EHTable *table = space->lookup(pc); + if (!table) + break; + const EHEntry *entry = table->lookup(pc); + if (!entry) + break; + if (!state.unwind(entry, stackBase)) + break; + } + + return count; +} + + +class EHInterp { +public: + // Note that stackLimit is exclusive and stackBase is inclusive + // (i.e, stackLimit < SP <= stackBase), following the convention + // set by the AAPCS spec. + EHInterp(EHState &aState, const EHEntry *aEntry, + uint32_t aStackLimit, uint32_t aStackBase) + : mState(aState), + mStackLimit(aStackLimit), + mStackBase(aStackBase), + mNextWord(0), + mWordsLeft(0), + mFailed(false) + { + const PRel31 &exidx = aEntry->exidx; + uint32_t firstWord; + + if (exidx.mBits == 1) { // EXIDX_CANTUNWIND + mFailed = true; + return; + } + if (exidx.topBit()) { + firstWord = exidx.mBits; + } else { + mNextWord = reinterpret_cast<const uint32_t *>(exidx.compute()); + firstWord = *mNextWord++; + } + + switch (firstWord >> 24) { + case 0x80: // short + mWord = firstWord << 8; + mBytesLeft = 3; + break; + case 0x81: case 0x82: // long; catch descriptor size ignored + mWord = firstWord << 16; + mBytesLeft = 2; + mWordsLeft = (firstWord >> 16) & 0xff; + break; + default: + // unknown personality + mFailed = true; + } + } + + bool unwind(); + +private: + // TODO: GCC has been observed not CSEing repeated reads of + // mState[R_SP] with writes to mFailed between them, suggesting that + // it hasn't determined that they can't alias and is thus missing + // optimization opportunities. So, we may want to flatten EHState + // into this class; this may also make the code simpler. + EHState &mState; + uint32_t mStackLimit; + uint32_t mStackBase; + const uint32_t *mNextWord; + uint32_t mWord; + uint8_t mWordsLeft; + uint8_t mBytesLeft; + bool mFailed; + + enum { + I_ADDSP = 0x00, // 0sxxxxxx (subtract if s) + M_ADDSP = 0x80, + I_POPMASK = 0x80, // 1000iiii iiiiiiii (if any i set) + M_POPMASK = 0xf0, + I_MOVSP = 0x90, // 1001nnnn + M_MOVSP = 0xf0, + I_POPN = 0xa0, // 1010lnnn + M_POPN = 0xf0, + I_FINISH = 0xb0, // 10110000 + I_POPLO = 0xb1, // 10110001 0000iiii (if any i set) + I_ADDSPBIG = 0xb2, // 10110010 uleb128 + I_POPFDX = 0xb3, // 10110011 sssscccc + I_POPFDX8 = 0xb8, // 10111nnn + M_POPFDX8 = 0xf8, + // "Intel Wireless MMX" extensions omitted. + I_POPFDD = 0xc8, // 1100100h sssscccc + M_POPFDD = 0xfe, + I_POPFDD8 = 0xd0, // 11010nnn + M_POPFDD8 = 0xf8 + }; + + uint8_t next() { + if (mBytesLeft == 0) { + if (mWordsLeft == 0) { + return I_FINISH; + } + mWordsLeft--; + mWord = *mNextWord++; + mBytesLeft = 4; + } + mBytesLeft--; + mWord = (mWord << 8) | (mWord >> 24); // rotate + return mWord; + } + + uint32_t &vSP() { return mState[R_SP]; } + uint32_t *ptrSP() { return reinterpret_cast<uint32_t *>(vSP()); } + + void checkStackBase() { if (vSP() > mStackBase) mFailed = true; } + void checkStackLimit() { if (vSP() <= mStackLimit) mFailed = true; } + void checkStackAlign() { if ((vSP() & 3) != 0) mFailed = true; } + void checkStack() { + checkStackBase(); + checkStackLimit(); + checkStackAlign(); + } + + void popRange(uint8_t first, uint8_t last, uint16_t mask) { + bool hasSP = false; + uint32_t tmpSP; + if (mask == 0) + mFailed = true; + for (uint8_t r = first; r <= last; ++r) { + if (mask & 1) { + if (r == R_SP) { + hasSP = true; + tmpSP = *ptrSP(); + } else + mState[r] = *ptrSP(); + vSP() += 4; + checkStackBase(); + if (mFailed) + return; + } + mask >>= 1; + } + if (hasSP) { + vSP() = tmpSP; + checkStack(); + } + } +}; + + +bool EHState::unwind(const EHEntry *aEntry, const void *stackBasePtr) { + // The unwinding program cannot set SP to less than the initial value. + uint32_t stackLimit = mRegs[R_SP] - 4; + uint32_t stackBase = reinterpret_cast<uint32_t>(stackBasePtr); + EHInterp interp(*this, aEntry, stackLimit, stackBase); + return interp.unwind(); +} + +bool EHInterp::unwind() { + mState[R_PC] = 0; + checkStack(); + while (!mFailed) { + uint8_t insn = next(); +#if DEBUG_EHABI_UNWIND + LOGF("unwind insn = %02x", (unsigned)insn); +#endif + // Try to put the common cases first. + + // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4 + // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4 + if ((insn & M_ADDSP) == I_ADDSP) { + uint32_t offset = ((insn & 0x3f) << 2) + 4; + if (insn & 0x40) { + vSP() -= offset; + checkStackLimit(); + } else { + vSP() += offset; + checkStackBase(); + } + continue; + } + + // 10100nnn: Pop r4-r[4+nnn] + // 10101nnn: Pop r4-r[4+nnn], r14 + if ((insn & M_POPN) == I_POPN) { + uint8_t n = (insn & 0x07) + 1; + bool lr = insn & 0x08; + uint32_t *ptr = ptrSP(); + vSP() += (n + (lr ? 1 : 0)) * 4; + checkStackBase(); + for (uint8_t r = 4; r < 4 + n; ++r) + mState[r] = *ptr++; + if (lr) + mState[R_LR] = *ptr++; + continue; + } + + // 1011000: Finish + if (insn == I_FINISH) { + if (mState[R_PC] == 0) { + mState[R_PC] = mState[R_LR]; + // Non-standard change (bug 916106): Prevent the caller from + // re-using LR. Since the caller is by definition not a leaf + // routine, it will have to restore LR from somewhere to + // return to its own caller, so we can safely zero it here. + // This makes a difference only if an error in unwinding + // (e.g., caused by starting from within a prologue/epilogue) + // causes us to load a pointer to a leaf routine as LR; if we + // don't do something, we'll go into an infinite loop of + // "returning" to that same function. + mState[R_LR] = 0; + } + return true; + } + + // 1001nnnn: Set vsp = r[nnnn] + if ((insn & M_MOVSP) == I_MOVSP) { + vSP() = mState[insn & 0x0f]; + checkStack(); + continue; + } + + // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD) + // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD) + if ((insn & M_POPFDD) == I_POPFDD) { + uint8_t n = (next() & 0x0f) + 1; + // Note: if the 16+ssss+cccc > 31, the encoding is reserved. + // As the space is currently unused, we don't try to check. + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD) + if ((insn & M_POPFDD8) == I_POPFDD8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n; + checkStackBase(); + continue; + } + + // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2) + if (insn == I_ADDSPBIG) { + uint32_t acc = 0; + uint8_t shift = 0; + uint8_t byte; + do { + if (shift >= 32) + return false; + byte = next(); + acc |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + uint32_t offset = 0x204 + (acc << 2); + // The calculations above could have overflowed. + // But the one we care about is this: + if (vSP() + offset < vSP()) + mFailed = true; + vSP() += offset; + // ...so that this is the only other check needed: + checkStackBase(); + continue; + } + + // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4} + if ((insn & M_POPMASK) == I_POPMASK) { + popRange(4, 15, ((insn & 0x0f) << 8) | next()); + continue; + } + + // 1011001 0000iiii (i not all 0): Pop under mask {r3-r0} + if (insn == I_POPLO) { + popRange(0, 3, next() & 0x0f); + continue; + } + + // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX) + if (insn == I_POPFDX) { + uint8_t n = (next() & 0x0f) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX) + if ((insn & M_POPFDX8) == I_POPFDX8) { + uint8_t n = (insn & 0x07) + 1; + vSP() += 8 * n + 4; + checkStackBase(); + continue; + } + + // unhandled instruction +#ifdef DEBUG_EHABI_UNWIND + LOGF("Unhandled EHABI instruction 0x%02x", insn); +#endif + mFailed = true; + } + return false; +} + + +bool operator<(const EHTable &lhs, const EHTable &rhs) { + return lhs.startPC() < rhs.startPC(); +} + +// Async signal unsafe. +EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables) + : mTables(aTables) +{ + std::sort(mTables.begin(), mTables.end()); + DebugOnly<uint32_t> lastEnd = 0; + for (std::vector<EHTable>::iterator i = mTables.begin(); + i != mTables.end(); ++i) { + MOZ_ASSERT(i->startPC() >= lastEnd); + mStarts.push_back(i->startPC()); + lastEnd = i->endPC(); + } +} + +const EHTable *EHAddrSpace::lookup(uint32_t aPC) const { + ptrdiff_t i = (std::upper_bound(mStarts.begin(), mStarts.end(), aPC) + - mStarts.begin()) - 1; + + if (i < 0 || aPC >= mTables[i].endPC()) + return 0; + return &mTables[i]; +} + + +const EHEntry *EHTable::lookup(uint32_t aPC) const { + MOZ_ASSERT(aPC >= mStartPC); + if (aPC >= mEndPC) + return nullptr; + + EntryIterator begin = entriesBegin(); + EntryIterator end = entriesEnd(); + MOZ_ASSERT(begin < end); + if (aPC < reinterpret_cast<uint32_t>(entryGet(begin)->startPC.compute())) + return nullptr; + + while (end - begin > 1) { +#ifdef EHABI_UNWIND_MORE_ASSERTS + if (entryGet(end - 1)->startPC.compute() + < entryGet(begin)->startPC.compute()) { + MOZ_CRASH("unsorted exidx"); + } +#endif + EntryIterator mid = begin + (end - begin) / 2; + if (aPC < reinterpret_cast<uint32_t>(entryGet(mid)->startPC.compute())) + end = mid; + else + begin = mid; + } + return entryGet(begin); +} + + +#if MOZ_LITTLE_ENDIAN +static const unsigned char hostEndian = ELFDATA2LSB; +#elif MOZ_BIG_ENDIAN +static const unsigned char hostEndian = ELFDATA2MSB; +#else +#error "No endian?" +#endif + +// Async signal unsafe: std::vector::reserve, std::string copy ctor. +EHTable::EHTable(const void *aELF, size_t aSize, const std::string &aName) + : mStartPC(~0), // largest uint32_t + mEndPC(0), +#ifndef HAVE_UNSORTED_EXIDX + mEntriesBegin(nullptr), + mEntriesEnd(nullptr), +#endif + mName(aName) +{ + const uint32_t base = reinterpret_cast<uint32_t>(aELF); + + if (aSize < sizeof(Elf32_Ehdr)) + return; + + const Elf32_Ehdr &file = *(reinterpret_cast<Elf32_Ehdr *>(base)); + if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 || + file.e_ident[EI_CLASS] != ELFCLASS32 || + file.e_ident[EI_DATA] != hostEndian || + file.e_ident[EI_VERSION] != EV_CURRENT || + file.e_ident[EI_OSABI] != ELFOSABI_SYSV || +#ifdef EI_ABIVERSION + file.e_ident[EI_ABIVERSION] != 0 || +#endif + file.e_machine != EM_ARM || + file.e_version != EV_CURRENT) + // e_flags? + return; + + MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize); + const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0; + for (unsigned i = 0; i < file.e_phnum; ++i) { + const Elf32_Phdr &phdr = + *(reinterpret_cast<Elf32_Phdr *>(base + file.e_phoff + + i * file.e_phentsize)); + if (phdr.p_type == PT_ARM_EXIDX) { + exidxHdr = &phdr; + } else if (phdr.p_type == PT_LOAD) { + if (phdr.p_offset == 0) { + zeroHdr = &phdr; + } + if (phdr.p_flags & PF_X) { + mStartPC = std::min(mStartPC, phdr.p_vaddr); + mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz); + } + } + } + if (!exidxHdr) + return; + if (!zeroHdr) + return; + mLoadOffset = base - zeroHdr->p_vaddr; + mStartPC += mLoadOffset; + mEndPC += mLoadOffset; + + // Create a sorted index of the index to work around linker bugs. + const EHEntry *startTable = + reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr); + const EHEntry *endTable = + reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr + + exidxHdr->p_memsz); +#ifdef HAVE_UNSORTED_EXIDX + mEntries.reserve(endTable - startTable); + for (const EHEntry *i = startTable; i < endTable; ++i) + mEntries.push_back(i); + std::sort(mEntries.begin(), mEntries.end()); +#else + mEntriesBegin = startTable; + mEntriesEnd = endTable; +#endif +} + + +mozilla::Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr); + +// Async signal safe; can fail if Update() hasn't returned yet. +const EHAddrSpace *EHAddrSpace::Get() { + return sCurrent; +} + +// Collect unwinding information from loaded objects. Calls after the +// first have no effect. Async signal unsafe. +void EHAddrSpace::Update() { + const EHAddrSpace *space = sCurrent; + if (space) + return; + + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + std::vector<EHTable> tables; + + for (size_t i = 0; i < info.GetSize(); ++i) { + const SharedLibrary &lib = info.GetEntry(i); + if (lib.GetOffset() != 0) + // TODO: if it has a name, and we haven't seen a mapping of + // offset 0 for that file, try opening it and reading the + // headers instead. The only thing I've seen so far that's + // linked so as to need that treatment is the dynamic linker + // itself. + continue; + EHTable tab(reinterpret_cast<const void *>(lib.GetStart()), + lib.GetEnd() - lib.GetStart(), lib.GetName()); + if (tab.isValid()) + tables.push_back(tab); + } + space = new EHAddrSpace(tables); + + if (!sCurrent.compareExchange(nullptr, space)) { + delete space; + space = sCurrent; + } +} + + +EHState::EHState(const mcontext_t &context) { +#ifdef linux + mRegs[0] = context.arm_r0; + mRegs[1] = context.arm_r1; + mRegs[2] = context.arm_r2; + mRegs[3] = context.arm_r3; + mRegs[4] = context.arm_r4; + mRegs[5] = context.arm_r5; + mRegs[6] = context.arm_r6; + mRegs[7] = context.arm_r7; + mRegs[8] = context.arm_r8; + mRegs[9] = context.arm_r9; + mRegs[10] = context.arm_r10; + mRegs[11] = context.arm_fp; + mRegs[12] = context.arm_ip; + mRegs[13] = context.arm_sp; + mRegs[14] = context.arm_lr; + mRegs[15] = context.arm_pc; +#else +# error "Unhandled OS for ARM EHABI unwinding" +#endif +} + +} // namespace mozilla + diff --git a/tools/profiler/core/EHABIStackWalk.h b/tools/profiler/core/EHABIStackWalk.h new file mode 100644 index 000000000..5529d9511 --- /dev/null +++ b/tools/profiler/core/EHABIStackWalk.h @@ -0,0 +1,28 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This is an implementation of stack unwinding according to a subset + * of the ARM Exception Handling ABI; see the comment at the top of + * the .cpp file for details. + */ + +#ifndef mozilla_EHABIStackWalk_h__ +#define mozilla_EHABIStackWalk_h__ + +#include <stddef.h> +#include <ucontext.h> + +namespace mozilla { + +void EHABIStackWalkInit(); + +size_t EHABIStackWalk(const mcontext_t &aContext, void *stackBase, + void **aSPs, void **aPCs, size_t aNumFrames); + +} + +#endif diff --git a/tools/profiler/core/GeckoSampler.cpp b/tools/profiler/core/GeckoSampler.cpp new file mode 100644 index 000000000..f4249a7a5 --- /dev/null +++ b/tools/profiler/core/GeckoSampler.cpp @@ -0,0 +1,1306 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <algorithm> +#include <string> +#include <stdio.h> +#include <fstream> +#include <sstream> +#include "GeckoProfiler.h" +#ifndef SPS_STANDALONE +#include "SaveProfileTask.h" +#include "nsThreadUtils.h" +#include "prenv.h" +#include "prtime.h" +#include "nsXULAppAPI.h" +#endif +#include "ProfileEntry.h" +#include "SyncProfile.h" +#include "platform.h" +#include "shared-libraries.h" +#include "mozilla/StackWalk.h" +#include "GeckoSampler.h" + +// JSON +#include "ProfileJSONWriter.h" + +#ifndef SPS_STANDALONE +// Meta +#include "nsXPCOM.h" +#include "nsXPCOMCID.h" +#include "nsIHttpProtocolHandler.h" +#include "nsServiceManagerUtils.h" +#include "nsIXULRuntime.h" +#include "nsIXULAppInfo.h" +#include "nsDirectoryServiceUtils.h" +#include "nsDirectoryServiceDefs.h" +#include "nsIObserverService.h" +#include "mozilla/Services.h" +#include "PlatformMacros.h" +#include "nsTArray.h" + +#include "mozilla/ProfileGatherer.h" +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + #include "FennecJNIWrappers.h" +#endif + +#ifndef SPS_STANDALONE +// JS +#include "jsfriendapi.h" +#include "js/ProfilingFrameIterator.h" +#endif + +#if defined(MOZ_PROFILING) && (defined(XP_MACOSX) || defined(XP_WIN)) + #define USE_NS_STACKWALK +#endif + +#if defined(XP_WIN) +typedef CONTEXT tickcontext_t; +#elif defined(LINUX) +#include <ucontext.h> +typedef ucontext_t tickcontext_t; +#endif + +#if defined(LINUX) || defined(XP_MACOSX) +#include <sys/types.h> +pid_t gettid(); +#endif + +#if defined(__arm__) && defined(ANDROID) + // Should also work on ARM Linux, but not tested there yet. + #define USE_EHABI_STACKWALK +#endif +#ifdef USE_EHABI_STACKWALK + #include "EHABIStackWalk.h" +#endif + +#ifndef SPS_STANDALONE +#if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" +#endif +#endif + +using std::string; +using namespace mozilla; + +#ifndef MAXPATHLEN + #ifdef PATH_MAX + #define MAXPATHLEN PATH_MAX + #elif defined(MAX_PATH) + #define MAXPATHLEN MAX_PATH + #elif defined(_MAX_PATH) + #define MAXPATHLEN _MAX_PATH + #elif defined(CCHMAXPATH) + #define MAXPATHLEN CCHMAXPATH + #else + #define MAXPATHLEN 1024 + #endif +#endif + +#ifdef MOZ_VALGRIND +# include <valgrind/memcheck.h> +#else +# define VALGRIND_MAKE_MEM_DEFINED(_addr,_len) ((void)0) +#endif + + +/////////////////////////////////////////////////////////////////////// +// BEGIN SaveProfileTask et al + +static void +AddSharedLibraryInfoToStream(std::ostream& aStream, const SharedLibrary& aLib) +{ + aStream << "{"; + aStream << "\"start\":" << aLib.GetStart(); + aStream << ",\"end\":" << aLib.GetEnd(); + aStream << ",\"offset\":" << aLib.GetOffset(); + aStream << ",\"name\":\"" << aLib.GetName() << "\""; + const std::string &breakpadId = aLib.GetBreakpadId(); + aStream << ",\"breakpadId\":\"" << breakpadId << "\""; +#ifdef XP_WIN + // FIXME: remove this XP_WIN code when the profiler plugin has switched to + // using breakpadId. + std::string pdbSignature = breakpadId.substr(0, 32); + std::string pdbAgeStr = breakpadId.substr(32, breakpadId.size() - 1); + + std::stringstream stream; + stream << pdbAgeStr; + + unsigned pdbAge; + stream << std::hex; + stream >> pdbAge; + +#ifdef DEBUG + std::ostringstream oStream; + oStream << pdbSignature << std::hex << std::uppercase << pdbAge; + MOZ_ASSERT(breakpadId == oStream.str()); +#endif + + aStream << ",\"pdbSignature\":\"" << pdbSignature << "\""; + aStream << ",\"pdbAge\":" << pdbAge; + aStream << ",\"pdbName\":\"" << aLib.GetName() << "\""; +#endif + aStream << "}"; +} + +std::string +GetSharedLibraryInfoStringInternal() +{ + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + if (info.GetSize() == 0) + return "[]"; + + std::ostringstream os; + os << "["; + AddSharedLibraryInfoToStream(os, info.GetEntry(0)); + + for (size_t i = 1; i < info.GetSize(); i++) { + os << ","; + AddSharedLibraryInfoToStream(os, info.GetEntry(i)); + } + + os << "]"; + return os.str(); +} + +static bool +hasFeature(const char** aFeatures, uint32_t aFeatureCount, const char* aFeature) { + for(size_t i = 0; i < aFeatureCount; i++) { + if (strcmp(aFeatures[i], aFeature) == 0) + return true; + } + return false; +} + +GeckoSampler::GeckoSampler(double aInterval, int aEntrySize, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount) + : Sampler(aInterval, true, aEntrySize) + , mPrimaryThreadProfile(nullptr) + , mBuffer(new ProfileBuffer(aEntrySize)) + , mSaveRequested(false) +#if defined(XP_WIN) + , mIntelPowerGadget(nullptr) +#endif +{ + mUseStackWalk = hasFeature(aFeatures, aFeatureCount, "stackwalk"); + + mProfileJS = hasFeature(aFeatures, aFeatureCount, "js"); + mProfileGPU = hasFeature(aFeatures, aFeatureCount, "gpu"); + mProfilePower = hasFeature(aFeatures, aFeatureCount, "power"); + // Users sometimes ask to filter by a list of threads but forget to request + // profiling non main threads. Let's make it implificit if we have a filter + mProfileThreads = hasFeature(aFeatures, aFeatureCount, "threads") || aFilterCount > 0; + mAddLeafAddresses = hasFeature(aFeatures, aFeatureCount, "leaf"); + mPrivacyMode = hasFeature(aFeatures, aFeatureCount, "privacy"); + mAddMainThreadIO = hasFeature(aFeatures, aFeatureCount, "mainthreadio"); + mProfileMemory = hasFeature(aFeatures, aFeatureCount, "memory"); + mTaskTracer = hasFeature(aFeatures, aFeatureCount, "tasktracer"); + mLayersDump = hasFeature(aFeatures, aFeatureCount, "layersdump"); + mDisplayListDump = hasFeature(aFeatures, aFeatureCount, "displaylistdump"); + mProfileRestyle = hasFeature(aFeatures, aFeatureCount, "restyle"); + +#if defined(XP_WIN) + if (mProfilePower) { + mIntelPowerGadget = new IntelPowerGadget(); + mProfilePower = mIntelPowerGadget->Init(); + } +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + mProfileJava = mozilla::jni::IsFennec() && + hasFeature(aFeatures, aFeatureCount, "java"); +#else + mProfileJava = false; +#endif + + // Deep copy aThreadNameFilters + MOZ_ALWAYS_TRUE(mThreadNameFilters.resize(aFilterCount)); + for (uint32_t i = 0; i < aFilterCount; ++i) { + mThreadNameFilters[i] = aThreadNameFilters[i]; + } + + // Deep copy aFeatures + MOZ_ALWAYS_TRUE(mFeatures.resize(aFeatureCount)); + for (uint32_t i = 0; i < aFeatureCount; ++i) { + mFeatures[i] = aFeatures[i]; + } + + bool ignore; + sStartTime = mozilla::TimeStamp::ProcessCreation(ignore); + + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + // Create ThreadProfile for each registered thread + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + + RegisterThread(info); + } + + SetActiveSampler(this); + } + +#ifdef MOZ_TASK_TRACER + if (mTaskTracer) { + mozilla::tasktracer::StartLogging(); + } +#endif + + mGatherer = new mozilla::ProfileGatherer(this); +} + +GeckoSampler::~GeckoSampler() +{ + if (IsActive()) + Stop(); + + SetActiveSampler(nullptr); + + // Destroy ThreadProfile for all threads + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + ThreadProfile* profile = info->Profile(); + if (profile) { + delete profile; + info->SetProfile(nullptr); + } + // We've stopped profiling. We no longer need to retain + // information for an old thread. + if (info->IsPendingDelete()) { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + i--; + } + } + } +#if defined(XP_WIN) + delete mIntelPowerGadget; +#endif + + // Cancel any in-flight async profile gatherering + // requests + mGatherer->Cancel(); +} + +void GeckoSampler::HandleSaveRequest() +{ + if (!mSaveRequested) + return; + mSaveRequested = false; + +#ifndef SPS_STANDALONE + // TODO: Use use the ipc/chromium Tasks here to support processes + // without XPCOM. + nsCOMPtr<nsIRunnable> runnable = new SaveProfileTask(); + NS_DispatchToMainThread(runnable); +#endif +} + +void GeckoSampler::DeleteExpiredMarkers() +{ + mBuffer->deleteExpiredStoredMarkers(); +} + +void GeckoSampler::StreamTaskTracer(SpliceableJSONWriter& aWriter) +{ +#ifdef MOZ_TASK_TRACER + aWriter.StartArrayProperty("data"); + UniquePtr<nsTArray<nsCString>> data = mozilla::tasktracer::GetLoggedData(sStartTime); + for (uint32_t i = 0; i < data->Length(); ++i) { + aWriter.StringElement((data->ElementAt(i)).get()); + } + aWriter.EndArray(); + + aWriter.StartArrayProperty("threads"); + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + for (size_t i = 0; i < sRegisteredThreads->size(); i++) { + // Thread meta data + ThreadInfo* info = sRegisteredThreads->at(i); + aWriter.StartObjectElement(); + if (XRE_GetProcessType() == GeckoProcessType_Plugin) { + // TODO Add the proper plugin name + aWriter.StringProperty("name", "Plugin"); + } else { + aWriter.StringProperty("name", info->Name()); + } + aWriter.IntProperty("tid", static_cast<int>(info->ThreadId())); + aWriter.EndObject(); + } + aWriter.EndArray(); + + aWriter.DoubleProperty("start", static_cast<double>(mozilla::tasktracer::GetStartTime())); +#endif +} + + +void GeckoSampler::StreamMetaJSCustomObject(SpliceableJSONWriter& aWriter) +{ + aWriter.IntProperty("version", 3); + aWriter.DoubleProperty("interval", interval()); + aWriter.IntProperty("stackwalk", mUseStackWalk); + +#ifndef SPS_STANDALONE + mozilla::TimeDuration delta = mozilla::TimeStamp::Now() - sStartTime; + aWriter.DoubleProperty("startTime", static_cast<double>(PR_Now()/1000.0 - delta.ToMilliseconds())); + + aWriter.IntProperty("processType", XRE_GetProcessType()); + + nsresult res; + nsCOMPtr<nsIHttpProtocolHandler> http = do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res); + if (!NS_FAILED(res)) { + nsAutoCString string; + + res = http->GetPlatform(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("platform", string.Data()); + + res = http->GetOscpu(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("oscpu", string.Data()); + + res = http->GetMisc(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("misc", string.Data()); + } + + nsCOMPtr<nsIXULRuntime> runtime = do_GetService("@mozilla.org/xre/runtime;1"); + if (runtime) { + nsAutoCString string; + + res = runtime->GetXPCOMABI(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("abi", string.Data()); + + res = runtime->GetWidgetToolkit(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("toolkit", string.Data()); + } + + nsCOMPtr<nsIXULAppInfo> appInfo = do_GetService("@mozilla.org/xre/app-info;1"); + if (appInfo) { + nsAutoCString string; + + res = appInfo->GetName(string); + if (!NS_FAILED(res)) + aWriter.StringProperty("product", string.Data()); + } +#endif +} + +void GeckoSampler::ToStreamAsJSON(std::ostream& stream, double aSinceTime) +{ + SpliceableJSONWriter b(mozilla::MakeUnique<OStreamJSONWriteFunc>(stream)); + StreamJSON(b, aSinceTime); +} + +#ifndef SPS_STANDALONE +JSObject* GeckoSampler::ToJSObject(JSContext *aCx, double aSinceTime) +{ + JS::RootedValue val(aCx); + { + UniquePtr<char[]> buf = ToJSON(aSinceTime); + NS_ConvertUTF8toUTF16 js_string(nsDependentCString(buf.get())); + MOZ_ALWAYS_TRUE(JS_ParseJSON(aCx, static_cast<const char16_t*>(js_string.get()), + js_string.Length(), &val)); + } + return &val.toObject(); +} + +void GeckoSampler::GetGatherer(nsISupports** aRetVal) +{ + if (!aRetVal || NS_WARN_IF(!mGatherer)) { + return; + } + NS_ADDREF(*aRetVal = mGatherer); +} +#endif + +UniquePtr<char[]> GeckoSampler::ToJSON(double aSinceTime) +{ + SpliceableChunkedJSONWriter b; + StreamJSON(b, aSinceTime); + return b.WriteFunc()->CopyData(); +} + +void GeckoSampler::ToJSObjectAsync(double aSinceTime, + mozilla::dom::Promise* aPromise) +{ + if (NS_WARN_IF(!mGatherer)) { + return; + } + + mGatherer->Start(aSinceTime, aPromise); +} + +struct SubprocessClosure { + explicit SubprocessClosure(SpliceableJSONWriter* aWriter) + : mWriter(aWriter) + {} + + SpliceableJSONWriter* mWriter; +}; + +void SubProcessCallback(const char* aProfile, void* aClosure) +{ + // Called by the observer to get their profile data included + // as a sub profile + SubprocessClosure* closure = (SubprocessClosure*)aClosure; + + // Add the string profile into the profile + closure->mWriter->StringElement(aProfile); +} + + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +static +void BuildJavaThreadJSObject(SpliceableJSONWriter& aWriter) +{ + aWriter.StringProperty("name", "Java Main Thread"); + + aWriter.StartArrayProperty("samples"); + + // for each sample + for (int sampleId = 0; true; sampleId++) { + bool firstRun = true; + // for each frame + for (int frameId = 0; true; frameId++) { + jni::String::LocalRef frameName = + java::GeckoJavaSampler::GetFrameName(0, sampleId, frameId); + // when we run out of frames, we stop looping + if (!frameName) { + // if we found at least one frame, we have objects to close + if (!firstRun) { + aWriter.EndArray(); + aWriter.EndObject(); + } + break; + } + // the first time around, open the sample object and frames array + if (firstRun) { + firstRun = false; + + double sampleTime = + java::GeckoJavaSampler::GetSampleTime(0, sampleId); + + aWriter.StartObjectElement(); + aWriter.DoubleProperty("time", sampleTime); + + aWriter.StartArrayProperty("frames"); + } + // add a frame to the sample + aWriter.StartObjectElement(); + aWriter.StringProperty("location", + frameName->ToCString().BeginReading()); + aWriter.EndObject(); + } + // if we found no frames for this sample, we are done + if (firstRun) { + break; + } + } + + aWriter.EndArray(); +} +#endif + +void GeckoSampler::StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime) +{ + aWriter.Start(SpliceableJSONWriter::SingleLineStyle); + { + // Put shared library info + aWriter.StringProperty("libs", GetSharedLibraryInfoStringInternal().c_str()); + + // Put meta data + aWriter.StartObjectProperty("meta"); + StreamMetaJSCustomObject(aWriter); + aWriter.EndObject(); + + // Data of TaskTracer doesn't belong in the circular buffer. + if (TaskTracer()) { + aWriter.StartObjectProperty("tasktracer"); + StreamTaskTracer(aWriter); + aWriter.EndObject(); + } + + // Lists the samples for each ThreadProfile + aWriter.StartArrayProperty("threads"); + { + SetPaused(true); + + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (size_t i = 0; i < sRegisteredThreads->size(); i++) { + // Thread not being profiled, skip it + if (!sRegisteredThreads->at(i)->Profile()) + continue; + + // Note that we intentionally include ThreadProfile which + // have been marked for pending delete. + + ::MutexAutoLock lock(sRegisteredThreads->at(i)->Profile()->GetMutex()); + + sRegisteredThreads->at(i)->Profile()->StreamJSON(aWriter, aSinceTime); + } + } + +#ifndef SPS_STANDALONE + if (Sampler::CanNotifyObservers()) { + // Send a event asking any subprocesses (plugins) to + // give us their information + SubprocessClosure closure(&aWriter); + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) { + RefPtr<ProfileSaveEvent> pse = new ProfileSaveEvent(SubProcessCallback, &closure); + os->NotifyObservers(pse, "profiler-subprocess", nullptr); + } + } + + #if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + if (ProfileJava()) { + java::GeckoJavaSampler::Pause(); + + aWriter.Start(); + { + BuildJavaThreadJSObject(aWriter); + } + aWriter.End(); + + java::GeckoJavaSampler::Unpause(); + } + #endif +#endif + + SetPaused(false); + } + aWriter.EndArray(); + } + aWriter.End(); +} + +void GeckoSampler::FlushOnJSShutdown(JSContext* aContext) +{ +#ifndef SPS_STANDALONE + SetPaused(true); + + { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (size_t i = 0; i < sRegisteredThreads->size(); i++) { + // Thread not being profiled, skip it. + if (!sRegisteredThreads->at(i)->Profile() || + sRegisteredThreads->at(i)->IsPendingDelete()) { + continue; + } + + // Thread not profiling the context that's going away, skip it. + if (sRegisteredThreads->at(i)->Profile()->GetPseudoStack()->mContext != aContext) { + continue; + } + + ::MutexAutoLock lock(sRegisteredThreads->at(i)->Profile()->GetMutex()); + sRegisteredThreads->at(i)->Profile()->FlushSamplesAndMarkers(); + } + } + + SetPaused(false); +#endif +} + +void PseudoStack::flushSamplerOnJSShutdown() +{ +#ifndef SPS_STANDALONE + MOZ_ASSERT(mContext); + GeckoSampler* t = tlsTicker.get(); + if (t) { + t->FlushOnJSShutdown(mContext); + } +#endif +} + +// END SaveProfileTask et al +//////////////////////////////////////////////////////////////////////// + +static +void addDynamicTag(ThreadProfile &aProfile, char aTagName, const char *aStr) +{ + aProfile.addTag(ProfileEntry(aTagName, "")); + // Add one to store the null termination + size_t strLen = strlen(aStr) + 1; + for (size_t j = 0; j < strLen;) { + // Store as many characters in the void* as the platform allows + char text[sizeof(void*)]; + size_t len = sizeof(void*)/sizeof(char); + if (j+len >= strLen) { + len = strLen - j; + } + memcpy(text, &aStr[j], len); + j += sizeof(void*)/sizeof(char); + // Cast to *((void**) to pass the text data to a void* + aProfile.addTag(ProfileEntry('d', *((void**)(&text[0])))); + } +} + +static +void addPseudoEntry(volatile StackEntry &entry, ThreadProfile &aProfile, + PseudoStack *stack, void *lastpc) +{ + // Pseudo-frames with the BEGIN_PSEUDO_JS flag are just annotations + // and should not be recorded in the profile. + if (entry.hasFlag(StackEntry::BEGIN_PSEUDO_JS)) + return; + + int lineno = -1; + + // First entry has tagName 's' (start) + // Check for magic pointer bit 1 to indicate copy + const char* sampleLabel = entry.label(); + if (entry.isCopyLabel()) { + // Store the string using 1 or more 'd' (dynamic) tags + // that will happen to the preceding tag + + addDynamicTag(aProfile, 'c', sampleLabel); +#ifndef SPS_STANDALONE + if (entry.isJs()) { + JSScript* script = entry.script(); + if (script) { + if (!entry.pc()) { + // The JIT only allows the top-most entry to have a nullptr pc + MOZ_ASSERT(&entry == &stack->mStack[stack->stackSize() - 1]); + // If stack-walking was disabled, then that's just unfortunate + if (lastpc) { + jsbytecode *jspc = js::ProfilingGetPC(stack->mContext, script, + lastpc); + if (jspc) { + lineno = JS_PCToLineNumber(script, jspc); + } + } + } else { + lineno = JS_PCToLineNumber(script, entry.pc()); + } + } + } else { + lineno = entry.line(); + } +#endif + } else { + aProfile.addTag(ProfileEntry('c', sampleLabel)); + + // XXX: Bug 1010578. Don't assume a CPP entry and try to get the + // line for js entries as well. + if (entry.isCpp()) { + lineno = entry.line(); + } + } + + if (lineno != -1) { + aProfile.addTag(ProfileEntry('n', lineno)); + } + + uint32_t category = entry.category(); + MOZ_ASSERT(!(category & StackEntry::IS_CPP_ENTRY)); + MOZ_ASSERT(!(category & StackEntry::FRAME_LABEL_COPY)); + + if (category) { + aProfile.addTag(ProfileEntry('y', (int)category)); + } +} + +struct NativeStack +{ + void** pc_array; + void** sp_array; + size_t size; + size_t count; +}; + +mozilla::Atomic<bool> WALKING_JS_STACK(false); + +struct AutoWalkJSStack { + bool walkAllowed; + + AutoWalkJSStack() : walkAllowed(false) { + walkAllowed = WALKING_JS_STACK.compareExchange(false, true); + } + + ~AutoWalkJSStack() { + if (walkAllowed) + WALKING_JS_STACK = false; + } +}; + +static +void mergeStacksIntoProfile(ThreadProfile& aProfile, TickSample* aSample, NativeStack& aNativeStack) +{ + PseudoStack* pseudoStack = aProfile.GetPseudoStack(); + volatile StackEntry *pseudoFrames = pseudoStack->mStack; + uint32_t pseudoCount = pseudoStack->stackSize(); + + // Make a copy of the JS stack into a JSFrame array. This is necessary since, + // like the native stack, the JS stack is iterated youngest-to-oldest and we + // need to iterate oldest-to-youngest when adding entries to aProfile. + + // Synchronous sampling reports an invalid buffer generation to + // ProfilingFrameIterator to avoid incorrectly resetting the generation of + // sampled JIT entries inside the JS engine. See note below concerning 'J' + // entries. + uint32_t startBufferGen; + if (aSample->isSamplingCurrentThread) { + startBufferGen = UINT32_MAX; + } else { + startBufferGen = aProfile.bufferGeneration(); + } + uint32_t jsCount = 0; +#ifndef SPS_STANDALONE + JS::ProfilingFrameIterator::Frame jsFrames[1000]; + // Only walk jit stack if profiling frame iterator is turned on. + if (pseudoStack->mContext && JS::IsProfilingEnabledForContext(pseudoStack->mContext)) { + AutoWalkJSStack autoWalkJSStack; + const uint32_t maxFrames = mozilla::ArrayLength(jsFrames); + + if (aSample && autoWalkJSStack.walkAllowed) { + JS::ProfilingFrameIterator::RegisterState registerState; + registerState.pc = aSample->pc; + registerState.sp = aSample->sp; +#ifdef ENABLE_ARM_LR_SAVING + registerState.lr = aSample->lr; +#endif + + JS::ProfilingFrameIterator jsIter(pseudoStack->mContext, + registerState, + startBufferGen); + for (; jsCount < maxFrames && !jsIter.done(); ++jsIter) { + // See note below regarding 'J' entries. + if (aSample->isSamplingCurrentThread || jsIter.isWasm()) { + uint32_t extracted = jsIter.extractStack(jsFrames, jsCount, maxFrames); + jsCount += extracted; + if (jsCount == maxFrames) + break; + } else { + mozilla::Maybe<JS::ProfilingFrameIterator::Frame> frame = + jsIter.getPhysicalFrameWithoutLabel(); + if (frame.isSome()) + jsFrames[jsCount++] = mozilla::Move(frame.ref()); + } + } + } + } +#endif + + // Start the sample with a root entry. + aProfile.addTag(ProfileEntry('s', "(root)")); + + // While the pseudo-stack array is ordered oldest-to-youngest, the JS and + // native arrays are ordered youngest-to-oldest. We must add frames to + // aProfile oldest-to-youngest. Thus, iterate over the pseudo-stack forwards + // and JS and native arrays backwards. Note: this means the terminating + // condition jsIndex and nativeIndex is being < 0. + uint32_t pseudoIndex = 0; + int32_t jsIndex = jsCount - 1; + int32_t nativeIndex = aNativeStack.count - 1; + + uint8_t *lastPseudoCppStackAddr = nullptr; + + // Iterate as long as there is at least one frame remaining. + while (pseudoIndex != pseudoCount || jsIndex >= 0 || nativeIndex >= 0) { + // There are 1 to 3 frames available. Find and add the oldest. + + uint8_t *pseudoStackAddr = nullptr; + uint8_t *jsStackAddr = nullptr; + uint8_t *nativeStackAddr = nullptr; + + if (pseudoIndex != pseudoCount) { + volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex]; + + if (pseudoFrame.isCpp()) + lastPseudoCppStackAddr = (uint8_t *) pseudoFrame.stackAddress(); + +#ifndef SPS_STANDALONE + // Skip any pseudo-stack JS frames which are marked isOSR + // Pseudostack frames are marked isOSR when the JS interpreter + // enters a jit frame on a loop edge (via on-stack-replacement, + // or OSR). To avoid both the pseudoframe and jit frame being + // recorded (and showing up twice), the interpreter marks the + // interpreter pseudostack entry with the OSR flag to ensure that + // it doesn't get counted. + if (pseudoFrame.isJs() && pseudoFrame.isOSR()) { + pseudoIndex++; + continue; + } +#endif + + MOZ_ASSERT(lastPseudoCppStackAddr); + pseudoStackAddr = lastPseudoCppStackAddr; + } + +#ifndef SPS_STANDALONE + if (jsIndex >= 0) + jsStackAddr = (uint8_t *) jsFrames[jsIndex].stackAddress; +#endif + + if (nativeIndex >= 0) + nativeStackAddr = (uint8_t *) aNativeStack.sp_array[nativeIndex]; + + // If there's a native stack entry which has the same SP as a + // pseudo stack entry, pretend we didn't see the native stack + // entry. Ditto for a native stack entry which has the same SP as + // a JS stack entry. In effect this means pseudo or JS entries + // trump conflicting native entries. + if (nativeStackAddr && (pseudoStackAddr == nativeStackAddr || jsStackAddr == nativeStackAddr)) { + nativeStackAddr = nullptr; + nativeIndex--; + MOZ_ASSERT(pseudoStackAddr || jsStackAddr); + } + + // Sanity checks. + MOZ_ASSERT_IF(pseudoStackAddr, pseudoStackAddr != jsStackAddr && + pseudoStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != pseudoStackAddr && + jsStackAddr != nativeStackAddr); + MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != pseudoStackAddr && + nativeStackAddr != jsStackAddr); + + // Check to see if pseudoStack frame is top-most. + if (pseudoStackAddr > jsStackAddr && pseudoStackAddr > nativeStackAddr) { + MOZ_ASSERT(pseudoIndex < pseudoCount); + volatile StackEntry &pseudoFrame = pseudoFrames[pseudoIndex]; + addPseudoEntry(pseudoFrame, aProfile, pseudoStack, nullptr); + pseudoIndex++; + continue; + } + +#ifndef SPS_STANDALONE + // Check to see if JS jit stack frame is top-most + if (jsStackAddr > nativeStackAddr) { + MOZ_ASSERT(jsIndex >= 0); + const JS::ProfilingFrameIterator::Frame& jsFrame = jsFrames[jsIndex]; + + // Stringifying non-wasm JIT frames is delayed until streaming + // time. To re-lookup the entry in the JitcodeGlobalTable, we need to + // store the JIT code address ('J') in the circular buffer. + // + // Note that we cannot do this when we are sychronously sampling the + // current thread; that is, when called from profiler_get_backtrace. The + // captured backtrace is usually externally stored for an indeterminate + // amount of time, such as in nsRefreshDriver. Problematically, the + // stored backtrace may be alive across a GC during which the profiler + // itself is disabled. In that case, the JS engine is free to discard + // its JIT code. This means that if we inserted such 'J' entries into + // the buffer, nsRefreshDriver would now be holding on to a backtrace + // with stale JIT code return addresses. + if (aSample->isSamplingCurrentThread || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) { + addDynamicTag(aProfile, 'c', jsFrame.label.get()); + } else { + MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion || + jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline); + aProfile.addTag(ProfileEntry('J', jsFrames[jsIndex].returnAddress)); + } + + jsIndex--; + continue; + } +#endif + + // If we reach here, there must be a native stack entry and it must be the + // greatest entry. + if (nativeStackAddr) { + MOZ_ASSERT(nativeIndex >= 0); + aProfile + .addTag(ProfileEntry('l', (void*)aNativeStack.pc_array[nativeIndex])); + } + if (nativeIndex >= 0) { + nativeIndex--; + } + } + +#ifndef SPS_STANDALONE + // Update the JS context with the current profile sample buffer generation. + // + // Do not do this for synchronous sampling, which create their own + // ProfileBuffers. + if (!aSample->isSamplingCurrentThread && pseudoStack->mContext) { + MOZ_ASSERT(aProfile.bufferGeneration() >= startBufferGen); + uint32_t lapCount = aProfile.bufferGeneration() - startBufferGen; + JS::UpdateJSContextProfilerSampleBufferGen(pseudoStack->mContext, + aProfile.bufferGeneration(), + lapCount); + } +#endif +} + +#ifdef USE_NS_STACKWALK +static +void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP, + void* aClosure) +{ + NativeStack* nativeStack = static_cast<NativeStack*>(aClosure); + MOZ_ASSERT(nativeStack->count < nativeStack->size); + nativeStack->sp_array[nativeStack->count] = aSP; + nativeStack->pc_array[nativeStack->count] = aPC; + nativeStack->count++; +} + +void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample) +{ + void* pc_array[1000]; + void* sp_array[1000]; + NativeStack nativeStack = { + pc_array, + sp_array, + mozilla::ArrayLength(pc_array), + 0 + }; + + // Start with the current function. We use 0 as the frame number here because + // the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N. + // This is a bit weird but it doesn't matter because StackWalkCallback() + // doesn't use the frame number argument. + StackWalkCallback(/* frameNumber */ 0, aSample->pc, aSample->sp, &nativeStack); + + uint32_t maxFrames = uint32_t(nativeStack.size - nativeStack.count); + // win X64 doesn't support disabling frame pointers emission so we need + // to fallback to using StackWalk64 which is slower. +#if defined(XP_MACOSX) || (defined(XP_WIN) && !defined(V8_HOST_ARCH_X64)) + void *stackEnd = aSample->threadProfile->GetStackTop(); + bool rv = true; + if (aSample->fp >= aSample->sp && aSample->fp <= stackEnd) + rv = FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, + maxFrames, &nativeStack, + reinterpret_cast<void**>(aSample->fp), stackEnd); +#else + void *platformData = nullptr; + + uintptr_t thread = GetThreadHandle(aSample->threadProfile->GetPlatformData()); + MOZ_ASSERT(thread); + bool rv = MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, + &nativeStack, thread, platformData); +#endif + if (rv) + mergeStacksIntoProfile(aProfile, aSample, nativeStack); +} +#endif + + +#ifdef USE_EHABI_STACKWALK +void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample) +{ + void *pc_array[1000]; + void *sp_array[1000]; + NativeStack nativeStack = { + pc_array, + sp_array, + mozilla::ArrayLength(pc_array), + 0 + }; + + const mcontext_t *mcontext = &reinterpret_cast<ucontext_t *>(aSample->context)->uc_mcontext; + mcontext_t savedContext; + PseudoStack *pseudoStack = aProfile.GetPseudoStack(); + + nativeStack.count = 0; + // The pseudostack contains an "EnterJIT" frame whenever we enter + // JIT code with profiling enabled; the stack pointer value points + // the saved registers. We use this to unwind resume unwinding + // after encounting JIT code. + for (uint32_t i = pseudoStack->stackSize(); i > 0; --i) { + // The pseudostack grows towards higher indices, so we iterate + // backwards (from callee to caller). + volatile StackEntry &entry = pseudoStack->mStack[i - 1]; + if (!entry.isJs() && strcmp(entry.label(), "EnterJIT") == 0) { + // Found JIT entry frame. Unwind up to that point (i.e., force + // the stack walk to stop before the block of saved registers; + // note that it yields nondecreasing stack pointers), then restore + // the saved state. + uint32_t *vSP = reinterpret_cast<uint32_t*>(entry.stackAddress()); + + nativeStack.count += EHABIStackWalk(*mcontext, + /* stackBase = */ vSP, + sp_array + nativeStack.count, + pc_array + nativeStack.count, + nativeStack.size - nativeStack.count); + + memset(&savedContext, 0, sizeof(savedContext)); + // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp + savedContext.arm_r4 = *vSP++; + savedContext.arm_r5 = *vSP++; + savedContext.arm_r6 = *vSP++; + savedContext.arm_r7 = *vSP++; + savedContext.arm_r8 = *vSP++; + savedContext.arm_r9 = *vSP++; + savedContext.arm_r10 = *vSP++; + savedContext.arm_fp = *vSP++; + savedContext.arm_lr = *vSP++; + savedContext.arm_sp = reinterpret_cast<uint32_t>(vSP); + savedContext.arm_pc = savedContext.arm_lr; + mcontext = &savedContext; + } + } + + // Now unwind whatever's left (starting from either the last EnterJIT + // frame or, if no EnterJIT was found, the original registers). + nativeStack.count += EHABIStackWalk(*mcontext, + aProfile.GetStackTop(), + sp_array + nativeStack.count, + pc_array + nativeStack.count, + nativeStack.size - nativeStack.count); + + mergeStacksIntoProfile(aProfile, aSample, nativeStack); +} +#endif + + +#ifdef USE_LUL_STACKWALK +void GeckoSampler::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample) +{ + const mcontext_t* mc + = &reinterpret_cast<ucontext_t *>(aSample->context)->uc_mcontext; + + lul::UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); + +# if defined(SPS_PLAT_amd64_linux) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]); +# elif defined(SPS_PLAT_arm_android) + startRegs.r15 = lul::TaggedUWord(mc->arm_pc); + startRegs.r14 = lul::TaggedUWord(mc->arm_lr); + startRegs.r13 = lul::TaggedUWord(mc->arm_sp); + startRegs.r12 = lul::TaggedUWord(mc->arm_ip); + startRegs.r11 = lul::TaggedUWord(mc->arm_fp); + startRegs.r7 = lul::TaggedUWord(mc->arm_r7); +# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android) + startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]); + startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]); + startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]); +# else +# error "Unknown plat" +# endif + + /* Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not + going past the stack's registered top point. Do some basic + sanity checks too. This assumes that the TaggedUWord holding + the stack pointer value is valid, but it should be, since it + was constructed that way in the code just above. */ + + lul::StackImage stackImg; + + { +# if defined(SPS_PLAT_amd64_linux) + uintptr_t rEDZONE_SIZE = 128; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# elif defined(SPS_PLAT_arm_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE; +# elif defined(SPS_PLAT_x86_linux) || defined(SPS_PLAT_x86_android) + uintptr_t rEDZONE_SIZE = 0; + uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE; +# else +# error "Unknown plat" +# endif + uintptr_t end = reinterpret_cast<uintptr_t>(aProfile.GetStackTop()); + uintptr_t ws = sizeof(void*); + start &= ~(ws-1); + end &= ~(ws-1); + uintptr_t nToCopy = 0; + if (start < end) { + nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) + nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + stackImg.mLen = nToCopy; + stackImg.mStartAvma = start; + if (nToCopy > 0) { + memcpy(&stackImg.mContents[0], (void*)start, nToCopy); + (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy); + } + } + + // The maximum number of frames that LUL will produce. Setting it + // too high gives a risk of it wasting a lot of time looping on + // corrupted stacks. + const int MAX_NATIVE_FRAMES = 256; + + size_t scannedFramesAllowed = 0; + + uintptr_t framePCs[MAX_NATIVE_FRAMES]; + uintptr_t frameSPs[MAX_NATIVE_FRAMES]; + size_t framesAvail = mozilla::ArrayLength(framePCs); + size_t framesUsed = 0; + size_t scannedFramesAcquired = 0; + sLUL->Unwind( &framePCs[0], &frameSPs[0], + &framesUsed, &scannedFramesAcquired, + framesAvail, scannedFramesAllowed, + &startRegs, &stackImg ); + + NativeStack nativeStack = { + reinterpret_cast<void**>(framePCs), + reinterpret_cast<void**>(frameSPs), + mozilla::ArrayLength(framePCs), + 0 + }; + + nativeStack.count = framesUsed; + + mergeStacksIntoProfile(aProfile, aSample, nativeStack); + + // Update stats in the LUL stats object. Unfortunately this requires + // three global memory operations. + sLUL->mStats.mContext += 1; + sLUL->mStats.mCFI += framesUsed - 1 - scannedFramesAcquired; + sLUL->mStats.mScanned += scannedFramesAcquired; +} +#endif + + +static +void doSampleStackTrace(ThreadProfile &aProfile, TickSample *aSample, bool aAddLeafAddresses) +{ + NativeStack nativeStack = { nullptr, nullptr, 0, 0 }; + mergeStacksIntoProfile(aProfile, aSample, nativeStack); + +#ifdef ENABLE_SPS_LEAF_DATA + if (aSample && aAddLeafAddresses) { + aProfile.addTag(ProfileEntry('l', (void*)aSample->pc)); +#ifdef ENABLE_ARM_LR_SAVING + aProfile.addTag(ProfileEntry('L', (void*)aSample->lr)); +#endif + } +#endif +} + +void GeckoSampler::Tick(TickSample* sample) +{ + // Don't allow for ticks to happen within other ticks. + InplaceTick(sample); +} + +void GeckoSampler::InplaceTick(TickSample* sample) +{ + ThreadProfile& currThreadProfile = *sample->threadProfile; + + currThreadProfile.addTag(ProfileEntry('T', currThreadProfile.ThreadId())); + + if (sample) { + mozilla::TimeDuration delta = sample->timestamp - sStartTime; + currThreadProfile.addTag(ProfileEntry('t', delta.ToMilliseconds())); + } + + PseudoStack* stack = currThreadProfile.GetPseudoStack(); + +#if defined(USE_NS_STACKWALK) || defined(USE_EHABI_STACKWALK) || \ + defined(USE_LUL_STACKWALK) + if (mUseStackWalk) { + doNativeBacktrace(currThreadProfile, sample); + } else { + doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses); + } +#else + doSampleStackTrace(currThreadProfile, sample, mAddLeafAddresses); +#endif + + // Don't process the PeudoStack's markers if we're + // synchronously sampling the current thread. + if (!sample->isSamplingCurrentThread) { + ProfilerMarkerLinkedList* pendingMarkersList = stack->getPendingMarkers(); + while (pendingMarkersList && pendingMarkersList->peek()) { + ProfilerMarker* marker = pendingMarkersList->popHead(); + currThreadProfile.addStoredMarker(marker); + currThreadProfile.addTag(ProfileEntry('m', marker)); + } + } + +#ifndef SPS_STANDALONE + if (sample && currThreadProfile.GetThreadResponsiveness()->HasData()) { + mozilla::TimeDuration delta = currThreadProfile.GetThreadResponsiveness()->GetUnresponsiveDuration(sample->timestamp); + currThreadProfile.addTag(ProfileEntry('r', delta.ToMilliseconds())); + } +#endif + + // rssMemory is equal to 0 when we are not recording. + if (sample && sample->rssMemory != 0) { + currThreadProfile.addTag(ProfileEntry('R', static_cast<double>(sample->rssMemory))); + } + + // ussMemory is equal to 0 when we are not recording. + if (sample && sample->ussMemory != 0) { + currThreadProfile.addTag(ProfileEntry('U', static_cast<double>(sample->ussMemory))); + } + +#if defined(XP_WIN) + if (mProfilePower) { + mIntelPowerGadget->TakeSample(); + currThreadProfile.addTag(ProfileEntry('p', static_cast<double>(mIntelPowerGadget->GetTotalPackagePowerInWatts()))); + } +#endif + + if (sLastFrameNumber != sFrameNumber) { + currThreadProfile.addTag(ProfileEntry('f', sFrameNumber)); + sLastFrameNumber = sFrameNumber; + } +} + +namespace { + +SyncProfile* NewSyncProfile() +{ + PseudoStack* stack = tlsPseudoStack.get(); + if (!stack) { + MOZ_ASSERT(stack); + return nullptr; + } + Thread::tid_t tid = Thread::GetCurrentId(); + + ThreadInfo* info = new ThreadInfo("SyncProfile", tid, false, stack, nullptr); + SyncProfile* profile = new SyncProfile(info, GET_BACKTRACE_DEFAULT_ENTRY); + return profile; +} + +} // namespace + +SyncProfile* GeckoSampler::GetBacktrace() +{ + SyncProfile* profile = NewSyncProfile(); + + TickSample sample; + sample.threadProfile = profile; + +#if defined(HAVE_NATIVE_UNWIND) || defined(USE_LUL_STACKWALK) +#if defined(XP_WIN) || defined(LINUX) + tickcontext_t context; + sample.PopulateContext(&context); +#elif defined(XP_MACOSX) + sample.PopulateContext(nullptr); +#endif +#endif + + sample.isSamplingCurrentThread = true; + sample.timestamp = mozilla::TimeStamp::Now(); + + profile->BeginUnwind(); + Tick(&sample); + profile->EndUnwind(); + + return profile; +} + +void +GeckoSampler::GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration) +{ + *aCurrentPosition = mBuffer->mWritePos; + *aTotalSize = mBuffer->mEntrySize; + *aGeneration = mBuffer->mGeneration; +} diff --git a/tools/profiler/core/GeckoSampler.h b/tools/profiler/core/GeckoSampler.h new file mode 100644 index 000000000..da1fdfe43 --- /dev/null +++ b/tools/profiler/core/GeckoSampler.h @@ -0,0 +1,181 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef GeckoSampler_h +#define GeckoSampler_h + +#include "platform.h" +#include "ProfileEntry.h" +#include "mozilla/Vector.h" +#include "ThreadProfile.h" +#include "ThreadInfo.h" +#ifndef SPS_STANDALONE +#include "IntelPowerGadget.h" +#endif +#ifdef MOZ_TASK_TRACER +#include "GeckoTaskTracer.h" +#endif + +#include <algorithm> + +namespace mozilla { +class ProfileGatherer; +} // namespace mozilla + +typedef mozilla::Vector<std::string> ThreadNameFilterList; +typedef mozilla::Vector<std::string> FeatureList; + +static bool +threadSelected(ThreadInfo* aInfo, const ThreadNameFilterList &aThreadNameFilters) { + if (aThreadNameFilters.empty()) { + return true; + } + + std::string name = aInfo->Name(); + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + + for (uint32_t i = 0; i < aThreadNameFilters.length(); ++i) { + std::string filter = aThreadNameFilters[i]; + std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower); + + // Crude, non UTF-8 compatible, case insensitive substring search + if (name.find(filter) != std::string::npos) { + return true; + } + } + + return false; +} + +extern mozilla::TimeStamp sLastTracerEvent; +extern int sFrameNumber; +extern int sLastFrameNumber; + +class GeckoSampler: public Sampler { + public: + GeckoSampler(double aInterval, int aEntrySize, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount); + ~GeckoSampler(); + + void RegisterThread(ThreadInfo* aInfo) { + if (!aInfo->IsMainThread() && !mProfileThreads) { + return; + } + + if (!threadSelected(aInfo, mThreadNameFilters)) { + return; + } + + ThreadProfile* profile = new ThreadProfile(aInfo, mBuffer); + aInfo->SetProfile(profile); + } + + // Called within a signal. This function must be reentrant + virtual void Tick(TickSample* sample) override; + + // Immediately captures the calling thread's call stack and returns it. + virtual SyncProfile* GetBacktrace() override; + + // Called within a signal. This function must be reentrant + virtual void RequestSave() override + { + mSaveRequested = true; +#ifdef MOZ_TASK_TRACER + if (mTaskTracer) { + mozilla::tasktracer::StopLogging(); + } +#endif + } + + virtual void HandleSaveRequest() override; + virtual void DeleteExpiredMarkers() override; + + ThreadProfile* GetPrimaryThreadProfile() + { + if (!mPrimaryThreadProfile) { + ::MutexAutoLock lock(*sRegisteredThreadsMutex); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->IsMainThread() && !info->IsPendingDelete()) { + mPrimaryThreadProfile = info->Profile(); + break; + } + } + } + + return mPrimaryThreadProfile; + } + + void ToStreamAsJSON(std::ostream& stream, double aSinceTime = 0); +#ifndef SPS_STANDALONE + virtual JSObject *ToJSObject(JSContext *aCx, double aSinceTime = 0); + void GetGatherer(nsISupports** aRetVal); +#endif + mozilla::UniquePtr<char[]> ToJSON(double aSinceTime = 0); + virtual void ToJSObjectAsync(double aSinceTime = 0, mozilla::dom::Promise* aPromise = 0); + void StreamMetaJSCustomObject(SpliceableJSONWriter& aWriter); + void StreamTaskTracer(SpliceableJSONWriter& aWriter); + void FlushOnJSShutdown(JSContext* aContext); + bool ProfileJS() const { return mProfileJS; } + bool ProfileJava() const { return mProfileJava; } + bool ProfileGPU() const { return mProfileGPU; } + bool ProfilePower() const { return mProfilePower; } + bool ProfileThreads() const override { return mProfileThreads; } + bool InPrivacyMode() const { return mPrivacyMode; } + bool AddMainThreadIO() const { return mAddMainThreadIO; } + bool ProfileMemory() const { return mProfileMemory; } + bool TaskTracer() const { return mTaskTracer; } + bool LayersDump() const { return mLayersDump; } + bool DisplayListDump() const { return mDisplayListDump; } + bool ProfileRestyle() const { return mProfileRestyle; } + const ThreadNameFilterList& ThreadNameFilters() { return mThreadNameFilters; } + const FeatureList& Features() { return mFeatures; } + + void GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration); + +protected: + // Called within a signal. This function must be reentrant + virtual void InplaceTick(TickSample* sample); + + // Not implemented on platforms which do not support backtracing + void doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample); + + void StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime); + + // This represent the application's main thread (SAMPLER_INIT) + ThreadProfile* mPrimaryThreadProfile; + RefPtr<ProfileBuffer> mBuffer; + bool mSaveRequested; + bool mAddLeafAddresses; + bool mUseStackWalk; + bool mProfileJS; + bool mProfileGPU; + bool mProfileThreads; + bool mProfileJava; + bool mProfilePower; + bool mLayersDump; + bool mDisplayListDump; + bool mProfileRestyle; + + // Keep the thread filter to check against new thread that + // are started while profiling + ThreadNameFilterList mThreadNameFilters; + FeatureList mFeatures; + bool mPrivacyMode; + bool mAddMainThreadIO; + bool mProfileMemory; + bool mTaskTracer; +#if defined(XP_WIN) + IntelPowerGadget* mIntelPowerGadget; +#endif + +private: + RefPtr<mozilla::ProfileGatherer> mGatherer; +}; + +#endif + diff --git a/tools/profiler/core/IntelPowerGadget.cpp b/tools/profiler/core/IntelPowerGadget.cpp new file mode 100644 index 000000000..fe267b80f --- /dev/null +++ b/tools/profiler/core/IntelPowerGadget.cpp @@ -0,0 +1,310 @@ +/*
+ * Copyright 2013, Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Author: Joe Olivas <joseph.k.olivas@intel.com>
+ */
+
+#include "nsDebug.h"
+#include "nsString.h"
+#include "IntelPowerGadget.h"
+#include "prenv.h"
+
+IntelPowerGadget::IntelPowerGadget() :
+ libpowergadget(nullptr),
+ Initialize(nullptr),
+ GetNumNodes(nullptr),
+ GetMsrName(nullptr),
+ GetMsrFunc(nullptr),
+ ReadMSR(nullptr),
+ WriteMSR(nullptr),
+ GetIAFrequency(nullptr),
+ GetTDP(nullptr),
+ GetMaxTemperature(nullptr),
+ GetThresholds(nullptr),
+ GetTemperature(nullptr),
+ ReadSample(nullptr),
+ GetSysTime(nullptr),
+ GetRDTSC(nullptr),
+ GetTimeInterval(nullptr),
+ GetBaseFrequency(nullptr),
+ GetPowerData(nullptr),
+ StartLog(nullptr),
+ StopLog(nullptr),
+ GetNumMsrs(nullptr),
+ packageMSR(-1),
+ cpuMSR(-1),
+ freqMSR(-1),
+ tempMSR(-1)
+{
+}
+
+bool
+IntelPowerGadget::Init()
+{
+ bool success = false;
+ const char *path = PR_GetEnv("IPG_Dir");
+ nsCString ipg_library;
+ if (path && *path) {
+ ipg_library.Append(path);
+ ipg_library.Append('/');
+ ipg_library.AppendLiteral(PG_LIBRARY_NAME);
+ libpowergadget = PR_LoadLibrary(ipg_library.get());
+ }
+
+ if(libpowergadget) {
+ Initialize = (IPGInitialize) PR_FindFunctionSymbol(libpowergadget, "IntelEnergyLibInitialize");
+ GetNumNodes = (IPGGetNumNodes) PR_FindFunctionSymbol(libpowergadget, "GetNumNodes");
+ GetMsrName = (IPGGetMsrName) PR_FindFunctionSymbol(libpowergadget, "GetMsrName");
+ GetMsrFunc = (IPGGetMsrFunc) PR_FindFunctionSymbol(libpowergadget, "GetMsrFunc");
+ ReadMSR = (IPGReadMSR) PR_FindFunctionSymbol(libpowergadget, "ReadMSR");
+ WriteMSR = (IPGWriteMSR) PR_FindFunctionSymbol(libpowergadget, "WriteMSR");
+ GetIAFrequency = (IPGGetIAFrequency) PR_FindFunctionSymbol(libpowergadget, "GetIAFrequency");
+ GetTDP = (IPGGetTDP) PR_FindFunctionSymbol(libpowergadget, "GetTDP");
+ GetMaxTemperature = (IPGGetMaxTemperature) PR_FindFunctionSymbol(libpowergadget, "GetMaxTemperature");
+ GetThresholds = (IPGGetThresholds) PR_FindFunctionSymbol(libpowergadget, "GetThresholds");
+ GetTemperature = (IPGGetTemperature) PR_FindFunctionSymbol(libpowergadget, "GetTemperature");
+ ReadSample = (IPGReadSample) PR_FindFunctionSymbol(libpowergadget, "ReadSample");
+ GetSysTime = (IPGGetSysTime) PR_FindFunctionSymbol(libpowergadget, "GetSysTime");
+ GetRDTSC = (IPGGetRDTSC) PR_FindFunctionSymbol(libpowergadget, "GetRDTSC");
+ GetTimeInterval = (IPGGetTimeInterval) PR_FindFunctionSymbol(libpowergadget, "GetTimeInterval");
+ GetBaseFrequency = (IPGGetBaseFrequency) PR_FindFunctionSymbol(libpowergadget, "GetBaseFrequency");
+ GetPowerData = (IPGGetPowerData) PR_FindFunctionSymbol(libpowergadget, "GetPowerData");
+ StartLog = (IPGStartLog) PR_FindFunctionSymbol(libpowergadget, "StartLog");
+ StopLog = (IPGStopLog) PR_FindFunctionSymbol(libpowergadget, "StopLog");
+ GetNumMsrs = (IPGGetNumMsrs) PR_FindFunctionSymbol(libpowergadget, "GetNumMsrs");
+ }
+
+ if(Initialize) {
+ Initialize();
+ int msrCount = GetNumberMsrs();
+ wchar_t name[1024] = {0};
+ for(int i = 0; i < msrCount; ++i) {
+ GetMsrName(i, name);
+ int func = 0;
+ GetMsrFunc(i, &func);
+ // MSR for frequency
+ if(wcscmp(name, L"CPU Frequency") == 0 && (func == 0)) {
+ this->freqMSR = i;
+ }
+ // MSR for Package
+ else if(wcscmp(name, L"Processor") == 0 && (func == 1)) {
+ this->packageMSR = i;
+ }
+ // MSR for CPU
+ else if(wcscmp(name, L"IA") == 0 && (func == 1)) {
+ this->cpuMSR = i;
+ }
+ // MSR for Temperature
+ else if(wcscmp(name, L"Package") == 0 && (func == 2)) {
+ this->tempMSR = i;
+ }
+ }
+ // Grab one sample at startup for a diff
+ TakeSample();
+ success = true;
+ }
+ return success;
+}
+
+IntelPowerGadget::~IntelPowerGadget()
+{
+ if(libpowergadget) {
+ NS_WARNING("Unloading PowerGadget library!\n");
+ PR_UnloadLibrary(libpowergadget);
+ libpowergadget = nullptr;
+ Initialize = nullptr;
+ GetNumNodes = nullptr;
+ GetMsrName = nullptr;
+ GetMsrFunc = nullptr;
+ ReadMSR = nullptr;
+ WriteMSR = nullptr;
+ GetIAFrequency = nullptr;
+ GetTDP = nullptr;
+ GetMaxTemperature = nullptr;
+ GetThresholds = nullptr;
+ GetTemperature = nullptr;
+ ReadSample = nullptr;
+ GetSysTime = nullptr;
+ GetRDTSC = nullptr;
+ GetTimeInterval = nullptr;
+ GetBaseFrequency = nullptr;
+ GetPowerData = nullptr;
+ StartLog = nullptr;
+ StopLog = nullptr;
+ GetNumMsrs = nullptr;
+ }
+}
+
+int
+IntelPowerGadget::GetNumberNodes()
+{
+ int nodes = 0;
+ if(GetNumNodes) {
+ int ok = GetNumNodes(&nodes);
+ }
+ return nodes;
+}
+
+int
+IntelPowerGadget::GetNumberMsrs()
+{
+ int msrs = 0;
+ if(GetNumMsrs) {
+ int ok = GetNumMsrs(&msrs);
+ }
+ return msrs;
+}
+
+int
+IntelPowerGadget::GetCPUFrequency(int node)
+{
+ int frequency = 0;
+ if(GetIAFrequency) {
+ int ok = GetIAFrequency(node, &frequency);
+ }
+ return frequency;
+}
+
+double
+IntelPowerGadget::GetTdp(int node)
+{
+ double tdp = 0.0;
+ if(GetTDP) {
+ int ok = GetTDP(node, &tdp);
+ }
+ return tdp;
+}
+
+int
+IntelPowerGadget::GetMaxTemp(int node)
+{
+ int maxTemperatureC = 0;
+ if(GetMaxTemperature) {
+ int ok = GetMaxTemperature(node, &maxTemperatureC);
+ }
+ return maxTemperatureC;
+}
+
+int
+IntelPowerGadget::GetTemp(int node)
+{
+ int temperatureC = 0;
+ if(GetTemperature) {
+ int ok = GetTemperature(node, &temperatureC);
+ }
+ return temperatureC;
+}
+
+int
+IntelPowerGadget::TakeSample()
+{
+ int ok = 0;
+ if(ReadSample) {
+ ok = ReadSample();
+ }
+ return ok;
+}
+
+uint64_t
+IntelPowerGadget::GetRdtsc()
+{
+ uint64_t rdtsc = 0;
+ if(GetRDTSC) {
+ int ok = GetRDTSC(&rdtsc);
+ }
+ return rdtsc;
+}
+
+double
+IntelPowerGadget::GetInterval()
+{
+ double interval = 0.0;
+ if(GetTimeInterval) {
+ int ok = GetTimeInterval(&interval);
+ }
+ return interval;
+}
+
+double
+IntelPowerGadget::GetCPUBaseFrequency(int node)
+{
+ double freq = 0.0;
+ if(GetBaseFrequency) {
+ int ok = GetBaseFrequency(node, &freq);
+ }
+ return freq;
+}
+
+double
+IntelPowerGadget::GetTotalPackagePowerInWatts()
+{
+ int nodes = GetNumberNodes();
+ double totalPower = 0.0;
+ for(int i = 0; i < nodes; ++i) {
+ totalPower += GetPackagePowerInWatts(i);
+ }
+ return totalPower;
+}
+
+double
+IntelPowerGadget::GetPackagePowerInWatts(int node)
+{
+ int numResult = 0;
+ double result[] = {0.0, 0.0, 0.0};
+ if(GetPowerData && packageMSR != -1) {
+ int ok = GetPowerData(node, packageMSR, result, &numResult);
+ }
+ return result[0];
+}
+
+double
+IntelPowerGadget::GetTotalCPUPowerInWatts()
+{
+ int nodes = GetNumberNodes();
+ double totalPower = 0.0;
+ for(int i = 0; i < nodes; ++i) {
+ totalPower += GetCPUPowerInWatts(i);
+ }
+ return totalPower;
+}
+
+double
+IntelPowerGadget::GetCPUPowerInWatts(int node)
+{
+ int numResult = 0;
+ double result[] = {0.0, 0.0, 0.0};
+ if(GetPowerData && cpuMSR != -1) {
+ int ok = GetPowerData(node, cpuMSR, result, &numResult);
+ }
+ return result[0];
+}
+
+double
+IntelPowerGadget::GetTotalGPUPowerInWatts()
+{
+ int nodes = GetNumberNodes();
+ double totalPower = 0.0;
+ for(int i = 0; i < nodes; ++i) {
+ totalPower += GetGPUPowerInWatts(i);
+ }
+ return totalPower;
+}
+
+double
+IntelPowerGadget::GetGPUPowerInWatts(int node)
+{
+ return 0.0;
+}
+
diff --git a/tools/profiler/core/IntelPowerGadget.h b/tools/profiler/core/IntelPowerGadget.h new file mode 100644 index 000000000..4a24215b6 --- /dev/null +++ b/tools/profiler/core/IntelPowerGadget.h @@ -0,0 +1,150 @@ +/* + * Copyright 2013, Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Author: Joe Olivas <joseph.k.olivas@intel.com> + */ + +#ifndef profiler_IntelPowerGadget_h +#define profiler_IntelPowerGadget_h + +#ifdef _MSC_VER +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include <stdint.h> +#endif +#include "prlink.h" + +typedef int (*IPGInitialize) (); +typedef int (*IPGGetNumNodes) (int *nNodes); +typedef int (*IPGGetNumMsrs) (int *nMsr); +typedef int (*IPGGetMsrName) (int iMsr, wchar_t *szName); +typedef int (*IPGGetMsrFunc) (int iMsr, int *pFuncID); +typedef int (*IPGReadMSR) (int iNode, unsigned int address, uint64_t *value); +typedef int (*IPGWriteMSR) (int iNode, unsigned int address, uint64_t value); +typedef int (*IPGGetIAFrequency) (int iNode, int *freqInMHz); +typedef int (*IPGGetTDP) (int iNode, double *TDP); +typedef int (*IPGGetMaxTemperature) (int iNode, int *degreeC); +typedef int (*IPGGetThresholds) (int iNode, int *degree1C, int *degree2C); +typedef int (*IPGGetTemperature) (int iNode, int *degreeC); +typedef int (*IPGReadSample) (); +typedef int (*IPGGetSysTime) (void *pSysTime); +typedef int (*IPGGetRDTSC) (uint64_t *pTSC); +typedef int (*IPGGetTimeInterval) (double *pOffset); +typedef int (*IPGGetBaseFrequency) (int iNode, double *pBaseFrequency); +typedef int (*IPGGetPowerData) (int iNode, int iMSR, double *pResult, int *nResult); +typedef int (*IPGStartLog) (wchar_t *szFileName); +typedef int (*IPGStopLog) (); + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +#define PG_LIBRARY_NAME "EnergyLib64" +#else +#define PG_LIBRARY_NAME "EnergyLib32" +#endif + + +class IntelPowerGadget +{ +public: + + IntelPowerGadget(); + ~IntelPowerGadget(); + + // Fails if initialization is incomplete + bool Init(); + + // Returns the number of packages on the system + int GetNumberNodes(); + + // Returns the number of MSRs being tracked + int GetNumberMsrs(); + + // Given a node, returns the temperature + int GetCPUFrequency(int); + + // Returns the TDP of the given node + double GetTdp(int); + + // Returns the maximum temperature for the given node + int GetMaxTemp(int); + + // Returns the current temperature in degrees C + // of the given node + int GetTemp(int); + + // Takes a sample of data. Must be called before + // any current data is retrieved. + int TakeSample(); + + // Gets the timestamp of the most recent sample + uint64_t GetRdtsc(); + + // returns number of seconds between the last + // two samples + double GetInterval(); + + // Returns the base frequency for the given node + double GetCPUBaseFrequency(int node); + + // Returns the combined package power for all + // packages on the system for the last sample. + double GetTotalPackagePowerInWatts(); + double GetPackagePowerInWatts(int node); + + // Returns the combined CPU power for all + // packages on the system for the last sample. + // If the reading is not available, returns 0.0 + double GetTotalCPUPowerInWatts(); + double GetCPUPowerInWatts(int node); + + // Returns the combined GPU power for all + // packages on the system for the last sample. + // If the reading is not available, returns 0.0 + double GetTotalGPUPowerInWatts(); + double GetGPUPowerInWatts(int node); + +private: + + PRLibrary *libpowergadget; + IPGInitialize Initialize; + IPGGetNumNodes GetNumNodes; + IPGGetNumMsrs GetNumMsrs; + IPGGetMsrName GetMsrName; + IPGGetMsrFunc GetMsrFunc; + IPGReadMSR ReadMSR; + IPGWriteMSR WriteMSR; + IPGGetIAFrequency GetIAFrequency; + IPGGetTDP GetTDP; + IPGGetMaxTemperature GetMaxTemperature; + IPGGetThresholds GetThresholds; + IPGGetTemperature GetTemperature; + IPGReadSample ReadSample; + IPGGetSysTime GetSysTime; + IPGGetRDTSC GetRDTSC; + IPGGetTimeInterval GetTimeInterval; + IPGGetBaseFrequency GetBaseFrequency; + IPGGetPowerData GetPowerData; + IPGStartLog StartLog; + IPGStopLog StopLog; + + int packageMSR; + int cpuMSR; + int freqMSR; + int tempMSR; +}; + +#endif // profiler_IntelPowerGadget_h diff --git a/tools/profiler/core/PlatformMacros.h b/tools/profiler/core/PlatformMacros.h new file mode 100644 index 000000000..9a544a42e --- /dev/null +++ b/tools/profiler/core/PlatformMacros.h @@ -0,0 +1,76 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef SPS_PLATFORM_MACROS_H +#define SPS_PLATFORM_MACROS_H + +/* Define platform selection macros in a consistent way. Don't add + anything else to this file, so it can remain freestanding. The + primary factorisation is on (ARCH,OS) pairs ("PLATforms") but ARCH_ + and OS_ macros are defined too, since they are sometimes + convenient. */ + +#undef SPS_PLAT_arm_android +#undef SPS_PLAT_amd64_linux +#undef SPS_PLAT_x86_linux +#undef SPS_PLAT_amd64_darwin +#undef SPS_PLAT_x86_darwin +#undef SPS_PLAT_x86_windows +#undef SPS_PLAT_amd64_windows + +#undef SPS_ARCH_arm +#undef SPS_ARCH_x86 +#undef SPS_ARCH_amd64 + +#undef SPS_OS_android +#undef SPS_OS_linux +#undef SPS_OS_darwin +#undef SPS_OS_windows + +#if defined(__linux__) && defined(__x86_64__) +# define SPS_PLAT_amd64_linux 1 +# define SPS_ARCH_amd64 1 +# define SPS_OS_linux 1 + +#elif defined(__ANDROID__) && defined(__arm__) +# define SPS_PLAT_arm_android 1 +# define SPS_ARCH_arm 1 +# define SPS_OS_android 1 + +#elif defined(__ANDROID__) && defined(__i386__) +# define SPS_PLAT_x86_android 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_android 1 + +#elif defined(__linux__) && defined(__i386__) +# define SPS_PLAT_x86_linux 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_linux 1 + +#elif defined(__APPLE__) && defined(__x86_64__) +# define SPS_PLAT_amd64_darwin 1 +# define SPS_ARCH_amd64 1 +# define SPS_OS_darwin 1 + +#elif defined(__APPLE__) && defined(__i386__) +# define SPS_PLAT_x86_darwin 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_darwin 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && (defined(_M_IX86) || defined(__i386__)) +# define SPS_PLAT_x86_windows 1 +# define SPS_ARCH_x86 1 +# define SPS_OS_windows 1 + +#elif (defined(_MSC_VER) || defined(__MINGW32__)) && (defined(_M_X64) || defined(__x86_64__)) +# define SPS_PLAT_amd64_windows 1 +# define SPS_ARCH_amd64 1 +# define SPS_OS_windows 1 + +#else +# error "Unsupported platform" +#endif + +#endif /* ndef SPS_PLATFORM_MACROS_H */ diff --git a/tools/profiler/core/ProfileBuffer.cpp b/tools/profiler/core/ProfileBuffer.cpp new file mode 100644 index 000000000..a4b91d8fc --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.cpp @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfileBuffer.h" + +ProfileBuffer::ProfileBuffer(int aEntrySize) + : mEntries(MakeUnique<ProfileEntry[]>(aEntrySize)) + , mWritePos(0) + , mReadPos(0) + , mEntrySize(aEntrySize) + , mGeneration(0) +{ +} + +ProfileBuffer::~ProfileBuffer() +{ + while (mStoredMarkers.peek()) { + delete mStoredMarkers.popHead(); + } +} + +// Called from signal, call only reentrant functions +void ProfileBuffer::addTag(const ProfileEntry& aTag) +{ + mEntries[mWritePos++] = aTag; + if (mWritePos == mEntrySize) { + // Wrapping around may result in things referenced in the buffer (e.g., + // JIT code addresses and markers) being incorrectly collected. + MOZ_ASSERT(mGeneration != UINT32_MAX); + mGeneration++; + mWritePos = 0; + } + if (mWritePos == mReadPos) { + // Keep one slot open. + mEntries[mReadPos] = ProfileEntry(); + mReadPos = (mReadPos + 1) % mEntrySize; + } +} + +void ProfileBuffer::addStoredMarker(ProfilerMarker *aStoredMarker) { + aStoredMarker->SetGeneration(mGeneration); + mStoredMarkers.insert(aStoredMarker); +} + +void ProfileBuffer::deleteExpiredStoredMarkers() { + // Delete markers of samples that have been overwritten due to circular + // buffer wraparound. + uint32_t generation = mGeneration; + while (mStoredMarkers.peek() && + mStoredMarkers.peek()->HasExpired(generation)) { + delete mStoredMarkers.popHead(); + } +} + +void ProfileBuffer::reset() { + mGeneration += 2; + mReadPos = mWritePos = 0; +} + +#define DYNAMIC_MAX_STRING 8192 + +char* ProfileBuffer::processDynamicTag(int readPos, + int* tagsConsumed, char* tagBuff) +{ + int readAheadPos = (readPos + 1) % mEntrySize; + int tagBuffPos = 0; + + // Read the string stored in mTagData until the null character is seen + bool seenNullByte = false; + while (readAheadPos != mWritePos && !seenNullByte) { + (*tagsConsumed)++; + ProfileEntry readAheadEntry = mEntries[readAheadPos]; + for (size_t pos = 0; pos < sizeof(void*); pos++) { + tagBuff[tagBuffPos] = readAheadEntry.mTagChars[pos]; + if (tagBuff[tagBuffPos] == '\0' || tagBuffPos == DYNAMIC_MAX_STRING-2) { + seenNullByte = true; + break; + } + tagBuffPos++; + } + if (!seenNullByte) + readAheadPos = (readAheadPos + 1) % mEntrySize; + } + return tagBuff; +} + + diff --git a/tools/profiler/core/ProfileBuffer.h b/tools/profiler/core/ProfileBuffer.h new file mode 100644 index 000000000..7d90fe385 --- /dev/null +++ b/tools/profiler/core/ProfileBuffer.h @@ -0,0 +1,61 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PROFILE_BUFFER_H +#define MOZ_PROFILE_BUFFER_H + +#include "ProfileEntry.h" +#include "platform.h" +#include "ProfileJSONWriter.h" +#include "mozilla/RefPtr.h" +#include "mozilla/RefCounted.h" + +class ProfileBuffer : public mozilla::RefCounted<ProfileBuffer> { +public: + MOZ_DECLARE_REFCOUNTED_VIRTUAL_TYPENAME(ProfileBuffer) + + explicit ProfileBuffer(int aEntrySize); + + virtual ~ProfileBuffer(); + + void addTag(const ProfileEntry& aTag); + void StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId, double aSinceTime, + JSContext* cx, UniqueStacks& aUniqueStacks); + void StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId, double aSinceTime, + UniqueStacks& aUniqueStacks); + void DuplicateLastSample(int aThreadId); + + void addStoredMarker(ProfilerMarker* aStoredMarker); + + // The following two methods are not signal safe! They delete markers. + void deleteExpiredStoredMarkers(); + void reset(); + +protected: + char* processDynamicTag(int readPos, int* tagsConsumed, char* tagBuff); + int FindLastSampleOfThread(int aThreadId); + +public: + // Circular buffer 'Keep One Slot Open' implementation for simplicity + mozilla::UniquePtr<ProfileEntry[]> mEntries; + + // Points to the next entry we will write to, which is also the one at which + // we need to stop reading. + int mWritePos; + + // Points to the entry at which we can start reading. + int mReadPos; + + // The number of entries in our buffer. + int mEntrySize; + + // How many times mWritePos has wrapped around. + uint32_t mGeneration; + + // Markers that marker entries in the buffer might refer to. + ProfilerMarkerLinkedList mStoredMarkers; +}; + +#endif diff --git a/tools/profiler/core/ProfileEntry.cpp b/tools/profiler/core/ProfileEntry.cpp new file mode 100644 index 000000000..22d53a6f3 --- /dev/null +++ b/tools/profiler/core/ProfileEntry.cpp @@ -0,0 +1,881 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ostream> +#include "platform.h" +#include "mozilla/HashFunctions.h" + +#ifndef SPS_STANDALONE +#include "nsThreadUtils.h" +#include "nsXULAppAPI.h" + +// JS +#include "jsapi.h" +#include "jsfriendapi.h" +#include "js/TrackedOptimizationInfo.h" +#endif + +// Self +#include "ProfileEntry.h" + +using mozilla::MakeUnique; +using mozilla::UniquePtr; +using mozilla::Maybe; +using mozilla::Some; +using mozilla::Nothing; +using mozilla::JSONWriter; + + +//////////////////////////////////////////////////////////////////////// +// BEGIN ProfileEntry + +ProfileEntry::ProfileEntry() + : mTagData(nullptr) + , mTagName(0) +{ } + +// aTagData must not need release (i.e. be a string from the text segment) +ProfileEntry::ProfileEntry(char aTagName, const char *aTagData) + : mTagData(aTagData) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, ProfilerMarker *aTagMarker) + : mTagMarker(aTagMarker) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, void *aTagPtr) + : mTagPtr(aTagPtr) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, double aTagDouble) + : mTagDouble(aTagDouble) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, uintptr_t aTagOffset) + : mTagOffset(aTagOffset) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, Address aTagAddress) + : mTagAddress(aTagAddress) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, int aTagInt) + : mTagInt(aTagInt) + , mTagName(aTagName) +{ } + +ProfileEntry::ProfileEntry(char aTagName, char aTagChar) + : mTagChar(aTagChar) + , mTagName(aTagName) +{ } + +bool ProfileEntry::is_ent_hint(char hintChar) { + return mTagName == 'h' && mTagChar == hintChar; +} + +bool ProfileEntry::is_ent_hint() { + return mTagName == 'h'; +} + +bool ProfileEntry::is_ent(char tagChar) { + return mTagName == tagChar; +} + +void* ProfileEntry::get_tagPtr() { + // No consistency checking. Oh well. + return mTagPtr; +} + +// END ProfileEntry +//////////////////////////////////////////////////////////////////////// + +class JSONSchemaWriter +{ + JSONWriter& mWriter; + uint32_t mIndex; + +public: + explicit JSONSchemaWriter(JSONWriter& aWriter) + : mWriter(aWriter) + , mIndex(0) + { + aWriter.StartObjectProperty("schema"); + } + + void WriteField(const char* aName) { + mWriter.IntProperty(aName, mIndex++); + } + + ~JSONSchemaWriter() { + mWriter.EndObject(); + } +}; + +#ifndef SPS_STANDALONE +class StreamOptimizationTypeInfoOp : public JS::ForEachTrackedOptimizationTypeInfoOp +{ + JSONWriter& mWriter; + UniqueJSONStrings& mUniqueStrings; + bool mStartedTypeList; + +public: + StreamOptimizationTypeInfoOp(JSONWriter& aWriter, UniqueJSONStrings& aUniqueStrings) + : mWriter(aWriter) + , mUniqueStrings(aUniqueStrings) + , mStartedTypeList(false) + { } + + void readType(const char* keyedBy, const char* name, + const char* location, Maybe<unsigned> lineno) override { + if (!mStartedTypeList) { + mStartedTypeList = true; + mWriter.StartObjectElement(); + mWriter.StartArrayProperty("typeset"); + } + + mWriter.StartObjectElement(); + { + mUniqueStrings.WriteProperty(mWriter, "keyedBy", keyedBy); + if (name) { + mUniqueStrings.WriteProperty(mWriter, "name", name); + } + if (location) { + mUniqueStrings.WriteProperty(mWriter, "location", location); + } + if (lineno.isSome()) { + mWriter.IntProperty("line", *lineno); + } + } + mWriter.EndObject(); + } + + void operator()(JS::TrackedTypeSite site, const char* mirType) override { + if (mStartedTypeList) { + mWriter.EndArray(); + mStartedTypeList = false; + } else { + mWriter.StartObjectElement(); + } + + { + mUniqueStrings.WriteProperty(mWriter, "site", JS::TrackedTypeSiteString(site)); + mUniqueStrings.WriteProperty(mWriter, "mirType", mirType); + } + mWriter.EndObject(); + } +}; + +// As mentioned in ProfileEntry.h, the JSON format contains many arrays whose +// elements are laid out according to various schemas to help +// de-duplication. This RAII class helps write these arrays by keeping track of +// the last non-null element written and adding the appropriate number of null +// elements when writing new non-null elements. It also automatically opens and +// closes an array element on the given JSON writer. +// +// Example usage: +// +// // Define the schema of elements in this type of array: [FOO, BAR, BAZ] +// enum Schema : uint32_t { +// FOO = 0, +// BAR = 1, +// BAZ = 2 +// }; +// +// AutoArraySchemaWriter writer(someJsonWriter, someUniqueStrings); +// if (shouldWriteFoo) { +// writer.IntElement(FOO, getFoo()); +// } +// ... etc ... +class MOZ_RAII AutoArraySchemaWriter +{ + friend class AutoObjectWriter; + + SpliceableJSONWriter& mJSONWriter; + UniqueJSONStrings* mStrings; + uint32_t mNextFreeIndex; + +public: + AutoArraySchemaWriter(SpliceableJSONWriter& aWriter, UniqueJSONStrings& aStrings) + : mJSONWriter(aWriter) + , mStrings(&aStrings) + , mNextFreeIndex(0) + { + mJSONWriter.StartArrayElement(); + } + + // If you don't have access to a UniqueStrings, you had better not try and + // write a string element down the line! + explicit AutoArraySchemaWriter(SpliceableJSONWriter& aWriter) + : mJSONWriter(aWriter) + , mStrings(nullptr) + , mNextFreeIndex(0) + { + mJSONWriter.StartArrayElement(); + } + + ~AutoArraySchemaWriter() { + mJSONWriter.EndArray(); + } + + void FillUpTo(uint32_t aIndex) { + MOZ_ASSERT(aIndex >= mNextFreeIndex); + mJSONWriter.NullElements(aIndex - mNextFreeIndex); + mNextFreeIndex = aIndex + 1; + } + + void IntElement(uint32_t aIndex, uint32_t aValue) { + FillUpTo(aIndex); + mJSONWriter.IntElement(aValue); + } + + void DoubleElement(uint32_t aIndex, double aValue) { + FillUpTo(aIndex); + mJSONWriter.DoubleElement(aValue); + } + + void StringElement(uint32_t aIndex, const char* aValue) { + MOZ_RELEASE_ASSERT(mStrings); + FillUpTo(aIndex); + mStrings->WriteElement(mJSONWriter, aValue); + } +}; + +class StreamOptimizationAttemptsOp : public JS::ForEachTrackedOptimizationAttemptOp +{ + SpliceableJSONWriter& mWriter; + UniqueJSONStrings& mUniqueStrings; + +public: + StreamOptimizationAttemptsOp(SpliceableJSONWriter& aWriter, UniqueJSONStrings& aUniqueStrings) + : mWriter(aWriter), + mUniqueStrings(aUniqueStrings) + { } + + void operator()(JS::TrackedStrategy strategy, JS::TrackedOutcome outcome) override { + enum Schema : uint32_t { + STRATEGY = 0, + OUTCOME = 1 + }; + + AutoArraySchemaWriter writer(mWriter, mUniqueStrings); + writer.StringElement(STRATEGY, JS::TrackedStrategyString(strategy)); + writer.StringElement(OUTCOME, JS::TrackedOutcomeString(outcome)); + } +}; + +class StreamJSFramesOp : public JS::ForEachProfiledFrameOp +{ + void* mReturnAddress; + UniqueStacks::Stack& mStack; + unsigned mDepth; + +public: + StreamJSFramesOp(void* aReturnAddr, UniqueStacks::Stack& aStack) + : mReturnAddress(aReturnAddr) + , mStack(aStack) + , mDepth(0) + { } + + unsigned depth() const { + MOZ_ASSERT(mDepth > 0); + return mDepth; + } + + void operator()(const JS::ForEachProfiledFrameOp::FrameHandle& aFrameHandle) override { + UniqueStacks::OnStackFrameKey frameKey(mReturnAddress, mDepth, aFrameHandle); + mStack.AppendFrame(frameKey); + mDepth++; + } +}; +#endif + +uint32_t UniqueJSONStrings::GetOrAddIndex(const char* aStr) +{ + uint32_t index; + StringKey key(aStr); + + auto it = mStringToIndexMap.find(key); + + if (it != mStringToIndexMap.end()) { + return it->second; + } + index = mStringToIndexMap.size(); + mStringToIndexMap[key] = index; + mStringTableWriter.StringElement(aStr); + return index; +} + +bool UniqueStacks::FrameKey::operator==(const FrameKey& aOther) const +{ + return mLocation == aOther.mLocation && + mLine == aOther.mLine && + mCategory == aOther.mCategory && + mJITAddress == aOther.mJITAddress && + mJITDepth == aOther.mJITDepth; +} + +bool UniqueStacks::StackKey::operator==(const StackKey& aOther) const +{ + MOZ_ASSERT_IF(mPrefix == aOther.mPrefix, mPrefixHash == aOther.mPrefixHash); + return mPrefix == aOther.mPrefix && mFrame == aOther.mFrame; +} + +UniqueStacks::Stack::Stack(UniqueStacks& aUniqueStacks, const OnStackFrameKey& aRoot) + : mUniqueStacks(aUniqueStacks) + , mStack(aUniqueStacks.GetOrAddFrameIndex(aRoot)) +{ +} + +void UniqueStacks::Stack::AppendFrame(const OnStackFrameKey& aFrame) +{ + // Compute the prefix hash and index before mutating mStack. + uint32_t prefixHash = mStack.Hash(); + uint32_t prefix = mUniqueStacks.GetOrAddStackIndex(mStack); + mStack.UpdateHash(prefixHash, prefix, mUniqueStacks.GetOrAddFrameIndex(aFrame)); +} + +uint32_t UniqueStacks::Stack::GetOrAddIndex() const +{ + return mUniqueStacks.GetOrAddStackIndex(mStack); +} + +uint32_t UniqueStacks::FrameKey::Hash() const +{ + uint32_t hash = 0; + if (!mLocation.IsEmpty()) { +#ifdef SPS_STANDALONE + hash = mozilla::HashString(mLocation.c_str()); +#else + hash = mozilla::HashString(mLocation.get()); +#endif + } + if (mLine.isSome()) { + hash = mozilla::AddToHash(hash, *mLine); + } + if (mCategory.isSome()) { + hash = mozilla::AddToHash(hash, *mCategory); + } + if (mJITAddress.isSome()) { + hash = mozilla::AddToHash(hash, *mJITAddress); + if (mJITDepth.isSome()) { + hash = mozilla::AddToHash(hash, *mJITDepth); + } + } + return hash; +} + +uint32_t UniqueStacks::StackKey::Hash() const +{ + if (mPrefix.isNothing()) { + return mozilla::HashGeneric(mFrame); + } + return mozilla::AddToHash(*mPrefixHash, mFrame); +} + +UniqueStacks::Stack UniqueStacks::BeginStack(const OnStackFrameKey& aRoot) +{ + return Stack(*this, aRoot); +} + +UniqueStacks::UniqueStacks(JSContext* aContext) + : mContext(aContext) + , mFrameCount(0) +{ + mFrameTableWriter.StartBareList(); + mStackTableWriter.StartBareList(); +} + +#ifdef SPS_STANDALONE +uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) +{ + uint32_t index; + auto it = mStackToIndexMap.find(aStack); + + if (it != mStackToIndexMap.end()) { + return it->second; + } + + index = mStackToIndexMap.size(); + mStackToIndexMap[aStack] = index; + StreamStack(aStack); + return index; +} +#else +uint32_t UniqueStacks::GetOrAddStackIndex(const StackKey& aStack) +{ + uint32_t index; + if (mStackToIndexMap.Get(aStack, &index)) { + MOZ_ASSERT(index < mStackToIndexMap.Count()); + return index; + } + + index = mStackToIndexMap.Count(); + mStackToIndexMap.Put(aStack, index); + StreamStack(aStack); + return index; +} +#endif + +#ifdef SPS_STANDALONE +uint32_t UniqueStacks::GetOrAddFrameIndex(const OnStackFrameKey& aFrame) +{ + uint32_t index; + auto it = mFrameToIndexMap.find(aFrame); + if (it != mFrameToIndexMap.end()) { + MOZ_ASSERT(it->second < mFrameCount); + return it->second; + } + + // A manual count is used instead of mFrameToIndexMap.Count() due to + // forwarding of canonical JIT frames above. + index = mFrameCount++; + mFrameToIndexMap[aFrame] = index; + StreamFrame(aFrame); + return index; +} +#else +uint32_t UniqueStacks::GetOrAddFrameIndex(const OnStackFrameKey& aFrame) +{ + uint32_t index; + if (mFrameToIndexMap.Get(aFrame, &index)) { + MOZ_ASSERT(index < mFrameCount); + return index; + } + + // If aFrame isn't canonical, forward it to the canonical frame's index. + if (aFrame.mJITFrameHandle) { + void* canonicalAddr = aFrame.mJITFrameHandle->canonicalAddress(); + if (canonicalAddr != *aFrame.mJITAddress) { + OnStackFrameKey canonicalKey(canonicalAddr, *aFrame.mJITDepth, *aFrame.mJITFrameHandle); + uint32_t canonicalIndex = GetOrAddFrameIndex(canonicalKey); + mFrameToIndexMap.Put(aFrame, canonicalIndex); + return canonicalIndex; + } + } + + // A manual count is used instead of mFrameToIndexMap.Count() due to + // forwarding of canonical JIT frames above. + index = mFrameCount++; + mFrameToIndexMap.Put(aFrame, index); + StreamFrame(aFrame); + return index; +} +#endif + +uint32_t UniqueStacks::LookupJITFrameDepth(void* aAddr) +{ + uint32_t depth; + + auto it = mJITFrameDepthMap.find(aAddr); + if (it != mJITFrameDepthMap.end()) { + depth = it->second; + MOZ_ASSERT(depth > 0); + return depth; + } + return 0; +} + +void UniqueStacks::AddJITFrameDepth(void* aAddr, unsigned depth) +{ + mJITFrameDepthMap[aAddr] = depth; +} + +void UniqueStacks::SpliceFrameTableElements(SpliceableJSONWriter& aWriter) +{ + mFrameTableWriter.EndBareList(); + aWriter.TakeAndSplice(mFrameTableWriter.WriteFunc()); +} + +void UniqueStacks::SpliceStackTableElements(SpliceableJSONWriter& aWriter) +{ + mStackTableWriter.EndBareList(); + aWriter.TakeAndSplice(mStackTableWriter.WriteFunc()); +} + +void UniqueStacks::StreamStack(const StackKey& aStack) +{ + enum Schema : uint32_t { + PREFIX = 0, + FRAME = 1 + }; + + AutoArraySchemaWriter writer(mStackTableWriter, mUniqueStrings); + if (aStack.mPrefix.isSome()) { + writer.IntElement(PREFIX, *aStack.mPrefix); + } + writer.IntElement(FRAME, aStack.mFrame); +} + +void UniqueStacks::StreamFrame(const OnStackFrameKey& aFrame) +{ + enum Schema : uint32_t { + LOCATION = 0, + IMPLEMENTATION = 1, + OPTIMIZATIONS = 2, + LINE = 3, + CATEGORY = 4 + }; + + AutoArraySchemaWriter writer(mFrameTableWriter, mUniqueStrings); + +#ifndef SPS_STANDALONE + if (!aFrame.mJITFrameHandle) { +#else + { +#endif +#ifdef SPS_STANDALONE + writer.StringElement(LOCATION, aFrame.mLocation.c_str()); +#else + writer.StringElement(LOCATION, aFrame.mLocation.get()); +#endif + if (aFrame.mLine.isSome()) { + writer.IntElement(LINE, *aFrame.mLine); + } + if (aFrame.mCategory.isSome()) { + writer.IntElement(CATEGORY, *aFrame.mCategory); + } + } +#ifndef SPS_STANDALONE + else { + const JS::ForEachProfiledFrameOp::FrameHandle& jitFrame = *aFrame.mJITFrameHandle; + + writer.StringElement(LOCATION, jitFrame.label()); + + JS::ProfilingFrameIterator::FrameKind frameKind = jitFrame.frameKind(); + MOZ_ASSERT(frameKind == JS::ProfilingFrameIterator::Frame_Ion || + frameKind == JS::ProfilingFrameIterator::Frame_Baseline); + writer.StringElement(IMPLEMENTATION, + frameKind == JS::ProfilingFrameIterator::Frame_Ion + ? "ion" + : "baseline"); + + if (jitFrame.hasTrackedOptimizations()) { + writer.FillUpTo(OPTIMIZATIONS); + mFrameTableWriter.StartObjectElement(); + { + mFrameTableWriter.StartArrayProperty("types"); + { + StreamOptimizationTypeInfoOp typeInfoOp(mFrameTableWriter, mUniqueStrings); + jitFrame.forEachOptimizationTypeInfo(typeInfoOp); + } + mFrameTableWriter.EndArray(); + + JS::Rooted<JSScript*> script(mContext); + jsbytecode* pc; + mFrameTableWriter.StartObjectProperty("attempts"); + { + { + JSONSchemaWriter schema(mFrameTableWriter); + schema.WriteField("strategy"); + schema.WriteField("outcome"); + } + + mFrameTableWriter.StartArrayProperty("data"); + { + StreamOptimizationAttemptsOp attemptOp(mFrameTableWriter, mUniqueStrings); + jitFrame.forEachOptimizationAttempt(attemptOp, script.address(), &pc); + } + mFrameTableWriter.EndArray(); + } + mFrameTableWriter.EndObject(); + + if (JSAtom* name = js::GetPropertyNameFromPC(script, pc)) { + char buf[512]; + JS_PutEscapedFlatString(buf, mozilla::ArrayLength(buf), js::AtomToFlatString(name), 0); + mUniqueStrings.WriteProperty(mFrameTableWriter, "propertyName", buf); + } + + unsigned line, column; + line = JS_PCToLineNumber(script, pc, &column); + mFrameTableWriter.IntProperty("line", line); + mFrameTableWriter.IntProperty("column", column); + } + mFrameTableWriter.EndObject(); + } + } +#endif +} + +struct ProfileSample +{ + uint32_t mStack; + Maybe<double> mTime; + Maybe<double> mResponsiveness; + Maybe<double> mRSS; + Maybe<double> mUSS; + Maybe<int> mFrameNumber; + Maybe<double> mPower; +}; + +static void WriteSample(SpliceableJSONWriter& aWriter, ProfileSample& aSample) +{ + enum Schema : uint32_t { + STACK = 0, + TIME = 1, + RESPONSIVENESS = 2, + RSS = 3, + USS = 4, + FRAME_NUMBER = 5, + POWER = 6 + }; + + AutoArraySchemaWriter writer(aWriter); + + writer.IntElement(STACK, aSample.mStack); + + if (aSample.mTime.isSome()) { + writer.DoubleElement(TIME, *aSample.mTime); + } + + if (aSample.mResponsiveness.isSome()) { + writer.DoubleElement(RESPONSIVENESS, *aSample.mResponsiveness); + } + + if (aSample.mRSS.isSome()) { + writer.DoubleElement(RSS, *aSample.mRSS); + } + + if (aSample.mUSS.isSome()) { + writer.DoubleElement(USS, *aSample.mUSS); + } + + if (aSample.mFrameNumber.isSome()) { + writer.IntElement(FRAME_NUMBER, *aSample.mFrameNumber); + } + + if (aSample.mPower.isSome()) { + writer.DoubleElement(POWER, *aSample.mPower); + } +} + +void ProfileBuffer::StreamSamplesToJSON(SpliceableJSONWriter& aWriter, int aThreadId, + double aSinceTime, JSContext* aContext, + UniqueStacks& aUniqueStacks) +{ + Maybe<ProfileSample> sample; + int readPos = mReadPos; + int currentThreadID = -1; + Maybe<double> currentTime; + UniquePtr<char[]> tagBuff = MakeUnique<char[]>(DYNAMIC_MAX_STRING); + + while (readPos != mWritePos) { + ProfileEntry entry = mEntries[readPos]; + if (entry.mTagName == 'T') { + currentThreadID = entry.mTagInt; + currentTime.reset(); + int readAheadPos = (readPos + 1) % mEntrySize; + if (readAheadPos != mWritePos) { + ProfileEntry readAheadEntry = mEntries[readAheadPos]; + if (readAheadEntry.mTagName == 't') { + currentTime = Some(readAheadEntry.mTagDouble); + } + } + } + if (currentThreadID == aThreadId && (currentTime.isNothing() || *currentTime >= aSinceTime)) { + switch (entry.mTagName) { + case 'r': + if (sample.isSome()) { + sample->mResponsiveness = Some(entry.mTagDouble); + } + break; + case 'p': + if (sample.isSome()) { + sample->mPower = Some(entry.mTagDouble); + } + break; + case 'R': + if (sample.isSome()) { + sample->mRSS = Some(entry.mTagDouble); + } + break; + case 'U': + if (sample.isSome()) { + sample->mUSS = Some(entry.mTagDouble); + } + break; + case 'f': + if (sample.isSome()) { + sample->mFrameNumber = Some(entry.mTagInt); + } + break; + case 's': + { + // end the previous sample if there was one + if (sample.isSome()) { + WriteSample(aWriter, *sample); + sample.reset(); + } + // begin the next sample + sample.emplace(); + sample->mTime = currentTime; + + // Seek forward through the entire sample, looking for frames + // this is an easier approach to reason about than adding more + // control variables and cases to the loop that goes through the buffer once + + UniqueStacks::Stack stack = + aUniqueStacks.BeginStack(UniqueStacks::OnStackFrameKey("(root)")); + + int framePos = (readPos + 1) % mEntrySize; + ProfileEntry frame = mEntries[framePos]; + while (framePos != mWritePos && frame.mTagName != 's' && frame.mTagName != 'T') { + int incBy = 1; + frame = mEntries[framePos]; + + // Read ahead to the next tag, if it's a 'd' tag process it now + const char* tagStringData = frame.mTagData; + int readAheadPos = (framePos + 1) % mEntrySize; + // Make sure the string is always null terminated if it fills up + // DYNAMIC_MAX_STRING-2 + tagBuff[DYNAMIC_MAX_STRING-1] = '\0'; + + if (readAheadPos != mWritePos && mEntries[readAheadPos].mTagName == 'd') { + tagStringData = processDynamicTag(framePos, &incBy, tagBuff.get()); + } + + // Write one frame. It can have either + // 1. only location - 'l' containing a memory address + // 2. location and line number - 'c' followed by 'd's, + // an optional 'n' and an optional 'y' + // 3. a JIT return address - 'j' containing native code address + if (frame.mTagName == 'l') { + // Bug 753041 + // We need a double cast here to tell GCC that we don't want to sign + // extend 32-bit addresses starting with 0xFXXXXXX. + unsigned long long pc = (unsigned long long)(uintptr_t)frame.mTagPtr; + snprintf(tagBuff.get(), DYNAMIC_MAX_STRING, "%#llx", pc); + stack.AppendFrame(UniqueStacks::OnStackFrameKey(tagBuff.get())); + } else if (frame.mTagName == 'c') { + UniqueStacks::OnStackFrameKey frameKey(tagStringData); + readAheadPos = (framePos + incBy) % mEntrySize; + if (readAheadPos != mWritePos && + mEntries[readAheadPos].mTagName == 'n') { + frameKey.mLine = Some((unsigned) mEntries[readAheadPos].mTagInt); + incBy++; + } + readAheadPos = (framePos + incBy) % mEntrySize; + if (readAheadPos != mWritePos && + mEntries[readAheadPos].mTagName == 'y') { + frameKey.mCategory = Some((unsigned) mEntries[readAheadPos].mTagInt); + incBy++; + } + stack.AppendFrame(frameKey); +#ifndef SPS_STANDALONE + } else if (frame.mTagName == 'J') { + // A JIT frame may expand to multiple frames due to inlining. + void* pc = frame.mTagPtr; + unsigned depth = aUniqueStacks.LookupJITFrameDepth(pc); + if (depth == 0) { + StreamJSFramesOp framesOp(pc, stack); + JS::ForEachProfiledFrame(aContext, pc, framesOp); + aUniqueStacks.AddJITFrameDepth(pc, framesOp.depth()); + } else { + for (unsigned i = 0; i < depth; i++) { + UniqueStacks::OnStackFrameKey inlineFrameKey(pc, i); + stack.AppendFrame(inlineFrameKey); + } + } +#endif + } + framePos = (framePos + incBy) % mEntrySize; + } + + sample->mStack = stack.GetOrAddIndex(); + break; + } + } + } + readPos = (readPos + 1) % mEntrySize; + } + if (sample.isSome()) { + WriteSample(aWriter, *sample); + } +} + +void ProfileBuffer::StreamMarkersToJSON(SpliceableJSONWriter& aWriter, int aThreadId, + double aSinceTime, UniqueStacks& aUniqueStacks) +{ + int readPos = mReadPos; + int currentThreadID = -1; + while (readPos != mWritePos) { + ProfileEntry entry = mEntries[readPos]; + if (entry.mTagName == 'T') { + currentThreadID = entry.mTagInt; + } else if (currentThreadID == aThreadId && entry.mTagName == 'm') { + const ProfilerMarker* marker = entry.getMarker(); + if (marker->GetTime() >= aSinceTime) { + entry.getMarker()->StreamJSON(aWriter, aUniqueStacks); + } + } + readPos = (readPos + 1) % mEntrySize; + } +} + +int ProfileBuffer::FindLastSampleOfThread(int aThreadId) +{ + // We search backwards from mWritePos-1 to mReadPos. + // Adding mEntrySize makes the result of the modulus positive. + for (int readPos = (mWritePos + mEntrySize - 1) % mEntrySize; + readPos != (mReadPos + mEntrySize - 1) % mEntrySize; + readPos = (readPos + mEntrySize - 1) % mEntrySize) { + ProfileEntry entry = mEntries[readPos]; + if (entry.mTagName == 'T' && entry.mTagInt == aThreadId) { + return readPos; + } + } + + return -1; +} + +void ProfileBuffer::DuplicateLastSample(int aThreadId) +{ + int lastSampleStartPos = FindLastSampleOfThread(aThreadId); + if (lastSampleStartPos == -1) { + return; + } + + MOZ_ASSERT(mEntries[lastSampleStartPos].mTagName == 'T'); + + addTag(mEntries[lastSampleStartPos]); + + // Go through the whole entry and duplicate it, until we find the next one. + for (int readPos = (lastSampleStartPos + 1) % mEntrySize; + readPos != mWritePos; + readPos = (readPos + 1) % mEntrySize) { + switch (mEntries[readPos].mTagName) { + case 'T': + // We're done. + return; + case 't': + // Copy with new time + addTag(ProfileEntry('t', (mozilla::TimeStamp::Now() - sStartTime).ToMilliseconds())); + break; + case 'm': + // Don't copy markers + break; + // Copy anything else we don't know about + // L, B, S, c, s, d, l, f, h, r, t, p + default: + addTag(mEntries[readPos]); + break; + } + } +} + +// END ProfileBuffer +//////////////////////////////////////////////////////////////////////// + + +//////////////////////////////////////////////////////////////////////// +// BEGIN ThreadProfile + +// END ThreadProfile +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/ProfileEntry.h b/tools/profiler/core/ProfileEntry.h new file mode 100644 index 000000000..b82a2f271 --- /dev/null +++ b/tools/profiler/core/ProfileEntry.h @@ -0,0 +1,407 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PROFILE_ENTRY_H +#define MOZ_PROFILE_ENTRY_H + +#include <ostream> +#include "GeckoProfiler.h" +#include "platform.h" +#include "ProfileJSONWriter.h" +#include "ProfilerBacktrace.h" +#include "mozilla/RefPtr.h" +#include <string> +#include <map> +#ifndef SPS_STANDALONE +#include "js/ProfilingFrameIterator.h" +#include "js/TrackedOptimizationInfo.h" +#include "nsHashKeys.h" +#include "nsDataHashtable.h" +#endif +#include "mozilla/Maybe.h" +#include "mozilla/Vector.h" +#ifndef SPS_STANDALONE +#include "gtest/MozGtestFriend.h" +#else +#define FRIEND_TEST(a, b) // TODO Support standalone gtest +#endif +#include "mozilla/HashFunctions.h" +#include "mozilla/UniquePtr.h" + +class ThreadProfile; + +// NB: Packing this structure has been shown to cause SIGBUS issues on ARM. +#ifndef __arm__ +#pragma pack(push, 1) +#endif + +class ProfileEntry +{ +public: + ProfileEntry(); + + // aTagData must not need release (i.e. be a string from the text segment) + ProfileEntry(char aTagName, const char *aTagData); + ProfileEntry(char aTagName, void *aTagPtr); + ProfileEntry(char aTagName, ProfilerMarker *aTagMarker); + ProfileEntry(char aTagName, double aTagDouble); + ProfileEntry(char aTagName, uintptr_t aTagOffset); + ProfileEntry(char aTagName, Address aTagAddress); + ProfileEntry(char aTagName, int aTagLine); + ProfileEntry(char aTagName, char aTagChar); + bool is_ent_hint(char hintChar); + bool is_ent_hint(); + bool is_ent(char tagName); + void* get_tagPtr(); + const ProfilerMarker* getMarker() { + MOZ_ASSERT(mTagName == 'm'); + return mTagMarker; + } + + char getTagName() const { return mTagName; } + +private: + FRIEND_TEST(ThreadProfile, InsertOneTag); + FRIEND_TEST(ThreadProfile, InsertOneTagWithTinyBuffer); + FRIEND_TEST(ThreadProfile, InsertTagsNoWrap); + FRIEND_TEST(ThreadProfile, InsertTagsWrap); + FRIEND_TEST(ThreadProfile, MemoryMeasure); + friend class ProfileBuffer; + union { + const char* mTagData; + char mTagChars[sizeof(void*)]; + void* mTagPtr; + ProfilerMarker* mTagMarker; + double mTagDouble; + Address mTagAddress; + uintptr_t mTagOffset; + int mTagInt; + char mTagChar; + }; + char mTagName; +}; + +#ifndef __arm__ +#pragma pack(pop) +#endif + +class UniqueJSONStrings +{ +public: + UniqueJSONStrings() { + mStringTableWriter.StartBareList(); + } + + void SpliceStringTableElements(SpliceableJSONWriter& aWriter) { + aWriter.TakeAndSplice(mStringTableWriter.WriteFunc()); + } + + void WriteProperty(mozilla::JSONWriter& aWriter, const char* aName, const char* aStr) { + aWriter.IntProperty(aName, GetOrAddIndex(aStr)); + } + + void WriteElement(mozilla::JSONWriter& aWriter, const char* aStr) { + aWriter.IntElement(GetOrAddIndex(aStr)); + } + + uint32_t GetOrAddIndex(const char* aStr); + + struct StringKey { + + explicit StringKey(const char* aStr) + : mStr(strdup(aStr)) + { + mHash = mozilla::HashString(mStr); + } + + StringKey(const StringKey& aOther) + : mStr(strdup(aOther.mStr)) + { + mHash = aOther.mHash; + } + + ~StringKey() { + free(mStr); + } + + uint32_t Hash() const; + bool operator==(const StringKey& aOther) const { + return strcmp(mStr, aOther.mStr) == 0; + } + bool operator<(const StringKey& aOther) const { + return mHash < aOther.mHash; + } + + private: + uint32_t mHash; + char* mStr; + }; +private: + SpliceableChunkedJSONWriter mStringTableWriter; + std::map<StringKey, uint32_t> mStringToIndexMap; +}; + +class UniqueStacks +{ +public: + struct FrameKey { +#ifdef SPS_STANDALONE + std::string mLocation; +#else + // This cannot be a std::string, as it is not memmove compatible, which + // is used by nsHashTable + nsCString mLocation; +#endif + mozilla::Maybe<unsigned> mLine; + mozilla::Maybe<unsigned> mCategory; + mozilla::Maybe<void*> mJITAddress; + mozilla::Maybe<uint32_t> mJITDepth; + + explicit FrameKey(const char* aLocation) + : mLocation(aLocation) + { + mHash = Hash(); + } + + FrameKey(const FrameKey& aToCopy) + : mLocation(aToCopy.mLocation) + , mLine(aToCopy.mLine) + , mCategory(aToCopy.mCategory) + , mJITAddress(aToCopy.mJITAddress) + , mJITDepth(aToCopy.mJITDepth) + { + mHash = Hash(); + } + + FrameKey(void* aJITAddress, uint32_t aJITDepth) + : mJITAddress(mozilla::Some(aJITAddress)) + , mJITDepth(mozilla::Some(aJITDepth)) + { + mHash = Hash(); + } + + uint32_t Hash() const; + bool operator==(const FrameKey& aOther) const; + bool operator<(const FrameKey& aOther) const { + return mHash < aOther.mHash; + } + + private: + uint32_t mHash; + }; + + // A FrameKey that holds a scoped reference to a JIT FrameHandle. + struct MOZ_STACK_CLASS OnStackFrameKey : public FrameKey { + explicit OnStackFrameKey(const char* aLocation) + : FrameKey(aLocation) +#ifndef SPS_STANDALONE + , mJITFrameHandle(nullptr) +#endif + { } + + OnStackFrameKey(const OnStackFrameKey& aToCopy) + : FrameKey(aToCopy) +#ifndef SPS_STANDALONE + , mJITFrameHandle(aToCopy.mJITFrameHandle) +#endif + { } + +#ifndef SPS_STANDALONE + const JS::ForEachProfiledFrameOp::FrameHandle* mJITFrameHandle; + + OnStackFrameKey(void* aJITAddress, unsigned aJITDepth) + : FrameKey(aJITAddress, aJITDepth) + , mJITFrameHandle(nullptr) + { } + + OnStackFrameKey(void* aJITAddress, unsigned aJITDepth, + const JS::ForEachProfiledFrameOp::FrameHandle& aJITFrameHandle) + : FrameKey(aJITAddress, aJITDepth) + , mJITFrameHandle(&aJITFrameHandle) + { } +#endif + }; + + struct StackKey { + mozilla::Maybe<uint32_t> mPrefixHash; + mozilla::Maybe<uint32_t> mPrefix; + uint32_t mFrame; + + explicit StackKey(uint32_t aFrame) + : mFrame(aFrame) + { + mHash = Hash(); + } + + uint32_t Hash() const; + bool operator==(const StackKey& aOther) const; + bool operator<(const StackKey& aOther) const { + return mHash < aOther.mHash; + } + + void UpdateHash(uint32_t aPrefixHash, uint32_t aPrefix, uint32_t aFrame) { + mPrefixHash = mozilla::Some(aPrefixHash); + mPrefix = mozilla::Some(aPrefix); + mFrame = aFrame; + mHash = Hash(); + } + + private: + uint32_t mHash; + }; + + class Stack { + public: + Stack(UniqueStacks& aUniqueStacks, const OnStackFrameKey& aRoot); + + void AppendFrame(const OnStackFrameKey& aFrame); + uint32_t GetOrAddIndex() const; + + private: + UniqueStacks& mUniqueStacks; + StackKey mStack; + }; + + explicit UniqueStacks(JSContext* aContext); + + Stack BeginStack(const OnStackFrameKey& aRoot); + uint32_t LookupJITFrameDepth(void* aAddr); + void AddJITFrameDepth(void* aAddr, unsigned depth); + void SpliceFrameTableElements(SpliceableJSONWriter& aWriter); + void SpliceStackTableElements(SpliceableJSONWriter& aWriter); + +private: + uint32_t GetOrAddFrameIndex(const OnStackFrameKey& aFrame); + uint32_t GetOrAddStackIndex(const StackKey& aStack); + void StreamFrame(const OnStackFrameKey& aFrame); + void StreamStack(const StackKey& aStack); + +public: + UniqueJSONStrings mUniqueStrings; + +private: + JSContext* mContext; + + // To avoid incurring JitcodeGlobalTable lookup costs for every JIT frame, + // we cache the depth of frames keyed by JIT code address. If an address a + // maps to a depth d, then frames keyed by a for depths 0 to d are + // guaranteed to be in mFrameToIndexMap. + std::map<void*, uint32_t> mJITFrameDepthMap; + + uint32_t mFrameCount; + SpliceableChunkedJSONWriter mFrameTableWriter; +#ifdef SPS_STANDALNOE + std::map<FrameKey, uint32_t> mFrameToIndexMap; +#else + nsDataHashtable<nsGenericHashKey<FrameKey>, uint32_t> mFrameToIndexMap; +#endif + + SpliceableChunkedJSONWriter mStackTableWriter; + + // This sucks but this is really performance critical, nsDataHashtable is way faster + // than map/unordered_map but nsDataHashtable is tied to xpcom so we ifdef + // until we can find a better solution. +#ifdef SPS_STANDALONE + std::map<StackKey, uint32_t> mStackToIndexMap; +#else + nsDataHashtable<nsGenericHashKey<StackKey>, uint32_t> mStackToIndexMap; +#endif +}; + +// +// ThreadProfile JSON Format +// ------------------------- +// +// The profile contains much duplicate information. The output JSON of the +// profile attempts to deduplicate strings, frames, and stack prefixes, to cut +// down on size and to increase JSON streaming speed. Deduplicated values are +// streamed as indices into their respective tables. +// +// Further, arrays of objects with the same set of properties (e.g., samples, +// frames) are output as arrays according to a schema instead of an object +// with property names. A property that is not present is represented in the +// array as null or undefined. +// +// The format of the thread profile JSON is shown by the following example +// with 1 sample and 1 marker: +// +// { +// "name": "Foo", +// "tid": 42, +// "samples": +// { +// "schema": +// { +// "stack": 0, /* index into stackTable */ +// "time": 1, /* number */ +// "responsiveness": 2, /* number */ +// "rss": 3, /* number */ +// "uss": 4, /* number */ +// "frameNumber": 5, /* number */ +// "power": 6 /* number */ +// }, +// "data": +// [ +// [ 1, 0.0, 0.0 ] /* { stack: 1, time: 0.0, responsiveness: 0.0 } */ +// ] +// }, +// +// "markers": +// { +// "schema": +// { +// "name": 0, /* index into stringTable */ +// "time": 1, /* number */ +// "data": 2 /* arbitrary JSON */ +// }, +// "data": +// [ +// [ 3, 0.1 ] /* { name: 'example marker', time: 0.1 } */ +// ] +// }, +// +// "stackTable": +// { +// "schema": +// { +// "prefix": 0, /* index into stackTable */ +// "frame": 1 /* index into frameTable */ +// }, +// "data": +// [ +// [ null, 0 ], /* (root) */ +// [ 0, 1 ] /* (root) > foo.js */ +// ] +// }, +// +// "frameTable": +// { +// "schema": +// { +// "location": 0, /* index into stringTable */ +// "implementation": 1, /* index into stringTable */ +// "optimizations": 2, /* arbitrary JSON */ +// "line": 3, /* number */ +// "category": 4 /* number */ +// }, +// "data": +// [ +// [ 0 ], /* { location: '(root)' } */ +// [ 1, 2 ] /* { location: 'foo.js', implementation: 'baseline' } */ +// ] +// }, +// +// "stringTable": +// [ +// "(root)", +// "foo.js", +// "baseline", +// "example marker" +// ] +// } +// + +#endif /* ndef MOZ_PROFILE_ENTRY_H */ diff --git a/tools/profiler/core/ProfileJSONWriter.cpp b/tools/profiler/core/ProfileJSONWriter.cpp new file mode 100644 index 000000000..65a9425a3 --- /dev/null +++ b/tools/profiler/core/ProfileJSONWriter.cpp @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/HashFunctions.h" + +#include "ProfileJSONWriter.h" + +void +ChunkedJSONWriteFunc::Write(const char* aStr) +{ + MOZ_ASSERT(mChunkPtr >= mChunkList.back().get() && mChunkPtr <= mChunkEnd); + MOZ_ASSERT(mChunkEnd >= mChunkList.back().get() + mChunkLengths.back()); + MOZ_ASSERT(*mChunkPtr == '\0'); + + size_t len = strlen(aStr); + + // Most strings to be written are small, but subprocess profiles (e.g., + // from the content process in e10s) may be huge. If the string is larger + // than a chunk, allocate its own chunk. + char* newPtr; + if (len >= kChunkSize) { + AllocChunk(len + 1); + newPtr = mChunkPtr + len; + } else { + newPtr = mChunkPtr + len; + if (newPtr >= mChunkEnd) { + AllocChunk(kChunkSize); + newPtr = mChunkPtr + len; + } + } + + memcpy(mChunkPtr, aStr, len); + *newPtr = '\0'; + mChunkPtr = newPtr; + mChunkLengths.back() += len; +} + +mozilla::UniquePtr<char[]> +ChunkedJSONWriteFunc::CopyData() const +{ + MOZ_ASSERT(mChunkLengths.length() == mChunkList.length()); + size_t totalLen = 1; + for (size_t i = 0; i < mChunkLengths.length(); i++) { + MOZ_ASSERT(strlen(mChunkList[i].get()) == mChunkLengths[i]); + totalLen += mChunkLengths[i]; + } + mozilla::UniquePtr<char[]> c = mozilla::MakeUnique<char[]>(totalLen); + char* ptr = c.get(); + for (size_t i = 0; i < mChunkList.length(); i++) { + size_t len = mChunkLengths[i]; + memcpy(ptr, mChunkList[i].get(), len); + ptr += len; + } + *ptr = '\0'; + return c; +} + +void +ChunkedJSONWriteFunc::Take(ChunkedJSONWriteFunc&& aOther) +{ + for (size_t i = 0; i < aOther.mChunkList.length(); i++) { + MOZ_ALWAYS_TRUE(mChunkLengths.append(aOther.mChunkLengths[i])); + MOZ_ALWAYS_TRUE(mChunkList.append(mozilla::Move(aOther.mChunkList[i]))); + } + mChunkPtr = mChunkList.back().get() + mChunkLengths.back(); + mChunkEnd = mChunkPtr; + aOther.mChunkPtr = nullptr; + aOther.mChunkEnd = nullptr; + aOther.mChunkList.clear(); + aOther.mChunkLengths.clear(); +} + +void +ChunkedJSONWriteFunc::AllocChunk(size_t aChunkSize) +{ + MOZ_ASSERT(mChunkLengths.length() == mChunkList.length()); + mozilla::UniquePtr<char[]> newChunk = mozilla::MakeUnique<char[]>(aChunkSize); + mChunkPtr = newChunk.get(); + mChunkEnd = mChunkPtr + aChunkSize; + *mChunkPtr = '\0'; + MOZ_ALWAYS_TRUE(mChunkLengths.append(0)); + MOZ_ALWAYS_TRUE(mChunkList.append(mozilla::Move(newChunk))); +} + +void +SpliceableJSONWriter::TakeAndSplice(ChunkedJSONWriteFunc* aFunc) +{ + Separator(); + for (size_t i = 0; i < aFunc->mChunkList.length(); i++) { + WriteFunc()->Write(aFunc->mChunkList[i].get()); + } + aFunc->mChunkPtr = nullptr; + aFunc->mChunkEnd = nullptr; + aFunc->mChunkList.clear(); + aFunc->mChunkLengths.clear(); + mNeedComma[mDepth] = true; +} + +void +SpliceableJSONWriter::Splice(const char* aStr) +{ + Separator(); + WriteFunc()->Write(aStr); + mNeedComma[mDepth] = true; +} + +void +SpliceableChunkedJSONWriter::TakeAndSplice(ChunkedJSONWriteFunc* aFunc) +{ + Separator(); + WriteFunc()->Take(mozilla::Move(*aFunc)); + mNeedComma[mDepth] = true; +} diff --git a/tools/profiler/core/ProfileJSONWriter.h b/tools/profiler/core/ProfileJSONWriter.h new file mode 100644 index 000000000..d9e2115f9 --- /dev/null +++ b/tools/profiler/core/ProfileJSONWriter.h @@ -0,0 +1,126 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILEJSONWRITER_H +#define PROFILEJSONWRITER_H + +#include <ostream> +#include <string> +#include <string.h> + +#include "mozilla/JSONWriter.h" +#include "mozilla/UniquePtr.h" + +class SpliceableChunkedJSONWriter; + +// On average, profile JSONs are large enough such that we want to avoid +// reallocating its buffer when expanding. Additionally, the contents of the +// profile are not accessed until the profile is entirely written. For these +// reasons we use a chunked writer that keeps an array of chunks, which is +// concatenated together after writing is finished. +class ChunkedJSONWriteFunc : public mozilla::JSONWriteFunc +{ +public: + friend class SpliceableJSONWriter; + + ChunkedJSONWriteFunc() { + AllocChunk(kChunkSize); + } + + bool IsEmpty() const { + MOZ_ASSERT_IF(!mChunkPtr, !mChunkEnd && + mChunkList.length() == 0 && + mChunkLengths.length() == 0); + return !mChunkPtr; + } + + void Write(const char* aStr) override; + mozilla::UniquePtr<char[]> CopyData() const; + void Take(ChunkedJSONWriteFunc&& aOther); + +private: + void AllocChunk(size_t aChunkSize); + + static const size_t kChunkSize = 4096 * 512; + + // Pointer for writing inside the current chunk. + // + // The current chunk is always at the back of mChunkList, i.e., + // mChunkList.back() <= mChunkPtr <= mChunkEnd. + char* mChunkPtr; + + // Pointer to the end of the current chunk. + // + // The current chunk is always at the back of mChunkList, i.e., + // mChunkEnd >= mChunkList.back() + mChunkLengths.back(). + char* mChunkEnd; + + // List of chunks and their lengths. + // + // For all i, the length of the string in mChunkList[i] is + // mChunkLengths[i]. + mozilla::Vector<mozilla::UniquePtr<char[]>> mChunkList; + mozilla::Vector<size_t> mChunkLengths; +}; + +struct OStreamJSONWriteFunc : public mozilla::JSONWriteFunc +{ + explicit OStreamJSONWriteFunc(std::ostream& aStream) + : mStream(aStream) + { } + + void Write(const char* aStr) override { + mStream << aStr; + } + + std::ostream& mStream; +}; + +class SpliceableJSONWriter : public mozilla::JSONWriter +{ +public: + explicit SpliceableJSONWriter(mozilla::UniquePtr<mozilla::JSONWriteFunc> aWriter) + : JSONWriter(mozilla::Move(aWriter)) + { } + + void StartBareList(CollectionStyle aStyle = SingleLineStyle) { + StartCollection(nullptr, "", aStyle); + } + + void EndBareList() { + EndCollection(""); + } + + void NullElements(uint32_t aCount) { + for (uint32_t i = 0; i < aCount; i++) { + NullElement(); + } + } + + void Splice(const ChunkedJSONWriteFunc* aFunc); + void Splice(const char* aStr); + + // Takes the chunks from aFunc and write them. If move is not possible + // (e.g., using OStreamJSONWriteFunc), aFunc's chunks are copied and its + // storage cleared. + virtual void TakeAndSplice(ChunkedJSONWriteFunc* aFunc); +}; + +class SpliceableChunkedJSONWriter : public SpliceableJSONWriter +{ +public: + explicit SpliceableChunkedJSONWriter() + : SpliceableJSONWriter(mozilla::MakeUnique<ChunkedJSONWriteFunc>()) + { } + + ChunkedJSONWriteFunc* WriteFunc() const { + return static_cast<ChunkedJSONWriteFunc*>(JSONWriter::WriteFunc()); + } + + // Adopts the chunks from aFunc without copying. + virtual void TakeAndSplice(ChunkedJSONWriteFunc* aFunc) override; +}; + +#endif // PROFILEJSONWRITER_H diff --git a/tools/profiler/core/ProfilerBacktrace.cpp b/tools/profiler/core/ProfilerBacktrace.cpp new file mode 100644 index 000000000..7302dd64c --- /dev/null +++ b/tools/profiler/core/ProfilerBacktrace.cpp @@ -0,0 +1,33 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ProfilerBacktrace.h" + +#include "ProfileJSONWriter.h" +#include "SyncProfile.h" + +ProfilerBacktrace::ProfilerBacktrace(SyncProfile* aProfile) + : mProfile(aProfile) +{ + MOZ_COUNT_CTOR(ProfilerBacktrace); + MOZ_ASSERT(aProfile); +} + +ProfilerBacktrace::~ProfilerBacktrace() +{ + MOZ_COUNT_DTOR(ProfilerBacktrace); + if (mProfile->ShouldDestroy()) { + delete mProfile; + } +} + +void +ProfilerBacktrace::StreamJSON(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + ::MutexAutoLock lock(mProfile->GetMutex()); + mProfile->StreamJSON(aWriter, aUniqueStacks); +} diff --git a/tools/profiler/core/ProfilerMarkers.cpp b/tools/profiler/core/ProfilerMarkers.cpp new file mode 100644 index 000000000..3cb47de48 --- /dev/null +++ b/tools/profiler/core/ProfilerMarkers.cpp @@ -0,0 +1,210 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "GeckoProfiler.h" +#include "ProfilerBacktrace.h" +#include "ProfilerMarkers.h" +#include "SyncProfile.h" +#ifndef SPS_STANDALONE +#include "gfxASurface.h" +#include "Layers.h" +#include "mozilla/Sprintf.h" +#endif + +ProfilerMarkerPayload::ProfilerMarkerPayload(ProfilerBacktrace* aStack) + : mStack(aStack) +{} + +ProfilerMarkerPayload::ProfilerMarkerPayload(const mozilla::TimeStamp& aStartTime, + const mozilla::TimeStamp& aEndTime, + ProfilerBacktrace* aStack) + : mStartTime(aStartTime) + , mEndTime(aEndTime) + , mStack(aStack) +{} + +ProfilerMarkerPayload::~ProfilerMarkerPayload() +{ + profiler_free_backtrace(mStack); +} + +void +ProfilerMarkerPayload::streamCommonProps(const char* aMarkerType, + SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + MOZ_ASSERT(aMarkerType); + aWriter.StringProperty("type", aMarkerType); + if (!mStartTime.IsNull()) { + aWriter.DoubleProperty("startTime", profiler_time(mStartTime)); + } + if (!mEndTime.IsNull()) { + aWriter.DoubleProperty("endTime", profiler_time(mEndTime)); + } + if (mStack) { + aWriter.StartObjectProperty("stack"); + { + mStack->StreamJSON(aWriter, aUniqueStacks); + } + aWriter.EndObject(); + } +} + +ProfilerMarkerTracing::ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData) + : mCategory(aCategory) + , mMetaData(aMetaData) +{ + if (aMetaData == TRACING_EVENT_BACKTRACE) { + SetStack(profiler_get_backtrace()); + } +} + +ProfilerMarkerTracing::ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData, + ProfilerBacktrace* aCause) + : mCategory(aCategory) + , mMetaData(aMetaData) +{ + if (aCause) { + SetStack(aCause); + } +} + +void +ProfilerMarkerTracing::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + streamCommonProps("tracing", aWriter, aUniqueStacks); + + if (GetCategory()) { + aWriter.StringProperty("category", GetCategory()); + } + if (GetMetaData() != TRACING_DEFAULT) { + if (GetMetaData() == TRACING_INTERVAL_START) { + aWriter.StringProperty("interval", "start"); + } else if (GetMetaData() == TRACING_INTERVAL_END) { + aWriter.StringProperty("interval", "end"); + } + } +} + +#ifndef SPS_STANDALONE +GPUMarkerPayload::GPUMarkerPayload( + const mozilla::TimeStamp& aCpuTimeStart, + const mozilla::TimeStamp& aCpuTimeEnd, + uint64_t aGpuTimeStart, + uint64_t aGpuTimeEnd) + + : ProfilerMarkerPayload(aCpuTimeStart, aCpuTimeEnd) + , mCpuTimeStart(aCpuTimeStart) + , mCpuTimeEnd(aCpuTimeEnd) + , mGpuTimeStart(aGpuTimeStart) + , mGpuTimeEnd(aGpuTimeEnd) +{ } + +void +GPUMarkerPayload::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + streamCommonProps("gpu_timer_query", aWriter, aUniqueStacks); + + aWriter.DoubleProperty("cpustart", profiler_time(mCpuTimeStart)); + aWriter.DoubleProperty("cpuend", profiler_time(mCpuTimeEnd)); + aWriter.IntProperty("gpustart", (int)mGpuTimeStart); + aWriter.IntProperty("gpuend", (int)mGpuTimeEnd); +} + +ProfilerMarkerImagePayload::ProfilerMarkerImagePayload(gfxASurface *aImg) + : mImg(aImg) +{ } + +void +ProfilerMarkerImagePayload::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + streamCommonProps("innerHTML", aWriter, aUniqueStacks); + // TODO: Finish me + //aWriter.NameValue("innerHTML", "<img src=''/>"); +} + +IOMarkerPayload::IOMarkerPayload(const char* aSource, + const char* aFilename, + const mozilla::TimeStamp& aStartTime, + const mozilla::TimeStamp& aEndTime, + ProfilerBacktrace* aStack) + : ProfilerMarkerPayload(aStartTime, aEndTime, aStack), + mSource(aSource) +{ + mFilename = aFilename ? strdup(aFilename) : nullptr; + MOZ_ASSERT(aSource); +} + +IOMarkerPayload::~IOMarkerPayload(){ + free(mFilename); +} + +void +IOMarkerPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + streamCommonProps("io", aWriter, aUniqueStacks); + aWriter.StringProperty("source", mSource); + if (mFilename != nullptr) { + aWriter.StringProperty("filename", mFilename); + } +} + +void +ProfilerJSEventMarker(const char *event) +{ + PROFILER_MARKER(event); +} + +LayerTranslationPayload::LayerTranslationPayload(mozilla::layers::Layer* aLayer, + mozilla::gfx::Point aPoint) + : ProfilerMarkerPayload(mozilla::TimeStamp::Now(), mozilla::TimeStamp::Now(), nullptr) + , mLayer(aLayer) + , mPoint(aPoint) +{ +} + +void +LayerTranslationPayload::StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) +{ + const size_t bufferSize = 32; + char buffer[bufferSize]; + SprintfLiteral(buffer, "%p", mLayer); + + aWriter.StringProperty("layer", buffer); + aWriter.IntProperty("x", mPoint.x); + aWriter.IntProperty("y", mPoint.y); + aWriter.StringProperty("category", "LayerTranslation"); +} + +TouchDataPayload::TouchDataPayload(const mozilla::ScreenIntPoint& aPoint) + : ProfilerMarkerPayload(mozilla::TimeStamp::Now(), mozilla::TimeStamp::Now(), nullptr) +{ + mPoint = aPoint; +} + +void +TouchDataPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + aWriter.IntProperty("x", mPoint.x); + aWriter.IntProperty("y", mPoint.y); +} + +VsyncPayload::VsyncPayload(mozilla::TimeStamp aVsyncTimestamp) + : ProfilerMarkerPayload(aVsyncTimestamp, aVsyncTimestamp, nullptr) + , mVsyncTimestamp(aVsyncTimestamp) +{ +} + +void +VsyncPayload::StreamPayload(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + aWriter.DoubleProperty("vsync", profiler_time(mVsyncTimestamp)); + aWriter.StringProperty("category", "VsyncTimestamp"); +} +#endif diff --git a/tools/profiler/core/StackTop.cpp b/tools/profiler/core/StackTop.cpp new file mode 100644 index 000000000..1f7944e5e --- /dev/null +++ b/tools/profiler/core/StackTop.cpp @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef XP_MACOSX +#include <mach/task.h> +#include <mach/thread_act.h> +#include <pthread.h> +#elif XP_WIN +#include <windows.h> +#endif + +#include "StackTop.h" + +void *GetStackTop(void *guess) { +#if defined(XP_MACOSX) + pthread_t thread = pthread_self(); + return pthread_get_stackaddr_np(thread); +#elif defined(XP_WIN) +#if defined(_MSC_VER) && defined(_M_IX86) + // offset 0x18 from the FS segment register gives a pointer to + // the thread information block for the current thread + NT_TIB* pTib; + __asm { + MOV EAX, FS:[18h] + MOV pTib, EAX + } + return static_cast<void*>(pTib->StackBase); +#elif defined(__GNUC__) && defined(i386) + // offset 0x18 from the FS segment register gives a pointer to + // the thread information block for the current thread + NT_TIB* pTib; + asm ( "movl %%fs:0x18, %0\n" + : "=r" (pTib) + ); + return static_cast<void*>(pTib->StackBase); +#elif defined(_M_X64) || defined(__x86_64) + PNT_TIB64 pTib = reinterpret_cast<PNT_TIB64>(NtCurrentTeb()); + return reinterpret_cast<void*>(pTib->StackBase); +#else +#error Need a way to get the stack bounds on this platform (Windows) +#endif +#else + return guess; +#endif +} diff --git a/tools/profiler/core/StackTop.h b/tools/profiler/core/StackTop.h new file mode 100644 index 000000000..a933d10b4 --- /dev/null +++ b/tools/profiler/core/StackTop.h @@ -0,0 +1,10 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_STACK_TOP_H +#define MOZ_STACK_TOP_H +void *GetStackTop(void *guess); +#endif diff --git a/tools/profiler/core/SyncProfile.cpp b/tools/profiler/core/SyncProfile.cpp new file mode 100644 index 000000000..4c4742f34 --- /dev/null +++ b/tools/profiler/core/SyncProfile.cpp @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SyncProfile.h" + +SyncProfile::SyncProfile(ThreadInfo* aInfo, int aEntrySize) + : ThreadProfile(aInfo, new ProfileBuffer(aEntrySize)) + , mOwnerState(REFERENCED) +{ + MOZ_COUNT_CTOR(SyncProfile); +} + +SyncProfile::~SyncProfile() +{ + MOZ_COUNT_DTOR(SyncProfile); + + // SyncProfile owns the ThreadInfo; see NewSyncProfile. + ThreadInfo* info = GetThreadInfo(); + delete info; +} + +bool +SyncProfile::ShouldDestroy() +{ + ::MutexAutoLock lock(GetMutex()); + if (mOwnerState == OWNED) { + mOwnerState = OWNER_DESTROYING; + return true; + } + mOwnerState = ORPHANED; + return false; +} + +void +SyncProfile::EndUnwind() +{ + if (mOwnerState != ORPHANED) { + mOwnerState = OWNED; + } + // Save mOwnerState before we release the mutex + OwnerState ownerState = mOwnerState; + ThreadProfile::EndUnwind(); + if (ownerState == ORPHANED) { + delete this; + } +} + +// SyncProfiles' stacks are deduplicated in the context of the containing +// profile in which the backtrace is as a marker payload. +void +SyncProfile::StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) +{ + ThreadProfile::StreamSamplesAndMarkers(aWriter, /* aSinceTime = */ 0, aUniqueStacks); +} diff --git a/tools/profiler/core/SyncProfile.h b/tools/profiler/core/SyncProfile.h new file mode 100644 index 000000000..58f6b0d81 --- /dev/null +++ b/tools/profiler/core/SyncProfile.h @@ -0,0 +1,43 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __SYNCPROFILE_H +#define __SYNCPROFILE_H + +#include "ProfileEntry.h" +#include "ThreadProfile.h" + +class SyncProfile : public ThreadProfile +{ +public: + SyncProfile(ThreadInfo* aInfo, int aEntrySize); + ~SyncProfile(); + + // SyncProfiles' stacks are deduplicated in the context of the containing + // profile in which the backtrace is as a marker payload. + void StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks); + + virtual void EndUnwind(); + virtual SyncProfile* AsSyncProfile() { return this; } + +private: + friend class ProfilerBacktrace; + + enum OwnerState + { + REFERENCED, // ProfilerBacktrace has a pointer to this but doesn't own + OWNED, // ProfilerBacktrace is responsible for destroying this + OWNER_DESTROYING, // ProfilerBacktrace owns this and is destroying + ORPHANED // No owner, we must destroy ourselves + }; + + bool ShouldDestroy(); + + OwnerState mOwnerState; +}; + +#endif // __SYNCPROFILE_H + diff --git a/tools/profiler/core/ThreadInfo.cpp b/tools/profiler/core/ThreadInfo.cpp new file mode 100644 index 000000000..0e25d2330 --- /dev/null +++ b/tools/profiler/core/ThreadInfo.cpp @@ -0,0 +1,73 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ThreadInfo.h" +#include "ThreadProfile.h" + +#include "mozilla/DebugOnly.h" + +ThreadInfo::ThreadInfo(const char* aName, int aThreadId, + bool aIsMainThread, PseudoStack* aPseudoStack, + void* aStackTop) + : mName(strdup(aName)) + , mThreadId(aThreadId) + , mIsMainThread(aIsMainThread) + , mPseudoStack(aPseudoStack) + , mPlatformData(Sampler::AllocPlatformData(aThreadId)) + , mProfile(nullptr) + , mStackTop(aStackTop) + , mPendingDelete(false) +{ + MOZ_COUNT_CTOR(ThreadInfo); +#ifndef SPS_STANDALONE + mThread = NS_GetCurrentThread(); +#endif + + // We don't have to guess on mac +#ifdef XP_MACOSX + pthread_t self = pthread_self(); + mStackTop = pthread_get_stackaddr_np(self); +#endif +} + +ThreadInfo::~ThreadInfo() { + MOZ_COUNT_DTOR(ThreadInfo); + free(mName); + + if (mProfile) + delete mProfile; + + Sampler::FreePlatformData(mPlatformData); +} + +void +ThreadInfo::SetPendingDelete() +{ + mPendingDelete = true; + // We don't own the pseudostack so disconnect it. + mPseudoStack = nullptr; + if (mProfile) { + mProfile->SetPendingDelete(); + } +} + +bool +ThreadInfo::CanInvokeJS() const +{ +#ifdef SPS_STANDALONE + return false; +#else + nsIThread* thread = GetThread(); + if (!thread) { + MOZ_ASSERT(IsMainThread()); + return true; + } + bool result; + mozilla::DebugOnly<nsresult> rv = thread->GetCanInvokeJS(&result); + MOZ_ASSERT(NS_SUCCEEDED(rv)); + return result; +#endif +} diff --git a/tools/profiler/core/ThreadInfo.h b/tools/profiler/core/ThreadInfo.h new file mode 100644 index 000000000..1cb4e5dc8 --- /dev/null +++ b/tools/profiler/core/ThreadInfo.h @@ -0,0 +1,66 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_THREAD_INFO_H +#define MOZ_THREAD_INFO_H + +#include "platform.h" + +class ThreadInfo { + public: + ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack, void* aStackTop); + + virtual ~ThreadInfo(); + + const char* Name() const { return mName; } + int ThreadId() const { return mThreadId; } + + bool IsMainThread() const { return mIsMainThread; } + PseudoStack* Stack() const { return mPseudoStack; } + + void SetProfile(ThreadProfile* aProfile) { mProfile = aProfile; } + ThreadProfile* Profile() const { return mProfile; } + + PlatformData* GetPlatformData() const { return mPlatformData; } + void* StackTop() const { return mStackTop; } + + virtual void SetPendingDelete(); + bool IsPendingDelete() const { return mPendingDelete; } + +#ifndef SPS_STANDALONE + /** + * May be null for the main thread if the profiler was started during startup + */ + nsIThread* GetThread() const { return mThread.get(); } + +#endif + + bool CanInvokeJS() const; + + private: + char* mName; + int mThreadId; + const bool mIsMainThread; + PseudoStack* mPseudoStack; + PlatformData* mPlatformData; + ThreadProfile* mProfile; + void* mStackTop; +#ifndef SPS_STANDALONE + nsCOMPtr<nsIThread> mThread; +#endif + bool mPendingDelete; +}; + +// Just like ThreadInfo, but owns a reference to the PseudoStack. +class StackOwningThreadInfo : public ThreadInfo { + public: + StackOwningThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack, void* aStackTop); + virtual ~StackOwningThreadInfo(); + + virtual void SetPendingDelete(); +}; + +#endif diff --git a/tools/profiler/core/ThreadProfile.cpp b/tools/profiler/core/ThreadProfile.cpp new file mode 100644 index 000000000..7452a7ee8 --- /dev/null +++ b/tools/profiler/core/ThreadProfile.cpp @@ -0,0 +1,260 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +ThreadProfile::ThreadProfile(ThreadInfo* aInfo, ProfileBuffer* aBuffer) + : mThreadInfo(aInfo) + , mBuffer(aBuffer) + , mPseudoStack(aInfo->Stack()) + , mMutex(OS::CreateMutex("ThreadProfile::mMutex")) + , mThreadId(int(aInfo->ThreadId())) + , mIsMainThread(aInfo->IsMainThread()) + , mPlatformData(aInfo->GetPlatformData()) + , mStackTop(aInfo->StackTop()) +#ifndef SPS_STANDALONE + , mRespInfo(this) +#endif +#ifdef XP_LINUX + , mRssMemory(0) + , mUssMemory(0) +#endif +{ + MOZ_COUNT_CTOR(ThreadProfile); + MOZ_ASSERT(aBuffer); + + // I don't know if we can assert this. But we should warn. + MOZ_ASSERT(aInfo->ThreadId() >= 0, "native thread ID is < 0"); + MOZ_ASSERT(aInfo->ThreadId() <= INT32_MAX, "native thread ID is > INT32_MAX"); +} + +ThreadProfile::~ThreadProfile() +{ + MOZ_COUNT_DTOR(ThreadProfile); +} + +void ThreadProfile::addTag(const ProfileEntry& aTag) +{ + mBuffer->addTag(aTag); +} + +void ThreadProfile::addStoredMarker(ProfilerMarker *aStoredMarker) { + mBuffer->addStoredMarker(aStoredMarker); +} + +void ThreadProfile::StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime) +{ + // mUniqueStacks may already be emplaced from FlushSamplesAndMarkers. + if (!mUniqueStacks.isSome()) { +#ifndef SPS_STANDALONE + mUniqueStacks.emplace(mPseudoStack->mContext); +#else + mUniqueStacks.emplace(nullptr); +#endif + } + + aWriter.Start(SpliceableJSONWriter::SingleLineStyle); + { + StreamSamplesAndMarkers(aWriter, aSinceTime, *mUniqueStacks); + + aWriter.StartObjectProperty("stackTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("prefix"); + schema.WriteField("frame"); + } + + aWriter.StartArrayProperty("data"); + { + mUniqueStacks->SpliceStackTableElements(aWriter); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("frameTable"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("location"); + schema.WriteField("implementation"); + schema.WriteField("optimizations"); + schema.WriteField("line"); + schema.WriteField("category"); + } + + aWriter.StartArrayProperty("data"); + { + mUniqueStacks->SpliceFrameTableElements(aWriter); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartArrayProperty("stringTable"); + { + mUniqueStacks->mUniqueStrings.SpliceStringTableElements(aWriter); + } + aWriter.EndArray(); + } + aWriter.End(); + + mUniqueStacks.reset(); +} + +void ThreadProfile::StreamSamplesAndMarkers(SpliceableJSONWriter& aWriter, double aSinceTime, + UniqueStacks& aUniqueStacks) +{ +#ifndef SPS_STANDALONE + // Thread meta data + if (XRE_GetProcessType() == GeckoProcessType_Plugin) { + // TODO Add the proper plugin name + aWriter.StringProperty("name", "Plugin"); + } else if (XRE_GetProcessType() == GeckoProcessType_Content) { + // This isn't going to really help once we have multiple content + // processes, but it'll do for now. + aWriter.StringProperty("name", "Content"); + } else { + aWriter.StringProperty("name", Name()); + } +#else + aWriter.StringProperty("name", Name()); +#endif + + aWriter.IntProperty("tid", static_cast<int>(mThreadId)); + + aWriter.StartObjectProperty("samples"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("stack"); + schema.WriteField("time"); + schema.WriteField("responsiveness"); + schema.WriteField("rss"); + schema.WriteField("uss"); + schema.WriteField("frameNumber"); + schema.WriteField("power"); + } + + aWriter.StartArrayProperty("data"); + { + if (mSavedStreamedSamples) { + // We would only have saved streamed samples during shutdown + // streaming, which cares about dumping the entire buffer, and thus + // should have passed in 0 for aSinceTime. + MOZ_ASSERT(aSinceTime == 0); + aWriter.Splice(mSavedStreamedSamples.get()); + mSavedStreamedSamples.reset(); + } + mBuffer->StreamSamplesToJSON(aWriter, mThreadId, aSinceTime, +#ifndef SPS_STANDALONE + mPseudoStack->mContext, +#else + nullptr, +#endif + aUniqueStacks); + } + aWriter.EndArray(); + } + aWriter.EndObject(); + + aWriter.StartObjectProperty("markers"); + { + { + JSONSchemaWriter schema(aWriter); + schema.WriteField("name"); + schema.WriteField("time"); + schema.WriteField("data"); + } + + aWriter.StartArrayProperty("data"); + { + if (mSavedStreamedMarkers) { + MOZ_ASSERT(aSinceTime == 0); + aWriter.Splice(mSavedStreamedMarkers.get()); + mSavedStreamedMarkers.reset(); + } + mBuffer->StreamMarkersToJSON(aWriter, mThreadId, aSinceTime, aUniqueStacks); + } + aWriter.EndArray(); + } + aWriter.EndObject(); +} + +void ThreadProfile::FlushSamplesAndMarkers() +{ + // This function is used to serialize the current buffer just before + // JSContext destruction. + MOZ_ASSERT(mPseudoStack->mContext); + + // Unlike StreamJSObject, do not surround the samples in brackets by calling + // aWriter.{Start,End}BareList. The result string will be a comma-separated + // list of JSON object literals that will prepended by StreamJSObject into + // an existing array. + // + // Note that the UniqueStacks instance is persisted so that the frame-index + // mapping is stable across JS shutdown. +#ifndef SPS_STANDALONE + mUniqueStacks.emplace(mPseudoStack->mContext); +#else + mUniqueStacks.emplace(nullptr); +#endif + + { + SpliceableChunkedJSONWriter b; + b.StartBareList(); + { + mBuffer->StreamSamplesToJSON(b, mThreadId, /* aSinceTime = */ 0, +#ifndef SPS_STANDALONE + mPseudoStack->mContext, +#else + nullptr, +#endif + *mUniqueStacks); + } + b.EndBareList(); + mSavedStreamedSamples = b.WriteFunc()->CopyData(); + } + + { + SpliceableChunkedJSONWriter b; + b.StartBareList(); + { + mBuffer->StreamMarkersToJSON(b, mThreadId, /* aSinceTime = */ 0, *mUniqueStacks); + } + b.EndBareList(); + mSavedStreamedMarkers = b.WriteFunc()->CopyData(); + } + + // Reset the buffer. Attempting to symbolicate JS samples after mContext has + // gone away will crash. + mBuffer->reset(); +} + +PseudoStack* ThreadProfile::GetPseudoStack() +{ + return mPseudoStack; +} + +void ThreadProfile::BeginUnwind() +{ + mMutex->Lock(); +} + +void ThreadProfile::EndUnwind() +{ + mMutex->Unlock(); +} + +::Mutex& ThreadProfile::GetMutex() +{ + return *mMutex.get(); +} + +void ThreadProfile::DuplicateLastSample() +{ + mBuffer->DuplicateLastSample(mThreadId); +} + diff --git a/tools/profiler/core/ThreadProfile.h b/tools/profiler/core/ThreadProfile.h new file mode 100644 index 000000000..ca2bbfe7a --- /dev/null +++ b/tools/profiler/core/ThreadProfile.h @@ -0,0 +1,107 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_THREAD_PROFILE_H +#define MOZ_THREAD_PROFILE_H + +#include "ProfileBuffer.h" +#include "ThreadInfo.h" + +class ThreadProfile +{ +public: + ThreadProfile(ThreadInfo* aThreadInfo, ProfileBuffer* aBuffer); + virtual ~ThreadProfile(); + void addTag(const ProfileEntry& aTag); + + /** + * Track a marker which has been inserted into the ThreadProfile. + * This marker can safely be deleted once the generation has + * expired. + */ + void addStoredMarker(ProfilerMarker *aStoredMarker); + PseudoStack* GetPseudoStack(); + ::Mutex& GetMutex(); + void StreamJSON(SpliceableJSONWriter& aWriter, double aSinceTime = 0); + + /** + * Call this method when the JS entries inside the buffer are about to + * become invalid, i.e., just before JS shutdown. + */ + void FlushSamplesAndMarkers(); + + void BeginUnwind(); + virtual void EndUnwind(); + virtual SyncProfile* AsSyncProfile() { return nullptr; } + + bool IsMainThread() const { return mIsMainThread; } + const char* Name() const { return mThreadInfo->Name(); } + int ThreadId() const { return mThreadId; } + + PlatformData* GetPlatformData() const { return mPlatformData; } + void* GetStackTop() const { return mStackTop; } + void DuplicateLastSample(); + + ThreadInfo* GetThreadInfo() const { return mThreadInfo; } +#ifndef SPS_STANDALONE + ThreadResponsiveness* GetThreadResponsiveness() { return &mRespInfo; } +#endif + + bool CanInvokeJS() const { return mThreadInfo->CanInvokeJS(); } + + void SetPendingDelete() + { + mPseudoStack = nullptr; + mPlatformData = nullptr; + } + + uint32_t bufferGeneration() const { + return mBuffer->mGeneration; + } + +protected: + void StreamSamplesAndMarkers(SpliceableJSONWriter& aWriter, double aSinceTime, + UniqueStacks& aUniqueStacks); + +private: + FRIEND_TEST(ThreadProfile, InsertOneTag); + FRIEND_TEST(ThreadProfile, InsertOneTagWithTinyBuffer); + FRIEND_TEST(ThreadProfile, InsertTagsNoWrap); + FRIEND_TEST(ThreadProfile, InsertTagsWrap); + FRIEND_TEST(ThreadProfile, MemoryMeasure); + ThreadInfo* mThreadInfo; + + const RefPtr<ProfileBuffer> mBuffer; + + // JS frames in the buffer may require a live JSRuntime to stream (e.g., + // stringifying JIT frames). In the case of JSRuntime destruction, + // FlushSamplesAndMarkers should be called to save them. These are spliced + // into the final stream. + mozilla::UniquePtr<char[]> mSavedStreamedSamples; + mozilla::UniquePtr<char[]> mSavedStreamedMarkers; + mozilla::Maybe<UniqueStacks> mUniqueStacks; + + PseudoStack* mPseudoStack; + mozilla::UniquePtr<Mutex> mMutex; + int mThreadId; + bool mIsMainThread; + PlatformData* mPlatformData; // Platform specific data. + void* const mStackTop; +#ifndef SPS_STANDALONE + ThreadResponsiveness mRespInfo; +#endif + + // Only Linux is using a signal sender, instead of stopping the thread, so we + // need some space to store the data which cannot be collected in the signal + // handler code. +#ifdef XP_LINUX +public: + int64_t mRssMemory; + int64_t mUssMemory; +#endif +}; + +#endif diff --git a/tools/profiler/core/platform-linux.cc b/tools/profiler/core/platform-linux.cc new file mode 100644 index 000000000..160873c9d --- /dev/null +++ b/tools/profiler/core/platform-linux.cc @@ -0,0 +1,715 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +/* +# vim: sw=2 +*/ +#include <stdio.h> +#include <math.h> + +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/prctl.h> // set name +#include <stdlib.h> +#include <sched.h> +#ifdef ANDROID +#include <android/log.h> +#else +#define __android_log_print(a, ...) +#endif +#include <ucontext.h> +// Ubuntu Dapper requires memory pages to be marked as +// executable. Otherwise, OS raises an exception when executing code +// in that page. +#include <sys/types.h> // mmap & munmap +#include <sys/mman.h> // mmap & munmap +#include <sys/stat.h> // open +#include <fcntl.h> // open +#include <unistd.h> // sysconf +#include <semaphore.h> +#ifdef __GLIBC__ +#include <execinfo.h> // backtrace, backtrace_symbols +#endif // def __GLIBC__ +#include <strings.h> // index +#include <errno.h> +#include <stdarg.h> +#include "prenv.h" +#include "platform.h" +#include "GeckoProfiler.h" +#include "mozilla/Mutex.h" +#include "mozilla/Atomics.h" +#include "mozilla/LinuxSignal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/DebugOnly.h" +#include "ProfileEntry.h" +#include "nsThreadUtils.h" +#include "GeckoSampler.h" +#include "ThreadResponsiveness.h" + +#if defined(__ARM_EABI__) && defined(ANDROID) + // Should also work on other Android and ARM Linux, but not tested there yet. +# define USE_EHABI_STACKWALK +# include "EHABIStackWalk.h" +#elif defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" +#endif + +// Memory profile +#include "nsMemoryReporterManager.h" + +#include <string.h> +#include <list> + +#define SIGNAL_SAVE_PROFILE SIGUSR2 + +using namespace mozilla; + +#if defined(USE_LUL_STACKWALK) +// A singleton instance of the library. It is initialised at first +// use. Currently only the main thread can call Sampler::Start, so +// there is no need for a mechanism to ensure that it is only +// created once in a multi-thread-use situation. +lul::LUL* sLUL = nullptr; + +// This is the sLUL initialization routine. +static void sLUL_initialization_routine(void) +{ + MOZ_ASSERT(!sLUL); + MOZ_ASSERT(gettid() == getpid()); /* "this is the main thread" */ + sLUL = new lul::LUL(logging_sink_for_LUL); + // Read all the unwind info currently available. + read_procmaps(sLUL); +} +#endif + +/* static */ Thread::tid_t +Thread::GetCurrentId() +{ + return gettid(); +} + +#if !defined(ANDROID) +// Keep track of when any of our threads calls fork(), so we can +// temporarily disable signal delivery during the fork() call. Not +// doing so appears to cause a kind of race, in which signals keep +// getting delivered to the thread doing fork(), which keeps causing +// it to fail and be restarted; hence forward progress is delayed a +// great deal. A side effect of this is to permanently disable +// sampling in the child process. See bug 837390. + +// Unfortunately this is only doable on non-Android, since Bionic +// doesn't have pthread_atfork. + +// This records the current state at the time we paused it. +static bool was_paused = false; + +// In the parent, just before the fork, record the pausedness state, +// and then pause. +static void paf_prepare(void) { + if (Sampler::GetActiveSampler()) { + was_paused = Sampler::GetActiveSampler()->IsPaused(); + Sampler::GetActiveSampler()->SetPaused(true); + } else { + was_paused = false; + } +} + +// In the parent, just after the fork, return pausedness to the +// pre-fork state. +static void paf_parent(void) { + if (Sampler::GetActiveSampler()) + Sampler::GetActiveSampler()->SetPaused(was_paused); +} + +// Set up the fork handlers. +static void* setup_atfork() { + pthread_atfork(paf_prepare, paf_parent, NULL); + return NULL; +} +#endif /* !defined(ANDROID) */ + +struct SamplerRegistry { + static void AddActiveSampler(Sampler *sampler) { + ASSERT(!SamplerRegistry::sampler); + SamplerRegistry::sampler = sampler; + } + static void RemoveActiveSampler(Sampler *sampler) { + SamplerRegistry::sampler = NULL; + } + static Sampler *sampler; +}; + +Sampler *SamplerRegistry::sampler = NULL; + +static mozilla::Atomic<ThreadProfile*> sCurrentThreadProfile; +static sem_t sSignalHandlingDone; + +static void ProfilerSaveSignalHandler(int signal, siginfo_t* info, void* context) { + Sampler::GetActiveSampler()->RequestSave(); +} + +static void SetSampleContext(TickSample* sample, void* context) +{ + // Extracting the sample from the context is extremely machine dependent. + ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context); + mcontext_t& mcontext = ucontext->uc_mcontext; +#if V8_HOST_ARCH_IA32 + sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]); + sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]); + sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]); +#elif V8_HOST_ARCH_X64 + sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]); + sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]); + sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]); +#elif V8_HOST_ARCH_ARM +// An undefined macro evaluates to 0, so this applies to Android's Bionic also. +#if !defined(ANDROID) && (__GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 3)) + sample->pc = reinterpret_cast<Address>(mcontext.gregs[R15]); + sample->sp = reinterpret_cast<Address>(mcontext.gregs[R13]); + sample->fp = reinterpret_cast<Address>(mcontext.gregs[R11]); +#ifdef ENABLE_ARM_LR_SAVING + sample->lr = reinterpret_cast<Address>(mcontext.gregs[R14]); +#endif +#else + sample->pc = reinterpret_cast<Address>(mcontext.arm_pc); + sample->sp = reinterpret_cast<Address>(mcontext.arm_sp); + sample->fp = reinterpret_cast<Address>(mcontext.arm_fp); +#ifdef ENABLE_ARM_LR_SAVING + sample->lr = reinterpret_cast<Address>(mcontext.arm_lr); +#endif +#endif +#elif V8_HOST_ARCH_MIPS + // Implement this on MIPS. + UNIMPLEMENTED(); +#endif +} + +#ifdef ANDROID +#define V8_HOST_ARCH_ARM 1 +#define SYS_gettid __NR_gettid +#define SYS_tgkill __NR_tgkill +#else +#define V8_HOST_ARCH_X64 1 +#endif + +namespace { + +void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { + // Avoid TSan warning about clobbering errno. + int savedErrno = errno; + + if (!Sampler::GetActiveSampler()) { + sem_post(&sSignalHandlingDone); + errno = savedErrno; + return; + } + + TickSample sample_obj; + TickSample* sample = &sample_obj; + sample->context = context; + + // If profiling, we extract the current pc and sp. + if (Sampler::GetActiveSampler()->IsProfiling()) { + SetSampleContext(sample, context); + } + sample->threadProfile = sCurrentThreadProfile; + sample->timestamp = mozilla::TimeStamp::Now(); + sample->rssMemory = sample->threadProfile->mRssMemory; + sample->ussMemory = sample->threadProfile->mUssMemory; + + Sampler::GetActiveSampler()->Tick(sample); + + sCurrentThreadProfile = NULL; + sem_post(&sSignalHandlingDone); + errno = savedErrno; +} + +} // namespace + +static void ProfilerSignalThread(ThreadProfile *profile, + bool isFirstProfiledThread) +{ + if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) { + profile->mRssMemory = nsMemoryReporterManager::ResidentFast(); + profile->mUssMemory = nsMemoryReporterManager::ResidentUnique(); + } else { + profile->mRssMemory = 0; + profile->mUssMemory = 0; + } +} + +int tgkill(pid_t tgid, pid_t tid, int signalno) { + return syscall(SYS_tgkill, tgid, tid, signalno); +} + +class PlatformData { + public: + PlatformData() + { + MOZ_COUNT_CTOR(PlatformData); + } + + ~PlatformData() + { + MOZ_COUNT_DTOR(PlatformData); + } +}; + +/* static */ PlatformData* +Sampler::AllocPlatformData(int aThreadId) +{ + return new PlatformData; +} + +/* static */ void +Sampler::FreePlatformData(PlatformData* aData) +{ + delete aData; +} + +static void* SignalSender(void* arg) { + // Taken from platform_thread_posix.cc + prctl(PR_SET_NAME, "SamplerThread", 0, 0, 0); + + int vm_tgid_ = getpid(); + DebugOnly<int> my_tid = gettid(); + + unsigned int nSignalsSent = 0; + + TimeDuration lastSleepOverhead = 0; + TimeStamp sampleStart = TimeStamp::Now(); + while (SamplerRegistry::sampler->IsActive()) { + + SamplerRegistry::sampler->HandleSaveRequest(); + SamplerRegistry::sampler->DeleteExpiredMarkers(); + + if (!SamplerRegistry::sampler->IsPaused()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = + SamplerRegistry::sampler->GetRegisteredThreads(); + + bool isFirstProfiledThread = true; + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + + // This will be null if we're not interested in profiling this thread. + if (!info->Profile() || info->IsPendingDelete()) + continue; + + PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); + if (sleeping == PseudoStack::SLEEPING_AGAIN) { + info->Profile()->DuplicateLastSample(); + continue; + } + + info->Profile()->GetThreadResponsiveness()->Update(); + + // We use sCurrentThreadProfile the ThreadProfile for the + // thread we're profiling to the signal handler + sCurrentThreadProfile = info->Profile(); + + int threadId = info->ThreadId(); + MOZ_ASSERT(threadId != my_tid); + + // Profile from the signal sender for information which is not signal + // safe, and will have low variation between the emission of the signal + // and the signal handler catch. + ProfilerSignalThread(sCurrentThreadProfile, isFirstProfiledThread); + + // Profile from the signal handler for information which is signal safe + // and needs to be precise too, such as the stack of the interrupted + // thread. + if (tgkill(vm_tgid_, threadId, SIGPROF) != 0) { + printf_stderr("profiler failed to signal tid=%d\n", threadId); +#ifdef DEBUG + abort(); +#else + continue; +#endif + } + + // Wait for the signal handler to run before moving on to the next one + sem_wait(&sSignalHandlingDone); + isFirstProfiledThread = false; + + // The LUL unwind object accumulates frame statistics. + // Periodically we should poke it to give it a chance to print + // those statistics. This involves doing I/O (fprintf, + // __android_log_print, etc) and so can't safely be done from + // the unwinder threads, which is why it is done here. + if ((++nSignalsSent & 0xF) == 0) { +# if defined(USE_LUL_STACKWALK) + sLUL->MaybeShowStats(); +# endif + } + } + } + + TimeStamp targetSleepEndTime = sampleStart + TimeDuration::FromMicroseconds(SamplerRegistry::sampler->interval() * 1000); + TimeStamp beforeSleep = TimeStamp::Now(); + TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep; + double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds()); + OS::SleepMicro(sleepTime); + sampleStart = TimeStamp::Now(); + lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime)); + } + return 0; +} + +Sampler::Sampler(double interval, bool profiling, int entrySize) + : interval_(interval), + profiling_(profiling), + paused_(false), + active_(false), + entrySize_(entrySize) { + MOZ_COUNT_CTOR(Sampler); +} + +Sampler::~Sampler() { + MOZ_COUNT_DTOR(Sampler); + ASSERT(!signal_sender_launched_); +} + + +void Sampler::Start() { + LOG("Sampler started"); + +#if defined(USE_EHABI_STACKWALK) + mozilla::EHABIStackWalkInit(); +#elif defined(USE_LUL_STACKWALK) + // NOTE: this isn't thread-safe. But we expect Sampler::Start to be + // called only from the main thread, so this is OK in general. + if (!sLUL) { + sLUL_initialization_routine(); + } +#endif + + SamplerRegistry::AddActiveSampler(this); + + // Initialize signal handler communication + sCurrentThreadProfile = NULL; + if (sem_init(&sSignalHandlingDone, /* pshared: */ 0, /* value: */ 0) != 0) { + LOG("Error initializing semaphore"); + return; + } + + // Request profiling signals. + LOG("Request signal"); + struct sigaction sa; + sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(ProfilerSignalHandler); + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGPROF, &sa, &old_sigprof_signal_handler_) != 0) { + LOG("Error installing signal"); + return; + } + + // Request save profile signals + struct sigaction sa2; + sa2.sa_sigaction = ProfilerSaveSignalHandler; + sigemptyset(&sa2.sa_mask); + sa2.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGNAL_SAVE_PROFILE, &sa2, &old_sigsave_signal_handler_) != 0) { + LOG("Error installing start signal"); + return; + } + LOG("Signal installed"); + signal_handler_installed_ = true; + +#if defined(USE_LUL_STACKWALK) + // Switch into unwind mode. After this point, we can't add or + // remove any unwind info to/from this LUL instance. The only thing + // we can do with it is Unwind() calls. + sLUL->EnableUnwinding(); + + // Has a test been requested? + if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) { + int nTests = 0, nTestsPassed = 0; + RunLulUnitTests(&nTests, &nTestsPassed, sLUL); + } +#endif + + // Start a thread that sends SIGPROF signal to VM thread. + // Sending the signal ourselves instead of relying on itimer provides + // much better accuracy. + SetActive(true); + if (pthread_create( + &signal_sender_thread_, NULL, SignalSender, NULL) == 0) { + signal_sender_launched_ = true; + } + LOG("Profiler thread started"); +} + + +void Sampler::Stop() { + SetActive(false); + + // Wait for signal sender termination (it will exit after setting + // active_ to false). + if (signal_sender_launched_) { + pthread_join(signal_sender_thread_, NULL); + signal_sender_launched_ = false; + } + + SamplerRegistry::RemoveActiveSampler(this); + + // Restore old signal handler + if (signal_handler_installed_) { + sigaction(SIGNAL_SAVE_PROFILE, &old_sigsave_signal_handler_, 0); + sigaction(SIGPROF, &old_sigprof_signal_handler_, 0); + signal_handler_installed_ = false; + } +} + +bool Sampler::RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop) +{ + if (!Sampler::sRegisteredThreadsMutex) + return false; + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + // Thread already registered. This means the first unregister will be + // too early. + ASSERT(false); + return false; + } + } + + set_tls_stack_top(stackTop); + + ThreadInfo* info = new StackOwningThreadInfo(aName, id, + aIsMainThread, aPseudoStack, stackTop); + + if (sActiveSampler) { + sActiveSampler->RegisterThread(info); + } + + sRegisteredThreads->push_back(info); + + return true; +} + +void Sampler::UnregisterCurrentThread() +{ + if (!Sampler::sRegisteredThreadsMutex) + return; + + tlsStackTop.set(nullptr); + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + if (profiler_is_active()) { + // We still want to show the results of this thread if you + // save the profile shortly after a thread is terminated. + // For now we will defer the delete to profile stop. + info->SetPendingDelete(); + break; + } else { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + break; + } + } + } +} + +#ifdef ANDROID +static struct sigaction old_sigstart_signal_handler; +const int SIGSTART = SIGUSR2; + +static void freeArray(const char** array, int size) { + for (int i = 0; i < size; i++) { + free((void*) array[i]); + } +} + +static uint32_t readCSVArray(char* csvList, const char** buffer) { + uint32_t count; + char* savePtr; + int newlinePos = strlen(csvList) - 1; + if (csvList[newlinePos] == '\n') { + csvList[newlinePos] = '\0'; + } + + char* item = strtok_r(csvList, ",", &savePtr); + for (count = 0; item; item = strtok_r(NULL, ",", &savePtr)) { + int length = strlen(item) + 1; // Include \0 + char* newBuf = (char*) malloc(sizeof(char) * length); + buffer[count] = newBuf; + strncpy(newBuf, item, length); + count++; + } + + return count; +} + +// Currently support only the env variables +// reported in read_profiler_env +static void ReadProfilerVars(const char* fileName, const char** features, + uint32_t* featureCount, const char** threadNames, uint32_t* threadCount) { + FILE* file = fopen(fileName, "r"); + const int bufferSize = 1024; + char line[bufferSize]; + char* feature; + char* value; + char* savePtr; + + if (file) { + while (fgets(line, bufferSize, file) != NULL) { + feature = strtok_r(line, "=", &savePtr); + value = strtok_r(NULL, "", &savePtr); + + if (strncmp(feature, PROFILER_INTERVAL, bufferSize) == 0) { + set_profiler_interval(value); + } else if (strncmp(feature, PROFILER_ENTRIES, bufferSize) == 0) { + set_profiler_entries(value); + } else if (strncmp(feature, PROFILER_STACK, bufferSize) == 0) { + set_profiler_scan(value); + } else if (strncmp(feature, PROFILER_FEATURES, bufferSize) == 0) { + *featureCount = readCSVArray(value, features); + } else if (strncmp(feature, "threads", bufferSize) == 0) { + *threadCount = readCSVArray(value, threadNames); + } + } + + fclose(file); + } +} + +static void DoStartTask() { + uint32_t featureCount = 0; + uint32_t threadCount = 0; + + // Just allocate 10 features for now + // FIXME: these don't really point to const chars* + // So we free them later, but we don't want to change the const char** + // declaration in profiler_start. Annoying but ok for now. + const char* threadNames[10]; + const char* features[10]; + const char* profilerConfigFile = "/data/local/tmp/profiler.options"; + + ReadProfilerVars(profilerConfigFile, features, &featureCount, threadNames, &threadCount); + MOZ_ASSERT(featureCount < 10); + MOZ_ASSERT(threadCount < 10); + + profiler_start(PROFILE_DEFAULT_ENTRY, 1, + features, featureCount, + threadNames, threadCount); + + freeArray(threadNames, threadCount); + freeArray(features, featureCount); +} + +static void StartSignalHandler(int signal, siginfo_t* info, void* context) { + class StartTask : public Runnable { + public: + NS_IMETHOD Run() override { + DoStartTask(); + return NS_OK; + } + }; + // XXX: technically NS_DispatchToMainThread is NOT async signal safe. We risk + // nasty things like deadlocks, but the probability is very low and we + // typically only do this once so it tends to be ok. See bug 909403. + NS_DispatchToMainThread(new StartTask()); +} + +void OS::Startup() +{ + LOG("Registering start signal"); + struct sigaction sa; + sa.sa_sigaction = StartSignalHandler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigaction(SIGSTART, &sa, &old_sigstart_signal_handler) != 0) { + LOG("Error installing signal"); + } +} + +#else + +void OS::Startup() { + // Set up the fork handlers. + setup_atfork(); +} + +#endif + + + +void TickSample::PopulateContext(void* aContext) +{ + MOZ_ASSERT(aContext); + ucontext_t* pContext = reinterpret_cast<ucontext_t*>(aContext); + if (!getcontext(pContext)) { + context = pContext; + SetSampleContext(this, aContext); + } +} + +void OS::SleepMicro(int microseconds) +{ + if (MOZ_UNLIKELY(microseconds >= 1000000)) { + // Use usleep for larger intervals, because the nanosleep + // code below only supports intervals < 1 second. + MOZ_ALWAYS_TRUE(!::usleep(microseconds)); + return; + } + + struct timespec ts; + ts.tv_sec = 0; + ts.tv_nsec = microseconds * 1000UL; + + int rv = ::nanosleep(&ts, &ts); + + while (rv != 0 && errno == EINTR) { + // Keep waiting in case of interrupt. + // nanosleep puts the remaining time back into ts. + rv = ::nanosleep(&ts, &ts); + } + + MOZ_ASSERT(!rv, "nanosleep call failed"); +} diff --git a/tools/profiler/core/platform-macos.cc b/tools/profiler/core/platform-macos.cc new file mode 100644 index 000000000..9a98d1a26 --- /dev/null +++ b/tools/profiler/core/platform-macos.cc @@ -0,0 +1,469 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <dlfcn.h> +#include <unistd.h> +#include <sys/mman.h> +#include <mach/mach_init.h> +#include <mach-o/dyld.h> +#include <mach-o/getsect.h> + +#include <AvailabilityMacros.h> + +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <libkern/OSAtomic.h> +#include <mach/mach.h> +#include <mach/semaphore.h> +#include <mach/task.h> +#include <mach/vm_statistics.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/sysctl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <math.h> + +#ifndef SPS_STANDALONE +#include "ThreadResponsiveness.h" +#include "nsThreadUtils.h" + +// Memory profile +#include "nsMemoryReporterManager.h" +#endif + +#include "platform.h" +#include "GeckoSampler.h" +#include "mozilla/TimeStamp.h" + +using mozilla::TimeStamp; +using mozilla::TimeDuration; + +// this port is based off of v8 svn revision 9837 + +// XXX: this is a very stubbed out implementation +// that only supports a single Sampler +struct SamplerRegistry { + static void AddActiveSampler(Sampler *sampler) { + ASSERT(!SamplerRegistry::sampler); + SamplerRegistry::sampler = sampler; + } + static void RemoveActiveSampler(Sampler *sampler) { + SamplerRegistry::sampler = NULL; + } + static Sampler *sampler; +}; + +Sampler *SamplerRegistry::sampler = NULL; + +#ifdef DEBUG +// 0 is never a valid thread id on MacOSX since a pthread_t is a pointer. +static const pthread_t kNoThread = (pthread_t) 0; +#endif + +void OS::Startup() { +} + +void OS::Sleep(int milliseconds) { + usleep(1000 * milliseconds); +} + +void OS::SleepMicro(int microseconds) { + usleep(microseconds); +} + +Thread::Thread(const char* name) + : stack_size_(0) { + set_name(name); +} + + +Thread::~Thread() { +} + + +static void SetThreadName(const char* name) { + // pthread_setname_np is only available in 10.6 or later, so test + // for it at runtime. + int (*dynamic_pthread_setname_np)(const char*); + *reinterpret_cast<void**>(&dynamic_pthread_setname_np) = + dlsym(RTLD_DEFAULT, "pthread_setname_np"); + if (!dynamic_pthread_setname_np) + return; + + // Mac OS X does not expose the length limit of the name, so hardcode it. + static const int kMaxNameLength = 63; + USE(kMaxNameLength); + ASSERT(Thread::kMaxThreadNameLength <= kMaxNameLength); + dynamic_pthread_setname_np(name); +} + + +static void* ThreadEntry(void* arg) { + Thread* thread = reinterpret_cast<Thread*>(arg); + + thread->thread_ = pthread_self(); + SetThreadName(thread->name()); + ASSERT(thread->thread_ != kNoThread); + thread->Run(); + return NULL; +} + + +void Thread::set_name(const char* name) { + strncpy(name_, name, sizeof(name_)); + name_[sizeof(name_) - 1] = '\0'; +} + + +void Thread::Start() { + pthread_attr_t* attr_ptr = NULL; + pthread_attr_t attr; + if (stack_size_ > 0) { + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, static_cast<size_t>(stack_size_)); + attr_ptr = &attr; + } + pthread_create(&thread_, attr_ptr, ThreadEntry, this); + ASSERT(thread_ != kNoThread); +} + +void Thread::Join() { + pthread_join(thread_, NULL); +} + +class PlatformData { + public: + PlatformData() : profiled_thread_(mach_thread_self()) + { + profiled_pthread_ = pthread_from_mach_thread_np(profiled_thread_); + } + + ~PlatformData() { + // Deallocate Mach port for thread. + mach_port_deallocate(mach_task_self(), profiled_thread_); + } + + thread_act_t profiled_thread() { return profiled_thread_; } + pthread_t profiled_pthread() { return profiled_pthread_; } + + private: + // Note: for profiled_thread_ Mach primitives are used instead of PThread's + // because the latter doesn't provide thread manipulation primitives required. + // For details, consult "Mac OS X Internals" book, Section 7.3. + thread_act_t profiled_thread_; + // we also store the pthread because Mach threads have no concept of stack + // and we want to be able to get the stack size when we need to unwind the + // stack using frame pointers. + pthread_t profiled_pthread_; +}; + +/* static */ PlatformData* +Sampler::AllocPlatformData(int aThreadId) +{ + return new PlatformData; +} + +/* static */ void +Sampler::FreePlatformData(PlatformData* aData) +{ + delete aData; +} + +class SamplerThread : public Thread { + public: + explicit SamplerThread(double interval) + : Thread("SamplerThread") + , intervalMicro_(floor(interval * 1000 + 0.5)) + { + if (intervalMicro_ <= 0) { + intervalMicro_ = 1; + } + } + + static void AddActiveSampler(Sampler* sampler) { + SamplerRegistry::AddActiveSampler(sampler); + if (instance_ == NULL) { + instance_ = new SamplerThread(sampler->interval()); + instance_->Start(); + } + } + + static void RemoveActiveSampler(Sampler* sampler) { + instance_->Join(); + //XXX: unlike v8 we need to remove the active sampler after doing the Join + // because we drop the sampler immediately + SamplerRegistry::RemoveActiveSampler(sampler); + delete instance_; + instance_ = NULL; + } + + // Implement Thread::Run(). + virtual void Run() { + TimeDuration lastSleepOverhead = 0; + TimeStamp sampleStart = TimeStamp::Now(); + while (SamplerRegistry::sampler->IsActive()) { + SamplerRegistry::sampler->DeleteExpiredMarkers(); + if (!SamplerRegistry::sampler->IsPaused()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = + SamplerRegistry::sampler->GetRegisteredThreads(); + bool isFirstProfiledThread = true; + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + + // This will be null if we're not interested in profiling this thread. + if (!info->Profile() || info->IsPendingDelete()) + continue; + + PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); + if (sleeping == PseudoStack::SLEEPING_AGAIN) { + info->Profile()->DuplicateLastSample(); + continue; + } + +#ifndef SPS_STANDALONE + info->Profile()->GetThreadResponsiveness()->Update(); +#endif + + ThreadProfile* thread_profile = info->Profile(); + + SampleContext(SamplerRegistry::sampler, thread_profile, + isFirstProfiledThread); + isFirstProfiledThread = false; + } + } + + TimeStamp targetSleepEndTime = sampleStart + TimeDuration::FromMicroseconds(intervalMicro_); + TimeStamp beforeSleep = TimeStamp::Now(); + TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep; + double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds()); + OS::SleepMicro(sleepTime); + sampleStart = TimeStamp::Now(); + lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime)); + } + } + + void SampleContext(Sampler* sampler, ThreadProfile* thread_profile, + bool isFirstProfiledThread) + { + thread_act_t profiled_thread = + thread_profile->GetPlatformData()->profiled_thread(); + + TickSample sample_obj; + TickSample* sample = &sample_obj; + + // Unique Set Size is not supported on Mac. + sample->ussMemory = 0; + sample->rssMemory = 0; + +#ifndef SPS_STANDALONE + if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) { + sample->rssMemory = nsMemoryReporterManager::ResidentFast(); + } +#endif + + // We're using thread_suspend on OS X because pthread_kill (which is what + // we're using on Linux) has less consistent performance and causes + // strange crashes, see bug 1166778 and bug 1166808. + + if (KERN_SUCCESS != thread_suspend(profiled_thread)) return; + +#if V8_HOST_ARCH_X64 + thread_state_flavor_t flavor = x86_THREAD_STATE64; + x86_thread_state64_t state; + mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT; +#if __DARWIN_UNIX03 +#define REGISTER_FIELD(name) __r ## name +#else +#define REGISTER_FIELD(name) r ## name +#endif // __DARWIN_UNIX03 +#elif V8_HOST_ARCH_IA32 + thread_state_flavor_t flavor = i386_THREAD_STATE; + i386_thread_state_t state; + mach_msg_type_number_t count = i386_THREAD_STATE_COUNT; +#if __DARWIN_UNIX03 +#define REGISTER_FIELD(name) __e ## name +#else +#define REGISTER_FIELD(name) e ## name +#endif // __DARWIN_UNIX03 +#else +#error Unsupported Mac OS X host architecture. +#endif // V8_HOST_ARCH + + if (thread_get_state(profiled_thread, + flavor, + reinterpret_cast<natural_t*>(&state), + &count) == KERN_SUCCESS) { + sample->pc = reinterpret_cast<Address>(state.REGISTER_FIELD(ip)); + sample->sp = reinterpret_cast<Address>(state.REGISTER_FIELD(sp)); + sample->fp = reinterpret_cast<Address>(state.REGISTER_FIELD(bp)); + sample->timestamp = mozilla::TimeStamp::Now(); + sample->threadProfile = thread_profile; + + sampler->Tick(sample); + } + thread_resume(profiled_thread); + } + + int intervalMicro_; + //RuntimeProfilerRateLimiter rate_limiter_; + + static SamplerThread* instance_; + + DISALLOW_COPY_AND_ASSIGN(SamplerThread); +}; + +#undef REGISTER_FIELD + +SamplerThread* SamplerThread::instance_ = NULL; + +Sampler::Sampler(double interval, bool profiling, int entrySize) + : // isolate_(isolate), + interval_(interval), + profiling_(profiling), + paused_(false), + active_(false), + entrySize_(entrySize) /*, + samples_taken_(0)*/ { +} + + +Sampler::~Sampler() { + ASSERT(!IsActive()); +} + + +void Sampler::Start() { + ASSERT(!IsActive()); + SetActive(true); + SamplerThread::AddActiveSampler(this); +} + + +void Sampler::Stop() { + ASSERT(IsActive()); + SetActive(false); + SamplerThread::RemoveActiveSampler(this); +} + +pthread_t +Sampler::GetProfiledThread(PlatformData* aData) +{ + return aData->profiled_pthread(); +} + +#include <sys/syscall.h> +pid_t gettid() +{ + return (pid_t) syscall(SYS_thread_selfid); +} + +/* static */ Thread::tid_t +Thread::GetCurrentId() +{ + return gettid(); +} + +bool Sampler::RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop) +{ + if (!Sampler::sRegisteredThreadsMutex) + return false; + + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + // Thread already registered. This means the first unregister will be + // too early. + ASSERT(false); + return false; + } + } + + set_tls_stack_top(stackTop); + + ThreadInfo* info = new StackOwningThreadInfo(aName, id, + aIsMainThread, aPseudoStack, stackTop); + + if (sActiveSampler) { + sActiveSampler->RegisterThread(info); + } + + sRegisteredThreads->push_back(info); + + return true; +} + +void Sampler::UnregisterCurrentThread() +{ + if (!Sampler::sRegisteredThreadsMutex) + return; + + tlsStackTop.set(nullptr); + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = gettid(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + if (profiler_is_active()) { + // We still want to show the results of this thread if you + // save the profile shortly after a thread is terminated. + // For now we will defer the delete to profile stop. + info->SetPendingDelete(); + break; + } else { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + break; + } + } + } +} + +void TickSample::PopulateContext(void* aContext) +{ + // Note that this asm changes if PopulateContext's parameter list is altered +#if defined(SPS_PLAT_amd64_darwin) + asm ( + // Compute caller's %rsp by adding to %rbp: + // 8 bytes for previous %rbp, 8 bytes for return address + "leaq 0x10(%%rbp), %0\n\t" + // Dereference %rbp to get previous %rbp + "movq (%%rbp), %1\n\t" + : + "=r"(sp), + "=r"(fp) + ); +#elif defined(SPS_PLAT_x86_darwin) + asm ( + // Compute caller's %esp by adding to %ebp: + // 4 bytes for aContext + 4 bytes for return address + + // 4 bytes for previous %ebp + "leal 0xc(%%ebp), %0\n\t" + // Dereference %ebp to get previous %ebp + "movl (%%ebp), %1\n\t" + : + "=r"(sp), + "=r"(fp) + ); +#else +# error "Unsupported architecture" +#endif + pc = reinterpret_cast<Address>(__builtin_extract_return_addr( + __builtin_return_address(0))); +} + diff --git a/tools/profiler/core/platform-win32.cc b/tools/profiler/core/platform-win32.cc new file mode 100644 index 000000000..74b311f28 --- /dev/null +++ b/tools/profiler/core/platform-win32.cc @@ -0,0 +1,431 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#include <windows.h> +#include <mmsystem.h> +#include <process.h> +#include "platform.h" +#include "GeckoSampler.h" +#include "ThreadResponsiveness.h" +#include "ProfileEntry.h" + +// Memory profile +#include "nsMemoryReporterManager.h" + +#include "mozilla/StackWalk_windows.h" + + +class PlatformData { + public: + // Get a handle to the calling thread. This is the thread that we are + // going to profile. We need to make a copy of the handle because we are + // going to use it in the sampler thread. Using GetThreadHandle() will + // not work in this case. We're using OpenThread because DuplicateHandle + // for some reason doesn't work in Chrome's sandbox. + PlatformData(int aThreadId) : profiled_thread_(OpenThread(THREAD_GET_CONTEXT | + THREAD_SUSPEND_RESUME | + THREAD_QUERY_INFORMATION, + false, + aThreadId)) {} + + ~PlatformData() { + if (profiled_thread_ != NULL) { + CloseHandle(profiled_thread_); + profiled_thread_ = NULL; + } + } + + HANDLE profiled_thread() { return profiled_thread_; } + + private: + HANDLE profiled_thread_; +}; + +/* static */ PlatformData* +Sampler::AllocPlatformData(int aThreadId) +{ + return new PlatformData(aThreadId); +} + +/* static */ void +Sampler::FreePlatformData(PlatformData* aData) +{ + delete aData; +} + +uintptr_t +Sampler::GetThreadHandle(PlatformData* aData) +{ + return (uintptr_t) aData->profiled_thread(); +} + +class SamplerThread : public Thread { + public: + SamplerThread(double interval, Sampler* sampler) + : Thread("SamplerThread") + , sampler_(sampler) + , interval_(interval) + { + interval_ = floor(interval + 0.5); + if (interval_ <= 0) { + interval_ = 1; + } + } + + static void StartSampler(Sampler* sampler) { + if (instance_ == NULL) { + instance_ = new SamplerThread(sampler->interval(), sampler); + instance_->Start(); + } else { + ASSERT(instance_->interval_ == sampler->interval()); + } + } + + static void StopSampler() { + instance_->Join(); + delete instance_; + instance_ = NULL; + } + + // Implement Thread::Run(). + virtual void Run() { + + // By default we'll not adjust the timer resolution which tends to be around + // 16ms. However, if the requested interval is sufficiently low we'll try to + // adjust the resolution to match. + if (interval_ < 10) + ::timeBeginPeriod(interval_); + + while (sampler_->IsActive()) { + sampler_->DeleteExpiredMarkers(); + + if (!sampler_->IsPaused()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = + sampler_->GetRegisteredThreads(); + bool isFirstProfiledThread = true; + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + + // This will be null if we're not interested in profiling this thread. + if (!info->Profile() || info->IsPendingDelete()) + continue; + + PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); + if (sleeping == PseudoStack::SLEEPING_AGAIN) { + info->Profile()->DuplicateLastSample(); + continue; + } + + info->Profile()->GetThreadResponsiveness()->Update(); + + ThreadProfile* thread_profile = info->Profile(); + + SampleContext(sampler_, thread_profile, isFirstProfiledThread); + isFirstProfiledThread = false; + } + } + OS::Sleep(interval_); + } + + // disable any timer resolution changes we've made + if (interval_ < 10) + ::timeEndPeriod(interval_); + } + + void SampleContext(Sampler* sampler, ThreadProfile* thread_profile, + bool isFirstProfiledThread) + { + uintptr_t thread = Sampler::GetThreadHandle( + thread_profile->GetPlatformData()); + HANDLE profiled_thread = reinterpret_cast<HANDLE>(thread); + if (profiled_thread == NULL) + return; + + // Context used for sampling the register state of the profiled thread. + CONTEXT context; + memset(&context, 0, sizeof(context)); + + TickSample sample_obj; + TickSample* sample = &sample_obj; + + // Grab the timestamp before pausing the thread, to avoid deadlocks. + sample->timestamp = mozilla::TimeStamp::Now(); + sample->threadProfile = thread_profile; + + if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) { + sample->rssMemory = nsMemoryReporterManager::ResidentFast(); + } else { + sample->rssMemory = 0; + } + + // Unique Set Size is not supported on Windows. + sample->ussMemory = 0; + + static const DWORD kSuspendFailed = static_cast<DWORD>(-1); + if (SuspendThread(profiled_thread) == kSuspendFailed) + return; + + // SuspendThread is asynchronous, so the thread may still be running. + // Call GetThreadContext first to ensure the thread is really suspended. + // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743. + + // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in + // RtlVirtualUnwind (see bug 1120126) so we set all the flags. +#if V8_HOST_ARCH_X64 + context.ContextFlags = CONTEXT_FULL; +#else + context.ContextFlags = CONTEXT_CONTROL; +#endif + if (!GetThreadContext(profiled_thread, &context)) { + ResumeThread(profiled_thread); + return; + } + + // Threads that may invoke JS require extra attention. Since, on windows, + // the jits also need to modify the same dynamic function table that we need + // to get a stack trace, we have to be wary of that to avoid deadlock. + // + // When embedded in Gecko, for threads that aren't the main thread, + // CanInvokeJS consults an unlocked value in the nsIThread, so we must + // consult this after suspending the profiled thread to avoid racing + // against a value change. + if (thread_profile->CanInvokeJS()) { + if (!TryAcquireStackWalkWorkaroundLock()) { + ResumeThread(profiled_thread); + return; + } + + // It is safe to immediately drop the lock. We only need to contend with + // the case in which the profiled thread held needed system resources. + // If the profiled thread had held those resources, the trylock would have + // failed. Anyone else who grabs those resources will continue to make + // progress, since those threads are not suspended. Because of this, + // we cannot deadlock with them, and should let them run as they please. + ReleaseStackWalkWorkaroundLock(); + } + +#if V8_HOST_ARCH_X64 + sample->pc = reinterpret_cast<Address>(context.Rip); + sample->sp = reinterpret_cast<Address>(context.Rsp); + sample->fp = reinterpret_cast<Address>(context.Rbp); +#else + sample->pc = reinterpret_cast<Address>(context.Eip); + sample->sp = reinterpret_cast<Address>(context.Esp); + sample->fp = reinterpret_cast<Address>(context.Ebp); +#endif + + sample->context = &context; + sampler->Tick(sample); + + ResumeThread(profiled_thread); + } + + Sampler* sampler_; + int interval_; // units: ms + + // Protects the process wide state below. + static SamplerThread* instance_; + + DISALLOW_COPY_AND_ASSIGN(SamplerThread); +}; + +SamplerThread* SamplerThread::instance_ = NULL; + + +Sampler::Sampler(double interval, bool profiling, int entrySize) + : interval_(interval), + profiling_(profiling), + paused_(false), + active_(false), + entrySize_(entrySize) { +} + +Sampler::~Sampler() { + ASSERT(!IsActive()); +} + +void Sampler::Start() { + ASSERT(!IsActive()); + SetActive(true); + SamplerThread::StartSampler(this); +} + +void Sampler::Stop() { + ASSERT(IsActive()); + SetActive(false); + SamplerThread::StopSampler(); +} + + +static const HANDLE kNoThread = INVALID_HANDLE_VALUE; + +static unsigned int __stdcall ThreadEntry(void* arg) { + Thread* thread = reinterpret_cast<Thread*>(arg); + thread->Run(); + return 0; +} + +// Initialize a Win32 thread object. The thread has an invalid thread +// handle until it is started. +Thread::Thread(const char* name) + : stack_size_(0) { + thread_ = kNoThread; + set_name(name); +} + +void Thread::set_name(const char* name) { + strncpy(name_, name, sizeof(name_)); + name_[sizeof(name_) - 1] = '\0'; +} + +// Close our own handle for the thread. +Thread::~Thread() { + if (thread_ != kNoThread) CloseHandle(thread_); +} + +// Create a new thread. It is important to use _beginthreadex() instead of +// the Win32 function CreateThread(), because the CreateThread() does not +// initialize thread specific structures in the C runtime library. +void Thread::Start() { + thread_ = reinterpret_cast<HANDLE>( + _beginthreadex(NULL, + static_cast<unsigned>(stack_size_), + ThreadEntry, + this, + 0, + (unsigned int*) &thread_id_)); +} + +// Wait for thread to terminate. +void Thread::Join() { + if (thread_id_ != GetCurrentId()) { + WaitForSingleObject(thread_, INFINITE); + } +} + +/* static */ Thread::tid_t +Thread::GetCurrentId() +{ + return GetCurrentThreadId(); +} + +void OS::Startup() { +} + +void OS::Sleep(int milliseconds) { + ::Sleep(milliseconds); +} + +bool Sampler::RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop) +{ + if (!Sampler::sRegisteredThreadsMutex) + return false; + + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = GetCurrentThreadId(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + // Thread already registered. This means the first unregister will be + // too early. + ASSERT(false); + return false; + } + } + + set_tls_stack_top(stackTop); + + ThreadInfo* info = new StackOwningThreadInfo(aName, id, + aIsMainThread, aPseudoStack, stackTop); + + if (sActiveSampler) { + sActiveSampler->RegisterThread(info); + } + + sRegisteredThreads->push_back(info); + + return true; +} + +void Sampler::UnregisterCurrentThread() +{ + if (!Sampler::sRegisteredThreadsMutex) + return; + + tlsStackTop.set(nullptr); + + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + + int id = GetCurrentThreadId(); + + for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { + ThreadInfo* info = sRegisteredThreads->at(i); + if (info->ThreadId() == id && !info->IsPendingDelete()) { + if (profiler_is_active()) { + // We still want to show the results of this thread if you + // save the profile shortly after a thread is terminated. + // For now we will defer the delete to profile stop. + info->SetPendingDelete(); + break; + } else { + delete info; + sRegisteredThreads->erase(sRegisteredThreads->begin() + i); + break; + } + } + } +} + +void TickSample::PopulateContext(void* aContext) +{ + MOZ_ASSERT(aContext); + CONTEXT* pContext = reinterpret_cast<CONTEXT*>(aContext); + context = pContext; + RtlCaptureContext(pContext); + +#if defined(SPS_PLAT_amd64_windows) + + pc = reinterpret_cast<Address>(pContext->Rip); + sp = reinterpret_cast<Address>(pContext->Rsp); + fp = reinterpret_cast<Address>(pContext->Rbp); + +#elif defined(SPS_PLAT_x86_windows) + + pc = reinterpret_cast<Address>(pContext->Eip); + sp = reinterpret_cast<Address>(pContext->Esp); + fp = reinterpret_cast<Address>(pContext->Ebp); + +#endif +} + diff --git a/tools/profiler/core/platform.cpp b/tools/profiler/core/platform.cpp new file mode 100644 index 000000000..0d3cb1648 --- /dev/null +++ b/tools/profiler/core/platform.cpp @@ -0,0 +1,1266 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <ostream> +#include <fstream> +#include <sstream> +#include <errno.h> + +#include "platform.h" +#include "PlatformMacros.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/UniquePtr.h" +#include "GeckoProfiler.h" +#ifndef SPS_STANDALONE +#include "ProfilerIOInterposeObserver.h" +#include "mozilla/StaticPtr.h" +#endif +#include "mozilla/ThreadLocal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/Sprintf.h" +#include "PseudoStack.h" +#include "GeckoSampler.h" +#ifndef SPS_STANDALONE +#include "nsIObserverService.h" +#include "nsDirectoryServiceUtils.h" +#include "nsDirectoryServiceDefs.h" +#include "nsXULAppAPI.h" +#include "nsProfilerStartParams.h" +#include "mozilla/Services.h" +#include "nsThreadUtils.h" +#endif +#include "ProfilerMarkers.h" + +#ifdef MOZ_TASK_TRACER +#include "GeckoTaskTracer.h" +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + #include "FennecJNIWrappers.h" +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +#include "FennecJNINatives.h" +#endif + +#ifndef SPS_STANDALONE +#if defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) +# define USE_LUL_STACKWALK +# include "lul/LulMain.h" +# include "lul/platform-linux-lul.h" +#endif +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +class GeckoJavaSampler : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> +{ +private: + GeckoJavaSampler(); + +public: + static double GetProfilerTime() { + if (!profiler_is_active()) { + return 0.0; + } + return profiler_time(); + }; +}; +#endif + +MOZ_THREAD_LOCAL(PseudoStack *) tlsPseudoStack; +MOZ_THREAD_LOCAL(GeckoSampler *) tlsTicker; +MOZ_THREAD_LOCAL(void *) tlsStackTop; +// We need to track whether we've been initialized otherwise +// we end up using tlsStack without initializing it. +// Because tlsStack is totally opaque to us we can't reuse +// it as the flag itself. +bool stack_key_initialized; + +mozilla::TimeStamp sLastTracerEvent; // is raced on +mozilla::TimeStamp sStartTime; +int sFrameNumber = 0; +int sLastFrameNumber = 0; +int sInitCount = 0; // Each init must have a matched shutdown. +static bool sIsProfiling = false; // is raced on +static bool sIsGPUProfiling = false; // is raced on +static bool sIsLayersDump = false; // is raced on +static bool sIsDisplayListDump = false; // is raced on +static bool sIsRestyleProfiling = false; // is raced on + +// Environment variables to control the profiler +const char* PROFILER_HELP = "MOZ_PROFILER_HELP"; +const char* PROFILER_INTERVAL = "MOZ_PROFILER_INTERVAL"; +const char* PROFILER_ENTRIES = "MOZ_PROFILER_ENTRIES"; +const char* PROFILER_STACK = "MOZ_PROFILER_STACK_SCAN"; +const char* PROFILER_FEATURES = "MOZ_PROFILING_FEATURES"; + +/* we don't need to worry about overflow because we only treat the + * case of them being the same as special. i.e. we only run into + * a problem if 2^32 events happen between samples that we need + * to know are associated with different events */ + +// Values harvested from env vars, that control the profiler. +static int sUnwindInterval; /* in milliseconds */ +static int sUnwindStackScan; /* max # of dubious frames allowed */ +static int sProfileEntries; /* how many entries do we store? */ + +std::vector<ThreadInfo*>* Sampler::sRegisteredThreads = nullptr; +mozilla::UniquePtr< ::Mutex> Sampler::sRegisteredThreadsMutex; + +GeckoSampler* Sampler::sActiveSampler; + +#ifndef SPS_STANDALONE +static mozilla::StaticAutoPtr<mozilla::ProfilerIOInterposeObserver> + sInterposeObserver; +#endif + +// The name that identifies the gecko thread for calls to +// profiler_register_thread. +static const char * gGeckoThreadName = "GeckoMain"; + +void Sampler::Startup() { + sRegisteredThreads = new std::vector<ThreadInfo*>(); + sRegisteredThreadsMutex = OS::CreateMutex("sRegisteredThreads mutex"); + + // We could create the sLUL object and read unwind info into it at + // this point. That would match the lifetime implied by destruction + // of it in Sampler::Shutdown just below. However, that gives a big + // delay on startup, even if no profiling is actually to be done. + // So, instead, sLUL is created on demand at the first call to + // Sampler::Start. +} + +void Sampler::Shutdown() { + while (sRegisteredThreads->size() > 0) { + delete sRegisteredThreads->back(); + sRegisteredThreads->pop_back(); + } + + sRegisteredThreadsMutex = nullptr; + delete sRegisteredThreads; + + // UnregisterThread can be called after shutdown in XPCShell. Thus + // we need to point to null to ignore such a call after shutdown. + sRegisteredThreadsMutex = nullptr; + sRegisteredThreads = nullptr; + +#if defined(USE_LUL_STACKWALK) + // Delete the sLUL object, if it actually got created. + if (sLUL) { + delete sLUL; + sLUL = nullptr; + } +#endif +} + +StackOwningThreadInfo::StackOwningThreadInfo(const char* aName, int aThreadId, + bool aIsMainThread, + PseudoStack* aPseudoStack, + void* aStackTop) + : ThreadInfo(aName, aThreadId, aIsMainThread, aPseudoStack, aStackTop) +{ + aPseudoStack->ref(); +} + +StackOwningThreadInfo::~StackOwningThreadInfo() +{ + PseudoStack* stack = Stack(); + if (stack) { + stack->deref(); + } +} + +void +StackOwningThreadInfo::SetPendingDelete() +{ + PseudoStack* stack = Stack(); + if (stack) { + stack->deref(); + } + ThreadInfo::SetPendingDelete(); +} + +ProfilerMarker::ProfilerMarker(const char* aMarkerName, + ProfilerMarkerPayload* aPayload, + double aTime) + : mMarkerName(strdup(aMarkerName)) + , mPayload(aPayload) + , mTime(aTime) +{ +} + +ProfilerMarker::~ProfilerMarker() { + free(mMarkerName); + delete mPayload; +} + +void +ProfilerMarker::SetGeneration(uint32_t aGenID) { + mGenID = aGenID; +} + +double +ProfilerMarker::GetTime() const { + return mTime; +} + +void ProfilerMarker::StreamJSON(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) const +{ + // Schema: + // [name, time, data] + + aWriter.StartArrayElement(); + { + aUniqueStacks.mUniqueStrings.WriteElement(aWriter, GetMarkerName()); + aWriter.DoubleElement(mTime); + // TODO: Store the callsite for this marker if available: + // if have location data + // b.NameValue(marker, "location", ...); + if (mPayload) { + aWriter.StartObjectElement(); + { + mPayload->StreamPayload(aWriter, aUniqueStacks); + } + aWriter.EndObject(); + } + } + aWriter.EndArray(); +} + +/* Has MOZ_PROFILER_VERBOSE been set? */ + +// Verbosity control for the profiler. The aim is to check env var +// MOZ_PROFILER_VERBOSE only once. However, we may need to temporarily +// override that so as to print the profiler's help message. That's +// what moz_profiler_set_verbosity is for. + +enum class ProfilerVerbosity : int8_t { UNCHECKED, NOTVERBOSE, VERBOSE }; + +// Raced on, potentially +static ProfilerVerbosity profiler_verbosity = ProfilerVerbosity::UNCHECKED; + +bool moz_profiler_verbose() +{ + if (profiler_verbosity == ProfilerVerbosity::UNCHECKED) { + if (getenv("MOZ_PROFILER_VERBOSE") != nullptr) + profiler_verbosity = ProfilerVerbosity::VERBOSE; + else + profiler_verbosity = ProfilerVerbosity::NOTVERBOSE; + } + + return profiler_verbosity == ProfilerVerbosity::VERBOSE; +} + +void moz_profiler_set_verbosity(ProfilerVerbosity pv) +{ + MOZ_ASSERT(pv == ProfilerVerbosity::UNCHECKED || + pv == ProfilerVerbosity::VERBOSE); + profiler_verbosity = pv; +} + + +bool set_profiler_interval(const char* interval) { + if (interval) { + errno = 0; + long int n = strtol(interval, (char**)nullptr, 10); + if (errno == 0 && n >= 1 && n <= 1000) { + sUnwindInterval = n; + return true; + } + return false; + } + + return true; +} + +bool set_profiler_entries(const char* entries) { + if (entries) { + errno = 0; + long int n = strtol(entries, (char**)nullptr, 10); + if (errno == 0 && n > 0) { + sProfileEntries = n; + return true; + } + return false; + } + + return true; +} + +bool set_profiler_scan(const char* scanCount) { + if (scanCount) { + errno = 0; + long int n = strtol(scanCount, (char**)nullptr, 10); + if (errno == 0 && n >= 0 && n <= 100) { + sUnwindStackScan = n; + return true; + } + return false; + } + + return true; +} + +bool is_native_unwinding_avail() { +# if defined(HAVE_NATIVE_UNWIND) + return true; +#else + return false; +#endif +} + +// Read env vars at startup, so as to set: +// sUnwindInterval, sProfileEntries, sUnwindStackScan. +void read_profiler_env_vars() +{ + /* Set defaults */ + sUnwindInterval = 0; /* We'll have to look elsewhere */ + sProfileEntries = 0; + + const char* interval = getenv(PROFILER_INTERVAL); + const char* entries = getenv(PROFILER_ENTRIES); + const char* scanCount = getenv(PROFILER_STACK); + + if (getenv(PROFILER_HELP)) { + // Enable verbose output + moz_profiler_set_verbosity(ProfilerVerbosity::VERBOSE); + profiler_usage(); + // Now force the next enquiry of moz_profiler_verbose to re-query + // env var MOZ_PROFILER_VERBOSE. + moz_profiler_set_verbosity(ProfilerVerbosity::UNCHECKED); + } + + if (!set_profiler_interval(interval) || + !set_profiler_entries(entries) || + !set_profiler_scan(scanCount)) { + profiler_usage(); + } else { + LOG( "SPS:"); + LOGF("SPS: Sampling interval = %d ms (zero means \"platform default\")", + (int)sUnwindInterval); + LOGF("SPS: Entry store size = %d (zero means \"platform default\")", + (int)sProfileEntries); + LOGF("SPS: UnwindStackScan = %d (max dubious frames per unwind).", + (int)sUnwindStackScan); + LOG( "SPS:"); + } +} + +void profiler_usage() { + LOG( "SPS: "); + LOG( "SPS: Environment variable usage:"); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_HELP"); + LOG( "SPS: If set to any value, prints this message."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_INTERVAL=<number> (milliseconds, 1 to 1000)"); + LOG( "SPS: If unset, platform default is used."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_ENTRIES=<number> (count, minimum of 1)"); + LOG( "SPS: If unset, platform default is used."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_VERBOSE"); + LOG( "SPS: If set to any value, increases verbosity (recommended)."); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_STACK_SCAN=<number> (default is zero)"); + LOG( "SPS: The number of dubious (stack-scanned) frames allowed"); + LOG( "SPS: "); + LOG( "SPS: MOZ_PROFILER_LUL_TEST"); + LOG( "SPS: If set to any value, runs LUL unit tests at startup of"); + LOG( "SPS: the unwinder thread, and prints a short summary of results."); + LOG( "SPS: "); + LOGF("SPS: This platform %s native unwinding.", + is_native_unwinding_avail() ? "supports" : "does not support"); + LOG( "SPS: "); + + /* Re-set defaults */ + sUnwindInterval = 0; /* We'll have to look elsewhere */ + sProfileEntries = 0; + sUnwindStackScan = 0; + + LOG( "SPS:"); + LOGF("SPS: Sampling interval = %d ms (zero means \"platform default\")", + (int)sUnwindInterval); + LOGF("SPS: Entry store size = %d (zero means \"platform default\")", + (int)sProfileEntries); + LOGF("SPS: UnwindStackScan = %d (max dubious frames per unwind).", + (int)sUnwindStackScan); + LOG( "SPS:"); + + return; +} + +void set_tls_stack_top(void* stackTop) +{ + // Round |stackTop| up to the end of the containing page. We may + // as well do this -- there's no danger of a fault, and we might + // get a few more base-of-the-stack frames as a result. This + // assumes that no target has a page size smaller than 4096. + uintptr_t stackTopR = (uintptr_t)stackTop; + if (stackTop) { + stackTopR = (stackTopR & ~(uintptr_t)4095) + (uintptr_t)4095; + } + tlsStackTop.set((void*)stackTopR); +} + +bool is_main_thread_name(const char* aName) { + if (!aName) { + return false; + } + return strcmp(aName, gGeckoThreadName) == 0; +} + +#ifndef SPS_STANDALONE +#ifdef HAVE_VA_COPY +#define VARARGS_ASSIGN(foo, bar) VA_COPY(foo,bar) +#elif defined(HAVE_VA_LIST_AS_ARRAY) +#define VARARGS_ASSIGN(foo, bar) foo[0] = bar[0] +#else +#define VARARGS_ASSIGN(foo, bar) (foo) = (bar) +#endif + +void +mozilla_sampler_log(const char *fmt, va_list args) +{ + if (profiler_is_active()) { + // nsAutoCString AppendPrintf would be nicer but + // this is mozilla external code + char buf[2048]; + va_list argsCpy; + VARARGS_ASSIGN(argsCpy, args); + int required = VsprintfLiteral(buf, fmt, argsCpy); + va_end(argsCpy); + + if (required < 0) { + return; // silently drop for now + } else if (required < 2048) { + profiler_tracing("log", buf, TRACING_EVENT); + } else { + char* heapBuf = new char[required+1]; + va_list argsCpy; + VARARGS_ASSIGN(argsCpy, args); + vsnprintf(heapBuf, required+1, fmt, argsCpy); + va_end(argsCpy); + // EVENT_BACKTRACE could be used to get a source + // for all log events. This could be a runtime + // flag later. + profiler_tracing("log", heapBuf, TRACING_EVENT); + delete[] heapBuf; + } + } +} +#endif + +//////////////////////////////////////////////////////////////////////// +// BEGIN externally visible functions + +void mozilla_sampler_init(void* stackTop) +{ + sInitCount++; + + if (stack_key_initialized) + return; + +#ifdef MOZ_TASK_TRACER + mozilla::tasktracer::InitTaskTracer(); +#endif + +#ifdef SPS_STANDALONE + mozilla::TimeStamp::Startup(); +#endif + + LOG("BEGIN mozilla_sampler_init"); + if (!tlsPseudoStack.init() || !tlsTicker.init() || !tlsStackTop.init()) { + LOG("Failed to init."); + return; + } + bool ignore; + sStartTime = mozilla::TimeStamp::ProcessCreation(ignore); + + stack_key_initialized = true; + + Sampler::Startup(); + + PseudoStack *stack = PseudoStack::create(); + tlsPseudoStack.set(stack); + + bool isMainThread = true; + Sampler::RegisterCurrentThread(isMainThread ? + gGeckoThreadName : "Application Thread", + stack, isMainThread, stackTop); + + // Read interval settings from MOZ_PROFILER_INTERVAL and stack-scan + // threshhold from MOZ_PROFILER_STACK_SCAN. + read_profiler_env_vars(); + + // platform specific initialization + OS::Startup(); + +#ifndef SPS_STANDALONE + set_stderr_callback(mozilla_sampler_log); +#endif + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + if (mozilla::jni::IsFennec()) { + GeckoJavaSampler::Init(); + } +#endif + + // We can't open pref so we use an environment variable + // to know if we should trigger the profiler on startup + // NOTE: Default + const char *val = getenv("MOZ_PROFILER_STARTUP"); + if (!val || !*val) { + return; + } + + const char* features[] = {"js" + , "leaf" + , "threads" +#if defined(XP_WIN) || defined(XP_MACOSX) \ + || (defined(SPS_ARCH_arm) && defined(linux)) \ + || defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_x86_linux) + , "stackwalk" +#endif +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + , "java" +#endif + }; + + const char* threadFilters[] = { "GeckoMain", "Compositor" }; + + profiler_start(PROFILE_DEFAULT_ENTRY, PROFILE_DEFAULT_INTERVAL, + features, MOZ_ARRAY_LENGTH(features), + threadFilters, MOZ_ARRAY_LENGTH(threadFilters)); + LOG("END mozilla_sampler_init"); +} + +void mozilla_sampler_shutdown() +{ + sInitCount--; + + if (sInitCount > 0) + return; + + // Save the profile on shutdown if requested. + GeckoSampler *t = tlsTicker.get(); + if (t) { + const char *val = getenv("MOZ_PROFILER_SHUTDOWN"); + if (val) { + std::ofstream stream; + stream.open(val); + if (stream.is_open()) { + t->ToStreamAsJSON(stream); + stream.close(); + } + } + } + + profiler_stop(); + +#ifndef SPS_STANDALONE + set_stderr_callback(nullptr); +#endif + + Sampler::Shutdown(); + +#ifdef SPS_STANDALONE + mozilla::TimeStamp::Shutdown(); +#endif + + PseudoStack *stack = tlsPseudoStack.get(); + stack->deref(); + tlsPseudoStack.set(nullptr); + +#ifdef MOZ_TASK_TRACER + mozilla::tasktracer::ShutdownTaskTracer(); +#endif +} + +void mozilla_sampler_save() +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return; + } + + t->RequestSave(); + // We're on the main thread already so we don't + // have to wait to handle the save request. + t->HandleSaveRequest(); +} + +mozilla::UniquePtr<char[]> mozilla_sampler_get_profile(double aSinceTime) +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return nullptr; + } + + return t->ToJSON(aSinceTime); +} + +#ifndef SPS_STANDALONE +JSObject *mozilla_sampler_get_profile_data(JSContext *aCx, double aSinceTime) +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return nullptr; + } + + return t->ToJSObject(aCx, aSinceTime); +} + +void mozilla_sampler_get_profile_data_async(double aSinceTime, + mozilla::dom::Promise* aPromise) +{ + GeckoSampler *t = tlsTicker.get(); + if (NS_WARN_IF(!t)) { + return; + } + + t->ToJSObjectAsync(aSinceTime, aPromise); +} + +void mozilla_sampler_get_profiler_start_params(int* aEntrySize, + double* aInterval, + mozilla::Vector<const char*>* aFilters, + mozilla::Vector<const char*>* aFeatures) +{ + if (NS_WARN_IF(!aEntrySize) || NS_WARN_IF(!aInterval) || + NS_WARN_IF(!aFilters) || NS_WARN_IF(!aFeatures)) { + return; + } + + GeckoSampler *t = tlsTicker.get(); + if (NS_WARN_IF(!t)) { + return; + } + + *aEntrySize = t->EntrySize(); + *aInterval = t->interval(); + + const ThreadNameFilterList& threadNameFilterList = t->ThreadNameFilters(); + MOZ_ALWAYS_TRUE(aFilters->resize(threadNameFilterList.length())); + for (uint32_t i = 0; i < threadNameFilterList.length(); ++i) { + (*aFilters)[i] = threadNameFilterList[i].c_str(); + } + + const FeatureList& featureList = t->Features(); + MOZ_ALWAYS_TRUE(aFeatures->resize(featureList.length())); + for (size_t i = 0; i < featureList.length(); ++i) { + (*aFeatures)[i] = featureList[i].c_str(); + } +} + +void mozilla_sampler_get_gatherer(nsISupports** aRetVal) +{ + if (!aRetVal) { + return; + } + + if (NS_WARN_IF(!profiler_is_active())) { + *aRetVal = nullptr; + return; + } + + GeckoSampler *t = tlsTicker.get(); + if (NS_WARN_IF(!t)) { + *aRetVal = nullptr; + return; + } + + t->GetGatherer(aRetVal); +} + +#endif + +void mozilla_sampler_save_profile_to_file(const char* aFilename) +{ + GeckoSampler *t = tlsTicker.get(); + if (!t) { + return; + } + + std::ofstream stream; + stream.open(aFilename); + if (stream.is_open()) { + t->ToStreamAsJSON(stream); + stream.close(); + LOGF("Saved to %s", aFilename); + } else { + LOG("Fail to open profile log file."); + } +} + + +const char** mozilla_sampler_get_features() +{ + static const char* features[] = { +#if defined(MOZ_PROFILING) && defined(HAVE_NATIVE_UNWIND) + // Walk the C++ stack. + "stackwalk", +#endif +#if defined(ENABLE_SPS_LEAF_DATA) + // Include the C++ leaf node if not stackwalking. DevTools + // profiler doesn't want the native addresses. + "leaf", +#endif +#if !defined(SPS_OS_windows) + // Use a seperate thread of walking the stack. + "unwinder", +#endif + "java", + // Only record samples during periods of bad responsiveness + "jank", + // Tell the JS engine to emmit pseudostack entries in the + // pro/epilogue. + "js", + // GPU Profiling (may not be supported by the GL) + "gpu", + // Profile the registered secondary threads. + "threads", + // Do not include user-identifiable information + "privacy", + // Dump the layer tree with the textures. + "layersdump", + // Dump the display list with the textures. + "displaylistdump", + // Add main thread I/O to the profile + "mainthreadio", + // Add RSS collection + "memory", +#ifdef MOZ_TASK_TRACER + // Start profiling with feature TaskTracer. + "tasktracer", +#endif +#if defined(XP_WIN) + // Add power collection + "power", +#endif + nullptr + }; + + return features; +} + +void mozilla_sampler_get_buffer_info(uint32_t *aCurrentPosition, uint32_t *aTotalSize, + uint32_t *aGeneration) +{ + *aCurrentPosition = 0; + *aTotalSize = 0; + *aGeneration = 0; + + if (!stack_key_initialized) + return; + + GeckoSampler *t = tlsTicker.get(); + if (!t) + return; + + t->GetBufferInfo(aCurrentPosition, aTotalSize, aGeneration); +} + +// Values are only honored on the first start +void mozilla_sampler_start(int aProfileEntries, double aInterval, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount) + +{ + LOG("BEGIN mozilla_sampler_start"); + + if (!stack_key_initialized) + profiler_init(nullptr); + + /* If the sampling interval was set using env vars, use that + in preference to anything else. */ + if (sUnwindInterval > 0) + aInterval = sUnwindInterval; + + /* If the entry count was set using env vars, use that, too: */ + if (sProfileEntries > 0) + aProfileEntries = sProfileEntries; + + // Reset the current state if the profiler is running + profiler_stop(); + + GeckoSampler* t; + t = new GeckoSampler(aInterval ? aInterval : PROFILE_DEFAULT_INTERVAL, + aProfileEntries ? aProfileEntries : PROFILE_DEFAULT_ENTRY, + aFeatures, aFeatureCount, + aThreadNameFilters, aFilterCount); + + tlsTicker.set(t); + t->Start(); + if (t->ProfileJS() || t->InPrivacyMode()) { + ::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); + std::vector<ThreadInfo*> threads = t->GetRegisteredThreads(); + + for (uint32_t i = 0; i < threads.size(); i++) { + ThreadInfo* info = threads[i]; + if (info->IsPendingDelete()) { + continue; + } + ThreadProfile* thread_profile = info->Profile(); + if (!thread_profile) { + continue; + } + thread_profile->GetPseudoStack()->reinitializeOnResume(); +#ifndef SPS_STANDALONE + if (t->ProfileJS()) { + thread_profile->GetPseudoStack()->enableJSSampling(); + } + if (t->InPrivacyMode()) { + thread_profile->GetPseudoStack()->mPrivacyMode = true; + } +#endif + } + } + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + if (t->ProfileJava()) { + int javaInterval = aInterval; + // Java sampling doesn't accuratly keep up with 1ms sampling + if (javaInterval < 10) { + aInterval = 10; + } + java::GeckoJavaSampler::Start(javaInterval, 1000); + } +#endif + +#ifndef SPS_STANDALONE + if (t->AddMainThreadIO()) { + if (!sInterposeObserver) { + // Lazily create IO interposer observer + sInterposeObserver = new mozilla::ProfilerIOInterposeObserver(); + } + mozilla::IOInterposer::Register(mozilla::IOInterposeObserver::OpAll, + sInterposeObserver); + } +#endif + + sIsProfiling = true; +#ifndef SPS_STANDALONE + sIsGPUProfiling = t->ProfileGPU(); + sIsLayersDump = t->LayersDump(); + sIsDisplayListDump = t->DisplayListDump(); + sIsRestyleProfiling = t->ProfileRestyle(); + + if (Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) { + nsTArray<nsCString> featuresArray; + nsTArray<nsCString> threadNameFiltersArray; + + for (size_t i = 0; i < aFeatureCount; ++i) { + featuresArray.AppendElement(aFeatures[i]); + } + + for (size_t i = 0; i < aFilterCount; ++i) { + threadNameFiltersArray.AppendElement(aThreadNameFilters[i]); + } + + nsCOMPtr<nsIProfilerStartParams> params = + new nsProfilerStartParams(aProfileEntries, aInterval, featuresArray, + threadNameFiltersArray); + + os->NotifyObservers(params, "profiler-started", nullptr); + } + } +#endif + + LOG("END mozilla_sampler_start"); +} + +void mozilla_sampler_stop() +{ + LOG("BEGIN mozilla_sampler_stop"); + + if (!stack_key_initialized) + return; + + GeckoSampler *t = tlsTicker.get(); + if (!t) { + LOG("END mozilla_sampler_stop-early"); + return; + } + + bool disableJS = t->ProfileJS(); + + t->Stop(); + delete t; + tlsTicker.set(nullptr); + +#ifndef SPS_STANDALONE + if (disableJS) { + PseudoStack *stack = tlsPseudoStack.get(); + ASSERT(stack != nullptr); + stack->disableJSSampling(); + } + + mozilla::IOInterposer::Unregister(mozilla::IOInterposeObserver::OpAll, + sInterposeObserver); + sInterposeObserver = nullptr; +#endif + + sIsProfiling = false; +#ifndef SPS_STANDALONE + sIsGPUProfiling = false; + sIsLayersDump = false; + sIsDisplayListDump = false; + sIsRestyleProfiling = false; + + if (Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-stopped", nullptr); + } +#endif + + LOG("END mozilla_sampler_stop"); +} + +bool mozilla_sampler_is_paused() { + if (Sampler::GetActiveSampler()) { + return Sampler::GetActiveSampler()->IsPaused(); + } else { + return false; + } +} + +void mozilla_sampler_pause() { + if (Sampler::GetActiveSampler()) { + Sampler::GetActiveSampler()->SetPaused(true); +#ifndef SPS_STANDALONE + if (Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-paused", nullptr); + } +#endif + } +} + +void mozilla_sampler_resume() { + if (Sampler::GetActiveSampler()) { + Sampler::GetActiveSampler()->SetPaused(false); +#ifndef SPS_STANDALONE + if (Sampler::CanNotifyObservers()) { + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-resumed", nullptr); + } +#endif + } +} + +bool mozilla_sampler_feature_active(const char* aName) +{ + if (!profiler_is_active()) { + return false; + } + + if (strcmp(aName, "gpu") == 0) { + return sIsGPUProfiling; + } + + if (strcmp(aName, "layersdump") == 0) { + return sIsLayersDump; + } + + if (strcmp(aName, "displaylistdump") == 0) { + return sIsDisplayListDump; + } + + if (strcmp(aName, "restyle") == 0) { + return sIsRestyleProfiling; + } + + return false; +} + +bool mozilla_sampler_is_active() +{ + return sIsProfiling; +} + +void mozilla_sampler_responsiveness(const mozilla::TimeStamp& aTime) +{ + sLastTracerEvent = aTime; +} + +void mozilla_sampler_frame_number(int frameNumber) +{ + sFrameNumber = frameNumber; +} + +void mozilla_sampler_lock() +{ + profiler_stop(); +#ifndef SPS_STANDALONE + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-locked", nullptr); +#endif +} + +void mozilla_sampler_unlock() +{ +#ifndef SPS_STANDALONE + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) + os->NotifyObservers(nullptr, "profiler-unlocked", nullptr); +#endif +} + +bool mozilla_sampler_register_thread(const char* aName, void* aGuessStackTop) +{ + if (sInitCount == 0) { + return false; + } + +#if defined(MOZ_WIDGET_GONK) && !defined(MOZ_PROFILING) + // The only way to profile secondary threads on b2g + // is to build with profiling OR have the profiler + // running on startup. + if (!profiler_is_active()) { + return false; + } +#endif + + MOZ_ASSERT(tlsPseudoStack.get() == nullptr); + PseudoStack* stack = PseudoStack::create(); + tlsPseudoStack.set(stack); + bool isMainThread = is_main_thread_name(aName); + void* stackTop = GetStackTop(aGuessStackTop); + return Sampler::RegisterCurrentThread(aName, stack, isMainThread, stackTop); +} + +void mozilla_sampler_unregister_thread() +{ + // Don't check sInitCount count here -- we may be unregistering the + // thread after the sampler was shut down. + if (!stack_key_initialized) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (!stack) { + return; + } + stack->deref(); + tlsPseudoStack.set(nullptr); + + Sampler::UnregisterCurrentThread(); +} + +void mozilla_sampler_sleep_start() { + if (sInitCount == 0) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (stack == nullptr) { + return; + } + stack->setSleeping(1); +} + +void mozilla_sampler_sleep_end() { + if (sInitCount == 0) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (stack == nullptr) { + return; + } + stack->setSleeping(0); +} + +bool mozilla_sampler_is_sleeping() { + if (sInitCount == 0) { + return false; + } + PseudoStack *stack = tlsPseudoStack.get(); + if (stack == nullptr) { + return false; + } + return stack->isSleeping(); +} + +double mozilla_sampler_time(const mozilla::TimeStamp& aTime) +{ + mozilla::TimeDuration delta = aTime - sStartTime; + return delta.ToMilliseconds(); +} + +double mozilla_sampler_time() +{ + return mozilla_sampler_time(mozilla::TimeStamp::Now()); +} + +ProfilerBacktrace* mozilla_sampler_get_backtrace() +{ + if (!stack_key_initialized) + return nullptr; + + // Don't capture a stack if we're not profiling + if (!profiler_is_active()) { + return nullptr; + } + + // Don't capture a stack if we don't want to include personal information + if (profiler_in_privacy_mode()) { + return nullptr; + } + + GeckoSampler* t = tlsTicker.get(); + if (!t) { + return nullptr; + } + + return new ProfilerBacktrace(t->GetBacktrace()); +} + +void mozilla_sampler_free_backtrace(ProfilerBacktrace* aBacktrace) +{ + delete aBacktrace; +} + +// Fill the output buffer with the following pattern: +// "Lable 1" "\0" "Label 2" "\0" ... "Label N" "\0" "\0" +// TODO: use the unwinder instead of pseudo stack. +void mozilla_sampler_get_backtrace_noalloc(char *output, size_t outputSize) +{ + MOZ_ASSERT(outputSize >= 2); + char *bound = output + outputSize - 2; + output[0] = output[1] = '\0'; + PseudoStack *pseudoStack = tlsPseudoStack.get(); + if (!pseudoStack) { + return; + } + + volatile StackEntry *pseudoFrames = pseudoStack->mStack; + uint32_t pseudoCount = pseudoStack->stackSize(); + + for (uint32_t i = 0; i < pseudoCount; i++) { + size_t len = strlen(pseudoFrames[i].label()); + if (output + len >= bound) + break; + strcpy(output, pseudoFrames[i].label()); + output += len; + *output++ = '\0'; + *output = '\0'; + } +} + +void mozilla_sampler_tracing(const char* aCategory, const char* aInfo, + TracingMetadata aMetaData) +{ + mozilla_sampler_add_marker(aInfo, new ProfilerMarkerTracing(aCategory, aMetaData)); +} + +void mozilla_sampler_tracing(const char* aCategory, const char* aInfo, + ProfilerBacktrace* aCause, + TracingMetadata aMetaData) +{ + mozilla_sampler_add_marker(aInfo, new ProfilerMarkerTracing(aCategory, aMetaData, aCause)); +} + +void mozilla_sampler_add_marker(const char *aMarker, ProfilerMarkerPayload *aPayload) +{ + // Note that aPayload may be allocated by the caller, so we need to make sure + // that we free it at some point. + mozilla::UniquePtr<ProfilerMarkerPayload> payload(aPayload); + + if (!stack_key_initialized) + return; + + // Don't insert a marker if we're not profiling to avoid + // the heap copy (malloc). + if (!profiler_is_active()) { + return; + } + + // Don't add a marker if we don't want to include personal information + if (profiler_in_privacy_mode()) { + return; + } + + PseudoStack *stack = tlsPseudoStack.get(); + if (!stack) { + return; + } + + mozilla::TimeStamp origin = (aPayload && !aPayload->GetStartTime().IsNull()) ? + aPayload->GetStartTime() : mozilla::TimeStamp::Now(); + mozilla::TimeDuration delta = origin - sStartTime; + stack->addMarker(aMarker, payload.release(), delta.ToMilliseconds()); +} + +#ifndef SPS_STANDALONE +#include "mozilla/Mutex.h" + +class GeckoMutex : public ::Mutex { + public: + explicit GeckoMutex(const char* aDesc) : + mMutex(aDesc) + {} + + virtual ~GeckoMutex() {} + + virtual int Lock() { + mMutex.Lock(); + return 0; + } + + virtual int Unlock() { + mMutex.Unlock(); + return 0; + } + + private: + mozilla::Mutex mMutex; +}; + +mozilla::UniquePtr< ::Mutex> OS::CreateMutex(const char* aDesc) { + return mozilla::MakeUnique<GeckoMutex>(aDesc); +} + +#else +// Otherwise use c++11 Mutex +#include <mutex> + +class OSXMutex : public ::Mutex { + public: + OSXMutex(const char* aDesc) : + mMutex() + {} + + virtual ~OSXMutex() {} + + virtual int Lock() { + mMutex.lock(); + return 0; + } + + virtual int Unlock() { + mMutex.unlock(); + return 0; + } + + private: + std::mutex mMutex; +}; + +mozilla::UniquePtr< ::Mutex> OS::CreateMutex(const char* aDesc) { + return mozilla::MakeUnique<GeckoMutex>(aDesc); +} + +#endif + +// END externally visible functions +//////////////////////////////////////////////////////////////////////// diff --git a/tools/profiler/core/platform.h b/tools/profiler/core/platform.h new file mode 100644 index 000000000..2e736d97c --- /dev/null +++ b/tools/profiler/core/platform.h @@ -0,0 +1,431 @@ +// Copyright (c) 2006-2011 The Chromium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in +// the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google, Inc. nor the names of its contributors +// may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. + +#ifndef TOOLS_PLATFORM_H_ +#define TOOLS_PLATFORM_H_ + +#ifdef SPS_STANDALONE +#define MOZ_COUNT_CTOR(name) +#define MOZ_COUNT_DTOR(name) +#endif + +#ifdef ANDROID +#include <android/log.h> +#else +#define __android_log_print(a, ...) +#endif + +#ifdef XP_UNIX +#include <pthread.h> +#endif + +#include <stdint.h> +#include <math.h> +#ifndef SPS_STANDALONE +#include "MainThreadUtils.h" +#include "mozilla/Mutex.h" +#include "ThreadResponsiveness.h" +#endif +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" +#include "PlatformMacros.h" +#include "v8-support.h" +#include <vector> +#include "StackTop.h" + +// We need a definition of gettid(), but Linux libc implementations don't +// provide a wrapper for it (except for Bionic) +#if defined(__linux__) +#include <unistd.h> +#if !defined(__BIONIC__) +#include <sys/syscall.h> +static inline pid_t gettid() +{ + return (pid_t) syscall(SYS_gettid); +} +#endif +#endif + +#ifdef XP_WIN +#include <windows.h> +#endif + +#define ASSERT(a) MOZ_ASSERT(a) + +bool moz_profiler_verbose(); + +#ifdef ANDROID +# if defined(__arm__) || defined(__thumb__) +# define ENABLE_SPS_LEAF_DATA +# define ENABLE_ARM_LR_SAVING +# endif +# define LOG(text) \ + do { if (moz_profiler_verbose()) \ + __android_log_write(ANDROID_LOG_ERROR, "Profiler", text); \ + } while (0) +# define LOGF(format, ...) \ + do { if (moz_profiler_verbose()) \ + __android_log_print(ANDROID_LOG_ERROR, "Profiler", format, \ + __VA_ARGS__); \ + } while (0) + +#else +# define LOG(text) \ + do { if (moz_profiler_verbose()) fprintf(stderr, "Profiler: %s\n", text); \ + } while (0) +# define LOGF(format, ...) \ + do { if (moz_profiler_verbose()) fprintf(stderr, "Profiler: " format \ + "\n", __VA_ARGS__); \ + } while (0) + +#endif + +#if defined(XP_MACOSX) || defined(XP_WIN) || defined(XP_LINUX) +#define ENABLE_SPS_LEAF_DATA +#endif + +typedef int32_t Atomic32; + +extern mozilla::TimeStamp sStartTime; + +typedef uint8_t* Address; + +// ---------------------------------------------------------------------------- +// Mutex +// +// Mutexes are used for serializing access to non-reentrant sections of code. +// The implementations of mutex should allow for nested/recursive locking. + +class Mutex { + public: + virtual ~Mutex() {} + + // Locks the given mutex. If the mutex is currently unlocked, it becomes + // locked and owned by the calling thread, and immediately. If the mutex + // is already locked by another thread, suspends the calling thread until + // the mutex is unlocked. + virtual int Lock() = 0; + + // Unlocks the given mutex. The mutex is assumed to be locked and owned by + // the calling thread on entrance. + virtual int Unlock() = 0; +}; + +class MutexAutoLock { + public: + explicit MutexAutoLock(::Mutex& aMutex) + : mMutex(&aMutex) + { + mMutex->Lock(); + } + + ~MutexAutoLock() { + mMutex->Unlock(); + } + + private: + Mutex* mMutex; +}; + +// ---------------------------------------------------------------------------- +// OS +// +// This class has static methods for the different platform specific +// functions. Add methods here to cope with differences between the +// supported platforms. + +class OS { + public: + + // Sleep for a number of milliseconds. + static void Sleep(const int milliseconds); + + // Sleep for a number of microseconds. + static void SleepMicro(const int microseconds); + + // Called on startup to initialize platform specific things + static void Startup(); + + static mozilla::UniquePtr< ::Mutex> CreateMutex(const char* aDesc); + + private: + static const int msPerSecond = 1000; + +}; + + + + +// ---------------------------------------------------------------------------- +// Thread +// +// Thread objects are used for creating and running threads. When the start() +// method is called the new thread starts running the run() method in the new +// thread. The Thread object should not be deallocated before the thread has +// terminated. + +class Thread { + public: + // Create new thread. + explicit Thread(const char* name); + virtual ~Thread(); + + // Start new thread by calling the Run() method in the new thread. + void Start(); + + void Join(); + + inline const char* name() const { + return name_; + } + + // Abstract method for run handler. + virtual void Run() = 0; + + // The thread name length is limited to 16 based on Linux's implementation of + // prctl(). + static const int kMaxThreadNameLength = 16; + +#ifdef XP_WIN + HANDLE thread_; + typedef DWORD tid_t; + tid_t thread_id_; +#else + typedef ::pid_t tid_t; +#endif +#if defined(XP_MACOSX) + pthread_t thread_; +#endif + + static tid_t GetCurrentId(); + + private: + void set_name(const char *name); + + char name_[kMaxThreadNameLength]; + int stack_size_; + + DISALLOW_COPY_AND_ASSIGN(Thread); +}; + +// ---------------------------------------------------------------------------- +// HAVE_NATIVE_UNWIND +// +// Pseudo backtraces are available on all platforms. Native +// backtraces are available only on selected platforms. Breakpad is +// the only supported native unwinder. HAVE_NATIVE_UNWIND is set at +// build time to indicate whether native unwinding is possible on this +// platform. + +#undef HAVE_NATIVE_UNWIND +#if defined(MOZ_PROFILING) \ + && (defined(SPS_PLAT_amd64_linux) || defined(SPS_PLAT_arm_android) \ + || (defined(MOZ_WIDGET_ANDROID) && defined(__arm__)) \ + || defined(SPS_PLAT_x86_linux) \ + || defined(SPS_OS_windows) \ + || defined(SPS_OS_darwin)) +# define HAVE_NATIVE_UNWIND +#endif + +/* Some values extracted at startup from environment variables, that + control the behaviour of the breakpad unwinder. */ +extern const char* PROFILER_INTERVAL; +extern const char* PROFILER_ENTRIES; +extern const char* PROFILER_STACK; +extern const char* PROFILER_FEATURES; + +void read_profiler_env_vars(); +void profiler_usage(); + +// Helper methods to expose modifying profiler behavior +bool set_profiler_interval(const char*); +bool set_profiler_entries(const char*); +bool set_profiler_scan(const char*); +bool is_native_unwinding_avail(); + +void set_tls_stack_top(void* stackTop); + +// ---------------------------------------------------------------------------- +// Sampler +// +// A sampler periodically samples the state of the VM and optionally +// (if used for profiling) the program counter and stack pointer for +// the thread that created it. + +struct PseudoStack; +class ThreadProfile; + +// TickSample captures the information collected for each sample. +class TickSample { + public: + TickSample() + : pc(NULL) + , sp(NULL) + , fp(NULL) +#ifdef ENABLE_ARM_LR_SAVING + , lr(NULL) +#endif + , context(NULL) + , isSamplingCurrentThread(false) + , threadProfile(nullptr) + , rssMemory(0) + , ussMemory(0) + {} + + void PopulateContext(void* aContext); + + Address pc; // Instruction pointer. + Address sp; // Stack pointer. + Address fp; // Frame pointer. +#ifdef ENABLE_ARM_LR_SAVING + Address lr; // ARM link register +#endif + void* context; // The context from the signal handler, if available. On + // Win32 this may contain the windows thread context. + bool isSamplingCurrentThread; + ThreadProfile* threadProfile; + mozilla::TimeStamp timestamp; + int64_t rssMemory; + int64_t ussMemory; +}; + +class ThreadInfo; +class PlatformData; +class GeckoSampler; +class SyncProfile; +class Sampler { + public: + // Initialize sampler. + explicit Sampler(double interval, bool profiling, int entrySize); + virtual ~Sampler(); + + double interval() const { return interval_; } + + // This method is called for each sampling period with the current + // program counter. + virtual void Tick(TickSample* sample) = 0; + + // Immediately captures the calling thread's call stack and returns it. + virtual SyncProfile* GetBacktrace() = 0; + + // Request a save from a signal handler + virtual void RequestSave() = 0; + // Process any outstanding request outside a signal handler. + virtual void HandleSaveRequest() = 0; + // Delete markers which are no longer part of the profile due to buffer wraparound. + virtual void DeleteExpiredMarkers() = 0; + + // Start and stop sampler. + void Start(); + void Stop(); + + // Is the sampler used for profiling? + bool IsProfiling() const { return profiling_; } + + // Whether the sampler is running (that is, consumes resources). + bool IsActive() const { return active_; } + + // Low overhead way to stop the sampler from ticking + bool IsPaused() const { return paused_; } + void SetPaused(bool value) { NoBarrier_Store(&paused_, value); } + + virtual bool ProfileThreads() const = 0; + + int EntrySize() { return entrySize_; } + + // We can't new/delete the type safely without defining it + // (-Wdelete-incomplete). Use these Alloc/Free functions instead. + static PlatformData* AllocPlatformData(int aThreadId); + static void FreePlatformData(PlatformData*); + + // If we move the backtracing code into the platform files we won't + // need to have these hacks +#ifdef XP_WIN + // xxxehsan sucky hack :( + static uintptr_t GetThreadHandle(PlatformData*); +#endif +#ifdef XP_MACOSX + static pthread_t GetProfiledThread(PlatformData*); +#endif + + static std::vector<ThreadInfo*> GetRegisteredThreads() { + return *sRegisteredThreads; + } + + static bool RegisterCurrentThread(const char* aName, + PseudoStack* aPseudoStack, + bool aIsMainThread, void* stackTop); + static void UnregisterCurrentThread(); + + static void Startup(); + // Should only be called on shutdown + static void Shutdown(); + + static GeckoSampler* GetActiveSampler() { return sActiveSampler; } + static void SetActiveSampler(GeckoSampler* sampler) { sActiveSampler = sampler; } + + static mozilla::UniquePtr<Mutex> sRegisteredThreadsMutex; + + static bool CanNotifyObservers() { +#ifdef MOZ_WIDGET_GONK + // We use profile.sh on b2g to manually select threads and options per process. + return false; +#elif defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + // Android ANR reporter uses the profiler off the main thread + return NS_IsMainThread(); +#else + MOZ_ASSERT(NS_IsMainThread()); + return true; +#endif + } + + protected: + static std::vector<ThreadInfo*>* sRegisteredThreads; + static GeckoSampler* sActiveSampler; + + private: + void SetActive(bool value) { NoBarrier_Store(&active_, value); } + + const double interval_; + const bool profiling_; + Atomic32 paused_; + Atomic32 active_; + const int entrySize_; + + // Refactor me! +#if defined(SPS_OS_linux) || defined(SPS_OS_android) + bool signal_handler_installed_; + struct sigaction old_sigprof_signal_handler_; + struct sigaction old_sigsave_signal_handler_; + bool signal_sender_launched_; + pthread_t signal_sender_thread_; +#endif +}; + +#endif /* ndef TOOLS_PLATFORM_H_ */ diff --git a/tools/profiler/core/shared-libraries-linux.cc b/tools/profiler/core/shared-libraries-linux.cc new file mode 100644 index 000000000..24437fb4e --- /dev/null +++ b/tools/profiler/core/shared-libraries-linux.cc @@ -0,0 +1,159 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "shared-libraries.h" + +#define PATH_MAX_TOSTRING(x) #x +#define PATH_MAX_STRING(x) PATH_MAX_TOSTRING(x) +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <fstream> +#include "platform.h" +#include "shared-libraries.h" + +#include "common/linux/file_id.h" +#include <algorithm> + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +// Get the breakpad Id for the binary file pointed by bin_name +static std::string getId(const char *bin_name) +{ + using namespace google_breakpad; + using namespace std; + + PageAllocator allocator; + auto_wasteful_vector<uint8_t, sizeof(MDGUID)> identifier(&allocator); + + FileID file_id(bin_name); + if (file_id.ElfFileIdentifier(identifier)) { + return FileID::ConvertIdentifierToUUIDString(identifier) + "0"; + } + + return ""; +} + +#if !defined(MOZ_WIDGET_GONK) +// TODO fix me with proper include +#include "nsDebug.h" +#ifdef ANDROID +#include "ElfLoader.h" // dl_phdr_info +#else +#include <link.h> // dl_phdr_info +#endif +#include <features.h> +#include <dlfcn.h> +#include <sys/types.h> + +#ifdef ANDROID +extern "C" MOZ_EXPORT __attribute__((weak)) +int dl_iterate_phdr( + int (*callback) (struct dl_phdr_info *info, + size_t size, void *data), + void *data); +#endif + +static int +dl_iterate_callback(struct dl_phdr_info *dl_info, size_t size, void *data) +{ + SharedLibraryInfo& info = *reinterpret_cast<SharedLibraryInfo*>(data); + + if (dl_info->dlpi_phnum <= 0) + return 0; + + unsigned long libStart = -1; + unsigned long libEnd = 0; + + for (size_t i = 0; i < dl_info->dlpi_phnum; i++) { + if (dl_info->dlpi_phdr[i].p_type != PT_LOAD) { + continue; + } + unsigned long start = dl_info->dlpi_addr + dl_info->dlpi_phdr[i].p_vaddr; + unsigned long end = start + dl_info->dlpi_phdr[i].p_memsz; + if (start < libStart) + libStart = start; + if (end > libEnd) + libEnd = end; + } + const char *name = dl_info->dlpi_name; + SharedLibrary shlib(libStart, libEnd, 0, getId(name), name); + info.AddSharedLibrary(shlib); + + return 0; +} + +#endif // !MOZ_WIDGET_GONK + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() +{ + SharedLibraryInfo info; + +#if !defined(MOZ_WIDGET_GONK) +#ifdef ANDROID + if (!dl_iterate_phdr) { + // On ARM Android, dl_iterate_phdr is provided by the custom linker. + // So if libxul was loaded by the system linker (e.g. as part of + // xpcshell when running tests), it won't be available and we should + // not call it. + return info; + } +#endif // ANDROID + + dl_iterate_phdr(dl_iterate_callback, &info); +#endif // !MOZ_WIDGET_GONK + +#if defined(ANDROID) || defined(MOZ_WIDGET_GONK) + pid_t pid = getpid(); + char path[PATH_MAX]; + snprintf(path, PATH_MAX, "/proc/%d/maps", pid); + std::ifstream maps(path); + std::string line; + int count = 0; + while (std::getline(maps, line)) { + int ret; + //XXX: needs input sanitizing + unsigned long start; + unsigned long end; + char perm[6] = ""; + unsigned long offset; + char name[PATH_MAX] = ""; + ret = sscanf(line.c_str(), + "%lx-%lx %6s %lx %*s %*x %" PATH_MAX_STRING(PATH_MAX) "s\n", + &start, &end, perm, &offset, name); + if (!strchr(perm, 'x')) { + // Ignore non executable entries + continue; + } + if (ret != 5 && ret != 4) { + LOG("Get maps line failed"); + continue; + } +#if defined(ANDROID) && !defined(MOZ_WIDGET_GONK) + // Use proc/pid/maps to get the dalvik-jit section since it has + // no associated phdrs + if (strcmp(name, "/dev/ashmem/dalvik-jit-code-cache") != 0) + continue; +#else + if (strcmp(perm, "r-xp") != 0) { + // Ignore entries that are writable and/or shared. + // At least one graphics driver uses short-lived "rwxs" mappings + // (see bug 926734 comment 5), so just checking for 'x' isn't enough. + continue; + } +#endif + SharedLibrary shlib(start, end, offset, getId(name), name); + info.AddSharedLibrary(shlib); + if (count > 10000) { + LOG("Get maps failed"); + break; + } + count++; + } +#endif // ANDROID || MOZ_WIDGET_GONK + + return info; +} diff --git a/tools/profiler/core/shared-libraries-macos.cc b/tools/profiler/core/shared-libraries-macos.cc new file mode 100644 index 000000000..e218d2280 --- /dev/null +++ b/tools/profiler/core/shared-libraries-macos.cc @@ -0,0 +1,132 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <AvailabilityMacros.h> +#include <mach-o/loader.h> +#include <mach-o/dyld_images.h> +#include <mach/task_info.h> +#include <mach/task.h> +#include <mach/mach_init.h> +#include <mach/mach_traps.h> +#include <string.h> +#include <stdlib.h> +#include <vector> +#include <sstream> + +#include "shared-libraries.h" + +#ifndef MAC_OS_X_VERSION_10_6 +#define MAC_OS_X_VERSION_10_6 1060 +#endif + +#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6 +// borrowed from Breakpad +// Fallback declarations for TASK_DYLD_INFO and friends, introduced in +// <mach/task_info.h> in the Mac OS X 10.6 SDK. +#define TASK_DYLD_INFO 17 +struct task_dyld_info { + mach_vm_address_t all_image_info_addr; + mach_vm_size_t all_image_info_size; + }; +typedef struct task_dyld_info task_dyld_info_data_t; +typedef struct task_dyld_info *task_dyld_info_t; +#define TASK_DYLD_INFO_COUNT (sizeof(task_dyld_info_data_t) / sizeof(natural_t)) + +#endif + +// Architecture specific abstraction. +#ifdef __i386__ +typedef mach_header platform_mach_header; +typedef segment_command mach_segment_command_type; +#define MACHO_MAGIC_NUMBER MH_MAGIC +#define CMD_SEGMENT LC_SEGMENT +#define seg_size uint32_t +#else +typedef mach_header_64 platform_mach_header; +typedef segment_command_64 mach_segment_command_type; +#define MACHO_MAGIC_NUMBER MH_MAGIC_64 +#define CMD_SEGMENT LC_SEGMENT_64 +#define seg_size uint64_t +#endif + +static +void addSharedLibrary(const platform_mach_header* header, char *name, SharedLibraryInfo &info) { + const struct load_command *cmd = + reinterpret_cast<const struct load_command *>(header + 1); + + seg_size size = 0; + unsigned long long start = reinterpret_cast<unsigned long long>(header); + // Find the cmd segment in the macho image. It will contain the offset we care about. + const uint8_t *uuid_bytes = nullptr; + for (unsigned int i = 0; + cmd && (i < header->ncmds) && (uuid_bytes == nullptr || size == 0); + ++i) { + if (cmd->cmd == CMD_SEGMENT) { + const mach_segment_command_type *seg = + reinterpret_cast<const mach_segment_command_type *>(cmd); + + if (!strcmp(seg->segname, "__TEXT")) { + size = seg->vmsize; + } + } else if (cmd->cmd == LC_UUID) { + const uuid_command *ucmd = reinterpret_cast<const uuid_command *>(cmd); + uuid_bytes = ucmd->uuid; + } + + cmd = reinterpret_cast<const struct load_command *> + (reinterpret_cast<const char *>(cmd) + cmd->cmdsize); + } + + std::stringstream uuid; + uuid << std::hex << std::uppercase; + if (uuid_bytes != nullptr) { + for (int i = 0; i < 16; ++i) { + uuid << ((uuid_bytes[i] & 0xf0) >> 4); + uuid << (uuid_bytes[i] & 0xf); + } + uuid << '0'; + } + + info.AddSharedLibrary(SharedLibrary(start, start + size, 0, uuid.str(), + name)); +} + +// Use dyld to inspect the macho image information. We can build the SharedLibraryEntry structure +// giving us roughtly the same info as /proc/PID/maps in Linux. +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() +{ + SharedLibraryInfo sharedLibraryInfo; + + task_dyld_info_data_t task_dyld_info; + mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT; + if (task_info(mach_task_self (), TASK_DYLD_INFO, (task_info_t)&task_dyld_info, + &count) != KERN_SUCCESS) { + return sharedLibraryInfo; + } + + struct dyld_all_image_infos* aii = (struct dyld_all_image_infos*)task_dyld_info.all_image_info_addr; + size_t infoCount = aii->infoArrayCount; + + // Iterate through all dyld images (loaded libraries) to get their names + // and offests. + for (size_t i = 0; i < infoCount; ++i) { + const dyld_image_info *info = &aii->infoArray[i]; + + // If the magic number doesn't match then go no further + // since we're not pointing to where we think we are. + if (info->imageLoadAddress->magic != MACHO_MAGIC_NUMBER) { + continue; + } + + const platform_mach_header* header = + reinterpret_cast<const platform_mach_header*>(info->imageLoadAddress); + + // Add the entry for this image. + addSharedLibrary(header, (char*)info->imageFilePath, sharedLibraryInfo); + + } + return sharedLibraryInfo; +} + diff --git a/tools/profiler/core/shared-libraries-win32.cc b/tools/profiler/core/shared-libraries-win32.cc new file mode 100644 index 000000000..e2db2579b --- /dev/null +++ b/tools/profiler/core/shared-libraries-win32.cc @@ -0,0 +1,137 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <windows.h> +#include <tlhelp32.h> +#include <dbghelp.h> +#include <sstream> + +#include "shared-libraries.h" +#include "nsWindowsHelpers.h" + +#define CV_SIGNATURE 0x53445352 // 'SDSR' + +struct CodeViewRecord70 +{ + uint32_t signature; + GUID pdbSignature; + uint32_t pdbAge; + char pdbFileName[1]; +}; + +static bool GetPdbInfo(uintptr_t aStart, nsID& aSignature, uint32_t& aAge, char** aPdbName) +{ + if (!aStart) { + return false; + } + + PIMAGE_DOS_HEADER dosHeader = reinterpret_cast<PIMAGE_DOS_HEADER>(aStart); + if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) { + return false; + } + + PIMAGE_NT_HEADERS ntHeaders = reinterpret_cast<PIMAGE_NT_HEADERS>( + aStart + dosHeader->e_lfanew); + if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) { + return false; + } + + uint32_t relativeVirtualAddress = + ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG].VirtualAddress; + if (!relativeVirtualAddress) { + return false; + } + + PIMAGE_DEBUG_DIRECTORY debugDirectory = + reinterpret_cast<PIMAGE_DEBUG_DIRECTORY>(aStart + relativeVirtualAddress); + if (!debugDirectory || debugDirectory->Type != IMAGE_DEBUG_TYPE_CODEVIEW) { + return false; + } + + CodeViewRecord70 *debugInfo = reinterpret_cast<CodeViewRecord70 *>( + aStart + debugDirectory->AddressOfRawData); + if (!debugInfo || debugInfo->signature != CV_SIGNATURE) { + return false; + } + + aAge = debugInfo->pdbAge; + GUID& pdbSignature = debugInfo->pdbSignature; + aSignature.m0 = pdbSignature.Data1; + aSignature.m1 = pdbSignature.Data2; + aSignature.m2 = pdbSignature.Data3; + memcpy(aSignature.m3, pdbSignature.Data4, sizeof(pdbSignature.Data4)); + + // The PDB file name could be different from module filename, so report both + // e.g. The PDB for C:\Windows\SysWOW64\ntdll.dll is wntdll.pdb + char * leafName = strrchr(debugInfo->pdbFileName, '\\'); + if (leafName) { + // Only report the file portion of the path + *aPdbName = leafName + 1; + } else { + *aPdbName = debugInfo->pdbFileName; + } + + return true; +} + +static bool IsDashOrBraces(char c) +{ + return c == '-' || c == '{' || c == '}'; +} + +SharedLibraryInfo SharedLibraryInfo::GetInfoForSelf() +{ + SharedLibraryInfo sharedLibraryInfo; + + nsAutoHandle snap(CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId())); + + MODULEENTRY32 module = {0}; + module.dwSize = sizeof(MODULEENTRY32); + if (Module32First(snap, &module)) { + do { + nsID pdbSig; + uint32_t pdbAge; + char *pdbName = NULL; + + // Load the module again to make sure that its handle will remain remain + // valid as we attempt to read the PDB information from it. We load the + // DLL as a datafile so that if the module actually gets unloaded between + // the call to Module32Next and the following LoadLibraryEx, we don't end + // up running the now newly loaded module's DllMain function. If the + // module is already loaded, LoadLibraryEx just increments its refcount. + // + // Note that because of the race condition above, merely loading the DLL + // again is not safe enough, therefore we also need to make sure that we + // can read the memory mapped at the base address before we can safely + // proceed to actually access those pages. + HMODULE handleLock = LoadLibraryEx(module.szExePath, NULL, LOAD_LIBRARY_AS_DATAFILE); + MEMORY_BASIC_INFORMATION vmemInfo = {0}; + if (handleLock && + sizeof(vmemInfo) == VirtualQuery(module.modBaseAddr, &vmemInfo, sizeof(vmemInfo)) && + vmemInfo.State == MEM_COMMIT && + GetPdbInfo((uintptr_t)module.modBaseAddr, pdbSig, pdbAge, &pdbName)) { + std::ostringstream stream; + stream << pdbSig.ToString() << std::hex << pdbAge; + std::string breakpadId = stream.str(); + std::string::iterator end = + std::remove_if(breakpadId.begin(), breakpadId.end(), IsDashOrBraces); + breakpadId.erase(end, breakpadId.end()); + std::transform(breakpadId.begin(), breakpadId.end(), + breakpadId.begin(), toupper); + + SharedLibrary shlib((uintptr_t)module.modBaseAddr, + (uintptr_t)module.modBaseAddr+module.modBaseSize, + 0, // DLLs are always mapped at offset 0 on Windows + breakpadId, + pdbName); + sharedLibraryInfo.AddSharedLibrary(shlib); + } + FreeLibrary(handleLock); // ok to free null handles + } while (Module32Next(snap, &module)); + } + + return sharedLibraryInfo; +} + diff --git a/tools/profiler/core/v8-support.h b/tools/profiler/core/v8-support.h new file mode 100644 index 000000000..391069dcc --- /dev/null +++ b/tools/profiler/core/v8-support.h @@ -0,0 +1,48 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This contains stubs and infrastructure to support code from v8 */ + +#ifndef V8_SUPPORT_H_ +#define V8_SUPPORT_H_ + +#if defined(_M_X64) || defined(__x86_64__) +#define V8_HOST_ARCH_X64 1 +#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) +#define V8_HOST_ARCH_IA32 1 +#elif defined(__ARMEL__) +#define V8_HOST_ARCH_ARM 1 +#else +#warning Please add support for your architecture in chromium_types.h +#endif + +typedef int32_t Atomic32; + +#if defined(V8_HOST_ARCH_X64) || defined(V8_HOST_ARCH_IA32) || defined(V8_HOST_ARCH_ARM) +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} +#endif + + +const int kMaxInt = 0x7FFFFFFF; +const int kMinInt = -kMaxInt - 1; + +// A macro to disallow the evil copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + + +// The USE(x) template is used to silence C++ compiler warnings +// issued for (yet) unused variables (typically parameters). +template <typename T> +static inline void USE(T) { } + +class Malloced { +}; + +#endif // V8_SUPPORT_H_ diff --git a/tools/profiler/gecko/ProfileGatherer.cpp b/tools/profiler/gecko/ProfileGatherer.cpp new file mode 100644 index 000000000..5cd45bee3 --- /dev/null +++ b/tools/profiler/gecko/ProfileGatherer.cpp @@ -0,0 +1,207 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ProfileGatherer.h" +#include "mozilla/Services.h" +#include "nsIObserverService.h" +#include "GeckoSampler.h" + +using mozilla::dom::AutoJSAPI; +using mozilla::dom::Promise; + +namespace mozilla { + +/** + * When a subprocess exits before we've gathered profiles, we'll + * store profiles for those processes until gathering starts. We'll + * only store up to MAX_SUBPROCESS_EXIT_PROFILES. The buffer is + * circular, so as soon as we receive another exit profile, we'll + * bump the oldest one out of the buffer. + */ +static const uint32_t MAX_SUBPROCESS_EXIT_PROFILES = 5; + +NS_IMPL_ISUPPORTS(ProfileGatherer, nsIObserver) + +ProfileGatherer::ProfileGatherer(GeckoSampler* aTicker) + : mTicker(aTicker) + , mSinceTime(0) + , mPendingProfiles(0) + , mGathering(false) +{ +} + +void +ProfileGatherer::GatheredOOPProfile() +{ + MOZ_ASSERT(NS_IsMainThread()); + if (!mGathering) { + // If we're not actively gathering, then we don't actually + // care that we gathered a profile here. This can happen for + // processes that exit while profiling. + return; + } + + if (NS_WARN_IF(!mPromise)) { + // If we're not holding on to a Promise, then someone is + // calling us erroneously. + return; + } + + mPendingProfiles--; + + if (mPendingProfiles == 0) { + // We've got all of the async profiles now. Let's + // finish off the profile and resolve the Promise. + Finish(); + } +} + +void +ProfileGatherer::WillGatherOOPProfile() +{ + mPendingProfiles++; +} + +void +ProfileGatherer::Start(double aSinceTime, + Promise* aPromise) +{ + MOZ_ASSERT(NS_IsMainThread()); + if (mGathering) { + // If we're already gathering, reject the promise - this isn't going + // to end well. + if (aPromise) { + aPromise->MaybeReject(NS_ERROR_NOT_AVAILABLE); + } + return; + } + + mSinceTime = aSinceTime; + mPromise = aPromise; + mGathering = true; + mPendingProfiles = 0; + + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) { + DebugOnly<nsresult> rv = + os->AddObserver(this, "profiler-subprocess", false); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "AddObserver failed"); + rv = os->NotifyObservers(this, "profiler-subprocess-gather", nullptr); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "NotifyObservers failed"); + } + + if (!mPendingProfiles) { + Finish(); + } +} + +void +ProfileGatherer::Finish() +{ + MOZ_ASSERT(NS_IsMainThread()); + + if (!mTicker) { + // We somehow got called after we were cancelled! This shouldn't + // be possible, but doing a belt-and-suspenders check to be sure. + return; + } + + UniquePtr<char[]> buf = mTicker->ToJSON(mSinceTime); + + nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService(); + if (os) { + DebugOnly<nsresult> rv = os->RemoveObserver(this, "profiler-subprocess"); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "RemoveObserver failed"); + } + + AutoJSAPI jsapi; + if (NS_WARN_IF(!jsapi.Init(mPromise->GlobalJSObject()))) { + // We're really hosed if we can't get a JS context for some reason. + Reset(); + return; + } + + JSContext* cx = jsapi.cx(); + + // Now parse the JSON so that we resolve with a JS Object. + JS::RootedValue val(cx); + { + NS_ConvertUTF8toUTF16 js_string(nsDependentCString(buf.get())); + if (!JS_ParseJSON(cx, static_cast<const char16_t*>(js_string.get()), + js_string.Length(), &val)) { + if (!jsapi.HasException()) { + mPromise->MaybeReject(NS_ERROR_DOM_UNKNOWN_ERR); + } else { + JS::RootedValue exn(cx); + DebugOnly<bool> gotException = jsapi.StealException(&exn); + MOZ_ASSERT(gotException); + + jsapi.ClearException(); + mPromise->MaybeReject(cx, exn); + } + } else { + mPromise->MaybeResolve(val); + } + } + + Reset(); +} + +void +ProfileGatherer::Reset() +{ + mSinceTime = 0; + mPromise = nullptr; + mPendingProfiles = 0; + mGathering = false; +} + +void +ProfileGatherer::Cancel() +{ + // The GeckoSampler is going away. If we have a Promise in flight, we + // should reject it. + if (mPromise) { + mPromise->MaybeReject(NS_ERROR_DOM_ABORT_ERR); + } + + // Clear out the GeckoSampler reference, since it's being destroyed. + mTicker = nullptr; +} + +void +ProfileGatherer::OOPExitProfile(const nsCString& aProfile) +{ + if (mExitProfiles.Length() >= MAX_SUBPROCESS_EXIT_PROFILES) { + mExitProfiles.RemoveElementAt(0); + } + mExitProfiles.AppendElement(aProfile); + + // If a process exited while gathering, we need to make + // sure we decrement the counter. + if (mGathering) { + GatheredOOPProfile(); + } +} + +NS_IMETHODIMP +ProfileGatherer::Observe(nsISupports* aSubject, + const char* aTopic, + const char16_t *someData) +{ + if (!strcmp(aTopic, "profiler-subprocess")) { + nsCOMPtr<nsIProfileSaveEvent> pse = do_QueryInterface(aSubject); + if (pse) { + for (size_t i = 0; i < mExitProfiles.Length(); ++i) { + if (!mExitProfiles[i].IsEmpty()) { + pse->AddSubProfile(mExitProfiles[i].get()); + } + } + mExitProfiles.Clear(); + } + } + return NS_OK; +} + +} // namespace mozilla diff --git a/tools/profiler/gecko/Profiler.jsm b/tools/profiler/gecko/Profiler.jsm new file mode 100644 index 000000000..c61218875 --- /dev/null +++ b/tools/profiler/gecko/Profiler.jsm @@ -0,0 +1,16 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +const Cc = Components.classes; +const Ci = Components.interfaces; +const Cr = Components.results; + +this.EXPORTED_SYMBOLS = ["Profiler"]; + +this.Profiler = { + +}; + diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp new file mode 100644 index 000000000..07801535d --- /dev/null +++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.cpp @@ -0,0 +1,30 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "GeckoProfiler.h" +#include "ProfilerIOInterposeObserver.h" +#include "ProfilerMarkers.h" + +using namespace mozilla; + +void ProfilerIOInterposeObserver::Observe(Observation& aObservation) +{ + if (!IsMainThread()) { + return; + } + + ProfilerBacktrace* stack = profiler_get_backtrace(); + + nsCString filename; + if (aObservation.Filename()) { + filename = NS_ConvertUTF16toUTF8(aObservation.Filename()); + } + + IOMarkerPayload* markerPayload = new IOMarkerPayload(aObservation.Reference(), + filename.get(), + aObservation.Start(), + aObservation.End(), + stack); + PROFILER_MARKER_PAYLOAD(aObservation.ObservedOperationString(), markerPayload); +} diff --git a/tools/profiler/gecko/ProfilerIOInterposeObserver.h b/tools/profiler/gecko/ProfilerIOInterposeObserver.h new file mode 100644 index 000000000..8661b197e --- /dev/null +++ b/tools/profiler/gecko/ProfilerIOInterposeObserver.h @@ -0,0 +1,28 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILERIOINTERPOSEOBSERVER_H +#define PROFILERIOINTERPOSEOBSERVER_H + +#ifdef MOZ_ENABLE_PROFILER_SPS + +#include "mozilla/IOInterposer.h" + +namespace mozilla { + +/** + * This class is the observer that calls into the profiler whenever + * main thread I/O occurs. + */ +class ProfilerIOInterposeObserver final : public IOInterposeObserver +{ +public: + virtual void Observe(Observation& aObservation); +}; + +} // namespace mozilla + +#endif // MOZ_ENABLE_PROFILER_SPS + +#endif // PROFILERIOINTERPOSEOBSERVER_H diff --git a/tools/profiler/gecko/ProfilerTypes.ipdlh b/tools/profiler/gecko/ProfilerTypes.ipdlh new file mode 100644 index 000000000..1ef670b03 --- /dev/null +++ b/tools/profiler/gecko/ProfilerTypes.ipdlh @@ -0,0 +1,16 @@ +/* -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil; tab-width: 8 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +namespace mozilla { + +struct ProfilerInitParams { + bool enabled; + uint32_t entries; + double interval; + nsCString[] threadFilters; + nsCString[] features; +}; + +} // namespace mozilla
\ No newline at end of file diff --git a/tools/profiler/gecko/SaveProfileTask.cpp b/tools/profiler/gecko/SaveProfileTask.cpp new file mode 100644 index 000000000..497385355 --- /dev/null +++ b/tools/profiler/gecko/SaveProfileTask.cpp @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SaveProfileTask.h" +#include "GeckoProfiler.h" + +nsresult +SaveProfileTask::Run() { + // Get file path +#if defined(SPS_PLAT_arm_android) && !defined(MOZ_WIDGET_GONK) + nsCString tmpPath; + tmpPath.AppendPrintf("/sdcard/profile_%i_%i.txt", XRE_GetProcessType(), getpid()); +#else + nsCOMPtr<nsIFile> tmpFile; + nsAutoCString tmpPath; + if (NS_FAILED(NS_GetSpecialDirectory(NS_OS_TEMP_DIR, getter_AddRefs(tmpFile)))) { + LOG("Failed to find temporary directory."); + return NS_ERROR_FAILURE; + } + tmpPath.AppendPrintf("profile_%i_%i.txt", XRE_GetProcessType(), getpid()); + + nsresult rv = tmpFile->AppendNative(tmpPath); + if (NS_FAILED(rv)) + return rv; + + rv = tmpFile->GetNativePath(tmpPath); + if (NS_FAILED(rv)) + return rv; +#endif + + profiler_save_profile_to_file(tmpPath.get()); + + return NS_OK; +} + +NS_IMPL_ISUPPORTS(ProfileSaveEvent, nsIProfileSaveEvent) + +nsresult +ProfileSaveEvent::AddSubProfile(const char* aProfile) { + mFunc(aProfile, mClosure); + return NS_OK; +} + diff --git a/tools/profiler/gecko/SaveProfileTask.h b/tools/profiler/gecko/SaveProfileTask.h new file mode 100644 index 000000000..4a215bba0 --- /dev/null +++ b/tools/profiler/gecko/SaveProfileTask.h @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILER_SAVETASK_H_ +#define PROFILER_SAVETASK_H_ + +#include "platform.h" +#include "nsThreadUtils.h" +#include "nsIXULRuntime.h" +#include "nsDirectoryServiceUtils.h" +#include "nsDirectoryServiceDefs.h" +#include "nsXULAppAPI.h" +#include "nsIProfileSaveEvent.h" + +#ifdef XP_WIN + #include <windows.h> + #define getpid GetCurrentProcessId +#else + #include <unistd.h> +#endif + +/** + * This is an event used to save the profile on the main thread + * to be sure that it is not being modified while saving. + */ +class SaveProfileTask : public mozilla::Runnable { +public: + SaveProfileTask() {} + + NS_IMETHOD Run(); +}; + +class ProfileSaveEvent final : public nsIProfileSaveEvent { +public: + typedef void (*AddSubProfileFunc)(const char* aProfile, void* aClosure); + NS_DECL_ISUPPORTS + + ProfileSaveEvent(AddSubProfileFunc aFunc, void* aClosure) + : mFunc(aFunc) + , mClosure(aClosure) + {} + + NS_IMETHOD AddSubProfile(const char* aProfile) override; +private: + ~ProfileSaveEvent() {} + + AddSubProfileFunc mFunc; + void* mClosure; +}; + +#endif + diff --git a/tools/profiler/gecko/ThreadResponsiveness.cpp b/tools/profiler/gecko/ThreadResponsiveness.cpp new file mode 100644 index 000000000..0057251e2 --- /dev/null +++ b/tools/profiler/gecko/ThreadResponsiveness.cpp @@ -0,0 +1,118 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ThreadResponsiveness.h" +#include "platform.h" +#include "nsComponentManagerUtils.h" +#include "nsThreadUtils.h" +#include "nsITimer.h" +#include "mozilla/Monitor.h" +#include "ProfileEntry.h" +#include "ThreadProfile.h" + +using mozilla::Monitor; +using mozilla::MonitorAutoLock; +using mozilla::TimeStamp; + +class CheckResponsivenessTask : public mozilla::Runnable, + public nsITimerCallback { +public: + CheckResponsivenessTask() + : mLastTracerTime(TimeStamp::Now()) + , mMonitor("CheckResponsivenessTask") + , mTimer(nullptr) + , mStop(false) + { + MOZ_COUNT_CTOR(CheckResponsivenessTask); + } + +protected: + ~CheckResponsivenessTask() + { + MOZ_COUNT_DTOR(CheckResponsivenessTask); + } + +public: + NS_IMETHOD Run() override + { + MonitorAutoLock mon(mMonitor); + if (mStop) + return NS_OK; + + // This is raced on because we might pause the thread here + // for profiling so if we tried to use a monitor to protect + // mLastTracerTime we could deadlock. We're risking seeing + // a partial write which will show up as an outlier in our + // performance data. + mLastTracerTime = TimeStamp::Now(); + if (!mTimer) { + mTimer = do_CreateInstance("@mozilla.org/timer;1"); + } + mTimer->InitWithCallback(this, 16, nsITimer::TYPE_ONE_SHOT); + + return NS_OK; + } + + NS_IMETHOD Notify(nsITimer* aTimer) final + { + NS_DispatchToMainThread(this); + return NS_OK; + } + + void Terminate() { + MonitorAutoLock mon(mMonitor); + mStop = true; + } + + const TimeStamp& GetLastTracerTime() const { + return mLastTracerTime; + } + + NS_DECL_ISUPPORTS_INHERITED + +private: + TimeStamp mLastTracerTime; + Monitor mMonitor; + nsCOMPtr<nsITimer> mTimer; + bool mStop; +}; + +NS_IMPL_ISUPPORTS_INHERITED(CheckResponsivenessTask, mozilla::Runnable, + nsITimerCallback) + +ThreadResponsiveness::ThreadResponsiveness(ThreadProfile *aThreadProfile) + : mThreadProfile(aThreadProfile) + , mActiveTracerEvent(nullptr) +{ + MOZ_COUNT_CTOR(ThreadResponsiveness); +} + +ThreadResponsiveness::~ThreadResponsiveness() +{ + MOZ_COUNT_DTOR(ThreadResponsiveness); + if (mActiveTracerEvent) { + mActiveTracerEvent->Terminate(); + } +} + +void +ThreadResponsiveness::Update() +{ + if (!mActiveTracerEvent) { + if (mThreadProfile->GetThreadInfo()->IsMainThread()) { + mActiveTracerEvent = new CheckResponsivenessTask(); + NS_DispatchToMainThread(mActiveTracerEvent); + } else if (mThreadProfile->GetThreadInfo()->GetThread()) { + mActiveTracerEvent = new CheckResponsivenessTask(); + mThreadProfile->GetThreadInfo()-> + GetThread()->Dispatch(mActiveTracerEvent, NS_DISPATCH_NORMAL); + } + } + + if (mActiveTracerEvent) { + mLastTracerTime = mActiveTracerEvent->GetLastTracerTime(); + } +} + diff --git a/tools/profiler/gecko/ThreadResponsiveness.h b/tools/profiler/gecko/ThreadResponsiveness.h new file mode 100644 index 000000000..5454c3c05 --- /dev/null +++ b/tools/profiler/gecko/ThreadResponsiveness.h @@ -0,0 +1,38 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef ThreadResponsiveness_h +#define ThreadResponsiveness_h + +#include "nsISupports.h" +#include "mozilla/RefPtr.h" +#include "mozilla/TimeStamp.h" + +class ThreadProfile; +class CheckResponsivenessTask; + +class ThreadResponsiveness { +public: + explicit ThreadResponsiveness(ThreadProfile *aThreadProfile); + + ~ThreadResponsiveness(); + + void Update(); + + mozilla::TimeDuration GetUnresponsiveDuration(const mozilla::TimeStamp& now) const { + return now - mLastTracerTime; + } + + bool HasData() const { + return !mLastTracerTime.IsNull(); + } +private: + ThreadProfile* mThreadProfile; + RefPtr<CheckResponsivenessTask> mActiveTracerEvent; + mozilla::TimeStamp mLastTracerTime; +}; + +#endif + diff --git a/tools/profiler/gecko/nsIProfileSaveEvent.idl b/tools/profiler/gecko/nsIProfileSaveEvent.idl new file mode 100644 index 000000000..c2c4bed02 --- /dev/null +++ b/tools/profiler/gecko/nsIProfileSaveEvent.idl @@ -0,0 +1,19 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +[uuid(f5ad0830-e178-41f9-b253-db9b4fae4cb3)] +interface nsIProfileSaveEvent : nsISupports +{ + /** + * Call this method when observing this event to include + * a sub profile origining from an external source such + * as a non native thread or another process. + */ + void AddSubProfile(in string aMarker); +}; + + diff --git a/tools/profiler/gecko/nsIProfiler.idl b/tools/profiler/gecko/nsIProfiler.idl new file mode 100644 index 000000000..f9b118650 --- /dev/null +++ b/tools/profiler/gecko/nsIProfiler.idl @@ -0,0 +1,101 @@ +/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +%{C++ +#include "nsTArrayForwardDeclare.h" +class nsCString; +%} + +[ref] native StringArrayRef(const nsTArray<nsCString>); + +/** + * Start-up parameters for subprocesses are passed through nsIObserverService, + * which, unfortunately, means we need to implement nsISupports in order to + * go through it. + */ +[uuid(0a175ba7-8fcf-4ce9-9c4b-ccc6272f4425)] +interface nsIProfilerStartParams : nsISupports +{ + attribute uint32_t entries; + attribute double interval; + + [noscript, notxpcom, nostdcall] StringArrayRef getFeatures(); + [noscript, notxpcom, nostdcall] StringArrayRef getThreadFilterNames(); +}; + +[scriptable, uuid(ead3f75c-0e0e-4fbb-901c-1e5392ef5b2a)] +interface nsIProfiler : nsISupports +{ + boolean CanProfile(); + void StartProfiler(in uint32_t aEntries, in double aInterval, + [array, size_is(aFeatureCount)] in string aFeatures, + in uint32_t aFeatureCount, + [array, size_is(aFilterCount), optional] in string aThreadNameFilters, + [optional] in uint32_t aFilterCount); + void StopProfiler(); + boolean IsPaused(); + void PauseSampling(); + void ResumeSampling(); + void AddMarker(in string aMarker); + /* + * Returns the JSON string of the profile. If aSinceTime is passed, only + * report samples taken at >= aSinceTime. + */ + string GetProfile([optional] in double aSinceTime); + + /* + * Returns a JS object of the profile. If aSinceTime is passed, only report + * samples taken at >= aSinceTime. + */ + [implicit_jscontext] + jsval getProfileData([optional] in double aSinceTime); + + [implicit_jscontext] + nsISupports getProfileDataAsync([optional] in double aSinceTime); + + boolean IsActive(); + void GetFeatures(out uint32_t aCount, [retval, array, size_is(aCount)] out string aFeatures); + + /** + * The starting parameters that were sent to the profiler for sampling. + * If the profiler is not currently sampling, this will return null. + */ + readonly attribute nsIProfilerStartParams startParams; + + /** + * The profileGatherer will be null if the profiler is not currently + * active. + */ + readonly attribute nsISupports profileGatherer; + + void GetBufferInfo(out uint32_t aCurrentPosition, out uint32_t aTotalSize, + out uint32_t aGeneration); + + /** + * Returns the elapsed time, in milliseconds, since the profiler's epoch. + * The epoch is guaranteed to be constant for the duration of the + * process, but is otherwise arbitrary. + */ + double getElapsedTime(); + + /** + * Returns a JSON string of an array of shared library objects. + * Every object has three properties: start, end, and name. + * start and end are integers describing the address range that the library + * occupies in memory. name is the path of the library as a string. + * + * On Windows profiling builds, the shared library objects will have + * additional pdbSignature and pdbAge properties for uniquely identifying + * shared library versions for stack symbolication. + */ + AString getSharedLibraryInformation(); + + /** + * Dump the collected profile to a file. + */ + void dumpProfileToFile(in string aFilename); +}; diff --git a/tools/profiler/gecko/nsProfiler.cpp b/tools/profiler/gecko/nsProfiler.cpp new file mode 100644 index 000000000..c38447381 --- /dev/null +++ b/tools/profiler/gecko/nsProfiler.cpp @@ -0,0 +1,308 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <string> +#include <sstream> +#include "GeckoProfiler.h" +#include "nsProfiler.h" +#include "nsProfilerStartParams.h" +#include "nsMemory.h" +#include "nsString.h" +#include "mozilla/Services.h" +#include "nsIObserverService.h" +#include "nsIInterfaceRequestor.h" +#include "nsILoadContext.h" +#include "nsIWebNavigation.h" +#include "nsIInterfaceRequestorUtils.h" +#include "shared-libraries.h" +#include "js/Value.h" +#include "mozilla/ErrorResult.h" +#include "mozilla/dom/Promise.h" + +using mozilla::ErrorResult; +using mozilla::dom::Promise; +using std::string; + +NS_IMPL_ISUPPORTS(nsProfiler, nsIProfiler) + +nsProfiler::nsProfiler() + : mLockedForPrivateBrowsing(false) +{ +} + +nsProfiler::~nsProfiler() +{ + nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService(); + if (observerService) { + observerService->RemoveObserver(this, "chrome-document-global-created"); + observerService->RemoveObserver(this, "last-pb-context-exited"); + } +} + +nsresult +nsProfiler::Init() { + nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService(); + if (observerService) { + observerService->AddObserver(this, "chrome-document-global-created", false); + observerService->AddObserver(this, "last-pb-context-exited", false); + } + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::Observe(nsISupports *aSubject, + const char *aTopic, + const char16_t *aData) +{ + if (strcmp(aTopic, "chrome-document-global-created") == 0) { + nsCOMPtr<nsIInterfaceRequestor> requestor = do_QueryInterface(aSubject); + nsCOMPtr<nsIWebNavigation> parentWebNav = do_GetInterface(requestor); + nsCOMPtr<nsILoadContext> loadContext = do_QueryInterface(parentWebNav); + if (loadContext && loadContext->UsePrivateBrowsing() && !mLockedForPrivateBrowsing) { + mLockedForPrivateBrowsing = true; + profiler_lock(); + } + } else if (strcmp(aTopic, "last-pb-context-exited") == 0) { + mLockedForPrivateBrowsing = false; + profiler_unlock(); + } + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::CanProfile(bool *aCanProfile) +{ + *aCanProfile = !mLockedForPrivateBrowsing; + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::StartProfiler(uint32_t aEntries, double aInterval, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount) +{ + if (mLockedForPrivateBrowsing) { + return NS_ERROR_NOT_AVAILABLE; + } + + profiler_start(aEntries, aInterval, + aFeatures, aFeatureCount, + aThreadNameFilters, aFilterCount); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::StopProfiler() +{ + profiler_stop(); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::IsPaused(bool *aIsPaused) +{ + *aIsPaused = profiler_is_paused(); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::PauseSampling() +{ + profiler_pause(); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::ResumeSampling() +{ + profiler_resume(); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::AddMarker(const char *aMarker) +{ + PROFILER_MARKER(aMarker); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetProfile(double aSinceTime, char** aProfile) +{ + mozilla::UniquePtr<char[]> profile = profiler_get_profile(aSinceTime); + if (profile) { + size_t len = strlen(profile.get()); + char *profileStr = static_cast<char *> + (nsMemory::Clone(profile.get(), (len + 1) * sizeof(char))); + profileStr[len] = '\0'; + *aProfile = profileStr; + } + return NS_OK; +} + +std::string GetSharedLibraryInfoStringInternal(); + +std::string +GetSharedLibraryInfoString() +{ + return GetSharedLibraryInfoStringInternal(); +} + +NS_IMETHODIMP +nsProfiler::GetSharedLibraryInformation(nsAString& aOutString) +{ + aOutString.Assign(NS_ConvertUTF8toUTF16(GetSharedLibraryInfoString().c_str())); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::DumpProfileToFile(const char* aFilename) +{ + profiler_save_profile_to_file(aFilename); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetProfileData(double aSinceTime, JSContext* aCx, + JS::MutableHandle<JS::Value> aResult) +{ + JS::RootedObject obj(aCx, profiler_get_profile_jsobject(aCx, aSinceTime)); + if (!obj) { + return NS_ERROR_FAILURE; + } + aResult.setObject(*obj); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetProfileDataAsync(double aSinceTime, JSContext* aCx, + nsISupports** aPromise) +{ + MOZ_ASSERT(NS_IsMainThread()); + + if (NS_WARN_IF(!aCx)) { + return NS_ERROR_FAILURE; + } + + nsIGlobalObject* go = xpc::NativeGlobal(JS::CurrentGlobalOrNull(aCx)); + + if (NS_WARN_IF(!go)) { + return NS_ERROR_FAILURE; + } + + ErrorResult result; + RefPtr<Promise> promise = Promise::Create(go, result); + if (NS_WARN_IF(result.Failed())) { + return result.StealNSResult(); + } + + profiler_get_profile_jsobject_async(aSinceTime, promise); + + promise.forget(aPromise); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetElapsedTime(double* aElapsedTime) +{ + *aElapsedTime = profiler_time(); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::IsActive(bool *aIsActive) +{ + *aIsActive = profiler_is_active(); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetFeatures(uint32_t *aCount, char ***aFeatures) +{ + uint32_t len = 0; + + const char **features = profiler_get_features(); + if (!features) { + *aCount = 0; + *aFeatures = nullptr; + return NS_OK; + } + + while (features[len]) { + len++; + } + + char **featureList = static_cast<char **> + (moz_xmalloc(len * sizeof(char*))); + + for (size_t i = 0; i < len; i++) { + size_t strLen = strlen(features[i]); + featureList[i] = static_cast<char *> + (nsMemory::Clone(features[i], (strLen + 1) * sizeof(char))); + } + + *aFeatures = featureList; + *aCount = len; + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetStartParams(nsIProfilerStartParams** aRetVal) +{ + if (!profiler_is_active()) { + *aRetVal = nullptr; + } else { + int entrySize = 0; + double interval = 0; + mozilla::Vector<const char*> filters; + mozilla::Vector<const char*> features; + profiler_get_start_params(&entrySize, &interval, &filters, &features); + + nsTArray<nsCString> filtersArray; + for (uint32_t i = 0; i < filters.length(); ++i) { + filtersArray.AppendElement(filters[i]); + } + + nsTArray<nsCString> featuresArray; + for (size_t i = 0; i < features.length(); ++i) { + featuresArray.AppendElement(features[i]); + } + + nsCOMPtr<nsIProfilerStartParams> startParams = + new nsProfilerStartParams(entrySize, interval, featuresArray, + filtersArray); + + startParams.forget(aRetVal); + } + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetBufferInfo(uint32_t *aCurrentPosition, uint32_t *aTotalSize, uint32_t *aGeneration) +{ + MOZ_ASSERT(aCurrentPosition); + MOZ_ASSERT(aTotalSize); + MOZ_ASSERT(aGeneration); + profiler_get_buffer_info(aCurrentPosition, aTotalSize, aGeneration); + return NS_OK; +} + +NS_IMETHODIMP +nsProfiler::GetProfileGatherer(nsISupports** aRetVal) +{ + if (!aRetVal) { + return NS_ERROR_INVALID_POINTER; + } + + // If we're not profiling, there will be no gatherer. + if (!profiler_is_active()) { + *aRetVal = nullptr; + } else { + nsCOMPtr<nsISupports> gatherer; + profiler_get_gatherer(getter_AddRefs(gatherer)); + gatherer.forget(aRetVal); + } + return NS_OK; +}
\ No newline at end of file diff --git a/tools/profiler/gecko/nsProfiler.h b/tools/profiler/gecko/nsProfiler.h new file mode 100644 index 000000000..50dabd278 --- /dev/null +++ b/tools/profiler/gecko/nsProfiler.h @@ -0,0 +1,29 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _NSPROFILER_H_ +#define _NSPROFILER_H_ + +#include "nsIProfiler.h" +#include "nsIObserver.h" +#include "mozilla/Attributes.h" + +class nsProfiler final : public nsIProfiler, public nsIObserver +{ +public: + nsProfiler(); + + NS_DECL_ISUPPORTS + NS_DECL_NSIOBSERVER + NS_DECL_NSIPROFILER + + nsresult Init(); +private: + ~nsProfiler(); + bool mLockedForPrivateBrowsing; +}; + +#endif /* _NSPROFILER_H_ */ + diff --git a/tools/profiler/gecko/nsProfilerCIID.h b/tools/profiler/gecko/nsProfilerCIID.h new file mode 100644 index 000000000..3057a6ae0 --- /dev/null +++ b/tools/profiler/gecko/nsProfilerCIID.h @@ -0,0 +1,14 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsProfilerCIID_h__ +#define nsProfilerCIID_h__ + +#define NS_PROFILER_CID \ +{ 0x25db9b8e, 0x8123, 0x4de1, \ +{ 0xb6, 0x6d, 0x8b, 0xbb, 0xed, 0xf2, 0xcd, 0xf4 } } + +#endif + diff --git a/tools/profiler/gecko/nsProfilerFactory.cpp b/tools/profiler/gecko/nsProfilerFactory.cpp new file mode 100644 index 000000000..0cab23e89 --- /dev/null +++ b/tools/profiler/gecko/nsProfilerFactory.cpp @@ -0,0 +1,31 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ModuleUtils.h" +#include "nsCOMPtr.h" +#include "nsProfiler.h" +#include "nsProfilerCIID.h" + +NS_GENERIC_FACTORY_CONSTRUCTOR_INIT(nsProfiler, Init) + +NS_DEFINE_NAMED_CID(NS_PROFILER_CID); + +static const mozilla::Module::CIDEntry kProfilerCIDs[] = { + { &kNS_PROFILER_CID, false, nullptr, nsProfilerConstructor }, + { nullptr } +}; + +static const mozilla::Module::ContractIDEntry kProfilerContracts[] = { + { "@mozilla.org/tools/profiler;1", &kNS_PROFILER_CID }, + { nullptr } +}; + +static const mozilla::Module kProfilerModule = { + mozilla::Module::kVersion, + kProfilerCIDs, + kProfilerContracts +}; + +NSMODULE_DEFN(nsProfilerModule) = &kProfilerModule; diff --git a/tools/profiler/gecko/nsProfilerStartParams.cpp b/tools/profiler/gecko/nsProfilerStartParams.cpp new file mode 100644 index 000000000..5335e694e --- /dev/null +++ b/tools/profiler/gecko/nsProfilerStartParams.cpp @@ -0,0 +1,67 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsProfilerStartParams.h" + +NS_IMPL_ISUPPORTS(nsProfilerStartParams, nsIProfilerStartParams) + +nsProfilerStartParams::nsProfilerStartParams(uint32_t aEntries, + double aInterval, + const nsTArray<nsCString>& aFeatures, + const nsTArray<nsCString>& aThreadFilterNames) : + mEntries(aEntries), + mInterval(aInterval), + mFeatures(aFeatures), + mThreadFilterNames(aThreadFilterNames) +{ +} + +nsProfilerStartParams::~nsProfilerStartParams() +{ +} + +NS_IMETHODIMP +nsProfilerStartParams::GetEntries(uint32_t* aEntries) +{ + NS_ENSURE_ARG_POINTER(aEntries); + *aEntries = mEntries; + return NS_OK; +} + +NS_IMETHODIMP +nsProfilerStartParams::SetEntries(uint32_t aEntries) +{ + NS_ENSURE_ARG(aEntries); + mEntries = aEntries; + return NS_OK; +} + +NS_IMETHODIMP +nsProfilerStartParams::GetInterval(double* aInterval) +{ + NS_ENSURE_ARG_POINTER(aInterval); + *aInterval = mInterval; + return NS_OK; +} + +NS_IMETHODIMP +nsProfilerStartParams::SetInterval(double aInterval) +{ + NS_ENSURE_ARG(aInterval); + mInterval = aInterval; + return NS_OK; +} + +const nsTArray<nsCString>& +nsProfilerStartParams::GetFeatures() +{ + return mFeatures; +} + +const nsTArray<nsCString>& +nsProfilerStartParams::GetThreadFilterNames() +{ + return mThreadFilterNames; +} diff --git a/tools/profiler/gecko/nsProfilerStartParams.h b/tools/profiler/gecko/nsProfilerStartParams.h new file mode 100644 index 000000000..98788077f --- /dev/null +++ b/tools/profiler/gecko/nsProfilerStartParams.h @@ -0,0 +1,32 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _NSPROFILERSTARTPARAMS_H_ +#define _NSPROFILERSTARTPARAMS_H_ + +#include "nsIProfiler.h" +#include "nsString.h" +#include "nsTArray.h" + +class nsProfilerStartParams : public nsIProfilerStartParams +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSIPROFILERSTARTPARAMS + + nsProfilerStartParams(uint32_t aEntries, + double aInterval, + const nsTArray<nsCString>& aFeatures, + const nsTArray<nsCString>& aThreadFilterNames); + +private: + virtual ~nsProfilerStartParams(); + uint32_t mEntries; + double mInterval; + nsTArray<nsCString> mFeatures; + nsTArray<nsCString> mThreadFilterNames; +}; + +#endif diff --git a/tools/profiler/lul/AutoObjectMapper.cpp b/tools/profiler/lul/AutoObjectMapper.cpp new file mode 100644 index 000000000..a5dc902fd --- /dev/null +++ b/tools/profiler/lul/AutoObjectMapper.cpp @@ -0,0 +1,207 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <sys/mman.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "PlatformMacros.h" +#include "AutoObjectMapper.h" + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +# include <dlfcn.h> +# include "mozilla/Types.h" + // FIXME move these out of mozglue/linker/ElfLoader.h into their + // own header, so as to avoid conflicts arising from two definitions + // of Array + extern "C" { + MFBT_API size_t + __dl_get_mappable_length(void *handle); + MFBT_API void * + __dl_mmap(void *handle, void *addr, size_t length, off_t offset); + MFBT_API void + __dl_munmap(void *handle, void *addr, size_t length); + } + // The following are for get_installation_lib_dir() +# include "nsString.h" +# include "nsDirectoryServiceUtils.h" +# include "nsDirectoryServiceDefs.h" +#endif + + +// A helper function for creating failure error messages in +// AutoObjectMapper*::Map. +static void +failedToMessage(void(*aLog)(const char*), + const char* aHowFailed, std::string aFileName) +{ + char buf[300]; + SprintfLiteral(buf, "AutoObjectMapper::Map: Failed to %s \'%s\'", + aHowFailed, aFileName.c_str()); + buf[sizeof(buf)-1] = 0; + aLog(buf); +} + + +AutoObjectMapperPOSIX::AutoObjectMapperPOSIX(void(*aLog)(const char*)) + : mImage(nullptr) + , mSize(0) + , mLog(aLog) + , mIsMapped(false) +{} + +AutoObjectMapperPOSIX::~AutoObjectMapperPOSIX() { + if (!mIsMapped) { + // There's nothing to do. + MOZ_ASSERT(!mImage); + MOZ_ASSERT(mSize == 0); + return; + } + MOZ_ASSERT(mSize > 0); + // The following assertion doesn't necessarily have to be true, + // but we assume (reasonably enough) that no mmap facility would + // be crazy enough to map anything at page zero. + MOZ_ASSERT(mImage); + munmap(mImage, mSize); +} + +bool AutoObjectMapperPOSIX::Map(/*OUT*/void** start, /*OUT*/size_t* length, + std::string fileName) +{ + MOZ_ASSERT(!mIsMapped); + + int fd = open(fileName.c_str(), O_RDONLY); + if (fd == -1) { + failedToMessage(mLog, "open", fileName); + return false; + } + + struct stat st; + int err = fstat(fd, &st); + size_t sz = (err == 0) ? st.st_size : 0; + if (err != 0 || sz == 0) { + failedToMessage(mLog, "fstat", fileName); + close(fd); + return false; + } + + void* image = mmap(nullptr, sz, PROT_READ, MAP_SHARED, fd, 0); + if (image == MAP_FAILED) { + failedToMessage(mLog, "mmap", fileName); + close(fd); + return false; + } + + close(fd); + mIsMapped = true; + mImage = *start = image; + mSize = *length = sz; + return true; +} + + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +// A helper function for AutoObjectMapperFaultyLib::Map. Finds out +// where the installation's lib directory is, since we'll have to look +// in there to get hold of libmozglue.so. Returned C string is heap +// allocated and the caller must deallocate it. +static char* +get_installation_lib_dir() +{ + nsCOMPtr<nsIProperties> + directoryService(do_GetService(NS_DIRECTORY_SERVICE_CONTRACTID)); + if (!directoryService) { + return nullptr; + } + nsCOMPtr<nsIFile> greDir; + nsresult rv = directoryService->Get(NS_GRE_DIR, NS_GET_IID(nsIFile), + getter_AddRefs(greDir)); + if (NS_FAILED(rv)) return nullptr; + nsCString path; + rv = greDir->GetNativePath(path); + if (NS_FAILED(rv)) { + return nullptr; + } + return strdup(path.get()); +} + +AutoObjectMapperFaultyLib::AutoObjectMapperFaultyLib(void(*aLog)(const char*)) + : AutoObjectMapperPOSIX(aLog) + , mHdl(nullptr) +{} + +AutoObjectMapperFaultyLib::~AutoObjectMapperFaultyLib() { + if (mHdl) { + // We've got an object mapped by faulty.lib. Unmap it via faulty.lib. + MOZ_ASSERT(mSize > 0); + // Assert on the basis that no valid mapping would start at page zero. + MOZ_ASSERT(mImage); + __dl_munmap(mHdl, mImage, mSize); + dlclose(mHdl); + // Stop assertions in ~AutoObjectMapperPOSIX from failing. + mImage = nullptr; + mSize = 0; + } + // At this point the parent class destructor, ~AutoObjectMapperPOSIX, + // gets called. If that has something mapped in the normal way, it + // will unmap it in the normal way. Unfortunately there's no + // obvious way to enforce the requirement that the object is mapped + // either by faulty.lib or by the parent class, but not by both. +} + +bool AutoObjectMapperFaultyLib::Map(/*OUT*/void** start, /*OUT*/size_t* length, + std::string fileName) +{ + MOZ_ASSERT(!mHdl); + + if (fileName == "libmozglue.so") { + + // Do (2) in the comment above. + char* libdir = get_installation_lib_dir(); + if (libdir) { + fileName = std::string(libdir) + "/lib/" + fileName; + free(libdir); + } + // Hand the problem off to the standard mapper. + return AutoObjectMapperPOSIX::Map(start, length, fileName); + + } else { + + // Do cases (1) and (3) in the comment above. We have to + // grapple with faulty.lib directly. + void* hdl = dlopen(fileName.c_str(), RTLD_GLOBAL | RTLD_LAZY); + if (!hdl) { + failedToMessage(mLog, "get handle for ELF file", fileName); + return false; + } + + size_t sz = __dl_get_mappable_length(hdl); + if (sz == 0) { + dlclose(hdl); + failedToMessage(mLog, "get size for ELF file", fileName); + return false; + } + + void* image = __dl_mmap(hdl, nullptr, sz, 0); + if (image == MAP_FAILED) { + dlclose(hdl); + failedToMessage(mLog, "mmap ELF file", fileName); + return false; + } + + mHdl = hdl; + mImage = *start = image; + mSize = *length = sz; + return true; + } +} + +#endif // defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) diff --git a/tools/profiler/lul/AutoObjectMapper.h b/tools/profiler/lul/AutoObjectMapper.h new file mode 100644 index 000000000..1f813d6f2 --- /dev/null +++ b/tools/profiler/lul/AutoObjectMapper.h @@ -0,0 +1,115 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef AutoObjectMapper_h +#define AutoObjectMapper_h + +#include <string> + +#include "mozilla/Attributes.h" +#include "PlatformMacros.h" + +// A (nearly-) RAII class that maps an object in and then unmaps it on +// destruction. This base class version uses the "normal" POSIX +// functions: open, fstat, close, mmap, munmap. + +class MOZ_STACK_CLASS AutoObjectMapperPOSIX { +public: + // The constructor does not attempt to map the file, because that + // might fail. Instead, once the object has been constructed, + // call Map() to attempt the mapping. There is no corresponding + // Unmap() since the unmapping is done in the destructor. Failure + // messages are sent to |aLog|. + explicit AutoObjectMapperPOSIX(void(*aLog)(const char*)); + + // Unmap the file on destruction of this object. + ~AutoObjectMapperPOSIX(); + + // Map |fileName| into the address space and return the mapping + // extents. If the file is zero sized this will fail. The file is + // mapped read-only and private. Returns true iff the mapping + // succeeded, in which case *start and *length hold its extent. + // Once a call to Map succeeds, all subsequent calls to it will + // fail. + bool Map(/*OUT*/void** start, /*OUT*/size_t* length, std::string fileName); + +protected: + // If we are currently holding a mapped object, these record the + // mapped address range. + void* mImage; + size_t mSize; + + // A logging sink, for complaining about mapping failures. + void (*mLog)(const char*); + +private: + // Are we currently holding a mapped object? This is private to + // the base class. Derived classes need to have their own way to + // track whether they are holding a mapped object. + bool mIsMapped; + + // Disable copying and assignment. + AutoObjectMapperPOSIX(const AutoObjectMapperPOSIX&); + AutoObjectMapperPOSIX& operator=(const AutoObjectMapperPOSIX&); + // Disable heap allocation of this class. + void* operator new(size_t); + void* operator new[](size_t); + void operator delete(void*); + void operator delete[](void*); +}; + + +#if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) +// This is a variant of AutoObjectMapperPOSIX suitable for use in +// conjunction with faulty.lib on Android. How it behaves depends on +// the name of the file to be mapped. There are three possible cases: +// +// (1) /foo/bar/xyzzy/blah.apk!/libwurble.so +// We hand it as-is to faulty.lib and let it fish the relevant +// bits out of the APK. +// +// (2) libmozglue.so +// This is part of the Fennec installation, but is not in the +// APK. Instead we have to figure out the installation path +// and look for it there. Because of faulty.lib limitations, +// we have to use regular open/mmap instead of faulty.lib. +// +// (3) libanythingelse.so +// faulty.lib assumes this is a system library, and prepends +// "/system/lib/" to the path. So as in (1), we can give it +// as-is to faulty.lib. +// +// Hence (1) and (3) require special-casing here. Case (2) simply +// hands the problem to the parent class. + +class MOZ_STACK_CLASS AutoObjectMapperFaultyLib : public AutoObjectMapperPOSIX { +public: + AutoObjectMapperFaultyLib(void(*aLog)(const char*)); + + ~AutoObjectMapperFaultyLib(); + + bool Map(/*OUT*/void** start, /*OUT*/size_t* length, std::string fileName); + +private: + // faulty.lib requires us to maintain an abstract handle that can be + // used later to unmap the area. If this is non-NULL, it is assumed + // that unmapping is to be done by faulty.lib. Otherwise it goes + // via the normal mechanism. + void* mHdl; + + // Disable copying and assignment. + AutoObjectMapperFaultyLib(const AutoObjectMapperFaultyLib&); + AutoObjectMapperFaultyLib& operator=(const AutoObjectMapperFaultyLib&); + // Disable heap allocation of this class. + void* operator new(size_t); + void* operator new[](size_t); + void operator delete(void*); + void operator delete[](void*); +}; + +#endif // defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + +#endif // AutoObjectMapper_h diff --git a/tools/profiler/lul/LulCommon.cpp b/tools/profiler/lul/LulCommon.cpp new file mode 100644 index 000000000..7321251c8 --- /dev/null +++ b/tools/profiler/lul/LulCommon.cpp @@ -0,0 +1,114 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2011, 2013 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/module.cc +// src/common/unique_string.cc + +// There's no internal-only interface for LulCommon. Hence include +// the external interface directly. +#include "LulCommonExt.h" + +#include <stdlib.h> +#include <string.h> + +#include <string> +#include <map> + + +namespace lul { + +using std::string; + +//////////////////////////////////////////////////////////////// +// Module +// +Module::Module(const string &name, const string &os, + const string &architecture, const string &id) : + name_(name), + os_(os), + architecture_(architecture), + id_(id) { } + +Module::~Module() { +} + + +//////////////////////////////////////////////////////////////// +// UniqueString +// +class UniqueString { + public: + explicit UniqueString(string str) { str_ = strdup(str.c_str()); } + ~UniqueString() { free(reinterpret_cast<void*>(const_cast<char*>(str_))); } + const char* str_; +}; + +const char* FromUniqueString(const UniqueString* ustr) +{ + return ustr->str_; +} + +bool IsEmptyUniqueString(const UniqueString* ustr) +{ + return (ustr->str_)[0] == '\0'; +} + + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// +UniqueStringUniverse::~UniqueStringUniverse() +{ + for (std::map<string, UniqueString*>::iterator it = map_.begin(); + it != map_.end(); it++) { + delete it->second; + } +} + +const UniqueString* UniqueStringUniverse::ToUniqueString(string str) +{ + std::map<string, UniqueString*>::iterator it = map_.find(str); + if (it == map_.end()) { + UniqueString* ustr = new UniqueString(str); + map_[str] = ustr; + return ustr; + } else { + return it->second; + } +} + +} // namespace lul diff --git a/tools/profiler/lul/LulCommonExt.h b/tools/profiler/lul/LulCommonExt.h new file mode 100644 index 000000000..99a967683 --- /dev/null +++ b/tools/profiler/lul/LulCommonExt.h @@ -0,0 +1,554 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2010, 2012, 2013 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// module.h: Define google_breakpad::Module. A Module holds debugging +// information, and can write that information out as a Breakpad +// symbol file. + + +// (C) Copyright Greg Colvin and Beman Dawes 1998, 1999. +// Copyright (c) 2001, 2002 Peter Dimov +// +// Permission to copy, use, modify, sell and distribute this software +// is granted provided this copyright notice appears in all copies. +// This software is provided "as is" without express or implied +// warranty, and with no claim as to its suitability for any purpose. +// +// See http://www.boost.org/libs/smart_ptr/scoped_ptr.htm for documentation. +// + + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/unique_string.h +// src/common/scoped_ptr.h +// src/common/module.h + +// External interface for the "Common" component of LUL. + +#ifndef LulCommonExt_h +#define LulCommonExt_h + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> + +#include <string> +#include <map> +#include <vector> +#include <cstddef> // for std::ptrdiff_t + +#include "mozilla/Assertions.h" + +namespace lul { + +using std::string; +using std::map; + + +//////////////////////////////////////////////////////////////// +// UniqueString +// + +// Abstract type +class UniqueString; + +// Get the contained C string (debugging only) +const char* FromUniqueString(const UniqueString*); + +// Is the given string empty (that is, "") ? +bool IsEmptyUniqueString(const UniqueString*); + + +//////////////////////////////////////////////////////////////// +// UniqueStringUniverse +// + +// All UniqueStrings live in some specific UniqueStringUniverse. +class UniqueStringUniverse { +public: + UniqueStringUniverse() {} + ~UniqueStringUniverse(); + // Convert a |string| to a UniqueString, that lives in this universe. + const UniqueString* ToUniqueString(string str); +private: + map<string, UniqueString*> map_; +}; + + +//////////////////////////////////////////////////////////////// +// GUID +// + +typedef struct { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} MDGUID; // GUID + +typedef MDGUID GUID; + + +//////////////////////////////////////////////////////////////// +// scoped_ptr +// + +// scoped_ptr mimics a built-in pointer except that it guarantees deletion +// of the object pointed to, either on destruction of the scoped_ptr or via +// an explicit reset(). scoped_ptr is a simple solution for simple needs; +// use shared_ptr or std::auto_ptr if your needs are more complex. + +// *** NOTE *** +// If your scoped_ptr is a class member of class FOO pointing to a +// forward declared type BAR (as shown below), then you MUST use a non-inlined +// version of the destructor. The destructor of a scoped_ptr (called from +// FOO's destructor) must have a complete definition of BAR in order to +// destroy it. Example: +// +// -- foo.h -- +// class BAR; +// +// class FOO { +// public: +// FOO(); +// ~FOO(); // Required for sources that instantiate class FOO to compile! +// +// private: +// scoped_ptr<BAR> bar_; +// }; +// +// -- foo.cc -- +// #include "foo.h" +// FOO::~FOO() {} // Empty, but must be non-inlined to FOO's class definition. + +// scoped_ptr_malloc added by Google +// When one of these goes out of scope, instead of doing a delete or +// delete[], it calls free(). scoped_ptr_malloc<char> is likely to see +// much more use than any other specializations. + +// release() added by Google +// Use this to conditionally transfer ownership of a heap-allocated object +// to the caller, usually on method success. + +template <typename T> +class scoped_ptr { + private: + + T* ptr; + + scoped_ptr(scoped_ptr const &); + scoped_ptr & operator=(scoped_ptr const &); + + public: + + typedef T element_type; + + explicit scoped_ptr(T* p = 0): ptr(p) {} + + ~scoped_ptr() { + delete ptr; + } + + void reset(T* p = 0) { + if (ptr != p) { + delete ptr; + ptr = p; + } + } + + T& operator*() const { + MOZ_ASSERT(ptr != 0); + return *ptr; + } + + T* operator->() const { + MOZ_ASSERT(ptr != 0); + return ptr; + } + + bool operator==(T* p) const { + return ptr == p; + } + + bool operator!=(T* p) const { + return ptr != p; + } + + T* get() const { + return ptr; + } + + void swap(scoped_ptr & b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + + // no reason to use these: each scoped_ptr should have its own object + template <typename U> bool operator==(scoped_ptr<U> const& p) const; + template <typename U> bool operator!=(scoped_ptr<U> const& p) const; +}; + +template<typename T> inline +void swap(scoped_ptr<T>& a, scoped_ptr<T>& b) { + a.swap(b); +} + +template<typename T> inline +bool operator==(T* p, const scoped_ptr<T>& b) { + return p == b.get(); +} + +template<typename T> inline +bool operator!=(T* p, const scoped_ptr<T>& b) { + return p != b.get(); +} + +// scoped_array extends scoped_ptr to arrays. Deletion of the array pointed to +// is guaranteed, either on destruction of the scoped_array or via an explicit +// reset(). Use shared_array or std::vector if your needs are more complex. + +template<typename T> +class scoped_array { + private: + + T* ptr; + + scoped_array(scoped_array const &); + scoped_array & operator=(scoped_array const &); + + public: + + typedef T element_type; + + explicit scoped_array(T* p = 0) : ptr(p) {} + + ~scoped_array() { + delete[] ptr; + } + + void reset(T* p = 0) { + if (ptr != p) { + delete [] ptr; + ptr = p; + } + } + + T& operator[](std::ptrdiff_t i) const { + MOZ_ASSERT(ptr != 0); + MOZ_ASSERT(i >= 0); + return ptr[i]; + } + + bool operator==(T* p) const { + return ptr == p; + } + + bool operator!=(T* p) const { + return ptr != p; + } + + T* get() const { + return ptr; + } + + void swap(scoped_array & b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + + // no reason to use these: each scoped_array should have its own object + template <typename U> bool operator==(scoped_array<U> const& p) const; + template <typename U> bool operator!=(scoped_array<U> const& p) const; +}; + +template<class T> inline +void swap(scoped_array<T>& a, scoped_array<T>& b) { + a.swap(b); +} + +template<typename T> inline +bool operator==(T* p, const scoped_array<T>& b) { + return p == b.get(); +} + +template<typename T> inline +bool operator!=(T* p, const scoped_array<T>& b) { + return p != b.get(); +} + + +// This class wraps the c library function free() in a class that can be +// passed as a template argument to scoped_ptr_malloc below. +class ScopedPtrMallocFree { + public: + inline void operator()(void* x) const { + free(x); + } +}; + +// scoped_ptr_malloc<> is similar to scoped_ptr<>, but it accepts a +// second template argument, the functor used to free the object. + +template<typename T, typename FreeProc = ScopedPtrMallocFree> +class scoped_ptr_malloc { + private: + + T* ptr; + + scoped_ptr_malloc(scoped_ptr_malloc const &); + scoped_ptr_malloc & operator=(scoped_ptr_malloc const &); + + public: + + typedef T element_type; + + explicit scoped_ptr_malloc(T* p = 0): ptr(p) {} + + ~scoped_ptr_malloc() { + free_((void*) ptr); + } + + void reset(T* p = 0) { + if (ptr != p) { + free_((void*) ptr); + ptr = p; + } + } + + T& operator*() const { + MOZ_ASSERT(ptr != 0); + return *ptr; + } + + T* operator->() const { + MOZ_ASSERT(ptr != 0); + return ptr; + } + + bool operator==(T* p) const { + return ptr == p; + } + + bool operator!=(T* p) const { + return ptr != p; + } + + T* get() const { + return ptr; + } + + void swap(scoped_ptr_malloc & b) { + T* tmp = b.ptr; + b.ptr = ptr; + ptr = tmp; + } + + T* release() { + T* tmp = ptr; + ptr = 0; + return tmp; + } + + private: + + // no reason to use these: each scoped_ptr_malloc should have its own object + template <typename U, typename GP> + bool operator==(scoped_ptr_malloc<U, GP> const& p) const; + template <typename U, typename GP> + bool operator!=(scoped_ptr_malloc<U, GP> const& p) const; + + static FreeProc const free_; +}; + +template<typename T, typename FP> +FP const scoped_ptr_malloc<T,FP>::free_ = FP(); + +template<typename T, typename FP> inline +void swap(scoped_ptr_malloc<T,FP>& a, scoped_ptr_malloc<T,FP>& b) { + a.swap(b); +} + +template<typename T, typename FP> inline +bool operator==(T* p, const scoped_ptr_malloc<T,FP>& b) { + return p == b.get(); +} + +template<typename T, typename FP> inline +bool operator!=(T* p, const scoped_ptr_malloc<T,FP>& b) { + return p != b.get(); +} + + +//////////////////////////////////////////////////////////////// +// Module +// + +// A Module represents the contents of a module, and supports methods +// for adding information produced by parsing STABS or DWARF data +// --- possibly both from the same file --- and then writing out the +// unified contents as a Breakpad-format symbol file. +class Module { +public: + // The type of addresses and sizes in a symbol table. + typedef uint64_t Address; + + // Representation of an expression. This can either be a postfix + // expression, in which case it is stored as a string, or a simple + // expression of the form (identifier + imm) or *(identifier + imm). + // It can also be invalid (denoting "no value"). + enum ExprHow { + kExprInvalid = 1, + kExprPostfix, + kExprSimple, + kExprSimpleMem + }; + + struct Expr { + // Construct a simple-form expression + Expr(const UniqueString* ident, long offset, bool deref) { + if (IsEmptyUniqueString(ident)) { + Expr(); + } else { + postfix_ = ""; + ident_ = ident; + offset_ = offset; + how_ = deref ? kExprSimpleMem : kExprSimple; + } + } + + // Construct an invalid expression + Expr() { + postfix_ = ""; + ident_ = nullptr; + offset_ = 0; + how_ = kExprInvalid; + } + + // Return the postfix expression string, either directly, + // if this is a postfix expression, or by synthesising it + // for a simple expression. + std::string getExprPostfix() const { + switch (how_) { + case kExprPostfix: + return postfix_; + case kExprSimple: + case kExprSimpleMem: { + char buf[40]; + sprintf(buf, " %ld %c%s", labs(offset_), offset_ < 0 ? '-' : '+', + how_ == kExprSimple ? "" : " ^"); + return std::string(FromUniqueString(ident_)) + std::string(buf); + } + case kExprInvalid: + default: + MOZ_ASSERT(0 && "getExprPostfix: invalid Module::Expr type"); + return "Expr::genExprPostfix: kExprInvalid"; + } + } + + // The identifier that gives the starting value for simple expressions. + const UniqueString* ident_; + // The offset to add for simple expressions. + long offset_; + // The Postfix expression string to evaluate for non-simple expressions. + std::string postfix_; + // The operation expressed by this expression. + ExprHow how_; + }; + + // A map from register names to expressions that recover + // their values. This can represent a complete set of rules to + // follow at some address, or a set of changes to be applied to an + // extant set of rules. + // NOTE! there are two completely different types called RuleMap. This + // is one of them. + typedef std::map<const UniqueString*, Expr> RuleMap; + + // A map from addresses to RuleMaps, representing changes that take + // effect at given addresses. + typedef std::map<Address, RuleMap> RuleChangeMap; + + // A range of 'STACK CFI' stack walking information. An instance of + // this structure corresponds to a 'STACK CFI INIT' record and the + // subsequent 'STACK CFI' records that fall within its range. + struct StackFrameEntry { + // The starting address and number of bytes of machine code this + // entry covers. + Address address, size; + + // The initial register recovery rules, in force at the starting + // address. + RuleMap initial_rules; + + // A map from addresses to rule changes. To find the rules in + // force at a given address, start with initial_rules, and then + // apply the changes given in this map for all addresses up to and + // including the address you're interested in. + RuleChangeMap rule_changes; + }; + + // Create a new module with the given name, operating system, + // architecture, and ID string. + Module(const std::string &name, const std::string &os, + const std::string &architecture, const std::string &id); + ~Module(); + +private: + + // Module header entries. + std::string name_, os_, architecture_, id_; +}; + + +} // namespace lul + +#endif // LulCommonExt_h diff --git a/tools/profiler/lul/LulDwarf.cpp b/tools/profiler/lul/LulDwarf.cpp new file mode 100644 index 000000000..1bdbdabb6 --- /dev/null +++ b/tools/profiler/lul/LulDwarf.cpp @@ -0,0 +1,2180 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit, +// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/bytereader.cc +// src/common/dwarf/dwarf2reader.cc +// src/common/dwarf_cfi_to_module.cc + +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include <map> +#include <stack> +#include <string> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "LulCommonExt.h" +#include "LulDwarfInt.h" + + +// Set this to 1 for verbose logging +#define DEBUG_DWARF 0 + + +namespace lul { + +using std::string; + +ByteReader::ByteReader(enum Endianness endian) + :offset_reader_(NULL), address_reader_(NULL), endian_(endian), + address_size_(0), offset_size_(0), + have_section_base_(), have_text_base_(), have_data_base_(), + have_function_base_() { } + +ByteReader::~ByteReader() { } + +void ByteReader::SetOffsetSize(uint8 size) { + offset_size_ = size; + MOZ_ASSERT(size == 4 || size == 8); + if (size == 4) { + this->offset_reader_ = &ByteReader::ReadFourBytes; + } else { + this->offset_reader_ = &ByteReader::ReadEightBytes; + } +} + +void ByteReader::SetAddressSize(uint8 size) { + address_size_ = size; + MOZ_ASSERT(size == 4 || size == 8); + if (size == 4) { + this->address_reader_ = &ByteReader::ReadFourBytes; + } else { + this->address_reader_ = &ByteReader::ReadEightBytes; + } +} + +uint64 ByteReader::ReadInitialLength(const char* start, size_t* len) { + const uint64 initial_length = ReadFourBytes(start); + start += 4; + + // In DWARF2/3, if the initial length is all 1 bits, then the offset + // size is 8 and we need to read the next 8 bytes for the real length. + if (initial_length == 0xffffffff) { + SetOffsetSize(8); + *len = 12; + return ReadOffset(start); + } else { + SetOffsetSize(4); + *len = 4; + } + return initial_length; +} + +bool ByteReader::ValidEncoding(DwarfPointerEncoding encoding) const { + if (encoding == DW_EH_PE_omit) return true; + if (encoding == DW_EH_PE_aligned) return true; + if ((encoding & 0x7) > DW_EH_PE_udata8) + return false; + if ((encoding & 0x70) > DW_EH_PE_funcrel) + return false; + return true; +} + +bool ByteReader::UsableEncoding(DwarfPointerEncoding encoding) const { + switch (encoding & 0x70) { + case DW_EH_PE_absptr: return true; + case DW_EH_PE_pcrel: return have_section_base_; + case DW_EH_PE_textrel: return have_text_base_; + case DW_EH_PE_datarel: return have_data_base_; + case DW_EH_PE_funcrel: return have_function_base_; + default: return false; + } +} + +uint64 ByteReader::ReadEncodedPointer(const char *buffer, + DwarfPointerEncoding encoding, + size_t *len) const { + // UsableEncoding doesn't approve of DW_EH_PE_omit, so we shouldn't + // see it here. + MOZ_ASSERT(encoding != DW_EH_PE_omit); + + // The Linux Standards Base 4.0 does not make this clear, but the + // GNU tools (gcc/unwind-pe.h; readelf/dwarf.c; gdb/dwarf2-frame.c) + // agree that aligned pointers are always absolute, machine-sized, + // machine-signed pointers. + if (encoding == DW_EH_PE_aligned) { + MOZ_ASSERT(have_section_base_); + + // We don't need to align BUFFER in *our* address space. Rather, we + // need to find the next position in our buffer that would be aligned + // when the .eh_frame section the buffer contains is loaded into the + // program's memory. So align assuming that buffer_base_ gets loaded at + // address section_base_, where section_base_ itself may or may not be + // aligned. + + // First, find the offset to START from the closest prior aligned + // address. + uint64 skew = section_base_ & (AddressSize() - 1); + // Now find the offset from that aligned address to buffer. + uint64 offset = skew + (buffer - buffer_base_); + // Round up to the next boundary. + uint64 aligned = (offset + AddressSize() - 1) & -AddressSize(); + // Convert back to a pointer. + const char *aligned_buffer = buffer_base_ + (aligned - skew); + // Finally, store the length and actually fetch the pointer. + *len = aligned_buffer - buffer + AddressSize(); + return ReadAddress(aligned_buffer); + } + + // Extract the value first, ignoring whether it's a pointer or an + // offset relative to some base. + uint64 offset; + switch (encoding & 0x0f) { + case DW_EH_PE_absptr: + // DW_EH_PE_absptr is weird, as it is used as a meaningful value for + // both the high and low nybble of encoding bytes. When it appears in + // the high nybble, it means that the pointer is absolute, not an + // offset from some base address. When it appears in the low nybble, + // as here, it means that the pointer is stored as a normal + // machine-sized and machine-signed address. A low nybble of + // DW_EH_PE_absptr does not imply that the pointer is absolute; it is + // correct for us to treat the value as an offset from a base address + // if the upper nybble is not DW_EH_PE_absptr. + offset = ReadAddress(buffer); + *len = AddressSize(); + break; + + case DW_EH_PE_uleb128: + offset = ReadUnsignedLEB128(buffer, len); + break; + + case DW_EH_PE_udata2: + offset = ReadTwoBytes(buffer); + *len = 2; + break; + + case DW_EH_PE_udata4: + offset = ReadFourBytes(buffer); + *len = 4; + break; + + case DW_EH_PE_udata8: + offset = ReadEightBytes(buffer); + *len = 8; + break; + + case DW_EH_PE_sleb128: + offset = ReadSignedLEB128(buffer, len); + break; + + case DW_EH_PE_sdata2: + offset = ReadTwoBytes(buffer); + // Sign-extend from 16 bits. + offset = (offset ^ 0x8000) - 0x8000; + *len = 2; + break; + + case DW_EH_PE_sdata4: + offset = ReadFourBytes(buffer); + // Sign-extend from 32 bits. + offset = (offset ^ 0x80000000ULL) - 0x80000000ULL; + *len = 4; + break; + + case DW_EH_PE_sdata8: + // No need to sign-extend; this is the full width of our type. + offset = ReadEightBytes(buffer); + *len = 8; + break; + + default: + abort(); + } + + // Find the appropriate base address. + uint64 base; + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = 0; + break; + + case DW_EH_PE_pcrel: + MOZ_ASSERT(have_section_base_); + base = section_base_ + (buffer - buffer_base_); + break; + + case DW_EH_PE_textrel: + MOZ_ASSERT(have_text_base_); + base = text_base_; + break; + + case DW_EH_PE_datarel: + MOZ_ASSERT(have_data_base_); + base = data_base_; + break; + + case DW_EH_PE_funcrel: + MOZ_ASSERT(have_function_base_); + base = function_base_; + break; + + default: + abort(); + } + + uint64 pointer = base + offset; + + // Remove inappropriate upper bits. + if (AddressSize() == 4) + pointer = pointer & 0xffffffff; + else + MOZ_ASSERT(AddressSize() == sizeof(uint64)); + + return pointer; +} + + +// A DWARF rule for recovering the address or value of a register, or +// computing the canonical frame address. There is one subclass of this for +// each '*Rule' member function in CallFrameInfo::Handler. +// +// It's annoying that we have to handle Rules using pointers (because +// the concrete instances can have an arbitrary size). They're small, +// so it would be much nicer if we could just handle them by value +// instead of fretting about ownership and destruction. +// +// It seems like all these could simply be instances of std::tr1::bind, +// except that we need instances to be EqualityComparable, too. +// +// This could logically be nested within State, but then the qualified names +// get horrendous. +class CallFrameInfo::Rule { + public: + virtual ~Rule() { } + + // Tell HANDLER that, at ADDRESS in the program, REGISTER can be + // recovered using this rule. If REGISTER is kCFARegister, then this rule + // describes how to compute the canonical frame address. Return what the + // HANDLER member function returned. + virtual bool Handle(Handler *handler, uint64 address, int register) const = 0; + + // Equality on rules. We use these to decide which rules we need + // to report after a DW_CFA_restore_state instruction. + virtual bool operator==(const Rule &rhs) const = 0; + + bool operator!=(const Rule &rhs) const { return ! (*this == rhs); } + + // Return a pointer to a copy of this rule. + virtual Rule *Copy() const = 0; + + // If this is a base+offset rule, change its base register to REG. + // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.) + virtual void SetBaseRegister(unsigned reg) { } + + // If this is a base+offset rule, change its offset to OFFSET. Otherwise, + // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.) + virtual void SetOffset(long long offset) { } + + // A RTTI workaround, to make it possible to implement equality + // comparisons on classes derived from this one. + enum CFIRTag { + CFIR_UNDEFINED_RULE, + CFIR_SAME_VALUE_RULE, + CFIR_OFFSET_RULE, + CFIR_VAL_OFFSET_RULE, + CFIR_REGISTER_RULE, + CFIR_EXPRESSION_RULE, + CFIR_VAL_EXPRESSION_RULE + }; + + // Produce the tag that identifies the child class of this object. + virtual CFIRTag getTag() const = 0; +}; + +// Rule: the value the register had in the caller cannot be recovered. +class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule { + public: + UndefinedRule() { } + ~UndefinedRule() { } + CFIRTag getTag() const { return CFIR_UNDEFINED_RULE; } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->UndefinedRule(address, reg); + } + bool operator==(const Rule &rhs) const { + if (rhs.getTag() != CFIR_UNDEFINED_RULE) return false; + return true; + } + Rule *Copy() const { return new UndefinedRule(*this); } +}; + +// Rule: the register's value is the same as that it had in the caller. +class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule { + public: + SameValueRule() { } + ~SameValueRule() { } + CFIRTag getTag() const { return CFIR_SAME_VALUE_RULE; } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->SameValueRule(address, reg); + } + bool operator==(const Rule &rhs) const { + if (rhs.getTag() != CFIR_SAME_VALUE_RULE) return false; + return true; + } + Rule *Copy() const { return new SameValueRule(*this); } +}; + +// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER +// may be CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule { + public: + OffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~OffsetRule() { } + CFIRTag getTag() const { return CFIR_OFFSET_RULE; } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->OffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule &rhs) const { + if (rhs.getTag() != CFIR_OFFSET_RULE) return false; + const OffsetRule *our_rhs = static_cast<const OffsetRule *>(&rhs); + return (base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule *Copy() const { return new OffsetRule(*this); } + // We don't actually need SetBaseRegister or SetOffset here, since they + // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it + // doesn't make sense to use OffsetRule for computing the CFA: it + // computes the address at which a register is saved, not a value. + private: + int base_register_; + long offset_; +}; + +// Rule: the value the register had in the caller is the value of +// BASE_REGISTER plus offset. BASE_REGISTER may be +// CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule { + public: + ValOffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~ValOffsetRule() { } + CFIRTag getTag() const { return CFIR_VAL_OFFSET_RULE; } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ValOffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule &rhs) const { + if (rhs.getTag() != CFIR_VAL_OFFSET_RULE) return false; + const ValOffsetRule *our_rhs = static_cast<const ValOffsetRule *>(&rhs); + return (base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule *Copy() const { return new ValOffsetRule(*this); } + void SetBaseRegister(unsigned reg) { base_register_ = reg; } + void SetOffset(long long offset) { offset_ = offset; } + private: + int base_register_; + long offset_; +}; + +// Rule: the register has been saved in another register REGISTER_NUMBER_. +class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule { + public: + explicit RegisterRule(int register_number) + : register_number_(register_number) { } + ~RegisterRule() { } + CFIRTag getTag() const { return CFIR_REGISTER_RULE; } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->RegisterRule(address, reg, register_number_); + } + bool operator==(const Rule &rhs) const { + if (rhs.getTag() != CFIR_REGISTER_RULE) return false; + const RegisterRule *our_rhs = static_cast<const RegisterRule *>(&rhs); + return (register_number_ == our_rhs->register_number_); + } + Rule *Copy() const { return new RegisterRule(*this); } + private: + int register_number_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. +class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule { + public: + explicit ExpressionRule(const string &expression) + : expression_(expression) { } + ~ExpressionRule() { } + CFIRTag getTag() const { return CFIR_EXPRESSION_RULE; } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ExpressionRule(address, reg, expression_); + } + bool operator==(const Rule &rhs) const { + if (rhs.getTag() != CFIR_EXPRESSION_RULE) return false; + const ExpressionRule *our_rhs = static_cast<const ExpressionRule *>(&rhs); + return (expression_ == our_rhs->expression_); + } + Rule *Copy() const { return new ExpressionRule(*this); } + private: + string expression_; +}; + +// Rule: EXPRESSION evaluates to the previous value of the register. +class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule { + public: + explicit ValExpressionRule(const string &expression) + : expression_(expression) { } + ~ValExpressionRule() { } + CFIRTag getTag() const { return CFIR_VAL_EXPRESSION_RULE; } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ValExpressionRule(address, reg, expression_); + } + bool operator==(const Rule &rhs) const { + if (rhs.getTag() != CFIR_VAL_EXPRESSION_RULE) return false; + const ValExpressionRule *our_rhs = + static_cast<const ValExpressionRule *>(&rhs); + return (expression_ == our_rhs->expression_); + } + Rule *Copy() const { return new ValExpressionRule(*this); } + private: + string expression_; +}; + +// A map from register numbers to rules. +class CallFrameInfo::RuleMap { + public: + RuleMap() : cfa_rule_(NULL) { } + RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; } + ~RuleMap() { Clear(); } + + RuleMap &operator=(const RuleMap &rhs); + + // Set the rule for computing the CFA to RULE. Take ownership of RULE. + void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; } + + // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains + // ownership of the rule. We use this for DW_CFA_def_cfa_offset and + // DW_CFA_def_cfa_register, and for detecting references to the CFA before + // a rule for it has been established. + Rule *CFARule() const { return cfa_rule_; } + + // Return the rule for REG, or NULL if there is none. The caller takes + // ownership of the result. + Rule *RegisterRule(int reg) const; + + // Set the rule for computing REG to RULE. Take ownership of RULE. + void SetRegisterRule(int reg, Rule *rule); + + // Make all the appropriate calls to HANDLER as if we were changing from + // this RuleMap to NEW_RULES at ADDRESS. We use this to implement + // DW_CFA_restore_state, where lots of rules can change simultaneously. + // Return true if all handlers returned true; otherwise, return false. + bool HandleTransitionTo(Handler *handler, uint64 address, + const RuleMap &new_rules) const; + + private: + // A map from register numbers to Rules. + typedef std::map<int, Rule *> RuleByNumber; + + // Remove all register rules and clear cfa_rule_. + void Clear(); + + // The rule for computing the canonical frame address. This RuleMap owns + // this rule. + Rule *cfa_rule_; + + // A map from register numbers to postfix expressions to recover + // their values. This RuleMap owns the Rules the map refers to. + RuleByNumber registers_; +}; + +CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) { + Clear(); + // Since each map owns the rules it refers to, assignment must copy them. + if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy(); + for (RuleByNumber::const_iterator it = rhs.registers_.begin(); + it != rhs.registers_.end(); it++) + registers_[it->first] = it->second->Copy(); + return *this; +} + +CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const { + MOZ_ASSERT(reg != Handler::kCFARegister); + RuleByNumber::const_iterator it = registers_.find(reg); + if (it != registers_.end()) + return it->second->Copy(); + else + return NULL; +} + +void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) { + MOZ_ASSERT(reg != Handler::kCFARegister); + MOZ_ASSERT(rule); + Rule **slot = ®isters_[reg]; + delete *slot; + *slot = rule; +} + +bool CallFrameInfo::RuleMap::HandleTransitionTo( + Handler *handler, + uint64 address, + const RuleMap &new_rules) const { + // Transition from cfa_rule_ to new_rules.cfa_rule_. + if (cfa_rule_ && new_rules.cfa_rule_) { + if (*cfa_rule_ != *new_rules.cfa_rule_ && + !new_rules.cfa_rule_->Handle(handler, address, Handler::kCFARegister)) + return false; + } else if (cfa_rule_) { + // this RuleMap has a CFA rule but new_rules doesn't. + // CallFrameInfo::Handler has no way to handle this --- and shouldn't; + // it's garbage input. The instruction interpreter should have + // detected this and warned, so take no action here. + } else if (new_rules.cfa_rule_) { + // This shouldn't be possible: NEW_RULES is some prior state, and + // there's no way to remove entries. + MOZ_ASSERT(0); + } else { + // Both CFA rules are empty. No action needed. + } + + // Traverse the two maps in order by register number, and report + // whatever differences we find. + RuleByNumber::const_iterator old_it = registers_.begin(); + RuleByNumber::const_iterator new_it = new_rules.registers_.begin(); + while (old_it != registers_.end() && new_it != new_rules.registers_.end()) { + if (old_it->first < new_it->first) { + // This RuleMap has an entry for old_it->first, but NEW_RULES + // doesn't. + // + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } else if (old_it->first > new_it->first) { + // NEW_RULES has entry for new_it->first, but this RuleMap + // doesn't. This shouldn't be possible: NEW_RULES is some prior + // state, and there's no way to remove entries. + MOZ_ASSERT(0); + } else { + // Both maps have an entry for this register. Report the new + // rule if it is different. + if (*old_it->second != *new_it->second && + !new_it->second->Handle(handler, address, new_it->first)) + return false; + new_it++, old_it++; + } + } + // Finish off entries from this RuleMap with no counterparts in new_rules. + while (old_it != registers_.end()) { + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } + // Since we only make transitions from a rule set to some previously + // saved rule set, and we can only add rules to the map, NEW_RULES + // must have fewer rules than *this. + MOZ_ASSERT(new_it == new_rules.registers_.end()); + + return true; +} + +// Remove all register rules and clear cfa_rule_. +void CallFrameInfo::RuleMap::Clear() { + delete cfa_rule_; + cfa_rule_ = NULL; + for (RuleByNumber::iterator it = registers_.begin(); + it != registers_.end(); it++) + delete it->second; + registers_.clear(); +} + +// The state of the call frame information interpreter as it processes +// instructions from a CIE and FDE. +class CallFrameInfo::State { + public: + // Create a call frame information interpreter state with the given + // reporter, reader, handler, and initial call frame info address. + State(ByteReader *reader, Handler *handler, Reporter *reporter, + uint64 address) + : reader_(reader), handler_(handler), reporter_(reporter), + address_(address), entry_(NULL), cursor_(NULL), + saved_rules_(NULL) { } + + ~State() { + if (saved_rules_) + delete saved_rules_; + } + + // Interpret instructions from CIE, save the resulting rule set for + // DW_CFA_restore instructions, and return true. On error, report + // the problem to reporter_ and return false. + bool InterpretCIE(const CIE &cie); + + // Interpret instructions from FDE, and return true. On error, + // report the problem to reporter_ and return false. + bool InterpretFDE(const FDE &fde); + + private: + // The operands of a CFI instruction, for ParseOperands. + struct Operands { + unsigned register_number; // A register number. + uint64 offset; // An offset or address. + long signed_offset; // A signed offset. + string expression; // A DWARF expression. + }; + + // Parse CFI instruction operands from STATE's instruction stream as + // described by FORMAT. On success, populate OPERANDS with the + // results, and return true. On failure, report the problem and + // return false. + // + // Each character of FORMAT should be one of the following: + // + // 'r' unsigned LEB128 register number (OPERANDS->register_number) + // 'o' unsigned LEB128 offset (OPERANDS->offset) + // 's' signed LEB128 offset (OPERANDS->signed_offset) + // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) + // '1' a one-byte offset (OPERANDS->offset) + // '2' a two-byte offset (OPERANDS->offset) + // '4' a four-byte offset (OPERANDS->offset) + // '8' an eight-byte offset (OPERANDS->offset) + // 'e' a DW_FORM_block holding a (OPERANDS->expression) + // DWARF expression + bool ParseOperands(const char *format, Operands *operands); + + // Interpret one CFI instruction from STATE's instruction stream, update + // STATE, report any rule changes to handler_, and return true. On + // failure, report the problem and return false. + bool DoInstruction(); + + // The following Do* member functions are subroutines of DoInstruction, + // factoring out the actual work of operations that have several + // different encodings. + + // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and + // return true. On failure, report and return false. (Used for + // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) + bool DoDefCFA(unsigned base_register, long offset); + + // Change the offset of the CFA rule to OFFSET, and return true. On + // failure, report and return false. (Subroutine for + // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) + bool DoDefCFAOffset(long offset); + + // Specify that REG can be recovered using RULE, and return true. On + // failure, report and return false. + bool DoRule(unsigned reg, Rule *rule); + + // Specify that REG can be found at OFFSET from the CFA, and return true. + // On failure, report and return false. (Subroutine for DW_CFA_offset, + // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) + bool DoOffset(unsigned reg, long offset); + + // Specify that the caller's value for REG is the CFA plus OFFSET, + // and return true. On failure, report and return false. (Subroutine + // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) + bool DoValOffset(unsigned reg, long offset); + + // Restore REG to the rule established in the CIE, and return true. On + // failure, report and return false. (Subroutine for DW_CFA_restore and + // DW_CFA_restore_extended.) + bool DoRestore(unsigned reg); + + // Return the section offset of the instruction at cursor. For use + // in error messages. + uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } + + // Report that entry_ is incomplete, and return false. For brevity. + bool ReportIncomplete() { + reporter_->Incomplete(entry_->offset, entry_->kind); + return false; + } + + // For reading multi-byte values with the appropriate endianness. + ByteReader *reader_; + + // The handler to which we should report the data we find. + Handler *handler_; + + // For reporting problems in the info we're parsing. + Reporter *reporter_; + + // The code address to which the next instruction in the stream applies. + uint64 address_; + + // The entry whose instructions we are currently processing. This is + // first a CIE, and then an FDE. + const Entry *entry_; + + // The next instruction to process. + const char *cursor_; + + // The current set of rules. + RuleMap rules_; + + // The set of rules established by the CIE, used by DW_CFA_restore + // and DW_CFA_restore_extended. We set this after interpreting the + // CIE's instructions. + RuleMap cie_rules_; + + // A stack of saved states, for DW_CFA_remember_state and + // DW_CFA_restore_state. + std::stack<RuleMap>* saved_rules_; +}; + +bool CallFrameInfo::State::InterpretCIE(const CIE &cie) { + entry_ = &cie; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + // Note the rules established by the CIE, for use by DW_CFA_restore + // and DW_CFA_restore_extended. + cie_rules_ = rules_; + return true; +} + +bool CallFrameInfo::State::InterpretFDE(const FDE &fde) { + entry_ = &fde; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + return true; +} + +bool CallFrameInfo::State::ParseOperands(const char *format, + Operands *operands) { + size_t len; + const char *operand; + + for (operand = format; *operand; operand++) { + size_t bytes_left = entry_->end - cursor_; + switch (*operand) { + case 'r': + operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'o': + operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 's': + operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'a': + operands->offset = + reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding, + &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case '1': + if (1 > bytes_left) return ReportIncomplete(); + operands->offset = static_cast<unsigned char>(*cursor_++); + break; + + case '2': + if (2 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadTwoBytes(cursor_); + cursor_ += 2; + break; + + case '4': + if (4 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadFourBytes(cursor_); + cursor_ += 4; + break; + + case '8': + if (8 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadEightBytes(cursor_); + cursor_ += 8; + break; + + case 'e': { + size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left || expression_length > bytes_left - len) + return ReportIncomplete(); + cursor_ += len; + operands->expression = string(cursor_, expression_length); + cursor_ += expression_length; + break; + } + + default: + MOZ_ASSERT(0); + } + } + + return true; +} + +bool CallFrameInfo::State::DoInstruction() { + CIE *cie = entry_->cie; + Operands ops; + + // Our entry's kind should have been set by now. + MOZ_ASSERT(entry_->kind != kUnknown); + + // We shouldn't have been invoked unless there were more + // instructions to parse. + MOZ_ASSERT(cursor_ < entry_->end); + + unsigned opcode = *cursor_++; + if ((opcode & 0xc0) != 0) { + switch (opcode & 0xc0) { + // Advance the address. + case DW_CFA_advance_loc: { + size_t code_offset = opcode & 0x3f; + address_ += code_offset * cie->code_alignment_factor; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset: + if (!ParseOperands("o", &ops) || + !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) + return false; + break; + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore: + if (!DoRestore(opcode & 0x3f)) return false; + break; + + // The 'if' above should have excluded this possibility. + default: + MOZ_ASSERT(0); + } + + // Return here, so the big switch below won't be indented. + return true; + } + + switch (opcode) { + // Set the address. + case DW_CFA_set_loc: + if (!ParseOperands("a", &ops)) return false; + address_ = ops.offset; + break; + + // Advance the address. + case DW_CFA_advance_loc1: + if (!ParseOperands("1", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc2: + if (!ParseOperands("2", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc4: + if (!ParseOperands("4", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_MIPS_advance_loc8: + if (!ParseOperands("8", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa: + if (!ParseOperands("ro", &ops) || + !DoDefCFA(ops.register_number, ops.offset)) + return false; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa_sf: + if (!ParseOperands("rs", &ops) || + !DoDefCFA(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Change the base register used to compute the CFA. + case DW_CFA_def_cfa_register: { + Rule *cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + if (!ParseOperands("r", &ops)) return false; + cfa_rule->SetBaseRegister(ops.register_number); + if (!cfa_rule->Handle(handler_, address_, Handler::kCFARegister)) + return false; + break; + } + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset: + if (!ParseOperands("o", &ops) || + !DoDefCFAOffset(ops.offset)) + return false; + break; + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset_sf: + if (!ParseOperands("s", &ops) || + !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Specify an expression whose value is the CFA. + case DW_CFA_def_cfa_expression: { + if (!ParseOperands("e", &ops)) + return false; + Rule *rule = new ValExpressionRule(ops.expression); + rules_.SetCFARule(rule); + if (!rule->Handle(handler_, address_, Handler::kCFARegister)) + return false; + break; + } + + // The register's value cannot be recovered. + case DW_CFA_undefined: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new UndefinedRule())) + return false; + break; + } + + // The register's value is unchanged from its value in the caller. + case DW_CFA_same_value: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new SameValueRule())) + return false; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_offset_extended_sf: + if (!ParseOperands("rs", &ops) || + !DoOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_GNU_negative_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + -ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset: + if (!ParseOperands("ro", &ops) || + !DoValOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset_sf: + if (!ParseOperands("rs", &ops) || + !DoValOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register has been saved in another register. + case DW_CFA_register: { + if (!ParseOperands("ro", &ops) || + !DoRule(ops.register_number, new RegisterRule(ops.offset))) + return false; + break; + } + + // An expression yields the address at which the register is saved. + case DW_CFA_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ExpressionRule(ops.expression))) + return false; + break; + } + + // An expression yields the caller's value for the register. + case DW_CFA_val_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ValExpressionRule(ops.expression))) + return false; + break; + } + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore_extended: + if (!ParseOperands("r", &ops) || + !DoRestore( ops.register_number)) + return false; + break; + + // Save the current set of rules on a stack. + case DW_CFA_remember_state: + if (!saved_rules_) { + saved_rules_ = new std::stack<RuleMap>(); + } + saved_rules_->push(rules_); + break; + + // Pop the current set of rules off the stack. + case DW_CFA_restore_state: { + if (!saved_rules_ || saved_rules_->empty()) { + reporter_->EmptyStateStack(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + const RuleMap &new_rules = saved_rules_->top(); + if (rules_.CFARule() && !new_rules.CFARule()) { + reporter_->ClearingCFARule(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + rules_.HandleTransitionTo(handler_, address_, new_rules); + rules_ = new_rules; + saved_rules_->pop(); + break; + } + + // No operation. (Padding instruction.) + case DW_CFA_nop: + break; + + // A SPARC register window save: Registers 8 through 15 (%o0-%o7) + // are saved in registers 24 through 31 (%i0-%i7), and registers + // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets + // (0-15 * the register size). The register numbers must be + // hard-coded. A GNU extension, and not a pretty one. + case DW_CFA_GNU_window_save: { + // Save %o0-%o7 in %i0-%i7. + for (int i = 8; i < 16; i++) + if (!DoRule(i, new RegisterRule(i + 16))) + return false; + // Save %l0-%l7 and %i0-%i7 at the CFA. + for (int i = 16; i < 32; i++) + // Assume that the byte reader's address size is the same as + // the architecture's register size. !@#%*^ hilarious. + if (!DoRule(i, new OffsetRule(Handler::kCFARegister, + (i - 16) * reader_->AddressSize()))) + return false; + break; + } + + // I'm not sure what this is. GDB doesn't use it for unwinding. + case DW_CFA_GNU_args_size: + if (!ParseOperands("o", &ops)) return false; + break; + + // An opcode we don't recognize. + default: { + reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } + + return true; +} + +bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { + Rule *rule = new ValOffsetRule(base_register, offset); + rules_.SetCFARule(rule); + return rule->Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoDefCFAOffset(long offset) { + Rule *cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + cfa_rule->SetOffset(offset); + return cfa_rule->Handle(handler_, address_, Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) { + rules_.SetRegisterRule(reg, rule); + return rule->Handle(handler_, address_, reg); +} + +bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new OffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new ValOffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoRestore(unsigned reg) { + // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. + if (entry_->kind == kCIE) { + reporter_->RestoreInCIE(entry_->offset, CursorOffset()); + return false; + } + Rule *rule = cie_rules_.RegisterRule(reg); + if (!rule) { + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + rule = new SameValueRule(); + } + return DoRule(reg, rule); +} + +bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) { + const char *buffer_end = buffer_ + buffer_length_; + + // Initialize enough of ENTRY for use in error reporting. + entry->offset = cursor - buffer_; + entry->start = cursor; + entry->kind = kUnknown; + entry->end = NULL; + + // Read the initial length. This sets reader_'s offset size. + size_t length_size; + uint64 length = reader_->ReadInitialLength(cursor, &length_size); + if (length_size > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. + if (length > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + + // The length is the number of bytes after the initial length field; + // we have that position handy at this point, so compute the end + // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, + // and the length didn't fit in a size_t, we would have rejected it + // above.) + entry->end = cursor + length; + + // Parse the next field: either the offset of a CIE or a CIE id. + size_t offset_size = reader_->OffsetSize(); + if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); + entry->id = reader_->ReadOffset(cursor); + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. + + // Now we can decide what kind of entry this is. + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + MOZ_ASSERT(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } + } + + // Now advance cursor past the id. + cursor += offset_size; + + // The fields specific to this kind of entry start here. + entry->fields = cursor; + + entry->cie = NULL; + + return true; +} + +bool CallFrameInfo::ReadCIEFields(CIE *cie) { + const char *cursor = cie->fields; + size_t len; + + MOZ_ASSERT(cie->kind == kCIE); + + // Prepare for early exit. + cie->version = 0; + cie->augmentation.clear(); + cie->code_alignment_factor = 0; + cie->data_alignment_factor = 0; + cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; + cie->instructions = 0; + + // Parse the version number. + if (cie->end - cursor < 1) + return ReportIncomplete(cie); + cie->version = reader_->ReadOneByte(cursor); + cursor++; + + // If we don't recognize the version, we can't parse any more fields of the + // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a + // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well; + // the difference between those versions seems to be the same as for + // .debug_frame. + if (cie->version < 1 || cie->version > 3) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + + const char *augmentation_start = cursor; + const void *augmentation_end = + memchr(augmentation_start, '\0', cie->end - augmentation_start); + if (! augmentation_end) return ReportIncomplete(cie); + cursor = static_cast<const char *>(augmentation_end); + cie->augmentation = string(augmentation_start, + cursor - augmentation_start); + // Skip the terminating '\0'. + cursor++; + + // Is this CFI augmented? + if (!cie->augmentation.empty()) { + // Is it an augmentation we recognize? + if (cie->augmentation[0] == DW_Z_augmentation_start) { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. Augmentations can have arbitrary + // effects on the form of rest of the content, so we have to give up. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + + // Parse the code alignment factor. + cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the data alignment factor. + cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the return address register. This is a ubyte in version 1, and + // a ULEB128 in version 3. + if (cie->version == 1) { + if (cursor >= cie->end) return ReportIncomplete(cie); + cie->return_address_register = uint8(*cursor++); + } else { + cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + } + + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. + if (cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const char *data = cursor; + cursor += data_size; + const char *data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case DW_Z_has_LSDA: + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. + break; + + case DW_Z_has_personality_routine: + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = + reader_->ReadEncodedPointer(data, cie->personality_encoding, + &len); + if (len > size_t(data_end - data)) + return ReportIncomplete(cie); + data += len; + break; + + case DW_Z_has_FDE_address_encoding: + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case DW_Z_is_signal_trampoline: + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + + // The CIE's instructions start here. + cie->instructions = cursor; + + return true; +} + +bool CallFrameInfo::ReadFDEFields(FDE *fde) { + const char *cursor = fde->fields; + size_t size; + + fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, + &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. + if (fde->cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. + if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } + + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) + return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } + + // The FDE's instructions start after those. + fde->instructions = cursor; + + return true; +} + +bool CallFrameInfo::Start() { + const char *buffer_end = buffer_ + buffer_length_; + const char *cursor; + bool all_ok = true; + const char *entry_end; + bool ok; + + // Traverse all the entries in buffer_, skipping CIEs and offering + // FDEs to the handler. + for (cursor = buffer_; cursor < buffer_end; + cursor = entry_end, all_ok = all_ok && ok) { + FDE fde; + + // Make it easy to skip this entry with 'continue': assume that + // things are not okay until we've checked all the data, and + // prepare the address of the next entry. + ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. + entry_end = fde.end; + + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + + // In this loop, we skip CIEs. We only parse them fully when we + // parse an FDE that refers to them. This limits our memory + // consumption (beyond the buffer itself) to that needed to + // process the largest single entry. + if (fde.kind != kFDE) { + ok = true; + continue; + } + + // Validate the CIE pointer. + if (fde.id > buffer_length_) { + reporter_->CIEPointerOutOfRange(fde.offset, fde.id); + continue; + } + + CIE cie; + + // Parse this FDE's CIE header. + if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) + continue; + // This had better be an actual CIE. + if (cie.kind != kCIE) { + reporter_->BadCIEId(fde.offset, fde.id); + continue; + } + if (!ReadCIEFields(&cie)) + continue; + + // We now have the values that govern both the CIE and the FDE. + cie.cie = &cie; + fde.cie = &cie; + + // Parse the FDE's header. + if (!ReadFDEFields(&fde)) + continue; + + // Call Entry to ask the consumer if they're interested. + if (!handler_->Entry(fde.offset, fde.address, fde.size, + cie.version, cie.augmentation, + cie.return_address_register)) { + // The handler isn't interested in this entry. That's not an error. + ok = true; + continue; + } + + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_ + ->PersonalityRoutine(cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_ + ->LanguageSpecificDataArea(fde.lsda_address, + IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) + continue; + } + } + + // Interpret the CIE's instructions, and then the FDE's instructions. + State state(reader_, handler_, reporter_, fde.address); + ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + + // Report the end of the entry. + handler_->End(); + } + + return all_ok; +} + +const char *CallFrameInfo::KindName(EntryKind kind) { + if (kind == CallFrameInfo::kUnknown) + return "entry"; + else if (kind == CallFrameInfo::kCIE) + return "common information entry"; + else if (kind == CallFrameInfo::kFDE) + return "frame description entry"; + else { + MOZ_ASSERT (kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; + } +} + +bool CallFrameInfo::ReportIncomplete(Entry *entry) { + reporter_->Incomplete(entry->offset, entry->kind); + return false; +} + +void CallFrameInfo::Reporter::Incomplete(uint64 offset, + CallFrameInfo::EntryKind kind) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, + uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer is out of range: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer does not point to a CIE: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized version: %d\n", + filename_.c_str(), offset, section_.c_str(), version); + log_(buf); +} + +void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, + const string &aug) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized augmentation: '%s'\n", + filename_.c_str(), offset, section_.c_str(), aug.c_str()); + log_(buf); +} + +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies invalid pointer encoding: " + "0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, + uint8 encoding) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies a pointer encoding for which" + " we have no base address: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); + log_(buf); +} + +void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " the DW_CFA_restore instruction at offset 0x%llx" + " cannot be used in a common information entry\n", + filename_.c_str(), offset, section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::BadInstruction(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx is unrecognized\n", + filename_.c_str(), CallFrameInfo::KindName(kind), + offset, section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::NoCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx assumes that a CFA rule " + "has been set, but none has been set\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " should pop a saved state from the stack, but the stack " + "is empty\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + +void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + char buf[300]; + SprintfLiteral(buf, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " would clear the CFA rule in effect\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); + log_(buf); +} + + +unsigned int DwarfCFIToModule::RegisterNames::I386() { + /* + 8 "$eax", "$ecx", "$edx", "$ebx", "$esp", "$ebp", "$esi", "$edi", + 3 "$eip", "$eflags", "$unused1", + 8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + 2 "$unused2", "$unused3", + 8 "$xmm0", "$xmm1", "$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + 8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + 3 "$fcw", "$fsw", "$mxcsr", + 8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused4", "$unused5", + 2 "$tr", "$ldtr" + */ + return 8 + 3 + 8 + 2 + 8 + 8 + 3 + 8 + 2; +} + +unsigned int DwarfCFIToModule::RegisterNames::X86_64() { + /* + 8 "$rax", "$rdx", "$rcx", "$rbx", "$rsi", "$rdi", "$rbp", "$rsp", + 8 "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", + 1 "$rip", + 8 "$xmm0","$xmm1","$xmm2", "$xmm3", "$xmm4", "$xmm5", "$xmm6", "$xmm7", + 8 "$xmm8","$xmm9","$xmm10","$xmm11","$xmm12","$xmm13","$xmm14","$xmm15", + 8 "$st0", "$st1", "$st2", "$st3", "$st4", "$st5", "$st6", "$st7", + 8 "$mm0", "$mm1", "$mm2", "$mm3", "$mm4", "$mm5", "$mm6", "$mm7", + 1 "$rflags", + 8 "$es", "$cs", "$ss", "$ds", "$fs", "$gs", "$unused1", "$unused2", + 4 "$fs.base", "$gs.base", "$unused3", "$unused4", + 2 "$tr", "$ldtr", + 3 "$mxcsr", "$fcw", "$fsw" + */ + return 8 + 8 + 1 + 8 + 8 + 8 + 8 + 1 + 8 + 4 + 2 + 3; +} + +// Per ARM IHI 0040A, section 3.1 +unsigned int DwarfCFIToModule::RegisterNames::ARM() { + /* + 8 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + 8 "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc", + 8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + 8 "fps", "cpsr", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "", "", "", "", "", "", "", "", + 8 "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + 8 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + 8 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + 8 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + 8 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7" + */ + return 13 * 8; +} + +// See prototype for comments. +int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, + string expr, bool debug, + bool pushCfaAtStart, bool derefAtEnd) +{ + const char* cursor = expr.c_str(); + const char* end1 = cursor + expr.length(); + + char buf[100]; + if (debug) { + SprintfLiteral(buf, "LUL.DW << DwarfExpr, len is %d\n", + (int)(end1 - cursor)); + summ->Log(buf); + } + + // Add a marker for the start of this expression. In it, indicate + // whether or not the CFA should be pushed onto the stack prior to + // evaluation. + int32_t start_ix + = summ->AddPfxInstr(PfxInstr(PX_Start, pushCfaAtStart ? 1 : 0)); + MOZ_ASSERT(start_ix >= 0); + + while (cursor < end1) { + + uint8 opc = reader->ReadOneByte(cursor); + cursor++; + + const char* nm = nullptr; + PfxExprOp pxop = PX_End; + + switch (opc) { + + case DW_OP_lit0 ... DW_OP_lit31: { + int32_t simm32 = (int32_t)(opc - DW_OP_lit0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_lit%d\n", (int)simm32); + summ->Log(buf); + } + (void) summ->AddPfxInstr(PfxInstr(PX_SImm32, simm32)); + break; + } + + case DW_OP_breg0 ... DW_OP_breg31: { + size_t len; + int64_t n = reader->ReadSignedLEB128(cursor, &len); + cursor += len; + DW_REG_NUMBER reg = (DW_REG_NUMBER)(opc - DW_OP_breg0); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_breg%d %lld\n", + (int)reg, (long long int)n); + summ->Log(buf); + } + // PfxInstr only allows a 32 bit signed offset. So we + // must fail if the immediate is out of range. + if (n < INT32_MIN || INT32_MAX < n) + goto fail; + (void) summ->AddPfxInstr(PfxInstr(PX_DwReg, reg)); + (void) summ->AddPfxInstr(PfxInstr(PX_SImm32, (int32_t)n)); + (void) summ->AddPfxInstr(PfxInstr(PX_Add)); + break; + } + + case DW_OP_const4s: { + uint64_t u64 = reader->ReadFourBytes(cursor); + cursor += 4; + // u64 is guaranteed by |ReadFourBytes| to be in the + // range 0 .. FFFFFFFF inclusive. But to be safe: + uint32_t u32 = (uint32_t)(u64 & 0xFFFFFFFF); + int32_t s32 = (int32_t)u32; + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_const4s %d\n", (int)s32); + summ->Log(buf); + } + (void) summ->AddPfxInstr(PfxInstr(PX_SImm32, s32)); + break; + } + + case DW_OP_deref: nm = "deref"; pxop = PX_Deref; goto no_operands; + case DW_OP_and: nm = "and"; pxop = PX_And; goto no_operands; + case DW_OP_plus: nm = "plus"; pxop = PX_Add; goto no_operands; + case DW_OP_minus: nm = "minus"; pxop = PX_Sub; goto no_operands; + case DW_OP_shl: nm = "shl"; pxop = PX_Shl; goto no_operands; + case DW_OP_ge: nm = "ge"; pxop = PX_CmpGES; goto no_operands; + no_operands: + MOZ_ASSERT(nm && pxop != PX_End); + if (debug) { + SprintfLiteral(buf, "LUL.DW DW_OP_%s\n", nm); + summ->Log(buf); + } + (void) summ->AddPfxInstr(PfxInstr(pxop)); + break; + + default: + if (debug) { + SprintfLiteral(buf, "LUL.DW unknown opc %d\n", (int)opc); + summ->Log(buf); + } + goto fail; + + } // switch (opc) + + } // while (cursor < end1) + + MOZ_ASSERT(cursor >= end1); + + if (cursor > end1) { + // We overran the Dwarf expression. Give up. + goto fail; + } + + // For DW_CFA_expression, what the expression denotes is the address + // of where the previous value is located. The caller of this routine + // may therefore request one last dereference before the end marker is + // inserted. + if (derefAtEnd) { + (void) summ->AddPfxInstr(PfxInstr(PX_Deref)); + } + + // Insert an end marker, and declare success. + (void) summ->AddPfxInstr(PfxInstr(PX_End)); + if (debug) { + SprintfLiteral(buf, "LUL.DW conversion of dwarf expression succeeded, " + "ix = %d\n", (int)start_ix); + summ->Log(buf); + summ->Log("LUL.DW >>\n"); + } + return start_ix; + + fail: + if (debug) { + summ->Log("LUL.DW conversion of dwarf expression failed\n"); + summ->Log("LUL.DW >>\n"); + } + return -1; +} + + +bool DwarfCFIToModule::Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const string &augmentation, + unsigned return_address) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW DwarfCFIToModule::Entry 0x%llx,+%lld\n", + address, length); + summ_->Log(buf); + } + + summ_->Entry(address, length); + + // If dwarf2reader::CallFrameInfo can handle this version and + // augmentation, then we should be okay with that, so there's no + // need to check them here. + + // Get ready to collect entries. + return_address_ = return_address; + + // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI + // may not establish any rule for .ra if the return address column + // is an ordinary register, and that register holds the return + // address on entry to the function. So establish an initial .ra + // rule citing the return address register. + if (return_address_ < num_dw_regs_) { + summ_->Rule(address, return_address_, NODEREF, return_address, 0); + } + + return true; +} + +const UniqueString* DwarfCFIToModule::RegisterName(int i) { + if (i < 0) { + MOZ_ASSERT(i == kCFARegister); + return usu_->ToUniqueString(".cfa"); + } + unsigned reg = i; + if (reg == return_address_) + return usu_->ToUniqueString(".ra"); + + char buf[30]; + SprintfLiteral(buf, "dwarf_reg_%u", reg); + return usu_->ToUniqueString(buf); +} + +bool DwarfCFIToModule::UndefinedRule(uint64 address, int reg) { + reporter_->UndefinedNotSupported(entry_offset_, RegisterName(reg)); + // Treat this as a non-fatal error. + return true; +} + +bool DwarfCFIToModule::SameValueRule(uint64 address, int reg) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = Same\n", address, reg); + summ_->Log(buf); + } + // reg + 0 + summ_->Rule(address, reg, NODEREF, reg, 0); + return true; +} + +bool DwarfCFIToModule::OffsetRule(uint64 address, int reg, + int base_register, long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = *(r%d + %ld)\n", + address, reg, base_register, offset); + summ_->Log(buf); + } + // *(base_register + offset) + summ_->Rule(address, reg, DEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::ValOffsetRule(uint64 address, int reg, + int base_register, long offset) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d + %ld\n", + address, reg, base_register, offset); + summ_->Log(buf); + } + // base_register + offset + summ_->Rule(address, reg, NODEREF, base_register, offset); + return true; +} + +bool DwarfCFIToModule::RegisterRule(uint64 address, int reg, + int base_register) { + if (DEBUG_DWARF) { + char buf[100]; + SprintfLiteral(buf, "LUL.DW 0x%llx: old r%d = r%d\n", + address, reg, base_register); + summ_->Log(buf); + } + // base_register + 0 + summ_->Rule(address, reg, NODEREF, base_register, 0); + return true; +} + +bool DwarfCFIToModule::ExpressionRule(uint64 address, int reg, + const string &expression) +{ + bool debug = !!DEBUG_DWARF; + int32_t start_ix = parseDwarfExpr(summ_, reader_, expression, debug, + true/*pushCfaAtStart*/, + true/*derefAtEnd*/); + if (start_ix >= 0) { + summ_->Rule(address, reg, PFXEXPR, 0, start_ix); + } else { + // Parsing of the Dwarf expression failed. Treat this as a + // non-fatal error, hence return |true| even on this path. + reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg)); + } + return true; +} + +bool DwarfCFIToModule::ValExpressionRule(uint64 address, int reg, + const string &expression) +{ + bool debug = !!DEBUG_DWARF; + int32_t start_ix = parseDwarfExpr(summ_, reader_, expression, debug, + true/*pushCfaAtStart*/, + false/*!derefAtEnd*/); + if (start_ix >= 0) { + summ_->Rule(address, reg, PFXEXPR, 0, start_ix); + } else { + // Parsing of the Dwarf expression failed. Treat this as a + // non-fatal error, hence return |true| even on this path. + reporter_->ExpressionCouldNotBeSummarised(entry_offset_, RegisterName(reg)); + } + return true; +} + +bool DwarfCFIToModule::End() { + //module_->AddStackFrameEntry(entry_); + if (DEBUG_DWARF) { + summ_->Log("LUL.DW DwarfCFIToModule::End()\n"); + } + summ_->End(); + return true; +} + +void DwarfCFIToModule::Reporter::UndefinedNotSupported( + size_t offset, + const UniqueString* reg) { + char buf[300]; + SprintfLiteral(buf, "DwarfCFIToModule::Reporter::UndefinedNotSupported()\n"); + log_(buf); + //BPLOG(INFO) << file_ << ", section '" << section_ + // << "': the call frame entry at offset 0x" + // << std::setbase(16) << offset << std::setbase(10) + // << " sets the rule for register '" << FromUniqueString(reg) + // << "' to 'undefined', but the Breakpad symbol file format cannot " + // << " express this"; +} + +// FIXME: move this somewhere sensible +static bool is_power_of_2(uint64_t n) +{ + int i, nSetBits = 0; + for (i = 0; i < 8*(int)sizeof(n); i++) { + if ((n & ((uint64_t)1) << i) != 0) + nSetBits++; + } + return nSetBits <= 1; +} + +void DwarfCFIToModule::Reporter::ExpressionCouldNotBeSummarised( + size_t offset, + const UniqueString* reg) { + static uint64_t n_complaints = 0; // This isn't threadsafe + n_complaints++; + if (!is_power_of_2(n_complaints)) + return; + char buf[300]; + SprintfLiteral(buf, + "DwarfCFIToModule::Reporter::" + "ExpressionCouldNotBeSummarised(shown %llu times)\n", + (unsigned long long int)n_complaints); + log_(buf); +} + +} // namespace lul diff --git a/tools/profiler/lul/LulDwarfExt.h b/tools/profiler/lul/LulDwarfExt.h new file mode 100644 index 000000000..f3555ac55 --- /dev/null +++ b/tools/profiler/lul/LulDwarfExt.h @@ -0,0 +1,1287 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright 2006, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/types.h +// src/common/dwarf/dwarf2enums.h +// src/common/dwarf/bytereader.h +// src/common/dwarf_cfi_to_module.h +// src/common/dwarf/dwarf2reader.h + +#ifndef LulDwarfExt_h +#define LulDwarfExt_h + +#include <stdint.h> + +#include "mozilla/Assertions.h" + +#include "LulDwarfSummariser.h" + +typedef signed char int8; +typedef short int16; +typedef int int32; +typedef long long int64; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long long uint64; + +#ifdef __PTRDIFF_TYPE__ +typedef __PTRDIFF_TYPE__ intptr; +typedef unsigned __PTRDIFF_TYPE__ uintptr; +#else +#error "Can't find pointer-sized integral types." +#endif + + +namespace lul { + +// Exception handling frame description pointer formats, as described +// by the Linux Standard Base Core Specification 4.0, section 11.5, +// DWARF Extensions. +enum DwarfPointerEncoding + { + DW_EH_PE_absptr = 0x00, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + + // The GNU toolchain sources define this enum value as well, + // simply to help classify the lower nybble values into signed and + // unsigned groups. + DW_EH_PE_signed = 0x08, + + // This is not documented in LSB 4.0, but it is used in both the + // Linux and OS X toolchains. It can be added to any other + // encoding (except DW_EH_PE_aligned), and indicates that the + // encoded value represents the address at which the true address + // is stored, not the true address itself. + DW_EH_PE_indirect = 0x80 + }; + + +// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN +// because it conflicts with a macro +enum Endianness { + ENDIANNESS_BIG, + ENDIANNESS_LITTLE +}; + +// A ByteReader knows how to read single- and multi-byte values of +// various endiannesses, sizes, and encodings, as used in DWARF +// debugging information and Linux C++ exception handling data. +class ByteReader { + public: + // Construct a ByteReader capable of reading one-, two-, four-, and + // eight-byte values according to ENDIANNESS, absolute machine-sized + // addresses, DWARF-style "initial length" values, signed and + // unsigned LEB128 numbers, and Linux C++ exception handling data's + // encoded pointers. + explicit ByteReader(enum Endianness endianness); + virtual ~ByteReader(); + + // Read a single byte from BUFFER and return it as an unsigned 8 bit + // number. + uint8 ReadOneByte(const char* buffer) const; + + // Read two bytes from BUFFER and return them as an unsigned 16 bit + // number, using this ByteReader's endianness. + uint16 ReadTwoBytes(const char* buffer) const; + + // Read four bytes from BUFFER and return them as an unsigned 32 bit + // number, using this ByteReader's endianness. This function returns + // a uint64 so that it is compatible with ReadAddress and + // ReadOffset. The number it returns will never be outside the range + // of an unsigned 32 bit integer. + uint64 ReadFourBytes(const char* buffer) const; + + // Read eight bytes from BUFFER and return them as an unsigned 64 + // bit number, using this ByteReader's endianness. + uint64 ReadEightBytes(const char* buffer) const; + + // Read an unsigned LEB128 (Little Endian Base 128) number from + // BUFFER and return it as an unsigned 64 bit integer. Set LEN to + // the number of bytes read. + // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; + + // Read a signed LEB128 number from BUFFER and return it as an + // signed 64 bit integer. Set LEN to the number of bytes read. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N in two's + // complement. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + int64 ReadSignedLEB128(const char* buffer, size_t* len) const; + + // Indicate that addresses on this architecture are SIZE bytes long. SIZE + // must be either 4 or 8. (DWARF allows addresses to be any number of + // bytes in length from 1 to 255, but we only support 32- and 64-bit + // addresses at the moment.) You must call this before using the + // ReadAddress member function. + // + // For data in a .debug_info section, or something that .debug_info + // refers to like line number or macro data, the compilation unit + // header's address_size field indicates the address size to use. Call + // frame information doesn't indicate its address size (a shortcoming of + // the spec); you must supply the appropriate size based on the + // architecture of the target machine. + void SetAddressSize(uint8 size); + + // Return the current address size, in bytes. This is either 4, + // indicating 32-bit addresses, or 8, indicating 64-bit addresses. + uint8 AddressSize() const { return address_size_; } + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer, respecting this ByteReader's endianness and address size. You + // must call SetAddressSize before calling this function. + uint64 ReadAddress(const char* buffer) const; + + // DWARF actually defines two slightly different formats: 32-bit DWARF + // and 64-bit DWARF. This is *not* related to the size of registers or + // addresses on the target machine; it refers only to the size of section + // offsets and data lengths appearing in the DWARF data. One only needs + // 64-bit DWARF when the debugging data itself is larger than 4GiB. + // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the + // debugging data itself is very large. + // + // DWARF information identifies itself as 32-bit or 64-bit DWARF: each + // compilation unit and call frame information entry begins with an + // "initial length" field, which, in addition to giving the length of the + // data, also indicates the size of section offsets and lengths appearing + // in that data. The ReadInitialLength member function, below, reads an + // initial length and sets the ByteReader's offset size as a side effect. + // Thus, in the normal process of reading DWARF data, the appropriate + // offset size is set automatically. So, you should only need to call + // SetOffsetSize if you are using the same ByteReader to jump from the + // midst of one block of DWARF data into another. + + // Read a DWARF "initial length" field from START, and return it as + // an unsigned 64 bit integer, respecting this ByteReader's + // endianness. Set *LEN to the length of the initial length in + // bytes, either four or twelve. As a side effect, set this + // ByteReader's offset size to either 4 (if we see a 32-bit DWARF + // initial length) or 8 (if we see a 64-bit DWARF initial length). + // + // A DWARF initial length is either: + // + // - a byte count stored as an unsigned 32-bit value less than + // 0xffffff00, indicating that the data whose length is being + // measured uses the 32-bit DWARF format, or + // + // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, + // indicating that the data whose length is being measured uses + // the 64-bit DWARF format. + uint64 ReadInitialLength(const char* start, size_t* len); + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the + // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes + // long. You must call ReadInitialLength or SetOffsetSize before calling + // this function; see the comments above for details. + uint64 ReadOffset(const char* buffer) const; + + // Return the current offset size, in bytes. + // A return value of 4 indicates that we are reading 32-bit DWARF. + // A return value of 8 indicates that we are reading 64-bit DWARF. + uint8 OffsetSize() const { return offset_size_; } + + // Indicate that section offsets and lengths are SIZE bytes long. SIZE + // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). + // Usually, you should not call this function yourself; instead, let a + // call to ReadInitialLength establish the data's offset size + // automatically. + void SetOffsetSize(uint8 size); + + // The Linux C++ ABI uses a variant of DWARF call frame information + // for exception handling. This data is included in the program's + // address space as the ".eh_frame" section, and intepreted at + // runtime to walk the stack, find exception handlers, and run + // cleanup code. The format is mostly the same as DWARF CFI, with + // some adjustments made to provide the additional + // exception-handling data, and to make the data easier to work with + // in memory --- for example, to allow it to be placed in read-only + // memory even when describing position-independent code. + // + // In particular, exception handling data can select a number of + // different encodings for pointers that appear in the data, as + // described by the DwarfPointerEncoding enum. There are actually + // four axes(!) to the encoding: + // + // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use + // the DWARF LEB128 encoding. + // + // - The pointer's signedness: pointers can be signed or unsigned. + // + // - The pointer's base address: the data stored in the exception + // handling data can be the actual address (that is, an absolute + // pointer), or relative to one of a number of different base + // addreses --- including that of the encoded pointer itself, for + // a form of "pc-relative" addressing. + // + // - The pointer may be indirect: it may be the address where the + // true pointer is stored. (This is used to refer to things via + // global offset table entries, program linkage table entries, or + // other tricks used in position-independent code.) + // + // There are also two options that fall outside that matrix + // altogether: the pointer may be omitted, or it may have padding to + // align it on an appropriate address boundary. (That last option + // may seem like it should be just another axis, but it is not.) + + // Indicate that the exception handling data is loaded starting at + // SECTION_BASE, and that the start of its buffer in our own memory + // is BUFFER_BASE. This allows us to find the address that a given + // byte in our buffer would have when loaded into the program the + // data describes. We need this to resolve DW_EH_PE_pcrel pointers. + void SetCFIDataBase(uint64 section_base, const char *buffer_base); + + // Indicate that the base address of the program's ".text" section + // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. + void SetTextBase(uint64 text_base); + + // Indicate that the base address for DW_EH_PE_datarel pointers is + // DATA_BASE. The proper value depends on the ABI; it is usually the + // address of the global offset table, held in a designated register in + // position-independent code. You will need to look at the startup code + // for the target system to be sure. I tried; my eyes bled. + void SetDataBase(uint64 data_base); + + // Indicate that the base address for the FDE we are processing is + // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel + // pointers. (This encoding does not seem to be used by the GNU + // toolchain.) + void SetFunctionBase(uint64 function_base); + + // Indicate that we are no longer processing any FDE, so any use of + // a DW_EH_PE_funcrel encoding is an error. + void ClearFunctionBase(); + + // Return true if ENCODING is a valid pointer encoding. + bool ValidEncoding(DwarfPointerEncoding encoding) const; + + // Return true if we have all the information we need to read a + // pointer that uses ENCODING. This checks that the appropriate + // SetFooBase function for ENCODING has been called. + bool UsableEncoding(DwarfPointerEncoding encoding) const; + + // Read an encoded pointer from BUFFER using ENCODING; return the + // absolute address it represents, and set *LEN to the pointer's + // length in bytes, including any padding for aligned pointers. + // + // This function calls 'abort' if ENCODING is invalid or refers to a + // base address this reader hasn't been given, so you should check + // with ValidEncoding and UsableEncoding first if you would rather + // die in a more helpful way. + uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, + size_t *len) const; + + private: + + // Function pointer type for our address and offset readers. + typedef uint64 (ByteReader::*AddressReader)(const char*) const; + + // Read an offset from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 define offsets as either 4 or 8 bytes, + // generally depending on the amount of DWARF2/3 info present. + // This function pointer gets set by SetOffsetSize. + AddressReader offset_reader_; + + // Read an address from BUFFER and return it as an unsigned 64 bit + // integer. DWARF2/3 allow addresses to be any size from 0-255 + // bytes currently. Internally we support 4 and 8 byte addresses, + // and will CHECK on anything else. + // This function pointer gets set by SetAddressSize. + AddressReader address_reader_; + + Endianness endian_; + uint8 address_size_; + uint8 offset_size_; + + // Base addresses for Linux C++ exception handling data's encoded pointers. + bool have_section_base_, have_text_base_, have_data_base_; + bool have_function_base_; + uint64 section_base_; + uint64 text_base_, data_base_, function_base_; + const char *buffer_base_; +}; + + +inline uint8 ByteReader::ReadOneByte(const char* buffer) const { + return buffer[0]; +} + +inline uint16 ByteReader::ReadTwoBytes(const char* signed_buffer) const { + const unsigned char *buffer + = reinterpret_cast<const unsigned char *>(signed_buffer); + const uint16 buffer0 = buffer[0]; + const uint16 buffer1 = buffer[1]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8; + } else { + return buffer1 | buffer0 << 8; + } +} + +inline uint64 ByteReader::ReadFourBytes(const char* signed_buffer) const { + const unsigned char *buffer + = reinterpret_cast<const unsigned char *>(signed_buffer); + const uint32 buffer0 = buffer[0]; + const uint32 buffer1 = buffer[1]; + const uint32 buffer2 = buffer[2]; + const uint32 buffer3 = buffer[3]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24; + } else { + return buffer3 | buffer2 << 8 | buffer1 << 16 | buffer0 << 24; + } +} + +inline uint64 ByteReader::ReadEightBytes(const char* signed_buffer) const { + const unsigned char *buffer + = reinterpret_cast<const unsigned char *>(signed_buffer); + const uint64 buffer0 = buffer[0]; + const uint64 buffer1 = buffer[1]; + const uint64 buffer2 = buffer[2]; + const uint64 buffer3 = buffer[3]; + const uint64 buffer4 = buffer[4]; + const uint64 buffer5 = buffer[5]; + const uint64 buffer6 = buffer[6]; + const uint64 buffer7 = buffer[7]; + if (endian_ == ENDIANNESS_LITTLE) { + return buffer0 | buffer1 << 8 | buffer2 << 16 | buffer3 << 24 | + buffer4 << 32 | buffer5 << 40 | buffer6 << 48 | buffer7 << 56; + } else { + return buffer7 | buffer6 << 8 | buffer5 << 16 | buffer4 << 24 | + buffer3 << 32 | buffer2 << 40 | buffer1 << 48 | buffer0 << 56; + } +} + +// Read an unsigned LEB128 number. Each byte contains 7 bits of +// information, plus one bit saying whether the number continues or +// not. + +inline uint64 ByteReader::ReadUnsignedLEB128(const char* buffer, + size_t* len) const { + uint64 result = 0; + size_t num_read = 0; + unsigned int shift = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + + result |= (static_cast<uint64>(byte & 0x7f)) << shift; + + shift += 7; + + } while (byte & 0x80); + + *len = num_read; + + return result; +} + +// Read a signed LEB128 number. These are like regular LEB128 +// numbers, except the last byte may have a sign bit set. + +inline int64 ByteReader::ReadSignedLEB128(const char* buffer, + size_t* len) const { + int64 result = 0; + unsigned int shift = 0; + size_t num_read = 0; + unsigned char byte; + + do { + byte = *buffer++; + num_read++; + result |= (static_cast<uint64>(byte & 0x7f) << shift); + shift += 7; + } while (byte & 0x80); + + if ((shift < 8 * sizeof (result)) && (byte & 0x40)) + result |= -((static_cast<int64>(1)) << shift); + *len = num_read; + return result; +} + +inline uint64 ByteReader::ReadOffset(const char* buffer) const { + MOZ_ASSERT(this->offset_reader_); + return (this->*offset_reader_)(buffer); +} + +inline uint64 ByteReader::ReadAddress(const char* buffer) const { + MOZ_ASSERT(this->address_reader_); + return (this->*address_reader_)(buffer); +} + +inline void ByteReader::SetCFIDataBase(uint64 section_base, + const char *buffer_base) { + section_base_ = section_base; + buffer_base_ = buffer_base; + have_section_base_ = true; +} + +inline void ByteReader::SetTextBase(uint64 text_base) { + text_base_ = text_base; + have_text_base_ = true; +} + +inline void ByteReader::SetDataBase(uint64 data_base) { + data_base_ = data_base; + have_data_base_ = true; +} + +inline void ByteReader::SetFunctionBase(uint64 function_base) { + function_base_ = function_base; + have_function_base_ = true; +} + +inline void ByteReader::ClearFunctionBase() { + have_function_base_ = false; +} + + +// (derived from) +// dwarf_cfi_to_module.h: Define the DwarfCFIToModule class, which +// accepts parsed DWARF call frame info and adds it to a Summariser object. + +// This class is a reader for DWARF's Call Frame Information. CFI +// describes how to unwind stack frames --- even for functions that do +// not follow fixed conventions for saving registers, whose frame size +// varies as they execute, etc. +// +// CFI describes, at each machine instruction, how to compute the +// stack frame's base address, how to find the return address, and +// where to find the saved values of the caller's registers (if the +// callee has stashed them somewhere to free up the registers for its +// own use). +// +// For example, suppose we have a function whose machine code looks +// like this (imagine an assembly language that looks like C, for a +// machine with 32-bit registers, and a stack that grows towards lower +// addresses): +// +// func: ; entry point; return address at sp +// func+0: sp = sp - 16 ; allocate space for stack frame +// func+1: sp[12] = r0 ; save r0 at sp+12 +// ... ; other code, not frame-related +// func+10: sp -= 4; *sp = x ; push some x on the stack +// ... ; other code, not frame-related +// func+20: r0 = sp[16] ; restore saved r0 +// func+21: sp += 20 ; pop whole stack frame +// func+22: pc = *sp; sp += 4 ; pop return address and jump to it +// +// DWARF CFI is (a very compressed representation of) a table with a +// row for each machine instruction address and a column for each +// register showing how to restore it, if possible. +// +// A special column named "CFA", for "Canonical Frame Address", tells how +// to compute the base address of the frame; registers' entries may +// refer to the CFA in describing where the registers are saved. +// +// Another special column, named "RA", represents the return address. +// +// For example, here is a complete (uncompressed) table describing the +// function above: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 cfa[0] +// func+2: sp+16 cfa[-4] cfa[0] +// func+11: sp+20 cfa[-4] cfa[0] +// func+21: sp+20 cfa[0] +// func+22: sp cfa[0] +// +// Some things to note here: +// +// - Each row describes the state of affairs *before* executing the +// instruction at the given address. Thus, the row for func+0 +// describes the state before we allocate the stack frame. In the +// next row, the formula for computing the CFA has changed, +// reflecting that allocation. +// +// - The other entries are written in terms of the CFA; this allows +// them to remain unchanged as the stack pointer gets bumped around. +// For example, the rule for recovering the return address (the "ra" +// column) remains unchanged throughout the function, even as the +// stack pointer takes on three different offsets from the return +// address. +// +// - Although we haven't shown it, most calling conventions designate +// "callee-saves" and "caller-saves" registers. The callee must +// preserve the values of callee-saves registers; if it uses them, +// it must save their original values somewhere, and restore them +// before it returns. In contrast, the callee is free to trash +// caller-saves registers; if the callee uses these, it will +// probably not bother to save them anywhere, and the CFI will +// probably mark their values as "unrecoverable". +// +// (However, since the caller cannot assume the callee was going to +// save them, caller-saves registers are probably dead in the caller +// anyway, so compilers usually don't generate CFA for caller-saves +// registers.) +// +// - Exactly where the CFA points is a matter of convention that +// depends on the architecture and ABI in use. In the example, the +// CFA is the value the stack pointer had upon entry to the +// function, pointing at the saved return address. But on the x86, +// the call frame information generated by GCC follows the +// convention that the CFA is the address *after* the saved return +// address. +// +// But by definition, the CFA remains constant throughout the +// lifetime of the frame. This makes it a useful value for other +// columns to refer to. It is also gives debuggers a useful handle +// for identifying a frame. +// +// If you look at the table above, you'll notice that a given entry is +// often the same as the one immediately above it: most instructions +// change only one or two aspects of the stack frame, if they affect +// it at all. The DWARF format takes advantage of this fact, and +// reduces the size of the data by mentioning only the addresses and +// columns at which changes take place. So for the above, DWARF CFI +// data would only actually mention the following: +// +// insn cfa r0 r1 ... ra +// ======================================= +// func+0: sp cfa[0] +// func+1: sp+16 +// func+2: cfa[-4] +// func+11: sp+20 +// func+21: r0 +// func+22: sp +// +// In fact, this is the way the parser reports CFI to the consumer: as +// a series of statements of the form, "At address X, column Y changed +// to Z," and related conventions for describing the initial state. +// +// Naturally, it would be impractical to have to scan the entire +// program's CFI, noting changes as we go, just to recover the +// unwinding rules in effect at one particular instruction. To avoid +// this, CFI data is grouped into "entries", each of which covers a +// specified range of addresses and begins with a complete statement +// of the rules for all recoverable registers at that starting +// address. Each entry typically covers a single function. +// +// Thus, to compute the contents of a given row of the table --- that +// is, rules for recovering the CFA, RA, and registers at a given +// instruction --- the consumer should find the entry that covers that +// instruction's address, start with the initial state supplied at the +// beginning of the entry, and work forward until it has processed all +// the changes up to and including those for the present instruction. +// +// There are seven kinds of rules that can appear in an entry of the +// table: +// +// - "undefined": The given register is not preserved by the callee; +// its value cannot be recovered. +// +// - "same value": This register has the same value it did in the callee. +// +// - offset(N): The register is saved at offset N from the CFA. +// +// - val_offset(N): The value the register had in the caller is the +// CFA plus offset N. (This is usually only useful for describing +// the stack pointer.) +// +// - register(R): The register's value was saved in another register R. +// +// - expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the address at which the +// register was saved. +// +// - val_expression(E): Evaluating the DWARF expression E using the +// current frame's registers' values yields the value the register +// had in the caller. + +class CallFrameInfo { + public: + // The different kinds of entries one finds in CFI. Used internally, + // and for error reporting. + enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; + + // The handler class to which the parser hands the parsed call frame + // information. Defined below. + class Handler; + + // A reporter class, which CallFrameInfo uses to report errors + // encountered while parsing call frame information. Defined below. + class Reporter; + + // Create a DWARF CFI parser. BUFFER points to the contents of the + // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. + // REPORTER is an error reporter the parser should use to report + // problems. READER is a ByteReader instance that has the endianness and + // address size set properly. Report the data we find to HANDLER. + // + // This class can also parse Linux C++ exception handling data, as found + // in '.eh_frame' sections. This data is a variant of DWARF CFI that is + // placed in loadable segments so that it is present in the program's + // address space, and is interpreted by the C++ runtime to search the + // call stack for a handler interested in the exception being thrown, + // actually pop the frames, and find cleanup code to run. + // + // There are two differences between the call frame information described + // in the DWARF standard and the exception handling data Linux places in + // the .eh_frame section: + // + // - Exception handling data uses uses a different format for call frame + // information entry headers. The distinguished CIE id, the way FDEs + // refer to their CIEs, and the way the end of the series of entries is + // determined are all slightly different. + // + // If the constructor's EH_FRAME argument is true, then the + // CallFrameInfo parses the entry headers as Linux C++ exception + // handling data. If EH_FRAME is false or omitted, the CallFrameInfo + // parses standard DWARF call frame information. + // + // - Linux C++ exception handling data uses CIE augmentation strings + // beginning with 'z' to specify the presence of additional data after + // the CIE and FDE headers and special encodings used for addresses in + // frame description entries. + // + // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or + // exception handling data if you have supplied READER with the base + // addresses needed to interpret the pointer encodings that 'z' + // augmentations can specify. See the ByteReader interface for details + // about the base addresses. See the CallFrameInfo::Handler interface + // for details about the additional information one might find in + // 'z'-augmented data. + // + // Thus: + // + // - If you are parsing standard DWARF CFI, as found in a .debug_frame + // section, you should pass false for the EH_FRAME argument, or omit + // it, and you need not worry about providing READER with the + // additional base addresses. + // + // - If you want to parse Linux C++ exception handling data from a + // .eh_frame section, you should pass EH_FRAME as true, and call + // READER's Set*Base member functions before calling our Start method. + // + // - If you want to parse DWARF CFI that uses the 'z' augmentations + // (although I don't think any toolchain ever emits such data), you + // could pass false for EH_FRAME, but call READER's Set*Base members. + // + // The extensions the Linux C++ ABI makes to DWARF for exception + // handling are described here, rather poorly: + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // The mechanics of C++ exception handling, personality routines, + // and language-specific data areas are described here, rather nicely: + // http://www.codesourcery.com/public/cxx-abi/abi-eh.html + + CallFrameInfo(const char *buffer, size_t buffer_length, + ByteReader *reader, Handler *handler, Reporter *reporter, + bool eh_frame = false) + : buffer_(buffer), buffer_length_(buffer_length), + reader_(reader), handler_(handler), reporter_(reporter), + eh_frame_(eh_frame) { } + + ~CallFrameInfo() { } + + // Parse the entries in BUFFER, reporting what we find to HANDLER. + // Return true if we reach the end of the section successfully, or + // false if we encounter an error. + bool Start(); + + // Return the textual name of KIND. For error reporting. + static const char *KindName(EntryKind kind); + + private: + + struct CIE; + + // A CFI entry, either an FDE or a CIE. + struct Entry { + // The starting offset of the entry in the section, for error + // reporting. + size_t offset; + + // The start of this entry in the buffer. + const char *start; + + // Which kind of entry this is. + // + // We want to be able to use this for error reporting even while we're + // in the midst of parsing. Error reporting code may assume that kind, + // offset, and start fields are valid, although kind may be kUnknown. + EntryKind kind; + + // The end of this entry's common prologue (initial length and id), and + // the start of this entry's kind-specific fields. + const char *fields; + + // The start of this entry's instructions. + const char *instructions; + + // The address past the entry's last byte in the buffer. (Note that + // since offset points to the entry's initial length field, and the + // length field is the number of bytes after that field, this is not + // simply buffer_ + offset + length.) + const char *end; + + // For both DWARF CFI and .eh_frame sections, this is the CIE id in a + // CIE, and the offset of the associated CIE in an FDE. + uint64 id; + + // The CIE that applies to this entry, if we've parsed it. If this is a + // CIE, then this field points to this structure. + CIE *cie; + }; + + // A common information entry (CIE). + struct CIE: public Entry { + uint8 version; // CFI data version number + std::string augmentation; // vendor format extension markers + uint64 code_alignment_factor; // scale for code address adjustments + int data_alignment_factor; // scale for stack pointer adjustments + unsigned return_address_register; // which register holds the return addr + + // True if this CIE includes Linux C++ ABI 'z' augmentation data. + bool has_z_augmentation; + + // Parsed 'z' augmentation data. These are meaningful only if + // has_z_augmentation is true. + bool has_z_lsda; // The 'z' augmentation included 'L'. + bool has_z_personality; // The 'z' augmentation included 'P'. + bool has_z_signal_frame; // The 'z' augmentation included 'S'. + + // If has_z_lsda is true, this is the encoding to be used for language- + // specific data area pointers in FDEs. + DwarfPointerEncoding lsda_encoding; + + // If has_z_personality is true, this is the encoding used for the + // personality routine pointer in the augmentation data. + DwarfPointerEncoding personality_encoding; + + // If has_z_personality is true, this is the address of the personality + // routine --- or, if personality_encoding & DW_EH_PE_indirect, the + // address where the personality routine's address is stored. + uint64 personality_address; + + // This is the encoding used for addresses in the FDE header and + // in DW_CFA_set_loc instructions. This is always valid, whether + // or not we saw a 'z' augmentation string; its default value is + // DW_EH_PE_absptr, which is what normal DWARF CFI uses. + DwarfPointerEncoding pointer_encoding; + }; + + // A frame description entry (FDE). + struct FDE: public Entry { + uint64 address; // start address of described code + uint64 size; // size of described code, in bytes + + // If cie->has_z_lsda is true, then this is the language-specific data + // area's address --- or its address's address, if cie->lsda_encoding + // has the DW_EH_PE_indirect bit set. + uint64 lsda_address; + }; + + // Internal use. + class Rule; + class UndefinedRule; + class SameValueRule; + class OffsetRule; + class ValOffsetRule; + class RegisterRule; + class ExpressionRule; + class ValExpressionRule; + class RuleMap; + class State; + + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, + // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the + // data to parse. On success, populate ENTRY as appropriate, and return + // true. On failure, report the problem, and return false. Even if we + // return false, set ENTRY->end to the first byte after the entry if we + // were able to figure that out, or NULL if we weren't. + bool ReadEntryPrologue(const char *cursor, Entry *entry); + + // Parse the fields of a CIE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of CIE are + // populated; use CIE->fields and CIE->end as the start and limit for + // parsing. On success, populate the rest of *CIE, and return true; on + // failure, report the problem and return false. + bool ReadCIEFields(CIE *cie); + + // Parse the fields of an FDE after the entry prologue, including any 'z' + // augmentation data. Assume that the 'Entry' fields of *FDE are + // initialized; use FDE->fields and FDE->end as the start and limit for + // parsing. Assume that FDE->cie is fully initialized. On success, + // populate the rest of *FDE, and return true; on failure, report the + // problem and return false. + bool ReadFDEFields(FDE *fde); + + // Report that ENTRY is incomplete, and return false. This is just a + // trivial wrapper for invoking reporter_->Incomplete; it provides a + // little brevity. + bool ReportIncomplete(Entry *entry); + + // Return true if ENCODING has the DW_EH_PE_indirect bit set. + static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { + return encoding & DW_EH_PE_indirect; + } + + // The contents of the DWARF .debug_info section we're parsing. + const char *buffer_; + size_t buffer_length_; + + // For reading multi-byte values with the appropriate endianness. + ByteReader *reader_; + + // The handler to which we should report the data we find. + Handler *handler_; + + // For reporting problems in the info we're parsing. + Reporter *reporter_; + + // True if we are processing .eh_frame-format data. + bool eh_frame_; +}; + + +// The handler class for CallFrameInfo. The a CFI parser calls the +// member functions of a handler object to report the data it finds. +class CallFrameInfo::Handler { + public: + // The pseudo-register number for the canonical frame address. + enum { kCFARegister = DW_REG_CFA }; + + Handler() { } + virtual ~Handler() { } + + // The parser has found CFI for the machine code at ADDRESS, + // extending for LENGTH bytes. OFFSET is the offset of the frame + // description entry in the section, for use in error messages. + // VERSION is the version number of the CFI format. AUGMENTATION is + // a string describing any producer-specific extensions present in + // the data. RETURN_ADDRESS is the number of the register that holds + // the address to which the function should return. + // + // Entry should return true to process this CFI, or false to skip to + // the next entry. + // + // The parser invokes Entry for each Frame Description Entry (FDE) + // it finds. The parser doesn't report Common Information Entries + // to the handler explicitly; instead, if the handler elects to + // process a given FDE, the parser reiterates the appropriate CIE's + // contents at the beginning of the FDE's rules. + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string &augmentation, + unsigned return_address) = 0; + + // When the Entry function returns true, the parser calls these + // handler functions repeatedly to describe the rules for recovering + // registers at each instruction in the given range of machine code. + // Immediately after a call to Entry, the handler should assume that + // the rule for each callee-saves register is "unchanged" --- that + // is, that the register still has the value it had in the caller. + // + // If a *Rule function returns true, we continue processing this entry's + // instructions. If a *Rule function returns false, we stop evaluating + // instructions, and skip to the next entry. Either way, we call End + // before going on to the next entry. + // + // In all of these functions, if the REG parameter is kCFARegister, then + // the rule describes how to find the canonical frame address. + // kCFARegister may be passed as a BASE_REGISTER argument, meaning that + // the canonical frame address should be used as the base address for the + // computation. All other REG values will be positive. + + // At ADDRESS, register REG's value is not recoverable. + virtual bool UndefinedRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG's value is the same as that it had in + // the caller. + virtual bool SameValueRule(uint64 address, int reg) = 0; + + // At ADDRESS, register REG has been saved at offset OFFSET from + // BASE_REGISTER. + virtual bool OffsetRule(uint64 address, int reg, + int base_register, long offset) = 0; + + // At ADDRESS, the caller's value of register REG is the current + // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an + // address at which the register's value is saved.) + virtual bool ValOffsetRule(uint64 address, int reg, + int base_register, long offset) = 0; + + // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs + // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that + // BASE_REGISTER is the "home" for REG's saved value: if you want to + // assign to a variable whose home is REG in the calling frame, you + // should put the value in BASE_REGISTER. + virtual bool RegisterRule(uint64 address, int reg, int base_register) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the address at + // which REG was saved. + virtual bool ExpressionRule(uint64 address, int reg, + const std::string &expression) = 0; + + // At ADDRESS, the DWARF expression EXPRESSION yields the caller's + // value for REG. (This rule doesn't provide an address at which the + // register's value is saved.) + virtual bool ValExpressionRule(uint64 address, int reg, + const std::string &expression) = 0; + + // Indicate that the rules for the address range reported by the + // last call to Entry are complete. End should return true if + // everything is okay, or false if an error has occurred and parsing + // should stop. + virtual bool End() = 0; + + // Handler functions for Linux C++ exception handling data. These are + // only called if the data includes 'z' augmentation strings. + + // The Linux C++ ABI uses an extension of the DWARF CFI format to + // walk the stack to propagate exceptions from the throw to the + // appropriate catch, and do the appropriate cleanups along the way. + // CFI entries used for exception handling have two additional data + // associated with them: + // + // - The "language-specific data area" describes which exception + // types the function has 'catch' clauses for, and indicates how + // to go about re-entering the function at the appropriate catch + // clause. If the exception is not caught, it describes the + // destructors that must run before the frame is popped. + // + // - The "personality routine" is responsible for interpreting the + // language-specific data area's contents, and deciding whether + // the exception should continue to propagate down the stack, + // perhaps after doing some cleanup for this frame, or whether the + // exception will be caught here. + // + // In principle, the language-specific data area is opaque to + // everybody but the personality routine. In practice, these values + // may be useful or interesting to readers with extra context, and + // we have to at least skip them anyway, so we might as well report + // them to the handler. + + // This entry's exception handling personality routine's address is + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the routine's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool PersonalityRoutine(uint64 address, bool indirect) { + return true; + } + + // This entry's language-specific data area (LSDA) is located at + // ADDRESS. If INDIRECT is true, then ADDRESS is the address at + // which the area's address is stored. The default definition for + // this handler function simply returns true, allowing parsing of + // the entry to continue. + virtual bool LanguageSpecificDataArea(uint64 address, bool indirect) { + return true; + } + + // This entry describes a signal trampoline --- this frame is the + // caller of a signal handler. The default definition for this + // handler function simply returns true, allowing parsing of the + // entry to continue. + // + // The best description of the rationale for and meaning of signal + // trampoline CFI entries seems to be in the GCC bug database: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 + virtual bool SignalHandler() { return true; } +}; + + +// The CallFrameInfo class makes calls on an instance of this class to +// report errors or warn about problems in the data it is parsing. +// These messages are sent to the message sink |aLog| provided to the +// constructor. +class CallFrameInfo::Reporter { + public: + // Create an error reporter which attributes troubles to the section + // named SECTION in FILENAME. + // + // Normally SECTION would be .debug_frame, but the Mac puts CFI data + // in a Mach-O section named __debug_frame. If we support + // Linux-style exception handling data, we could be reading an + // .eh_frame section. + Reporter(void (*aLog)(const char*), + const std::string &filename, + const std::string §ion = ".debug_frame") + : log_(aLog), filename_(filename), section_(section) { } + virtual ~Reporter() { } + + // The CFI entry at OFFSET ends too early to be well-formed. KIND + // indicates what kind of entry it is; KIND can be kUnknown if we + // haven't parsed enough of the entry to tell yet. + virtual void Incomplete(uint64 offset, CallFrameInfo::EntryKind kind); + + // The .eh_frame data has a four-byte zero at OFFSET where the next + // entry's length would be; this is a terminator. However, the buffer + // length as given to the CallFrameInfo constructor says there should be + // more data. + virtual void EarlyEHTerminator(uint64 offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the + // section is not that large. + virtual void CIEPointerOutOfRange(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry + // there is not a CIE. + virtual void BadCIEId(uint64 offset, uint64 cie_offset); + + // The FDE at OFFSET refers to a CIE with version number VERSION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // a version number we don't recognize. + virtual void UnrecognizedVersion(uint64 offset, int version); + + // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, + // which we don't recognize. We cannot parse DWARF CFI if it uses + // augmentations we don't recognize. + virtual void UnrecognizedAugmentation(uint64 offset, + const std::string &augmentation); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not + // a valid encoding. + virtual void InvalidPointerEncoding(uint64 offset, uint8 encoding); + + // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends + // on a base address which has not been supplied. + virtual void UnusablePointerEncoding(uint64 offset, uint8 encoding); + + // The CIE at OFFSET contains a DW_CFA_restore instruction at + // INSN_OFFSET, which may not appear in a CIE. + virtual void RestoreInCIE(uint64 offset, uint64 insn_offset); + + // The entry at OFFSET, of kind KIND, has an unrecognized + // instruction at INSN_OFFSET. + virtual void BadInstruction(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, establishes a rule that cites the CFA, but we have not + // established a CFA rule yet. + virtual void NoCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The instruction at INSN_OFFSET in the entry at OFFSET, of kind + // KIND, is a DW_CFA_restore_state instruction, but the stack of + // saved states is empty. + virtual void EmptyStateStack(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry + // at OFFSET, of kind KIND, would restore a state that has no CFA + // rule, whereas the current state does have a CFA rule. This is + // bogus input, which the CallFrameInfo::Handler interface doesn't + // (and shouldn't) have any way to report. + virtual void ClearingCFARule(uint64 offset, CallFrameInfo::EntryKind kind, + uint64 insn_offset); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + + protected: + // The name of the file whose CFI we're reading. + std::string filename_; + + // The name of the CFI section in that file. + std::string section_; +}; + + +using lul::CallFrameInfo; +using lul::Summariser; + +// A class that accepts parsed call frame information from the DWARF +// CFI parser and populates a google_breakpad::Module object with the +// contents. +class DwarfCFIToModule: public CallFrameInfo::Handler { + public: + + // DwarfCFIToModule uses an instance of this class to report errors + // detected while converting DWARF CFI to Breakpad STACK CFI records. + class Reporter { + public: + // Create a reporter that writes messages to the message sink + // |aLog|. FILE is the name of the file we're processing, and + // SECTION is the name of the section within that file that we're + // looking at (.debug_frame, .eh_frame, etc.). + Reporter(void (*aLog)(const char*), + const std::string &file, const std::string §ion) + : log_(aLog), file_(file), section_(section) { } + virtual ~Reporter() { } + + // The DWARF CFI entry at OFFSET says that REG is undefined, but the + // Breakpad symbol file format cannot express this. + virtual void UndefinedNotSupported(size_t offset, + const UniqueString* reg); + + // The DWARF CFI entry at OFFSET says that REG uses a DWARF + // expression to find its value, but parseDwarfExpr could not + // convert it to a sequence of PfxInstrs. + virtual void ExpressionCouldNotBeSummarised(size_t offset, + const UniqueString* reg); + + private: + // A logging sink function, as supplied by LUL's user. + void (*log_)(const char*); + protected: + std::string file_, section_; + }; + + // Register name tables. If TABLE is a vector returned by one of these + // functions, then TABLE[R] is the name of the register numbered R in + // DWARF call frame information. + class RegisterNames { + public: + // Intel's "x86" or IA-32. + static unsigned int I386(); + + // AMD x86_64, AMD64, Intel EM64T, or Intel 64 + static unsigned int X86_64(); + + // ARM. + static unsigned int ARM(); + }; + + // Create a handler for the dwarf2reader::CallFrameInfo parser that + // records the stack unwinding information it receives in SUMM. + // + // Use REGISTER_NAMES[I] as the name of register number I; *this + // keeps a reference to the vector, so the vector should remain + // alive for as long as the DwarfCFIToModule does. + // + // Use REPORTER for reporting problems encountered in the conversion + // process. + DwarfCFIToModule(const unsigned int num_dw_regs, + Reporter *reporter, + ByteReader* reader, + /*MOD*/UniqueStringUniverse* usu, + /*OUT*/Summariser* summ) + : summ_(summ), usu_(usu), num_dw_regs_(num_dw_regs), + reporter_(reporter), reader_(reader), return_address_(-1) { + } + virtual ~DwarfCFIToModule() {} + + virtual bool Entry(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string &augmentation, + unsigned return_address); + virtual bool UndefinedRule(uint64 address, int reg); + virtual bool SameValueRule(uint64 address, int reg); + virtual bool OffsetRule(uint64 address, int reg, + int base_register, long offset); + virtual bool ValOffsetRule(uint64 address, int reg, + int base_register, long offset); + virtual bool RegisterRule(uint64 address, int reg, int base_register); + virtual bool ExpressionRule(uint64 address, int reg, + const std::string &expression); + virtual bool ValExpressionRule(uint64 address, int reg, + const std::string &expression); + virtual bool End(); + + private: + // Return the name to use for register I. + const UniqueString* RegisterName(int i); + + // The Summariser to which we should give entries + Summariser* summ_; + + // Universe for creating UniqueStrings in, should that be necessary. + UniqueStringUniverse* usu_; + + // The number of Dwarf-defined register names for this architecture. + const unsigned int num_dw_regs_; + + // The reporter to use to report problems. + Reporter *reporter_; + + // The ByteReader to use for parsing Dwarf expressions. + ByteReader* reader_; + + // The section offset of the current frame description entry, for + // use in error messages. + size_t entry_offset_; + + // The return address column for that entry. + unsigned return_address_; +}; + + +// Convert the Dwarf expression in |expr| into PfxInstrs stored in the +// SecMap referred to by |summ|, and return the index of the starting +// PfxInstr added, which must be >= 0. In case of failure return -1. +int32_t parseDwarfExpr(Summariser* summ, const ByteReader* reader, + string expr, bool debug, + bool pushCfaAtStart, bool derefAtEnd); + +} // namespace lul + +#endif // LulDwarfExt_h diff --git a/tools/profiler/lul/LulDwarfInt.h b/tools/profiler/lul/LulDwarfInt.h new file mode 100644 index 000000000..05c231f84 --- /dev/null +++ b/tools/profiler/lul/LulDwarfInt.h @@ -0,0 +1,194 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2008, 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// This file is derived from the following file in +// toolkit/crashreporter/google-breakpad: +// src/common/dwarf/dwarf2enums.h + +#ifndef LulDwarfInt_h +#define LulDwarfInt_h + +#include "LulCommonExt.h" +#include "LulDwarfExt.h" + +namespace lul { + +// These enums do not follow the google3 style only because they are +// known universally (specs, other implementations) by the names in +// exactly this capitalization. +// Tag names and codes. + +// Call Frame Info instructions. +enum DwarfCFI + { + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80, + DW_CFA_restore = 0xc0, + DW_CFA_nop = 0x00, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + + // Opcodes in this range are reserved for user extensions. + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + // SGI/MIPS specific. + DW_CFA_MIPS_advance_loc8 = 0x1d, + + // GNU extensions. + DW_CFA_GNU_window_save = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + DW_CFA_GNU_negative_offset_extended = 0x2f + }; + +// Exception handling 'z' augmentation letters. +enum DwarfZAugmentationCodes { + // If the CFI augmentation string begins with 'z', then the CIE and FDE + // have an augmentation data area just before the instructions, whose + // contents are determined by the subsequent augmentation letters. + DW_Z_augmentation_start = 'z', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, and the FDE + // augmentation data includes a language-specific data area pointer, + // represented using that encoding. + DW_Z_has_LSDA = 'L', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding, followed by a pointer + // to a personality routine, represented using that encoding. + DW_Z_has_personality_routine = 'P', + + // If this letter is present in a 'z' augmentation string, the CIE + // augmentation data includes a pointer encoding describing how the FDE's + // initial location, address range, and DW_CFA_set_loc operands are + // encoded. + DW_Z_has_FDE_address_encoding = 'R', + + // If this letter is present in a 'z' augmentation string, then code + // addresses covered by FDEs that cite this CIE are signal delivery + // trampolines. Return addresses of frames in trampolines should not be + // adjusted as described in section 6.4.4 of the DWARF 3 spec. + DW_Z_is_signal_trampoline = 'S' +}; + +// Expression opcodes +enum DwarfExpressionOpcodes { + DW_OP_addr = 0x03, + DW_OP_deref = 0x06, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_const8u = 0x0e, + DW_OP_const8s = 0x0f, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1a, + DW_OP_div = 0x1b, + DW_OP_minus = 0x1c, + DW_OP_mod = 0x1d, + DW_OP_mul = 0x1e, + DW_OP_neg = 0x1f, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2f, + DW_OP_bra = 0x28, + DW_OP_eq = 0x29, + DW_OP_ge = 0x2a, + DW_OP_gt = 0x2b, + DW_OP_le = 0x2c, + DW_OP_lt = 0x2d, + DW_OP_ne = 0x2e, + DW_OP_lit0 = 0x30, + DW_OP_lit31 = 0x4f, + DW_OP_reg0 = 0x50, + DW_OP_reg31 = 0x6f, + DW_OP_breg0 = 0x70, + DW_OP_breg31 = 0x8f, + DW_OP_regx = 0x90, + DW_OP_fbreg = 0x91, + DW_OP_bregx = 0x92, + DW_OP_piece = 0x93, + DW_OP_deref_size = 0x94, + DW_OP_xderef_size = 0x95, + DW_OP_nop = 0x96, + DW_OP_push_object_address = 0x97, + DW_OP_call2 = 0x98, + DW_OP_call4 = 0x99, + DW_OP_call_ref = 0x9a, + DW_OP_form_tls_address = 0x9b, + DW_OP_call_frame_cfa = 0x9c, + DW_OP_bit_piece = 0x9d, + DW_OP_lo_user = 0xe0, + DW_OP_hi_user = 0xff +}; + +} // namespace lul + +#endif // LulDwarfInt_h diff --git a/tools/profiler/lul/LulDwarfSummariser.cpp b/tools/profiler/lul/LulDwarfSummariser.cpp new file mode 100644 index 000000000..74c2565df --- /dev/null +++ b/tools/profiler/lul/LulDwarfSummariser.cpp @@ -0,0 +1,359 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LulDwarfSummariser.h" + +#include "mozilla/Assertions.h" + +// Set this to 1 for verbose logging +#define DEBUG_SUMMARISER 0 + +namespace lul { + +// Do |s64|'s lowest 32 bits sign extend back to |s64| itself? +static inline bool fitsIn32Bits(int64 s64) { + return s64 == ((s64 & 0xffffffff) ^ 0x80000000) - 0x80000000; +} + +// Check a LExpr prefix expression, starting at pfxInstrs[start] up to +// the next PX_End instruction, to ensure that: +// * It only mentions registers that are tracked on this target +// * The start point is sane +// If the expression is ok, return NULL. Else return a pointer +// a const char* holding a bit of text describing the problem. +static const char* +checkPfxExpr(const vector<PfxInstr>* pfxInstrs, int64_t start) +{ + size_t nInstrs = pfxInstrs->size(); + if (start < 0 || start >= (ssize_t)nInstrs) { + return "bogus start point"; + } + size_t i; + for (i = start; i < nInstrs; i++) { + PfxInstr pxi = (*pfxInstrs)[i]; + if (pxi.mOpcode == PX_End) + break; + if (pxi.mOpcode == PX_DwReg && + !registerIsTracked((DW_REG_NUMBER)pxi.mOperand)) { + return "uses untracked reg"; + } + } + return nullptr; // success +} + + +Summariser::Summariser(SecMap* aSecMap, uintptr_t aTextBias, + void(*aLog)(const char*)) + : mSecMap(aSecMap) + , mTextBias(aTextBias) + , mLog(aLog) +{ + mCurrAddr = 0; + mMax1Addr = 0; // Gives an empty range. + + // Initialise the running RuleSet to "haven't got a clue" status. + new (&mCurrRules) RuleSet(); +} + +void +Summariser::Entry(uintptr_t aAddress, uintptr_t aLength) +{ + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + SprintfLiteral(buf, + "LUL Entry(%llx, %llu)\n", + (unsigned long long int)aAddress, + (unsigned long long int)aLength); + mLog(buf); + } + // This throws away any previous summary, that is, assumes + // that the previous summary, if any, has been properly finished + // by a call to End(). + mCurrAddr = aAddress; + mMax1Addr = aAddress + aLength; + new (&mCurrRules) RuleSet(); +} + +void +Summariser::Rule(uintptr_t aAddress, int aNewReg, + LExprHow how, int16_t oldReg, int64_t offset) +{ + aAddress += mTextBias; + if (DEBUG_SUMMARISER) { + char buf[100]; + if (how == NODEREF || how == DEREF) { + bool deref = how == DEREF; + SprintfLiteral(buf, + "LUL 0x%llx old-r%d = %sr%d + %lld%s\n", + (unsigned long long int)aAddress, aNewReg, + deref ? "*(" : "", (int)oldReg, (long long int)offset, + deref ? ")" : ""); + } else if (how == PFXEXPR) { + SprintfLiteral(buf, + "LUL 0x%llx old-r%d = pfx-expr-at %lld\n", + (unsigned long long int)aAddress, aNewReg, + (long long int)offset); + } else { + SprintfLiteral(buf, + "LUL 0x%llx old-r%d = (invalid LExpr!)\n", + (unsigned long long int)aAddress, aNewReg); + } + mLog(buf); + } + + if (mCurrAddr < aAddress) { + // Flush the existing summary first. + mCurrRules.mAddr = mCurrAddr; + mCurrRules.mLen = aAddress - mCurrAddr; + mSecMap->AddRuleSet(&mCurrRules); + if (DEBUG_SUMMARISER) { + mLog("LUL "); mCurrRules.Print(mLog); + mLog("\n"); + } + mCurrAddr = aAddress; + } + + // If for some reason summarisation fails, either or both of these + // become non-null and point at constant text describing the + // problem. Using two rather than just one avoids complications of + // having to concatenate two strings to produce a complete error message. + const char* reason1 = nullptr; + const char* reason2 = nullptr; + + // |offset| needs to be a 32 bit value that sign extends to 64 bits + // on a 64 bit target. We will need to incorporate |offset| into + // any LExpr made here. So we may as well check it right now. + if (!fitsIn32Bits(offset)) { + reason1 = "offset not in signed 32-bit range"; + goto cant_summarise; + } + + // FIXME: factor out common parts of the arch-dependent summarisers. + +#if defined(LUL_ARCH_arm) + + // ----------------- arm ----------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we + // choose to represent are: r7/11/12/13 + offset. The offset + // must fit into 32 bits since 'uintptr_t' is 32 bit on ARM, + // hence there is no need to check it for overflow. + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + switch (oldReg) { + case DW_REG_ARM_R7: case DW_REG_ARM_R11: + case DW_REG_ARM_R12: case DW_REG_ARM_R13: + break; + default: + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_ARM_R7: case DW_REG_ARM_R11: case DW_REG_ARM_R12: + case DW_REG_ARM_R13: case DW_REG_ARM_R14: case DW_REG_ARM_R15: { + // This is a new rule for R7, R11, R12, R13 (SP), R14 (LR) or + // R15 (the return address). + switch (how) { + case NODEREF: case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for R7/11/12/13/14/15: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for R7/11/12/13/14/15: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_ARM_R7: mCurrRules.mR7expr = expr; break; + case DW_REG_ARM_R11: mCurrRules.mR11expr = expr; break; + case DW_REG_ARM_R12: mCurrRules.mR12expr = expr; break; + case DW_REG_ARM_R13: mCurrRules.mR13expr = expr; break; + case DW_REG_ARM_R14: mCurrRules.mR14expr = expr; break; + case DW_REG_ARM_R15: mCurrRules.mR15expr = expr; break; + default: MOZ_ASSERT(0); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here. This program point + // is reached so often that it causes a flood of "Can't + // summarise" messages. In any case, we don't really care about + // the fact that this summary would produce a new value for a + // register that we're not tracking. We do on the other hand + // care if the summary's expression *uses* a register that we're + // not tracking. But in that case one of the above failures + // should tell us which. + goto cant_summarise; + } + + // Mark callee-saved registers (r4 .. r11) as unchanged, if there is + // no other information about them. FIXME: do this just once, at + // the point where the ruleset is committed. + if (mCurrRules.mR7expr.mHow == UNKNOWN) { + mCurrRules.mR7expr = LExpr(NODEREF, DW_REG_ARM_R7, 0); + } + if (mCurrRules.mR11expr.mHow == UNKNOWN) { + mCurrRules.mR11expr = LExpr(NODEREF, DW_REG_ARM_R11, 0); + } + if (mCurrRules.mR12expr.mHow == UNKNOWN) { + mCurrRules.mR12expr = LExpr(NODEREF, DW_REG_ARM_R12, 0); + } + + // The old r13 (SP) value before the call is always the same as the + // CFA. + mCurrRules.mR13expr = LExpr(NODEREF, DW_REG_CFA, 0); + + // If there's no information about R15 (the return address), say + // it's a copy of R14 (the link register). + if (mCurrRules.mR15expr.mHow == UNKNOWN) { + mCurrRules.mR15expr = LExpr(NODEREF, DW_REG_ARM_R14, 0); + } + +#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + + // ---------------- x64/x86 ---------------- // + + // Now, can we add the rule to our summary? This depends on whether + // the registers and the overall expression are representable. This + // is the heart of the summarisation process. + switch (aNewReg) { + + case DW_REG_CFA: + // This is a rule that defines the CFA. The only forms we can + // represent are: = SP+offset or = FP+offset. + if (how != NODEREF) { + reason1 = "rule for DW_REG_CFA: invalid |how|"; + goto cant_summarise; + } + if (oldReg != DW_REG_INTEL_XSP && oldReg != DW_REG_INTEL_XBP) { + reason1 = "rule for DW_REG_CFA: invalid |oldReg|"; + goto cant_summarise; + } + mCurrRules.mCfaExpr = LExpr(how, oldReg, offset); + break; + + case DW_REG_INTEL_XSP: case DW_REG_INTEL_XBP: case DW_REG_INTEL_XIP: { + // This is a new rule for XSP, XBP or XIP (the return address). + switch (how) { + case NODEREF: case DEREF: + // Check the old register is one we're tracking. + if (!registerIsTracked((DW_REG_NUMBER)oldReg) && + oldReg != DW_REG_CFA) { + reason1 = "rule for XSP/XBP/XIP: uses untracked reg"; + goto cant_summarise; + } + break; + case PFXEXPR: { + // Check that the prefix expression only mentions tracked registers. + const vector<PfxInstr>* pfxInstrs = mSecMap->GetPfxInstrs(); + reason2 = checkPfxExpr(pfxInstrs, offset); + if (reason2) { + reason1 = "rule for XSP/XBP/XIP: "; + goto cant_summarise; + } + break; + } + default: + goto cant_summarise; + } + LExpr expr = LExpr(how, oldReg, offset); + switch (aNewReg) { + case DW_REG_INTEL_XBP: mCurrRules.mXbpExpr = expr; break; + case DW_REG_INTEL_XSP: mCurrRules.mXspExpr = expr; break; + case DW_REG_INTEL_XIP: mCurrRules.mXipExpr = expr; break; + default: MOZ_CRASH("impossible value for aNewReg"); + } + break; + } + + default: + // Leave |reason1| and |reason2| unset here, for the reasons + // explained in the analogous point in the ARM case just above. + goto cant_summarise; + + } + + // On Intel, it seems the old SP value before the call is always the + // same as the CFA. Therefore, in the absence of any other way to + // recover the SP, specify that the CFA should be copied. + if (mCurrRules.mXspExpr.mHow == UNKNOWN) { + mCurrRules.mXspExpr = LExpr(NODEREF, DW_REG_CFA, 0); + } + + // Also, gcc says "Undef" for BP when it is unchanged. + if (mCurrRules.mXbpExpr.mHow == UNKNOWN) { + mCurrRules.mXbpExpr = LExpr(NODEREF, DW_REG_INTEL_XBP, 0); + } + +#else + +# error "Unsupported arch" +#endif + + return; + + cant_summarise: + if (reason1 || reason2) { + char buf[200]; + SprintfLiteral(buf, "LUL can't summarise: " + "SVMA=0x%llx: %s%s, expr=LExpr(%s,%u,%lld)\n", + (unsigned long long int)(aAddress - mTextBias), + reason1 ? reason1 : "", reason2 ? reason2 : "", + NameOf_LExprHow(how), + (unsigned int)oldReg, (long long int)offset); + mLog(buf); + } +} + +uint32_t +Summariser::AddPfxInstr(PfxInstr pfxi) +{ + return mSecMap->AddPfxInstr(pfxi); +} + +void +Summariser::End() +{ + if (DEBUG_SUMMARISER) { + mLog("LUL End\n"); + } + if (mCurrAddr < mMax1Addr) { + mCurrRules.mAddr = mCurrAddr; + mCurrRules.mLen = mMax1Addr - mCurrAddr; + mSecMap->AddRuleSet(&mCurrRules); + if (DEBUG_SUMMARISER) { + mLog("LUL "); mCurrRules.Print(mLog); + mLog("\n"); + } + } +} + +} // namespace lul diff --git a/tools/profiler/lul/LulDwarfSummariser.h b/tools/profiler/lul/LulDwarfSummariser.h new file mode 100644 index 000000000..b41db1ee3 --- /dev/null +++ b/tools/profiler/lul/LulDwarfSummariser.h @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulDwarfSummariser_h +#define LulDwarfSummariser_h + +#include "LulMainInt.h" + +namespace lul { + +class Summariser +{ +public: + Summariser(SecMap* aSecMap, uintptr_t aTextBias, void(*aLog)(const char*)); + + virtual void Entry(uintptr_t aAddress, uintptr_t aLength); + virtual void End(); + + // Tell the summariser that the value for |aNewReg| at |aAddress| is + // recovered using the LExpr that can be constructed using the + // components |how|, |oldReg| and |offset|. The summariser will + // inspect the components and may reject them for various reasons, + // but the hope is that it will find them acceptable and record this + // rule permanently. + virtual void Rule(uintptr_t aAddress, int aNewReg, + LExprHow how, int16_t oldReg, int64_t offset); + + virtual uint32_t AddPfxInstr(PfxInstr pfxi); + + // Send output to the logging sink, for debugging. + virtual void Log(const char* str) { mLog(str); } + +private: + // The SecMap in which we park the finished summaries (RuleSets) and + // also any PfxInstrs derived from Dwarf expressions. + SecMap* mSecMap; + + // Running state for the current summary (RuleSet) under construction. + RuleSet mCurrRules; + + // The start of the address range to which the RuleSet under + // construction applies. + uintptr_t mCurrAddr; + + // The highest address, plus one, for which the RuleSet under + // construction could possibly apply. If there are no further + // incoming events then mCurrRules will eventually be emitted + // as-is, for the range mCurrAddr.. mMax1Addr - 1, if that is + // nonempty. + uintptr_t mMax1Addr; + + // The bias value (to add to the SVMAs, to get AVMAs) to be used + // when adding entries into mSecMap. + uintptr_t mTextBias; + + // A logging sink, for debugging. + void (*mLog)(const char* aFmt); +}; + +} // namespace lul + +#endif // LulDwarfSummariser_h diff --git a/tools/profiler/lul/LulElf.cpp b/tools/profiler/lul/LulElf.cpp new file mode 100644 index 000000000..6f90d5f13 --- /dev/null +++ b/tools/profiler/lul/LulElf.cpp @@ -0,0 +1,915 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// (derived from) +// dump_symbols.cc: implement google_breakpad::WriteSymbolFile: +// Find all the debugging info in a file and dump it as a Breakpad symbol file. +// +// dump_symbols.h: Read debugging information from an ELF file, and write +// it out as a Breakpad symbol file. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.cc +// src/common/linux/elfutils.cc +// src/common/linux/file_id.cc + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#include <arpa/inet.h> + +#include <set> +#include <string> +#include <vector> + +#include "mozilla/Assertions.h" +#include "mozilla/Sprintf.h" + +#include "LulPlatformMacros.h" +#include "LulCommonExt.h" +#include "LulDwarfExt.h" +#include "LulElfInt.h" +#include "LulMainInt.h" + + +#if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX) +// bionic and older glibsc don't define it +# define SHT_ARM_EXIDX (SHT_LOPROC + 1) +#endif + + +// This namespace contains helper functions. +namespace { + +using lul::DwarfCFIToModule; +using lul::FindElfSectionByName; +using lul::GetOffset; +using lul::IsValidElf; +using lul::Module; +using lul::UniqueStringUniverse; +using lul::scoped_ptr; +using lul::Summariser; +using std::string; +using std::vector; +using std::set; + +// +// FDWrapper +// +// Wrapper class to make sure opened file is closed. +// +class FDWrapper { + public: + explicit FDWrapper(int fd) : + fd_(fd) {} + ~FDWrapper() { + if (fd_ != -1) + close(fd_); + } + int get() { + return fd_; + } + int release() { + int fd = fd_; + fd_ = -1; + return fd; + } + private: + int fd_; +}; + +// +// MmapWrapper +// +// Wrapper class to make sure mapped regions are unmapped. +// +class MmapWrapper { + public: + MmapWrapper() : is_set_(false), base_(NULL), size_(0){} + ~MmapWrapper() { + if (is_set_ && base_ != NULL) { + MOZ_ASSERT(size_ > 0); + munmap(base_, size_); + } + } + void set(void *mapped_address, size_t mapped_size) { + is_set_ = true; + base_ = mapped_address; + size_ = mapped_size; + } + void release() { + MOZ_ASSERT(is_set_); + is_set_ = false; + base_ = NULL; + size_ = 0; + } + + private: + bool is_set_; + void *base_; + size_t size_; +}; + + +// Set NUM_DW_REGNAMES to be the number of Dwarf register names +// appropriate to the machine architecture given in HEADER. Return +// true on success, or false if HEADER's machine architecture is not +// supported. +template<typename ElfClass> +bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header, + unsigned int* num_dw_regnames) { + switch (elf_header->e_machine) { + case EM_386: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386(); + return true; + case EM_ARM: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM(); + return true; + case EM_X86_64: + *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64(); + return true; + default: + MOZ_ASSERT(0); + return false; + } +} + +template<typename ElfClass> +bool LoadDwarfCFI(const string& dwarf_filename, + const typename ElfClass::Ehdr* elf_header, + const char* section_name, + const typename ElfClass::Shdr* section, + const bool eh_frame, + const typename ElfClass::Shdr* got_section, + const typename ElfClass::Shdr* text_section, + const bool big_endian, + SecMap* smap, + uintptr_t text_bias, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + // Find the appropriate set of register names for this file's + // architecture. + unsigned int num_dw_regs = 0; + if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) { + fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';" + " cannot convert DWARF call frame information\n", + dwarf_filename.c_str(), elf_header->e_machine); + return false; + } + + const lul::Endianness endianness + = big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE; + + // Find the call frame information and its size. + const char* cfi = + GetOffset<ElfClass, char>(elf_header, section->sh_offset); + size_t cfi_size = section->sh_size; + + // Plug together the parser, handler, and their entourages. + + // Here's a summariser, which will receive the output of the + // parser, create summaries, and add them to |smap|. + Summariser summ(smap, text_bias, log); + + lul::ByteReader reader(endianness); + reader.SetAddressSize(ElfClass::kAddrSize); + + DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name); + DwarfCFIToModule handler(num_dw_regs, &module_reporter, &reader, usu, &summ); + + // Provide the base addresses for .eh_frame encoded pointers, if + // possible. + reader.SetCFIDataBase(section->sh_addr, cfi); + if (got_section) + reader.SetDataBase(got_section->sh_addr); + if (text_section) + reader.SetTextBase(text_section->sh_addr); + + lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename, + section_name); + lul::CallFrameInfo parser(cfi, cfi_size, + &reader, &handler, &dwarf_reporter, + eh_frame); + parser.Start(); + + return true; +} + +bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper, + void** elf_header) { + int obj_fd = open(obj_file.c_str(), O_RDONLY); + if (obj_fd < 0) { + fprintf(stderr, "Failed to open ELF file '%s': %s\n", + obj_file.c_str(), strerror(errno)); + return false; + } + FDWrapper obj_fd_wrapper(obj_fd); + struct stat st; + if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) { + fprintf(stderr, "Unable to fstat ELF file '%s': %s\n", + obj_file.c_str(), strerror(errno)); + return false; + } + // Mapping it read-only is good enough. In any case, mapping it + // read-write confuses Valgrind's debuginfo acquire/discard + // heuristics, making it hard to profile the profiler. + void *obj_base = mmap(nullptr, st.st_size, + PROT_READ, MAP_PRIVATE, obj_fd, 0); + if (obj_base == MAP_FAILED) { + fprintf(stderr, "Failed to mmap ELF file '%s': %s\n", + obj_file.c_str(), strerror(errno)); + return false; + } + map_wrapper->set(obj_base, st.st_size); + *elf_header = obj_base; + if (!IsValidElf(*elf_header)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str()); + return false; + } + return true; +} + +// Get the endianness of ELF_HEADER. If it's invalid, return false. +template<typename ElfClass> +bool ElfEndianness(const typename ElfClass::Ehdr* elf_header, + bool* big_endian) { + if (elf_header->e_ident[EI_DATA] == ELFDATA2LSB) { + *big_endian = false; + return true; + } + if (elf_header->e_ident[EI_DATA] == ELFDATA2MSB) { + *big_endian = true; + return true; + } + + fprintf(stderr, "bad data encoding in ELF header: %d\n", + elf_header->e_ident[EI_DATA]); + return false; +} + +// +// LoadSymbolsInfo +// +// Holds the state between the two calls to LoadSymbols() in case it's necessary +// to follow the .gnu_debuglink section and load debug information from a +// different file. +// +template<typename ElfClass> +class LoadSymbolsInfo { + public: + typedef typename ElfClass::Addr Addr; + + explicit LoadSymbolsInfo(const vector<string>& dbg_dirs) : + debug_dirs_(dbg_dirs), + has_loading_addr_(false) {} + + // Keeps track of which sections have been loaded so sections don't + // accidentally get loaded twice from two different files. + void LoadedSection(const string §ion) { + if (loaded_sections_.count(section) == 0) { + loaded_sections_.insert(section); + } else { + fprintf(stderr, "Section %s has already been loaded.\n", + section.c_str()); + } + } + + string debuglink_file() const { + return debuglink_file_; + } + + private: + const vector<string>& debug_dirs_; // Directories in which to + // search for the debug ELF file. + + string debuglink_file_; // Full path to the debug ELF file. + + bool has_loading_addr_; // Indicate if LOADING_ADDR_ is valid. + + set<string> loaded_sections_; // Tracks the Loaded ELF sections + // between calls to LoadSymbols(). +}; + +// Find the preferred loading address of the binary. +template<typename ElfClass> +typename ElfClass::Addr GetLoadingAddress( + const typename ElfClass::Phdr* program_headers, + int nheader) { + typedef typename ElfClass::Phdr Phdr; + + // For non-PIC executables (e_type == ET_EXEC), the load address is + // the start address of the first PT_LOAD segment. (ELF requires + // the segments to be sorted by load address.) For PIC executables + // and dynamic libraries (e_type == ET_DYN), this address will + // normally be zero. + for (int i = 0; i < nheader; ++i) { + const Phdr& header = program_headers[i]; + if (header.p_type == PT_LOAD) + return header.p_vaddr; + } + return 0; +} + +template<typename ElfClass> +bool LoadSymbols(const string& obj_file, + const bool big_endian, + const typename ElfClass::Ehdr* elf_header, + const bool read_gnu_debug_link, + LoadSymbolsInfo<ElfClass>* info, + SecMap* smap, + void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Phdr Phdr; + typedef typename ElfClass::Shdr Shdr; + + char buf[500]; + SprintfLiteral(buf, "LoadSymbols: BEGIN %s\n", obj_file.c_str()); + buf[sizeof(buf)-1] = 0; + log(buf); + + // This is how the text bias is calculated. + // BEGIN CALCULATE BIAS + uintptr_t loading_addr = GetLoadingAddress<ElfClass>( + GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff), + elf_header->e_phnum); + uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr; + SprintfLiteral(buf, + "LoadSymbols: rx_avma=%llx, text_bias=%llx", + (unsigned long long int)(uintptr_t)rx_avma, + (unsigned long long int)text_bias); + buf[sizeof(buf)-1] = 0; + log(buf); + // END CALCULATE BIAS + + const Shdr* sections = + GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset<ElfClass, char>(elf_header, section_names->sh_offset); + const char *names_end = names + section_names->sh_size; + bool found_usable_info = false; + + // Dwarf Call Frame Information (CFI) is actually independent from + // the other DWARF debugging information, and can be used alone. + const Shdr* dwarf_cfi_section = + FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); + if (dwarf_cfi_section) { + // Ignore the return value of this function; even without call frame + // information, the other debugging information could be perfectly + // useful. + info->LoadedSection(".debug_frame"); + bool result = + LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame", + dwarf_cfi_section, false, 0, 0, big_endian, + smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) + log("LoadSymbols: read CFI from .debug_frame"); + } + + // Linux C++ exception handling information can also provide + // unwinding data. + const Shdr* eh_frame_section = + FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); + if (eh_frame_section) { + // Pointers in .eh_frame data may be relative to the base addresses of + // certain sections. Provide those sections if present. + const Shdr* got_section = + FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); + const Shdr* text_section = + FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS, + sections, names, names_end, + elf_header->e_shnum); + info->LoadedSection(".eh_frame"); + // As above, ignore the return value of this function. + bool result = + LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame", + eh_frame_section, true, + got_section, text_section, big_endian, + smap, text_bias, usu, log); + found_usable_info = found_usable_info || result; + if (result) + log("LoadSymbols: read CFI from .eh_frame"); + } + + SprintfLiteral(buf, "LoadSymbols: END %s\n", obj_file.c_str()); + buf[sizeof(buf)-1] = 0; + log(buf); + + return found_usable_info; +} + +// Return the breakpad symbol file identifier for the architecture of +// ELF_HEADER. +template<typename ElfClass> +const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { + typedef typename ElfClass::Half Half; + Half arch = elf_header->e_machine; + switch (arch) { + case EM_386: return "x86"; + case EM_ARM: return "arm"; + case EM_MIPS: return "mips"; + case EM_PPC64: return "ppc64"; + case EM_PPC: return "ppc"; + case EM_S390: return "s390"; + case EM_SPARC: return "sparc"; + case EM_SPARCV9: return "sparcv9"; + case EM_X86_64: return "x86_64"; + default: return NULL; + } +} + +// Format the Elf file identifier in IDENTIFIER as a UUID with the +// dashes removed. +string FormatIdentifier(unsigned char identifier[16]) { + char identifier_str[40]; + lul::FileID::ConvertIdentifierToString( + identifier, + identifier_str, + sizeof(identifier_str)); + string id_no_dash; + for (int i = 0; identifier_str[i] != '\0'; ++i) + if (identifier_str[i] != '-') + id_no_dash += identifier_str[i]; + // Add an extra "0" by the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but be consistent. + id_no_dash += '0'; + return id_no_dash; +} + +// Return the non-directory portion of FILENAME: the portion after the +// last slash, or the whole filename if there are no slashes. +string BaseFileName(const string &filename) { + // Lots of copies! basename's behavior is less than ideal. + char *c_filename = strdup(filename.c_str()); + string base = basename(c_filename); + free(c_filename); + return base; +} + +template<typename ElfClass> +bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, + const string& obj_filename, + const vector<string>& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + typedef typename ElfClass::Ehdr Ehdr; + + unsigned char identifier[16]; + if (!lul + ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) { + fprintf(stderr, "%s: unable to generate file identifier\n", + obj_filename.c_str()); + return false; + } + + const char *architecture = ElfArchitecture<ElfClass>(elf_header); + if (!architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + obj_filename.c_str(), elf_header->e_machine); + return false; + } + + // Figure out what endianness this file is. + bool big_endian; + if (!ElfEndianness<ElfClass>(elf_header, &big_endian)) + return false; + + string name = BaseFileName(obj_filename); + string os = "Linux"; + string id = FormatIdentifier(identifier); + + LoadSymbolsInfo<ElfClass> info(debug_dirs); + if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header, + !debug_dirs.empty(), &info, + smap, rx_avma, rx_size, usu, log)) { + const string debuglink_file = info.debuglink_file(); + if (debuglink_file.empty()) + return false; + + // Load debuglink ELF file. + fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str()); + MmapWrapper debug_map_wrapper; + Ehdr* debug_elf_header = NULL; + if (!LoadELF(debuglink_file, &debug_map_wrapper, + reinterpret_cast<void**>(&debug_elf_header))) + return false; + // Sanity checks to make sure everything matches up. + const char *debug_architecture = + ElfArchitecture<ElfClass>(debug_elf_header); + if (!debug_architecture) { + fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n", + debuglink_file.c_str(), debug_elf_header->e_machine); + return false; + } + if (strcmp(architecture, debug_architecture)) { + fprintf(stderr, "%s with ELF machine architecture %s does not match " + "%s with ELF architecture %s\n", + debuglink_file.c_str(), debug_architecture, + obj_filename.c_str(), architecture); + return false; + } + + bool debug_big_endian; + if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian)) + return false; + if (debug_big_endian != big_endian) { + fprintf(stderr, "%s and %s does not match in endianness\n", + obj_filename.c_str(), debuglink_file.c_str()); + return false; + } + + if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian, + debug_elf_header, false, &info, + smap, rx_avma, rx_size, usu, log)) { + return false; + } + } + + return true; +} + +} // namespace (anon) + + +namespace lul { + +bool ReadSymbolDataInternal(const uint8_t* obj_file, + const string& obj_filename, + const vector<string>& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + + if (!IsValidElf(obj_file)) { + fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str()); + return false; + } + + int elfclass = ElfClass(obj_file); + if (elfclass == ELFCLASS32) { + return ReadSymbolDataElfClass<ElfClass32>( + reinterpret_cast<const Elf32_Ehdr*>(obj_file), + obj_filename, debug_dirs, smap, rx_avma, rx_size, usu, log); + } + if (elfclass == ELFCLASS64) { + return ReadSymbolDataElfClass<ElfClass64>( + reinterpret_cast<const Elf64_Ehdr*>(obj_file), + obj_filename, debug_dirs, smap, rx_avma, rx_size, usu, log); + } + + return false; +} + +bool ReadSymbolData(const string& obj_file, + const vector<string>& debug_dirs, + SecMap* smap, void* rx_avma, size_t rx_size, + UniqueStringUniverse* usu, + void (*log)(const char*)) { + MmapWrapper map_wrapper; + void* elf_header = NULL; + if (!LoadELF(obj_file, &map_wrapper, &elf_header)) + return false; + + return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header), + obj_file, debug_dirs, + smap, rx_avma, rx_size, usu, log); +} + + +namespace { + +template<typename ElfClass> +void FindElfClassSection(const char *elf_base, + const char *section_name, + typename ElfClass::Word section_type, + const void **section_start, + int *section_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Shdr Shdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Shdr* sections = + GetOffset<ElfClass,Shdr>(elf_header, elf_header->e_shoff); + const Shdr* section_names = sections + elf_header->e_shstrndx; + const char* names = + GetOffset<ElfClass,char>(elf_header, section_names->sh_offset); + const char *names_end = names + section_names->sh_size; + + const Shdr* section = + FindElfSectionByName<ElfClass>(section_name, section_type, + sections, names, names_end, + elf_header->e_shnum); + + if (section != NULL && section->sh_size > 0) { + *section_start = elf_base + section->sh_offset; + *section_size = section->sh_size; + } +} + +template<typename ElfClass> +void FindElfClassSegment(const char *elf_base, + typename ElfClass::Word segment_type, + const void **segment_start, + int *segment_size) { + typedef typename ElfClass::Ehdr Ehdr; + typedef typename ElfClass::Phdr Phdr; + + MOZ_ASSERT(elf_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0); + + const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base); + MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass); + + const Phdr* phdrs = + GetOffset<ElfClass,Phdr>(elf_header, elf_header->e_phoff); + + for (int i = 0; i < elf_header->e_phnum; ++i) { + if (phdrs[i].p_type == segment_type) { + *segment_start = elf_base + phdrs[i].p_offset; + *segment_size = phdrs[i].p_filesz; + return; + } + } +} + +} // namespace (anon) + +bool IsValidElf(const void* elf_base) { + return strncmp(reinterpret_cast<const char*>(elf_base), + ELFMAG, SELFMAG) == 0; +} + +int ElfClass(const void* elf_base) { + const ElfW(Ehdr)* elf_header = + reinterpret_cast<const ElfW(Ehdr)*>(elf_base); + + return elf_header->e_ident[EI_CLASS]; +} + +bool FindElfSection(const void *elf_mapped_base, + const char *section_name, + uint32_t section_type, + const void **section_start, + int *section_size, + int *elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(section_start); + MOZ_ASSERT(section_size); + + *section_start = NULL; + *section_size = 0; + + if (!IsValidElf(elf_mapped_base)) + return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = + static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSection<ElfClass32>(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSection<ElfClass64>(elf_base, section_name, section_type, + section_start, section_size); + return *section_start != NULL; + } + + return false; +} + +bool FindElfSegment(const void *elf_mapped_base, + uint32_t segment_type, + const void **segment_start, + int *segment_size, + int *elfclass) { + MOZ_ASSERT(elf_mapped_base); + MOZ_ASSERT(segment_start); + MOZ_ASSERT(segment_size); + + *segment_start = NULL; + *segment_size = 0; + + if (!IsValidElf(elf_mapped_base)) + return false; + + int cls = ElfClass(elf_mapped_base); + if (elfclass) { + *elfclass = cls; + } + + const char* elf_base = + static_cast<const char*>(elf_mapped_base); + + if (cls == ELFCLASS32) { + FindElfClassSegment<ElfClass32>(elf_base, segment_type, + segment_start, segment_size); + return *segment_start != NULL; + } else if (cls == ELFCLASS64) { + FindElfClassSegment<ElfClass64>(elf_base, segment_type, + segment_start, segment_size); + return *segment_start != NULL; + } + + return false; +} + + +// (derived from) +// file_id.cc: Return a unique identifier for a file +// +// See file_id.h for documentation +// + +// ELF note name and desc are 32-bits word padded. +#define NOTE_PADDING(a) ((a + 3) & ~3) + +// These functions are also used inside the crashed process, so be safe +// and use the syscall/libc wrappers instead of direct syscalls or libc. + +template<typename ElfClass> +static bool ElfClassBuildIDNoteIdentifier(const void *section, int length, + uint8_t identifier[kMDGUIDSize]) { + typedef typename ElfClass::Nhdr Nhdr; + + const void* section_end = reinterpret_cast<const char*>(section) + length; + const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section); + while (reinterpret_cast<const void *>(note_header) < section_end) { + if (note_header->n_type == NT_GNU_BUILD_ID) + break; + note_header = reinterpret_cast<const Nhdr*>( + reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) + + NOTE_PADDING(note_header->n_namesz) + + NOTE_PADDING(note_header->n_descsz)); + } + if (reinterpret_cast<const void *>(note_header) >= section_end || + note_header->n_descsz == 0) { + return false; + } + + const char* build_id = reinterpret_cast<const char*>(note_header) + + sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz); + // Copy as many bits of the build ID as will fit + // into the GUID space. + memset(identifier, 0, kMDGUIDSize); + memcpy(identifier, build_id, + std::min(kMDGUIDSize, (size_t)note_header->n_descsz)); + + return true; +} + +// Attempt to locate a .note.gnu.build-id section in an ELF binary +// and copy as many bytes of it as will fit into |identifier|. +static bool FindElfBuildIDNote(const void *elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* note_section; + int note_size, elfclass; + if ((!FindElfSegment(elf_mapped_base, PT_NOTE, + (const void**)¬e_section, ¬e_size, &elfclass) || + note_size == 0) && + (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, + (const void**)¬e_section, ¬e_size, &elfclass) || + note_size == 0)) { + return false; + } + + if (elfclass == ELFCLASS32) { + return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size, + identifier); + } else if (elfclass == ELFCLASS64) { + return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size, + identifier); + } + + return false; +} + +// Attempt to locate the .text section of an ELF binary and generate +// a simple hash by XORing the first page worth of bytes into |identifier|. +static bool HashElfTextSection(const void *elf_mapped_base, + uint8_t identifier[kMDGUIDSize]) { + void* text_section; + int text_size; + if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS, + (const void**)&text_section, &text_size, NULL) || + text_size == 0) { + return false; + } + + memset(identifier, 0, kMDGUIDSize); + const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section); + const uint8_t* ptr_end = ptr + std::min(text_size, 4096); + while (ptr < ptr_end) { + for (unsigned i = 0; i < kMDGUIDSize; i++) + identifier[i] ^= ptr[i]; + ptr += kMDGUIDSize; + } + return true; +} + +// static +bool FileID::ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]) { + // Look for a build id note first. + if (FindElfBuildIDNote(base, identifier)) + return true; + + // Fall back on hashing the first page of the text section. + return HashElfTextSection(base, identifier); +} + +// static +void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length) { + uint8_t identifier_swapped[kMDGUIDSize]; + + // Endian-ness swap to match dump processor expectation. + memcpy(identifier_swapped, identifier, kMDGUIDSize); + uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped); + *data1 = htonl(*data1); + uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4); + *data2 = htons(*data2); + uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6); + *data3 = htons(*data3); + + int buffer_idx = 0; + for (unsigned int idx = 0; + (buffer_idx < buffer_length) && (idx < kMDGUIDSize); + ++idx) { + int hi = (identifier_swapped[idx] >> 4) & 0x0F; + int lo = (identifier_swapped[idx]) & 0x0F; + + if (idx == 4 || idx == 6 || idx == 8 || idx == 10) + buffer[buffer_idx++] = '-'; + + buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi; + buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo; + } + + // NULL terminate + buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0; +} + +} // namespace lul diff --git a/tools/profiler/lul/LulElfExt.h b/tools/profiler/lul/LulElfExt.h new file mode 100644 index 000000000..b127d96d9 --- /dev/null +++ b/tools/profiler/lul/LulElfExt.h @@ -0,0 +1,68 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2011, 2012 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/linux/dump_symbols.h + +#ifndef LulElfExt_h +#define LulElfExt_h + +// These two functions are the external interface to the +// ELF/Dwarf/EXIDX reader. + +#include "LulMainInt.h" + +using lul::SecMap; + +namespace lul { + +// Find all the unwind information in OBJ_FILE, an ELF executable +// or shared library, and add it to SMAP. +bool ReadSymbolData(const std::string& obj_file, + const std::vector<std::string>& debug_dirs, + SecMap* smap, + void* rx_avma, size_t rx_size, + void (*log)(const char*)); + +// The same as ReadSymbolData, except that OBJ_FILE is assumed to +// point to a mapped-in image of OBJ_FILENAME. +bool ReadSymbolDataInternal(const uint8_t* obj_file, + const std::string& obj_filename, + const std::vector<std::string>& debug_dirs, + SecMap* smap, + void* rx_avma, size_t rx_size, + void (*log)(const char*)); + +} // namespace lul + +#endif // LulElfExt_h diff --git a/tools/profiler/lul/LulElfInt.h b/tools/profiler/lul/LulElfInt.h new file mode 100644 index 000000000..899d7d3ee --- /dev/null +++ b/tools/profiler/lul/LulElfInt.h @@ -0,0 +1,234 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ + +// Copyright (c) 2006, 2012, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file is derived from the following files in +// toolkit/crashreporter/google-breakpad: +// src/common/android/include/elf.h +// src/common/linux/elfutils.h +// src/common/linux/file_id.h +// src/common/linux/elfutils-inl.h + +#ifndef LulElfInt_h +#define LulElfInt_h + +// This header defines functions etc internal to the ELF reader. It +// should not be included outside of LulElf.cpp. + +#include <elf.h> +#include <stdlib.h> + +#include "mozilla/Assertions.h" + +#include "LulPlatformMacros.h" + + +// (derived from) +// elfutils.h: Utilities for dealing with ELF files. +// + +#if defined(LUL_OS_android) + +// From toolkit/crashreporter/google-breakpad/src/common/android/include/elf.h +// The Android headers don't always define this constant. +#ifndef EM_X86_64 +#define EM_X86_64 62 +#endif + +#ifndef EM_PPC64 +#define EM_PPC64 21 +#endif + +#ifndef EM_S390 +#define EM_S390 22 +#endif + +#ifndef NT_GNU_BUILD_ID +#define NT_GNU_BUILD_ID 3 +#endif + +#define ElfW(type) _ElfW (Elf, ELFSIZE, type) +#define _ElfW(e,w,t) _ElfW_1 (e, w, _##t) +#define _ElfW_1(e,w,t) e##w##t + +//FIXME +extern "C" { + extern char* basename(const char* path); +}; +#else + +# include <link.h> +#endif + + +namespace lul { + +// Traits classes so consumers can write templatized code to deal +// with specific ELF bits. +struct ElfClass32 { + typedef Elf32_Addr Addr; + typedef Elf32_Ehdr Ehdr; + typedef Elf32_Nhdr Nhdr; + typedef Elf32_Phdr Phdr; + typedef Elf32_Shdr Shdr; + typedef Elf32_Half Half; + typedef Elf32_Off Off; + typedef Elf32_Word Word; + static const int kClass = ELFCLASS32; + static const size_t kAddrSize = sizeof(Elf32_Addr); +}; + +struct ElfClass64 { + typedef Elf64_Addr Addr; + typedef Elf64_Ehdr Ehdr; + typedef Elf64_Nhdr Nhdr; + typedef Elf64_Phdr Phdr; + typedef Elf64_Shdr Shdr; + typedef Elf64_Half Half; + typedef Elf64_Off Off; + typedef Elf64_Word Word; + static const int kClass = ELFCLASS64; + static const size_t kAddrSize = sizeof(Elf64_Addr); +}; + +bool IsValidElf(const void* elf_header); +int ElfClass(const void* elf_base); + +// Attempt to find a section named |section_name| of type |section_type| +// in the ELF binary data at |elf_mapped_base|. On success, returns true +// and sets |*section_start| to point to the start of the section data, +// and |*section_size| to the size of the section's data. If |elfclass| +// is not NULL, set |*elfclass| to the ELF file class. +bool FindElfSection(const void *elf_mapped_base, + const char *section_name, + uint32_t section_type, + const void **section_start, + int *section_size, + int *elfclass); + +// Internal helper method, exposed for convenience for callers +// that already have more info. +template<typename ElfClass> +const typename ElfClass::Shdr* +FindElfSectionByName(const char* name, + typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, + const char* section_names, + const char* names_end, + int nsection); + +// Attempt to find the first segment of type |segment_type| in the ELF +// binary data at |elf_mapped_base|. On success, returns true and sets +// |*segment_start| to point to the start of the segment data, and +// and |*segment_size| to the size of the segment's data. If |elfclass| +// is not NULL, set |*elfclass| to the ELF file class. +bool FindElfSegment(const void *elf_mapped_base, + uint32_t segment_type, + const void **segment_start, + int *segment_size, + int *elfclass); + +// Convert an offset from an Elf header into a pointer to the mapped +// address in the current process. Takes an extra template parameter +// to specify the return type to avoid having to dynamic_cast the +// result. +template<typename ElfClass, typename T> +const T* +GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset); + + +// (derived from) +// file_id.h: Return a unique identifier for a file +// + +static const size_t kMDGUIDSize = sizeof(MDGUID); + +class FileID { + public: + + // Load the identifier for the elf file mapped into memory at |base| into + // |identifier|. Return false if the identifier could not be created for the + // file. + static bool ElfFileIdentifierFromMappedFile(const void* base, + uint8_t identifier[kMDGUIDSize]); + + // Convert the |identifier| data to a NULL terminated string. The string will + // be formatted as a UUID (e.g., 22F065BB-FC9C-49F7-80FE-26A7CEBD7BCE). + // The |buffer| should be at least 37 bytes long to receive all of the data + // and termination. Shorter buffers will contain truncated data. + static void ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize], + char* buffer, int buffer_length); +}; + + + +template<typename ElfClass, typename T> +const T* GetOffset(const typename ElfClass::Ehdr* elf_header, + typename ElfClass::Off offset) { + return reinterpret_cast<const T*>(reinterpret_cast<uintptr_t>(elf_header) + + offset); +} + +template<typename ElfClass> +const typename ElfClass::Shdr* FindElfSectionByName( + const char* name, + typename ElfClass::Word section_type, + const typename ElfClass::Shdr* sections, + const char* section_names, + const char* names_end, + int nsection) { + MOZ_ASSERT(name != NULL); + MOZ_ASSERT(sections != NULL); + MOZ_ASSERT(nsection > 0); + + int name_len = strlen(name); + if (name_len == 0) + return NULL; + + for (int i = 0; i < nsection; ++i) { + const char* section_name = section_names + sections[i].sh_name; + if (sections[i].sh_type == section_type && + names_end - section_name >= name_len + 1 && + strcmp(name, section_name) == 0) { + return sections + i; + } + } + return NULL; +} + +} // namespace lul + + +// And finally, the external interface, offered to LulMain.cpp +#include "LulElfExt.h" + +#endif // LulElfInt_h diff --git a/tools/profiler/lul/LulMain.cpp b/tools/profiler/lul/LulMain.cpp new file mode 100644 index 000000000..2e78f03ec --- /dev/null +++ b/tools/profiler/lul/LulMain.cpp @@ -0,0 +1,1963 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LulMain.h" + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> + +#include <algorithm> // std::sort +#include <string> + +#include "mozilla/Assertions.h" +#include "mozilla/ArrayUtils.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/MemoryChecking.h" +#include "mozilla/Sprintf.h" + +#include "LulCommonExt.h" +#include "LulElfExt.h" + +#include "LulMainInt.h" + +#include "platform-linux-lul.h" // for gettid() + +// Set this to 1 for verbose logging +#define DEBUG_MAIN 0 + +namespace lul { + +using std::string; +using std::vector; +using std::pair; +using mozilla::CheckedInt; +using mozilla::DebugOnly; + + +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING +// +// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT. +// Any such function -- and, hence, the transitive closure of those +// reachable from it -- must not do any dynamic memory allocation. +// Doing so risks deadlock. There is exactly one root function for +// the transitive closure: Lul::Unwind. +// +// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING + + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +static const char* +NameOf_DW_REG(int16_t aReg) +{ + switch (aReg) { + case DW_REG_CFA: return "cfa"; +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + case DW_REG_INTEL_XBP: return "xbp"; + case DW_REG_INTEL_XSP: return "xsp"; + case DW_REG_INTEL_XIP: return "xip"; +#elif defined(LUL_ARCH_arm) + case DW_REG_ARM_R7: return "r7"; + case DW_REG_ARM_R11: return "r11"; + case DW_REG_ARM_R12: return "r12"; + case DW_REG_ARM_R13: return "r13"; + case DW_REG_ARM_R14: return "r14"; + case DW_REG_ARM_R15: return "r15"; +#else +# error "Unsupported arch" +#endif + default: return "???"; + } +} + +string +LExpr::ShowRule(const char* aNewReg) const +{ + char buf[64]; + string res = string(aNewReg) + "="; + switch (mHow) { + case UNKNOWN: + res += "Unknown"; + break; + case NODEREF: + SprintfLiteral(buf, "%s+%d", + NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case DEREF: + SprintfLiteral(buf, "*(%s+%d)", + NameOf_DW_REG(mReg), (int)mOffset); + res += buf; + break; + case PFXEXPR: + SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset); + res += buf; + break; + default: + res += "???"; + break; + } + return res; +} + +void +RuleSet::Print(void(*aLog)(const char*)) const +{ + char buf[96]; + SprintfLiteral(buf, "[%llx .. %llx]: let ", + (unsigned long long int)mAddr, + (unsigned long long int)(mAddr + mLen - 1)); + string res = string(buf); + res += mCfaExpr.ShowRule("cfa"); + res += " in"; + // For each reg we care about, print the recovery expression. +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + res += mXipExpr.ShowRule(" RA"); + res += mXspExpr.ShowRule(" SP"); + res += mXbpExpr.ShowRule(" BP"); +#elif defined(LUL_ARCH_arm) + res += mR15expr.ShowRule(" R15"); + res += mR7expr .ShowRule(" R7" ); + res += mR11expr.ShowRule(" R11"); + res += mR12expr.ShowRule(" R12"); + res += mR13expr.ShowRule(" R13"); + res += mR14expr.ShowRule(" R14"); +#else +# error "Unsupported arch" +#endif + aLog(res.c_str()); +} + +LExpr* +RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) { + switch (aRegno) { + case DW_REG_CFA: return &mCfaExpr; +# if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + case DW_REG_INTEL_XIP: return &mXipExpr; + case DW_REG_INTEL_XSP: return &mXspExpr; + case DW_REG_INTEL_XBP: return &mXbpExpr; +# elif defined(LUL_ARCH_arm) + case DW_REG_ARM_R15: return &mR15expr; + case DW_REG_ARM_R14: return &mR14expr; + case DW_REG_ARM_R13: return &mR13expr; + case DW_REG_ARM_R12: return &mR12expr; + case DW_REG_ARM_R11: return &mR11expr; + case DW_REG_ARM_R7: return &mR7expr; +# else +# error "Unknown arch" +# endif + default: return nullptr; + } +} + +RuleSet::RuleSet() +{ + mAddr = 0; + mLen = 0; + // The only other fields are of type LExpr and those are initialised + // by LExpr::LExpr(). +} + + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// See header file LulMainInt.h for comments about invariants. + +SecMap::SecMap(void(*aLog)(const char*)) + : mSummaryMinAddr(1) + , mSummaryMaxAddr(0) + , mUsable(true) + , mLog(aLog) +{} + +SecMap::~SecMap() { + mRuleSets.clear(); +} + +// RUNS IN NO-MALLOC CONTEXT +RuleSet* +SecMap::FindRuleSet(uintptr_t ia) { + // Binary search mRuleSets to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. Note that this works correctly even when + // mRuleSets.size() == 0. + + // Can't do this until the array has been sorted and preened. + MOZ_ASSERT(mUsable); + + long int lo = 0; + long int hi = (long int)mRuleSets.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + RuleSet* mid_ruleSet = &mRuleSets[mid]; + uintptr_t mid_minAddr = mid_ruleSet->mAddr; + uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1; + if (ia < mid_minAddr) { hi = mid-1; continue; } + if (ia > mid_maxAddr) { lo = mid+1; continue; } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_ruleSet; + } + // NOTREACHED +} + +// Add a RuleSet to the collection. The rule is copied in. Calling +// this makes the map non-searchable. +void +SecMap::AddRuleSet(const RuleSet* rs) { + mUsable = false; + mRuleSets.push_back(*rs); +} + +// Add a PfxInstr to the vector of such instrs, and return the index +// in the vector. Calling this makes the map non-searchable. +uint32_t +SecMap::AddPfxInstr(PfxInstr pfxi) { + mUsable = false; + mPfxInstrs.push_back(pfxi); + return mPfxInstrs.size() - 1; +} + + +static bool +CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) { + return rs1.mAddr < rs2.mAddr; +} + +// Prepare the map for searching. Completely remove any which don't +// fall inside the specified range [start, +len). +void +SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) +{ + if (mRuleSets.empty()) { + return; + } + + MOZ_ASSERT(aLen > 0); + if (aLen == 0) { + // This should never happen. + mRuleSets.clear(); + return; + } + + // Sort by start addresses. + std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE); + + // Detect any entry not completely contained within [start, +len). + // Set its length to zero, so that the next pass will remove it. + for (size_t i = 0; i < mRuleSets.size(); ++i) { + RuleSet* rs = &mRuleSets[i]; + if (rs->mLen > 0 && + (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) { + rs->mLen = 0; + } + } + + // Iteratively truncate any overlaps and remove any zero length + // entries that might result, or that may have been present + // initially. Unless the input is seriously screwy, this is + // expected to iterate only once. + while (true) { + size_t i; + size_t n = mRuleSets.size(); + size_t nZeroLen = 0; + + if (n == 0) { + break; + } + + for (i = 1; i < n; ++i) { + RuleSet* prev = &mRuleSets[i-1]; + RuleSet* here = &mRuleSets[i]; + MOZ_ASSERT(prev->mAddr <= here->mAddr); + if (prev->mAddr + prev->mLen > here->mAddr) { + prev->mLen = here->mAddr - prev->mAddr; + } + if (prev->mLen == 0) + nZeroLen++; + } + + if (mRuleSets[n-1].mLen == 0) { + nZeroLen++; + } + + // At this point, the entries are in-order and non-overlapping. + // If none of them are zero-length, we are done. + if (nZeroLen == 0) { + break; + } + + // Slide back the entries to remove the zero length ones. + size_t j = 0; // The write-point. + for (i = 0; i < n; ++i) { + if (mRuleSets[i].mLen == 0) { + continue; + } + if (j != i) mRuleSets[j] = mRuleSets[i]; + ++j; + } + MOZ_ASSERT(i == n); + MOZ_ASSERT(nZeroLen <= n); + MOZ_ASSERT(j == n - nZeroLen); + while (nZeroLen > 0) { + mRuleSets.pop_back(); + nZeroLen--; + } + + MOZ_ASSERT(mRuleSets.size() == j); + } + + size_t n = mRuleSets.size(); + +#ifdef DEBUG + // Do a final check on the rules: their address ranges must be + // ascending, non overlapping, non zero sized. + if (n > 0) { + MOZ_ASSERT(mRuleSets[0].mLen > 0); + for (size_t i = 1; i < n; ++i) { + RuleSet* prev = &mRuleSets[i-1]; + RuleSet* here = &mRuleSets[i]; + MOZ_ASSERT(prev->mAddr < here->mAddr); + MOZ_ASSERT(here->mLen > 0); + MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr); + } + } +#endif + + // Set the summary min and max address values. + if (n == 0) { + // Use the values defined in comments in the class declaration. + mSummaryMinAddr = 1; + mSummaryMaxAddr = 0; + } else { + mSummaryMinAddr = mRuleSets[0].mAddr; + mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1; + } + char buf[150]; + SprintfLiteral(buf, + "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n", + (int)n, (unsigned long long int)mSummaryMinAddr, + (unsigned long long int)mSummaryMaxAddr); + buf[sizeof(buf)-1] = 0; + mLog(buf); + + // Is now usable for binary search. + mUsable = true; + + if (0) { + mLog("\nRulesets after preening\n"); + for (size_t i = 0; i < mRuleSets.size(); ++i) { + mRuleSets[i].Print(mLog); + mLog("\n"); + } + mLog("\n"); + } +} + +bool SecMap::IsEmpty() { + return mRuleSets.empty(); +} + + +//////////////////////////////////////////////////////////////// +// SegArray // +//////////////////////////////////////////////////////////////// + +// A SegArray holds a set of address ranges that together exactly +// cover an address range, with no overlaps or holes. Each range has +// an associated value, which in this case has been specialised to be +// a simple boolean. The representation is kept to minimal canonical +// form in which adjacent ranges with the same associated value are +// merged together. Each range is represented by a |struct Seg|. +// +// SegArrays are used to keep track of which parts of the address +// space are known to contain instructions. +class SegArray { + + public: + void add(uintptr_t lo, uintptr_t hi, bool val) { + if (lo > hi) { + return; + } + split_at(lo); + if (hi < UINTPTR_MAX) { + split_at(hi+1); + } + std::vector<Seg>::size_type iLo, iHi, i; + iLo = find(lo); + iHi = find(hi); + for (i = iLo; i <= iHi; ++i) { + mSegs[i].val = val; + } + preen(); + } + + // RUNS IN NO-MALLOC CONTEXT + bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min, + /*OUT*/uintptr_t* rx_max, uintptr_t addr) { + std::vector<Seg>::size_type i = find(addr); + if (!mSegs[i].val) { + return false; + } + *rx_min = mSegs[i].lo; + *rx_max = mSegs[i].hi; + return true; + } + + SegArray() { + Seg s(0, UINTPTR_MAX, false); + mSegs.push_back(s); + } + + private: + struct Seg { + Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {} + uintptr_t lo; + uintptr_t hi; + bool val; + }; + + void preen() { + for (std::vector<Seg>::iterator iter = mSegs.begin(); + iter < mSegs.end()-1; + ++iter) { + if (iter[0].val != iter[1].val) { + continue; + } + iter[0].hi = iter[1].hi; + mSegs.erase(iter+1); + // Back up one, so as not to miss an opportunity to merge + // with the entry after this one. + --iter; + } + } + + // RUNS IN NO-MALLOC CONTEXT + std::vector<Seg>::size_type find(uintptr_t a) { + long int lo = 0; + long int hi = (long int)mSegs.size(); + while (true) { + // The unsearched space is lo .. hi inclusive. + if (lo > hi) { + // Not found. This can't happen. + return (std::vector<Seg>::size_type)(-1); + } + long int mid = lo + ((hi - lo) / 2); + uintptr_t mid_lo = mSegs[mid].lo; + uintptr_t mid_hi = mSegs[mid].hi; + if (a < mid_lo) { hi = mid-1; continue; } + if (a > mid_hi) { lo = mid+1; continue; } + return (std::vector<Seg>::size_type)mid; + } + } + + void split_at(uintptr_t a) { + std::vector<Seg>::size_type i = find(a); + if (mSegs[i].lo == a) { + return; + } + mSegs.insert( mSegs.begin()+i+1, mSegs[i] ); + mSegs[i].hi = a-1; + mSegs[i+1].lo = a; + } + + void show() { + printf("<< %d entries:\n", (int)mSegs.size()); + for (std::vector<Seg>::iterator iter = mSegs.begin(); + iter < mSegs.end(); + ++iter) { + printf(" %016llx %016llx %s\n", + (unsigned long long int)(*iter).lo, + (unsigned long long int)(*iter).hi, + (*iter).val ? "true" : "false"); + } + printf(">>\n"); + } + + std::vector<Seg> mSegs; +}; + + +//////////////////////////////////////////////////////////////// +// PriMap // +//////////////////////////////////////////////////////////////// + +class PriMap { + public: + explicit PriMap(void (*aLog)(const char*)) + : mLog(aLog) + {} + + ~PriMap() { + for (std::vector<SecMap*>::iterator iter = mSecMaps.begin(); + iter != mSecMaps.end(); + ++iter) { + delete *iter; + } + mSecMaps.clear(); + } + + // RUNS IN NO-MALLOC CONTEXT + pair<const RuleSet*, const vector<PfxInstr>*> + Lookup(uintptr_t ia) + { + SecMap* sm = FindSecMap(ia); + return pair<const RuleSet*, const vector<PfxInstr>*> + (sm ? sm->FindRuleSet(ia) : nullptr, + sm ? sm->GetPfxInstrs() : nullptr); + } + + // Add a secondary map. No overlaps allowed w.r.t. existing + // secondary maps. + void AddSecMap(SecMap* aSecMap) { + // We can't add an empty SecMap to the PriMap. But that's OK + // since we'd never be able to find anything in it anyway. + if (aSecMap->IsEmpty()) { + return; + } + + // Iterate through the SecMaps and find the right place for this + // one. At the same time, ensure that the in-order + // non-overlapping invariant is preserved (and, generally, holds). + // FIXME: this gives a cost that is O(N^2) in the total number of + // shared objects in the system. ToDo: better. + MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr); + + size_t num_secMaps = mSecMaps.size(); + uintptr_t i; + for (i = 0; i < num_secMaps; ++i) { + SecMap* sm_i = mSecMaps[i]; + MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr); + if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) { + // |aSecMap| needs to be inserted immediately before mSecMaps[i]. + break; + } + } + MOZ_ASSERT(i <= num_secMaps); + if (i == num_secMaps) { + // It goes at the end. + mSecMaps.push_back(aSecMap); + } else { + std::vector<SecMap*>::iterator iter = mSecMaps.begin() + i; + mSecMaps.insert(iter, aSecMap); + } + char buf[100]; + SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n", + (int)mSecMaps.size()); + buf[sizeof(buf)-1] = 0; + mLog(buf); + } + + // Remove and delete any SecMaps in the mapping, that intersect + // with the specified address range. + void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) { + MOZ_ASSERT(avma_min <= avma_max); + size_t num_secMaps = mSecMaps.size(); + if (num_secMaps > 0) { + intptr_t i; + // Iterate from end to start over the vector, so as to ensure + // that the special case where |avma_min| and |avma_max| denote + // the entire address space, can be completed in time proportional + // to the number of elements in the map. + for (i = (intptr_t)num_secMaps-1; i >= 0; i--) { + SecMap* sm_i = mSecMaps[i]; + if (sm_i->mSummaryMaxAddr < avma_min || + avma_max < sm_i->mSummaryMinAddr) { + // There's no overlap. Move on. + continue; + } + // We need to remove mSecMaps[i] and slide all those above it + // downwards to cover the hole. + mSecMaps.erase(mSecMaps.begin() + i); + delete sm_i; + } + } + } + + // Return the number of currently contained SecMaps. + size_t CountSecMaps() { + return mSecMaps.size(); + } + + // Assess heuristically whether the given address is an instruction + // immediately following a call instruction. + // RUNS IN NO-MALLOC CONTEXT + bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) { + if (!aInstrAddr.Valid()) { + return false; + } + + uintptr_t ia = aInstrAddr.Value(); + + // Assume that nobody would be crazy enough to put code in the + // first or last page. + if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) { + return false; + } + + // See if it falls inside a known r-x mapped area. Poking around + // outside such places risks segfaulting. + uintptr_t insns_min, insns_max; + bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia); + if (!b) { + // no code (that we know about) at this address + return false; + } + + // |ia| falls within an r-x range. So we can + // safely poke around in [insns_min, insns_max]. + +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + // Is the previous instruction recognisably a CALL? This is + // common for the 32- and 64-bit versions, except for the + // simm32(%rip) case, which is 64-bit only. + // + // For all other cases, the 64 bit versions are either identical + // to the 32 bit versions, or have an optional extra leading REX.W + // byte (0x41). Since the extra 0x41 is optional we have to + // ignore it, with the convenient result that the same matching + // logic works for both 32- and 64-bit cases. + + uint8_t* p = (uint8_t*)ia; +# if defined(LUL_ARCH_x64) + // CALL simm32(%rip) == FF15 simm32 + if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) { + return true; + } +# endif + // CALL rel32 == E8 rel32 (both 32- and 64-bit) + if (ia - 5 >= insns_min && p[-5] == 0xE8) { + return true; + } + // CALL *%eax .. CALL *%edi == FFD0 .. FFD7 (32-bit) + // CALL *%rax .. CALL *%rdi == FFD0 .. FFD7 (64-bit) + // CALL *%r8 .. CALL *%r15 == 41FFD0 .. 41FFD7 (64-bit) + if (ia - 2 >= insns_min && + p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) { + return true; + } + // Almost all of the remaining cases that occur in practice are + // of the form CALL *simm8(reg) or CALL *simm32(reg). + // + // 64 bit cases: + // + // call *simm8(%rax) FF50 simm8 + // call *simm8(%rcx) FF51 simm8 + // call *simm8(%rdx) FF52 simm8 + // call *simm8(%rbx) FF53 simm8 + // call *simm8(%rsp) FF5424 simm8 + // call *simm8(%rbp) FF55 simm8 + // call *simm8(%rsi) FF56 simm8 + // call *simm8(%rdi) FF57 simm8 + // + // call *simm8(%r8) 41FF50 simm8 + // call *simm8(%r9) 41FF51 simm8 + // call *simm8(%r10) 41FF52 simm8 + // call *simm8(%r11) 41FF53 simm8 + // call *simm8(%r12) 41FF5424 simm8 + // call *simm8(%r13) 41FF55 simm8 + // call *simm8(%r14) 41FF56 simm8 + // call *simm8(%r15) 41FF57 simm8 + // + // call *simm32(%rax) FF90 simm32 + // call *simm32(%rcx) FF91 simm32 + // call *simm32(%rdx) FF92 simm32 + // call *simm32(%rbx) FF93 simm32 + // call *simm32(%rsp) FF9424 simm32 + // call *simm32(%rbp) FF95 simm32 + // call *simm32(%rsi) FF96 simm32 + // call *simm32(%rdi) FF97 simm32 + // + // call *simm32(%r8) 41FF90 simm32 + // call *simm32(%r9) 41FF91 simm32 + // call *simm32(%r10) 41FF92 simm32 + // call *simm32(%r11) 41FF93 simm32 + // call *simm32(%r12) 41FF9424 simm32 + // call *simm32(%r13) 41FF95 simm32 + // call *simm32(%r14) 41FF96 simm32 + // call *simm32(%r15) 41FF97 simm32 + // + // 32 bit cases: + // + // call *simm8(%eax) FF50 simm8 + // call *simm8(%ecx) FF51 simm8 + // call *simm8(%edx) FF52 simm8 + // call *simm8(%ebx) FF53 simm8 + // call *simm8(%esp) FF5424 simm8 + // call *simm8(%ebp) FF55 simm8 + // call *simm8(%esi) FF56 simm8 + // call *simm8(%edi) FF57 simm8 + // + // call *simm32(%eax) FF90 simm32 + // call *simm32(%ecx) FF91 simm32 + // call *simm32(%edx) FF92 simm32 + // call *simm32(%ebx) FF93 simm32 + // call *simm32(%esp) FF9424 simm32 + // call *simm32(%ebp) FF95 simm32 + // call *simm32(%esi) FF96 simm32 + // call *simm32(%edi) FF97 simm32 + if (ia - 3 >= insns_min && + p[-3] == 0xFF && + (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) { + // imm8 case, not including %esp/%rsp + return true; + } + if (ia - 4 >= insns_min && + p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) { + // imm8 case for %esp/%rsp + return true; + } + if (ia - 6 >= insns_min && + p[-6] == 0xFF && + (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) { + // imm32 case, not including %esp/%rsp + return true; + } + if (ia - 7 >= insns_min && + p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) { + // imm32 case for %esp/%rsp + return true; + } + +#elif defined(LUL_ARCH_arm) + if (ia & 1) { + uint16_t w0 = 0, w1 = 0; + // The return address has its lowest bit set, indicating a return + // to Thumb code. + ia &= ~(uintptr_t)1; + if (ia - 2 >= insns_min && ia - 1 <= insns_max) { + w1 = *(uint16_t*)(ia - 2); + } + if (ia - 4 >= insns_min && ia - 1 <= insns_max) { + w0 = *(uint16_t*)(ia - 4); + } + // Is it a 32-bit Thumb call insn? + // BL simm26 (Encoding T1) + if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) { + return true; + } + // BLX simm26 (Encoding T2) + if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) { + return true; + } + // Other possible cases: + // (BLX Rm, Encoding T1). + // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.) + // 0100 0111 1 Rm 000 + } else { + // Returning to ARM code. + uint32_t a0 = 0; + if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) { + a0 = *(uint32_t*)(ia - 4); + } + // Leading E forces unconditional only -- fix. It could be + // anything except F, which is the deprecated NV code. + // BL simm26 (Encoding A1) + if ((a0 & 0xFF000000) == 0xEB000000) { + return true; + } + // Other possible cases: + // BLX simm26 (Encoding A2) + //if ((a0 & 0xFE000000) == 0xFA000000) + // return true; + // BLX (register) (A1): BLX <c> <Rm> + // cond 0001 0010 1111 1111 1111 0011 Rm + // again, cond can be anything except NV (0xF) + } + +#else +# error "Unsupported arch" +#endif + + // Not an insn we recognise. + return false; + } + + private: + // RUNS IN NO-MALLOC CONTEXT + SecMap* FindSecMap(uintptr_t ia) { + // Binary search mSecMaps to find one that brackets |ia|. + // lo and hi need to be signed, else the loop termination tests + // don't work properly. + long int lo = 0; + long int hi = (long int)mSecMaps.size() - 1; + while (true) { + // current unsearched space is from lo to hi, inclusive. + if (lo > hi) { + // not found + return nullptr; + } + long int mid = lo + ((hi - lo) / 2); + SecMap* mid_secMap = mSecMaps[mid]; + uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr; + uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr; + if (ia < mid_minAddr) { hi = mid-1; continue; } + if (ia > mid_maxAddr) { lo = mid+1; continue; } + MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); + return mid_secMap; + } + // NOTREACHED + } + + private: + // sorted array of per-object ranges, non overlapping, non empty + std::vector<SecMap*> mSecMaps; + + // a logging sink, for debugging. + void (*mLog)(const char*); +}; + + +//////////////////////////////////////////////////////////////// +// LUL // +//////////////////////////////////////////////////////////////// + +#define LUL_LOG(_str) \ + do { \ + char buf[200]; \ + SprintfLiteral(buf, \ + "LUL: pid %d tid %d lul-obj %p: %s", \ + getpid(), gettid(), this, (_str)); \ + buf[sizeof(buf)-1] = 0; \ + mLog(buf); \ + } while (0) + +LUL::LUL(void (*aLog)(const char*)) + : mLog(aLog) + , mAdminMode(true) + , mAdminThreadId(gettid()) + , mPriMap(new PriMap(aLog)) + , mSegArray(new SegArray()) + , mUSU(new UniqueStringUniverse()) +{ + LUL_LOG("LUL::LUL: Created object"); +} + + +LUL::~LUL() +{ + LUL_LOG("LUL::~LUL: Destroyed object"); + delete mPriMap; + delete mSegArray; + mLog = nullptr; + delete mUSU; +} + + +void +LUL::MaybeShowStats() +{ + // This is racey in the sense that it can't guarantee that + // n_new == n_new_Context + n_new_CFI + n_new_Scanned + // if it should happen that mStats is updated by some other thread + // in between computation of n_new and n_new_{Context,CFI,Scanned}. + // But it's just stats printing, so we don't really care. + uint32_t n_new = mStats - mStatsPrevious; + if (n_new >= 5000) { + uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext; + uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI; + uint32_t n_new_Scanned = mStats.mScanned - mStatsPrevious.mScanned; + mStatsPrevious = mStats; + char buf[200]; + SprintfLiteral(buf, + "LUL frame stats: TOTAL %5u" + " CTX %4u CFI %4u SCAN %4u", + n_new, n_new_Context, n_new_CFI, n_new_Scanned); + buf[sizeof(buf)-1] = 0; + mLog(buf); + } +} + + +void +LUL::EnableUnwinding() +{ + LUL_LOG("LUL::EnableUnwinding"); + // Don't assert for Admin mode here. That is, tolerate a call here + // if we are already in Unwinding mode. + MOZ_ASSERT(gettid() == mAdminThreadId); + + mAdminMode = false; +} + + +void +LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, + const char* aFileName, const void* aMappedImage) +{ + MOZ_ASSERT(mAdminMode); + MOZ_ASSERT(gettid() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyMap %llx %llu %s\n", + (unsigned long long int)aRXavma, (unsigned long long int)aSize, + aFileName); + buf[sizeof(buf)-1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + + // Here's a new mapping, for this object. + SecMap* smap = new SecMap(mLog); + + // Read CFI or EXIDX unwind data into |smap|. + if (!aMappedImage) { + (void)lul::ReadSymbolData( + string(aFileName), std::vector<string>(), smap, + (void*)aRXavma, aSize, mUSU, mLog); + } else { + (void)lul::ReadSymbolDataInternal( + (const uint8_t*)aMappedImage, + string(aFileName), std::vector<string>(), smap, + (void*)aRXavma, aSize, mUSU, mLog); + } + + mLog("NotifyMap .. preparing entries\n"); + + smap->PrepareRuleSets(aRXavma, aSize); + + SprintfLiteral(buf, + "NotifyMap got %lld entries\n", (long long int)smap->Size()); + buf[sizeof(buf)-1] = 0; + mLog(buf); + + // Add it to the primary map (the top level set of mapped objects). + mPriMap->AddSecMap(smap); + + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + + +void +LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) +{ + MOZ_ASSERT(mAdminMode); + MOZ_ASSERT(gettid() == mAdminThreadId); + + mLog(":\n"); + char buf[200]; + SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n", + (unsigned long long int)aRXavma, (unsigned long long int)aSize); + buf[sizeof(buf)-1] = 0; + mLog(buf); + + // Ignore obviously-stupid notifications. + if (aSize > 0) { + // Tell the segment array about the mapping, so that the stack + // scan and __kernel_syscall mechanisms know where valid code is. + mSegArray->add(aRXavma, aRXavma + aSize - 1, true); + } +} + + +void +LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) +{ + MOZ_ASSERT(mAdminMode); + MOZ_ASSERT(gettid() == mAdminThreadId); + + mLog(":\n"); + char buf[100]; + SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n", + (unsigned long long int)aRXavmaMin, + (unsigned long long int)aRXavmaMax); + buf[sizeof(buf)-1] = 0; + mLog(buf); + + MOZ_ASSERT(aRXavmaMin <= aRXavmaMax); + + // Remove from the primary map, any secondary maps that intersect + // with the address range. Also delete the secondary maps. + mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax); + + // Tell the segment array that the address range no longer + // contains valid code. + mSegArray->add(aRXavmaMin, aRXavmaMax, false); + + SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n", + (int)mPriMap->CountSecMaps()); + buf[sizeof(buf)-1] = 0; + mLog(buf); +} + + +size_t +LUL::CountMappings() +{ + MOZ_ASSERT(mAdminMode); + MOZ_ASSERT(gettid() == mAdminThreadId); + + return mPriMap->CountSecMaps(); +} + + +// RUNS IN NO-MALLOC CONTEXT +static +TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) +{ + if (!aAddr.Valid()) { + return TaggedUWord(); + } + + // Lower limit check. |aAddr.Value()| is the lowest requested address + // and |aStackImg->mStartAvma| is the lowest address we actually have, + // so the comparison is straightforward. + if (aAddr.Value() < aStackImg->mStartAvma) { + return TaggedUWord(); + } + + // Upper limit check. We must compute the highest requested address + // and the highest address we actually have, but being careful to + // avoid overflow. In particular if |aAddr| is 0xFFF...FFF or the + // 3/7 values below that, then we will get overflow. See bug #1245477. + typedef CheckedInt<uintptr_t> CheckedUWord; + CheckedUWord highest_requested_plus_one + = CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t)); + CheckedUWord highest_available_plus_one + = CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen); + if (!highest_requested_plus_one.isValid() // overflow? + || !highest_available_plus_one.isValid() // overflow? + || (highest_requested_plus_one.value() + > highest_available_plus_one.value())) { // in range? + return TaggedUWord(); + } + + return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + aAddr.Value() + - aStackImg->mStartAvma)); +} + +// RUNS IN NO-MALLOC CONTEXT +static +TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs, + TaggedUWord aCFA) +{ + switch (aReg) { + case DW_REG_CFA: return aCFA; +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + case DW_REG_INTEL_XBP: return aOldRegs->xbp; + case DW_REG_INTEL_XSP: return aOldRegs->xsp; + case DW_REG_INTEL_XIP: return aOldRegs->xip; +#elif defined(LUL_ARCH_arm) + case DW_REG_ARM_R7: return aOldRegs->r7; + case DW_REG_ARM_R11: return aOldRegs->r11; + case DW_REG_ARM_R12: return aOldRegs->r12; + case DW_REG_ARM_R13: return aOldRegs->r13; + case DW_REG_ARM_R14: return aOldRegs->r14; + case DW_REG_ARM_R15: return aOldRegs->r15; +#else +# error "Unsupported arch" +#endif + default: MOZ_ASSERT(0); return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +// See prototype for comment. +TaggedUWord EvaluatePfxExpr(int32_t start, + const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>& aPfxInstrs) +{ + // A small evaluation stack, and a stack pointer, which points to + // the highest numbered in-use element. + const int N_STACK = 10; + TaggedUWord stack[N_STACK]; + int stackPointer = -1; + for (int i = 0; i < N_STACK; i++) + stack[i] = TaggedUWord(); + +# define PUSH(_tuw) \ + do { \ + if (stackPointer >= N_STACK-1) goto fail; /* overflow */ \ + stack[++stackPointer] = (_tuw); \ + } while (0) + +# define POP(_lval) \ + do { \ + if (stackPointer < 0) goto fail; /* underflow */ \ + _lval = stack[stackPointer--]; \ + } while (0) + + // Cursor in the instruction sequence. + size_t curr = start + 1; + + // Check the start point is sane. + size_t nInstrs = aPfxInstrs.size(); + if (start < 0 || (size_t)start >= nInstrs) + goto fail; + + { + // The instruction sequence must start with PX_Start. If not, + // something is seriously wrong. + PfxInstr first = aPfxInstrs[start]; + if (first.mOpcode != PX_Start) + goto fail; + + // Push the CFA on the stack to start with (or not), as required by + // the original DW_OP_*expression* CFI. + if (first.mOperand != 0) + PUSH(aCFA); + } + + while (true) { + if (curr >= nInstrs) + goto fail; // ran off the end of the sequence + + PfxInstr pfxi = aPfxInstrs[curr++]; + if (pfxi.mOpcode == PX_End) + break; // we're done + + switch (pfxi.mOpcode) { + case PX_Start: + // This should appear only at the start of the sequence. + goto fail; + case PX_End: + // We just took care of that, so we shouldn't see it again. + MOZ_ASSERT(0); + goto fail; + case PX_SImm32: + PUSH(TaggedUWord((intptr_t)pfxi.mOperand)); + break; + case PX_DwReg: { + DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand; + MOZ_ASSERT(reg != DW_REG_CFA); + PUSH(EvaluateReg(reg, aOldRegs, aCFA)); + break; + } + case PX_Deref: { + TaggedUWord addr; + POP(addr); + PUSH(DerefTUW(addr, aStackImg)); + break; + } + case PX_Add: { + TaggedUWord x, y; + POP(x); POP(y); PUSH(y + x); + break; + } + case PX_Sub: { + TaggedUWord x, y; + POP(x); POP(y); PUSH(y - x); + break; + } + case PX_And: { + TaggedUWord x, y; + POP(x); POP(y); PUSH(y & x); + break; + } + case PX_Or: { + TaggedUWord x, y; + POP(x); POP(y); PUSH(y | x); + break; + } + case PX_CmpGES: { + TaggedUWord x, y; + POP(x); POP(y); PUSH(y.CmpGEs(x)); + break; + } + case PX_Shl: { + TaggedUWord x, y; + POP(x); POP(y); PUSH(y << x); + break; + } + default: + MOZ_ASSERT(0); + goto fail; + } + } // while (true) + + // Evaluation finished. The top value on the stack is the result. + if (stackPointer >= 0) { + return stack[stackPointer]; + } + // Else fall through + + fail: + return TaggedUWord(); + +# undef PUSH +# undef POP +} + +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>* aPfxInstrs) const +{ + switch (mHow) { + case UNKNOWN: + return TaggedUWord(); + case NODEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return tuw; + } + case DEREF: { + TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); + tuw = tuw + TaggedUWord((intptr_t)mOffset); + return DerefTUW(tuw, aStackImg); + } + case PFXEXPR: { + MOZ_ASSERT(aPfxInstrs); + if (!aPfxInstrs) { + return TaggedUWord(); + } + return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs); + } + default: + MOZ_ASSERT(0); + return TaggedUWord(); + } +} + +// RUNS IN NO-MALLOC CONTEXT +static +void UseRuleSet(/*MOD*/UnwindRegs* aRegs, + const StackImage* aStackImg, const RuleSet* aRS, + const vector<PfxInstr>* aPfxInstrs) +{ + // Take a copy of regs, since we'll need to refer to the old values + // whilst computing the new ones. + UnwindRegs old_regs = *aRegs; + + // Mark all the current register values as invalid, so that the + // caller can see, on our return, which ones have been computed + // anew. If we don't even manage to compute a new PC value, then + // the caller will have to abandon the unwind. + // FIXME: Create and use instead: aRegs->SetAllInvalid(); +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + aRegs->xbp = TaggedUWord(); + aRegs->xsp = TaggedUWord(); + aRegs->xip = TaggedUWord(); +#elif defined(LUL_ARCH_arm) + aRegs->r7 = TaggedUWord(); + aRegs->r11 = TaggedUWord(); + aRegs->r12 = TaggedUWord(); + aRegs->r13 = TaggedUWord(); + aRegs->r14 = TaggedUWord(); + aRegs->r15 = TaggedUWord(); +#else +# error "Unsupported arch" +#endif + + // This is generally useful. + const TaggedUWord inval = TaggedUWord(); + + // First, compute the CFA. + TaggedUWord cfa + = aRS->mCfaExpr.EvaluateExpr(&old_regs, + inval/*old cfa*/, aStackImg, aPfxInstrs); + + // If we didn't manage to compute the CFA, well .. that's ungood, + // but keep going anyway. It'll be OK provided none of the register + // value rules mention the CFA. In any case, compute the new values + // for each register that we're tracking. + +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + aRegs->xbp + = aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xsp + = aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->xip + = aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#elif defined(LUL_ARCH_arm) + aRegs->r7 + = aRS->mR7expr .EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r11 + = aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r12 + = aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r13 + = aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r14 + = aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); + aRegs->r15 + = aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); +#else +# error "Unsupported arch" +#endif + + // We're done. Any regs for which we didn't manage to compute a + // new value will now be marked as invalid. +} + +// RUNS IN NO-MALLOC CONTEXT +void +LUL::Unwind(/*OUT*/uintptr_t* aFramePCs, + /*OUT*/uintptr_t* aFrameSPs, + /*OUT*/size_t* aFramesUsed, + /*OUT*/size_t* aScannedFramesAcquired, + size_t aFramesAvail, + size_t aScannedFramesAllowed, + UnwindRegs* aStartRegs, StackImage* aStackImg) +{ + MOZ_ASSERT(!mAdminMode); + + ///////////////////////////////////////////////////////// + // BEGIN UNWIND + + *aFramesUsed = 0; + + UnwindRegs regs = *aStartRegs; + TaggedUWord last_valid_sp = TaggedUWord(); + + // Stack-scan control + unsigned int n_scanned_frames = 0; // # s-s frames recovered so far + static const int NUM_SCANNED_WORDS = 50; // max allowed scan length + + while (true) { + + if (DEBUG_MAIN) { + char buf[300]; + mLog("\n"); +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + SprintfLiteral(buf, + "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n", + (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(), + (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(), + (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value()); + buf[sizeof(buf)-1] = 0; + mLog(buf); +#elif defined(LUL_ARCH_arm) + SprintfLiteral(buf, + "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx" + " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n", + (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(), + (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(), + (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(), + (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(), + (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(), + (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value()); + buf[sizeof(buf)-1] = 0; + mLog(buf); +#else +# error "Unsupported arch" +#endif + } + +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + TaggedUWord ia = regs.xip; + TaggedUWord sp = regs.xsp; +#elif defined(LUL_ARCH_arm) + TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14); + TaggedUWord sp = regs.r13; +#else +# error "Unsupported arch" +#endif + + if (*aFramesUsed >= aFramesAvail) { + break; + } + + // If we don't have a valid value for the PC, give up. + if (!ia.Valid()) { + break; + } + + // If this is the innermost frame, record the SP value, which + // presumably is valid. If this isn't the innermost frame, and we + // have a valid SP value, check that its SP value isn't less that + // the one we've seen so far, so as to catch potential SP value + // cycles. + if (*aFramesUsed == 0) { + last_valid_sp = sp; + } else { + MOZ_ASSERT(last_valid_sp.Valid()); + if (sp.Valid()) { + if (sp.Value() < last_valid_sp.Value()) { + // Hmm, SP going in the wrong direction. Let's stop. + break; + } + // Remember where we got to. + last_valid_sp = sp; + } + } + + // For the innermost frame, the IA value is what we need. For all + // other frames, it's actually the return address, so back up one + // byte so as to get it into the calling instruction. + aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1); + aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0; + (*aFramesUsed)++; + + // Find the RuleSet for the current IA, if any. This will also + // query the backing (secondary) maps if it isn't found in the + // thread-local cache. + + // If this isn't the innermost frame, back up into the calling insn. + if (*aFramesUsed > 1) { + ia = ia + TaggedUWord((uintptr_t)(-1)); + } + + pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs + = mPriMap->Lookup(ia.Value()); + const RuleSet* ruleset = ruleset_and_pfxinstrs.first; + const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second; + + if (DEBUG_MAIN) { + char buf[100]; + SprintfLiteral(buf, "ruleset for 0x%llx = %p\n", + (unsigned long long int)ia.Value(), ruleset); + buf[sizeof(buf)-1] = 0; + mLog(buf); + } + + ///////////////////////////////////////////// + //// + // On 32 bit x86-linux, syscalls are often done via the VDSO + // function __kernel_vsyscall, which doesn't have a corresponding + // object that we can read debuginfo from. That effectively kills + // off all stack traces for threads blocked in syscalls. Hence + // special-case by looking at the code surrounding the program + // counter. + // + // 0xf7757420 <__kernel_vsyscall+0>: push %ecx + // 0xf7757421 <__kernel_vsyscall+1>: push %edx + // 0xf7757422 <__kernel_vsyscall+2>: push %ebp + // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp + // 0xf7757425 <__kernel_vsyscall+5>: sysenter + // 0xf7757427 <__kernel_vsyscall+7>: nop + // 0xf7757428 <__kernel_vsyscall+8>: nop + // 0xf7757429 <__kernel_vsyscall+9>: nop + // 0xf775742a <__kernel_vsyscall+10>: nop + // 0xf775742b <__kernel_vsyscall+11>: nop + // 0xf775742c <__kernel_vsyscall+12>: nop + // 0xf775742d <__kernel_vsyscall+13>: nop + // 0xf775742e <__kernel_vsyscall+14>: int $0x80 + // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp + // 0xf7757431 <__kernel_vsyscall+17>: pop %edx + // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx + // 0xf7757433 <__kernel_vsyscall+19>: ret + // + // In cases where the sampled thread is blocked in a syscall, its + // program counter will point at "pop %ebp". Hence we look for + // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and + // the corresponding register-recovery actions are: + // new_ebp = *(old_esp + 0) + // new eip = *(old_esp + 12) + // new_esp = old_esp + 16 + // + // It may also be the case that the program counter points two + // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in + // the case where the syscall has been restarted but the thread + // hasn't been rescheduled. The code below doesn't handle that; + // it could easily be made to. + // +#if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux) + if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) { + uintptr_t insns_min, insns_max; + uintptr_t eip = ia.Value(); + bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip); + if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) { + uint8_t* eipC = (uint8_t*)eip; + if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D && + eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) { + TaggedUWord sp_plus_0 = sp; + TaggedUWord sp_plus_12 = sp; + TaggedUWord sp_plus_16 = sp; + sp_plus_12 = sp_plus_12 + TaggedUWord(12); + sp_plus_16 = sp_plus_16 + TaggedUWord(16); + TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg); + TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg); + TaggedUWord new_esp = sp_plus_16; + if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) { + regs.xbp = new_ebp; + regs.xip = new_eip; + regs.xsp = new_esp; + continue; + } + } + } + } +#endif + //// + ///////////////////////////////////////////// + + // So, do we have a ruleset for this address? If so, use it now. + if (ruleset) { + + if (DEBUG_MAIN) { + ruleset->Print(mLog); mLog("\n"); + } + // Use the RuleSet to compute the registers for the previous + // frame. |regs| is modified in-place. + UseRuleSet(®s, aStackImg, ruleset, pfxinstrs); + + } else { + + // There's no RuleSet for the specified address, so see if + // it's possible to get anywhere by stack-scanning. + + // Use stack scanning frugally. + if (n_scanned_frames++ >= aScannedFramesAllowed) { + break; + } + + // We can't scan the stack without a valid, aligned stack pointer. + if (!sp.IsAligned()) { + break; + } + + bool scan_succeeded = false; + for (int i = 0; i < NUM_SCANNED_WORDS; ++i) { + TaggedUWord aWord = DerefTUW(sp, aStackImg); + // aWord is something we fished off the stack. It should be + // valid, unless we overran the stack bounds. + if (!aWord.Valid()) { + break; + } + + // Now, does aWord point inside a text section and immediately + // after something that looks like a call instruction? + if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) { + // Yes it does. Update the unwound registers heuristically, + // using the same schemes as Breakpad does. + scan_succeeded = true; + (*aScannedFramesAcquired)++; + +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + // The same logic applies for the 32- and 64-bit cases. + // Register names of the form xsp etc refer to (eg) esp in + // the 32-bit case and rsp in the 64-bit case. +# if defined(LUL_ARCH_x64) + const int wordSize = 8; +# else + const int wordSize = 4; +# endif + // The return address -- at XSP -- will have been pushed by + // the CALL instruction. So the caller's XSP value + // immediately before and after that CALL instruction is the + // word above XSP. + regs.xsp = sp + TaggedUWord(wordSize); + + // aWord points at the return point, so back up one byte + // to put it in the calling instruction. + regs.xip = aWord + TaggedUWord((uintptr_t)(-1)); + + // Computing a new value from the frame pointer is more tricky. + if (regs.xbp.Valid() && + sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) { + // One possibility is that the callee begins with the standard + // preamble "push %xbp; mov %xsp, %xbp". In which case, the + // (1) caller's XBP value will be at the word below XSP, and + // (2) the current (callee's) XBP will point at that word: + regs.xbp = DerefTUW(regs.xbp, aStackImg); + } else if (regs.xbp.Valid() && + sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) { + // If that didn't work out, maybe the callee didn't change + // XBP, so it still holds the caller's value. For that to + // be plausible, XBP will need to have a value at least + // higher than XSP since that holds the purported return + // address. In which case do nothing, since XBP already + // holds the "right" value. + } else { + // Mark XBP as invalid, so that subsequent unwind iterations + // don't assume it holds valid data. + regs.xbp = TaggedUWord(); + } + + // Move on to the next word up the stack + sp = sp + TaggedUWord(wordSize); + +#elif defined(LUL_ARCH_arm) + // Set all registers to be undefined, except for SP(R13) and + // PC(R15). + + // aWord points either at the return point, if returning to + // ARM code, or one insn past the return point if returning + // to Thumb code. In both cases, aWord-2 is guaranteed to + // fall within the calling instruction. + regs.r15 = aWord + TaggedUWord((uintptr_t)(-2)); + + // Make SP be the word above the location where the return + // address was found. + regs.r13 = sp + TaggedUWord(4); + + // All other regs are undefined. + regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord(); + + // Move on to the next word up the stack + sp = sp + TaggedUWord(4); + +#else +# error "Unknown plat" +#endif + + break; + } + + } // for (int i = 0; i < NUM_SCANNED_WORDS; i++) + + // We tried to make progress by scanning the stack, but failed. + // So give up -- fall out of the top level unwind loop. + if (!scan_succeeded) { + break; + } + } + + } // top level unwind loop + + // END UNWIND + ///////////////////////////////////////////////////////// +} + + +//////////////////////////////////////////////////////////////// +// LUL Unit Testing // +//////////////////////////////////////////////////////////////// + +static const int LUL_UNIT_TEST_STACK_SIZE = 16384; + +// This function is innermost in the test call sequence. It uses LUL +// to unwind, and compares the result with the sequence specified in +// the director string. These need to agree in order for the test to +// pass. In order not to screw up the results, this function needs +// to have a not-very big stack frame, since we're only presenting +// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and +// that chunk unavoidably includes the frame for this function. +// +// This function must not be inlined into its callers. Doing so will +// cause the expected-vs-actual backtrace consistency checking to +// fail. Prints summary results to |aLUL|'s logging sink and also +// returns a boolean indicating whether or not the test passed. +static __attribute__((noinline)) +bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring) +{ + // Get hold of the current unwind-start registers. + UnwindRegs startRegs; + memset(&startRegs, 0, sizeof(startRegs)); +#if defined(LUL_PLAT_x64_linux) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "leaq 0(%%rip), %%r15" "\n\t" + "movq %%r15, 0(%0)" "\n\t" + "movq %%rsp, 8(%0)" "\n\t" + "movq %%rbp, 16(%0)" "\n" + : : "r"(&block[0]) : "memory", "r15" + ); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 128; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android) + volatile uintptr_t block[3]; + MOZ_ASSERT(sizeof(block) == 12); + __asm__ __volatile__( + ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/ "\n\t" + "popl %%edi" "\n\t" + "movl %%edi, 0(%0)" "\n\t" + "movl %%esp, 4(%0)" "\n\t" + "movl %%ebp, 8(%0)" "\n" + : : "r"(&block[0]) : "memory", "edi" + ); + startRegs.xip = TaggedUWord(block[0]); + startRegs.xsp = TaggedUWord(block[1]); + startRegs.xbp = TaggedUWord(block[2]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#elif defined(LUL_PLAT_arm_android) + volatile uintptr_t block[6]; + MOZ_ASSERT(sizeof(block) == 24); + __asm__ __volatile__( + "mov r0, r15" "\n\t" + "str r0, [%0, #0]" "\n\t" + "str r14, [%0, #4]" "\n\t" + "str r13, [%0, #8]" "\n\t" + "str r12, [%0, #12]" "\n\t" + "str r11, [%0, #16]" "\n\t" + "str r7, [%0, #20]" "\n" + : : "r"(&block[0]) : "memory", "r0" + ); + startRegs.r15 = TaggedUWord(block[0]); + startRegs.r14 = TaggedUWord(block[1]); + startRegs.r13 = TaggedUWord(block[2]); + startRegs.r12 = TaggedUWord(block[3]); + startRegs.r11 = TaggedUWord(block[4]); + startRegs.r7 = TaggedUWord(block[5]); + const uintptr_t REDZONE_SIZE = 0; + uintptr_t start = block[1] - REDZONE_SIZE; +#else +# error "Unsupported platform" +#endif + + // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the + // stack. + uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE; + uintptr_t ws = sizeof(void*); + start &= ~(ws-1); + end &= ~(ws-1); + uintptr_t nToCopy = end - start; + if (nToCopy > lul::N_STACK_BYTES) { + nToCopy = lul::N_STACK_BYTES; + } + MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); + StackImage* stackImg = new StackImage(); + stackImg->mLen = nToCopy; + stackImg->mStartAvma = start; + if (nToCopy > 0) { + MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy); + memcpy(&stackImg->mContents[0], (void*)start, nToCopy); + } + + // Unwind it. + const int MAX_TEST_FRAMES = 64; + uintptr_t framePCs[MAX_TEST_FRAMES]; + uintptr_t frameSPs[MAX_TEST_FRAMES]; + size_t framesAvail = mozilla::ArrayLength(framePCs); + size_t framesUsed = 0; + size_t scannedFramesAllowed = 0; + size_t scannedFramesAcquired = 0; + aLUL->Unwind( &framePCs[0], &frameSPs[0], + &framesUsed, &scannedFramesAcquired, + framesAvail, scannedFramesAllowed, + &startRegs, stackImg ); + + delete stackImg; + + //if (0) { + // // Show what we have. + // fprintf(stderr, "Got %d frames:\n", (int)framesUsed); + // for (size_t i = 0; i < framesUsed; i++) { + // fprintf(stderr, " [%2d] SP %p PC %p\n", + // (int)i, (void*)frameSPs[i], (void*)framePCs[i]); + // } + // fprintf(stderr, "\n"); + //} + + // Check to see if there's a consistent binding between digits in + // the director string ('1' .. '8') and the PC values acquired by + // the unwind. If there isn't, the unwinding has failed somehow. + uintptr_t binding[8]; // binding for '1' .. binding for '8' + memset((void*)binding, 0, sizeof(binding)); + + // The general plan is to work backwards along the director string + // and forwards along the framePCs array. Doing so corresponds to + // working outwards from the innermost frame of the recursive test set. + const char* cursor = dstring; + + // Find the end. This leaves |cursor| two bytes past the first + // character we want to look at -- see comment below. + while (*cursor) cursor++; + + // Counts the number of consistent frames. + size_t nConsistent = 0; + + // Iterate back to the start of the director string. The starting + // points are a bit complex. We can't use framePCs[0] because that + // contains the PC in this frame (above). We can't use framePCs[1] + // because that will contain the PC at return point in the recursive + // test group (TestFn[1-8]) for their call "out" to this function, + // GetAndCheckStackTrace. Although LUL will compute a correct + // return address, that will not be the same return address as for a + // recursive call out of the the function to another function in the + // group. Hence we can only start consistency checking at + // framePCs[2]. + // + // To be consistent, then, we must ignore the last element in the + // director string as that corresponds to framePCs[1]. Hence the + // start points are: framePCs[2] and the director string 2 bytes + // before the terminating zero. + // + // Also as a result of this, the number of consistent frames counted + // will always be one less than the length of the director string + // (not including its terminating zero). + size_t frameIx; + for (cursor = cursor-2, frameIx = 2; + cursor >= dstring && frameIx < framesUsed; + cursor--, frameIx++) { + char c = *cursor; + uintptr_t pc = framePCs[frameIx]; + // If this doesn't hold, the director string is ill-formed. + MOZ_ASSERT(c >= '1' && c <= '8'); + int n = ((int)c) - ((int)'1'); + if (binding[n] == 0) { + // There's no binding for |c| yet, so install |pc| and carry on. + binding[n] = pc; + nConsistent++; + continue; + } + // There's a pre-existing binding for |c|. Check it's consistent. + if (binding[n] != pc) { + // Not consistent. Give up now. + break; + } + // Consistent. Keep going. + nConsistent++; + } + + // So, did we succeed? + bool passed = nConsistent+1 == strlen(dstring); + + // Show the results. + char buf[200]; + SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring); + buf[sizeof(buf)-1] = 0; + aLUL->mLog(buf); + SprintfLiteral(buf, + "LULUnitTest: %d consistent, %d in dstring: %s\n", + (int)nConsistent, (int)strlen(dstring), + passed ? "PASS" : "FAIL"); + buf[sizeof(buf)-1] = 0; + aLUL->mLog(buf); + + return passed; +} + + +// Macro magic to create a set of 8 mutually recursive functions with +// varying frame sizes. These will recurse amongst themselves as +// specified by |strP|, the directory string, and call +// GetAndCheckStackTrace when the string becomes empty, passing it the +// original value of the string. This checks the result, printing +// results on |aLUL|'s logging sink, and also returns a boolean +// indicating whether or not the results are acceptable (correct). + +#define DECL_TEST_FN(NAME) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP); + +#define GEN_TEST_FN(NAME, FRAMESIZE) \ + bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \ + volatile char space[FRAMESIZE]; \ + memset((char*)&space[0], 0, sizeof(space)); \ + if (*strP == '\0') { \ + /* We've come to the end of the director string. */ \ + /* Take a stack snapshot. */ \ + return GetAndCheckStackTrace(aLUL, strPorig); \ + } else { \ + /* Recurse onwards. This is a bit subtle. The obvious */ \ + /* thing to do here is call onwards directly, from within the */ \ + /* arms of the case statement. That gives a problem in that */ \ + /* there will be multiple return points inside each function when */ \ + /* unwinding, so it will be difficult to check for consistency */ \ + /* against the director string. Instead, we make an indirect */ \ + /* call, so as to guarantee that there is only one call site */ \ + /* within each function. This does assume that the compiler */ \ + /* won't transform it back to the simple direct-call form. */ \ + /* To discourage it from doing so, the call is bracketed with */ \ + /* __asm__ __volatile__ sections so as to make it not-movable. */ \ + bool (*nextFn)(LUL*, const char*, const char*) = NULL; \ + switch (*strP) { \ + case '1': nextFn = TestFn1; break; \ + case '2': nextFn = TestFn2; break; \ + case '3': nextFn = TestFn3; break; \ + case '4': nextFn = TestFn4; break; \ + case '5': nextFn = TestFn5; break; \ + case '6': nextFn = TestFn6; break; \ + case '7': nextFn = TestFn7; break; \ + case '8': nextFn = TestFn8; break; \ + default: nextFn = TestFn8; break; \ + } \ + __asm__ __volatile__("":::"cc","memory"); \ + bool passed = nextFn(aLUL, strPorig, strP+1); \ + __asm__ __volatile__("":::"cc","memory"); \ + return passed; \ + } \ + } + +// The test functions are mutually recursive, so it is necessary to +// declare them before defining them. +DECL_TEST_FN(TestFn1) +DECL_TEST_FN(TestFn2) +DECL_TEST_FN(TestFn3) +DECL_TEST_FN(TestFn4) +DECL_TEST_FN(TestFn5) +DECL_TEST_FN(TestFn6) +DECL_TEST_FN(TestFn7) +DECL_TEST_FN(TestFn8) + +GEN_TEST_FN(TestFn1, 123) +GEN_TEST_FN(TestFn2, 456) +GEN_TEST_FN(TestFn3, 789) +GEN_TEST_FN(TestFn4, 23) +GEN_TEST_FN(TestFn5, 47) +GEN_TEST_FN(TestFn6, 117) +GEN_TEST_FN(TestFn7, 1) +GEN_TEST_FN(TestFn8, 99) + + +// This starts the test sequence going. Call here to generate a +// sequence of calls as directed by the string |dstring|. The call +// sequence will, from its innermost frame, finish by calling +// GetAndCheckStackTrace() and passing it |dstring|. +// GetAndCheckStackTrace() will unwind the stack, check consistency +// of those results against |dstring|, and print a pass/fail message +// to aLUL's logging sink. It also updates the counters in *aNTests +// and aNTestsPassed. +__attribute__((noinline)) void +TestUnw(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, + LUL* aLUL, const char* dstring) +{ + // Ensure that the stack has at least this much space on it. This + // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes + // and hand it to LUL. Safe in the sense that no segfault can + // happen because the stack is at least this big. This is all + // somewhat dubious in the sense that a sufficiently clever compiler + // (clang, for one) can figure out that space[] is unused and delete + // it from the frame. Hence the somewhat elaborate hoop jumping to + // fill it up before the call and to at least appear to use the + // value afterwards. + int i; + volatile char space[LUL_UNIT_TEST_STACK_SIZE]; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + space[i] = (char)(i & 0x7F); + } + + // Really run the test. + bool passed = TestFn1(aLUL, dstring, dstring); + + // Appear to use space[], by visiting the value to compute some kind + // of checksum, and then (apparently) using the checksum. + int sum = 0; + for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { + // If this doesn't fool LLVM, I don't know what will. + sum += space[i] - 3*i; + } + __asm__ __volatile__("" : : "r"(sum)); + + // Update the counters. + (*aNTests)++; + if (passed) { + (*aNTestsPassed)++; + } +} + + +void +RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL) +{ + aLUL->mLog(":\n"); + aLUL->mLog("LULUnitTest: BEGIN\n"); + *aNTests = *aNTestsPassed = 0; + TestUnw(aNTests, aNTestsPassed, aLUL, "11111111"); + TestUnw(aNTests, aNTestsPassed, aLUL, "11222211"); + TestUnw(aNTests, aNTestsPassed, aLUL, "111222333"); + TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212"); + TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258"); + TestUnw(aNTests, aNTestsPassed, aLUL, + "123456781122334455667788777777777777777777777"); + aLUL->mLog("LULUnitTest: END\n"); + aLUL->mLog(":\n"); +} + + +} // namespace lul diff --git a/tools/profiler/lul/LulMain.h b/tools/profiler/lul/LulMain.h new file mode 100644 index 000000000..0916d1b26 --- /dev/null +++ b/tools/profiler/lul/LulMain.h @@ -0,0 +1,397 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulMain_h +#define LulMain_h + +#include "LulPlatformMacros.h" +#include "mozilla/Atomics.h" + +// LUL: A Lightweight Unwind Library. +// This file provides the end-user (external) interface for LUL. + +// Some comments about naming in the implementation. These are safe +// to ignore if you are merely using LUL, but are important if you +// hack on its internals. +// +// Debuginfo readers in general have tended to use the word "address" +// to mean several different things. This sometimes makes them +// difficult to understand and maintain. LUL tries hard to avoid +// using the word "address" and instead uses the following more +// precise terms: +// +// * SVMA ("Stated Virtual Memory Address"): this is an address of a +// symbol (etc) as it is stated in the symbol table, or other +// metadata, of an object. Such values are typically small and +// start from zero or thereabouts, unless the object has been +// prelinked. +// +// * AVMA ("Actual Virtual Memory Address"): this is the address of a +// symbol (etc) in a running process, that is, once the associated +// object has been mapped into a process. Such values are typically +// much larger than SVMAs, since objects can get mapped arbitrarily +// far along the address space. +// +// * "Bias": the difference between AVMA and SVMA for a given symbol +// (specifically, AVMA - SVMA). The bias is always an integral +// number of pages. Once we know the bias for a given object's +// text section (for example), we can compute the AVMAs of all of +// its text symbols by adding the bias to their SVMAs. +// +// * "Image address": typically, to read debuginfo from an object we +// will temporarily mmap in the file so as to read symbol tables +// etc. Addresses in this temporary mapping are called "Image +// addresses". Note that the temporary mapping is entirely +// unrelated to the mappings of the file that the dynamic linker +// must perform merely in order to get the program to run. Hence +// image addresses are unrelated to either SVMAs or AVMAs. + + +namespace lul { + +// A machine word plus validity tag. +class TaggedUWord { +public: + // RUNS IN NO-MALLOC CONTEXT + // Construct a valid one. + explicit TaggedUWord(uintptr_t w) + : mValue(w) + , mValid(true) + {} + + // RUNS IN NO-MALLOC CONTEXT + // Construct an invalid one. + TaggedUWord() + : mValue(0) + , mValid(false) + {} + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator+(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() + rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator-(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() - rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator&(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() & rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator|(TaggedUWord rhs) const { + return (Valid() && rhs.Valid()) ? TaggedUWord(Value() | rhs.Value()) + : TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord CmpGEs(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + intptr_t s1 = (intptr_t)Value(); + intptr_t s2 = (intptr_t)rhs.Value(); + return TaggedUWord(s1 >= s2 ? 1 : 0); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord operator<<(TaggedUWord rhs) const { + if (Valid() && rhs.Valid()) { + uintptr_t shift = rhs.Value(); + if (shift < 8 * sizeof(uintptr_t)) + return TaggedUWord(Value() << shift); + } + return TaggedUWord(); + } + + // RUNS IN NO-MALLOC CONTEXT + // Is equal? Note: non-validity on either side gives non-equality. + bool operator==(TaggedUWord other) const { + return (mValid && other.Valid()) ? (mValue == other.Value()) : false; + } + + // RUNS IN NO-MALLOC CONTEXT + // Is it word-aligned? + bool IsAligned() const { + return mValid && (mValue & (sizeof(uintptr_t)-1)) == 0; + } + + // RUNS IN NO-MALLOC CONTEXT + uintptr_t Value() const { return mValue; } + + // RUNS IN NO-MALLOC CONTEXT + bool Valid() const { return mValid; } + +private: + uintptr_t mValue; + bool mValid; +}; + + +// The registers, with validity tags, that will be unwound. + +struct UnwindRegs { +#if defined(LUL_ARCH_arm) + TaggedUWord r7; + TaggedUWord r11; + TaggedUWord r12; + TaggedUWord r13; + TaggedUWord r14; + TaggedUWord r15; +#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + TaggedUWord xbp; + TaggedUWord xsp; + TaggedUWord xip; +#else +# error "Unknown plat" +#endif +}; + + +// The maximum number of bytes in a stack snapshot. This can be +// increased if necessary, but larger values cost performance, since a +// stack snapshot needs to be copied between sampling and worker +// threads for each snapshot. In practice 32k seems to be enough +// to get good backtraces. +static const size_t N_STACK_BYTES = 32768; + +// The stack chunk image that will be unwound. +struct StackImage { + // [start_avma, +len) specify the address range in the buffer. + // Obviously we require 0 <= len <= N_STACK_BYTES. + uintptr_t mStartAvma; + size_t mLen; + uint8_t mContents[N_STACK_BYTES]; +}; + + +// Statistics collection for the unwinder. +template<typename T> +class LULStats { +public: + LULStats() + : mContext(0) + , mCFI(0) + , mScanned(0) + {} + + template <typename S> + explicit LULStats(const LULStats<S>& aOther) + : mContext(aOther.mContext) + , mCFI(aOther.mCFI) + , mScanned(aOther.mScanned) + {} + + template <typename S> + LULStats<T>& operator=(const LULStats<S>& aOther) + { + mContext = aOther.mContext; + mCFI = aOther.mCFI; + mScanned = aOther.mScanned; + return *this; + } + + template <typename S> + uint32_t operator-(const LULStats<S>& aOther) { + return (mContext - aOther.mContext) + + (mCFI - aOther.mCFI) + (mScanned - aOther.mScanned); + } + + T mContext; // Number of context frames + T mCFI; // Number of CFI/EXIDX frames + T mScanned; // Number of scanned frames +}; + + +// The core unwinder library class. Just one of these is needed, and +// it can be shared by multiple unwinder threads. +// +// The library operates in one of two modes. +// +// * Admin mode. The library is this state after creation. In Admin +// mode, no unwinding may be performed. It is however allowable to +// perform administrative tasks -- primarily, loading of unwind info +// -- in this mode. In particular, it is safe for the library to +// perform dynamic memory allocation in this mode. Safe in the +// sense that there is no risk of deadlock against unwinding threads +// that might -- because of where they have been sampled -- hold the +// system's malloc lock. +// +// * Unwind mode. In this mode, calls to ::Unwind may be made, but +// nothing else. ::Unwind guarantees not to make any dynamic memory +// requests, so as to guarantee that the calling thread won't +// deadlock in the case where it already holds the system's malloc lock. +// +// The library is created in Admin mode. After debuginfo is loaded, +// the caller must switch it into Unwind mode by calling +// ::EnableUnwinding. There is no way to switch it back to Admin mode +// after that. To safely switch back to Admin mode would require the +// caller (or other external agent) to guarantee that there are no +// pending ::Unwind calls. + +class PriMap; +class SegArray; +class UniqueStringUniverse; + +class LUL { +public: + // Create; supply a logging sink. Sets the object in Admin mode. + explicit LUL(void (*aLog)(const char*)); + + // Destroy. Caller is responsible for ensuring that no other + // threads are in Unwind calls. All resources are freed and all + // registered unwinder threads are deregistered. Can be called + // either in Admin or Unwind mode. + ~LUL(); + + // Notify the library that unwinding is now allowed and so + // admin-mode calls are no longer allowed. The object is initially + // created in admin mode. The only possible transition is + // admin->unwinding, therefore. + void EnableUnwinding(); + + // Notify of a new r-x mapping, and load the associated unwind info. + // The filename is strdup'd and used for debug printing. If + // aMappedImage is NULL, this function will mmap/munmap the file + // itself, so as to be able to read the unwind info. If + // aMappedImage is non-NULL then it is assumed to point to a + // called-supplied and caller-managed mapped image of the file. + // May only be called in Admin mode. + void NotifyAfterMap(uintptr_t aRXavma, size_t aSize, + const char* aFileName, const void* aMappedImage); + + // In rare cases we know an executable area exists but don't know + // what the associated file is. This call notifies LUL of such + // areas. This is important for correct functioning of stack + // scanning and of the x86-{linux,android} special-case + // __kernel_syscall function handling. + // This must be called only after the code area in + // question really has been mapped. + // May only be called in Admin mode. + void NotifyExecutableArea(uintptr_t aRXavma, size_t aSize); + + // Notify that a mapped area has been unmapped; discard any + // associated unwind info. Acquires mRWlock for writing. Note that + // to avoid segfaulting the stack-scan unwinder, which inspects code + // areas, this must be called before the code area in question is + // really unmapped. Note that, unlike NotifyAfterMap(), this + // function takes the start and end addresses of the range to be + // unmapped, rather than a start and a length parameter. This is so + // as to make it possible to notify an unmap for the entire address + // space using a single call. + // May only be called in Admin mode. + void NotifyBeforeUnmap(uintptr_t aAvmaMin, uintptr_t aAvmaMax); + + // Apply NotifyBeforeUnmap to the entire address space. This causes + // LUL to discard all unwind and executable-area information for the + // entire address space. + // May only be called in Admin mode. + void NotifyBeforeUnmapAll() { + NotifyBeforeUnmap(0, UINTPTR_MAX); + } + + // Returns the number of mappings currently registered. + // May only be called in Admin mode. + size_t CountMappings(); + + // Unwind |aStackImg| starting with the context in |aStartRegs|. + // Write the number of frames recovered in *aFramesUsed. Put + // the PC values in aFramePCs[0 .. *aFramesUsed-1] and + // the SP values in aFrameSPs[0 .. *aFramesUsed-1]. + // |aFramesAvail| is the size of the two output arrays and hence the + // largest possible value of *aFramesUsed. PC values are always + // valid, and the unwind will stop when the PC becomes invalid, but + // the SP values might be invalid, in which case the value zero will + // be written in the relevant frameSPs[] slot. + // + // Unwinding may optionally use stack scanning. The maximum number + // of frames that may be recovered by stack scanning is + // |aScannedFramesAllowed| and the actual number recovered is + // written into *aScannedFramesAcquired. |aScannedFramesAllowed| + // must be less than or equal to |aFramesAvail|. + // + // This function assumes that the SP values increase as it unwinds + // away from the innermost frame -- that is, that the stack grows + // down. It monitors SP values as it unwinds to check they + // decrease, so as to avoid looping on corrupted stacks. + // + // May only be called in Unwind mode. Multiple threads may unwind + // at once. LUL user is responsible for ensuring that no thread makes + // any Admin calls whilst in Unwind mode. + // MOZ_CRASHes if the calling thread is not registered for unwinding. + // + // Up to aScannedFramesAllowed stack-scanned frames may be recovered. + // + // The calling thread must previously have been registered via a call to + // RegisterSampledThread. + void Unwind(/*OUT*/uintptr_t* aFramePCs, + /*OUT*/uintptr_t* aFrameSPs, + /*OUT*/size_t* aFramesUsed, + /*OUT*/size_t* aScannedFramesAcquired, + size_t aFramesAvail, + size_t aScannedFramesAllowed, + UnwindRegs* aStartRegs, StackImage* aStackImg); + + // The logging sink. Call to send debug strings to the caller- + // specified destination. Can only be called by the Admin thread. + void (*mLog)(const char*); + + // Statistics relating to unwinding. These have to be atomic since + // unwinding can occur on different threads simultaneously. + LULStats<mozilla::Atomic<uint32_t>> mStats; + + // Possibly show the statistics. This may not be called from any + // registered sampling thread, since it involves I/O. + void MaybeShowStats(); + +private: + // The statistics counters at the point where they were last printed. + LULStats<uint32_t> mStatsPrevious; + + // Are we in admin mode? Initially |true| but changes to |false| + // once unwinding begins. + bool mAdminMode; + + // The thread ID associated with admin mode. This is the only thread + // that is allowed do perform non-Unwind calls on this object. Conversely, + // no registered Unwinding thread may be the admin thread. This is so + // as to clearly partition the one thread that may do dynamic memory + // allocation from the threads that are being sampled, since the latter + // absolutely may not do dynamic memory allocation. + int mAdminThreadId; + + // The top level mapping from code address ranges to postprocessed + // unwind info. Basically a sorted array of (addr, len, info) + // records. This field is updated by NotifyAfterMap and NotifyBeforeUnmap. + PriMap* mPriMap; + + // An auxiliary structure that records which address ranges are + // mapped r-x, for the benefit of the stack scanner. + SegArray* mSegArray; + + // A UniqueStringUniverse that holds all the strdup'd strings created + // whilst reading unwind information. This is included so as to make + // it possible to free them in ~LUL. + UniqueStringUniverse* mUSU; +}; + + +// Run unit tests on an initialised, loaded-up LUL instance, and print +// summary results on |aLUL|'s logging sink. Also return the number +// of tests run in *aNTests and the number that passed in +// *aNTestsPassed. +void +RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL); + +} // namespace lul + +#endif // LulMain_h diff --git a/tools/profiler/lul/LulMainInt.h b/tools/profiler/lul/LulMainInt.h new file mode 100644 index 000000000..54bd76c88 --- /dev/null +++ b/tools/profiler/lul/LulMainInt.h @@ -0,0 +1,393 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulMainInt_h +#define LulMainInt_h + +#include "LulPlatformMacros.h" +#include "LulMain.h" // for TaggedUWord + +#include <vector> + +#include "mozilla/Assertions.h" + +// This file is provides internal interface inside LUL. If you are an +// end-user of LUL, do not include it in your code. The end-user +// interface is in LulMain.h. + + +namespace lul { + +using std::vector; + +//////////////////////////////////////////////////////////////// +// DW_REG_ constants // +//////////////////////////////////////////////////////////////// + +// These are the Dwarf CFI register numbers, as (presumably) defined +// in the ELF ABI supplements for each architecture. + +enum DW_REG_NUMBER { + // No real register has this number. It's convenient to be able to + // treat the CFA (Canonical Frame Address) as "just another + // register", though. + DW_REG_CFA = -1, +#if defined(LUL_ARCH_arm) + // ARM registers + DW_REG_ARM_R7 = 7, + DW_REG_ARM_R11 = 11, + DW_REG_ARM_R12 = 12, + DW_REG_ARM_R13 = 13, + DW_REG_ARM_R14 = 14, + DW_REG_ARM_R15 = 15, +#elif defined(LUL_ARCH_x64) + // Because the X86 (32 bit) and AMD64 (64 bit) summarisers are + // combined, a merged set of register constants is needed. + DW_REG_INTEL_XBP = 6, + DW_REG_INTEL_XSP = 7, + DW_REG_INTEL_XIP = 16, +#elif defined(LUL_ARCH_x86) + DW_REG_INTEL_XBP = 5, + DW_REG_INTEL_XSP = 4, + DW_REG_INTEL_XIP = 8, +#else +# error "Unknown arch" +#endif +}; + + +//////////////////////////////////////////////////////////////// +// PfxExpr // +//////////////////////////////////////////////////////////////// + +enum PfxExprOp { + // meaning of mOperand effect on stack + PX_Start, // bool start-with-CFA? start, with CFA on stack, or not + PX_End, // none stop; result is at top of stack + PX_SImm32, // int32 push signed int32 + PX_DwReg, // DW_REG_NUMBER push value of the specified reg + PX_Deref, // none pop X ; push *X + PX_Add, // none pop X ; pop Y ; push Y + X + PX_Sub, // none pop X ; pop Y ; push Y - X + PX_And, // none pop X ; pop Y ; push Y & X + PX_Or, // none pop X ; pop Y ; push Y | X + PX_CmpGES, // none pop X ; pop Y ; push (Y >=s X) ? 1 : 0 + PX_Shl // none pop X ; pop Y ; push Y << X +}; + +struct PfxInstr { + PfxInstr(PfxExprOp opcode, int32_t operand) + : mOpcode(opcode) + , mOperand(operand) + {} + explicit PfxInstr(PfxExprOp opcode) + : mOpcode(opcode) + , mOperand(0) + {} + bool operator==(const PfxInstr& other) { + return mOpcode == other.mOpcode && mOperand == other.mOperand; + } + PfxExprOp mOpcode; + int32_t mOperand; +}; + +static_assert(sizeof(PfxInstr) <= 8, "PfxInstr size changed unexpectedly"); + +// Evaluate the prefix expression whose PfxInstrs start at aPfxInstrs[start]. +// In the case of any mishap (stack over/underflow, running off the end of +// the instruction vector, obviously malformed sequences), +// return an invalid TaggedUWord. +// RUNS IN NO-MALLOC CONTEXT +TaggedUWord EvaluatePfxExpr(int32_t start, + const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>& aPfxInstrs); + + +//////////////////////////////////////////////////////////////// +// LExpr // +//////////////////////////////////////////////////////////////// + +// An expression -- very primitive. Denotes either "register + +// offset", a dereferenced version of the same, or a reference to a +// prefix expression stored elsewhere. So as to allow convenient +// handling of Dwarf-derived unwind info, the register may also denote +// the CFA. A large number of these need to be stored, so we ensure +// it fits into 8 bytes. See comment below on RuleSet to see how +// expressions fit into the bigger picture. + +enum LExprHow { + UNKNOWN=0, // This LExpr denotes no value. + NODEREF, // Value is (mReg + mOffset). + DEREF, // Value is *(mReg + mOffset). + PFXEXPR // Value is EvaluatePfxExpr(secMap->mPfxInstrs[mOffset]) +}; + +inline static const char* NameOf_LExprHow(LExprHow how) { + switch (how) { + case UNKNOWN: return "UNKNOWN"; + case NODEREF: return "NODEREF"; + case DEREF: return "DEREF"; + case PFXEXPR: return "PFXEXPR"; + default: return "LExpr-??"; + } +} + + +struct LExpr { + // Denotes an expression with no value. + LExpr() + : mHow(UNKNOWN) + , mReg(0) + , mOffset(0) + {} + + // Denotes any expressible expression. + LExpr(LExprHow how, int16_t reg, int32_t offset) + : mHow(how) + , mReg(reg) + , mOffset(offset) + { + switch (how) { + case UNKNOWN: MOZ_ASSERT(reg == 0 && offset == 0); break; + case NODEREF: break; + case DEREF: break; + case PFXEXPR: MOZ_ASSERT(reg == 0 && offset >= 0); break; + default: MOZ_ASSERT(0, "LExpr::LExpr: invalid how"); + } + } + + // Change the offset for an expression that references memory. + LExpr add_delta(long delta) + { + MOZ_ASSERT(mHow == NODEREF); + // If this is a non-debug build and the above assertion would have + // failed, at least return LExpr() so that the machinery that uses + // the resulting expression fails in a repeatable way. + return (mHow == NODEREF) ? LExpr(mHow, mReg, mOffset+delta) + : LExpr(); // Gone bad + } + + // Dereference an expression that denotes a memory address. + LExpr deref() + { + MOZ_ASSERT(mHow == NODEREF); + // Same rationale as for add_delta(). + return (mHow == NODEREF) ? LExpr(DEREF, mReg, mOffset) + : LExpr(); // Gone bad + } + + // Print a rule for recovery of |aNewReg| whose recovered value + // is this LExpr. + string ShowRule(const char* aNewReg) const; + + // Evaluate this expression, producing a TaggedUWord. |aOldRegs| + // holds register values that may be referred to by the expression. + // |aCFA| holds the CFA value, if any, that applies. |aStackImg| + // contains a chuck of stack that will be consulted if the expression + // references memory. |aPfxInstrs| holds the vector of PfxInstrs + // that will be consulted if this is a PFXEXPR. + // RUNS IN NO-MALLOC CONTEXT + TaggedUWord EvaluateExpr(const UnwindRegs* aOldRegs, + TaggedUWord aCFA, const StackImage* aStackImg, + const vector<PfxInstr>* aPfxInstrs) const; + + // Representation of expressions. If |mReg| is DW_REG_CFA (-1) then + // it denotes the CFA. All other allowed values for |mReg| are + // nonnegative and are DW_REG_ values. + LExprHow mHow:8; + int16_t mReg; // A DW_REG_ value + int32_t mOffset; // 32-bit signed offset should be more than enough. +}; + +static_assert(sizeof(LExpr) <= 8, "LExpr size changed unexpectedly"); + + +//////////////////////////////////////////////////////////////// +// RuleSet // +//////////////////////////////////////////////////////////////// + +// This is platform-dependent. For some address range, describes how +// to recover the CFA and then how to recover the registers for the +// previous frame. +// +// The set of LExprs contained in a given RuleSet describe a DAG which +// says how to compute the caller's registers ("new registers") from +// the callee's registers ("old registers"). The DAG can contain a +// single internal node, which is the value of the CFA for the callee. +// It would be possible to construct a DAG that omits the CFA, but +// including it makes the summarisers simpler, and the Dwarf CFI spec +// has the CFA as a central concept. +// +// For this to make sense, |mCfaExpr| can't have +// |mReg| == DW_REG_CFA since we have no previous value for the CFA. +// All of the other |Expr| fields can -- and usually do -- specify +// |mReg| == DW_REG_CFA. +// +// With that in place, the unwind algorithm proceeds as follows. +// +// (0) Initially: we have values for the old registers, and a memory +// image. +// +// (1) Compute the CFA by evaluating |mCfaExpr|. Add the computed +// value to the set of "old registers". +// +// (2) Compute values for the registers by evaluating all of the other +// |Expr| fields in the RuleSet. These can depend on both the old +// register values and the just-computed CFA. +// +// If we are unwinding without computing a CFA, perhaps because the +// RuleSets are derived from EXIDX instead of Dwarf, then +// |mCfaExpr.mHow| will be LExpr::UNKNOWN, so the computed value will +// be invalid -- that is, TaggedUWord() -- and so any attempt to use +// that will result in the same value. But that's OK because the +// RuleSet would make no sense if depended on the CFA but specified no +// way to compute it. +// +// A RuleSet is not allowed to cover zero address range. Having zero +// length would break binary searching in SecMaps and PriMaps. + +class RuleSet { +public: + RuleSet(); + void Print(void(*aLog)(const char*)) const; + + // Find the LExpr* for a given DW_REG_ value in this class. + LExpr* ExprForRegno(DW_REG_NUMBER aRegno); + + uintptr_t mAddr; + uintptr_t mLen; + // How to compute the CFA. + LExpr mCfaExpr; + // How to compute caller register values. These may reference the + // value defined by |mCfaExpr|. +#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + LExpr mXipExpr; // return address + LExpr mXspExpr; + LExpr mXbpExpr; +#elif defined(LUL_ARCH_arm) + LExpr mR15expr; // return address + LExpr mR14expr; + LExpr mR13expr; + LExpr mR12expr; + LExpr mR11expr; + LExpr mR7expr; +#else +# error "Unknown arch" +#endif +}; + +// Returns |true| for Dwarf register numbers which are members +// of the set of registers that LUL unwinds on this target. +static inline bool registerIsTracked(DW_REG_NUMBER reg) { + switch (reg) { +# if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) + case DW_REG_INTEL_XBP: case DW_REG_INTEL_XSP: case DW_REG_INTEL_XIP: + return true; +# elif defined(LUL_ARCH_arm) + case DW_REG_ARM_R7: case DW_REG_ARM_R11: case DW_REG_ARM_R12: + case DW_REG_ARM_R13: case DW_REG_ARM_R14: case DW_REG_ARM_R15: + return true; +# else +# error "Unknown arch" +# endif + default: + return false; + } +} + + +//////////////////////////////////////////////////////////////// +// SecMap // +//////////////////////////////////////////////////////////////// + +// A SecMap may have zero address range, temporarily, whilst RuleSets +// are being added to it. But adding a zero-range SecMap to a PriMap +// will make it impossible to maintain the total order of the PriMap +// entries, and so that can't be allowed to happen. + +class SecMap { +public: + // These summarise the contained mRuleSets, in that they give + // exactly the lowest and highest addresses that any of the entries + // in this SecMap cover. Hence invariants: + // + // mRuleSets is nonempty + // <=> mSummaryMinAddr <= mSummaryMaxAddr + // && mSummaryMinAddr == mRuleSets[0].mAddr + // && mSummaryMaxAddr == mRuleSets[#rulesets-1].mAddr + // + mRuleSets[#rulesets-1].mLen - 1; + // + // This requires that no RuleSet has zero length. + // + // mRuleSets is empty + // <=> mSummaryMinAddr > mSummaryMaxAddr + // + // This doesn't constrain mSummaryMinAddr and mSummaryMaxAddr uniquely, + // so let's use mSummaryMinAddr == 1 and mSummaryMaxAddr == 0 to denote + // this case. + + explicit SecMap(void(*aLog)(const char*)); + ~SecMap(); + + // Binary search mRuleSets to find one that brackets |ia|, or nullptr + // if none is found. It's not allowable to do this until PrepareRuleSets + // has been called first. + RuleSet* FindRuleSet(uintptr_t ia); + + // Add a RuleSet to the collection. The rule is copied in. Calling + // this makes the map non-searchable. + void AddRuleSet(const RuleSet* rs); + + // Add a PfxInstr to the vector of such instrs, and return the index + // in the vector. Calling this makes the map non-searchable. + uint32_t AddPfxInstr(PfxInstr pfxi); + + // Returns the entire vector of PfxInstrs. + const vector<PfxInstr>* GetPfxInstrs() { return &mPfxInstrs; } + + // Prepare the map for searching. Also, remove any rules for code + // address ranges which don't fall inside [start, +len). |len| may + // not be zero. + void PrepareRuleSets(uintptr_t start, size_t len); + + bool IsEmpty(); + + size_t Size() { return mRuleSets.size(); } + + // The min and max addresses of the addresses in the contained + // RuleSets. See comment above for invariants. + uintptr_t mSummaryMinAddr; + uintptr_t mSummaryMaxAddr; + +private: + // False whilst adding entries; true once it is safe to call FindRuleSet. + // Transition (false->true) is caused by calling PrepareRuleSets(). + bool mUsable; + + // A vector of RuleSets, sorted, nonoverlapping (post Prepare()). + vector<RuleSet> mRuleSets; + + // A vector of PfxInstrs, which are referred to by the RuleSets. + // These are provided as a representation of Dwarf expressions + // (DW_CFA_val_expression, DW_CFA_expression, DW_CFA_def_cfa_expression), + // are relatively expensive to evaluate, and and are therefore + // expected to be used only occasionally. + // + // The vector holds a bunch of separate PfxInstr programs, each one + // starting with a PX_Start and terminated by a PX_End, all + // concatenated together. When a RuleSet can't recover a value + // using a self-contained LExpr, it uses a PFXEXPR whose mOffset is + // the index in this vector of start of the necessary PfxInstr program. + vector<PfxInstr> mPfxInstrs; + + // A logging sink, for debugging. + void (*mLog)(const char*); +}; + +} // namespace lul + +#endif // ndef LulMainInt_h diff --git a/tools/profiler/lul/LulPlatformMacros.h b/tools/profiler/lul/LulPlatformMacros.h new file mode 100644 index 000000000..8659a8fbe --- /dev/null +++ b/tools/profiler/lul/LulPlatformMacros.h @@ -0,0 +1,53 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef LulPlatformMacros_h +#define LulPlatformMacros_h + +#include <stdint.h> +#include <stdlib.h> + +// Define platform selection macros in a consistent way. The primary +// factorisation is on (ARCH,OS) pairs ("PLATforms") but ARCH_ and OS_ +// macros are defined too, since they are sometimes convenient. + +#undef LUL_PLAT_x64_linux +#undef LUL_PLAT_x86_linux +#undef LUL_PLAT_arm_android +#undef LUL_PLAT_x86_android + +#undef LUL_ARCH_arm +#undef LUL_ARCH_x86 +#undef LUL_ARCH_x64 + +#undef LUL_OS_android +#undef LUL_OS_linux + +#if defined(__linux__) && defined(__x86_64__) +# define LUL_PLAT_x64_linux 1 +# define LUL_ARCH_x64 1 +# define LUL_OS_linux 1 + +#elif defined(__linux__) && defined(__i386__) && !defined(__ANDROID__) +# define LUL_PLAT_x86_linux 1 +# define LUL_ARCH_x86 1 +# define LUL_OS_linux 1 + +#elif defined(__ANDROID__) && defined(__arm__) +# define LUL_PLAT_arm_android 1 +# define LUL_ARCH_arm 1 +# define LUL_OS_android 1 + +#elif defined(__ANDROID__) && defined(__i386__) +# define LUL_PLAT_x86_android 1 +# define LUL_ARCH_x86 1 +# define LUL_OS_android 1 + +#else +# error "Unsupported platform" +#endif + +#endif // LulPlatformMacros_h diff --git a/tools/profiler/lul/platform-linux-lul.cpp b/tools/profiler/lul/platform-linux-lul.cpp new file mode 100644 index 000000000..9541534a1 --- /dev/null +++ b/tools/profiler/lul/platform-linux-lul.cpp @@ -0,0 +1,88 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdio.h> +#include <signal.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> + +#include "platform.h" +#include "PlatformMacros.h" +#include "LulMain.h" +#include "shared-libraries.h" +#include "AutoObjectMapper.h" + +// Contains miscellaneous helpers that are used to connect SPS and LUL. + + +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. +void +read_procmaps(lul::LUL* aLUL) +{ + MOZ_ASSERT(aLUL->CountMappings() == 0); + +# if defined(SPS_OS_linux) || defined(SPS_OS_android) || defined(SPS_OS_darwin) + SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf(); + + for (size_t i = 0; i < info.GetSize(); i++) { + const SharedLibrary& lib = info.GetEntry(i); + +# if defined(SPS_OS_android) && !defined(MOZ_WIDGET_GONK) + // We're using faulty.lib. Use a special-case object mapper. + AutoObjectMapperFaultyLib mapper(aLUL->mLog); +# else + // We can use the standard POSIX-based mapper. + AutoObjectMapperPOSIX mapper(aLUL->mLog); +# endif + + // Ask |mapper| to map the object. Then hand its mapped address + // to NotifyAfterMap(). + void* image = nullptr; + size_t size = 0; + bool ok = mapper.Map(&image, &size, lib.GetName()); + if (ok && image && size > 0) { + aLUL->NotifyAfterMap(lib.GetStart(), lib.GetEnd()-lib.GetStart(), + lib.GetName().c_str(), image); + } else if (!ok && lib.GetName() == "") { + // The object has no name and (as a consequence) the mapper + // failed to map it. This happens on Linux, where + // GetInfoForSelf() produces two such mappings: one for the + // executable and one for the VDSO. The executable one isn't a + // big deal since there's not much interesting code in there, + // but the VDSO one is a problem on x86-{linux,android} because + // lack of knowledge about the mapped area inhibits LUL's + // special __kernel_syscall handling. Hence notify |aLUL| at + // least of the mapping, even though it can't read any unwind + // information for the area. + aLUL->NotifyExecutableArea(lib.GetStart(), lib.GetEnd()-lib.GetStart()); + } + + // |mapper| goes out of scope at this point and so its destructor + // unmaps the object. + } + +# else +# error "Unknown platform" +# endif +} + + +// LUL needs a callback for its logging sink. +void +logging_sink_for_LUL(const char* str) { + // Ignore any trailing \n, since LOG will add one anyway. + size_t n = strlen(str); + if (n > 0 && str[n-1] == '\n') { + char* tmp = strdup(str); + tmp[n-1] = 0; + LOG(tmp); + free(tmp); + } else { + LOG(str); + } +} diff --git a/tools/profiler/lul/platform-linux-lul.h b/tools/profiler/lul/platform-linux-lul.h new file mode 100644 index 000000000..4698cd388 --- /dev/null +++ b/tools/profiler/lul/platform-linux-lul.h @@ -0,0 +1,24 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PLATFORM_LINUX_LUL_H +#define MOZ_PLATFORM_LINUX_LUL_H + +#include "platform.h" + +// Find out, in a platform-dependent way, where the code modules got +// mapped in the process' virtual address space, and get |aLUL| to +// load unwind info for them. +void +read_procmaps(lul::LUL* aLUL); + +// LUL needs a callback for its logging sink. +void +logging_sink_for_LUL(const char* str); + +// A singleton instance of the library. +extern lul::LUL* sLUL; + +#endif /* ndef MOZ_PLATFORM_LINUX_LUL_H */ diff --git a/tools/profiler/merge-profiles.py b/tools/profiler/merge-profiles.py new file mode 100755 index 000000000..0c10c60e1 --- /dev/null +++ b/tools/profiler/merge-profiles.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python +# +# This script takes b2g process profiles and merged them into a single profile. +# The meta data is taken from the first profile. The startTime for each profile +# is used to syncronized the samples. Each thread is moved into the merged +# profile. +# +import json +import re +import sys + +def MergeProfiles(files): + threads = [] + fileData = [] + symTable = dict() + meta = None + libs = None + videoUrl = None + minStartTime = None + + for fname in files: + if fname.startswith("--video="): + videoUrl = fname[8:] + continue + + match = re.match('profile_([0-9]+)_(.+)\.sym', fname) + if match is None: + raise Exception("Filename '" + fname + "' doesn't match expected pattern") + pid = match.groups(0)[0] + pname = match.groups(0)[1] + + fp = open(fname, "r") + fileData = json.load(fp) + fp.close() + + if meta is None: + meta = fileData['profileJSON']['meta'].copy() + libs = fileData['profileJSON']['libs'] + minStartTime = meta['startTime'] + else: + minStartTime = min(minStartTime, fileData['profileJSON']['meta']['startTime']) + meta['startTime'] = minStartTime + + for thread in fileData['profileJSON']['threads']: + thread['name'] = thread['name'] + " (" + pname + ":" + pid + ")" + threads.append(thread) + + # Note that pid + sym, pid + location could be ambigious + # if we had pid=11 sym=1 && pid=1 sym=11. + pidStr = pid + ":" + + thread['startTime'] = fileData['profileJSON']['meta']['startTime'] + if meta['version'] >= 3: + stringTable = thread['stringTable'] + for i, str in enumerate(stringTable): + if str[:2] == '0x': + newLoc = pidStr + str + stringTable[i] = newLoc + symTable[newLoc] = str + else: + samples = thread['samples'] + for sample in thread['samples']: + for frame in sample['frames']: + if "location" in frame and frame['location'][0:2] == '0x': + oldLoc = frame['location'] + newLoc = pidStr + oldLoc + frame['location'] = newLoc + # Default to the unprefixed symbol if no translation is + symTable[newLoc] = oldLoc + + filesyms = fileData['symbolicationTable'] + for sym in filesyms.keys(): + symTable[pidStr + sym] = filesyms[sym] + + # For each thread, make the time offsets line up based on the + # earliest start + for thread in threads: + delta = thread['startTime'] - minStartTime + if meta['version'] >= 3: + idxTime = thread['samples']['schema']['time'] + for sample in thread['samples']['data']: + sample[idxTime] += delta + idxTime = thread['markers']['schema']['time'] + for marker in thread['markers']['data']: + marker[idxTime] += delta + else: + for sample in thread['samples']: + if "time" in sample: + sample['time'] += delta + for marker in thread['markers']: + marker['time'] += delta + + result = dict() + result['profileJSON'] = dict() + result['profileJSON']['meta'] = meta + result['profileJSON']['libs'] = libs + result['profileJSON']['threads'] = threads + result['symbolicationTable'] = symTable + result['format'] = "profileJSONWithSymbolicationTable,1" + if videoUrl: + result['profileJSON']['meta']['videoCapture'] = {"src": videoUrl} + + json.dump(result, sys.stdout) + + +if len(sys.argv) > 1: + MergeProfiles(sys.argv[1:]) + sys.exit(0) + +print "Usage: merge-profile.py profile_<pid1>_<pname1>.sym profile_<pid2>_<pname2>.sym > merged.sym" + + + diff --git a/tools/profiler/moz.build b/tools/profiler/moz.build new file mode 100644 index 000000000..e48ae8f94 --- /dev/null +++ b/tools/profiler/moz.build @@ -0,0 +1,147 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +if CONFIG['MOZ_ENABLE_PROFILER_SPS']: + XPIDL_MODULE = 'profiler' + XPIDL_SOURCES += [ + 'gecko/nsIProfiler.idl', + 'gecko/nsIProfileSaveEvent.idl', + ] + EXPORTS += [ + 'public/GeckoProfilerFunc.h', + 'public/GeckoProfilerImpl.h', + 'public/ProfilerBacktrace.h', + 'public/ProfilerMarkers.h', + 'public/PseudoStack.h', + 'public/shared-libraries.h', + ] + EXPORTS.mozilla += [ + 'public/ProfileGatherer.h', + ] + EXTRA_JS_MODULES += [ + 'gecko/Profiler.jsm', + ] + UNIFIED_SOURCES += [ + 'core/GeckoSampler.cpp', + 'core/platform.cpp', + 'core/ProfileBuffer.cpp', + 'core/ProfileEntry.cpp', + 'core/ProfileJSONWriter.cpp', + 'core/ProfilerBacktrace.cpp', + 'core/ProfilerMarkers.cpp', + 'core/StackTop.cpp', + 'core/SyncProfile.cpp', + 'core/ThreadInfo.cpp', + 'core/ThreadProfile.cpp', + 'gecko/nsProfiler.cpp', + 'gecko/nsProfilerFactory.cpp', + 'gecko/nsProfilerStartParams.cpp', + 'gecko/ProfileGatherer.cpp', + 'gecko/ProfilerIOInterposeObserver.cpp', + 'gecko/SaveProfileTask.cpp', + 'gecko/ThreadResponsiveness.cpp', + ] + + if CONFIG['OS_TARGET'] in ('Android', 'Linux'): + UNIFIED_SOURCES += [ + 'lul/AutoObjectMapper.cpp', + 'lul/LulCommon.cpp', + 'lul/LulDwarf.cpp', + 'lul/LulDwarfSummariser.cpp', + 'lul/LulElf.cpp', + 'lul/LulMain.cpp', + 'lul/platform-linux-lul.cpp', + ] + # These files cannot be built in unified mode because of name clashes with mozglue headers on Android. + SOURCES += [ + 'core/platform-linux.cc', + 'core/shared-libraries-linux.cc', + ] + if not CONFIG['MOZ_CRASHREPORTER']: + SOURCES += [ + '/toolkit/crashreporter/google-breakpad/src/common/linux/elfutils.cc', + '/toolkit/crashreporter/google-breakpad/src/common/linux/file_id.cc', + '/toolkit/crashreporter/google-breakpad/src/common/linux/guid_creator.cc', + '/toolkit/crashreporter/google-breakpad/src/common/linux/linux_libc_support.cc', + '/toolkit/crashreporter/google-breakpad/src/common/linux/memory_mapped_file.cc', + ] + if CONFIG['CPU_ARCH'] == 'arm': + SOURCES += [ + 'core/EHABIStackWalk.cpp', + ] + elif CONFIG['OS_TARGET'] == 'Darwin': + UNIFIED_SOURCES += [ + 'core/platform-macos.cc', + 'core/shared-libraries-macos.cc', + ] + elif CONFIG['OS_TARGET'] == 'WINNT': + SOURCES += [ + 'core/IntelPowerGadget.cpp', + 'core/platform-win32.cc', + 'core/shared-libraries-win32.cc', + ] + + LOCAL_INCLUDES += [ + '/docshell/base', + '/ipc/chromium/src', + '/mozglue/linker', + '/toolkit/crashreporter/google-breakpad/src', + '/tools/profiler/core/', + '/tools/profiler/gecko/', + '/xpcom/base', + ] + + if CONFIG['OS_TARGET'] == 'Android': + LOCAL_INCLUDES += [ + # We need access to Breakpad's getcontext(3) which is suitable for Android + '/toolkit/crashreporter/google-breakpad/src/common/android/include', + ] + + if not CONFIG['MOZ_CRASHREPORTER'] and CONFIG['OS_TARGET'] == 'Android': + SOURCES += ['/toolkit/crashreporter/google-breakpad/src/common/android/breakpad_getcontext.S'] + + if CONFIG['ANDROID_CPU_ARCH'] == 'armeabi': + DEFINES['ARCH_ARMV6'] = True + + if CONFIG['ENABLE_TESTS']: + DIRS += ['tests/gtest'] + + if CONFIG['MOZ_WIDGET_TOOLKIT'] == 'gonk' and (CONFIG['ANDROID_VERSION'] <= '17' or CONFIG['ANDROID_VERSION'] >= '21'): + DEFINES['ELFSIZE'] = 32 + + FINAL_LIBRARY = 'xul' + +IPDL_SOURCES += [ + 'gecko/ProfilerTypes.ipdlh', +] + +include('/ipc/chromium/chromium-config.mozbuild') + +EXPORTS += [ + 'public/GeckoProfiler.h', +] + +if CONFIG['MOZ_TASK_TRACER']: + EXPORTS += [ + 'tasktracer/GeckoTaskTracer.h', + 'tasktracer/GeckoTaskTracerImpl.h', + 'tasktracer/TracedTaskCommon.h', + ] + UNIFIED_SOURCES += [ + 'tasktracer/GeckoTaskTracer.cpp', + 'tasktracer/TracedTaskCommon.cpp', + ] + +XPCSHELL_TESTS_MANIFESTS += ['tests/xpcshell.ini'] + +if CONFIG['GNU_CXX']: + CXXFLAGS += [ + '-Wno-error=shadow', + '-Wno-ignored-qualifiers', # due to use of breakpad headers + ] + +with Files('**'):
+ BUG_COMPONENT = ('Core', 'Gecko Profiler')
diff --git a/tools/profiler/nm-symbolicate.py b/tools/profiler/nm-symbolicate.py new file mode 100755 index 000000000..f51d7f75f --- /dev/null +++ b/tools/profiler/nm-symbolicate.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import sys, subprocess, os + +def NMSymbolicate(library, addresses): + target_tools_prefix = os.environ.get("TARGET_TOOLS_PREFIX", "") + args = [ + target_tools_prefix + "nm", "-D", "-S", library + ] + nm_lines = subprocess.check_output(args).split("\n") + symbol_table = [] + for line in nm_lines: + pieces = line.split(" ", 4) + if len(pieces) != 4 or pieces[2] != "T": + continue + start = int(pieces[0], 16) + end = int(pieces[1], 16) + symbol = pieces[3] + symbol_table.append({ + "start": int(pieces[0], 16), + "end": int(pieces[0], 16) + int(pieces[1], 16), + "funcName": pieces[3] + }); + + for addressStr in addresses: + address = int(addressStr, 16) + symbolForAddress = None + for symbol in symbol_table: + if address >= symbol["start"] and address <= symbol["end"]: + symbolForAddress = symbol + break + if symbolForAddress: + print symbolForAddress["funcName"] + else: + print "??" # match addr2line + print ":0" # no line information from nm + +if len(sys.argv) > 1: + NMSymbolicate(sys.argv[1], sys.argv[2:]) + sys.exit(0) + +print "Usage: nm-symbolicate.py <library> <addresses> > merged.sym" + + diff --git a/tools/profiler/public/GeckoProfiler.h b/tools/profiler/public/GeckoProfiler.h new file mode 100644 index 000000000..bef017d11 --- /dev/null +++ b/tools/profiler/public/GeckoProfiler.h @@ -0,0 +1,300 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* *************** SPS Sampler Information **************** + * + * SPS is an always on profiler that takes fast and low overheads samples + * of the program execution using only userspace functionity for portability. + * The goal of this module is to provide performance data in a generic + * cross platform way without requiring custom tools or kernel support. + * + * Non goals: Support features that are platform specific or replace + * platform specific profilers. + * + * Samples are collected to form a timeline with optional timeline event (markers) + * used for filtering. + * + * SPS collects samples in a platform independant way by using a speudo stack abstraction + * of the real program stack by using 'sample stack frames'. When a sample is collected + * all active sample stack frames and the program counter are recorded. + */ + +/* *************** SPS Sampler File Format **************** + * + * Simple new line seperated tag format: + * S -> BOF tags EOF + * tags -> tag tags + * tag -> CHAR - STRING + * + * Tags: + * 's' - Sample tag followed by the first stack frame followed by 0 or more 'c' tags. + * 'c' - Continue Sample tag gives remaining tag element. If a 'c' tag is seen without + * a preceding 's' tag it should be ignored. This is to support the behavior + * of circular buffers. + * If the 'stackwalk' feature is enabled this tag will have the format + * 'l-<library name>@<hex address>' and will expect an external tool to translate + * the tag into something readable through a symbolication processing step. + * 'm' - Timeline marker. Zero or more may appear before a 's' tag. + * 'l' - Information about the program counter library and address. Post processing + * can include function and source line. If built with leaf data enabled + * this tag will describe the last 'c' tag. + * 'r' - Responsiveness tag following an 's' tag. Gives an indication on how well the + * application is responding to the event loop. Lower is better. + * 't' - Elapse time since recording started. + * + */ + +#ifndef SAMPLER_H +#define SAMPLER_H + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#ifndef SPS_STANDALONE +#include "js/TypeDecls.h" +#endif +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" + +namespace mozilla { +class TimeStamp; + +namespace dom { +class Promise; +} // namespace dom + +} // namespace mozilla + +#ifndef SPS_STANDALONE +class nsIProfilerStartParams; +#endif + +enum TracingMetadata { + TRACING_DEFAULT, + TRACING_INTERVAL_START, + TRACING_INTERVAL_END, + TRACING_EVENT, + TRACING_EVENT_BACKTRACE, + TRACING_TIMESTAMP +}; + +#if !defined(MOZ_ENABLE_PROFILER_SPS) + +#include <stdint.h> +#include <stdarg.h> + +// Insert a RAII in this scope to active a pseudo label. Any samples collected +// in this scope will contain this annotation. For dynamic strings use +// PROFILER_LABEL_PRINTF. Arguments must be string literals. +#define PROFILER_LABEL(name_space, info, category) do {} while (0) + +// Similar to PROFILER_LABEL, PROFILER_LABEL_FUNC will push/pop the enclosing +// functon name as the pseudostack label. +#define PROFILER_LABEL_FUNC(category) do {} while (0) + +// Format a dynamic string as a pseudo label. These labels will a considerable +// storage size in the circular buffer compared to regular labels. This function +// can be used to annotate custom information such as URL for the resource being +// decoded or the size of the paint. +#define PROFILER_LABEL_PRINTF(name_space, info, category, format, ...) do {} while (0) + +// Insert a marker in the profile timeline. This is useful to delimit something +// important happening such as the first paint. Unlike profiler_label that are +// only recorded if a sample is collected while it is active, marker will always +// be collected. +#define PROFILER_MARKER(info) do {} while (0) +#define PROFILER_MARKER_PAYLOAD(info, payload) do { mozilla::UniquePtr<ProfilerMarkerPayload> payloadDeletor(payload); } while (0) + +// Main thread specilization to avoid TLS lookup for performance critical use. +#define PROFILER_MAIN_THREAD_LABEL(name_space, info, category) do {} while (0) +#define PROFILER_MAIN_THREAD_LABEL_PRINTF(name_space, info, category, format, ...) do {} while (0) + +static inline void profiler_tracing(const char* aCategory, const char* aInfo, + TracingMetadata metaData = TRACING_DEFAULT) {} +class ProfilerBacktrace; + +static inline void profiler_tracing(const char* aCategory, const char* aInfo, + ProfilerBacktrace* aCause, + TracingMetadata metaData = TRACING_DEFAULT) {} + +// Initilize the profiler TLS, signal handlers on linux. If MOZ_PROFILER_STARTUP +// is set the profiler will be started. This call must happen before any other +// sampler calls. Particularly sampler_label/sampler_marker. +static inline void profiler_init(void* stackTop) {}; + +// Clean up the profiler module, stopping it if required. This function may +// also save a shutdown profile if requested. No profiler calls should happen +// after this point and all pseudo labels should have been popped. +static inline void profiler_shutdown() {}; + +// Start the profiler with the selected options. The samples will be +// recorded in a circular buffer. +// "aProfileEntries" is an abstract size indication of how big +// the profile's circular buffer should be. Multiply by 4 +// words to get the cost. +// "aInterval" the sampling interval. The profiler will do its +// best to sample at this interval. The profiler visualization +// should represent the actual sampling accuracy. +static inline void profiler_start(int aProfileEntries, double aInterval, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount) {} + +// Stop the profiler and discard the profile. Call 'profiler_save' before this +// to retrieve the profile. +static inline void profiler_stop() {} + +// These functions pause and resume the profiler. While paused the profile will not +// take any samples and will not record any data into its buffers. The profiler +// remains fully initialized in this state. Timeline markers will still be stored. +// This feature will keep javascript profiling enabled, thus allowing toggling the +// profiler without invalidating the JIT. +static inline bool profiler_is_paused() { return false; } +static inline void profiler_pause() {} +static inline void profiler_resume() {} + + +// Immediately capture the current thread's call stack and return it +static inline ProfilerBacktrace* profiler_get_backtrace() { return nullptr; } +static inline void profiler_get_backtrace_noalloc(char *output, size_t outputSize) { return; } + +// Free a ProfilerBacktrace returned by profiler_get_backtrace() +static inline void profiler_free_backtrace(ProfilerBacktrace* aBacktrace) {} + +static inline bool profiler_is_active() { return false; } + +// Check if an external profiler feature is active. +// Supported: +// * gpu +static inline bool profiler_feature_active(const char*) { return false; } + +// Internal-only. Used by the event tracer. +static inline void profiler_responsiveness(const mozilla::TimeStamp& aTime) {} + +// Internal-only. +static inline void profiler_set_frame_number(int frameNumber) {} + +// Get the profile encoded as a JSON string. +static inline mozilla::UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0) { + return nullptr; +} + +// Get the profile encoded as a JSON object. +static inline JSObject* profiler_get_profile_jsobject(JSContext* aCx, + double aSinceTime = 0) { + return nullptr; +} + +#ifndef SPS_STANDALONE +// Get the profile encoded as a JSON object. +static inline void profiler_get_profile_jsobject_async(double aSinceTime = 0, + mozilla::dom::Promise* = 0) {} +static inline void profiler_get_start_params(int* aEntrySize, + double* aInterval, + mozilla::Vector<const char*>* aFilters, + mozilla::Vector<const char*>* aFeatures) {} +#endif + +// Get the profile and write it into a file +static inline void profiler_save_profile_to_file(char* aFilename) { } + +// Get the features supported by the profiler that are accepted by profiler_init. +// Returns a null terminated char* array. +static inline char** profiler_get_features() { return nullptr; } + +// Get information about the current buffer status. +// Retursn (using outparams) the current write position in the buffer, +// the total size of the buffer, and the generation of the buffer. +// This information may be useful to a user-interface displaying the +// current status of the profiler, allowing the user to get a sense +// for how fast the buffer is being written to, and how much +// data is visible. +static inline void profiler_get_buffer_info(uint32_t *aCurrentPosition, + uint32_t *aTotalSize, + uint32_t *aGeneration) +{ + *aCurrentPosition = 0; + *aTotalSize = 0; + *aGeneration = 0; +} + +// Discard the profile, throw away the profile and notify 'profiler-locked'. +// This function is to be used when entering private browsing to prevent +// the profiler from collecting sensitive data. +static inline void profiler_lock() {} + +// Re-enable the profiler and notify 'profiler-unlocked'. +static inline void profiler_unlock() {} + +static inline void profiler_register_thread(const char* name, void* guessStackTop) {} +static inline void profiler_unregister_thread() {} + +// These functions tell the profiler that a thread went to sleep so that we can avoid +// sampling it while it's sleeping. Calling profiler_sleep_start() twice without +// profiler_sleep_end() is an error. +static inline void profiler_sleep_start() {} +static inline void profiler_sleep_end() {} +static inline bool profiler_is_sleeping() { return false; } + +// Call by the JSRuntime's operation callback. This is used to enable +// profiling on auxilerary threads. +static inline void profiler_js_operation_callback() {} + +static inline double profiler_time() { return 0; } +static inline double profiler_time(const mozilla::TimeStamp& aTime) { return 0; } + +static inline bool profiler_in_privacy_mode() { return false; } + +static inline void profiler_log(const char *str) {} +static inline void profiler_log(const char *fmt, va_list args) {} + +#else + +#include "GeckoProfilerImpl.h" + +#endif + +class MOZ_RAII GeckoProfilerInitRAII { +public: + explicit GeckoProfilerInitRAII(void* stackTop) { + profiler_init(stackTop); + } + ~GeckoProfilerInitRAII() { + profiler_shutdown(); + } +}; + +class MOZ_RAII GeckoProfilerSleepRAII { +public: + GeckoProfilerSleepRAII() { + profiler_sleep_start(); + } + ~GeckoProfilerSleepRAII() { + profiler_sleep_end(); + } +}; + +/** + * Temporarily wake up the profiler while servicing events such as + * Asynchronous Procedure Calls (APCs). + */ +class MOZ_RAII GeckoProfilerWakeRAII { +public: + GeckoProfilerWakeRAII() + : mIssuedWake(profiler_is_sleeping()) + { + if (mIssuedWake) { + profiler_sleep_end(); + } + } + ~GeckoProfilerWakeRAII() { + if (mIssuedWake) { + MOZ_ASSERT(!profiler_is_sleeping()); + profiler_sleep_start(); + } + } +private: + bool mIssuedWake; +}; + +#endif // ifndef SAMPLER_H diff --git a/tools/profiler/public/GeckoProfilerFunc.h b/tools/profiler/public/GeckoProfilerFunc.h new file mode 100644 index 000000000..e0d27f593 --- /dev/null +++ b/tools/profiler/public/GeckoProfilerFunc.h @@ -0,0 +1,125 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILER_FUNCS_H +#define PROFILER_FUNCS_H + +#ifndef SPS_STANDALONE +#include "js/TypeDecls.h" +#endif +#include "js/ProfilingStack.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Vector.h" +#include <stdint.h> + +class nsISupports; + +namespace mozilla { +class TimeStamp; + +namespace dom { +class Promise; +} // namespace dom + +} // namespace mozilla + +class ProfilerBacktrace; +class ProfilerMarkerPayload; + +// Returns a handle to pass on exit. This can check that we are popping the +// correct callstack. +inline void* mozilla_sampler_call_enter(const char *aInfo, js::ProfileEntry::Category aCategory, + void *aFrameAddress = nullptr, bool aCopy = false, + uint32_t line = 0); + +inline void mozilla_sampler_call_exit(void* handle); + +void mozilla_sampler_add_marker(const char *aInfo, + ProfilerMarkerPayload *aPayload = nullptr); + +void mozilla_sampler_start(int aEntries, double aInterval, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount); + +void mozilla_sampler_stop(); + +bool mozilla_sampler_is_paused(); +void mozilla_sampler_pause(); +void mozilla_sampler_resume(); + +ProfilerBacktrace* mozilla_sampler_get_backtrace(); +void mozilla_sampler_free_backtrace(ProfilerBacktrace* aBacktrace); +void mozilla_sampler_get_backtrace_noalloc(char *output, size_t outputSize); + +bool mozilla_sampler_is_active(); + +bool mozilla_sampler_feature_active(const char* aName); + +void mozilla_sampler_responsiveness(const mozilla::TimeStamp& time); + +void mozilla_sampler_frame_number(int frameNumber); + +const double* mozilla_sampler_get_responsiveness(); + +void mozilla_sampler_save(); + +mozilla::UniquePtr<char[]> mozilla_sampler_get_profile(double aSinceTime); + +#ifndef SPS_STANDALONE +JSObject *mozilla_sampler_get_profile_data(JSContext* aCx, double aSinceTime); +void mozilla_sampler_get_profile_data_async(double aSinceTime, + mozilla::dom::Promise* aPromise); +void mozilla_sampler_get_profiler_start_params(int* aEntrySize, + double* aInterval, + mozilla::Vector<const char*>* aFilters, + mozilla::Vector<const char*>* aFeatures); +void mozilla_sampler_get_gatherer(nsISupports** aRetVal); +#endif + +// Make this function easily callable from a debugger in a build without +// debugging information (work around http://llvm.org/bugs/show_bug.cgi?id=22211) +extern "C" { + void mozilla_sampler_save_profile_to_file(const char* aFilename); +} + +const char** mozilla_sampler_get_features(); + +void mozilla_sampler_get_buffer_info(uint32_t *aCurrentPosition, uint32_t *aTotalSize, + uint32_t *aGeneration); + +void mozilla_sampler_init(void* stackTop); + +void mozilla_sampler_shutdown(); + +// Lock the profiler. When locked the profiler is (1) stopped, +// (2) profile data is cleared, (3) profiler-locked is fired. +// This is used to lock down the profiler during private browsing +void mozilla_sampler_lock(); + +// Unlock the profiler, leaving it stopped and fires profiler-unlocked. +void mozilla_sampler_unlock(); + +// Register/unregister threads with the profiler +bool mozilla_sampler_register_thread(const char* name, void* stackTop); +void mozilla_sampler_unregister_thread(); + +void mozilla_sampler_sleep_start(); +void mozilla_sampler_sleep_end(); +bool mozilla_sampler_is_sleeping(); + +double mozilla_sampler_time(); +double mozilla_sampler_time(const mozilla::TimeStamp& aTime); + +void mozilla_sampler_tracing(const char* aCategory, const char* aInfo, + TracingMetadata aMetaData); + +void mozilla_sampler_tracing(const char* aCategory, const char* aInfo, + ProfilerBacktrace* aCause, + TracingMetadata aMetaData); + +void mozilla_sampler_log(const char *fmt, va_list args); + +#endif + diff --git a/tools/profiler/public/GeckoProfilerImpl.h b/tools/profiler/public/GeckoProfilerImpl.h new file mode 100644 index 000000000..a32096b94 --- /dev/null +++ b/tools/profiler/public/GeckoProfilerImpl.h @@ -0,0 +1,522 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +// IWYU pragma: private, include "GeckoProfiler.h" + +#ifndef TOOLS_SPS_SAMPLER_H_ +#define TOOLS_SPS_SAMPLER_H_ + +#include <stdlib.h> +#include <signal.h> +#include <stdarg.h> +#include "mozilla/Assertions.h" +#include "mozilla/GuardObjects.h" +#include "mozilla/Sprintf.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/UniquePtr.h" +#ifndef SPS_STANDALONE +#include "nscore.h" +#include "nsISupports.h" +#endif +#include "GeckoProfilerFunc.h" +#include "PseudoStack.h" +#include "ProfilerBacktrace.h" + +// Make sure that we can use std::min here without the Windows headers messing with us. +#ifdef min +#undef min +#endif + +class GeckoSampler; + +namespace mozilla { +class TimeStamp; +} // namespace mozilla + +extern MOZ_THREAD_LOCAL(PseudoStack *) tlsPseudoStack; +extern MOZ_THREAD_LOCAL(GeckoSampler *) tlsTicker; +extern MOZ_THREAD_LOCAL(void *) tlsStackTop; +extern bool stack_key_initialized; + +#ifndef SAMPLE_FUNCTION_NAME +# ifdef __GNUC__ +# define SAMPLE_FUNCTION_NAME __FUNCTION__ +# elif defined(_MSC_VER) +# define SAMPLE_FUNCTION_NAME __FUNCTION__ +# else +# define SAMPLE_FUNCTION_NAME __func__ // defined in C99, supported in various C++ compilers. Just raw function name. +# endif +#endif + +static inline +void profiler_init(void* stackTop) +{ + mozilla_sampler_init(stackTop); +} + +static inline +void profiler_shutdown() +{ + mozilla_sampler_shutdown(); +} + +static inline +void profiler_start(int aProfileEntries, double aInterval, + const char** aFeatures, uint32_t aFeatureCount, + const char** aThreadNameFilters, uint32_t aFilterCount) +{ + mozilla_sampler_start(aProfileEntries, aInterval, aFeatures, aFeatureCount, aThreadNameFilters, aFilterCount); +} + +static inline +void profiler_stop() +{ + mozilla_sampler_stop(); +} + +static inline +bool profiler_is_paused() +{ + return mozilla_sampler_is_paused(); +} + +static inline +void profiler_pause() +{ + mozilla_sampler_pause(); +} + +static inline +void profiler_resume() +{ + mozilla_sampler_resume(); +} + +static inline +ProfilerBacktrace* profiler_get_backtrace() +{ + return mozilla_sampler_get_backtrace(); +} + +static inline +void profiler_free_backtrace(ProfilerBacktrace* aBacktrace) +{ + mozilla_sampler_free_backtrace(aBacktrace); +} + +static inline +void profiler_get_backtrace_noalloc(char *output, size_t outputSize) +{ + return mozilla_sampler_get_backtrace_noalloc(output, outputSize); +} + +static inline +bool profiler_is_active() +{ + return mozilla_sampler_is_active(); +} + +static inline +bool profiler_feature_active(const char* aName) +{ + return mozilla_sampler_feature_active(aName); +} + +static inline +void profiler_responsiveness(const mozilla::TimeStamp& aTime) +{ + mozilla_sampler_responsiveness(aTime); +} + +static inline +void profiler_set_frame_number(int frameNumber) +{ + return mozilla_sampler_frame_number(frameNumber); +} + +static inline +mozilla::UniquePtr<char[]> profiler_get_profile(double aSinceTime = 0) +{ + return mozilla_sampler_get_profile(aSinceTime); +} + +#ifndef SPS_STANDALONE +static inline +JSObject* profiler_get_profile_jsobject(JSContext* aCx, double aSinceTime = 0) +{ + return mozilla_sampler_get_profile_data(aCx, aSinceTime); +} + +static inline +void profiler_get_profile_jsobject_async(double aSinceTime = 0, + mozilla::dom::Promise* aPromise = 0) +{ + mozilla_sampler_get_profile_data_async(aSinceTime, aPromise); +} + +static inline +void profiler_get_start_params(int* aEntrySize, + double* aInterval, + mozilla::Vector<const char*>* aFilters, + mozilla::Vector<const char*>* aFeatures) +{ + mozilla_sampler_get_profiler_start_params(aEntrySize, aInterval, aFilters, aFeatures); +} + +static inline +void profiler_get_gatherer(nsISupports** aRetVal) +{ + mozilla_sampler_get_gatherer(aRetVal); +} + +#endif + +static inline +void profiler_save_profile_to_file(const char* aFilename) +{ + return mozilla_sampler_save_profile_to_file(aFilename); +} + +static inline +const char** profiler_get_features() +{ + return mozilla_sampler_get_features(); +} + +static inline +void profiler_get_buffer_info(uint32_t *aCurrentPosition, uint32_t *aTotalSize, + uint32_t *aGeneration) +{ + return mozilla_sampler_get_buffer_info(aCurrentPosition, aTotalSize, aGeneration); +} + +static inline +void profiler_lock() +{ + return mozilla_sampler_lock(); +} + +static inline +void profiler_unlock() +{ + return mozilla_sampler_unlock(); +} + +static inline +void profiler_register_thread(const char* name, void* guessStackTop) +{ + mozilla_sampler_register_thread(name, guessStackTop); +} + +static inline +void profiler_unregister_thread() +{ + mozilla_sampler_unregister_thread(); +} + +static inline +void profiler_sleep_start() +{ + mozilla_sampler_sleep_start(); +} + +static inline +void profiler_sleep_end() +{ + mozilla_sampler_sleep_end(); +} + +static inline +bool profiler_is_sleeping() +{ + return mozilla_sampler_is_sleeping(); +} + +#ifndef SPS_STANDALONE +static inline +void profiler_js_operation_callback() +{ + PseudoStack *stack = tlsPseudoStack.get(); + if (!stack) { + return; + } + + stack->jsOperationCallback(); +} +#endif + +static inline +double profiler_time() +{ + return mozilla_sampler_time(); +} + +static inline +double profiler_time(const mozilla::TimeStamp& aTime) +{ + return mozilla_sampler_time(aTime); +} + +static inline +bool profiler_in_privacy_mode() +{ + PseudoStack *stack = tlsPseudoStack.get(); + if (!stack) { + return false; + } + return stack->mPrivacyMode; +} + +static inline void profiler_tracing(const char* aCategory, const char* aInfo, + ProfilerBacktrace* aCause, + TracingMetadata aMetaData = TRACING_DEFAULT) +{ + // Don't insert a marker if we're not profiling to avoid + // the heap copy (malloc). + if (!stack_key_initialized || !profiler_is_active()) { + delete aCause; + return; + } + + mozilla_sampler_tracing(aCategory, aInfo, aCause, aMetaData); +} + +static inline void profiler_tracing(const char* aCategory, const char* aInfo, + TracingMetadata aMetaData = TRACING_DEFAULT) +{ + if (!stack_key_initialized) + return; + + // Don't insert a marker if we're not profiling to avoid + // the heap copy (malloc). + if (!profiler_is_active()) { + return; + } + + mozilla_sampler_tracing(aCategory, aInfo, aMetaData); +} + +#define SAMPLER_APPEND_LINE_NUMBER_PASTE(id, line) id ## line +#define SAMPLER_APPEND_LINE_NUMBER_EXPAND(id, line) SAMPLER_APPEND_LINE_NUMBER_PASTE(id, line) +#define SAMPLER_APPEND_LINE_NUMBER(id) SAMPLER_APPEND_LINE_NUMBER_EXPAND(id, __LINE__) + +// Uncomment this to turn on systrace or build with +// ac_add_options --enable-systace +//#define MOZ_USE_SYSTRACE +#ifdef MOZ_USE_SYSTRACE +#ifndef ATRACE_TAG +# define ATRACE_TAG ATRACE_TAG_ALWAYS +#endif +// We need HAVE_ANDROID_OS to be defined for Trace.h. +// If its not set we will set it temporary and remove it. +# ifndef HAVE_ANDROID_OS +# define HAVE_ANDROID_OS +# define REMOVE_HAVE_ANDROID_OS +# endif +// Android source code will include <cutils/trace.h> before this. There is no +// HAVE_ANDROID_OS defined in Firefox OS build at that time. Enabled it globally +// will cause other build break. So atrace_begin and atrace_end are not defined. +// It will cause a build-break when we include <utils/Trace.h>. Use undef +// _LIBS_CUTILS_TRACE_H will force <cutils/trace.h> to define atrace_begin and +// atrace_end with defined HAVE_ANDROID_OS again. Then there is no build-break. +# undef _LIBS_CUTILS_TRACE_H +# include <utils/Trace.h> +# define MOZ_PLATFORM_TRACING(name) android::ScopedTrace SAMPLER_APPEND_LINE_NUMBER(scopedTrace)(ATRACE_TAG, name); +# ifdef REMOVE_HAVE_ANDROID_OS +# undef HAVE_ANDROID_OS +# undef REMOVE_HAVE_ANDROID_OS +# endif +#else +# define MOZ_PLATFORM_TRACING(name) +#endif + +// we want the class and function name but can't easily get that using preprocessor macros +// __func__ doesn't have the class name and __PRETTY_FUNCTION__ has the parameters + +#define PROFILER_LABEL(name_space, info, category) MOZ_PLATFORM_TRACING(name_space "::" info) mozilla::SamplerStackFrameRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__) +#define PROFILER_LABEL_FUNC(category) MOZ_PLATFORM_TRACING(SAMPLE_FUNCTION_NAME) mozilla::SamplerStackFrameRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(SAMPLE_FUNCTION_NAME, category, __LINE__) +#define PROFILER_LABEL_PRINTF(name_space, info, category, ...) MOZ_PLATFORM_TRACING(name_space "::" info) mozilla::SamplerStackFramePrintfRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__, __VA_ARGS__) + +#define PROFILER_MARKER(info) mozilla_sampler_add_marker(info) +#define PROFILER_MARKER_PAYLOAD(info, payload) mozilla_sampler_add_marker(info, payload) +#define PROFILER_MAIN_THREAD_MARKER(info) MOZ_ASSERT(NS_IsMainThread(), "This can only be called on the main thread"); mozilla_sampler_add_marker(info) + +#define PROFILER_MAIN_THREAD_LABEL(name_space, info, category) MOZ_ASSERT(NS_IsMainThread(), "This can only be called on the main thread"); mozilla::SamplerStackFrameRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__) +#define PROFILER_MAIN_THREAD_LABEL_PRINTF(name_space, info, category, ...) MOZ_ASSERT(NS_IsMainThread(), "This can only be called on the main thread"); mozilla::SamplerStackFramePrintfRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, category, __LINE__, __VA_ARGS__) + + +/* FIXME/bug 789667: memory constraints wouldn't much of a problem for + * this small a sample buffer size, except that serializing the + * profile data is extremely, unnecessarily memory intensive. */ +#ifdef MOZ_WIDGET_GONK +# define PLATFORM_LIKELY_MEMORY_CONSTRAINED +#endif + +#if !defined(PLATFORM_LIKELY_MEMORY_CONSTRAINED) && !defined(ARCH_ARMV6) +# define PROFILE_DEFAULT_ENTRY 1000000 +#else +# define PROFILE_DEFAULT_ENTRY 100000 +#endif + +// In the case of profiler_get_backtrace we know that we only need enough space +// for a single backtrace. +#define GET_BACKTRACE_DEFAULT_ENTRY 1000 + +#if defined(PLATFORM_LIKELY_MEMORY_CONSTRAINED) +/* A 1ms sampling interval has been shown to be a large perf hit + * (10fps) on memory-contrained (low-end) platforms, and additionally + * to yield different results from the profiler. Where this is the + * important case, b2g, there are also many gecko processes which + * magnify these effects. */ +# define PROFILE_DEFAULT_INTERVAL 10 +#elif defined(ANDROID) +// We use a lower frequency on Android, in order to make things work +// more smoothly on phones. This value can be adjusted later with +// some libunwind optimizations. +// In one sample measurement on Galaxy Nexus, out of about 700 backtraces, +// 60 of them took more than 25ms, and the average and standard deviation +// were 6.17ms and 9.71ms respectively. + +// For now since we don't support stackwalking let's use 1ms since it's fast +// enough. +#define PROFILE_DEFAULT_INTERVAL 1 +#else +#define PROFILE_DEFAULT_INTERVAL 1 +#endif +#define PROFILE_DEFAULT_FEATURES NULL +#define PROFILE_DEFAULT_FEATURE_COUNT 0 + +namespace mozilla { + +class MOZ_RAII GeckoProfilerTracingRAII { +public: + GeckoProfilerTracingRAII(const char* aCategory, const char* aInfo, + mozilla::UniquePtr<ProfilerBacktrace> aBacktrace + MOZ_GUARD_OBJECT_NOTIFIER_PARAM) + : mCategory(aCategory) + , mInfo(aInfo) + { + MOZ_GUARD_OBJECT_NOTIFIER_INIT; + profiler_tracing(mCategory, mInfo, aBacktrace.release(), TRACING_INTERVAL_START); + } + + ~GeckoProfilerTracingRAII() { + profiler_tracing(mCategory, mInfo, TRACING_INTERVAL_END); + } + +protected: + MOZ_DECL_USE_GUARD_OBJECT_NOTIFIER + const char* mCategory; + const char* mInfo; +}; + +class MOZ_RAII SamplerStackFrameRAII { +public: + // we only copy the strings at save time, so to take multiple parameters we'd need to copy them then. + SamplerStackFrameRAII(const char *aInfo, + js::ProfileEntry::Category aCategory, uint32_t line + MOZ_GUARD_OBJECT_NOTIFIER_PARAM) + { + MOZ_GUARD_OBJECT_NOTIFIER_INIT; + mHandle = mozilla_sampler_call_enter(aInfo, aCategory, this, false, line); + } + ~SamplerStackFrameRAII() { + mozilla_sampler_call_exit(mHandle); + } +private: + MOZ_DECL_USE_GUARD_OBJECT_NOTIFIER + void* mHandle; +}; + +static const int SAMPLER_MAX_STRING = 128; +class MOZ_RAII SamplerStackFramePrintfRAII { +public: + // we only copy the strings at save time, so to take multiple parameters we'd need to copy them then. + SamplerStackFramePrintfRAII(const char *aInfo, + js::ProfileEntry::Category aCategory, uint32_t line, const char *aFormat, ...) + : mHandle(nullptr) + { + if (profiler_is_active() && !profiler_in_privacy_mode()) { + va_list args; + va_start(args, aFormat); + char buff[SAMPLER_MAX_STRING]; + + // We have to use seperate printf's because we're using + // the vargs. + VsprintfLiteral(buff, aFormat, args); + SprintfLiteral(mDest, "%s %s", aInfo, buff); + + mHandle = mozilla_sampler_call_enter(mDest, aCategory, this, true, line); + va_end(args); + } else { + mHandle = mozilla_sampler_call_enter(aInfo, aCategory, this, false, line); + } + } + ~SamplerStackFramePrintfRAII() { + mozilla_sampler_call_exit(mHandle); + } +private: + char mDest[SAMPLER_MAX_STRING]; + void* mHandle; +}; + +} // namespace mozilla + +inline PseudoStack* mozilla_get_pseudo_stack(void) +{ + if (!stack_key_initialized) + return nullptr; + return tlsPseudoStack.get(); +} + +inline void* mozilla_sampler_call_enter(const char *aInfo, + js::ProfileEntry::Category aCategory, void *aFrameAddress, bool aCopy, uint32_t line) +{ + // check if we've been initialized to avoid calling pthread_getspecific + // with a null tlsStack which will return undefined results. + if (!stack_key_initialized) + return nullptr; + + PseudoStack *stack = tlsPseudoStack.get(); + // we can't infer whether 'stack' has been initialized + // based on the value of stack_key_intiailized because + // 'stack' is only intialized when a thread is being + // profiled. + if (!stack) { + return stack; + } + stack->push(aInfo, aCategory, aFrameAddress, aCopy, line); + + // The handle is meant to support future changes + // but for now it is simply use to save a call to + // pthread_getspecific on exit. It also supports the + // case where the sampler is initialized between + // enter and exit. + return stack; +} + +inline void mozilla_sampler_call_exit(void *aHandle) +{ + if (!aHandle) + return; + + PseudoStack *stack = (PseudoStack*)aHandle; + stack->popAndMaybeDelete(); +} + +void mozilla_sampler_add_marker(const char *aMarker, ProfilerMarkerPayload *aPayload); + +static inline +void profiler_log(const char *str) +{ + profiler_tracing("log", str, TRACING_EVENT); +} + +static inline +void profiler_log(const char *fmt, va_list args) +{ + mozilla_sampler_log(fmt, args); +} + +#endif /* ndef TOOLS_SPS_SAMPLER_H_ */ diff --git a/tools/profiler/public/ProfileGatherer.h b/tools/profiler/public/ProfileGatherer.h new file mode 100644 index 000000000..4e39a4f5c --- /dev/null +++ b/tools/profiler/public/ProfileGatherer.h @@ -0,0 +1,42 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZ_PROFILE_GATHERER_H +#define MOZ_PROFILE_GATHERER_H + +#include "mozilla/dom/Promise.h" + +class GeckoSampler; + +namespace mozilla { + +class ProfileGatherer final : public nsIObserver +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSIOBSERVER + + explicit ProfileGatherer(GeckoSampler* aTicker); + void WillGatherOOPProfile(); + void GatheredOOPProfile(); + void Start(double aSinceTime, mozilla::dom::Promise* aPromise); + void Cancel(); + void OOPExitProfile(const nsCString& aProfile); + +private: + ~ProfileGatherer() {}; + void Finish(); + void Reset(); + + nsTArray<nsCString> mExitProfiles; + RefPtr<mozilla::dom::Promise> mPromise; + GeckoSampler* mTicker; + double mSinceTime; + uint32_t mPendingProfiles; + bool mGathering; +}; + +} // namespace mozilla + +#endif diff --git a/tools/profiler/public/ProfilerBacktrace.h b/tools/profiler/public/ProfilerBacktrace.h new file mode 100644 index 000000000..bcaab3563 --- /dev/null +++ b/tools/profiler/public/ProfilerBacktrace.h @@ -0,0 +1,36 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __PROFILER_BACKTRACE_H +#define __PROFILER_BACKTRACE_H + +class SyncProfile; +class SpliceableJSONWriter; +class UniqueStacks; + +class ProfilerBacktrace +{ +public: + explicit ProfilerBacktrace(SyncProfile* aProfile); + ~ProfilerBacktrace(); + + // ProfilerBacktraces' stacks are deduplicated in the context of the + // profile that contains the backtrace as a marker payload. + // + // That is, markers that contain backtraces should not need their own stack, + // frame, and string tables. They should instead reuse their parent + // profile's tables. + void StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks); + +private: + ProfilerBacktrace(const ProfilerBacktrace&); + ProfilerBacktrace& operator=(const ProfilerBacktrace&); + + SyncProfile* mProfile; +}; + +#endif // __PROFILER_BACKTRACE_H + diff --git a/tools/profiler/public/ProfilerMarkers.h b/tools/profiler/public/ProfilerMarkers.h new file mode 100644 index 000000000..29711f210 --- /dev/null +++ b/tools/profiler/public/ProfilerMarkers.h @@ -0,0 +1,193 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILER_MARKERS_H +#define PROFILER_MARKERS_H + +#include "mozilla/TimeStamp.h" +#include "mozilla/Attributes.h" + +namespace mozilla { +namespace layers { +class Layer; +} // namespace layers +} // namespace mozilla + +class SpliceableJSONWriter; +class UniqueStacks; + +/** + * This is an abstract object that can be implied to supply + * data to be attached with a profiler marker. Most data inserted + * into a profile is stored in a circular buffer. This buffer + * typically wraps around and overwrites most entries. Because + * of this, this structure is designed to defer the work of + * prepare the payload only when 'preparePayload' is called. + * + * Note when implementing that this object is typically constructed + * on a particular thread but 'preparePayload' and the destructor + * is called from the main thread. + */ +class ProfilerMarkerPayload +{ +public: + /** + * ProfilerMarkerPayload takes ownership of aStack + */ + explicit ProfilerMarkerPayload(ProfilerBacktrace* aStack = nullptr); + ProfilerMarkerPayload(const mozilla::TimeStamp& aStartTime, + const mozilla::TimeStamp& aEndTime, + ProfilerBacktrace* aStack = nullptr); + + /** + * Called from the main thread + */ + virtual ~ProfilerMarkerPayload(); + + /** + * Called from the main thread + */ + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) = 0; + + mozilla::TimeStamp GetStartTime() const { return mStartTime; } + +protected: + /** + * Called from the main thread + */ + void streamCommonProps(const char* aMarkerType, SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks); + + void SetStack(ProfilerBacktrace* aStack) { mStack = aStack; } + +private: + mozilla::TimeStamp mStartTime; + mozilla::TimeStamp mEndTime; + ProfilerBacktrace* mStack; +}; + +class ProfilerMarkerTracing : public ProfilerMarkerPayload +{ +public: + ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData); + ProfilerMarkerTracing(const char* aCategory, TracingMetadata aMetaData, ProfilerBacktrace* aCause); + + const char *GetCategory() const { return mCategory; } + TracingMetadata GetMetaData() const { return mMetaData; } + + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) override; + +private: + const char *mCategory; + TracingMetadata mMetaData; +}; + + +#ifndef SPS_STANDALONE +#include "gfxASurface.h" +class ProfilerMarkerImagePayload : public ProfilerMarkerPayload +{ +public: + explicit ProfilerMarkerImagePayload(gfxASurface *aImg); + + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) override; + +private: + RefPtr<gfxASurface> mImg; +}; + +class IOMarkerPayload : public ProfilerMarkerPayload +{ +public: + IOMarkerPayload(const char* aSource, const char* aFilename, const mozilla::TimeStamp& aStartTime, + const mozilla::TimeStamp& aEndTime, + ProfilerBacktrace* aStack); + ~IOMarkerPayload(); + + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) override; + +private: + const char* mSource; + char* mFilename; +}; + +/** + * Contains the translation applied to a 2d layer so we can + * track the layer position at each frame. + */ +class LayerTranslationPayload : public ProfilerMarkerPayload +{ +public: + LayerTranslationPayload(mozilla::layers::Layer* aLayer, + mozilla::gfx::Point aPoint); + + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) override; + +private: + mozilla::layers::Layer* mLayer; + mozilla::gfx::Point mPoint; +}; + +#include "Units.h" // For ScreenIntPoint + +/** + * Tracks when touch events are processed by gecko, not when + * the touch actually occured in gonk/android. + */ +class TouchDataPayload : public ProfilerMarkerPayload +{ +public: + explicit TouchDataPayload(const mozilla::ScreenIntPoint& aPoint); + virtual ~TouchDataPayload() {} + + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) override; + +private: + mozilla::ScreenIntPoint mPoint; +}; + +/** + * Tracks when a vsync occurs according to the HardwareComposer. + */ +class VsyncPayload : public ProfilerMarkerPayload +{ +public: + explicit VsyncPayload(mozilla::TimeStamp aVsyncTimestamp); + virtual ~VsyncPayload() {} + + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) override; + +private: + mozilla::TimeStamp mVsyncTimestamp; +}; + +class GPUMarkerPayload : public ProfilerMarkerPayload +{ +public: + GPUMarkerPayload(const mozilla::TimeStamp& aCpuTimeStart, + const mozilla::TimeStamp& aCpuTimeEnd, + uint64_t aGpuTimeStart, + uint64_t aGpuTimeEnd); + ~GPUMarkerPayload() {} + + virtual void StreamPayload(SpliceableJSONWriter& aWriter, + UniqueStacks& aUniqueStacks) override; + +private: + mozilla::TimeStamp mCpuTimeStart; + mozilla::TimeStamp mCpuTimeEnd; + uint64_t mGpuTimeStart; + uint64_t mGpuTimeEnd; +}; +#endif + +#endif // PROFILER_MARKERS_H diff --git a/tools/profiler/public/PseudoStack.h b/tools/profiler/public/PseudoStack.h new file mode 100644 index 000000000..f9e3836ea --- /dev/null +++ b/tools/profiler/public/PseudoStack.h @@ -0,0 +1,469 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef PROFILER_PSEUDO_STACK_H_ +#define PROFILER_PSEUDO_STACK_H_ + +#include "mozilla/ArrayUtils.h" +#include <stdint.h> +#include "js/ProfilingStack.h" +#include <stdlib.h> +#include "mozilla/Atomics.h" +#ifndef SPS_STANDALONE +#include "nsISupportsImpl.h" +#endif + +/* we duplicate this code here to avoid header dependencies + * which make it more difficult to include in other places */ +#if defined(_M_X64) || defined(__x86_64__) +#define V8_HOST_ARCH_X64 1 +#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) +#define V8_HOST_ARCH_IA32 1 +#elif defined(__ARMEL__) +#define V8_HOST_ARCH_ARM 1 +#else +#warning Please add support for your architecture in chromium_types.h +#endif + +// STORE_SEQUENCER: Because signals can interrupt our profile modification +// we need to make stores are not re-ordered by the compiler +// or hardware to make sure the profile is consistent at +// every point the signal can fire. +#ifdef V8_HOST_ARCH_ARM +// TODO Is there something cheaper that will prevent +// memory stores from being reordered + +typedef void (*LinuxKernelMemoryBarrierFunc)(void); +LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) = + (LinuxKernelMemoryBarrierFunc) 0xffff0fa0; + +# define STORE_SEQUENCER() pLinuxKernelMemoryBarrier() +#elif defined(V8_HOST_ARCH_IA32) || defined(V8_HOST_ARCH_X64) +# if defined(_MSC_VER) +# include <intrin.h> +# define STORE_SEQUENCER() _ReadWriteBarrier(); +# elif defined(__INTEL_COMPILER) +# define STORE_SEQUENCER() __memory_barrier(); +# elif __GNUC__ +# define STORE_SEQUENCER() asm volatile("" ::: "memory"); +# else +# error "Memory clobber not supported for your compiler." +# endif +#else +# error "Memory clobber not supported for your platform." +#endif + +// We can't include <algorithm> because it causes issues on OS X, so we use +// our own min function. +static inline uint32_t sMin(uint32_t l, uint32_t r) { + return l < r ? l : r; +} + +// A stack entry exists to allow the JS engine to inform SPS of the current +// backtrace, but also to instrument particular points in C++ in case stack +// walking is not available on the platform we are running on. +// +// Each entry has a descriptive string, a relevant stack address, and some extra +// information the JS engine might want to inform SPS of. This class inherits +// from the JS engine's version of the entry to ensure that the size and layout +// of the two representations are consistent. +class StackEntry : public js::ProfileEntry +{ +}; + +class ProfilerMarkerPayload; +template<typename T> +class ProfilerLinkedList; +class SpliceableJSONWriter; +class UniqueStacks; + +class ProfilerMarker { + friend class ProfilerLinkedList<ProfilerMarker>; +public: + explicit ProfilerMarker(const char* aMarkerName, + ProfilerMarkerPayload* aPayload = nullptr, + double aTime = 0); + + ~ProfilerMarker(); + + const char* GetMarkerName() const { + return mMarkerName; + } + + void StreamJSON(SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks) const; + + void SetGeneration(uint32_t aGenID); + + bool HasExpired(uint32_t aGenID) const { + return mGenID + 2 <= aGenID; + } + + double GetTime() const; + +private: + char* mMarkerName; + ProfilerMarkerPayload* mPayload; + ProfilerMarker* mNext; + double mTime; + uint32_t mGenID; +}; + +template<typename T> +class ProfilerLinkedList { +public: + ProfilerLinkedList() + : mHead(nullptr) + , mTail(nullptr) + {} + + void insert(T* elem) + { + if (!mTail) { + mHead = elem; + mTail = elem; + } else { + mTail->mNext = elem; + mTail = elem; + } + elem->mNext = nullptr; + } + + T* popHead() + { + if (!mHead) { + MOZ_ASSERT(false); + return nullptr; + } + + T* head = mHead; + + mHead = head->mNext; + if (!mHead) { + mTail = nullptr; + } + + return head; + } + + const T* peek() { + return mHead; + } + +private: + T* mHead; + T* mTail; +}; + +typedef ProfilerLinkedList<ProfilerMarker> ProfilerMarkerLinkedList; + +template<typename T> +class ProfilerSignalSafeLinkedList { +public: + ProfilerSignalSafeLinkedList() + : mSignalLock(false) + {} + + ~ProfilerSignalSafeLinkedList() + { + if (mSignalLock) { + // Some thread is modifying the list. We should only be released on that + // thread. + abort(); + } + + while (mList.peek()) { + delete mList.popHead(); + } + } + + // Insert an item into the list. + // Must only be called from the owning thread. + // Must not be called while the list from accessList() is being accessed. + // In the profiler, we ensure that by interrupting the profiled thread + // (which is the one that owns this list and calls insert() on it) until + // we're done reading the list from the signal handler. + void insert(T* aElement) { + MOZ_ASSERT(aElement); + + mSignalLock = true; + STORE_SEQUENCER(); + + mList.insert(aElement); + + STORE_SEQUENCER(); + mSignalLock = false; + } + + // Called within signal, from any thread, possibly while insert() is in the + // middle of modifying the list (on the owning thread). Will return null if + // that is the case. + // Function must be reentrant. + ProfilerLinkedList<T>* accessList() + { + if (mSignalLock) { + return nullptr; + } + return &mList; + } + +private: + ProfilerLinkedList<T> mList; + + // If this is set, then it's not safe to read the list because its contents + // are being changed. + volatile bool mSignalLock; +}; + +// Stub eventMarker function for js-engine event generation. +void ProfilerJSEventMarker(const char *event); + +// the PseudoStack members are read by signal +// handlers, so the mutation of them needs to be signal-safe. +struct PseudoStack +{ +public: + // Create a new PseudoStack and acquire a reference to it. + static PseudoStack *create() + { + return new PseudoStack(); + } + + // This is called on every profiler restart. Put things that should happen at that time here. + void reinitializeOnResume() { + // This is needed to cause an initial sample to be taken from sleeping threads. Otherwise sleeping + // threads would not have any samples to copy forward while sleeping. + mSleepId++; + } + + void addMarker(const char* aMarkerStr, ProfilerMarkerPayload* aPayload, double aTime) + { + ProfilerMarker* marker = new ProfilerMarker(aMarkerStr, aPayload, aTime); + mPendingMarkers.insert(marker); + } + + // called within signal. Function must be reentrant + ProfilerMarkerLinkedList* getPendingMarkers() + { + // The profiled thread is interrupted, so we can access the list safely. + // Unless the profiled thread was in the middle of changing the list when + // we interrupted it - in that case, accessList() will return null. + return mPendingMarkers.accessList(); + } + + void push(const char *aName, js::ProfileEntry::Category aCategory, uint32_t line) + { + push(aName, aCategory, nullptr, false, line); + } + + void push(const char *aName, js::ProfileEntry::Category aCategory, + void *aStackAddress, bool aCopy, uint32_t line) + { + if (size_t(mStackPointer) >= mozilla::ArrayLength(mStack)) { + mStackPointer++; + return; + } + + // In order to ensure this object is kept alive while it is + // active, we acquire a reference at the outermost push. This is + // released by the corresponding pop. + if (mStackPointer == 0) { + ref(); + } + + volatile StackEntry &entry = mStack[mStackPointer]; + + // Make sure we increment the pointer after the name has + // been written such that mStack is always consistent. + entry.initCppFrame(aStackAddress, line); + entry.setLabel(aName); + MOZ_ASSERT(entry.flags() == js::ProfileEntry::IS_CPP_ENTRY); + entry.setCategory(aCategory); + + // Track if mLabel needs a copy. + if (aCopy) + entry.setFlag(js::ProfileEntry::FRAME_LABEL_COPY); + else + entry.unsetFlag(js::ProfileEntry::FRAME_LABEL_COPY); + + // Prevent the optimizer from re-ordering these instructions + STORE_SEQUENCER(); + mStackPointer++; + } + + // Pop the stack. If the stack is empty and all other references to + // this PseudoStack have been dropped, then the PseudoStack is + // deleted and "false" is returned. Otherwise "true" is returned. + bool popAndMaybeDelete() + { + mStackPointer--; + if (mStackPointer == 0) { + // Release our self-owned reference count. See 'push'. + deref(); + return false; + } else { + return true; + } + } + bool isEmpty() + { + return mStackPointer == 0; + } + uint32_t stackSize() const + { + return sMin(mStackPointer, mozilla::sig_safe_t(mozilla::ArrayLength(mStack))); + } + + void sampleContext(JSContext* context) { +#ifndef SPS_STANDALONE + if (mContext && !context) { + // On JS shut down, flush the current buffer as stringifying JIT samples + // requires a live JSContext. + flushSamplerOnJSShutdown(); + } + + mContext = context; + + if (!context) { + return; + } + + static_assert(sizeof(mStack[0]) == sizeof(js::ProfileEntry), + "mStack must be binary compatible with js::ProfileEntry."); + js::SetContextProfilingStack(context, + (js::ProfileEntry*) mStack, + (uint32_t*) &mStackPointer, + (uint32_t) mozilla::ArrayLength(mStack)); + if (mStartJSSampling) + enableJSSampling(); +#endif + } +#ifndef SPS_STANDALONE + void enableJSSampling() { + if (mContext) { + js::EnableContextProfilingStack(mContext, true); + js::RegisterContextProfilingEventMarker(mContext, &ProfilerJSEventMarker); + mStartJSSampling = false; + } else { + mStartJSSampling = true; + } + } + void jsOperationCallback() { + if (mStartJSSampling) + enableJSSampling(); + } + void disableJSSampling() { + mStartJSSampling = false; + if (mContext) + js::EnableContextProfilingStack(mContext, false); + } +#endif + + // Keep a list of active checkpoints + StackEntry volatile mStack[1024]; + private: + + // A PseudoStack can only be created via the "create" method. + PseudoStack() + : mStackPointer(0) + , mSleepId(0) + , mSleepIdObserved(0) + , mSleeping(false) + , mRefCnt(1) +#ifndef SPS_STANDALONE + , mContext(nullptr) +#endif + , mStartJSSampling(false) + , mPrivacyMode(false) + { + MOZ_COUNT_CTOR(PseudoStack); + } + + // A PseudoStack can only be deleted via deref. + ~PseudoStack() { + MOZ_COUNT_DTOR(PseudoStack); + if (mStackPointer != 0) { + // We're releasing the pseudostack while it's still in use. + // The label macros keep a non ref counted reference to the + // stack to avoid a TLS. If these are not all cleared we will + // get a use-after-free so better to crash now. + abort(); + } + } + + // No copying. + PseudoStack(const PseudoStack&) = delete; + void operator=(const PseudoStack&) = delete; + + void flushSamplerOnJSShutdown(); + + // Keep a list of pending markers that must be moved + // to the circular buffer + ProfilerSignalSafeLinkedList<ProfilerMarker> mPendingMarkers; + // This may exceed the length of mStack, so instead use the stackSize() method + // to determine the number of valid samples in mStack + mozilla::sig_safe_t mStackPointer; + // Incremented at every sleep/wake up of the thread + int mSleepId; + // Previous id observed. If this is not the same as mSleepId, this thread is not sleeping in the same place any more + mozilla::Atomic<int> mSleepIdObserved; + // Keeps tack of whether the thread is sleeping or not (1 when sleeping 0 when awake) + mozilla::Atomic<int> mSleeping; + // This class is reference counted because it must be kept alive by + // the ThreadInfo, by the reference from tlsPseudoStack, and by the + // current thread when callbacks are in progress. + mozilla::Atomic<int> mRefCnt; + + public: +#ifndef SPS_STANDALONE + // The context which is being sampled + JSContext *mContext; +#endif + // Start JS Profiling when possible + bool mStartJSSampling; + bool mPrivacyMode; + + enum SleepState {NOT_SLEEPING, SLEEPING_FIRST, SLEEPING_AGAIN}; + + // The first time this is called per sleep cycle we return SLEEPING_FIRST + // and any other subsequent call within the same sleep cycle we return SLEEPING_AGAIN + SleepState observeSleeping() { + if (mSleeping != 0) { + if (mSleepIdObserved == mSleepId) { + return SLEEPING_AGAIN; + } else { + mSleepIdObserved = mSleepId; + return SLEEPING_FIRST; + } + } else { + return NOT_SLEEPING; + } + } + + + // Call this whenever the current thread sleeps or wakes up + // Calling setSleeping with the same value twice in a row is an error + void setSleeping(int sleeping) { + MOZ_ASSERT(mSleeping != sleeping); + mSleepId++; + mSleeping = sleeping; + } + + bool isSleeping() { + return !!mSleeping; + } + + void ref() { + ++mRefCnt; + } + + void deref() { + int newValue = --mRefCnt; + if (newValue == 0) { + delete this; + } + } +}; + +#endif diff --git a/tools/profiler/public/shared-libraries.h b/tools/profiler/public/shared-libraries.h new file mode 100644 index 000000000..b02a1fb08 --- /dev/null +++ b/tools/profiler/public/shared-libraries.h @@ -0,0 +1,137 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef SHARED_LIBRARIES_H_ +#define SHARED_LIBRARIES_H_ + +#ifndef MOZ_ENABLE_PROFILER_SPS +#error This header does not have a useful implementation on your platform! +#endif + +#include <algorithm> +#include <vector> +#include <string> +#include <stdlib.h> +#include <stdint.h> +#ifndef SPS_STANDALONE +#include <nsID.h> +#endif + +class SharedLibrary { +public: + + SharedLibrary(uintptr_t aStart, + uintptr_t aEnd, + uintptr_t aOffset, + const std::string& aBreakpadId, + const std::string& aName) + : mStart(aStart) + , mEnd(aEnd) + , mOffset(aOffset) + , mBreakpadId(aBreakpadId) + , mName(aName) + {} + + SharedLibrary(const SharedLibrary& aEntry) + : mStart(aEntry.mStart) + , mEnd(aEntry.mEnd) + , mOffset(aEntry.mOffset) + , mBreakpadId(aEntry.mBreakpadId) + , mName(aEntry.mName) + {} + + SharedLibrary& operator=(const SharedLibrary& aEntry) + { + // Gracefully handle self assignment + if (this == &aEntry) return *this; + + mStart = aEntry.mStart; + mEnd = aEntry.mEnd; + mOffset = aEntry.mOffset; + mBreakpadId = aEntry.mBreakpadId; + mName = aEntry.mName; + return *this; + } + + bool operator==(const SharedLibrary& other) const + { + return (mStart == other.mStart) && + (mEnd == other.mEnd) && + (mOffset == other.mOffset) && + (mName == other.mName) && + (mBreakpadId == other.mBreakpadId); + } + + uintptr_t GetStart() const { return mStart; } + uintptr_t GetEnd() const { return mEnd; } + uintptr_t GetOffset() const { return mOffset; } + const std::string &GetBreakpadId() const { return mBreakpadId; } + const std::string &GetName() const { return mName; } + +private: + SharedLibrary() {} + + uintptr_t mStart; + uintptr_t mEnd; + uintptr_t mOffset; + std::string mBreakpadId; + std::string mName; +}; + +static bool +CompareAddresses(const SharedLibrary& first, const SharedLibrary& second) +{ + return first.GetStart() < second.GetStart(); +} + +class SharedLibraryInfo { +public: + static SharedLibraryInfo GetInfoForSelf(); + SharedLibraryInfo() {} + + void AddSharedLibrary(SharedLibrary entry) + { + mEntries.push_back(entry); + } + + const SharedLibrary& GetEntry(size_t i) const + { + return mEntries[i]; + } + + // Removes items in the range [first, last) + // i.e. element at the "last" index is not removed + void RemoveEntries(size_t first, size_t last) + { + mEntries.erase(mEntries.begin() + first, mEntries.begin() + last); + } + + bool Contains(const SharedLibrary& searchItem) const + { + return (mEntries.end() != + std::find(mEntries.begin(), mEntries.end(), searchItem)); + } + + size_t GetSize() const + { + return mEntries.size(); + } + + void SortByAddress() + { + std::sort(mEntries.begin(), mEntries.end(), CompareAddresses); + } + + void Clear() + { + mEntries.clear(); + } + +private: + std::vector<SharedLibrary> mEntries; +}; + +#endif diff --git a/tools/profiler/tasktracer/GeckoTaskTracer.cpp b/tools/profiler/tasktracer/GeckoTaskTracer.cpp new file mode 100644 index 000000000..ada695614 --- /dev/null +++ b/tools/profiler/tasktracer/GeckoTaskTracer.cpp @@ -0,0 +1,472 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "GeckoTaskTracer.h" +#include "GeckoTaskTracerImpl.h" + +#include "mozilla/MathAlgorithms.h" +#include "mozilla/StaticMutex.h" +#include "mozilla/ThreadLocal.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" +#include "mozilla/Unused.h" + +#include "nsString.h" +#include "nsThreadUtils.h" +#include "prtime.h" + +#include <stdarg.h> + +// We need a definition of gettid(), but glibc doesn't provide a +// wrapper for it. +#if defined(__GLIBC__) +#include <unistd.h> +#include <sys/syscall.h> +static inline pid_t gettid() +{ + return (pid_t) syscall(SYS_gettid); +} +#elif defined(XP_MACOSX) +#include <unistd.h> +#include <sys/syscall.h> +static inline pid_t gettid() +{ + return (pid_t) syscall(SYS_thread_selfid); +} +#elif defined(LINUX) +#include <sys/types.h> +pid_t gettid(); +#endif + +// NS_ENSURE_TRUE_VOID() without the warning on the debug build. +#define ENSURE_TRUE_VOID(x) \ + do { \ + if (MOZ_UNLIKELY(!(x))) { \ + return; \ + } \ + } while(0) + +// NS_ENSURE_TRUE() without the warning on the debug build. +#define ENSURE_TRUE(x, ret) \ + do { \ + if (MOZ_UNLIKELY(!(x))) { \ + return ret; \ + } \ + } while(0) + +namespace mozilla { +namespace tasktracer { + +static MOZ_THREAD_LOCAL(TraceInfo*) sTraceInfoTLS; +static mozilla::StaticMutex sMutex; + +// The generation of TraceInfo. It will be > 0 if the Task Tracer is started and +// <= 0 if stopped. +static mozilla::Atomic<bool> sStarted; +static nsTArray<UniquePtr<TraceInfo>>* sTraceInfos = nullptr; +static PRTime sStartTime; + +static const char sJSLabelPrefix[] = "#tt#"; + +namespace { + +static PRTime +GetTimestamp() +{ + return PR_Now() / 1000; +} + +static TraceInfo* +AllocTraceInfo(int aTid) +{ + StaticMutexAutoLock lock(sMutex); + + auto* info = sTraceInfos->AppendElement(MakeUnique<TraceInfo>(aTid)); + + return info->get(); +} + +static void +SaveCurTraceInfo() +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + info->mSavedCurTraceSourceId = info->mCurTraceSourceId; + info->mSavedCurTraceSourceType = info->mCurTraceSourceType; + info->mSavedCurTaskId = info->mCurTaskId; +} + +static void +RestoreCurTraceInfo() +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + info->mCurTraceSourceId = info->mSavedCurTraceSourceId; + info->mCurTraceSourceType = info->mSavedCurTraceSourceType; + info->mCurTaskId = info->mSavedCurTaskId; +} + +static void +CreateSourceEvent(SourceEventType aType) +{ + // Save the currently traced source event info. + SaveCurTraceInfo(); + + // Create a new unique task id. + uint64_t newId = GenNewUniqueTaskId(); + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + info->mCurTraceSourceId = newId; + info->mCurTraceSourceType = aType; + info->mCurTaskId = newId; + + uintptr_t* namePtr; +#define SOURCE_EVENT_NAME(type) \ + case SourceEventType::type: \ + { \ + static int CreateSourceEvent##type; \ + namePtr = (uintptr_t*)&CreateSourceEvent##type; \ + break; \ + } + + switch (aType) { +#include "SourceEventTypeMap.h" + default: + MOZ_CRASH("Unknown SourceEvent."); + } +#undef CREATE_SOURCE_EVENT_NAME + + // Log a fake dispatch and start for this source event. + LogDispatch(newId, newId, newId, aType); + LogVirtualTablePtr(newId, newId, namePtr); + LogBegin(newId, newId); +} + +static void +DestroySourceEvent() +{ + // Log a fake end for this source event. + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + LogEnd(info->mCurTraceSourceId, info->mCurTraceSourceId); + + // Restore the previously saved source event info. + RestoreCurTraceInfo(); +} + +inline static bool +IsStartLogging() +{ + return sStarted; +} + +static void +SetLogStarted(bool aIsStartLogging) +{ + MOZ_ASSERT(aIsStartLogging != IsStartLogging()); + sStarted = aIsStartLogging; + + StaticMutexAutoLock lock(sMutex); + if (!aIsStartLogging) { + for (uint32_t i = 0; i < sTraceInfos->Length(); ++i) { + (*sTraceInfos)[i]->mObsolete = true; + } + } +} + +static void +CleanUp() +{ + SetLogStarted(false); + StaticMutexAutoLock lock(sMutex); + + if (sTraceInfos) { + delete sTraceInfos; + sTraceInfos = nullptr; + } +} + +inline static void +ObsoleteCurrentTraceInfos() +{ + // Note that we can't and don't need to acquire sMutex here because this + // function is called before the other threads are recreated. + for (uint32_t i = 0; i < sTraceInfos->Length(); ++i) { + (*sTraceInfos)[i]->mObsolete = true; + } +} + +} // namespace anonymous + +nsCString* +TraceInfo::AppendLog() +{ + MutexAutoLock lock(mLogsMutex); + return mLogs.AppendElement(); +} + +void +TraceInfo::MoveLogsInto(TraceInfoLogsType& aResult) +{ + MutexAutoLock lock(mLogsMutex); + aResult.AppendElements(Move(mLogs)); +} + +void +InitTaskTracer(uint32_t aFlags) +{ + if (aFlags & FORKED_AFTER_NUWA) { + ObsoleteCurrentTraceInfos(); + return; + } + + MOZ_ASSERT(!sTraceInfos); + sTraceInfos = new nsTArray<UniquePtr<TraceInfo>>(); + + if (!sTraceInfoTLS.initialized()) { + Unused << sTraceInfoTLS.init(); + } +} + +void +ShutdownTaskTracer() +{ + CleanUp(); +} + +static void +FreeTraceInfo(TraceInfo* aTraceInfo) +{ + StaticMutexAutoLock lock(sMutex); + if (aTraceInfo) { + sTraceInfos->RemoveElement(aTraceInfo); + } +} + +void FreeTraceInfo() +{ + FreeTraceInfo(sTraceInfoTLS.get()); +} + +TraceInfo* +GetOrCreateTraceInfo() +{ + ENSURE_TRUE(sTraceInfoTLS.initialized(), nullptr); + ENSURE_TRUE(IsStartLogging(), nullptr); + + TraceInfo* info = sTraceInfoTLS.get(); + if (info && info->mObsolete) { + // TraceInfo is obsolete: remove it. + FreeTraceInfo(info); + info = nullptr; + } + + if (!info) { + info = AllocTraceInfo(gettid()); + sTraceInfoTLS.set(info); + } + + return info; +} + +uint64_t +GenNewUniqueTaskId() +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE(info, 0); + + pid_t tid = gettid(); + uint64_t taskid = ((uint64_t)tid << 32) | ++info->mLastUniqueTaskId; + return taskid; +} + +AutoSaveCurTraceInfo::AutoSaveCurTraceInfo() +{ + SaveCurTraceInfo(); +} + +AutoSaveCurTraceInfo::~AutoSaveCurTraceInfo() +{ + RestoreCurTraceInfo(); +} + +void +SetCurTraceInfo(uint64_t aSourceEventId, uint64_t aParentTaskId, + SourceEventType aSourceEventType) +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + info->mCurTraceSourceId = aSourceEventId; + info->mCurTaskId = aParentTaskId; + info->mCurTraceSourceType = aSourceEventType; +} + +void +GetCurTraceInfo(uint64_t* aOutSourceEventId, uint64_t* aOutParentTaskId, + SourceEventType* aOutSourceEventType) +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + *aOutSourceEventId = info->mCurTraceSourceId; + *aOutParentTaskId = info->mCurTaskId; + *aOutSourceEventType = info->mCurTraceSourceType; +} + +void +LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId, uint64_t aSourceEventId, + SourceEventType aSourceEventType) +{ + LogDispatch(aTaskId, aParentTaskId, aSourceEventId, aSourceEventType, 0); +} + +void +LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId, uint64_t aSourceEventId, + SourceEventType aSourceEventType, int aDelayTimeMs) +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + // aDelayTimeMs is the expected delay time in milliseconds, thus the dispatch + // time calculated of it might be slightly off in the real world. + uint64_t time = (aDelayTimeMs <= 0) ? GetTimestamp() : + GetTimestamp() + aDelayTimeMs; + + // Log format: + // [0 taskId dispatchTime sourceEventId sourceEventType parentTaskId] + nsCString* log = info->AppendLog(); + if (log) { + log->AppendPrintf("%d %lld %lld %lld %d %lld", + ACTION_DISPATCH, aTaskId, time, aSourceEventId, + aSourceEventType, aParentTaskId); + } +} + +void +LogBegin(uint64_t aTaskId, uint64_t aSourceEventId) +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + // Log format: + // [1 taskId beginTime processId threadId] + nsCString* log = info->AppendLog(); + if (log) { + log->AppendPrintf("%d %lld %lld %d %d", + ACTION_BEGIN, aTaskId, GetTimestamp(), getpid(), gettid()); + } +} + +void +LogEnd(uint64_t aTaskId, uint64_t aSourceEventId) +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + // Log format: + // [2 taskId endTime] + nsCString* log = info->AppendLog(); + if (log) { + log->AppendPrintf("%d %lld %lld", ACTION_END, aTaskId, GetTimestamp()); + } +} + +void +LogVirtualTablePtr(uint64_t aTaskId, uint64_t aSourceEventId, uintptr_t* aVptr) +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + // Log format: + // [4 taskId address] + nsCString* log = info->AppendLog(); + if (log) { + log->AppendPrintf("%d %lld %p", ACTION_GET_VTABLE, aTaskId, aVptr); + } +} + +AutoSourceEvent::AutoSourceEvent(SourceEventType aType) +{ + CreateSourceEvent(aType); +} + +AutoSourceEvent::~AutoSourceEvent() +{ + DestroySourceEvent(); +} + +void AddLabel(const char* aFormat, ...) +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + va_list args; + va_start(args, aFormat); + nsAutoCString buffer; + buffer.AppendPrintf(aFormat, args); + va_end(args); + + // Log format: + // [3 taskId "label"] + nsCString* log = info->AppendLog(); + if (log) { + log->AppendPrintf("%d %lld %lld \"%s\"", ACTION_ADD_LABEL, info->mCurTaskId, + GetTimestamp(), buffer.get()); + } +} + +// Functions used by GeckoProfiler. + +void +StartLogging() +{ + sStartTime = GetTimestamp(); + SetLogStarted(true); +} + +void +StopLogging() +{ + SetLogStarted(false); +} + +UniquePtr<TraceInfoLogsType> +GetLoggedData(TimeStamp aTimeStamp) +{ + auto result = MakeUnique<TraceInfoLogsType>(); + + // TODO: This is called from a signal handler. Use semaphore instead. + StaticMutexAutoLock lock(sMutex); + + for (uint32_t i = 0; i < sTraceInfos->Length(); ++i) { + (*sTraceInfos)[i]->MoveLogsInto(*result); + } + + return result; +} + +const PRTime +GetStartTime() +{ + return sStartTime; +} + +const char* +GetJSLabelPrefix() +{ + return sJSLabelPrefix; +} + +#undef ENSURE_TRUE_VOID +#undef ENSURE_TRUE + +} // namespace tasktracer +} // namespace mozilla diff --git a/tools/profiler/tasktracer/GeckoTaskTracer.h b/tools/profiler/tasktracer/GeckoTaskTracer.h new file mode 100644 index 000000000..9e36b3f0b --- /dev/null +++ b/tools/profiler/tasktracer/GeckoTaskTracer.h @@ -0,0 +1,92 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef GECKO_TASK_TRACER_H +#define GECKO_TASK_TRACER_H + +#include "mozilla/UniquePtr.h" +#include "nsCOMPtr.h" +#include "nsTArrayForwardDeclare.h" + +/** + * TaskTracer provides a way to trace the correlation between different tasks + * across threads and processes. Unlike sampling based profilers, TaskTracer can + * tell you where a task is dispatched from, what its original source was, how + * long it waited in the event queue, and how long it took to execute. + * + * Source Events are usually some kinds of I/O events we're interested in, such + * as touch events, timer events, network events, etc. When a source event is + * created, TaskTracer records the entire chain of Tasks and nsRunnables as they + * are dispatched to different threads and processes. It records latency, + * execution time, etc. for each Task and nsRunnable that chains back to the + * original source event. + */ + +class Task; +class nsIRunnable; +class nsCString; + +namespace mozilla { + +class TimeStamp; + +namespace tasktracer { + +enum { + FORKED_AFTER_NUWA = 1 << 0 +}; + +enum SourceEventType { + Unknown = 0, + Touch, + Mouse, + Key, + Bluetooth, + Unixsocket, + Wifi +}; + +class AutoSourceEvent +{ +public: + AutoSourceEvent(SourceEventType aType); + ~AutoSourceEvent(); +}; + +void InitTaskTracer(uint32_t aFlags = 0); +void ShutdownTaskTracer(); + +// Add a label to the currently running task, aFormat is the message to log, +// followed by corresponding parameters. +void AddLabel(const char* aFormat, ...); + +void StartLogging(); +void StopLogging(); +UniquePtr<nsTArray<nsCString>> GetLoggedData(TimeStamp aStartTime); + +// Returns the timestamp when Task Tracer is enabled in this process. +const PRTime GetStartTime(); + +/** + * Internal functions. + */ + +Task* CreateTracedTask(Task* aTask); + +already_AddRefed<nsIRunnable> +CreateTracedRunnable(already_AddRefed<nsIRunnable>&& aRunnable); + +// Free the TraceInfo allocated on a thread's TLS. Currently we are wrapping +// tasks running on nsThreads and base::thread, so FreeTraceInfo is called at +// where nsThread and base::thread release themselves. +void FreeTraceInfo(); + +const char* GetJSLabelPrefix(); + +} // namespace tasktracer +} // namespace mozilla. + +#endif diff --git a/tools/profiler/tasktracer/GeckoTaskTracerImpl.h b/tools/profiler/tasktracer/GeckoTaskTracerImpl.h new file mode 100644 index 000000000..5b748fb96 --- /dev/null +++ b/tools/profiler/tasktracer/GeckoTaskTracerImpl.h @@ -0,0 +1,102 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef GECKO_TASK_TRACER_IMPL_H +#define GECKO_TASK_TRACER_IMPL_H + +#include "GeckoTaskTracer.h" +#include "mozilla/Mutex.h" +#include "nsTArray.h" + +namespace mozilla { +namespace tasktracer { + +typedef nsTArray<nsCString> TraceInfoLogsType; + +struct TraceInfo +{ + TraceInfo(uint32_t aThreadId) + : mCurTraceSourceId(0) + , mCurTaskId(0) + , mSavedCurTraceSourceId(0) + , mSavedCurTaskId(0) + , mCurTraceSourceType(Unknown) + , mSavedCurTraceSourceType(Unknown) + , mThreadId(aThreadId) + , mLastUniqueTaskId(0) + , mObsolete(false) + , mLogsMutex("TraceInfoMutex") + { + MOZ_COUNT_CTOR(TraceInfo); + } + + ~TraceInfo() { MOZ_COUNT_DTOR(TraceInfo); } + + nsCString* AppendLog(); + void MoveLogsInto(TraceInfoLogsType& aResult); + + uint64_t mCurTraceSourceId; + uint64_t mCurTaskId; + uint64_t mSavedCurTraceSourceId; + uint64_t mSavedCurTaskId; + SourceEventType mCurTraceSourceType; + SourceEventType mSavedCurTraceSourceType; + uint32_t mThreadId; + uint32_t mLastUniqueTaskId; + mozilla::Atomic<bool> mObsolete; + + // This mutex protects the following log array because MoveLogsInto() might + // be called on another thread. + mozilla::Mutex mLogsMutex; + TraceInfoLogsType mLogs; +}; + +// Return the TraceInfo of current thread, allocate a new one if not exit. +TraceInfo* GetOrCreateTraceInfo(); + +uint64_t GenNewUniqueTaskId(); + +class AutoSaveCurTraceInfo +{ +public: + AutoSaveCurTraceInfo(); + ~AutoSaveCurTraceInfo(); +}; + +void SetCurTraceInfo(uint64_t aSourceEventId, uint64_t aParentTaskId, + SourceEventType aSourceEventType); + +void GetCurTraceInfo(uint64_t* aOutSourceEventId, uint64_t* aOutParentTaskId, + SourceEventType* aOutSourceEventType); + +/** + * Logging functions of different trace actions. + */ +enum ActionType { + ACTION_DISPATCH = 0, + ACTION_BEGIN, + ACTION_END, + ACTION_ADD_LABEL, + ACTION_GET_VTABLE +}; + +void LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId, + uint64_t aSourceEventId, SourceEventType aSourceEventType); + +void LogDispatch(uint64_t aTaskId, uint64_t aParentTaskId, + uint64_t aSourceEventId, SourceEventType aSourceEventType, + int aDelayTimeMs); + +void LogBegin(uint64_t aTaskId, uint64_t aSourceEventId); + +void LogEnd(uint64_t aTaskId, uint64_t aSourceEventId); + +void LogVirtualTablePtr(uint64_t aTaskId, uint64_t aSourceEventId, uintptr_t* aVptr); + +} // namespace mozilla +} // namespace tasktracer + +#endif diff --git a/tools/profiler/tasktracer/SourceEventTypeMap.h b/tools/profiler/tasktracer/SourceEventTypeMap.h new file mode 100644 index 000000000..77dbc8330 --- /dev/null +++ b/tools/profiler/tasktracer/SourceEventTypeMap.h @@ -0,0 +1,11 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. */ + +SOURCE_EVENT_NAME(Unknown) +SOURCE_EVENT_NAME(Touch) +SOURCE_EVENT_NAME(Mouse) +SOURCE_EVENT_NAME(Key) +SOURCE_EVENT_NAME(Bluetooth) +SOURCE_EVENT_NAME(Unixsocket) +SOURCE_EVENT_NAME(Wifi) diff --git a/tools/profiler/tasktracer/TracedTaskCommon.cpp b/tools/profiler/tasktracer/TracedTaskCommon.cpp new file mode 100644 index 000000000..770eb202c --- /dev/null +++ b/tools/profiler/tasktracer/TracedTaskCommon.cpp @@ -0,0 +1,169 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "GeckoTaskTracerImpl.h" +#include "TracedTaskCommon.h" + +// NS_ENSURE_TRUE_VOID() without the warning on the debug build. +#define ENSURE_TRUE_VOID(x) \ + do { \ + if (MOZ_UNLIKELY(!(x))) { \ + return; \ + } \ + } while(0) + +namespace mozilla { +namespace tasktracer { + +TracedTaskCommon::TracedTaskCommon() + : mSourceEventType(SourceEventType::Unknown) + , mSourceEventId(0) + , mParentTaskId(0) + , mTaskId(0) + , mIsTraceInfoInit(false) +{ +} + +TracedTaskCommon::~TracedTaskCommon() +{ +} + +void +TracedTaskCommon::Init() +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + mTaskId = GenNewUniqueTaskId(); + mSourceEventId = info->mCurTraceSourceId; + mSourceEventType = info->mCurTraceSourceType; + mParentTaskId = info->mCurTaskId; + mIsTraceInfoInit = true; +} + +void +TracedTaskCommon::DispatchTask(int aDelayTimeMs) +{ + LogDispatch(mTaskId, mParentTaskId, mSourceEventId, mSourceEventType, + aDelayTimeMs); +} + +void +TracedTaskCommon::GetTLSTraceInfo() +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + mSourceEventType = info->mCurTraceSourceType; + mSourceEventId = info->mCurTraceSourceId; + mTaskId = info->mCurTaskId; + mIsTraceInfoInit = true; +} + +void +TracedTaskCommon::SetTLSTraceInfo() +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + if (mIsTraceInfoInit) { + info->mCurTraceSourceId = mSourceEventId; + info->mCurTraceSourceType = mSourceEventType; + info->mCurTaskId = mTaskId; + } +} + +void +TracedTaskCommon::ClearTLSTraceInfo() +{ + TraceInfo* info = GetOrCreateTraceInfo(); + ENSURE_TRUE_VOID(info); + + info->mCurTraceSourceId = 0; + info->mCurTraceSourceType = SourceEventType::Unknown; + info->mCurTaskId = 0; +} + +/** + * Implementation of class TracedRunnable. + */ +TracedRunnable::TracedRunnable(already_AddRefed<nsIRunnable>&& aOriginalObj) + : TracedTaskCommon() + , mOriginalObj(Move(aOriginalObj)) +{ + Init(); + LogVirtualTablePtr(mTaskId, mSourceEventId, reinterpret_cast<uintptr_t*>(mOriginalObj.get())); +} + +TracedRunnable::~TracedRunnable() +{ +} + +NS_IMETHODIMP +TracedRunnable::Run() +{ + SetTLSTraceInfo(); + LogBegin(mTaskId, mSourceEventId); + nsresult rv = mOriginalObj->Run(); + LogEnd(mTaskId, mSourceEventId); + ClearTLSTraceInfo(); + + return rv; +} + +/** + * Implementation of class TracedTask. + */ +TracedTask::TracedTask(Task* aOriginalObj) + : TracedTaskCommon() + , mOriginalObj(aOriginalObj) +{ + Init(); + LogVirtualTablePtr(mTaskId, mSourceEventId, reinterpret_cast<uintptr_t*>(aOriginalObj)); +} + +TracedTask::~TracedTask() +{ + if (mOriginalObj) { + delete mOriginalObj; + mOriginalObj = nullptr; + } +} + +void +TracedTask::Run() +{ + SetTLSTraceInfo(); + LogBegin(mTaskId, mSourceEventId); + mOriginalObj->Run(); + LogEnd(mTaskId, mSourceEventId); + ClearTLSTraceInfo(); +} + +/** + * CreateTracedRunnable() returns a TracedRunnable wrapping the original + * nsIRunnable object, aRunnable. + */ +already_AddRefed<nsIRunnable> +CreateTracedRunnable(already_AddRefed<nsIRunnable>&& aRunnable) +{ + nsCOMPtr<nsIRunnable> runnable = new TracedRunnable(Move(aRunnable)); + return runnable.forget(); +} + +/** + * CreateTracedTask() returns a TracedTask wrapping the original Task object, + * aTask. + */ +Task* +CreateTracedTask(Task* aTask) +{ + Task* task = new TracedTask(aTask); + return task; +} + +} // namespace tasktracer +} // namespace mozilla diff --git a/tools/profiler/tasktracer/TracedTaskCommon.h b/tools/profiler/tasktracer/TracedTaskCommon.h new file mode 100644 index 000000000..3594b8e9e --- /dev/null +++ b/tools/profiler/tasktracer/TracedTaskCommon.h @@ -0,0 +1,73 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef TRACED_TASK_COMMON_H +#define TRACED_TASK_COMMON_H + +#include "base/task.h" +#include "GeckoTaskTracer.h" +#include "nsCOMPtr.h" +#include "nsThreadUtils.h" + +namespace mozilla { +namespace tasktracer { + +class TracedTaskCommon +{ +public: + TracedTaskCommon(); + virtual ~TracedTaskCommon(); + + void DispatchTask(int aDelayTimeMs = 0); + + void SetTLSTraceInfo(); + void GetTLSTraceInfo(); + void ClearTLSTraceInfo(); + +protected: + void Init(); + + // TraceInfo of TLS will be set by the following parameters, including source + // event type, source event ID, parent task ID, and task ID of this traced + // task/runnable. + SourceEventType mSourceEventType; + uint64_t mSourceEventId; + uint64_t mParentTaskId; + uint64_t mTaskId; + bool mIsTraceInfoInit; +}; + +class TracedRunnable : public TracedTaskCommon + , public nsRunnable +{ +public: + NS_DECL_NSIRUNNABLE + + TracedRunnable(already_AddRefed<nsIRunnable>&& aOriginalObj); + +private: + virtual ~TracedRunnable(); + + nsCOMPtr<nsIRunnable> mOriginalObj; +}; + +class TracedTask : public TracedTaskCommon + , public Task +{ +public: + TracedTask(Task* aOriginalObj); + ~TracedTask(); + + virtual void Run(); + +private: + Task* mOriginalObj; +}; + +} // namespace tasktracer +} // namespace mozilla + +#endif diff --git a/tools/profiler/tests/gtest/LulTest.cpp b/tools/profiler/tests/gtest/LulTest.cpp new file mode 100644 index 000000000..8a165ab34 --- /dev/null +++ b/tools/profiler/tests/gtest/LulTest.cpp @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" +#include "mozilla/Atomics.h" +#include "LulMain.h" +#include "GeckoProfiler.h" // for TracingMetadata +#include "platform-linux-lul.h" // for read_procmaps + +// Set this to 0 to make LUL be completely silent during tests. +// Set it to 1 to get logging output from LUL, presumably for +// the purpose of debugging it. +#define DEBUG_LUL_TEST 0 + +// LUL needs a callback for its logging sink. +static void +gtest_logging_sink_for_LulIntegration(const char* str) { + if (DEBUG_LUL_TEST == 0) { + return; + } + // Ignore any trailing \n, since LOG will add one anyway. + size_t n = strlen(str); + if (n > 0 && str[n-1] == '\n') { + char* tmp = strdup(str); + tmp[n-1] = 0; + fprintf(stderr, "LUL-in-gtest: %s\n", tmp); + free(tmp); + } else { + fprintf(stderr, "LUL-in-gtest: %s\n", str); + } +} + +TEST(LulIntegration, unwind_consistency) { + // Set up LUL and get it to read unwind info for libxul.so, which is + // all we care about here, plus (incidentally) practically every + // other object in the process too. + lul::LUL* lul = new lul::LUL(gtest_logging_sink_for_LulIntegration); + read_procmaps(lul); + + // Run unwind tests and receive information about how many there + // were and how many were successful. + lul->EnableUnwinding(); + int nTests = 0, nTestsPassed = 0; + RunLulUnitTests(&nTests, &nTestsPassed, lul); + EXPECT_TRUE(nTests == 6) << "Unexpected number of tests"; + EXPECT_TRUE(nTestsPassed == nTests) << "Not all tests passed"; + + delete lul; +} diff --git a/tools/profiler/tests/gtest/LulTestDwarf.cpp b/tools/profiler/tests/gtest/LulTestDwarf.cpp new file mode 100644 index 000000000..5cfd71fd4 --- /dev/null +++ b/tools/profiler/tests/gtest/LulTestDwarf.cpp @@ -0,0 +1,2597 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" +#include "gmock/gmock.h" +#include "LulCommonExt.h" +#include "LulDwarfExt.h" +#include "LulDwarfInt.h" +#include "LulTestInfrastructure.h" + +using testing::Test; +using testing::Return; +using testing::Sequence; +using testing::InSequence; +using testing::_; +using lul_test::CFISection; +using lul_test::test_assembler::kBigEndian; +using lul_test::test_assembler::kLittleEndian; +using lul_test::test_assembler::Label; + +#define PERHAPS_WRITE_DEBUG_FRAME_FILE(name, section) /**/ +#define PERHAPS_WRITE_EH_FRAME_FILE(name, section) /**/ + +// Set this to 0 to make LUL be completely silent during tests. +// Set it to 1 to get logging output from LUL, presumably for +// the purpose of debugging it. +#define DEBUG_LUL_TEST_DWARF 0 + +// LUL needs a callback for its logging sink. +static void +gtest_logging_sink_for_LulTestDwarf(const char* str) { + if (DEBUG_LUL_TEST_DWARF == 0) { + return; + } + // Ignore any trailing \n, since LOG will add one anyway. + size_t n = strlen(str); + if (n > 0 && str[n-1] == '\n') { + char* tmp = strdup(str); + tmp[n-1] = 0; + fprintf(stderr, "LUL-in-gtest: %s\n", tmp); + free(tmp); + } else { + fprintf(stderr, "LUL-in-gtest: %s\n", str); + } +} + +namespace lul { + +class MockCallFrameInfoHandler : public CallFrameInfo::Handler { + public: + MOCK_METHOD6(Entry, bool(size_t offset, uint64 address, uint64 length, + uint8 version, const std::string &augmentation, + unsigned return_address)); + MOCK_METHOD2(UndefinedRule, bool(uint64 address, int reg)); + MOCK_METHOD2(SameValueRule, bool(uint64 address, int reg)); + MOCK_METHOD4(OffsetRule, bool(uint64 address, int reg, int base_register, + long offset)); + MOCK_METHOD4(ValOffsetRule, bool(uint64 address, int reg, int base_register, + long offset)); + MOCK_METHOD3(RegisterRule, bool(uint64 address, int reg, int base_register)); + MOCK_METHOD3(ExpressionRule, bool(uint64 address, int reg, + const std::string &expression)); + MOCK_METHOD3(ValExpressionRule, bool(uint64 address, int reg, + const std::string &expression)); + MOCK_METHOD0(End, bool()); + MOCK_METHOD2(PersonalityRoutine, bool(uint64 address, bool indirect)); + MOCK_METHOD2(LanguageSpecificDataArea, bool(uint64 address, bool indirect)); + MOCK_METHOD0(SignalHandler, bool()); +}; + +class MockCallFrameErrorReporter : public CallFrameInfo::Reporter { + public: + MockCallFrameErrorReporter() + : Reporter(gtest_logging_sink_for_LulTestDwarf, + "mock filename", "mock section") + { } + MOCK_METHOD2(Incomplete, void(uint64, CallFrameInfo::EntryKind)); + MOCK_METHOD1(EarlyEHTerminator, void(uint64)); + MOCK_METHOD2(CIEPointerOutOfRange, void(uint64, uint64)); + MOCK_METHOD2(BadCIEId, void(uint64, uint64)); + MOCK_METHOD2(UnrecognizedVersion, void(uint64, int version)); + MOCK_METHOD2(UnrecognizedAugmentation, void(uint64, const string &)); + MOCK_METHOD2(InvalidPointerEncoding, void(uint64, uint8)); + MOCK_METHOD2(UnusablePointerEncoding, void(uint64, uint8)); + MOCK_METHOD2(RestoreInCIE, void(uint64, uint64)); + MOCK_METHOD3(BadInstruction, void(uint64, CallFrameInfo::EntryKind, uint64)); + MOCK_METHOD3(NoCFARule, void(uint64, CallFrameInfo::EntryKind, uint64)); + MOCK_METHOD3(EmptyStateStack, void(uint64, CallFrameInfo::EntryKind, uint64)); + MOCK_METHOD3(ClearingCFARule, void(uint64, CallFrameInfo::EntryKind, uint64)); +}; + +struct CFIFixture { + + enum { kCFARegister = CallFrameInfo::Handler::kCFARegister }; + + CFIFixture() { + // Default expectations for the data handler. + // + // - Leave Entry and End without expectations, as it's probably a + // good idea to set those explicitly in each test. + // + // - Expect the *Rule functions to not be called, + // so that each test can simply list the calls they expect. + // + // I gather I could use StrictMock for this, but the manual seems + // to suggest using that only as a last resort, and this isn't so + // bad. + EXPECT_CALL(handler, UndefinedRule(_, _)).Times(0); + EXPECT_CALL(handler, SameValueRule(_, _)).Times(0); + EXPECT_CALL(handler, OffsetRule(_, _, _, _)).Times(0); + EXPECT_CALL(handler, ValOffsetRule(_, _, _, _)).Times(0); + EXPECT_CALL(handler, RegisterRule(_, _, _)).Times(0); + EXPECT_CALL(handler, ExpressionRule(_, _, _)).Times(0); + EXPECT_CALL(handler, ValExpressionRule(_, _, _)).Times(0); + EXPECT_CALL(handler, PersonalityRoutine(_, _)).Times(0); + EXPECT_CALL(handler, LanguageSpecificDataArea(_, _)).Times(0); + EXPECT_CALL(handler, SignalHandler()).Times(0); + + // Default expectations for the error/warning reporer. + EXPECT_CALL(reporter, Incomplete(_, _)).Times(0); + EXPECT_CALL(reporter, EarlyEHTerminator(_)).Times(0); + EXPECT_CALL(reporter, CIEPointerOutOfRange(_, _)).Times(0); + EXPECT_CALL(reporter, BadCIEId(_, _)).Times(0); + EXPECT_CALL(reporter, UnrecognizedVersion(_, _)).Times(0); + EXPECT_CALL(reporter, UnrecognizedAugmentation(_, _)).Times(0); + EXPECT_CALL(reporter, InvalidPointerEncoding(_, _)).Times(0); + EXPECT_CALL(reporter, UnusablePointerEncoding(_, _)).Times(0); + EXPECT_CALL(reporter, RestoreInCIE(_, _)).Times(0); + EXPECT_CALL(reporter, BadInstruction(_, _, _)).Times(0); + EXPECT_CALL(reporter, NoCFARule(_, _, _)).Times(0); + EXPECT_CALL(reporter, EmptyStateStack(_, _, _)).Times(0); + EXPECT_CALL(reporter, ClearingCFARule(_, _, _)).Times(0); + } + + MockCallFrameInfoHandler handler; + MockCallFrameErrorReporter reporter; +}; + +class LulDwarfCFI: public CFIFixture, public Test { }; + +TEST_F(LulDwarfCFI, EmptyRegion) { + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + static const char data[1] = { 42 }; + + ByteReader reader(ENDIANNESS_BIG); + CallFrameInfo parser(data, 0, &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +TEST_F(LulDwarfCFI, IncompleteLength32) { + CFISection section(kBigEndian, 8); + section + // Not even long enough for an initial length. + .D16(0xa0f) + // Padding to keep valgrind happy. We subtract these off when we + // construct the parser. + .D16(0); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown)) + .WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(8); + CallFrameInfo parser(contents.data(), contents.size() - 2, + &reader, &handler, &reporter); + EXPECT_FALSE(parser.Start()); +} + +TEST_F(LulDwarfCFI, IncompleteLength64) { + CFISection section(kLittleEndian, 4); + section + // An incomplete 64-bit DWARF initial length. + .D32(0xffffffff).D32(0x71fbaec2) + // Padding to keep valgrind happy. We subtract these off when we + // construct the parser. + .D32(0); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown)) + .WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size() - 4, + &reader, &handler, &reporter); + EXPECT_FALSE(parser.Start()); +} + +TEST_F(LulDwarfCFI, IncompleteId32) { + CFISection section(kBigEndian, 8); + section + .D32(3) // Initial length, not long enough for id + .D8(0xd7).D8(0xe5).D8(0xf1) // incomplete id + .CIEHeader(8727, 3983, 8889, 3, "") + .FinishEntry(); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, Incomplete(_, CallFrameInfo::kUnknown)) + .WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(8); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_FALSE(parser.Start()); +} + +TEST_F(LulDwarfCFI, BadId32) { + CFISection section(kBigEndian, 8); + section + .D32(0x100) // Initial length + .D32(0xe802fade) // bogus ID + .Append(0x100 - 4, 0x42); // make the length true + section + .CIEHeader(1672, 9872, 8529, 3, "") + .FinishEntry(); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + EXPECT_CALL(reporter, CIEPointerOutOfRange(_, 0xe802fade)) + .WillOnce(Return()); + + string contents; + ASSERT_TRUE(section.GetContents(&contents)); + + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(8); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_FALSE(parser.Start()); +} + +// A lone CIE shouldn't cause any handler calls. +TEST_F(LulDwarfCFI, SingleCIE) { + CFISection section(kLittleEndian, 4); + section.CIEHeader(0xffe799a8, 0x3398dcdd, 0x6e9683de, 3, ""); + section.Append(10, lul::DW_CFA_nop); + section.FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("SingleCIE", section); + + EXPECT_CALL(handler, Entry(_, _, _, _, _, _)).Times(0); + EXPECT_CALL(handler, End()).Times(0); + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +// One FDE, one CIE. +TEST_F(LulDwarfCFI, OneFDE) { + CFISection section(kBigEndian, 4); + Label cie; + section + .Mark(&cie) + .CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "") + .FinishEntry() + .FDEHeader(cie, 0x7714740d, 0x3d5a10cd) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("OneFDE", section); + + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, 0x7714740d, 0x3d5a10cd, 3, "", 0x6b6efb87)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +// Two FDEs share a CIE. +TEST_F(LulDwarfCFI, TwoFDEsOneCIE) { + CFISection section(kBigEndian, 4); + Label cie; + section + // First FDE. readelf complains about this one because it makes + // a forward reference to its CIE. + .FDEHeader(cie, 0xa42744df, 0xa3b42121) + .FinishEntry() + // CIE. + .Mark(&cie) + .CIEHeader(0x04f7dc7b, 0x3d00c05f, 0xbd43cb59, 3, "") + .FinishEntry() + // Second FDE. + .FDEHeader(cie, 0x6057d391, 0x700f608d) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsOneCIE", section); + + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, 0xa42744df, 0xa3b42121, 3, "", 0xbd43cb59)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, 0x6057d391, 0x700f608d, 3, "", 0xbd43cb59)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +// Two FDEs, two CIEs. +TEST_F(LulDwarfCFI, TwoFDEsTwoCIEs) { + CFISection section(kLittleEndian, 8); + Label cie1, cie2; + section + // First CIE. + .Mark(&cie1) + .CIEHeader(0x694d5d45, 0x4233221b, 0xbf45e65a, 3, "") + .FinishEntry() + // First FDE which cites second CIE. readelf complains about + // this one because it makes a forward reference to its CIE. + .FDEHeader(cie2, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL) + .FinishEntry() + // Second FDE, which cites first CIE. + .FDEHeader(cie1, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL) + .FinishEntry() + // Second CIE. + .Mark(&cie2) + .CIEHeader(0xfba3fad7, 0x6287e1fd, 0x61d2c581, 2, "") + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("TwoFDEsTwoCIEs", section); + + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, 0x778b27dfe5871f05ULL, 0x324ace3448070926ULL, 2, + "", 0x61d2c581)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, 0xf6054ca18b10bf5fULL, 0x45fdb970d8bca342ULL, 3, + "", 0xbf45e65a)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(8); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +// An FDE whose CIE specifies a version we don't recognize. +TEST_F(LulDwarfCFI, BadVersion) { + CFISection section(kBigEndian, 4); + Label cie1, cie2; + section + .Mark(&cie1) + .CIEHeader(0xca878cf0, 0x7698ec04, 0x7b616f54, 0x52, "") + .FinishEntry() + // We should skip this entry, as its CIE specifies a version we + // don't recognize. + .FDEHeader(cie1, 0x08852292, 0x2204004a) + .FinishEntry() + // Despite the above, we should visit this entry. + .Mark(&cie2) + .CIEHeader(0x7c3ae7c9, 0xb9b9a512, 0x96cb3264, 3, "") + .FinishEntry() + .FDEHeader(cie2, 0x2094735a, 0x6e875501) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("BadVersion", section); + + EXPECT_CALL(reporter, UnrecognizedVersion(_, 0x52)) + .WillOnce(Return()); + + { + InSequence s; + // We should see no mention of the first FDE, but we should get + // a call to Entry for the second. + EXPECT_CALL(handler, Entry(_, 0x2094735a, 0x6e875501, 3, "", + 0x96cb3264)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_FALSE(parser.Start()); +} + +// An FDE whose CIE specifies an augmentation we don't recognize. +TEST_F(LulDwarfCFI, BadAugmentation) { + CFISection section(kBigEndian, 4); + Label cie1, cie2; + section + .Mark(&cie1) + .CIEHeader(0x4be22f75, 0x2492236e, 0x6b6efb87, 3, "spaniels!") + .FinishEntry() + // We should skip this entry, as its CIE specifies an + // augmentation we don't recognize. + .FDEHeader(cie1, 0x7714740d, 0x3d5a10cd) + .FinishEntry() + // Despite the above, we should visit this entry. + .Mark(&cie2) + .CIEHeader(0xf8bc4399, 0x8cf09931, 0xf2f519b2, 3, "") + .FinishEntry() + .FDEHeader(cie2, 0x7bf0fda0, 0xcbcd28d8) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("BadAugmentation", section); + + EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "spaniels!")) + .WillOnce(Return()); + + { + InSequence s; + // We should see no mention of the first FDE, but we should get + // a call to Entry for the second. + EXPECT_CALL(handler, Entry(_, 0x7bf0fda0, 0xcbcd28d8, 3, "", + 0xf2f519b2)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_FALSE(parser.Start()); +} + +// The return address column field is a byte in CFI version 1 +// (DWARF2), but a ULEB128 value in version 3 (DWARF3). +TEST_F(LulDwarfCFI, CIEVersion1ReturnColumn) { + CFISection section(kBigEndian, 4); + Label cie; + section + // CIE, using the version 1 format: return column is a ubyte. + .Mark(&cie) + // Use a value for the return column that is parsed differently + // as a ubyte and as a ULEB128. + .CIEHeader(0xbcdea24f, 0x5be28286, 0x9f, 1, "") + .FinishEntry() + // FDE, citing that CIE. + .FDEHeader(cie, 0xb8d347b5, 0x825e55dc) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion1ReturnColumn", section); + + { + InSequence s; + EXPECT_CALL(handler, Entry(_, 0xb8d347b5, 0x825e55dc, 1, "", 0x9f)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +// The return address column field is a byte in CFI version 1 +// (DWARF2), but a ULEB128 value in version 3 (DWARF3). +TEST_F(LulDwarfCFI, CIEVersion3ReturnColumn) { + CFISection section(kBigEndian, 4); + Label cie; + section + // CIE, using the version 3 format: return column is a ULEB128. + .Mark(&cie) + // Use a value for the return column that is parsed differently + // as a ubyte and as a ULEB128. + .CIEHeader(0x0ab4758d, 0xc010fdf7, 0x89, 3, "") + .FinishEntry() + // FDE, citing that CIE. + .FDEHeader(cie, 0x86763f2b, 0x2a66dc23) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("CIEVersion3ReturnColumn", section); + + { + InSequence s; + EXPECT_CALL(handler, Entry(_, 0x86763f2b, 0x2a66dc23, 3, "", 0x89)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + string contents; + EXPECT_TRUE(section.GetContents(&contents)); + ByteReader reader(ENDIANNESS_BIG); + reader.SetAddressSize(4); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter); + EXPECT_TRUE(parser.Start()); +} + +struct CFIInsnFixture: public CFIFixture { + CFIInsnFixture() : CFIFixture() { + data_factor = 0xb6f; + return_register = 0x9be1ed9f; + version = 3; + cfa_base_register = 0x383a3aa; + cfa_offset = 0xf748; + } + + // Prepare SECTION to receive FDE instructions. + // + // - Append a stock CIE header that establishes the fixture's + // code_factor, data_factor, return_register, version, and + // augmentation values. + // - Have the CIE set up a CFA rule using cfa_base_register and + // cfa_offset. + // - Append a stock FDE header, referring to the above CIE, for the + // fde_size bytes at fde_start. Choose fde_start and fde_size + // appropriately for the section's address size. + // - Set appropriate expectations on handler in sequence s for the + // frame description entry and the CIE's CFA rule. + // + // On return, SECTION is ready to have FDE instructions appended to + // it, and its FinishEntry member called. + void StockCIEAndFDE(CFISection *section) { + // Choose appropriate constants for our address size. + if (section->AddressSize() == 4) { + fde_start = 0xc628ecfbU; + fde_size = 0x5dee04a2; + code_factor = 0x60b; + } else { + assert(section->AddressSize() == 8); + fde_start = 0x0005c57ce7806bd3ULL; + fde_size = 0x2699521b5e333100ULL; + code_factor = 0x01008e32855274a8ULL; + } + + // Create the CIE. + (*section) + .Mark(&cie_label) + .CIEHeader(code_factor, data_factor, return_register, version, + "") + .D8(lul::DW_CFA_def_cfa) + .ULEB128(cfa_base_register) + .ULEB128(cfa_offset) + .FinishEntry(); + + // Create the FDE. + section->FDEHeader(cie_label, fde_start, fde_size); + + // Expect an Entry call for the FDE and a ValOffsetRule call for the + // CIE's CFA rule. + EXPECT_CALL(handler, Entry(_, fde_start, fde_size, version, "", + return_register)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start, kCFARegister, + cfa_base_register, cfa_offset)) + .InSequence(s) + .WillOnce(Return(true)); + } + + // Run the contents of SECTION through a CallFrameInfo parser, + // expecting parser.Start to return SUCCEEDS. Caller may optionally + // supply, via READER, its own ByteReader. If that's absent, a + // local one is used. + void ParseSection(CFISection *section, + bool succeeds = true, ByteReader* reader = nullptr) { + string contents; + EXPECT_TRUE(section->GetContents(&contents)); + lul::Endianness endianness; + if (section->endianness() == kBigEndian) + endianness = ENDIANNESS_BIG; + else { + assert(section->endianness() == kLittleEndian); + endianness = ENDIANNESS_LITTLE; + } + ByteReader local_reader(endianness); + ByteReader* reader_to_use = reader ? reader : &local_reader; + reader_to_use->SetAddressSize(section->AddressSize()); + CallFrameInfo parser(contents.data(), contents.size(), + reader_to_use, &handler, &reporter); + if (succeeds) + EXPECT_TRUE(parser.Start()); + else + EXPECT_FALSE(parser.Start()); + } + + Label cie_label; + Sequence s; + uint64 code_factor; + int data_factor; + unsigned return_register; + unsigned version; + unsigned cfa_base_register; + int cfa_offset; + uint64 fde_start, fde_size; +}; + +class LulDwarfCFIInsn: public CFIInsnFixture, public Test { }; + +TEST_F(LulDwarfCFIInsn, DW_CFA_set_loc) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_set_loc).D32(0xb1ee3e7a) + // Use DW_CFA_def_cfa to force a handler call that we can use to + // check the effect of the DW_CFA_set_loc. + .D8(lul::DW_CFA_def_cfa).ULEB128(0x4defb431).ULEB128(0x6d17b0ee) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_set_loc", section); + + EXPECT_CALL(handler, + ValOffsetRule(0xb1ee3e7a, kCFARegister, 0x4defb431, 0x6d17b0ee)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc) { + CFISection section(kBigEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_advance_loc | 0x2a) + // Use DW_CFA_def_cfa to force a handler call that we can use to + // check the effect of the DW_CFA_advance_loc. + .D8(lul::DW_CFA_def_cfa).ULEB128(0x5bbb3715).ULEB128(0x0186c7bf) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc", section); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start + 0x2a * code_factor, + kCFARegister, 0x5bbb3715, 0x0186c7bf)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc1) { + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_advance_loc1).D8(0xd8) + .D8(lul::DW_CFA_def_cfa).ULEB128(0x69d5696a).ULEB128(0x1eb7fc93) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc1", section); + + EXPECT_CALL(handler, + ValOffsetRule((fde_start + 0xd8 * code_factor), + kCFARegister, 0x69d5696a, 0x1eb7fc93)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc2) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_advance_loc2).D16(0x3adb) + .D8(lul::DW_CFA_def_cfa).ULEB128(0x3a368bed).ULEB128(0x3194ee37) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc2", section); + + EXPECT_CALL(handler, + ValOffsetRule((fde_start + 0x3adb * code_factor), + kCFARegister, 0x3a368bed, 0x3194ee37)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_advance_loc4) { + CFISection section(kBigEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_advance_loc4).D32(0x15813c88) + .D8(lul::DW_CFA_def_cfa).ULEB128(0x135270c5).ULEB128(0x24bad7cb) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc4", section); + + EXPECT_CALL(handler, + ValOffsetRule((fde_start + 0x15813c88ULL * code_factor), + kCFARegister, 0x135270c5, 0x24bad7cb)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_MIPS_advance_loc8) { + code_factor = 0x2d; + CFISection section(kBigEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_MIPS_advance_loc8).D64(0x3c4f3945b92c14ULL) + .D8(lul::DW_CFA_def_cfa).ULEB128(0xe17ed602).ULEB128(0x3d162e7f) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_advance_loc8", section); + + EXPECT_CALL(handler, + ValOffsetRule((fde_start + 0x3c4f3945b92c14ULL * code_factor), + kCFARegister, 0xe17ed602, 0x3d162e7f)) + .InSequence(s) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa).ULEB128(0x4e363a85).ULEB128(0x815f9aa7) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("DW_CFA_def_cfa", section); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, 0x4e363a85, 0x815f9aa7)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_sf) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa_sf).ULEB128(0x8ccb32b7).LEB128(0x9ea) + .D8(lul::DW_CFA_def_cfa_sf).ULEB128(0x9b40f5da).LEB128(-0x40a2) + .FinishEntry(); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, 0x8ccb32b7, + 0x9ea * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, 0x9b40f5da, + -0x40a2 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_register) { + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa_register).ULEB128(0x3e7e9363) + .FinishEntry(); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, 0x3e7e9363, cfa_offset)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +// DW_CFA_def_cfa_register should have no effect when applied to a +// non-base/offset rule. +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_registerBadRule) { + ByteReader reader(ENDIANNESS_BIG); + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa_expression).Block("needle in a haystack") + .D8(lul::DW_CFA_def_cfa_register).ULEB128(0xf1b49e49) + .FinishEntry(); + + EXPECT_CALL(handler, + ValExpressionRule(fde_start, kCFARegister, + "needle in a haystack")) + .WillRepeatedly(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa_offset).ULEB128(0x1e8e3b9b) + .FinishEntry(); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, cfa_base_register, + 0x1e8e3b9b)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offset_sf) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa_offset_sf).LEB128(0x970) + .D8(lul::DW_CFA_def_cfa_offset_sf).LEB128(-0x2cd) + .FinishEntry(); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, cfa_base_register, + 0x970 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, cfa_base_register, + -0x2cd * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +// DW_CFA_def_cfa_offset should have no effect when applied to a +// non-base/offset rule. +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_offsetBadRule) { + ByteReader reader(ENDIANNESS_BIG); + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa_expression).Block("six ways to Sunday") + .D8(lul::DW_CFA_def_cfa_offset).ULEB128(0x1e8e3b9b) + .FinishEntry(); + + EXPECT_CALL(handler, + ValExpressionRule(fde_start, kCFARegister, + "six ways to Sunday")) + .WillRepeatedly(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + + +TEST_F(LulDwarfCFIInsn, DW_CFA_def_cfa_expression) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_def_cfa_expression).Block("eating crow") + .FinishEntry(); + + EXPECT_CALL(handler, ValExpressionRule(fde_start, kCFARegister, + "eating crow")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_undefined) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_undefined).ULEB128(0x300ce45d) + .FinishEntry(); + + EXPECT_CALL(handler, UndefinedRule(fde_start, 0x300ce45d)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_same_value) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_same_value).ULEB128(0x3865a760) + .FinishEntry(); + + EXPECT_CALL(handler, SameValueRule(fde_start, 0x3865a760)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_offset) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_offset | 0x2c).ULEB128(0x9f6) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x2c, kCFARegister, 0x9f6 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_offset_extended).ULEB128(0x402b).ULEB128(0xb48) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, + 0x402b, kCFARegister, 0xb48 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_offset_extended_sf) { + CFISection section(kBigEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_offset_extended_sf) + .ULEB128(0x997c23ee).LEB128(0x2d00) + .D8(lul::DW_CFA_offset_extended_sf) + .ULEB128(0x9519eb82).LEB128(-0xa77) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x997c23ee, + kCFARegister, 0x2d00 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x9519eb82, + kCFARegister, -0xa77 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_offset).ULEB128(0x623562fe).ULEB128(0x673) + .FinishEntry(); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, 0x623562fe, + kCFARegister, 0x673 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_val_offset_sf) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_offset_sf).ULEB128(0x6f4f).LEB128(0xaab) + .D8(lul::DW_CFA_val_offset_sf).ULEB128(0x2483).LEB128(-0x8a2) + .FinishEntry(); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, 0x6f4f, + kCFARegister, 0xaab * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, + ValOffsetRule(fde_start, 0x2483, + kCFARegister, -0x8a2 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_register) { + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_register).ULEB128(0x278d18f9).ULEB128(0x1a684414) + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0x278d18f9, 0x1a684414)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_expression) { + ByteReader reader(ENDIANNESS_BIG); + CFISection section(kBigEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_expression).ULEB128(0xa1619fb2) + .Block("plus ça change, plus c'est la même chose") + .FinishEntry(); + + EXPECT_CALL(handler, + ExpressionRule(fde_start, 0xa1619fb2, + "plus ça change, plus c'est la même chose")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_val_expression) { + ByteReader reader(ENDIANNESS_BIG); + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_expression).ULEB128(0xc5e4a9e3) + .Block("he who has the gold makes the rules") + .FinishEntry(); + + EXPECT_CALL(handler, + ValExpressionRule(fde_start, 0xc5e4a9e3, + "he who has the gold makes the rules")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_restore) { + CFISection section(kLittleEndian, 8); + code_factor = 0x01bd188a9b1fa083ULL; + data_factor = -0x1ac8; + return_register = 0x8c35b049; + version = 2; + fde_start = 0x2d70fe998298bbb1ULL; + fde_size = 0x46ccc2e63cf0b108ULL; + Label cie; + section + .Mark(&cie) + .CIEHeader(code_factor, data_factor, return_register, version, + "") + // Provide a CFA rule, because register rules require them. + .D8(lul::DW_CFA_def_cfa).ULEB128(0x6ca1d50e).ULEB128(0x372e38e8) + // Provide an offset(N) rule for register 0x3c. + .D8(lul::DW_CFA_offset | 0x3c).ULEB128(0xb348) + .FinishEntry() + // In the FDE... + .FDEHeader(cie, fde_start, fde_size) + // At a second address, provide a new offset(N) rule for register 0x3c. + .D8(lul::DW_CFA_advance_loc | 0x13) + .D8(lul::DW_CFA_offset | 0x3c).ULEB128(0x9a50) + // At a third address, restore the original rule for register 0x3c. + .D8(lul::DW_CFA_advance_loc | 0x01) + .D8(lul::DW_CFA_restore | 0x3c) + .FinishEntry(); + + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, fde_start, fde_size, version, "", return_register)) + .WillOnce(Return(true)); + // CIE's CFA rule. + EXPECT_CALL(handler, + ValOffsetRule(fde_start, + kCFARegister, 0x6ca1d50e, 0x372e38e8)) + .WillOnce(Return(true)); + // CIE's rule for register 0x3c. + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x3c, + kCFARegister, 0xb348 * data_factor)) + .WillOnce(Return(true)); + // FDE's rule for register 0x3c. + EXPECT_CALL(handler, + OffsetRule(fde_start + 0x13 * code_factor, 0x3c, + kCFARegister, 0x9a50 * data_factor)) + .WillOnce(Return(true)); + // Restore CIE's rule for register 0x3c. + EXPECT_CALL(handler, + OffsetRule(fde_start + (0x13 + 0x01) * code_factor, 0x3c, + kCFARegister, 0xb348 * data_factor)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_restoreNoRule) { + CFISection section(kBigEndian, 4); + code_factor = 0x005f78143c1c3b82ULL; + data_factor = 0x25d0; + return_register = 0xe8; + version = 1; + fde_start = 0x4062e30f; + fde_size = 0x5302a389; + Label cie; + section + .Mark(&cie) + .CIEHeader(code_factor, data_factor, return_register, version, "") + // Provide a CFA rule, because register rules require them. + .D8(lul::DW_CFA_def_cfa).ULEB128(0x470aa334).ULEB128(0x099ef127) + .FinishEntry() + // In the FDE... + .FDEHeader(cie, fde_start, fde_size) + // At a second address, provide an offset(N) rule for register 0x2c. + .D8(lul::DW_CFA_advance_loc | 0x7) + .D8(lul::DW_CFA_offset | 0x2c).ULEB128(0x1f47) + // At a third address, restore the (missing) CIE rule for register 0x2c. + .D8(lul::DW_CFA_advance_loc | 0xb) + .D8(lul::DW_CFA_restore | 0x2c) + .FinishEntry(); + + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, fde_start, fde_size, version, "", return_register)) + .WillOnce(Return(true)); + // CIE's CFA rule. + EXPECT_CALL(handler, + ValOffsetRule(fde_start, + kCFARegister, 0x470aa334, 0x099ef127)) + .WillOnce(Return(true)); + // FDE's rule for register 0x2c. + EXPECT_CALL(handler, + OffsetRule(fde_start + 0x7 * code_factor, 0x2c, + kCFARegister, 0x1f47 * data_factor)) + .WillOnce(Return(true)); + // Restore CIE's (missing) rule for register 0x2c. + EXPECT_CALL(handler, + SameValueRule(fde_start + (0x7 + 0xb) * code_factor, 0x2c)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_restore_extended) { + CFISection section(kBigEndian, 4); + code_factor = 0x126e; + data_factor = -0xd8b; + return_register = 0x77711787; + version = 3; + fde_start = 0x01f55a45; + fde_size = 0x452adb80; + Label cie; + section + .Mark(&cie) + .CIEHeader(code_factor, data_factor, return_register, version, + "", true /* dwarf64 */ ) + // Provide a CFA rule, because register rules require them. + .D8(lul::DW_CFA_def_cfa).ULEB128(0x56fa0edd).ULEB128(0x097f78a5) + // Provide an offset(N) rule for register 0x0f9b8a1c. + .D8(lul::DW_CFA_offset_extended) + .ULEB128(0x0f9b8a1c).ULEB128(0xc979) + .FinishEntry() + // In the FDE... + .FDEHeader(cie, fde_start, fde_size) + // At a second address, provide a new offset(N) rule for reg 0x0f9b8a1c. + .D8(lul::DW_CFA_advance_loc | 0x3) + .D8(lul::DW_CFA_offset_extended) + .ULEB128(0x0f9b8a1c).ULEB128(0x3b7b) + // At a third address, restore the original rule for register 0x0f9b8a1c. + .D8(lul::DW_CFA_advance_loc | 0x04) + .D8(lul::DW_CFA_restore_extended).ULEB128(0x0f9b8a1c) + .FinishEntry(); + + { + InSequence s; + EXPECT_CALL(handler, + Entry(_, fde_start, fde_size, version, "", return_register)) + .WillOnce(Return(true)); + // CIE's CFA rule. + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, 0x56fa0edd, 0x097f78a5)) + .WillOnce(Return(true)); + // CIE's rule for register 0x0f9b8a1c. + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x0f9b8a1c, kCFARegister, + 0xc979 * data_factor)) + .WillOnce(Return(true)); + // FDE's rule for register 0x0f9b8a1c. + EXPECT_CALL(handler, + OffsetRule(fde_start + 0x3 * code_factor, 0x0f9b8a1c, + kCFARegister, 0x3b7b * data_factor)) + .WillOnce(Return(true)); + // Restore CIE's rule for register 0x0f9b8a1c. + EXPECT_CALL(handler, + OffsetRule(fde_start + (0x3 + 0x4) * code_factor, 0x0f9b8a1c, + kCFARegister, 0xc979 * data_factor)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + } + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_state) { + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + + // We create a state, save it, modify it, and then restore. We + // refer to the state that is overridden the restore as the + // "outgoing" state, and the restored state the "incoming" state. + // + // Register outgoing incoming expect + // 1 offset(N) no rule new "same value" rule + // 2 register(R) offset(N) report changed rule + // 3 offset(N) offset(M) report changed offset + // 4 offset(N) offset(N) no report + // 5 offset(N) no rule new "same value" rule + section + // Create the "incoming" state, which we will save and later restore. + .D8(lul::DW_CFA_offset | 2).ULEB128(0x9806) + .D8(lul::DW_CFA_offset | 3).ULEB128(0x995d) + .D8(lul::DW_CFA_offset | 4).ULEB128(0x7055) + .D8(lul::DW_CFA_remember_state) + // Advance to a new instruction; an implementation could legitimately + // ignore all but the final rule for a given register at a given address. + .D8(lul::DW_CFA_advance_loc | 1) + // Create the "outgoing" state, which we will discard. + .D8(lul::DW_CFA_offset | 1).ULEB128(0xea1a) + .D8(lul::DW_CFA_register).ULEB128(2).ULEB128(0x1d2a3767) + .D8(lul::DW_CFA_offset | 3).ULEB128(0xdd29) + .D8(lul::DW_CFA_offset | 5).ULEB128(0xf1ce) + // At a third address, restore the incoming state. + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + uint64 addr = fde_start; + + // Expect the incoming rules to be reported. + EXPECT_CALL(handler, OffsetRule(addr, 2, kCFARegister, 0x9806 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0x995d * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(addr, 4, kCFARegister, 0x7055 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + + addr += code_factor; + + // After the save, we establish the outgoing rule set. + EXPECT_CALL(handler, OffsetRule(addr, 1, kCFARegister, 0xea1a * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, RegisterRule(addr, 2, 0x1d2a3767)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0xdd29 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(addr, 5, kCFARegister, 0xf1ce * data_factor)) + .InSequence(s).WillOnce(Return(true)); + + addr += code_factor; + + // Finally, after the restore, expect to see the differences from + // the outgoing to the incoming rules reported. + EXPECT_CALL(handler, SameValueRule(addr, 1)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(addr, 2, kCFARegister, 0x9806 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(addr, 3, kCFARegister, 0x995d * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, SameValueRule(addr, 5)) + .InSequence(s).WillOnce(Return(true)); + + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +// Check that restoring a rule set reports changes to the CFA rule. +TEST_F(LulDwarfCFIInsn, DW_CFA_remember_and_restore_stateCFA) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + + section + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_def_cfa_offset).ULEB128(0x90481102) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, kCFARegister, + cfa_base_register, 0x90481102)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor * 2, kCFARegister, + cfa_base_register, cfa_offset)) + .InSequence(s).WillOnce(Return(true)); + + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_nop) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_nop) + .D8(lul::DW_CFA_def_cfa).ULEB128(0x3fb8d4f1).ULEB128(0x078dc67b) + .D8(lul::DW_CFA_nop) + .FinishEntry(); + + EXPECT_CALL(handler, + ValOffsetRule(fde_start, kCFARegister, 0x3fb8d4f1, 0x078dc67b)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_window_save) { + CFISection section(kBigEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_GNU_window_save) + .FinishEntry(); + + // Don't include all the rules in any particular sequence. + + // The caller's %o0-%o7 have become the callee's %i0-%i7. This is + // the GCC register numbering. + for (int i = 8; i < 16; i++) + EXPECT_CALL(handler, RegisterRule(fde_start, i, i + 16)) + .WillOnce(Return(true)); + // The caller's %l0-%l7 and %i0-%i7 have been saved at the top of + // its frame. + for (int i = 16; i < 32; i++) + EXPECT_CALL(handler, OffsetRule(fde_start, i, kCFARegister, (i-16) * 4)) + .WillOnce(Return(true)); + + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_args_size) { + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_GNU_args_size).ULEB128(0xeddfa520) + // Verify that we see this, meaning we parsed the above properly. + .D8(lul::DW_CFA_offset | 0x23).ULEB128(0x269) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x23, kCFARegister, 0x269 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIInsn, DW_CFA_GNU_negative_offset_extended) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_GNU_negative_offset_extended) + .ULEB128(0x430cc87a).ULEB128(0x613) + .FinishEntry(); + + EXPECT_CALL(handler, + OffsetRule(fde_start, 0x430cc87a, + kCFARegister, -0x613 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion); +} + +// Three FDEs: skip the second +TEST_F(LulDwarfCFIInsn, SkipFDE) { + CFISection section(kBigEndian, 4); + Label cie; + section + // CIE, used by all FDEs. + .Mark(&cie) + .CIEHeader(0x010269f2, 0x9177, 0xedca5849, 2, "") + .D8(lul::DW_CFA_def_cfa).ULEB128(0x42ed390b).ULEB128(0x98f43aad) + .FinishEntry() + // First FDE. + .FDEHeader(cie, 0xa870ebdd, 0x60f6aa4) + .D8(lul::DW_CFA_register).ULEB128(0x3a860351).ULEB128(0x6c9a6bcf) + .FinishEntry() + // Second FDE. + .FDEHeader(cie, 0xc534f7c0, 0xf6552e9, true /* dwarf64 */) + .D8(lul::DW_CFA_register).ULEB128(0x1b62c234).ULEB128(0x26586b18) + .FinishEntry() + // Third FDE. + .FDEHeader(cie, 0xf681cfc8, 0x7e4594e) + .D8(lul::DW_CFA_register).ULEB128(0x26c53934).ULEB128(0x18eeb8a4) + .FinishEntry(); + + { + InSequence s; + + // Process the first FDE. + EXPECT_CALL(handler, Entry(_, 0xa870ebdd, 0x60f6aa4, 2, "", 0xedca5849)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0xa870ebdd, kCFARegister, + 0x42ed390b, 0x98f43aad)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, RegisterRule(0xa870ebdd, 0x3a860351, 0x6c9a6bcf)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .WillOnce(Return(true)); + + // Skip the second FDE. + EXPECT_CALL(handler, Entry(_, 0xc534f7c0, 0xf6552e9, 2, "", 0xedca5849)) + .WillOnce(Return(false)); + + // Process the third FDE. + EXPECT_CALL(handler, Entry(_, 0xf681cfc8, 0x7e4594e, 2, "", 0xedca5849)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0xf681cfc8, kCFARegister, + 0x42ed390b, 0x98f43aad)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, RegisterRule(0xf681cfc8, 0x26c53934, 0x18eeb8a4)) + .WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .WillOnce(Return(true)); + } + + ParseSection(§ion); +} + +// Quit processing in the middle of an entry's instructions. +TEST_F(LulDwarfCFIInsn, QuitMidentry) { + CFISection section(kLittleEndian, 8); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_register).ULEB128(0xe0cf850d).ULEB128(0x15aab431) + .D8(lul::DW_CFA_expression).ULEB128(0x46750aa5).Block("meat") + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0xe0cf850d, 0x15aab431)) + .InSequence(s).WillOnce(Return(false)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseSection(§ion, false); +} + +class LulDwarfCFIRestore: public CFIInsnFixture, public Test { }; + +TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_undefined).ULEB128(0x0bac878e) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, UndefinedRule(fde_start, 0x0bac878e)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreUndefinedRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_undefined).ULEB128(0x7dedff5f) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_same_value).ULEB128(0x7dedff5f) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, UndefinedRule(fde_start, 0x7dedff5f)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, SameValueRule(fde_start + code_factor, 0x7dedff5f)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + 2 * code_factor, 0x7dedff5f)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_same_value).ULEB128(0xadbc9b3a) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, SameValueRule(fde_start, 0xadbc9b3a)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreSameValueRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_same_value).ULEB128(0x3d90dcb5) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined).ULEB128(0x3d90dcb5) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, SameValueRule(fde_start, 0x3d90dcb5)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x3d90dcb5)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, SameValueRule(fde_start + 2 * code_factor, 0x3d90dcb5)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_offset | 0x14).ULEB128(0xb6f) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, OffsetRule(fde_start, 0x14, + kCFARegister, 0xb6f * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_offset | 0x21).ULEB128(0xeb7) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined).ULEB128(0x21) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, OffsetRule(fde_start, 0x21, + kCFARegister, 0xeb7 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0x21)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, 0x21, + kCFARegister, 0xeb7 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreOffsetRuleChangedOffset) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_offset | 0x21).ULEB128(0x134) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_offset | 0x21).ULEB128(0xf4f) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, OffsetRule(fde_start, 0x21, + kCFARegister, 0x134 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(fde_start + code_factor, 0x21, + kCFARegister, 0xf4f * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, OffsetRule(fde_start + 2 * code_factor, 0x21, + kCFARegister, 0x134 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_offset).ULEB128(0x829caee6).ULEB128(0xe4c) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x829caee6, + kCFARegister, 0xe4c * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_offset).ULEB128(0xf17c36d6).ULEB128(0xeb7) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined).ULEB128(0xf17c36d6) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValOffsetRule(fde_start, 0xf17c36d6, + kCFARegister, 0xeb7 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xf17c36d6)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, 0xf17c36d6, + kCFARegister, 0xeb7 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreValOffsetRuleChangedValOffset) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_offset).ULEB128(0x2cf0ab1b).ULEB128(0x562) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_val_offset).ULEB128(0x2cf0ab1b).ULEB128(0xe88) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValOffsetRule(fde_start, 0x2cf0ab1b, + kCFARegister, 0x562 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start + code_factor, 0x2cf0ab1b, + kCFARegister, 0xe88 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(fde_start + 2 * code_factor, 0x2cf0ab1b, + kCFARegister, 0x562 * data_factor)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleUnchanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_register).ULEB128(0x77514acc).ULEB128(0x464de4ce) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0x77514acc, 0x464de4ce)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChanged) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_register).ULEB128(0xe39acce5).ULEB128(0x095f1559) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined).ULEB128(0xe39acce5) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0xe39acce5, 0x095f1559)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xe39acce5)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, RegisterRule(fde_start + 2 * code_factor, 0xe39acce5, + 0x095f1559)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreRegisterRuleChangedRegister) { + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_register).ULEB128(0xd40e21b1).ULEB128(0x16607d6a) + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_register).ULEB128(0xd40e21b1).ULEB128(0xbabb4742) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, RegisterRule(fde_start, 0xd40e21b1, 0x16607d6a)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, RegisterRule(fde_start + code_factor, 0xd40e21b1, + 0xbabb4742)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, RegisterRule(fde_start + 2 * code_factor, 0xd40e21b1, + 0x16607d6a)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion); +} + +TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleUnchanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_expression).ULEB128(0x666ae152).Block("dwarf") + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ExpressionRule(fde_start, 0x666ae152, "dwarf")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_expression).ULEB128(0xb5ca5c46).Block("elf") + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined).ULEB128(0xb5ca5c46) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ExpressionRule(fde_start, 0xb5ca5c46, "elf")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ExpressionRule(fde_start + 2 * code_factor, 0xb5ca5c46, + "elf")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreExpressionRuleChangedExpression) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_expression).ULEB128(0x500f5739).Block("smurf") + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_expression).ULEB128(0x500f5739).Block("orc") + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ExpressionRule(fde_start, 0x500f5739, "smurf")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ExpressionRule(fde_start + code_factor, 0x500f5739, + "orc")) + .InSequence(s).WillOnce(Return(true)); + // Expectations are not wishes. + EXPECT_CALL(handler, ExpressionRule(fde_start + 2 * code_factor, 0x500f5739, + "smurf")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleUnchanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_expression).ULEB128(0x666ae152) + .Block("hideous") + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0x666ae152, "hideous")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChanged) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_expression).ULEB128(0xb5ca5c46) + .Block("revolting") + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_undefined).ULEB128(0xb5ca5c46) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChanged", section); + + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0xb5ca5c46, "revolting")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(fde_start + code_factor, 0xb5ca5c46)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor, 0xb5ca5c46, + "revolting")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +TEST_F(LulDwarfCFIRestore, RestoreValExpressionRuleChangedValExpression) { + ByteReader reader(ENDIANNESS_LITTLE); + CFISection section(kLittleEndian, 4); + StockCIEAndFDE(§ion); + section + .D8(lul::DW_CFA_val_expression).ULEB128(0x500f5739) + .Block("repulsive") + .D8(lul::DW_CFA_remember_state) + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_val_expression).ULEB128(0x500f5739) + .Block("nauseous") + .D8(lul::DW_CFA_advance_loc | 1) + .D8(lul::DW_CFA_restore_state) + .FinishEntry(); + + PERHAPS_WRITE_DEBUG_FRAME_FILE("RestoreValExpressionRuleChangedValExpression", + section); + + EXPECT_CALL(handler, ValExpressionRule(fde_start, 0x500f5739, "repulsive")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValExpressionRule(fde_start + code_factor, 0x500f5739, + "nauseous")) + .InSequence(s).WillOnce(Return(true)); + // Expectations are not wishes. + EXPECT_CALL(handler, ValExpressionRule(fde_start + 2 * code_factor, 0x500f5739, + "repulsive")) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()).WillOnce(Return(true)); + + ParseSection(§ion, true, &reader); +} + +struct EHFrameFixture: public CFIInsnFixture { + EHFrameFixture() + : CFIInsnFixture(), section(kBigEndian, 4, true) { + encoded_pointer_bases.cfi = 0x7f496cb2; + encoded_pointer_bases.text = 0x540f67b6; + encoded_pointer_bases.data = 0xe3eab768; + section.SetEncodedPointerBases(encoded_pointer_bases); + } + CFISection section; + CFISection::EncodedPointerBases encoded_pointer_bases; + + // Parse CFIInsnFixture::ParseSection, but parse the section as + // .eh_frame data, supplying stock base addresses. + void ParseEHFrameSection(CFISection *section, bool succeeds = true) { + EXPECT_TRUE(section->ContainsEHFrame()); + string contents; + EXPECT_TRUE(section->GetContents(&contents)); + lul::Endianness endianness; + if (section->endianness() == kBigEndian) + endianness = ENDIANNESS_BIG; + else { + assert(section->endianness() == kLittleEndian); + endianness = ENDIANNESS_LITTLE; + } + ByteReader reader(endianness); + reader.SetAddressSize(section->AddressSize()); + reader.SetCFIDataBase(encoded_pointer_bases.cfi, contents.data()); + reader.SetTextBase(encoded_pointer_bases.text); + reader.SetDataBase(encoded_pointer_bases.data); + CallFrameInfo parser(contents.data(), contents.size(), + &reader, &handler, &reporter, true); + if (succeeds) + EXPECT_TRUE(parser.Start()); + else + EXPECT_FALSE(parser.Start()); + } + +}; + +class LulDwarfEHFrame: public EHFrameFixture, public Test { }; + +// A simple CIE, an FDE, and a terminator. +TEST_F(LulDwarfEHFrame, Terminator) { + Label cie; + section + .Mark(&cie) + .CIEHeader(9968, 2466, 67, 1, "") + .D8(lul::DW_CFA_def_cfa).ULEB128(3772).ULEB128(1372) + .FinishEntry() + .FDEHeader(cie, 0x848037a1, 0x7b30475e) + .D8(lul::DW_CFA_set_loc).D32(0x17713850) + .D8(lul::DW_CFA_undefined).ULEB128(5721) + .FinishEntry() + .D32(0) // Terminate the sequence. + // This FDE should be ignored. + .FDEHeader(cie, 0xf19629fe, 0x439fb09b) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.Terminator", section); + + EXPECT_CALL(handler, Entry(_, 0x848037a1, 0x7b30475e, 1, "", 67)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0x848037a1, kCFARegister, 3772, 1372)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0x17713850, 5721)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(reporter, EarlyEHTerminator(_)) + .InSequence(s).WillOnce(Return()); + + ParseEHFrameSection(§ion); +} + +// The parser should recognize the Linux Standards Base 'z' augmentations. +TEST_F(LulDwarfEHFrame, SimpleFDE) { + lul::DwarfPointerEncoding lsda_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_indirect + | lul::DW_EH_PE_datarel + | lul::DW_EH_PE_sdata2); + lul::DwarfPointerEncoding fde_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel + | lul::DW_EH_PE_udata2); + + section.SetPointerEncoding(fde_encoding); + section.SetEncodedPointerBases(encoded_pointer_bases); + Label cie; + section + .Mark(&cie) + .CIEHeader(4873, 7012, 100, 1, "zSLPR") + .ULEB128(7) // Augmentation data length + .D8(lsda_encoding) // LSDA pointer format + .D8(lul::DW_EH_PE_pcrel) // personality pointer format + .EncodedPointer(0x97baa00, lul::DW_EH_PE_pcrel) // and value + .D8(fde_encoding) // FDE pointer format + .D8(lul::DW_CFA_def_cfa).ULEB128(6706).ULEB128(31) + .FinishEntry() + .FDEHeader(cie, 0x540f6b56, 0xf686) + .ULEB128(2) // Augmentation data length + .EncodedPointer(0xe3eab475, lsda_encoding) // LSDA pointer, signed + .D8(lul::DW_CFA_set_loc) + .EncodedPointer(0x540fa4ce, fde_encoding) + .D8(lul::DW_CFA_undefined).ULEB128(0x675e) + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.SimpleFDE", section); + + EXPECT_CALL(handler, Entry(_, 0x540f6b56, 0xf686, 1, "zSLPR", 100)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, PersonalityRoutine(0x97baa00, false)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, LanguageSpecificDataArea(0xe3eab475, true)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, SignalHandler()) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0x540f6b56, kCFARegister, 6706, 31)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0x540fa4ce, 0x675e)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +// Check that we can handle an empty 'z' augmentation. +TEST_F(LulDwarfEHFrame, EmptyZ) { + Label cie; + section + .Mark(&cie) + .CIEHeader(5955, 5805, 228, 1, "z") + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_def_cfa).ULEB128(3629).ULEB128(247) + .FinishEntry() + .FDEHeader(cie, 0xda007738, 0xfb55c641) + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_advance_loc1).D8(11) + .D8(lul::DW_CFA_undefined).ULEB128(3769) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.EmptyZ", section); + + EXPECT_CALL(handler, Entry(_, 0xda007738, 0xfb55c641, 1, "z", 228)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, ValOffsetRule(0xda007738, kCFARegister, 3629, 247)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, UndefinedRule(0xda007738 + 11 * 5955, 3769)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +// Check that we recognize bad 'z' augmentation characters. +TEST_F(LulDwarfEHFrame, BadZ) { + Label cie; + section + .Mark(&cie) + .CIEHeader(6937, 1045, 142, 1, "zQ") + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_def_cfa).ULEB128(9006).ULEB128(7725) + .FinishEntry() + .FDEHeader(cie, 0x1293efa8, 0x236f53f2) + .ULEB128(0) // Augmentation data length + .D8(lul::DW_CFA_advance_loc | 12) + .D8(lul::DW_CFA_register).ULEB128(5667).ULEB128(3462) + .FinishEntry(); + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.BadZ", section); + + EXPECT_CALL(reporter, UnrecognizedAugmentation(_, "zQ")) + .WillOnce(Return()); + + ParseEHFrameSection(§ion, false); +} + +TEST_F(LulDwarfEHFrame, zL) { + Label cie; + lul::DwarfPointerEncoding lsda_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_funcrel | lul::DW_EH_PE_udata2); + section + .Mark(&cie) + .CIEHeader(9285, 9959, 54, 1, "zL") + .ULEB128(1) // Augmentation data length + .D8(lsda_encoding) // encoding for LSDA pointer in FDE + + .FinishEntry() + .FDEHeader(cie, 0xd40091aa, 0x9aa6e746) + .ULEB128(2) // Augmentation data length + .EncodedPointer(0xd40099cd, lsda_encoding) // LSDA pointer + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zL", section); + + EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zL", 54)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, LanguageSpecificDataArea(0xd40099cd, false)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +TEST_F(LulDwarfEHFrame, zP) { + Label cie; + lul::DwarfPointerEncoding personality_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_datarel | lul::DW_EH_PE_udata2); + section + .Mark(&cie) + .CIEHeader(1097, 6313, 17, 1, "zP") + .ULEB128(3) // Augmentation data length + .D8(personality_encoding) // encoding for personality routine + .EncodedPointer(0xe3eaccac, personality_encoding) // value + .FinishEntry() + .FDEHeader(cie, 0x0c8350c9, 0xbef11087) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zP", section); + + EXPECT_CALL(handler, Entry(_, 0x0c8350c9, 0xbef11087, 1, "zP", 17)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, PersonalityRoutine(0xe3eaccac, false)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +TEST_F(LulDwarfEHFrame, zR) { + Label cie; + lul::DwarfPointerEncoding pointer_encoding = + lul::DwarfPointerEncoding(lul::DW_EH_PE_textrel | lul::DW_EH_PE_sdata2); + section.SetPointerEncoding(pointer_encoding); + section + .Mark(&cie) + .CIEHeader(8011, 5496, 75, 1, "zR") + .ULEB128(1) // Augmentation data length + .D8(pointer_encoding) // encoding for FDE addresses + .FinishEntry() + .FDEHeader(cie, 0x540f9431, 0xbd0) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zR", section); + + EXPECT_CALL(handler, Entry(_, 0x540f9431, 0xbd0, 1, "zR", 75)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +TEST_F(LulDwarfEHFrame, zS) { + Label cie; + section + .Mark(&cie) + .CIEHeader(9217, 7694, 57, 1, "zS") + .ULEB128(0) // Augmentation data length + .FinishEntry() + .FDEHeader(cie, 0xd40091aa, 0x9aa6e746) + .ULEB128(0) // Augmentation data length + .FinishEntry() + .D32(0); // terminator + + PERHAPS_WRITE_EH_FRAME_FILE("EHFrame.zS", section); + + EXPECT_CALL(handler, Entry(_, 0xd40091aa, 0x9aa6e746, 1, "zS", 57)) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, SignalHandler()) + .InSequence(s).WillOnce(Return(true)); + EXPECT_CALL(handler, End()) + .InSequence(s).WillOnce(Return(true)); + + ParseEHFrameSection(§ion); +} + +// These tests require manual inspection of the test output. +struct CFIReporterFixture { + CFIReporterFixture() : reporter(gtest_logging_sink_for_LulTestDwarf, + "test file name", "test section name") { } + CallFrameInfo::Reporter reporter; +}; + +class LulDwarfCFIReporter: public CFIReporterFixture, public Test { }; + +TEST_F(LulDwarfCFIReporter, Incomplete) { + reporter.Incomplete(0x0102030405060708ULL, CallFrameInfo::kUnknown); +} + +TEST_F(LulDwarfCFIReporter, EarlyEHTerminator) { + reporter.EarlyEHTerminator(0x0102030405060708ULL); +} + +TEST_F(LulDwarfCFIReporter, CIEPointerOutOfRange) { + reporter.CIEPointerOutOfRange(0x0123456789abcdefULL, 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, BadCIEId) { + reporter.BadCIEId(0x0123456789abcdefULL, 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, UnrecognizedVersion) { + reporter.UnrecognizedVersion(0x0123456789abcdefULL, 43); +} + +TEST_F(LulDwarfCFIReporter, UnrecognizedAugmentation) { + reporter.UnrecognizedAugmentation(0x0123456789abcdefULL, "poodles"); +} + +TEST_F(LulDwarfCFIReporter, InvalidPointerEncoding) { + reporter.InvalidPointerEncoding(0x0123456789abcdefULL, 0x42); +} + +TEST_F(LulDwarfCFIReporter, UnusablePointerEncoding) { + reporter.UnusablePointerEncoding(0x0123456789abcdefULL, 0x42); +} + +TEST_F(LulDwarfCFIReporter, RestoreInCIE) { + reporter.RestoreInCIE(0x0123456789abcdefULL, 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, BadInstruction) { + reporter.BadInstruction(0x0123456789abcdefULL, CallFrameInfo::kFDE, + 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, NoCFARule) { + reporter.NoCFARule(0x0123456789abcdefULL, CallFrameInfo::kCIE, + 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, EmptyStateStack) { + reporter.EmptyStateStack(0x0123456789abcdefULL, CallFrameInfo::kTerminator, + 0xfedcba9876543210ULL); +} + +TEST_F(LulDwarfCFIReporter, ClearingCFARule) { + reporter.ClearingCFARule(0x0123456789abcdefULL, CallFrameInfo::kFDE, + 0xfedcba9876543210ULL); +} +class LulDwarfExpr : public Test { }; + +class MockSummariser : public Summariser { +public: + MockSummariser() : Summariser(nullptr, 0, nullptr) {} + MOCK_METHOD2(Entry, void(uintptr_t, uintptr_t)); + MOCK_METHOD0(End, void()); + MOCK_METHOD5(Rule, void(uintptr_t, int, LExprHow, int16_t, int64_t)); + MOCK_METHOD1(AddPfxInstr, uint32_t(PfxInstr)); +}; + +TEST_F(LulDwarfExpr, SimpleTransliteration) { + MockSummariser summ; + ByteReader reader(ENDIANNESS_LITTLE); + + CFISection section(kLittleEndian, 8); + section + .D8(DW_OP_lit0) + .D8(DW_OP_lit31) + .D8(DW_OP_breg0 + 17).LEB128(-1234) + .D8(DW_OP_const4s).D32(0xFEDC9876) + .D8(DW_OP_deref) + .D8(DW_OP_and) + .D8(DW_OP_plus) + .D8(DW_OP_minus) + .D8(DW_OP_shl) + .D8(DW_OP_ge); + string expr; + bool ok = section.GetContents(&expr); + EXPECT_TRUE(ok); + + { + InSequence s; + // required start marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0))); + // DW_OP_lit0 + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0))); + // DW_OP_lit31 + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 31))); + // DW_OP_breg17 -1234 + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_DwReg, 17))); + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, -1234))); + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add))); + // DW_OP_const4s 0xFEDC9876 + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_SImm32, 0xFEDC9876))); + // DW_OP_deref + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Deref))); + // DW_OP_and + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_And))); + // DW_OP_plus + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Add))); + // DW_OP_minus + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Sub))); + // DW_OP_shl + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Shl))); + // DW_OP_ge + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_CmpGES))); + // required end marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_End))); + } + + int32_t ix = parseDwarfExpr(&summ, &reader, expr, false, false, false); + EXPECT_TRUE(ix >= 0); +} + +TEST_F(LulDwarfExpr, UnknownOpcode) { + MockSummariser summ; + ByteReader reader(ENDIANNESS_LITTLE); + + CFISection section(kLittleEndian, 8); + section + .D8(DW_OP_lo_user - 1); + string expr; + bool ok = section.GetContents(&expr); + EXPECT_TRUE(ok); + + { + InSequence s; + // required start marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0))); + } + + int32_t ix = parseDwarfExpr(&summ, &reader, expr, false, false, false); + EXPECT_TRUE(ix == -1); +} + +TEST_F(LulDwarfExpr, ExpressionOverrun) { + MockSummariser summ; + ByteReader reader(ENDIANNESS_LITTLE); + + CFISection section(kLittleEndian, 8); + section + .D8(DW_OP_const4s).D8(0x12).D8(0x34).D8(0x56); + string expr; + bool ok = section.GetContents(&expr); + EXPECT_TRUE(ok); + + { + InSequence s; + // required start marker + EXPECT_CALL(summ, AddPfxInstr(PfxInstr(PX_Start, 0))); + // DW_OP_const4s followed by 3 (a.k.a. not enough) bytes + // We expect PfxInstr(PX_Simm32, not-known-for-sure-32-bit-immediate) + // Hence must use _ as the argument. + EXPECT_CALL(summ, AddPfxInstr(_)); + } + + int32_t ix = parseDwarfExpr(&summ, &reader, expr, false, false, false); + EXPECT_TRUE(ix == -1); +} + +// We'll need to mention specific Dwarf registers in the EvaluatePfxExpr tests, +// and those names are arch-specific, so a bit of macro magic is helpful. +#if defined(LUL_ARCH_arm) +# define TESTED_REG_STRUCT_NAME r11 +# define TESTED_REG_DWARF_NAME DW_REG_ARM_R11 +#elif defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) +# define TESTED_REG_STRUCT_NAME xbp +# define TESTED_REG_DWARF_NAME DW_REG_INTEL_XBP +#else +# error "Unknown plat" +#endif + +struct EvaluatePfxExprFixture { + // Creates: + // initial stack, AVMA 0x12345678, at offset 4 bytes = 0xdeadbeef + // initial regs, with XBP = 0x14141356 + // initial CFA = 0x5432ABCD + EvaluatePfxExprFixture() { + // The test stack. + si.mStartAvma = 0x12345678; + si.mLen = 0; +# define XX(_byte) do { si.mContents[si.mLen++] = (_byte); } while (0) + XX(0x55); XX(0x55); XX(0x55); XX(0x55); + if (sizeof(void*) == 8) { + // le64 + XX(0xEF); XX(0xBE); XX(0xAD); XX(0xDE); XX(0); XX(0); XX(0); XX(0); + } else { + // le32 + XX(0xEF); XX(0xBE); XX(0xAD); XX(0xDE); + } + XX(0xAA); XX(0xAA); XX(0xAA); XX(0xAA); +# undef XX + // The initial CFA. + initialCFA = TaggedUWord(0x5432ABCD); + // The initial register state. + memset(®s, 0, sizeof(regs)); + regs.TESTED_REG_STRUCT_NAME = TaggedUWord(0x14141356); + } + + StackImage si; + TaggedUWord initialCFA; + UnwindRegs regs; +}; + +class LulDwarfEvaluatePfxExpr : public EvaluatePfxExprFixture, public Test { }; + +TEST_F(LulDwarfEvaluatePfxExpr, NormalEvaluation) { + vector<PfxInstr> instrs; + // Put some junk at the start of the insn sequence. + instrs.push_back(PfxInstr(PX_End)); + instrs.push_back(PfxInstr(PX_End)); + + // Now the real sequence + // stack is empty + instrs.push_back(PfxInstr(PX_Start, 1)); + // 0x5432ABCD + instrs.push_back(PfxInstr(PX_SImm32, 0x31415927)); + // 0x5432ABCD 0x31415927 + instrs.push_back(PfxInstr(PX_DwReg, TESTED_REG_DWARF_NAME)); + // 0x5432ABCD 0x31415927 0x14141356 + instrs.push_back(PfxInstr(PX_SImm32, 42)); + // 0x5432ABCD 0x31415927 0x14141356 42 + instrs.push_back(PfxInstr(PX_Sub)); + // 0x5432ABCD 0x31415927 0x1414132c + instrs.push_back(PfxInstr(PX_Add)); + // 0x5432ABCD 0x45556c53 + instrs.push_back(PfxInstr(PX_SImm32, si.mStartAvma + 4)); + // 0x5432ABCD 0x45556c53 0x1234567c + instrs.push_back(PfxInstr(PX_Deref)); + // 0x5432ABCD 0x45556c53 0xdeadbeef + instrs.push_back(PfxInstr(PX_SImm32, 0xFE01DC23)); + // 0x5432ABCD 0x45556c53 0xdeadbeef 0xFE01DC23 + instrs.push_back(PfxInstr(PX_And)); + // 0x5432ABCD 0x45556c53 0xde019c23 + instrs.push_back(PfxInstr(PX_SImm32, 7)); + // 0x5432ABCD 0x45556c53 0xde019c23 7 + instrs.push_back(PfxInstr(PX_Shl)); + // 0x5432ABCD 0x45556c53 0x6f00ce1180 + instrs.push_back(PfxInstr(PX_SImm32, 0x7fffffff)); + // 0x5432ABCD 0x45556c53 0x6f00ce1180 7fffffff + instrs.push_back(PfxInstr(PX_And)); + // 0x5432ABCD 0x45556c53 0x00ce1180 + instrs.push_back(PfxInstr(PX_Add)); + // 0x5432ABCD 0x46237dd3 + instrs.push_back(PfxInstr(PX_Sub)); + // 0xe0f2dfa + + instrs.push_back(PfxInstr(PX_End)); + + TaggedUWord res = EvaluatePfxExpr(2/*offset of start insn*/, + ®s, initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res.Value() == 0xe0f2dfa); +} + +TEST_F(LulDwarfEvaluatePfxExpr, EmptySequence) { + vector<PfxInstr> instrs; + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, BogusStartPoint) { + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_SImm32, 42)); + instrs.push_back(PfxInstr(PX_SImm32, 24)); + instrs.push_back(PfxInstr(PX_SImm32, 4224)); + TaggedUWord res = EvaluatePfxExpr(1, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, MissingEndMarker) { + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + instrs.push_back(PfxInstr(PX_SImm32, 24)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackUnderflow) { + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackNoUnderflow) { + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_Start, 1/*push the initial CFA*/)); + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res == initialCFA); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackOverflow) { + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + for (int i = 0; i < 10+1; i++) { + instrs.push_back(PfxInstr(PX_SImm32, i + 100)); + } + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_FALSE(res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, StackNoOverflow) { + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + for (int i = 0; i < 10+0; i++) { + instrs.push_back(PfxInstr(PX_SImm32, i + 100)); + } + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res == TaggedUWord(109)); +} + +TEST_F(LulDwarfEvaluatePfxExpr, OutOfRangeShl) { + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + instrs.push_back(PfxInstr(PX_SImm32, 1234)); + instrs.push_back(PfxInstr(PX_SImm32, 5678)); + instrs.push_back(PfxInstr(PX_Shl)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(!res.Valid()); +} + +TEST_F(LulDwarfEvaluatePfxExpr, TestCmpGES) { + const int32_t argsL[6] = { 0, 0, 1, -2, -1, -2 }; + const int32_t argsR[6] = { 0, 1, 0, -2, -2, -1 }; + // expecting: t f t t t f = 101110 = 0x2E + vector<PfxInstr> instrs; + instrs.push_back(PfxInstr(PX_Start, 0)); + // The "running total" + instrs.push_back(PfxInstr(PX_SImm32, 0)); + for (unsigned int i = 0; i < sizeof(argsL)/sizeof(argsL[0]); i++) { + // Shift the "running total" at the bottom of the stack left by one bit + instrs.push_back(PfxInstr(PX_SImm32, 1)); + instrs.push_back(PfxInstr(PX_Shl)); + // Push both test args and do the comparison + instrs.push_back(PfxInstr(PX_SImm32, argsL[i])); + instrs.push_back(PfxInstr(PX_SImm32, argsR[i])); + instrs.push_back(PfxInstr(PX_CmpGES)); + // Or the result into the running total + instrs.push_back(PfxInstr(PX_Or)); + } + instrs.push_back(PfxInstr(PX_End)); + TaggedUWord res = EvaluatePfxExpr(0, ®s, initialCFA, &si, instrs); + EXPECT_TRUE(res.Valid()); + EXPECT_TRUE(res == TaggedUWord(0x2E)); +} + +} // namespace lul diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.cpp b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp new file mode 100644 index 000000000..ba8e2e41e --- /dev/null +++ b/tools/profiler/tests/gtest/LulTestInfrastructure.cpp @@ -0,0 +1,491 @@ +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Derived from: +// test_assembler.cc: Implementation of google_breakpad::TestAssembler. +// See test_assembler.h for details. + +// Derived from: +// cfi_assembler.cc: Implementation of google_breakpad::CFISection class. +// See cfi_assembler.h for details. + +#include "LulTestInfrastructure.h" + +namespace lul_test { +namespace test_assembler { + +using std::back_insert_iterator; + +Label::Label() : value_(new Binding()) { } +Label::Label(uint64_t value) : value_(new Binding(value)) { } +Label::Label(const Label &label) { + value_ = label.value_; + value_->Acquire(); +} +Label::~Label() { + if (value_->Release()) delete value_; +} + +Label &Label::operator=(uint64_t value) { + value_->Set(NULL, value); + return *this; +} + +Label &Label::operator=(const Label &label) { + value_->Set(label.value_, 0); + return *this; +} + +Label Label::operator+(uint64_t addend) const { + Label l; + l.value_->Set(this->value_, addend); + return l; +} + +Label Label::operator-(uint64_t subtrahend) const { + Label l; + l.value_->Set(this->value_, -subtrahend); + return l; +} + +// When NDEBUG is #defined, assert doesn't evaluate its argument. This +// means you can't simply use assert to check the return value of a +// function with necessary side effects. +// +// ALWAYS_EVALUATE_AND_ASSERT(x) evaluates x regardless of whether +// NDEBUG is #defined; when NDEBUG is not #defined, it further asserts +// that x is true. +#ifdef NDEBUG +#define ALWAYS_EVALUATE_AND_ASSERT(x) x +#else +#define ALWAYS_EVALUATE_AND_ASSERT(x) assert(x) +#endif + +uint64_t Label::operator-(const Label &label) const { + uint64_t offset; + ALWAYS_EVALUATE_AND_ASSERT(IsKnownOffsetFrom(label, &offset)); + return offset; +} + +bool Label::IsKnownConstant(uint64_t *value_p) const { + Binding *base; + uint64_t addend; + value_->Get(&base, &addend); + if (base != NULL) return false; + if (value_p) *value_p = addend; + return true; +} + +bool Label::IsKnownOffsetFrom(const Label &label, uint64_t *offset_p) const +{ + Binding *label_base, *this_base; + uint64_t label_addend, this_addend; + label.value_->Get(&label_base, &label_addend); + value_->Get(&this_base, &this_addend); + // If this and label are related, Get will find their final + // common ancestor, regardless of how indirect the relation is. This + // comparison also handles the constant vs. constant case. + if (this_base != label_base) return false; + if (offset_p) *offset_p = this_addend - label_addend; + return true; +} + +Label::Binding::Binding() : base_(this), addend_(), reference_count_(1) { } + +Label::Binding::Binding(uint64_t addend) + : base_(NULL), addend_(addend), reference_count_(1) { } + +Label::Binding::~Binding() { + assert(reference_count_ == 0); + if (base_ && base_ != this && base_->Release()) + delete base_; +} + +void Label::Binding::Set(Binding *binding, uint64_t addend) { + if (!base_ && !binding) { + // We're equating two constants. This could be okay. + assert(addend_ == addend); + } else if (!base_) { + // We are a known constant, but BINDING may not be, so turn the + // tables and try to set BINDING's value instead. + binding->Set(NULL, addend_ - addend); + } else { + if (binding) { + // Find binding's final value. Since the final value is always either + // completely unconstrained or a constant, never a reference to + // another variable (otherwise, it wouldn't be final), this + // guarantees we won't create cycles here, even for code like this: + // l = m, m = n, n = l; + uint64_t binding_addend; + binding->Get(&binding, &binding_addend); + addend += binding_addend; + } + + // It seems likely that setting a binding to itself is a bug + // (although I can imagine this might turn out to be helpful to + // permit). + assert(binding != this); + + if (base_ != this) { + // Set the other bindings on our chain as well. Note that this + // is sufficient even though binding relationships form trees: + // All binding operations traverse their chains to the end, and + // all bindings related to us share some tail of our chain, so + // they will see the changes we make here. + base_->Set(binding, addend - addend_); + // We're not going to use base_ any more. + if (base_->Release()) delete base_; + } + + // Adopt BINDING as our base. Note that it should be correct to + // acquire here, after the release above, even though the usual + // reference-counting rules call for acquiring first, and then + // releasing: the self-reference assertion above should have + // complained if BINDING were 'this' or anywhere along our chain, + // so we didn't release BINDING. + if (binding) binding->Acquire(); + base_ = binding; + addend_ = addend; + } +} + +void Label::Binding::Get(Binding **base, uint64_t *addend) { + if (base_ && base_ != this) { + // Recurse to find the end of our reference chain (the root of our + // tree), and then rewrite every binding along the chain to refer + // to it directly, adjusting addends appropriately. (This is why + // this member function isn't this-const.) + Binding *final_base; + uint64_t final_addend; + base_->Get(&final_base, &final_addend); + if (final_base) final_base->Acquire(); + if (base_->Release()) delete base_; + base_ = final_base; + addend_ += final_addend; + } + *base = base_; + *addend = addend_; +} + +template<typename Inserter> +static inline void InsertEndian(test_assembler::Endianness endianness, + size_t size, uint64_t number, Inserter dest) { + assert(size > 0); + if (endianness == kLittleEndian) { + for (size_t i = 0; i < size; i++) { + *dest++ = (char) (number & 0xff); + number >>= 8; + } + } else { + assert(endianness == kBigEndian); + // The loop condition is odd, but it's correct for size_t. + for (size_t i = size - 1; i < size; i--) + *dest++ = (char) ((number >> (i * 8)) & 0xff); + } +} + +Section &Section::Append(Endianness endianness, size_t size, uint64_t number) { + InsertEndian(endianness, size, number, + back_insert_iterator<string>(contents_)); + return *this; +} + +Section &Section::Append(Endianness endianness, size_t size, + const Label &label) { + // If this label's value is known, there's no reason to waste an + // entry in references_ on it. + uint64_t value; + if (label.IsKnownConstant(&value)) + return Append(endianness, size, value); + + // This will get caught when the references are resolved, but it's + // nicer to find out earlier. + assert(endianness != kUnsetEndian); + + references_.push_back(Reference(contents_.size(), endianness, size, label)); + contents_.append(size, 0); + return *this; +} + +#define ENDIANNESS_L kLittleEndian +#define ENDIANNESS_B kBigEndian +#define ENDIANNESS(e) ENDIANNESS_ ## e + +#define DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits) \ + Section &Section::e ## bits(uint ## bits ## _t v) { \ + InsertEndian(ENDIANNESS(e), bits / 8, v, \ + back_insert_iterator<string>(contents_)); \ + return *this; \ + } + +#define DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits) \ + Section &Section::e ## bits(const Label &v) { \ + return Append(ENDIANNESS(e), bits / 8, v); \ + } + +// Define L16, B32, and friends. +#define DEFINE_SHORT_APPEND_ENDIAN(e, bits) \ + DEFINE_SHORT_APPEND_NUMBER_ENDIAN(e, bits) \ + DEFINE_SHORT_APPEND_LABEL_ENDIAN(e, bits) + +DEFINE_SHORT_APPEND_LABEL_ENDIAN(L, 8); +DEFINE_SHORT_APPEND_LABEL_ENDIAN(B, 8); +DEFINE_SHORT_APPEND_ENDIAN(L, 16); +DEFINE_SHORT_APPEND_ENDIAN(L, 32); +DEFINE_SHORT_APPEND_ENDIAN(L, 64); +DEFINE_SHORT_APPEND_ENDIAN(B, 16); +DEFINE_SHORT_APPEND_ENDIAN(B, 32); +DEFINE_SHORT_APPEND_ENDIAN(B, 64); + +#define DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits) \ + Section &Section::D ## bits(uint ## bits ## _t v) { \ + InsertEndian(endianness_, bits / 8, v, \ + back_insert_iterator<string>(contents_)); \ + return *this; \ + } +#define DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits) \ + Section &Section::D ## bits(const Label &v) { \ + return Append(endianness_, bits / 8, v); \ + } +#define DEFINE_SHORT_APPEND_DEFAULT(bits) \ + DEFINE_SHORT_APPEND_NUMBER_DEFAULT(bits) \ + DEFINE_SHORT_APPEND_LABEL_DEFAULT(bits) + +DEFINE_SHORT_APPEND_LABEL_DEFAULT(8) +DEFINE_SHORT_APPEND_DEFAULT(16); +DEFINE_SHORT_APPEND_DEFAULT(32); +DEFINE_SHORT_APPEND_DEFAULT(64); + +Section &Section::LEB128(long long value) { + while (value < -0x40 || 0x3f < value) { + contents_ += (value & 0x7f) | 0x80; + if (value < 0) + value = (value >> 7) | ~(((unsigned long long) -1) >> 7); + else + value = (value >> 7); + } + contents_ += value & 0x7f; + return *this; +} + +Section &Section::ULEB128(uint64_t value) { + while (value > 0x7f) { + contents_ += (value & 0x7f) | 0x80; + value = (value >> 7); + } + contents_ += value; + return *this; +} + +Section &Section::Align(size_t alignment, uint8_t pad_byte) { + // ALIGNMENT must be a power of two. + assert(((alignment - 1) & alignment) == 0); + size_t new_size = (contents_.size() + alignment - 1) & ~(alignment - 1); + contents_.append(new_size - contents_.size(), pad_byte); + assert((contents_.size() & (alignment - 1)) == 0); + return *this; +} + +bool Section::GetContents(string *contents) { + // For each label reference, find the label's value, and patch it into + // the section's contents. + for (size_t i = 0; i < references_.size(); i++) { + Reference &r = references_[i]; + uint64_t value; + if (!r.label.IsKnownConstant(&value)) { + fprintf(stderr, "Undefined label #%zu at offset 0x%zx\n", i, r.offset); + return false; + } + assert(r.offset < contents_.size()); + assert(contents_.size() - r.offset >= r.size); + InsertEndian(r.endianness, r.size, value, contents_.begin() + r.offset); + } + contents->clear(); + std::swap(contents_, *contents); + references_.clear(); + return true; +} + +} // namespace test_assembler +} // namespace lul_test + + +namespace lul_test { + +CFISection &CFISection::CIEHeader(uint64_t code_alignment_factor, + int data_alignment_factor, + unsigned return_address_register, + uint8_t version, + const string &augmentation, + bool dwarf64) { + assert(!entry_length_); + entry_length_ = new PendingLength(); + in_fde_ = false; + + if (dwarf64) { + D32(kDwarf64InitialLengthMarker); + D64(entry_length_->length); + entry_length_->start = Here(); + D64(eh_frame_ ? kEHFrame64CIEIdentifier : kDwarf64CIEIdentifier); + } else { + D32(entry_length_->length); + entry_length_->start = Here(); + D32(eh_frame_ ? kEHFrame32CIEIdentifier : kDwarf32CIEIdentifier); + } + D8(version); + AppendCString(augmentation); + ULEB128(code_alignment_factor); + LEB128(data_alignment_factor); + if (version == 1) + D8(return_address_register); + else + ULEB128(return_address_register); + return *this; +} + +CFISection &CFISection::FDEHeader(Label cie_pointer, + uint64_t initial_location, + uint64_t address_range, + bool dwarf64) { + assert(!entry_length_); + entry_length_ = new PendingLength(); + in_fde_ = true; + fde_start_address_ = initial_location; + + if (dwarf64) { + D32(0xffffffff); + D64(entry_length_->length); + entry_length_->start = Here(); + if (eh_frame_) + D64(Here() - cie_pointer); + else + D64(cie_pointer); + } else { + D32(entry_length_->length); + entry_length_->start = Here(); + if (eh_frame_) + D32(Here() - cie_pointer); + else + D32(cie_pointer); + } + EncodedPointer(initial_location); + // The FDE length in an .eh_frame section uses the same encoding as the + // initial location, but ignores the base address (selected by the upper + // nybble of the encoding), as it's a length, not an address that can be + // made relative. + EncodedPointer(address_range, + DwarfPointerEncoding(pointer_encoding_ & 0x0f)); + return *this; +} + +CFISection &CFISection::FinishEntry() { + assert(entry_length_); + Align(address_size_, lul::DW_CFA_nop); + entry_length_->length = Here() - entry_length_->start; + delete entry_length_; + entry_length_ = NULL; + in_fde_ = false; + return *this; +} + +CFISection &CFISection::EncodedPointer(uint64_t address, + DwarfPointerEncoding encoding, + const EncodedPointerBases &bases) { + // Omitted data is extremely easy to emit. + if (encoding == lul::DW_EH_PE_omit) + return *this; + + // If (encoding & lul::DW_EH_PE_indirect) != 0, then we assume + // that ADDRESS is the address at which the pointer is stored --- in + // other words, that bit has no effect on how we write the pointer. + encoding = DwarfPointerEncoding(encoding & ~lul::DW_EH_PE_indirect); + + // Find the base address to which this pointer is relative. The upper + // nybble of the encoding specifies this. + uint64_t base; + switch (encoding & 0xf0) { + case lul::DW_EH_PE_absptr: base = 0; break; + case lul::DW_EH_PE_pcrel: base = bases.cfi + Size(); break; + case lul::DW_EH_PE_textrel: base = bases.text; break; + case lul::DW_EH_PE_datarel: base = bases.data; break; + case lul::DW_EH_PE_funcrel: base = fde_start_address_; break; + case lul::DW_EH_PE_aligned: base = 0; break; + default: abort(); + }; + + // Make ADDRESS relative. Yes, this is appropriate even for "absptr" + // values; see gcc/unwind-pe.h. + address -= base; + + // Align the pointer, if required. + if ((encoding & 0xf0) == lul::DW_EH_PE_aligned) + Align(AddressSize()); + + // Append ADDRESS to this section in the appropriate form. For the + // fixed-width forms, we don't need to differentiate between signed and + // unsigned encodings, because ADDRESS has already been extended to 64 + // bits before it was passed to us. + switch (encoding & 0x0f) { + case lul::DW_EH_PE_absptr: + Address(address); + break; + + case lul::DW_EH_PE_uleb128: + ULEB128(address); + break; + + case lul::DW_EH_PE_sleb128: + LEB128(address); + break; + + case lul::DW_EH_PE_udata2: + case lul::DW_EH_PE_sdata2: + D16(address); + break; + + case lul::DW_EH_PE_udata4: + case lul::DW_EH_PE_sdata4: + D32(address); + break; + + case lul::DW_EH_PE_udata8: + case lul::DW_EH_PE_sdata8: + D64(address); + break; + + default: + abort(); + } + + return *this; +}; + +} // namespace lul_test diff --git a/tools/profiler/tests/gtest/LulTestInfrastructure.h b/tools/profiler/tests/gtest/LulTestInfrastructure.h new file mode 100644 index 000000000..37b1b7d49 --- /dev/null +++ b/tools/profiler/tests/gtest/LulTestInfrastructure.h @@ -0,0 +1,666 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Derived from: +// cfi_assembler.h: Define CFISection, a class for creating properly +// (and improperly) formatted DWARF CFI data for unit tests. + +// Derived from: +// test-assembler.h: interface to class for building complex binary streams. + +// To test the Breakpad symbol dumper and processor thoroughly, for +// all combinations of host system and minidump processor +// architecture, we need to be able to easily generate complex test +// data like debugging information and minidump files. +// +// For example, if we want our unit tests to provide full code +// coverage for stack walking, it may be difficult to persuade the +// compiler to generate every possible sort of stack walking +// information that we want to support; there are probably DWARF CFI +// opcodes that GCC never emits. Similarly, if we want to test our +// error handling, we will need to generate damaged minidumps or +// debugging information that (we hope) the client or compiler will +// never produce on its own. +// +// google_breakpad::TestAssembler provides a predictable and +// (relatively) simple way to generate complex formatted data streams +// like minidumps and CFI. Furthermore, because TestAssembler is +// portable, developers without access to (say) Visual Studio or a +// SPARC assembler can still work on test data for those targets. + +#ifndef LUL_TEST_INFRASTRUCTURE_H +#define LUL_TEST_INFRASTRUCTURE_H + +#include <string> +#include <vector> + +using std::string; +using std::vector; + +namespace lul_test { +namespace test_assembler { + +// A Label represents a value not yet known that we need to store in a +// section. As long as all the labels a section refers to are defined +// by the time we retrieve its contents as bytes, we can use undefined +// labels freely in that section's construction. +// +// A label can be in one of three states: +// - undefined, +// - defined as the sum of some other label and a constant, or +// - a constant. +// +// A label's value never changes, but it can accumulate constraints. +// Adding labels and integers is permitted, and yields a label. +// Subtracting a constant from a label is permitted, and also yields a +// label. Subtracting two labels that have some relationship to each +// other is permitted, and yields a constant. +// +// For example: +// +// Label a; // a's value is undefined +// Label b; // b's value is undefined +// { +// Label c = a + 4; // okay, even though a's value is unknown +// b = c + 4; // also okay; b is now a+8 +// } +// Label d = b - 2; // okay; d == a+6, even though c is gone +// d.Value(); // error: d's value is not yet known +// d - a; // is 6, even though their values are not known +// a = 12; // now b == 20, and d == 18 +// d.Value(); // 18: no longer an error +// b.Value(); // 20 +// d = 10; // error: d is already defined. +// +// Label objects' lifetimes are unconstrained: notice that, in the +// above example, even though a and b are only related through c, and +// c goes out of scope, the assignment to a sets b's value as well. In +// particular, it's not necessary to ensure that a Label lives beyond +// Sections that refer to it. +class Label { + public: + Label(); // An undefined label. + explicit Label(uint64_t value); // A label with a fixed value + Label(const Label &value); // A label equal to another. + ~Label(); + + Label &operator=(uint64_t value); + Label &operator=(const Label &value); + Label operator+(uint64_t addend) const; + Label operator-(uint64_t subtrahend) const; + uint64_t operator-(const Label &subtrahend) const; + + // We could also provide == and != that work on undefined, but + // related, labels. + + // Return true if this label's value is known. If VALUE_P is given, + // set *VALUE_P to the known value if returning true. + bool IsKnownConstant(uint64_t *value_p = NULL) const; + + // Return true if the offset from LABEL to this label is known. If + // OFFSET_P is given, set *OFFSET_P to the offset when returning true. + // + // You can think of l.KnownOffsetFrom(m, &d) as being like 'd = l-m', + // except that it also returns a value indicating whether the + // subtraction is possible given what we currently know of l and m. + // It can be possible even if we don't know l and m's values. For + // example: + // + // Label l, m; + // m = l + 10; + // l.IsKnownConstant(); // false + // m.IsKnownConstant(); // false + // uint64_t d; + // l.IsKnownOffsetFrom(m, &d); // true, and sets d to -10. + // l-m // -10 + // m-l // 10 + // m.Value() // error: m's value is not known + bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const; + + private: + // A label's value, or if that is not yet known, how the value is + // related to other labels' values. A binding may be: + // - a known constant, + // - constrained to be equal to some other binding plus a constant, or + // - unconstrained, and free to take on any value. + // + // Many labels may point to a single binding, and each binding may + // refer to another, so bindings and labels form trees whose leaves + // are labels, whose interior nodes (and roots) are bindings, and + // where links point from children to parents. Bindings are + // reference counted, allowing labels to be lightweight, copyable, + // assignable, placed in containers, and so on. + class Binding { + public: + Binding(); + explicit Binding(uint64_t addend); + ~Binding(); + + // Increment our reference count. + void Acquire() { reference_count_++; }; + // Decrement our reference count, and return true if it is zero. + bool Release() { return --reference_count_ == 0; } + + // Set this binding to be equal to BINDING + ADDEND. If BINDING is + // NULL, then set this binding to the known constant ADDEND. + // Update every binding on this binding's chain to point directly + // to BINDING, or to be a constant, with addends adjusted + // appropriately. + void Set(Binding *binding, uint64_t value); + + // Return what we know about the value of this binding. + // - If this binding's value is a known constant, set BASE to + // NULL, and set ADDEND to its value. + // - If this binding is not a known constant but related to other + // bindings, set BASE to the binding at the end of the relation + // chain (which will always be unconstrained), and set ADDEND to the + // value to add to that binding's value to get this binding's + // value. + // - If this binding is unconstrained, set BASE to this, and leave + // ADDEND unchanged. + void Get(Binding **base, uint64_t *addend); + + private: + // There are three cases: + // + // - A binding representing a known constant value has base_ NULL, + // and addend_ equal to the value. + // + // - A binding representing a completely unconstrained value has + // base_ pointing to this; addend_ is unused. + // + // - A binding whose value is related to some other binding's + // value has base_ pointing to that other binding, and addend_ + // set to the amount to add to that binding's value to get this + // binding's value. We only represent relationships of the form + // x = y+c. + // + // Thus, the bind_ links form a chain terminating in either a + // known constant value or a completely unconstrained value. Most + // operations on bindings do path compression: they change every + // binding on the chain to point directly to the final value, + // adjusting addends as appropriate. + Binding *base_; + uint64_t addend_; + + // The number of Labels and Bindings pointing to this binding. + // (When a binding points to itself, indicating a completely + // unconstrained binding, that doesn't count as a reference.) + int reference_count_; + }; + + // This label's value. + Binding *value_; +}; + +// Conventions for representing larger numbers as sequences of bytes. +enum Endianness { + kBigEndian, // Big-endian: the most significant byte comes first. + kLittleEndian, // Little-endian: the least significant byte comes first. + kUnsetEndian, // used internally +}; + +// A section is a sequence of bytes, constructed by appending bytes +// to the end. Sections have a convenient and flexible set of member +// functions for appending data in various formats: big-endian and +// little-endian signed and unsigned values of different sizes; +// LEB128 and ULEB128 values (see below), and raw blocks of bytes. +// +// If you need to append a value to a section that is not convenient +// to compute immediately, you can create a label, append the +// label's value to the section, and then set the label's value +// later, when it's convenient to do so. Once a label's value is +// known, the section class takes care of updating all previously +// appended references to it. +// +// Once all the labels to which a section refers have had their +// values determined, you can get a copy of the section's contents +// as a string. +// +// Note that there is no specified "start of section" label. This is +// because there are typically several different meanings for "the +// start of a section": the offset of the section within an object +// file, the address in memory at which the section's content appear, +// and so on. It's up to the code that uses the Section class to +// keep track of these explicitly, as they depend on the application. +class Section { + public: + explicit Section(Endianness endianness = kUnsetEndian) + : endianness_(endianness) { }; + + // A base class destructor should be either public and virtual, + // or protected and nonvirtual. + virtual ~Section() { }; + + // Return the default endianness of this section. + Endianness endianness() const { return endianness_; } + + // Append the SIZE bytes at DATA to the end of this section. Return + // a reference to this section. + Section &Append(const string &data) { + contents_.append(data); + return *this; + }; + + // Append SIZE copies of BYTE to the end of this section. Return a + // reference to this section. + Section &Append(size_t size, uint8_t byte) { + contents_.append(size, (char) byte); + return *this; + } + + // Append NUMBER to this section. ENDIANNESS is the endianness to + // use to write the number. SIZE is the length of the number in + // bytes. Return a reference to this section. + Section &Append(Endianness endianness, size_t size, uint64_t number); + Section &Append(Endianness endianness, size_t size, const Label &label); + + // Append SECTION to the end of this section. The labels SECTION + // refers to need not be defined yet. + // + // Note that this has no effect on any Labels' values, or on + // SECTION. If placing SECTION within 'this' provides new + // constraints on existing labels' values, then it's up to the + // caller to fiddle with those labels as needed. + Section &Append(const Section §ion); + + // Append the contents of DATA as a series of bytes terminated by + // a NULL character. + Section &AppendCString(const string &data) { + Append(data); + contents_ += '\0'; + return *this; + } + + // Append VALUE or LABEL to this section, with the given bit width and + // endianness. Return a reference to this section. + // + // The names of these functions have the form <ENDIANNESS><BITWIDTH>: + // <ENDIANNESS> is either 'L' (little-endian, least significant byte first), + // 'B' (big-endian, most significant byte first), or + // 'D' (default, the section's default endianness) + // <BITWIDTH> is 8, 16, 32, or 64. + // + // Since endianness doesn't matter for a single byte, all the + // <BITWIDTH>=8 functions are equivalent. + // + // These can be used to write both signed and unsigned values, as + // the compiler will properly sign-extend a signed value before + // passing it to the function, at which point the function's + // behavior is the same either way. + Section &L8(uint8_t value) { contents_ += value; return *this; } + Section &B8(uint8_t value) { contents_ += value; return *this; } + Section &D8(uint8_t value) { contents_ += value; return *this; } + Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), + &B16(uint16_t), &B32(uint32_t), &B64(uint64_t), + &D16(uint16_t), &D32(uint32_t), &D64(uint64_t); + Section &L8(const Label &label), &L16(const Label &label), + &L32(const Label &label), &L64(const Label &label), + &B8(const Label &label), &B16(const Label &label), + &B32(const Label &label), &B64(const Label &label), + &D8(const Label &label), &D16(const Label &label), + &D32(const Label &label), &D64(const Label &label); + + // Append VALUE in a signed LEB128 (Little-Endian Base 128) form. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + // + // Note that VALUE cannot be a Label (we would have to implement + // relaxation). + Section &LEB128(long long value); + + // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form. + // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // Note that VALUE cannot be a Label (we would have to implement + // relaxation). + Section &ULEB128(uint64_t value); + + // Jump to the next location aligned on an ALIGNMENT-byte boundary, + // relative to the start of the section. Fill the gap with PAD_BYTE. + // ALIGNMENT must be a power of two. Return a reference to this + // section. + Section &Align(size_t alignment, uint8_t pad_byte = 0); + + // Return the current size of the section. + size_t Size() const { return contents_.size(); } + + // Return a label representing the start of the section. + // + // It is up to the user whether this label represents the section's + // position in an object file, the section's address in memory, or + // what have you; some applications may need both, in which case + // this simple-minded interface won't be enough. This class only + // provides a single start label, for use with the Here and Mark + // member functions. + // + // Ideally, we'd provide this in a subclass that actually knows more + // about the application at hand and can provide an appropriate + // collection of start labels. But then the appending member + // functions like Append and D32 would return a reference to the + // base class, not the derived class, and the chaining won't work. + // Since the only value here is in pretty notation, that's a fatal + // flaw. + Label start() const { return start_; } + + // Return a label representing the point at which the next Appended + // item will appear in the section, relative to start(). + Label Here() const { return start_ + Size(); } + + // Set *LABEL to Here, and return a reference to this section. + Section &Mark(Label *label) { *label = Here(); return *this; } + + // If there are no undefined label references left in this + // section, set CONTENTS to the contents of this section, as a + // string, and clear this section. Return true on success, or false + // if there were still undefined labels. + bool GetContents(string *contents); + + private: + // Used internally. A reference to a label's value. + struct Reference { + Reference(size_t set_offset, Endianness set_endianness, size_t set_size, + const Label &set_label) + : offset(set_offset), endianness(set_endianness), size(set_size), + label(set_label) { } + + // The offset of the reference within the section. + size_t offset; + + // The endianness of the reference. + Endianness endianness; + + // The size of the reference. + size_t size; + + // The label to which this is a reference. + Label label; + }; + + // The default endianness of this section. + Endianness endianness_; + + // The contents of the section. + string contents_; + + // References to labels within those contents. + vector<Reference> references_; + + // A label referring to the beginning of the section. + Label start_; +}; + +} // namespace test_assembler +} // namespace lul_test + + +namespace lul_test { + +using lul::DwarfPointerEncoding; +using lul_test::test_assembler::Endianness; +using lul_test::test_assembler::Label; +using lul_test::test_assembler::Section; + +class CFISection: public Section { + public: + + // CFI augmentation strings beginning with 'z', defined by the + // Linux/IA-64 C++ ABI, can specify interesting encodings for + // addresses appearing in FDE headers and call frame instructions (and + // for additional fields whose presence the augmentation string + // specifies). In particular, pointers can be specified to be relative + // to various base address: the start of the .text section, the + // location holding the address itself, and so on. These allow the + // frame data to be position-independent even when they live in + // write-protected pages. These variants are specified at the + // following two URLs: + // + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html + // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + // + // CFISection leaves the production of well-formed 'z'-augmented CIEs and + // FDEs to the user, but does provide EncodedPointer, to emit + // properly-encoded addresses for a given pointer encoding. + // EncodedPointer uses an instance of this structure to find the base + // addresses it should use; you can establish a default for all encoded + // pointers appended to this section with SetEncodedPointerBases. + struct EncodedPointerBases { + EncodedPointerBases() : cfi(), text(), data() { } + + // The starting address of this CFI section in memory, for + // DW_EH_PE_pcrel. DW_EH_PE_pcrel pointers may only be used in data + // that has is loaded into the program's address space. + uint64_t cfi; + + // The starting address of this file's .text section, for DW_EH_PE_textrel. + uint64_t text; + + // The starting address of this file's .got or .eh_frame_hdr section, + // for DW_EH_PE_datarel. + uint64_t data; + }; + + // Create a CFISection whose endianness is ENDIANNESS, and where + // machine addresses are ADDRESS_SIZE bytes long. If EH_FRAME is + // true, use the .eh_frame format, as described by the Linux + // Standards Base Core Specification, instead of the DWARF CFI + // format. + CFISection(Endianness endianness, size_t address_size, + bool eh_frame = false) + : Section(endianness), address_size_(address_size), eh_frame_(eh_frame), + pointer_encoding_(lul::DW_EH_PE_absptr), + encoded_pointer_bases_(), entry_length_(NULL), in_fde_(false) { + // The 'start', 'Here', and 'Mark' members of a CFISection all refer + // to section offsets. + start() = 0; + } + + // Return this CFISection's address size. + size_t AddressSize() const { return address_size_; } + + // Return true if this CFISection uses the .eh_frame format, or + // false if it contains ordinary DWARF CFI data. + bool ContainsEHFrame() const { return eh_frame_; } + + // Use ENCODING for pointers in calls to FDEHeader and EncodedPointer. + void SetPointerEncoding(DwarfPointerEncoding encoding) { + pointer_encoding_ = encoding; + } + + // Use the addresses in BASES as the base addresses for encoded + // pointers in subsequent calls to FDEHeader or EncodedPointer. + // This function makes a copy of BASES. + void SetEncodedPointerBases(const EncodedPointerBases &bases) { + encoded_pointer_bases_ = bases; + } + + // Append a Common Information Entry header to this section with the + // given values. If dwarf64 is true, use the 64-bit DWARF initial + // length format for the CIE's initial length. Return a reference to + // this section. You should call FinishEntry after writing the last + // instruction for the CIE. + // + // Before calling this function, you will typically want to use Mark + // or Here to make a label to pass to FDEHeader that refers to this + // CIE's position in the section. + CFISection &CIEHeader(uint64_t code_alignment_factor, + int data_alignment_factor, + unsigned return_address_register, + uint8_t version = 3, + const string &augmentation = "", + bool dwarf64 = false); + + // Append a Frame Description Entry header to this section with the + // given values. If dwarf64 is true, use the 64-bit DWARF initial + // length format for the CIE's initial length. Return a reference to + // this section. You should call FinishEntry after writing the last + // instruction for the CIE. + // + // This function doesn't support entries that are longer than + // 0xffffff00 bytes. (The "initial length" is always a 32-bit + // value.) Nor does it support .debug_frame sections longer than + // 0xffffff00 bytes. + CFISection &FDEHeader(Label cie_pointer, + uint64_t initial_location, + uint64_t address_range, + bool dwarf64 = false); + + // Note the current position as the end of the last CIE or FDE we + // started, after padding with DW_CFA_nops for alignment. This + // defines the label representing the entry's length, cited in the + // entry's header. Return a reference to this section. + CFISection &FinishEntry(); + + // Append the contents of BLOCK as a DW_FORM_block value: an + // unsigned LEB128 length, followed by that many bytes of data. + CFISection &Block(const string &block) { + ULEB128(block.size()); + Append(block); + return *this; + } + + // Append ADDRESS to this section, in the appropriate size and + // endianness. Return a reference to this section. + CFISection &Address(uint64_t address) { + Section::Append(endianness(), address_size_, address); + return *this; + } + + // Append ADDRESS to this section, using ENCODING and BASES. ENCODING + // defaults to this section's default encoding, established by + // SetPointerEncoding. BASES defaults to this section's bases, set by + // SetEncodedPointerBases. If the DW_EH_PE_indirect bit is set in the + // encoding, assume that ADDRESS is where the true address is stored. + // Return a reference to this section. + // + // (C++ doesn't let me use default arguments here, because I want to + // refer to members of *this in the default argument expression.) + CFISection &EncodedPointer(uint64_t address) { + return EncodedPointer(address, pointer_encoding_, encoded_pointer_bases_); + } + CFISection &EncodedPointer(uint64_t address, DwarfPointerEncoding encoding) { + return EncodedPointer(address, encoding, encoded_pointer_bases_); + } + CFISection &EncodedPointer(uint64_t address, DwarfPointerEncoding encoding, + const EncodedPointerBases &bases); + + // Restate some member functions, to keep chaining working nicely. + CFISection &Mark(Label *label) { Section::Mark(label); return *this; } + CFISection &D8(uint8_t v) { Section::D8(v); return *this; } + CFISection &D16(uint16_t v) { Section::D16(v); return *this; } + CFISection &D16(Label v) { Section::D16(v); return *this; } + CFISection &D32(uint32_t v) { Section::D32(v); return *this; } + CFISection &D32(const Label &v) { Section::D32(v); return *this; } + CFISection &D64(uint64_t v) { Section::D64(v); return *this; } + CFISection &D64(const Label &v) { Section::D64(v); return *this; } + CFISection &LEB128(long long v) { Section::LEB128(v); return *this; } + CFISection &ULEB128(uint64_t v) { Section::ULEB128(v); return *this; } + + private: + // A length value that we've appended to the section, but is not yet + // known. LENGTH is the appended value; START is a label referring + // to the start of the data whose length was cited. + struct PendingLength { + Label length; + Label start; + }; + + // Constants used in CFI/.eh_frame data: + + // If the first four bytes of an "initial length" are this constant, then + // the data uses the 64-bit DWARF format, and the length itself is the + // subsequent eight bytes. + static const uint32_t kDwarf64InitialLengthMarker = 0xffffffffU; + + // The CIE identifier for 32- and 64-bit DWARF CFI and .eh_frame data. + static const uint32_t kDwarf32CIEIdentifier = ~(uint32_t)0; + static const uint64_t kDwarf64CIEIdentifier = ~(uint64_t)0; + static const uint32_t kEHFrame32CIEIdentifier = 0; + static const uint64_t kEHFrame64CIEIdentifier = 0; + + // The size of a machine address for the data in this section. + size_t address_size_; + + // If true, we are generating a Linux .eh_frame section, instead of + // a standard DWARF .debug_frame section. + bool eh_frame_; + + // The encoding to use for FDE pointers. + DwarfPointerEncoding pointer_encoding_; + + // The base addresses to use when emitting encoded pointers. + EncodedPointerBases encoded_pointer_bases_; + + // The length value for the current entry. + // + // Oddly, this must be dynamically allocated. Labels never get new + // values; they only acquire constraints on the value they already + // have, or assert if you assign them something incompatible. So + // each header needs truly fresh Label objects to cite in their + // headers and track their positions. The alternative is explicit + // destructor invocation and a placement new. Ick. + PendingLength *entry_length_; + + // True if we are currently emitting an FDE --- that is, we have + // called FDEHeader but have not yet called FinishEntry. + bool in_fde_; + + // If in_fde_ is true, this is its starting address. We use this for + // emitting DW_EH_PE_funcrel pointers. + uint64_t fde_start_address_; +}; + +} // namespace lul_test + +#endif // LUL_TEST_INFRASTRUCTURE_H diff --git a/tools/profiler/tests/gtest/ThreadProfileTest.cpp b/tools/profiler/tests/gtest/ThreadProfileTest.cpp new file mode 100644 index 000000000..4399a5bc2 --- /dev/null +++ b/tools/profiler/tests/gtest/ThreadProfileTest.cpp @@ -0,0 +1,75 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "gtest/gtest.h" + +#include "ProfileEntry.h" +#include "ThreadProfile.h" + +// Make sure we can initialize our ThreadProfile +TEST(ThreadProfile, Initialization) { + PseudoStack* stack = PseudoStack::create(); + Thread::tid_t tid = 1000; + ThreadInfo info("testThread", tid, true, stack, nullptr); + RefPtr<ProfileBuffer> pb = new ProfileBuffer(10); + ThreadProfile tp(&info, pb); +} + +// Make sure we can record one tag and read it +TEST(ThreadProfile, InsertOneTag) { + PseudoStack* stack = PseudoStack::create(); + Thread::tid_t tid = 1000; + ThreadInfo info("testThread", tid, true, stack, nullptr); + RefPtr<ProfileBuffer> pb = new ProfileBuffer(10); + pb->addTag(ProfileEntry('t', 123.1)); + ASSERT_TRUE(pb->mEntries != nullptr); + ASSERT_TRUE(pb->mEntries[pb->mReadPos].mTagName == 't'); + ASSERT_TRUE(pb->mEntries[pb->mReadPos].mTagDouble == 123.1); +} + +// See if we can insert some tags +TEST(ThreadProfile, InsertTagsNoWrap) { + PseudoStack* stack = PseudoStack::create(); + Thread::tid_t tid = 1000; + ThreadInfo info("testThread", tid, true, stack, nullptr); + RefPtr<ProfileBuffer> pb = new ProfileBuffer(100); + int test_size = 50; + for (int i = 0; i < test_size; i++) { + pb->addTag(ProfileEntry('t', i)); + } + ASSERT_TRUE(pb->mEntries != nullptr); + int readPos = pb->mReadPos; + while (readPos != pb->mWritePos) { + ASSERT_TRUE(pb->mEntries[readPos].mTagName == 't'); + ASSERT_TRUE(pb->mEntries[readPos].mTagInt == readPos); + readPos = (readPos + 1) % pb->mEntrySize; + } +} + +// See if wrapping works as it should in the basic case +TEST(ThreadProfile, InsertTagsWrap) { + PseudoStack* stack = PseudoStack::create(); + Thread::tid_t tid = 1000; + // we can fit only 24 tags in this buffer because of the empty slot + int tags = 24; + int buffer_size = tags + 1; + ThreadInfo info("testThread", tid, true, stack, nullptr); + RefPtr<ProfileBuffer> pb = new ProfileBuffer(buffer_size); + int test_size = 43; + for (int i = 0; i < test_size; i++) { + pb->addTag(ProfileEntry('t', i)); + } + ASSERT_TRUE(pb->mEntries != nullptr); + int readPos = pb->mReadPos; + int ctr = 0; + while (readPos != pb->mWritePos) { + ASSERT_TRUE(pb->mEntries[readPos].mTagName == 't'); + // the first few tags were discarded when we wrapped + ASSERT_TRUE(pb->mEntries[readPos].mTagInt == ctr + (test_size - tags)); + ctr++; + readPos = (readPos + 1) % pb->mEntrySize; + } +} + diff --git a/tools/profiler/tests/gtest/moz.build b/tools/profiler/tests/gtest/moz.build new file mode 100644 index 000000000..33aded164 --- /dev/null +++ b/tools/profiler/tests/gtest/moz.build @@ -0,0 +1,30 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, you can obtain one at http://mozilla.org/MPL/2.0/. + +if CONFIG['OS_TARGET'] in ('Android', 'Linux'): + UNIFIED_SOURCES += [ + 'LulTestDwarf.cpp', + 'LulTestInfrastructure.cpp', + ] + if CONFIG['CPU_ARCH'] != 'x86': + UNIFIED_SOURCES += [ + 'LulTest.cpp', + ] + +LOCAL_INCLUDES += [ + '/tools/profiler/core', + '/tools/profiler/gecko', + '/tools/profiler/lul', +] + +UNIFIED_SOURCES += [ + 'ThreadProfileTest.cpp', +] + +FINAL_LIBRARY = 'xul-gtest' + +if CONFIG['GNU_CXX']: + CXXFLAGS += ['-Wno-error=shadow'] diff --git a/tools/profiler/tests/head_profiler.js b/tools/profiler/tests/head_profiler.js new file mode 100644 index 000000000..a3821f51f --- /dev/null +++ b/tools/profiler/tests/head_profiler.js @@ -0,0 +1,31 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const Cc = Components.classes; +const Ci = Components.interfaces; +const Cu = Components.utils; + +function getInflatedStackLocations(thread, sample) { + let stackTable = thread.stackTable; + let frameTable = thread.frameTable; + let stringTable = thread.stringTable; + let SAMPLE_STACK_SLOT = thread.samples.schema.stack; + let STACK_PREFIX_SLOT = stackTable.schema.prefix; + let STACK_FRAME_SLOT = stackTable.schema.frame; + let FRAME_LOCATION_SLOT = frameTable.schema.location; + + // Build the stack from the raw data and accumulate the locations in + // an array. + let stackIndex = sample[SAMPLE_STACK_SLOT]; + let locations = []; + while (stackIndex !== null) { + let stackEntry = stackTable.data[stackIndex]; + let frame = frameTable.data[stackEntry[STACK_FRAME_SLOT]]; + locations.push(stringTable[frame[FRAME_LOCATION_SLOT]]); + stackIndex = stackEntry[STACK_PREFIX_SLOT]; + } + + // The profiler tree is inverted, so reverse the array. + return locations.reverse(); +} diff --git a/tools/profiler/tests/test_asm.js b/tools/profiler/tests/test_asm.js new file mode 100644 index 000000000..4d273a559 --- /dev/null +++ b/tools/profiler/tests/test_asm.js @@ -0,0 +1,79 @@ +// Check that asm.js code shows up on the stack. +function run_test() { + let p = Cc["@mozilla.org/tools/profiler;1"]; + + // Just skip the test if the profiler component isn't present. + if (!p) + return; + p = p.getService(Ci.nsIProfiler); + if (!p) + return; + + // This test assumes that it's starting on an empty SPS stack. + // (Note that the other profiler tests also assume the profiler + // isn't already started.) + do_check_true(!p.IsActive()); + + let jsFuns = Cu.getJSTestingFunctions(); + if (!jsFuns.isAsmJSCompilationAvailable()) + return; + + const ms = 10; + p.StartProfiler(10000, ms, ["js"], 1); + + let stack = null; + function ffi_function(){ + var delayMS = 5; + while (1) { + let then = Date.now(); + do {} while (Date.now() - then < delayMS); + + var thread0 = p.getProfileData().threads[0]; + + if (delayMS > 30000) + return; + + delayMS *= 2; + + if (thread0.samples.data.length == 0) + continue; + + var lastSample = thread0.samples.data[thread0.samples.data.length - 1]; + stack = String(getInflatedStackLocations(thread0, lastSample)); + if (stack.indexOf("trampoline") !== -1) + return; + } + } + + function asmjs_module(global, ffis) { + "use asm"; + var ffi = ffis.ffi; + function asmjs_function() { + ffi(); + } + return asmjs_function; + } + + do_check_true(jsFuns.isAsmJSModule(asmjs_module)); + + var asmjs_function = asmjs_module(null, {ffi:ffi_function}); + do_check_true(jsFuns.isAsmJSFunction(asmjs_function)); + + asmjs_function(); + + do_check_neq(stack, null); + + var i1 = stack.indexOf("entry trampoline"); + do_check_true(i1 !== -1); + var i2 = stack.indexOf("asmjs_function"); + do_check_true(i2 !== -1); + var i3 = stack.indexOf("FFI trampoline"); + do_check_true(i3 !== -1); + var i4 = stack.indexOf("ffi_function"); + do_check_true(i4 !== -1); + do_check_true(i1 < i2); + do_check_true(i2 < i3); + do_check_true(i3 < i4); + + p.StopProfiler(); +} diff --git a/tools/profiler/tests/test_enterjit_osr.js b/tools/profiler/tests/test_enterjit_osr.js new file mode 100644 index 000000000..a4bca590f --- /dev/null +++ b/tools/profiler/tests/test_enterjit_osr.js @@ -0,0 +1,59 @@ +// Check that the EnterJIT frame, added by the JIT trampoline and +// usable by a native unwinder to resume unwinding after encountering +// JIT code, is pushed as expected. +function run_test() { + let p = Cc["@mozilla.org/tools/profiler;1"]; + // Just skip the test if the profiler component isn't present. + if (!p) + return; + p = p.getService(Ci.nsIProfiler); + if (!p) + return; + + // This test assumes that it's starting on an empty SPS stack. + // (Note that the other profiler tests also assume the profiler + // isn't already started.) + do_check_true(!p.IsActive()); + + const ms = 5; + p.StartProfiler(100, ms, ["js"], 1); + + function arbitrary_name(){ + // A frame for |arbitrary_name| has been pushed. Do a sequence of + // increasingly long spins until we get a sample. + var delayMS = 5; + while (1) { + do_print("loop: ms = " + delayMS); + let then = Date.now(); + do { + let n = 10000; + while (--n); // OSR happens here + // Spin in the hope of getting a sample. + } while (Date.now() - then < delayMS); + let pr = p.getProfileData().threads[0]; + if (pr.samples.data.length > 0 || delayMS > 30000) + return pr; + delayMS *= 2; + } + }; + + var profile = arbitrary_name(); + + do_check_neq(profile.samples.data.length, 0); + var lastSample = profile.samples.data[profile.samples.data.length - 1]; + var stack = getInflatedStackLocations(profile, lastSample); + do_print(stack); + + // All we can really check here is ensure that there is exactly + // one arbitrary_name frame in the list. + var gotName = false; + for (var i = 0; i < stack.length; i++) { + if (stack[i].match(/arbitrary_name/)) { + do_check_eq(gotName, false); + gotName = true; + } + } + do_check_eq(gotName, true); + + p.StopProfiler(); +} diff --git a/tools/profiler/tests/test_enterjit_osr_disabling.js b/tools/profiler/tests/test_enterjit_osr_disabling.js new file mode 100644 index 000000000..dbf74c93a --- /dev/null +++ b/tools/profiler/tests/test_enterjit_osr_disabling.js @@ -0,0 +1,21 @@ +function run_test() { + let p = Cc["@mozilla.org/tools/profiler;1"]; + // Just skip the test if the profiler component isn't present. + if (!p) + return; + p = p.getService(Ci.nsIProfiler); + if (!p) + return; + + do_check_true(!p.IsActive()); + + p.StartProfiler(100, 10, ["js"], 1); + // The function is entered with the profiler enabled + (function (){ + p.StopProfiler(); + let n = 10000; + while (--n); // OSR happens here with the profiler disabled. + // An assertion will fail when this function returns, if the + // SPS stack was misbalanced. + })(); +} diff --git a/tools/profiler/tests/test_enterjit_osr_enabling.js b/tools/profiler/tests/test_enterjit_osr_enabling.js new file mode 100644 index 000000000..ae696057b --- /dev/null +++ b/tools/profiler/tests/test_enterjit_osr_enabling.js @@ -0,0 +1,21 @@ +function run_test() { + let p = Cc["@mozilla.org/tools/profiler;1"]; + // Just skip the test if the profiler component isn't present. + if (!p) + return; + p = p.getService(Ci.nsIProfiler); + if (!p) + return; + + do_check_true(!p.IsActive()); + + // The function is entered with the profiler disabled. + (function (){ + p.StartProfiler(100, 10, ["js"], 1); + let n = 10000; + while (--n); // OSR happens here with the profiler enabled. + // An assertion will fail when this function returns, if the + // SPS stack was misbalanced. + })(); + p.StopProfiler(); +} diff --git a/tools/profiler/tests/test_get_features.js b/tools/profiler/tests/test_get_features.js new file mode 100644 index 000000000..4fbd5891c --- /dev/null +++ b/tools/profiler/tests/test_get_features.js @@ -0,0 +1,18 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +function run_test() { + // If we can't get the profiler component then assume gecko was + // built without it and pass all the tests + var profilerCc = Cc["@mozilla.org/tools/profiler;1"]; + if (!profilerCc) + return; + + var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler); + if (!profiler) + return; + + var profilerFeatures = profiler.GetFeatures([]); + do_check_true(profilerFeatures != null); +} diff --git a/tools/profiler/tests/test_pause.js b/tools/profiler/tests/test_pause.js new file mode 100644 index 000000000..fedff70c4 --- /dev/null +++ b/tools/profiler/tests/test_pause.js @@ -0,0 +1,35 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +function run_test() { + // If we can't get the profiler component then assume gecko was + // built without it and pass all the tests + var profilerCc = Cc["@mozilla.org/tools/profiler;1"]; + if (!profilerCc) + return; + + var profiler = profilerCc.getService(Ci.nsIProfiler); + if (!profiler) + return; + + do_check_true(!profiler.IsActive()); + do_check_true(!profiler.IsPaused()); + + profiler.StartProfiler(1000, 10, [], 0); + + do_check_true(profiler.IsActive()); + + profiler.PauseSampling(); + + do_check_true(profiler.IsPaused()); + + profiler.ResumeSampling(); + + do_check_true(!profiler.IsPaused()); + + profiler.StopProfiler(); + do_check_true(!profiler.IsActive()); + do_check_true(!profiler.IsPaused()); + do_test_finished(); +} diff --git a/tools/profiler/tests/test_run.js b/tools/profiler/tests/test_run.js new file mode 100644 index 000000000..fef03a07d --- /dev/null +++ b/tools/profiler/tests/test_run.js @@ -0,0 +1,44 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +function run_test() { + // If we can't get the profiler component then assume gecko was + // built without it and pass all the tests + var profilerCc = Cc["@mozilla.org/tools/profiler;1"]; + if (!profilerCc) + return; + + var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler); + if (!profiler) + return; + + do_check_true(!profiler.IsActive()); + + profiler.StartProfiler(1000, 10, [], 0); + + do_check_true(profiler.IsActive()); + + do_test_pending(); + + do_timeout(1000, function wait() { + // Check text profile format + var profileStr = profiler.GetProfile(); + do_check_true(profileStr.length > 10); + + // check json profile format + var profileObj = profiler.getProfileData(); + do_check_neq(profileObj, null); + do_check_neq(profileObj.threads, null); + do_check_true(profileObj.threads.length >= 1); + do_check_neq(profileObj.threads[0].samples, null); + // NOTE: The number of samples will be empty since we + // don't have any labels in the xpcshell code + + profiler.StopProfiler(); + do_check_true(!profiler.IsActive()); + do_test_finished(); + }); + + +} diff --git a/tools/profiler/tests/test_shared_library.js b/tools/profiler/tests/test_shared_library.js new file mode 100644 index 000000000..2bdbc0109 --- /dev/null +++ b/tools/profiler/tests/test_shared_library.js @@ -0,0 +1,23 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +function run_test() { + // If we can't get the profiler component then assume gecko was + // built without it and pass all the tests + var profilerCc = Cc["@mozilla.org/tools/profiler;1"]; + if (!profilerCc) + return; + + var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler); + if (!profiler) + return; + + var sharedStr = profiler.getSharedLibraryInformation(); + sharedStr = sharedStr.toLowerCase(); + + // Let's not hardcode anything too specific + // just some sanity checks. + do_check_neq(sharedStr, null); + do_check_neq(sharedStr, ""); +} diff --git a/tools/profiler/tests/test_start.js b/tools/profiler/tests/test_start.js new file mode 100644 index 000000000..b04b130ff --- /dev/null +++ b/tools/profiler/tests/test_start.js @@ -0,0 +1,25 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +function run_test() { + // If we can't get the profiler component then assume gecko was + // built without it and pass all the tests + var profilerCc = Cc["@mozilla.org/tools/profiler;1"]; + if (!profilerCc) + return; + + var profiler = Cc["@mozilla.org/tools/profiler;1"].getService(Ci.nsIProfiler); + if (!profiler) + return; + + do_check_true(!profiler.IsActive()); + + profiler.StartProfiler(10, 100, [], 0); + + do_check_true(profiler.IsActive()); + + profiler.StopProfiler(); + + do_check_true(!profiler.IsActive()); +} diff --git a/tools/profiler/tests/xpcshell.ini b/tools/profiler/tests/xpcshell.ini new file mode 100644 index 000000000..997a7c142 --- /dev/null +++ b/tools/profiler/tests/xpcshell.ini @@ -0,0 +1,18 @@ +[DEFAULT] +head = head_profiler.js +tail = +skip-if = toolkit == 'android' + +[test_start.js] +skip-if = true +[test_get_features.js] +[test_shared_library.js] +[test_run.js] +skip-if = true +[test_pause.js] +[test_enterjit_osr.js] +[test_enterjit_osr_disabling.js] +skip-if = !debug +[test_enterjit_osr_enabling.js] +skip-if = !debug +[test_asm.js]
\ No newline at end of file |