diff options
Diffstat (limited to 'tools/profiler/lul/LulMain.cpp')
-rw-r--r-- | tools/profiler/lul/LulMain.cpp | 1963 |
1 files changed, 0 insertions, 1963 deletions
diff --git a/tools/profiler/lul/LulMain.cpp b/tools/profiler/lul/LulMain.cpp deleted file mode 100644 index 2e78f03ec..000000000 --- a/tools/profiler/lul/LulMain.cpp +++ /dev/null @@ -1,1963 +0,0 @@ -/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* vim: set ts=8 sts=2 et sw=2 tw=80: */ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "LulMain.h" - -#include <string.h> -#include <stdlib.h> -#include <stdio.h> - -#include <algorithm> // std::sort -#include <string> - -#include "mozilla/Assertions.h" -#include "mozilla/ArrayUtils.h" -#include "mozilla/CheckedInt.h" -#include "mozilla/DebugOnly.h" -#include "mozilla/MemoryChecking.h" -#include "mozilla/Sprintf.h" - -#include "LulCommonExt.h" -#include "LulElfExt.h" - -#include "LulMainInt.h" - -#include "platform-linux-lul.h" // for gettid() - -// Set this to 1 for verbose logging -#define DEBUG_MAIN 0 - -namespace lul { - -using std::string; -using std::vector; -using std::pair; -using mozilla::CheckedInt; -using mozilla::DebugOnly; - - -// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING -// -// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT. -// Any such function -- and, hence, the transitive closure of those -// reachable from it -- must not do any dynamic memory allocation. -// Doing so risks deadlock. There is exactly one root function for -// the transitive closure: Lul::Unwind. -// -// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING - - -//////////////////////////////////////////////////////////////// -// RuleSet // -//////////////////////////////////////////////////////////////// - -static const char* -NameOf_DW_REG(int16_t aReg) -{ - switch (aReg) { - case DW_REG_CFA: return "cfa"; -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - case DW_REG_INTEL_XBP: return "xbp"; - case DW_REG_INTEL_XSP: return "xsp"; - case DW_REG_INTEL_XIP: return "xip"; -#elif defined(LUL_ARCH_arm) - case DW_REG_ARM_R7: return "r7"; - case DW_REG_ARM_R11: return "r11"; - case DW_REG_ARM_R12: return "r12"; - case DW_REG_ARM_R13: return "r13"; - case DW_REG_ARM_R14: return "r14"; - case DW_REG_ARM_R15: return "r15"; -#else -# error "Unsupported arch" -#endif - default: return "???"; - } -} - -string -LExpr::ShowRule(const char* aNewReg) const -{ - char buf[64]; - string res = string(aNewReg) + "="; - switch (mHow) { - case UNKNOWN: - res += "Unknown"; - break; - case NODEREF: - SprintfLiteral(buf, "%s+%d", - NameOf_DW_REG(mReg), (int)mOffset); - res += buf; - break; - case DEREF: - SprintfLiteral(buf, "*(%s+%d)", - NameOf_DW_REG(mReg), (int)mOffset); - res += buf; - break; - case PFXEXPR: - SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset); - res += buf; - break; - default: - res += "???"; - break; - } - return res; -} - -void -RuleSet::Print(void(*aLog)(const char*)) const -{ - char buf[96]; - SprintfLiteral(buf, "[%llx .. %llx]: let ", - (unsigned long long int)mAddr, - (unsigned long long int)(mAddr + mLen - 1)); - string res = string(buf); - res += mCfaExpr.ShowRule("cfa"); - res += " in"; - // For each reg we care about, print the recovery expression. -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - res += mXipExpr.ShowRule(" RA"); - res += mXspExpr.ShowRule(" SP"); - res += mXbpExpr.ShowRule(" BP"); -#elif defined(LUL_ARCH_arm) - res += mR15expr.ShowRule(" R15"); - res += mR7expr .ShowRule(" R7" ); - res += mR11expr.ShowRule(" R11"); - res += mR12expr.ShowRule(" R12"); - res += mR13expr.ShowRule(" R13"); - res += mR14expr.ShowRule(" R14"); -#else -# error "Unsupported arch" -#endif - aLog(res.c_str()); -} - -LExpr* -RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) { - switch (aRegno) { - case DW_REG_CFA: return &mCfaExpr; -# if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - case DW_REG_INTEL_XIP: return &mXipExpr; - case DW_REG_INTEL_XSP: return &mXspExpr; - case DW_REG_INTEL_XBP: return &mXbpExpr; -# elif defined(LUL_ARCH_arm) - case DW_REG_ARM_R15: return &mR15expr; - case DW_REG_ARM_R14: return &mR14expr; - case DW_REG_ARM_R13: return &mR13expr; - case DW_REG_ARM_R12: return &mR12expr; - case DW_REG_ARM_R11: return &mR11expr; - case DW_REG_ARM_R7: return &mR7expr; -# else -# error "Unknown arch" -# endif - default: return nullptr; - } -} - -RuleSet::RuleSet() -{ - mAddr = 0; - mLen = 0; - // The only other fields are of type LExpr and those are initialised - // by LExpr::LExpr(). -} - - -//////////////////////////////////////////////////////////////// -// SecMap // -//////////////////////////////////////////////////////////////// - -// See header file LulMainInt.h for comments about invariants. - -SecMap::SecMap(void(*aLog)(const char*)) - : mSummaryMinAddr(1) - , mSummaryMaxAddr(0) - , mUsable(true) - , mLog(aLog) -{} - -SecMap::~SecMap() { - mRuleSets.clear(); -} - -// RUNS IN NO-MALLOC CONTEXT -RuleSet* -SecMap::FindRuleSet(uintptr_t ia) { - // Binary search mRuleSets to find one that brackets |ia|. - // lo and hi need to be signed, else the loop termination tests - // don't work properly. Note that this works correctly even when - // mRuleSets.size() == 0. - - // Can't do this until the array has been sorted and preened. - MOZ_ASSERT(mUsable); - - long int lo = 0; - long int hi = (long int)mRuleSets.size() - 1; - while (true) { - // current unsearched space is from lo to hi, inclusive. - if (lo > hi) { - // not found - return nullptr; - } - long int mid = lo + ((hi - lo) / 2); - RuleSet* mid_ruleSet = &mRuleSets[mid]; - uintptr_t mid_minAddr = mid_ruleSet->mAddr; - uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1; - if (ia < mid_minAddr) { hi = mid-1; continue; } - if (ia > mid_maxAddr) { lo = mid+1; continue; } - MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); - return mid_ruleSet; - } - // NOTREACHED -} - -// Add a RuleSet to the collection. The rule is copied in. Calling -// this makes the map non-searchable. -void -SecMap::AddRuleSet(const RuleSet* rs) { - mUsable = false; - mRuleSets.push_back(*rs); -} - -// Add a PfxInstr to the vector of such instrs, and return the index -// in the vector. Calling this makes the map non-searchable. -uint32_t -SecMap::AddPfxInstr(PfxInstr pfxi) { - mUsable = false; - mPfxInstrs.push_back(pfxi); - return mPfxInstrs.size() - 1; -} - - -static bool -CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) { - return rs1.mAddr < rs2.mAddr; -} - -// Prepare the map for searching. Completely remove any which don't -// fall inside the specified range [start, +len). -void -SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen) -{ - if (mRuleSets.empty()) { - return; - } - - MOZ_ASSERT(aLen > 0); - if (aLen == 0) { - // This should never happen. - mRuleSets.clear(); - return; - } - - // Sort by start addresses. - std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE); - - // Detect any entry not completely contained within [start, +len). - // Set its length to zero, so that the next pass will remove it. - for (size_t i = 0; i < mRuleSets.size(); ++i) { - RuleSet* rs = &mRuleSets[i]; - if (rs->mLen > 0 && - (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) { - rs->mLen = 0; - } - } - - // Iteratively truncate any overlaps and remove any zero length - // entries that might result, or that may have been present - // initially. Unless the input is seriously screwy, this is - // expected to iterate only once. - while (true) { - size_t i; - size_t n = mRuleSets.size(); - size_t nZeroLen = 0; - - if (n == 0) { - break; - } - - for (i = 1; i < n; ++i) { - RuleSet* prev = &mRuleSets[i-1]; - RuleSet* here = &mRuleSets[i]; - MOZ_ASSERT(prev->mAddr <= here->mAddr); - if (prev->mAddr + prev->mLen > here->mAddr) { - prev->mLen = here->mAddr - prev->mAddr; - } - if (prev->mLen == 0) - nZeroLen++; - } - - if (mRuleSets[n-1].mLen == 0) { - nZeroLen++; - } - - // At this point, the entries are in-order and non-overlapping. - // If none of them are zero-length, we are done. - if (nZeroLen == 0) { - break; - } - - // Slide back the entries to remove the zero length ones. - size_t j = 0; // The write-point. - for (i = 0; i < n; ++i) { - if (mRuleSets[i].mLen == 0) { - continue; - } - if (j != i) mRuleSets[j] = mRuleSets[i]; - ++j; - } - MOZ_ASSERT(i == n); - MOZ_ASSERT(nZeroLen <= n); - MOZ_ASSERT(j == n - nZeroLen); - while (nZeroLen > 0) { - mRuleSets.pop_back(); - nZeroLen--; - } - - MOZ_ASSERT(mRuleSets.size() == j); - } - - size_t n = mRuleSets.size(); - -#ifdef DEBUG - // Do a final check on the rules: their address ranges must be - // ascending, non overlapping, non zero sized. - if (n > 0) { - MOZ_ASSERT(mRuleSets[0].mLen > 0); - for (size_t i = 1; i < n; ++i) { - RuleSet* prev = &mRuleSets[i-1]; - RuleSet* here = &mRuleSets[i]; - MOZ_ASSERT(prev->mAddr < here->mAddr); - MOZ_ASSERT(here->mLen > 0); - MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr); - } - } -#endif - - // Set the summary min and max address values. - if (n == 0) { - // Use the values defined in comments in the class declaration. - mSummaryMinAddr = 1; - mSummaryMaxAddr = 0; - } else { - mSummaryMinAddr = mRuleSets[0].mAddr; - mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1; - } - char buf[150]; - SprintfLiteral(buf, - "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n", - (int)n, (unsigned long long int)mSummaryMinAddr, - (unsigned long long int)mSummaryMaxAddr); - buf[sizeof(buf)-1] = 0; - mLog(buf); - - // Is now usable for binary search. - mUsable = true; - - if (0) { - mLog("\nRulesets after preening\n"); - for (size_t i = 0; i < mRuleSets.size(); ++i) { - mRuleSets[i].Print(mLog); - mLog("\n"); - } - mLog("\n"); - } -} - -bool SecMap::IsEmpty() { - return mRuleSets.empty(); -} - - -//////////////////////////////////////////////////////////////// -// SegArray // -//////////////////////////////////////////////////////////////// - -// A SegArray holds a set of address ranges that together exactly -// cover an address range, with no overlaps or holes. Each range has -// an associated value, which in this case has been specialised to be -// a simple boolean. The representation is kept to minimal canonical -// form in which adjacent ranges with the same associated value are -// merged together. Each range is represented by a |struct Seg|. -// -// SegArrays are used to keep track of which parts of the address -// space are known to contain instructions. -class SegArray { - - public: - void add(uintptr_t lo, uintptr_t hi, bool val) { - if (lo > hi) { - return; - } - split_at(lo); - if (hi < UINTPTR_MAX) { - split_at(hi+1); - } - std::vector<Seg>::size_type iLo, iHi, i; - iLo = find(lo); - iHi = find(hi); - for (i = iLo; i <= iHi; ++i) { - mSegs[i].val = val; - } - preen(); - } - - // RUNS IN NO-MALLOC CONTEXT - bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min, - /*OUT*/uintptr_t* rx_max, uintptr_t addr) { - std::vector<Seg>::size_type i = find(addr); - if (!mSegs[i].val) { - return false; - } - *rx_min = mSegs[i].lo; - *rx_max = mSegs[i].hi; - return true; - } - - SegArray() { - Seg s(0, UINTPTR_MAX, false); - mSegs.push_back(s); - } - - private: - struct Seg { - Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {} - uintptr_t lo; - uintptr_t hi; - bool val; - }; - - void preen() { - for (std::vector<Seg>::iterator iter = mSegs.begin(); - iter < mSegs.end()-1; - ++iter) { - if (iter[0].val != iter[1].val) { - continue; - } - iter[0].hi = iter[1].hi; - mSegs.erase(iter+1); - // Back up one, so as not to miss an opportunity to merge - // with the entry after this one. - --iter; - } - } - - // RUNS IN NO-MALLOC CONTEXT - std::vector<Seg>::size_type find(uintptr_t a) { - long int lo = 0; - long int hi = (long int)mSegs.size(); - while (true) { - // The unsearched space is lo .. hi inclusive. - if (lo > hi) { - // Not found. This can't happen. - return (std::vector<Seg>::size_type)(-1); - } - long int mid = lo + ((hi - lo) / 2); - uintptr_t mid_lo = mSegs[mid].lo; - uintptr_t mid_hi = mSegs[mid].hi; - if (a < mid_lo) { hi = mid-1; continue; } - if (a > mid_hi) { lo = mid+1; continue; } - return (std::vector<Seg>::size_type)mid; - } - } - - void split_at(uintptr_t a) { - std::vector<Seg>::size_type i = find(a); - if (mSegs[i].lo == a) { - return; - } - mSegs.insert( mSegs.begin()+i+1, mSegs[i] ); - mSegs[i].hi = a-1; - mSegs[i+1].lo = a; - } - - void show() { - printf("<< %d entries:\n", (int)mSegs.size()); - for (std::vector<Seg>::iterator iter = mSegs.begin(); - iter < mSegs.end(); - ++iter) { - printf(" %016llx %016llx %s\n", - (unsigned long long int)(*iter).lo, - (unsigned long long int)(*iter).hi, - (*iter).val ? "true" : "false"); - } - printf(">>\n"); - } - - std::vector<Seg> mSegs; -}; - - -//////////////////////////////////////////////////////////////// -// PriMap // -//////////////////////////////////////////////////////////////// - -class PriMap { - public: - explicit PriMap(void (*aLog)(const char*)) - : mLog(aLog) - {} - - ~PriMap() { - for (std::vector<SecMap*>::iterator iter = mSecMaps.begin(); - iter != mSecMaps.end(); - ++iter) { - delete *iter; - } - mSecMaps.clear(); - } - - // RUNS IN NO-MALLOC CONTEXT - pair<const RuleSet*, const vector<PfxInstr>*> - Lookup(uintptr_t ia) - { - SecMap* sm = FindSecMap(ia); - return pair<const RuleSet*, const vector<PfxInstr>*> - (sm ? sm->FindRuleSet(ia) : nullptr, - sm ? sm->GetPfxInstrs() : nullptr); - } - - // Add a secondary map. No overlaps allowed w.r.t. existing - // secondary maps. - void AddSecMap(SecMap* aSecMap) { - // We can't add an empty SecMap to the PriMap. But that's OK - // since we'd never be able to find anything in it anyway. - if (aSecMap->IsEmpty()) { - return; - } - - // Iterate through the SecMaps and find the right place for this - // one. At the same time, ensure that the in-order - // non-overlapping invariant is preserved (and, generally, holds). - // FIXME: this gives a cost that is O(N^2) in the total number of - // shared objects in the system. ToDo: better. - MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr); - - size_t num_secMaps = mSecMaps.size(); - uintptr_t i; - for (i = 0; i < num_secMaps; ++i) { - SecMap* sm_i = mSecMaps[i]; - MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr); - if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) { - // |aSecMap| needs to be inserted immediately before mSecMaps[i]. - break; - } - } - MOZ_ASSERT(i <= num_secMaps); - if (i == num_secMaps) { - // It goes at the end. - mSecMaps.push_back(aSecMap); - } else { - std::vector<SecMap*>::iterator iter = mSecMaps.begin() + i; - mSecMaps.insert(iter, aSecMap); - } - char buf[100]; - SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n", - (int)mSecMaps.size()); - buf[sizeof(buf)-1] = 0; - mLog(buf); - } - - // Remove and delete any SecMaps in the mapping, that intersect - // with the specified address range. - void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) { - MOZ_ASSERT(avma_min <= avma_max); - size_t num_secMaps = mSecMaps.size(); - if (num_secMaps > 0) { - intptr_t i; - // Iterate from end to start over the vector, so as to ensure - // that the special case where |avma_min| and |avma_max| denote - // the entire address space, can be completed in time proportional - // to the number of elements in the map. - for (i = (intptr_t)num_secMaps-1; i >= 0; i--) { - SecMap* sm_i = mSecMaps[i]; - if (sm_i->mSummaryMaxAddr < avma_min || - avma_max < sm_i->mSummaryMinAddr) { - // There's no overlap. Move on. - continue; - } - // We need to remove mSecMaps[i] and slide all those above it - // downwards to cover the hole. - mSecMaps.erase(mSecMaps.begin() + i); - delete sm_i; - } - } - } - - // Return the number of currently contained SecMaps. - size_t CountSecMaps() { - return mSecMaps.size(); - } - - // Assess heuristically whether the given address is an instruction - // immediately following a call instruction. - // RUNS IN NO-MALLOC CONTEXT - bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) { - if (!aInstrAddr.Valid()) { - return false; - } - - uintptr_t ia = aInstrAddr.Value(); - - // Assume that nobody would be crazy enough to put code in the - // first or last page. - if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) { - return false; - } - - // See if it falls inside a known r-x mapped area. Poking around - // outside such places risks segfaulting. - uintptr_t insns_min, insns_max; - bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia); - if (!b) { - // no code (that we know about) at this address - return false; - } - - // |ia| falls within an r-x range. So we can - // safely poke around in [insns_min, insns_max]. - -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - // Is the previous instruction recognisably a CALL? This is - // common for the 32- and 64-bit versions, except for the - // simm32(%rip) case, which is 64-bit only. - // - // For all other cases, the 64 bit versions are either identical - // to the 32 bit versions, or have an optional extra leading REX.W - // byte (0x41). Since the extra 0x41 is optional we have to - // ignore it, with the convenient result that the same matching - // logic works for both 32- and 64-bit cases. - - uint8_t* p = (uint8_t*)ia; -# if defined(LUL_ARCH_x64) - // CALL simm32(%rip) == FF15 simm32 - if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) { - return true; - } -# endif - // CALL rel32 == E8 rel32 (both 32- and 64-bit) - if (ia - 5 >= insns_min && p[-5] == 0xE8) { - return true; - } - // CALL *%eax .. CALL *%edi == FFD0 .. FFD7 (32-bit) - // CALL *%rax .. CALL *%rdi == FFD0 .. FFD7 (64-bit) - // CALL *%r8 .. CALL *%r15 == 41FFD0 .. 41FFD7 (64-bit) - if (ia - 2 >= insns_min && - p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) { - return true; - } - // Almost all of the remaining cases that occur in practice are - // of the form CALL *simm8(reg) or CALL *simm32(reg). - // - // 64 bit cases: - // - // call *simm8(%rax) FF50 simm8 - // call *simm8(%rcx) FF51 simm8 - // call *simm8(%rdx) FF52 simm8 - // call *simm8(%rbx) FF53 simm8 - // call *simm8(%rsp) FF5424 simm8 - // call *simm8(%rbp) FF55 simm8 - // call *simm8(%rsi) FF56 simm8 - // call *simm8(%rdi) FF57 simm8 - // - // call *simm8(%r8) 41FF50 simm8 - // call *simm8(%r9) 41FF51 simm8 - // call *simm8(%r10) 41FF52 simm8 - // call *simm8(%r11) 41FF53 simm8 - // call *simm8(%r12) 41FF5424 simm8 - // call *simm8(%r13) 41FF55 simm8 - // call *simm8(%r14) 41FF56 simm8 - // call *simm8(%r15) 41FF57 simm8 - // - // call *simm32(%rax) FF90 simm32 - // call *simm32(%rcx) FF91 simm32 - // call *simm32(%rdx) FF92 simm32 - // call *simm32(%rbx) FF93 simm32 - // call *simm32(%rsp) FF9424 simm32 - // call *simm32(%rbp) FF95 simm32 - // call *simm32(%rsi) FF96 simm32 - // call *simm32(%rdi) FF97 simm32 - // - // call *simm32(%r8) 41FF90 simm32 - // call *simm32(%r9) 41FF91 simm32 - // call *simm32(%r10) 41FF92 simm32 - // call *simm32(%r11) 41FF93 simm32 - // call *simm32(%r12) 41FF9424 simm32 - // call *simm32(%r13) 41FF95 simm32 - // call *simm32(%r14) 41FF96 simm32 - // call *simm32(%r15) 41FF97 simm32 - // - // 32 bit cases: - // - // call *simm8(%eax) FF50 simm8 - // call *simm8(%ecx) FF51 simm8 - // call *simm8(%edx) FF52 simm8 - // call *simm8(%ebx) FF53 simm8 - // call *simm8(%esp) FF5424 simm8 - // call *simm8(%ebp) FF55 simm8 - // call *simm8(%esi) FF56 simm8 - // call *simm8(%edi) FF57 simm8 - // - // call *simm32(%eax) FF90 simm32 - // call *simm32(%ecx) FF91 simm32 - // call *simm32(%edx) FF92 simm32 - // call *simm32(%ebx) FF93 simm32 - // call *simm32(%esp) FF9424 simm32 - // call *simm32(%ebp) FF95 simm32 - // call *simm32(%esi) FF96 simm32 - // call *simm32(%edi) FF97 simm32 - if (ia - 3 >= insns_min && - p[-3] == 0xFF && - (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) { - // imm8 case, not including %esp/%rsp - return true; - } - if (ia - 4 >= insns_min && - p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) { - // imm8 case for %esp/%rsp - return true; - } - if (ia - 6 >= insns_min && - p[-6] == 0xFF && - (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) { - // imm32 case, not including %esp/%rsp - return true; - } - if (ia - 7 >= insns_min && - p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) { - // imm32 case for %esp/%rsp - return true; - } - -#elif defined(LUL_ARCH_arm) - if (ia & 1) { - uint16_t w0 = 0, w1 = 0; - // The return address has its lowest bit set, indicating a return - // to Thumb code. - ia &= ~(uintptr_t)1; - if (ia - 2 >= insns_min && ia - 1 <= insns_max) { - w1 = *(uint16_t*)(ia - 2); - } - if (ia - 4 >= insns_min && ia - 1 <= insns_max) { - w0 = *(uint16_t*)(ia - 4); - } - // Is it a 32-bit Thumb call insn? - // BL simm26 (Encoding T1) - if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) { - return true; - } - // BLX simm26 (Encoding T2) - if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) { - return true; - } - // Other possible cases: - // (BLX Rm, Encoding T1). - // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.) - // 0100 0111 1 Rm 000 - } else { - // Returning to ARM code. - uint32_t a0 = 0; - if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) { - a0 = *(uint32_t*)(ia - 4); - } - // Leading E forces unconditional only -- fix. It could be - // anything except F, which is the deprecated NV code. - // BL simm26 (Encoding A1) - if ((a0 & 0xFF000000) == 0xEB000000) { - return true; - } - // Other possible cases: - // BLX simm26 (Encoding A2) - //if ((a0 & 0xFE000000) == 0xFA000000) - // return true; - // BLX (register) (A1): BLX <c> <Rm> - // cond 0001 0010 1111 1111 1111 0011 Rm - // again, cond can be anything except NV (0xF) - } - -#else -# error "Unsupported arch" -#endif - - // Not an insn we recognise. - return false; - } - - private: - // RUNS IN NO-MALLOC CONTEXT - SecMap* FindSecMap(uintptr_t ia) { - // Binary search mSecMaps to find one that brackets |ia|. - // lo and hi need to be signed, else the loop termination tests - // don't work properly. - long int lo = 0; - long int hi = (long int)mSecMaps.size() - 1; - while (true) { - // current unsearched space is from lo to hi, inclusive. - if (lo > hi) { - // not found - return nullptr; - } - long int mid = lo + ((hi - lo) / 2); - SecMap* mid_secMap = mSecMaps[mid]; - uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr; - uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr; - if (ia < mid_minAddr) { hi = mid-1; continue; } - if (ia > mid_maxAddr) { lo = mid+1; continue; } - MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr); - return mid_secMap; - } - // NOTREACHED - } - - private: - // sorted array of per-object ranges, non overlapping, non empty - std::vector<SecMap*> mSecMaps; - - // a logging sink, for debugging. - void (*mLog)(const char*); -}; - - -//////////////////////////////////////////////////////////////// -// LUL // -//////////////////////////////////////////////////////////////// - -#define LUL_LOG(_str) \ - do { \ - char buf[200]; \ - SprintfLiteral(buf, \ - "LUL: pid %d tid %d lul-obj %p: %s", \ - getpid(), gettid(), this, (_str)); \ - buf[sizeof(buf)-1] = 0; \ - mLog(buf); \ - } while (0) - -LUL::LUL(void (*aLog)(const char*)) - : mLog(aLog) - , mAdminMode(true) - , mAdminThreadId(gettid()) - , mPriMap(new PriMap(aLog)) - , mSegArray(new SegArray()) - , mUSU(new UniqueStringUniverse()) -{ - LUL_LOG("LUL::LUL: Created object"); -} - - -LUL::~LUL() -{ - LUL_LOG("LUL::~LUL: Destroyed object"); - delete mPriMap; - delete mSegArray; - mLog = nullptr; - delete mUSU; -} - - -void -LUL::MaybeShowStats() -{ - // This is racey in the sense that it can't guarantee that - // n_new == n_new_Context + n_new_CFI + n_new_Scanned - // if it should happen that mStats is updated by some other thread - // in between computation of n_new and n_new_{Context,CFI,Scanned}. - // But it's just stats printing, so we don't really care. - uint32_t n_new = mStats - mStatsPrevious; - if (n_new >= 5000) { - uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext; - uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI; - uint32_t n_new_Scanned = mStats.mScanned - mStatsPrevious.mScanned; - mStatsPrevious = mStats; - char buf[200]; - SprintfLiteral(buf, - "LUL frame stats: TOTAL %5u" - " CTX %4u CFI %4u SCAN %4u", - n_new, n_new_Context, n_new_CFI, n_new_Scanned); - buf[sizeof(buf)-1] = 0; - mLog(buf); - } -} - - -void -LUL::EnableUnwinding() -{ - LUL_LOG("LUL::EnableUnwinding"); - // Don't assert for Admin mode here. That is, tolerate a call here - // if we are already in Unwinding mode. - MOZ_ASSERT(gettid() == mAdminThreadId); - - mAdminMode = false; -} - - -void -LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, - const char* aFileName, const void* aMappedImage) -{ - MOZ_ASSERT(mAdminMode); - MOZ_ASSERT(gettid() == mAdminThreadId); - - mLog(":\n"); - char buf[200]; - SprintfLiteral(buf, "NotifyMap %llx %llu %s\n", - (unsigned long long int)aRXavma, (unsigned long long int)aSize, - aFileName); - buf[sizeof(buf)-1] = 0; - mLog(buf); - - // Ignore obviously-stupid notifications. - if (aSize > 0) { - - // Here's a new mapping, for this object. - SecMap* smap = new SecMap(mLog); - - // Read CFI or EXIDX unwind data into |smap|. - if (!aMappedImage) { - (void)lul::ReadSymbolData( - string(aFileName), std::vector<string>(), smap, - (void*)aRXavma, aSize, mUSU, mLog); - } else { - (void)lul::ReadSymbolDataInternal( - (const uint8_t*)aMappedImage, - string(aFileName), std::vector<string>(), smap, - (void*)aRXavma, aSize, mUSU, mLog); - } - - mLog("NotifyMap .. preparing entries\n"); - - smap->PrepareRuleSets(aRXavma, aSize); - - SprintfLiteral(buf, - "NotifyMap got %lld entries\n", (long long int)smap->Size()); - buf[sizeof(buf)-1] = 0; - mLog(buf); - - // Add it to the primary map (the top level set of mapped objects). - mPriMap->AddSecMap(smap); - - // Tell the segment array about the mapping, so that the stack - // scan and __kernel_syscall mechanisms know where valid code is. - mSegArray->add(aRXavma, aRXavma + aSize - 1, true); - } -} - - -void -LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) -{ - MOZ_ASSERT(mAdminMode); - MOZ_ASSERT(gettid() == mAdminThreadId); - - mLog(":\n"); - char buf[200]; - SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n", - (unsigned long long int)aRXavma, (unsigned long long int)aSize); - buf[sizeof(buf)-1] = 0; - mLog(buf); - - // Ignore obviously-stupid notifications. - if (aSize > 0) { - // Tell the segment array about the mapping, so that the stack - // scan and __kernel_syscall mechanisms know where valid code is. - mSegArray->add(aRXavma, aRXavma + aSize - 1, true); - } -} - - -void -LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) -{ - MOZ_ASSERT(mAdminMode); - MOZ_ASSERT(gettid() == mAdminThreadId); - - mLog(":\n"); - char buf[100]; - SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n", - (unsigned long long int)aRXavmaMin, - (unsigned long long int)aRXavmaMax); - buf[sizeof(buf)-1] = 0; - mLog(buf); - - MOZ_ASSERT(aRXavmaMin <= aRXavmaMax); - - // Remove from the primary map, any secondary maps that intersect - // with the address range. Also delete the secondary maps. - mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax); - - // Tell the segment array that the address range no longer - // contains valid code. - mSegArray->add(aRXavmaMin, aRXavmaMax, false); - - SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n", - (int)mPriMap->CountSecMaps()); - buf[sizeof(buf)-1] = 0; - mLog(buf); -} - - -size_t -LUL::CountMappings() -{ - MOZ_ASSERT(mAdminMode); - MOZ_ASSERT(gettid() == mAdminThreadId); - - return mPriMap->CountSecMaps(); -} - - -// RUNS IN NO-MALLOC CONTEXT -static -TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) -{ - if (!aAddr.Valid()) { - return TaggedUWord(); - } - - // Lower limit check. |aAddr.Value()| is the lowest requested address - // and |aStackImg->mStartAvma| is the lowest address we actually have, - // so the comparison is straightforward. - if (aAddr.Value() < aStackImg->mStartAvma) { - return TaggedUWord(); - } - - // Upper limit check. We must compute the highest requested address - // and the highest address we actually have, but being careful to - // avoid overflow. In particular if |aAddr| is 0xFFF...FFF or the - // 3/7 values below that, then we will get overflow. See bug #1245477. - typedef CheckedInt<uintptr_t> CheckedUWord; - CheckedUWord highest_requested_plus_one - = CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t)); - CheckedUWord highest_available_plus_one - = CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen); - if (!highest_requested_plus_one.isValid() // overflow? - || !highest_available_plus_one.isValid() // overflow? - || (highest_requested_plus_one.value() - > highest_available_plus_one.value())) { // in range? - return TaggedUWord(); - } - - return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + aAddr.Value() - - aStackImg->mStartAvma)); -} - -// RUNS IN NO-MALLOC CONTEXT -static -TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs, - TaggedUWord aCFA) -{ - switch (aReg) { - case DW_REG_CFA: return aCFA; -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - case DW_REG_INTEL_XBP: return aOldRegs->xbp; - case DW_REG_INTEL_XSP: return aOldRegs->xsp; - case DW_REG_INTEL_XIP: return aOldRegs->xip; -#elif defined(LUL_ARCH_arm) - case DW_REG_ARM_R7: return aOldRegs->r7; - case DW_REG_ARM_R11: return aOldRegs->r11; - case DW_REG_ARM_R12: return aOldRegs->r12; - case DW_REG_ARM_R13: return aOldRegs->r13; - case DW_REG_ARM_R14: return aOldRegs->r14; - case DW_REG_ARM_R15: return aOldRegs->r15; -#else -# error "Unsupported arch" -#endif - default: MOZ_ASSERT(0); return TaggedUWord(); - } -} - -// RUNS IN NO-MALLOC CONTEXT -// See prototype for comment. -TaggedUWord EvaluatePfxExpr(int32_t start, - const UnwindRegs* aOldRegs, - TaggedUWord aCFA, const StackImage* aStackImg, - const vector<PfxInstr>& aPfxInstrs) -{ - // A small evaluation stack, and a stack pointer, which points to - // the highest numbered in-use element. - const int N_STACK = 10; - TaggedUWord stack[N_STACK]; - int stackPointer = -1; - for (int i = 0; i < N_STACK; i++) - stack[i] = TaggedUWord(); - -# define PUSH(_tuw) \ - do { \ - if (stackPointer >= N_STACK-1) goto fail; /* overflow */ \ - stack[++stackPointer] = (_tuw); \ - } while (0) - -# define POP(_lval) \ - do { \ - if (stackPointer < 0) goto fail; /* underflow */ \ - _lval = stack[stackPointer--]; \ - } while (0) - - // Cursor in the instruction sequence. - size_t curr = start + 1; - - // Check the start point is sane. - size_t nInstrs = aPfxInstrs.size(); - if (start < 0 || (size_t)start >= nInstrs) - goto fail; - - { - // The instruction sequence must start with PX_Start. If not, - // something is seriously wrong. - PfxInstr first = aPfxInstrs[start]; - if (first.mOpcode != PX_Start) - goto fail; - - // Push the CFA on the stack to start with (or not), as required by - // the original DW_OP_*expression* CFI. - if (first.mOperand != 0) - PUSH(aCFA); - } - - while (true) { - if (curr >= nInstrs) - goto fail; // ran off the end of the sequence - - PfxInstr pfxi = aPfxInstrs[curr++]; - if (pfxi.mOpcode == PX_End) - break; // we're done - - switch (pfxi.mOpcode) { - case PX_Start: - // This should appear only at the start of the sequence. - goto fail; - case PX_End: - // We just took care of that, so we shouldn't see it again. - MOZ_ASSERT(0); - goto fail; - case PX_SImm32: - PUSH(TaggedUWord((intptr_t)pfxi.mOperand)); - break; - case PX_DwReg: { - DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand; - MOZ_ASSERT(reg != DW_REG_CFA); - PUSH(EvaluateReg(reg, aOldRegs, aCFA)); - break; - } - case PX_Deref: { - TaggedUWord addr; - POP(addr); - PUSH(DerefTUW(addr, aStackImg)); - break; - } - case PX_Add: { - TaggedUWord x, y; - POP(x); POP(y); PUSH(y + x); - break; - } - case PX_Sub: { - TaggedUWord x, y; - POP(x); POP(y); PUSH(y - x); - break; - } - case PX_And: { - TaggedUWord x, y; - POP(x); POP(y); PUSH(y & x); - break; - } - case PX_Or: { - TaggedUWord x, y; - POP(x); POP(y); PUSH(y | x); - break; - } - case PX_CmpGES: { - TaggedUWord x, y; - POP(x); POP(y); PUSH(y.CmpGEs(x)); - break; - } - case PX_Shl: { - TaggedUWord x, y; - POP(x); POP(y); PUSH(y << x); - break; - } - default: - MOZ_ASSERT(0); - goto fail; - } - } // while (true) - - // Evaluation finished. The top value on the stack is the result. - if (stackPointer >= 0) { - return stack[stackPointer]; - } - // Else fall through - - fail: - return TaggedUWord(); - -# undef PUSH -# undef POP -} - -// RUNS IN NO-MALLOC CONTEXT -TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, - TaggedUWord aCFA, const StackImage* aStackImg, - const vector<PfxInstr>* aPfxInstrs) const -{ - switch (mHow) { - case UNKNOWN: - return TaggedUWord(); - case NODEREF: { - TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); - tuw = tuw + TaggedUWord((intptr_t)mOffset); - return tuw; - } - case DEREF: { - TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA); - tuw = tuw + TaggedUWord((intptr_t)mOffset); - return DerefTUW(tuw, aStackImg); - } - case PFXEXPR: { - MOZ_ASSERT(aPfxInstrs); - if (!aPfxInstrs) { - return TaggedUWord(); - } - return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs); - } - default: - MOZ_ASSERT(0); - return TaggedUWord(); - } -} - -// RUNS IN NO-MALLOC CONTEXT -static -void UseRuleSet(/*MOD*/UnwindRegs* aRegs, - const StackImage* aStackImg, const RuleSet* aRS, - const vector<PfxInstr>* aPfxInstrs) -{ - // Take a copy of regs, since we'll need to refer to the old values - // whilst computing the new ones. - UnwindRegs old_regs = *aRegs; - - // Mark all the current register values as invalid, so that the - // caller can see, on our return, which ones have been computed - // anew. If we don't even manage to compute a new PC value, then - // the caller will have to abandon the unwind. - // FIXME: Create and use instead: aRegs->SetAllInvalid(); -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - aRegs->xbp = TaggedUWord(); - aRegs->xsp = TaggedUWord(); - aRegs->xip = TaggedUWord(); -#elif defined(LUL_ARCH_arm) - aRegs->r7 = TaggedUWord(); - aRegs->r11 = TaggedUWord(); - aRegs->r12 = TaggedUWord(); - aRegs->r13 = TaggedUWord(); - aRegs->r14 = TaggedUWord(); - aRegs->r15 = TaggedUWord(); -#else -# error "Unsupported arch" -#endif - - // This is generally useful. - const TaggedUWord inval = TaggedUWord(); - - // First, compute the CFA. - TaggedUWord cfa - = aRS->mCfaExpr.EvaluateExpr(&old_regs, - inval/*old cfa*/, aStackImg, aPfxInstrs); - - // If we didn't manage to compute the CFA, well .. that's ungood, - // but keep going anyway. It'll be OK provided none of the register - // value rules mention the CFA. In any case, compute the new values - // for each register that we're tracking. - -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - aRegs->xbp - = aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); - aRegs->xsp - = aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); - aRegs->xip - = aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); -#elif defined(LUL_ARCH_arm) - aRegs->r7 - = aRS->mR7expr .EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); - aRegs->r11 - = aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); - aRegs->r12 - = aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); - aRegs->r13 - = aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); - aRegs->r14 - = aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); - aRegs->r15 - = aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs); -#else -# error "Unsupported arch" -#endif - - // We're done. Any regs for which we didn't manage to compute a - // new value will now be marked as invalid. -} - -// RUNS IN NO-MALLOC CONTEXT -void -LUL::Unwind(/*OUT*/uintptr_t* aFramePCs, - /*OUT*/uintptr_t* aFrameSPs, - /*OUT*/size_t* aFramesUsed, - /*OUT*/size_t* aScannedFramesAcquired, - size_t aFramesAvail, - size_t aScannedFramesAllowed, - UnwindRegs* aStartRegs, StackImage* aStackImg) -{ - MOZ_ASSERT(!mAdminMode); - - ///////////////////////////////////////////////////////// - // BEGIN UNWIND - - *aFramesUsed = 0; - - UnwindRegs regs = *aStartRegs; - TaggedUWord last_valid_sp = TaggedUWord(); - - // Stack-scan control - unsigned int n_scanned_frames = 0; // # s-s frames recovered so far - static const int NUM_SCANNED_WORDS = 50; // max allowed scan length - - while (true) { - - if (DEBUG_MAIN) { - char buf[300]; - mLog("\n"); -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - SprintfLiteral(buf, - "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n", - (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(), - (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(), - (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value()); - buf[sizeof(buf)-1] = 0; - mLog(buf); -#elif defined(LUL_ARCH_arm) - SprintfLiteral(buf, - "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx" - " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n", - (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(), - (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(), - (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(), - (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(), - (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(), - (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value()); - buf[sizeof(buf)-1] = 0; - mLog(buf); -#else -# error "Unsupported arch" -#endif - } - -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - TaggedUWord ia = regs.xip; - TaggedUWord sp = regs.xsp; -#elif defined(LUL_ARCH_arm) - TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14); - TaggedUWord sp = regs.r13; -#else -# error "Unsupported arch" -#endif - - if (*aFramesUsed >= aFramesAvail) { - break; - } - - // If we don't have a valid value for the PC, give up. - if (!ia.Valid()) { - break; - } - - // If this is the innermost frame, record the SP value, which - // presumably is valid. If this isn't the innermost frame, and we - // have a valid SP value, check that its SP value isn't less that - // the one we've seen so far, so as to catch potential SP value - // cycles. - if (*aFramesUsed == 0) { - last_valid_sp = sp; - } else { - MOZ_ASSERT(last_valid_sp.Valid()); - if (sp.Valid()) { - if (sp.Value() < last_valid_sp.Value()) { - // Hmm, SP going in the wrong direction. Let's stop. - break; - } - // Remember where we got to. - last_valid_sp = sp; - } - } - - // For the innermost frame, the IA value is what we need. For all - // other frames, it's actually the return address, so back up one - // byte so as to get it into the calling instruction. - aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1); - aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0; - (*aFramesUsed)++; - - // Find the RuleSet for the current IA, if any. This will also - // query the backing (secondary) maps if it isn't found in the - // thread-local cache. - - // If this isn't the innermost frame, back up into the calling insn. - if (*aFramesUsed > 1) { - ia = ia + TaggedUWord((uintptr_t)(-1)); - } - - pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs - = mPriMap->Lookup(ia.Value()); - const RuleSet* ruleset = ruleset_and_pfxinstrs.first; - const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second; - - if (DEBUG_MAIN) { - char buf[100]; - SprintfLiteral(buf, "ruleset for 0x%llx = %p\n", - (unsigned long long int)ia.Value(), ruleset); - buf[sizeof(buf)-1] = 0; - mLog(buf); - } - - ///////////////////////////////////////////// - //// - // On 32 bit x86-linux, syscalls are often done via the VDSO - // function __kernel_vsyscall, which doesn't have a corresponding - // object that we can read debuginfo from. That effectively kills - // off all stack traces for threads blocked in syscalls. Hence - // special-case by looking at the code surrounding the program - // counter. - // - // 0xf7757420 <__kernel_vsyscall+0>: push %ecx - // 0xf7757421 <__kernel_vsyscall+1>: push %edx - // 0xf7757422 <__kernel_vsyscall+2>: push %ebp - // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp - // 0xf7757425 <__kernel_vsyscall+5>: sysenter - // 0xf7757427 <__kernel_vsyscall+7>: nop - // 0xf7757428 <__kernel_vsyscall+8>: nop - // 0xf7757429 <__kernel_vsyscall+9>: nop - // 0xf775742a <__kernel_vsyscall+10>: nop - // 0xf775742b <__kernel_vsyscall+11>: nop - // 0xf775742c <__kernel_vsyscall+12>: nop - // 0xf775742d <__kernel_vsyscall+13>: nop - // 0xf775742e <__kernel_vsyscall+14>: int $0x80 - // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp - // 0xf7757431 <__kernel_vsyscall+17>: pop %edx - // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx - // 0xf7757433 <__kernel_vsyscall+19>: ret - // - // In cases where the sampled thread is blocked in a syscall, its - // program counter will point at "pop %ebp". Hence we look for - // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and - // the corresponding register-recovery actions are: - // new_ebp = *(old_esp + 0) - // new eip = *(old_esp + 12) - // new_esp = old_esp + 16 - // - // It may also be the case that the program counter points two - // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in - // the case where the syscall has been restarted but the thread - // hasn't been rescheduled. The code below doesn't handle that; - // it could easily be made to. - // -#if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux) - if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) { - uintptr_t insns_min, insns_max; - uintptr_t eip = ia.Value(); - bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip); - if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) { - uint8_t* eipC = (uint8_t*)eip; - if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D && - eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) { - TaggedUWord sp_plus_0 = sp; - TaggedUWord sp_plus_12 = sp; - TaggedUWord sp_plus_16 = sp; - sp_plus_12 = sp_plus_12 + TaggedUWord(12); - sp_plus_16 = sp_plus_16 + TaggedUWord(16); - TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg); - TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg); - TaggedUWord new_esp = sp_plus_16; - if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) { - regs.xbp = new_ebp; - regs.xip = new_eip; - regs.xsp = new_esp; - continue; - } - } - } - } -#endif - //// - ///////////////////////////////////////////// - - // So, do we have a ruleset for this address? If so, use it now. - if (ruleset) { - - if (DEBUG_MAIN) { - ruleset->Print(mLog); mLog("\n"); - } - // Use the RuleSet to compute the registers for the previous - // frame. |regs| is modified in-place. - UseRuleSet(®s, aStackImg, ruleset, pfxinstrs); - - } else { - - // There's no RuleSet for the specified address, so see if - // it's possible to get anywhere by stack-scanning. - - // Use stack scanning frugally. - if (n_scanned_frames++ >= aScannedFramesAllowed) { - break; - } - - // We can't scan the stack without a valid, aligned stack pointer. - if (!sp.IsAligned()) { - break; - } - - bool scan_succeeded = false; - for (int i = 0; i < NUM_SCANNED_WORDS; ++i) { - TaggedUWord aWord = DerefTUW(sp, aStackImg); - // aWord is something we fished off the stack. It should be - // valid, unless we overran the stack bounds. - if (!aWord.Valid()) { - break; - } - - // Now, does aWord point inside a text section and immediately - // after something that looks like a call instruction? - if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) { - // Yes it does. Update the unwound registers heuristically, - // using the same schemes as Breakpad does. - scan_succeeded = true; - (*aScannedFramesAcquired)++; - -#if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86) - // The same logic applies for the 32- and 64-bit cases. - // Register names of the form xsp etc refer to (eg) esp in - // the 32-bit case and rsp in the 64-bit case. -# if defined(LUL_ARCH_x64) - const int wordSize = 8; -# else - const int wordSize = 4; -# endif - // The return address -- at XSP -- will have been pushed by - // the CALL instruction. So the caller's XSP value - // immediately before and after that CALL instruction is the - // word above XSP. - regs.xsp = sp + TaggedUWord(wordSize); - - // aWord points at the return point, so back up one byte - // to put it in the calling instruction. - regs.xip = aWord + TaggedUWord((uintptr_t)(-1)); - - // Computing a new value from the frame pointer is more tricky. - if (regs.xbp.Valid() && - sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) { - // One possibility is that the callee begins with the standard - // preamble "push %xbp; mov %xsp, %xbp". In which case, the - // (1) caller's XBP value will be at the word below XSP, and - // (2) the current (callee's) XBP will point at that word: - regs.xbp = DerefTUW(regs.xbp, aStackImg); - } else if (regs.xbp.Valid() && - sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) { - // If that didn't work out, maybe the callee didn't change - // XBP, so it still holds the caller's value. For that to - // be plausible, XBP will need to have a value at least - // higher than XSP since that holds the purported return - // address. In which case do nothing, since XBP already - // holds the "right" value. - } else { - // Mark XBP as invalid, so that subsequent unwind iterations - // don't assume it holds valid data. - regs.xbp = TaggedUWord(); - } - - // Move on to the next word up the stack - sp = sp + TaggedUWord(wordSize); - -#elif defined(LUL_ARCH_arm) - // Set all registers to be undefined, except for SP(R13) and - // PC(R15). - - // aWord points either at the return point, if returning to - // ARM code, or one insn past the return point if returning - // to Thumb code. In both cases, aWord-2 is guaranteed to - // fall within the calling instruction. - regs.r15 = aWord + TaggedUWord((uintptr_t)(-2)); - - // Make SP be the word above the location where the return - // address was found. - regs.r13 = sp + TaggedUWord(4); - - // All other regs are undefined. - regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord(); - - // Move on to the next word up the stack - sp = sp + TaggedUWord(4); - -#else -# error "Unknown plat" -#endif - - break; - } - - } // for (int i = 0; i < NUM_SCANNED_WORDS; i++) - - // We tried to make progress by scanning the stack, but failed. - // So give up -- fall out of the top level unwind loop. - if (!scan_succeeded) { - break; - } - } - - } // top level unwind loop - - // END UNWIND - ///////////////////////////////////////////////////////// -} - - -//////////////////////////////////////////////////////////////// -// LUL Unit Testing // -//////////////////////////////////////////////////////////////// - -static const int LUL_UNIT_TEST_STACK_SIZE = 16384; - -// This function is innermost in the test call sequence. It uses LUL -// to unwind, and compares the result with the sequence specified in -// the director string. These need to agree in order for the test to -// pass. In order not to screw up the results, this function needs -// to have a not-very big stack frame, since we're only presenting -// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and -// that chunk unavoidably includes the frame for this function. -// -// This function must not be inlined into its callers. Doing so will -// cause the expected-vs-actual backtrace consistency checking to -// fail. Prints summary results to |aLUL|'s logging sink and also -// returns a boolean indicating whether or not the test passed. -static __attribute__((noinline)) -bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring) -{ - // Get hold of the current unwind-start registers. - UnwindRegs startRegs; - memset(&startRegs, 0, sizeof(startRegs)); -#if defined(LUL_PLAT_x64_linux) - volatile uintptr_t block[3]; - MOZ_ASSERT(sizeof(block) == 24); - __asm__ __volatile__( - "leaq 0(%%rip), %%r15" "\n\t" - "movq %%r15, 0(%0)" "\n\t" - "movq %%rsp, 8(%0)" "\n\t" - "movq %%rbp, 16(%0)" "\n" - : : "r"(&block[0]) : "memory", "r15" - ); - startRegs.xip = TaggedUWord(block[0]); - startRegs.xsp = TaggedUWord(block[1]); - startRegs.xbp = TaggedUWord(block[2]); - const uintptr_t REDZONE_SIZE = 128; - uintptr_t start = block[1] - REDZONE_SIZE; -#elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android) - volatile uintptr_t block[3]; - MOZ_ASSERT(sizeof(block) == 12); - __asm__ __volatile__( - ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/ "\n\t" - "popl %%edi" "\n\t" - "movl %%edi, 0(%0)" "\n\t" - "movl %%esp, 4(%0)" "\n\t" - "movl %%ebp, 8(%0)" "\n" - : : "r"(&block[0]) : "memory", "edi" - ); - startRegs.xip = TaggedUWord(block[0]); - startRegs.xsp = TaggedUWord(block[1]); - startRegs.xbp = TaggedUWord(block[2]); - const uintptr_t REDZONE_SIZE = 0; - uintptr_t start = block[1] - REDZONE_SIZE; -#elif defined(LUL_PLAT_arm_android) - volatile uintptr_t block[6]; - MOZ_ASSERT(sizeof(block) == 24); - __asm__ __volatile__( - "mov r0, r15" "\n\t" - "str r0, [%0, #0]" "\n\t" - "str r14, [%0, #4]" "\n\t" - "str r13, [%0, #8]" "\n\t" - "str r12, [%0, #12]" "\n\t" - "str r11, [%0, #16]" "\n\t" - "str r7, [%0, #20]" "\n" - : : "r"(&block[0]) : "memory", "r0" - ); - startRegs.r15 = TaggedUWord(block[0]); - startRegs.r14 = TaggedUWord(block[1]); - startRegs.r13 = TaggedUWord(block[2]); - startRegs.r12 = TaggedUWord(block[3]); - startRegs.r11 = TaggedUWord(block[4]); - startRegs.r7 = TaggedUWord(block[5]); - const uintptr_t REDZONE_SIZE = 0; - uintptr_t start = block[1] - REDZONE_SIZE; -#else -# error "Unsupported platform" -#endif - - // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the - // stack. - uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE; - uintptr_t ws = sizeof(void*); - start &= ~(ws-1); - end &= ~(ws-1); - uintptr_t nToCopy = end - start; - if (nToCopy > lul::N_STACK_BYTES) { - nToCopy = lul::N_STACK_BYTES; - } - MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES); - StackImage* stackImg = new StackImage(); - stackImg->mLen = nToCopy; - stackImg->mStartAvma = start; - if (nToCopy > 0) { - MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy); - memcpy(&stackImg->mContents[0], (void*)start, nToCopy); - } - - // Unwind it. - const int MAX_TEST_FRAMES = 64; - uintptr_t framePCs[MAX_TEST_FRAMES]; - uintptr_t frameSPs[MAX_TEST_FRAMES]; - size_t framesAvail = mozilla::ArrayLength(framePCs); - size_t framesUsed = 0; - size_t scannedFramesAllowed = 0; - size_t scannedFramesAcquired = 0; - aLUL->Unwind( &framePCs[0], &frameSPs[0], - &framesUsed, &scannedFramesAcquired, - framesAvail, scannedFramesAllowed, - &startRegs, stackImg ); - - delete stackImg; - - //if (0) { - // // Show what we have. - // fprintf(stderr, "Got %d frames:\n", (int)framesUsed); - // for (size_t i = 0; i < framesUsed; i++) { - // fprintf(stderr, " [%2d] SP %p PC %p\n", - // (int)i, (void*)frameSPs[i], (void*)framePCs[i]); - // } - // fprintf(stderr, "\n"); - //} - - // Check to see if there's a consistent binding between digits in - // the director string ('1' .. '8') and the PC values acquired by - // the unwind. If there isn't, the unwinding has failed somehow. - uintptr_t binding[8]; // binding for '1' .. binding for '8' - memset((void*)binding, 0, sizeof(binding)); - - // The general plan is to work backwards along the director string - // and forwards along the framePCs array. Doing so corresponds to - // working outwards from the innermost frame of the recursive test set. - const char* cursor = dstring; - - // Find the end. This leaves |cursor| two bytes past the first - // character we want to look at -- see comment below. - while (*cursor) cursor++; - - // Counts the number of consistent frames. - size_t nConsistent = 0; - - // Iterate back to the start of the director string. The starting - // points are a bit complex. We can't use framePCs[0] because that - // contains the PC in this frame (above). We can't use framePCs[1] - // because that will contain the PC at return point in the recursive - // test group (TestFn[1-8]) for their call "out" to this function, - // GetAndCheckStackTrace. Although LUL will compute a correct - // return address, that will not be the same return address as for a - // recursive call out of the the function to another function in the - // group. Hence we can only start consistency checking at - // framePCs[2]. - // - // To be consistent, then, we must ignore the last element in the - // director string as that corresponds to framePCs[1]. Hence the - // start points are: framePCs[2] and the director string 2 bytes - // before the terminating zero. - // - // Also as a result of this, the number of consistent frames counted - // will always be one less than the length of the director string - // (not including its terminating zero). - size_t frameIx; - for (cursor = cursor-2, frameIx = 2; - cursor >= dstring && frameIx < framesUsed; - cursor--, frameIx++) { - char c = *cursor; - uintptr_t pc = framePCs[frameIx]; - // If this doesn't hold, the director string is ill-formed. - MOZ_ASSERT(c >= '1' && c <= '8'); - int n = ((int)c) - ((int)'1'); - if (binding[n] == 0) { - // There's no binding for |c| yet, so install |pc| and carry on. - binding[n] = pc; - nConsistent++; - continue; - } - // There's a pre-existing binding for |c|. Check it's consistent. - if (binding[n] != pc) { - // Not consistent. Give up now. - break; - } - // Consistent. Keep going. - nConsistent++; - } - - // So, did we succeed? - bool passed = nConsistent+1 == strlen(dstring); - - // Show the results. - char buf[200]; - SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring); - buf[sizeof(buf)-1] = 0; - aLUL->mLog(buf); - SprintfLiteral(buf, - "LULUnitTest: %d consistent, %d in dstring: %s\n", - (int)nConsistent, (int)strlen(dstring), - passed ? "PASS" : "FAIL"); - buf[sizeof(buf)-1] = 0; - aLUL->mLog(buf); - - return passed; -} - - -// Macro magic to create a set of 8 mutually recursive functions with -// varying frame sizes. These will recurse amongst themselves as -// specified by |strP|, the directory string, and call -// GetAndCheckStackTrace when the string becomes empty, passing it the -// original value of the string. This checks the result, printing -// results on |aLUL|'s logging sink, and also returns a boolean -// indicating whether or not the results are acceptable (correct). - -#define DECL_TEST_FN(NAME) \ - bool NAME(LUL* aLUL, const char* strPorig, const char* strP); - -#define GEN_TEST_FN(NAME, FRAMESIZE) \ - bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \ - volatile char space[FRAMESIZE]; \ - memset((char*)&space[0], 0, sizeof(space)); \ - if (*strP == '\0') { \ - /* We've come to the end of the director string. */ \ - /* Take a stack snapshot. */ \ - return GetAndCheckStackTrace(aLUL, strPorig); \ - } else { \ - /* Recurse onwards. This is a bit subtle. The obvious */ \ - /* thing to do here is call onwards directly, from within the */ \ - /* arms of the case statement. That gives a problem in that */ \ - /* there will be multiple return points inside each function when */ \ - /* unwinding, so it will be difficult to check for consistency */ \ - /* against the director string. Instead, we make an indirect */ \ - /* call, so as to guarantee that there is only one call site */ \ - /* within each function. This does assume that the compiler */ \ - /* won't transform it back to the simple direct-call form. */ \ - /* To discourage it from doing so, the call is bracketed with */ \ - /* __asm__ __volatile__ sections so as to make it not-movable. */ \ - bool (*nextFn)(LUL*, const char*, const char*) = NULL; \ - switch (*strP) { \ - case '1': nextFn = TestFn1; break; \ - case '2': nextFn = TestFn2; break; \ - case '3': nextFn = TestFn3; break; \ - case '4': nextFn = TestFn4; break; \ - case '5': nextFn = TestFn5; break; \ - case '6': nextFn = TestFn6; break; \ - case '7': nextFn = TestFn7; break; \ - case '8': nextFn = TestFn8; break; \ - default: nextFn = TestFn8; break; \ - } \ - __asm__ __volatile__("":::"cc","memory"); \ - bool passed = nextFn(aLUL, strPorig, strP+1); \ - __asm__ __volatile__("":::"cc","memory"); \ - return passed; \ - } \ - } - -// The test functions are mutually recursive, so it is necessary to -// declare them before defining them. -DECL_TEST_FN(TestFn1) -DECL_TEST_FN(TestFn2) -DECL_TEST_FN(TestFn3) -DECL_TEST_FN(TestFn4) -DECL_TEST_FN(TestFn5) -DECL_TEST_FN(TestFn6) -DECL_TEST_FN(TestFn7) -DECL_TEST_FN(TestFn8) - -GEN_TEST_FN(TestFn1, 123) -GEN_TEST_FN(TestFn2, 456) -GEN_TEST_FN(TestFn3, 789) -GEN_TEST_FN(TestFn4, 23) -GEN_TEST_FN(TestFn5, 47) -GEN_TEST_FN(TestFn6, 117) -GEN_TEST_FN(TestFn7, 1) -GEN_TEST_FN(TestFn8, 99) - - -// This starts the test sequence going. Call here to generate a -// sequence of calls as directed by the string |dstring|. The call -// sequence will, from its innermost frame, finish by calling -// GetAndCheckStackTrace() and passing it |dstring|. -// GetAndCheckStackTrace() will unwind the stack, check consistency -// of those results against |dstring|, and print a pass/fail message -// to aLUL's logging sink. It also updates the counters in *aNTests -// and aNTestsPassed. -__attribute__((noinline)) void -TestUnw(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, - LUL* aLUL, const char* dstring) -{ - // Ensure that the stack has at least this much space on it. This - // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes - // and hand it to LUL. Safe in the sense that no segfault can - // happen because the stack is at least this big. This is all - // somewhat dubious in the sense that a sufficiently clever compiler - // (clang, for one) can figure out that space[] is unused and delete - // it from the frame. Hence the somewhat elaborate hoop jumping to - // fill it up before the call and to at least appear to use the - // value afterwards. - int i; - volatile char space[LUL_UNIT_TEST_STACK_SIZE]; - for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { - space[i] = (char)(i & 0x7F); - } - - // Really run the test. - bool passed = TestFn1(aLUL, dstring, dstring); - - // Appear to use space[], by visiting the value to compute some kind - // of checksum, and then (apparently) using the checksum. - int sum = 0; - for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) { - // If this doesn't fool LLVM, I don't know what will. - sum += space[i] - 3*i; - } - __asm__ __volatile__("" : : "r"(sum)); - - // Update the counters. - (*aNTests)++; - if (passed) { - (*aNTestsPassed)++; - } -} - - -void -RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL) -{ - aLUL->mLog(":\n"); - aLUL->mLog("LULUnitTest: BEGIN\n"); - *aNTests = *aNTestsPassed = 0; - TestUnw(aNTests, aNTestsPassed, aLUL, "11111111"); - TestUnw(aNTests, aNTestsPassed, aLUL, "11222211"); - TestUnw(aNTests, aNTestsPassed, aLUL, "111222333"); - TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212"); - TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258"); - TestUnw(aNTests, aNTestsPassed, aLUL, - "123456781122334455667788777777777777777777777"); - aLUL->mLog("LULUnitTest: END\n"); - aLUL->mLog(":\n"); -} - - -} // namespace lul |