diff options
Diffstat (limited to 'toolkit/crashreporter/google-breakpad/src/common/dwarf/dwarf2reader.cc')
-rw-r--r-- | toolkit/crashreporter/google-breakpad/src/common/dwarf/dwarf2reader.cc | 2734 |
1 files changed, 2734 insertions, 0 deletions
diff --git a/toolkit/crashreporter/google-breakpad/src/common/dwarf/dwarf2reader.cc b/toolkit/crashreporter/google-breakpad/src/common/dwarf/dwarf2reader.cc new file mode 100644 index 000000000..a65b43c8a --- /dev/null +++ b/toolkit/crashreporter/google-breakpad/src/common/dwarf/dwarf2reader.cc @@ -0,0 +1,2734 @@ +// Copyright (c) 2010 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit, +// and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details. + +#include "common/dwarf/dwarf2reader.h" + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <map> +#include <memory> +#include <stack> +#include <string> +#include <utility> + +#include <sys/stat.h> + +#include "common/dwarf/bytereader-inl.h" +#include "common/dwarf/bytereader.h" +#include "common/dwarf/line_state_machine.h" +#include "common/using_std_string.h" + +namespace dwarf2reader { + +CompilationUnit::CompilationUnit(const string& path, + const SectionMap& sections, uint64 offset, + ByteReader* reader, Dwarf2Handler* handler) + : path_(path), offset_from_section_start_(offset), reader_(reader), + sections_(sections), handler_(handler), abbrevs_(), + string_buffer_(NULL), string_buffer_length_(0), + str_offsets_buffer_(NULL), str_offsets_buffer_length_(0), + addr_buffer_(NULL), addr_buffer_length_(0), + is_split_dwarf_(false), dwo_id_(0), dwo_name_(), + skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0), + have_checked_for_dwp_(false), dwp_path_(), + dwp_byte_reader_(), dwp_reader_() {} + +// Initialize a compilation unit from a .dwo or .dwp file. +// In this case, we need the .debug_addr section from the +// executable file that contains the corresponding skeleton +// compilation unit. We also inherit the Dwarf2Handler from +// the executable file, and call it as if we were still +// processing the original compilation unit. + +void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer, + uint64 addr_buffer_length, + uint64 addr_base, + uint64 ranges_base, + uint64 dwo_id) { + is_split_dwarf_ = true; + addr_buffer_ = addr_buffer; + addr_buffer_length_ = addr_buffer_length; + addr_base_ = addr_base; + ranges_base_ = ranges_base; + skeleton_dwo_id_ = dwo_id; +} + +// Read a DWARF2/3 abbreviation section. +// Each abbrev consists of a abbreviation number, a tag, a byte +// specifying whether the tag has children, and a list of +// attribute/form pairs. +// The list of forms is terminated by a 0 for the attribute, and a +// zero for the form. The entire abbreviation section is terminated +// by a zero for the code. + +void CompilationUnit::ReadAbbrevs() { + if (abbrevs_) + return; + + // First get the debug_abbrev section. ".debug_abbrev" is the name + // recommended in the DWARF spec, and used on Linux; + // "__debug_abbrev" is the name used in Mac OS X Mach-O files. + SectionMap::const_iterator iter = sections_.find(".debug_abbrev"); + if (iter == sections_.end()) + iter = sections_.find("__debug_abbrev"); + assert(iter != sections_.end()); + + abbrevs_ = new std::vector<Abbrev>; + abbrevs_->resize(1); + + // The only way to check whether we are reading over the end of the + // buffer would be to first compute the size of the leb128 data by + // reading it, then go back and read it again. + const uint8_t *abbrev_start = iter->second.first + + header_.abbrev_offset; + const uint8_t *abbrevptr = abbrev_start; +#ifndef NDEBUG + const uint64 abbrev_length = iter->second.second - header_.abbrev_offset; +#endif + + while (1) { + CompilationUnit::Abbrev abbrev; + size_t len; + const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len); + + if (number == 0) + break; + abbrev.number = number; + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + abbrev.tag = static_cast<enum DwarfTag>(tag); + + assert(abbrevptr < abbrev_start + abbrev_length); + abbrev.has_children = reader_->ReadOneByte(abbrevptr); + abbrevptr += 1; + + assert(abbrevptr < abbrev_start + abbrev_length); + + while (1) { + const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + + assert(abbrevptr < abbrev_start + abbrev_length); + const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len); + abbrevptr += len; + if (nametemp == 0 && formtemp == 0) + break; + + const enum DwarfAttribute name = + static_cast<enum DwarfAttribute>(nametemp); + const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp); + abbrev.attributes.push_back(std::make_pair(name, form)); + } + assert(abbrev.number == abbrevs_->size()); + abbrevs_->push_back(abbrev); + } +} + +// Skips a single DIE's attributes. +const uint8_t *CompilationUnit::SkipDIE(const uint8_t* start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = SkipAttribute(start, i->second); + } + return start; +} + +// Skips a single attribute form's data. +const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start, + enum DwarfForm form) { + size_t len; + + switch (form) { + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return SkipAttribute(start, form); + + case DW_FORM_flag_present: + return start; + case DW_FORM_data1: + case DW_FORM_flag: + case DW_FORM_ref1: + return start + 1; + case DW_FORM_ref2: + case DW_FORM_data2: + return start + 2; + case DW_FORM_ref4: + case DW_FORM_data4: + return start + 4; + case DW_FORM_ref8: + case DW_FORM_data8: + case DW_FORM_ref_sig8: + return start + 8; + case DW_FORM_string: + return start + strlen(reinterpret_cast<const char *>(start)) + 1; + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_addr_index: + reader_->ReadUnsignedLEB128(start, &len); + return start + len; + + case DW_FORM_sdata: + reader_->ReadSignedLEB128(start, &len); + return start + len; + case DW_FORM_addr: + return start + reader_->AddressSize(); + case DW_FORM_ref_addr: + // DWARF2 and 3/4 differ on whether ref_addr is address size or + // offset size. + assert(header_.version >= 2); + if (header_.version == 2) { + return start + reader_->AddressSize(); + } else if (header_.version >= 3) { + return start + reader_->OffsetSize(); + } + break; + + case DW_FORM_block1: + return start + 1 + reader_->ReadOneByte(start); + case DW_FORM_block2: + return start + 2 + reader_->ReadTwoBytes(start); + case DW_FORM_block4: + return start + 4 + reader_->ReadFourBytes(start); + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64 size = reader_->ReadUnsignedLEB128(start, &len); + return start + size + len; + } + case DW_FORM_strp: + case DW_FORM_sec_offset: + return start + reader_->OffsetSize(); + } + fprintf(stderr,"Unhandled form type"); + return NULL; +} + +// Read a DWARF2/3 header. +// The header is variable length in DWARF3 (and DWARF2 as extended by +// most compilers), and consists of an length field, a version number, +// the offset in the .debug_abbrev section for our abbrevs, and an +// address size. +void CompilationUnit::ReadHeader() { + const uint8_t *headerptr = buffer_; + size_t initial_length_size; + + assert(headerptr + 4 < buffer_ + buffer_length_); + const uint64 initial_length + = reader_->ReadInitialLength(headerptr, &initial_length_size); + headerptr += initial_length_size; + header_.length = initial_length; + + assert(headerptr + 2 < buffer_ + buffer_length_); + header_.version = reader_->ReadTwoBytes(headerptr); + headerptr += 2; + + assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_); + header_.abbrev_offset = reader_->ReadOffset(headerptr); + headerptr += reader_->OffsetSize(); + + // Compare against less than or equal because this may be the last + // section in the file. + assert(headerptr + 1 <= buffer_ + buffer_length_); + header_.address_size = reader_->ReadOneByte(headerptr); + reader_->SetAddressSize(header_.address_size); + headerptr += 1; + + after_header_ = headerptr; + + // This check ensures that we don't have to do checking during the + // reading of DIEs. header_.length does not include the size of the + // initial length. + assert(buffer_ + initial_length_size + header_.length <= + buffer_ + buffer_length_); +} + +uint64 CompilationUnit::Start() { + // First get the debug_info section. ".debug_info" is the name + // recommended in the DWARF spec, and used on Linux; "__debug_info" + // is the name used in Mac OS X Mach-O files. + SectionMap::const_iterator iter = sections_.find(".debug_info"); + if (iter == sections_.end()) + iter = sections_.find("__debug_info"); + assert(iter != sections_.end()); + + // Set up our buffer + buffer_ = iter->second.first + offset_from_section_start_; + buffer_length_ = iter->second.second - offset_from_section_start_; + + // Read the header + ReadHeader(); + + // Figure out the real length from the end of the initial length to + // the end of the compilation unit, since that is the value we + // return. + uint64 ourlength = header_.length; + if (reader_->OffsetSize() == 8) + ourlength += 12; + else + ourlength += 4; + + // See if the user wants this compilation unit, and if not, just return. + if (!handler_->StartCompilationUnit(offset_from_section_start_, + reader_->AddressSize(), + reader_->OffsetSize(), + header_.length, + header_.version)) + return ourlength; + + // Otherwise, continue by reading our abbreviation entries. + ReadAbbrevs(); + + // Set the string section if we have one. ".debug_str" is the name + // recommended in the DWARF spec, and used on Linux; "__debug_str" + // is the name used in Mac OS X Mach-O files. + iter = sections_.find(".debug_str"); + if (iter == sections_.end()) + iter = sections_.find("__debug_str"); + if (iter != sections_.end()) { + string_buffer_ = iter->second.first; + string_buffer_length_ = iter->second.second; + } + + // Set the string offsets section if we have one. + iter = sections_.find(".debug_str_offsets"); + if (iter != sections_.end()) { + str_offsets_buffer_ = iter->second.first; + str_offsets_buffer_length_ = iter->second.second; + } + + // Set the address section if we have one. + iter = sections_.find(".debug_addr"); + if (iter != sections_.end()) { + addr_buffer_ = iter->second.first; + addr_buffer_length_ = iter->second.second; + } + + // Now that we have our abbreviations, start processing DIE's. + ProcessDIEs(); + + // If this is a skeleton compilation unit generated with split DWARF, + // and the client needs the full debug info, we need to find the full + // compilation unit in a .dwo or .dwp file. + if (!is_split_dwarf_ + && dwo_name_ != NULL + && handler_->NeedSplitDebugInfo()) + ProcessSplitDwarf(); + + return ourlength; +} + +// If one really wanted, you could merge SkipAttribute and +// ProcessAttribute +// This is all boring data manipulation and calling of the handler. +const uint8_t *CompilationUnit::ProcessAttribute( + uint64 dieoffset, const uint8_t *start, enum DwarfAttribute attr, + enum DwarfForm form) { + size_t len; + + switch (form) { + // DW_FORM_indirect is never used because it is such a space + // waster. + case DW_FORM_indirect: + form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start, + &len)); + start += len; + return ProcessAttribute(dieoffset, start, attr, form); + + case DW_FORM_flag_present: + ProcessAttributeUnsigned(dieoffset, attr, form, 1); + return start; + case DW_FORM_data1: + case DW_FORM_flag: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOneByte(start)); + return start + 1; + case DW_FORM_data2: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadTwoBytes(start)); + return start + 2; + case DW_FORM_data4: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadFourBytes(start)); + return start + 4; + case DW_FORM_data8: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + case DW_FORM_string: { + const char *str = reinterpret_cast<const char *>(start); + ProcessAttributeString(dieoffset, attr, form, str); + return start + strlen(str) + 1; + } + case DW_FORM_udata: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, &len)); + return start + len; + + case DW_FORM_sdata: + ProcessAttributeSigned(dieoffset, attr, form, + reader_->ReadSignedLEB128(start, &len)); + return start + len; + case DW_FORM_addr: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + case DW_FORM_sec_offset: + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + + case DW_FORM_ref1: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadOneByte(start) + + offset_from_section_start_); + return start + 1; + case DW_FORM_ref2: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadTwoBytes(start) + + offset_from_section_start_); + return start + 2; + case DW_FORM_ref4: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadFourBytes(start) + + offset_from_section_start_); + return start + 4; + case DW_FORM_ref8: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadEightBytes(start) + + offset_from_section_start_); + return start + 8; + case DW_FORM_ref_udata: + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadUnsignedLEB128(start, + &len) + + offset_from_section_start_); + return start + len; + case DW_FORM_ref_addr: + // DWARF2 and 3/4 differ on whether ref_addr is address size or + // offset size. + assert(header_.version >= 2); + if (header_.version == 2) { + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadAddress(start)); + return start + reader_->AddressSize(); + } else if (header_.version >= 3) { + handler_->ProcessAttributeReference(dieoffset, attr, form, + reader_->ReadOffset(start)); + return start + reader_->OffsetSize(); + } + break; + case DW_FORM_ref_sig8: + handler_->ProcessAttributeSignature(dieoffset, attr, form, + reader_->ReadEightBytes(start)); + return start + 8; + + case DW_FORM_block1: { + uint64 datalen = reader_->ReadOneByte(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1, + datalen); + return start + 1 + datalen; + } + case DW_FORM_block2: { + uint64 datalen = reader_->ReadTwoBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2, + datalen); + return start + 2 + datalen; + } + case DW_FORM_block4: { + uint64 datalen = reader_->ReadFourBytes(start); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4, + datalen); + return start + 4 + datalen; + } + case DW_FORM_block: + case DW_FORM_exprloc: { + uint64 datalen = reader_->ReadUnsignedLEB128(start, &len); + handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len, + datalen); + return start + datalen + len; + } + case DW_FORM_strp: { + assert(string_buffer_ != NULL); + + const uint64 offset = reader_->ReadOffset(start); + assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_); + + const char *str = reinterpret_cast<const char *>(string_buffer_ + offset); + ProcessAttributeString(dieoffset, attr, form, str); + return start + reader_->OffsetSize(); + } + + case DW_FORM_GNU_str_index: { + uint64 str_index = reader_->ReadUnsignedLEB128(start, &len); + const uint8_t* offset_ptr = + str_offsets_buffer_ + str_index * reader_->OffsetSize(); + const uint64 offset = reader_->ReadOffset(offset_ptr); + if (offset >= string_buffer_length_) { + return NULL; + } + + const char* str = reinterpret_cast<const char *>(string_buffer_) + offset; + ProcessAttributeString(dieoffset, attr, form, str); + return start + len; + break; + } + case DW_FORM_GNU_addr_index: { + uint64 addr_index = reader_->ReadUnsignedLEB128(start, &len); + const uint8_t* addr_ptr = + addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); + ProcessAttributeUnsigned(dieoffset, attr, form, + reader_->ReadAddress(addr_ptr)); + return start + len; + } + } + fprintf(stderr, "Unhandled form type\n"); + return NULL; +} + +const uint8_t *CompilationUnit::ProcessDIE(uint64 dieoffset, + const uint8_t *start, + const Abbrev& abbrev) { + for (AttributeList::const_iterator i = abbrev.attributes.begin(); + i != abbrev.attributes.end(); + i++) { + start = ProcessAttribute(dieoffset, start, i->first, i->second); + } + + // If this is a compilation unit in a split DWARF object, verify that + // the dwo_id matches. If it does not match, we will ignore this + // compilation unit. + if (abbrev.tag == DW_TAG_compile_unit + && is_split_dwarf_ + && dwo_id_ != skeleton_dwo_id_) { + return NULL; + } + + return start; +} + +void CompilationUnit::ProcessDIEs() { + const uint8_t *dieptr = after_header_; + size_t len; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const uint8_t *lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + std::stack<uint64> die_stack; + + while (dieptr < (lengthstart + header_.length)) { + // We give the user the absolute offset from the beginning of + // debug_info, since they need it to deal with ref_addr forms. + uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_; + + uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len); + + dieptr += len; + + // Abbrev == 0 represents the end of a list of children, or padding + // at the end of the compilation unit. + if (abbrev_num == 0) { + if (die_stack.size() == 0) + // If it is padding, then we are done with the compilation unit's DIEs. + return; + const uint64 offset = die_stack.top(); + die_stack.pop(); + handler_->EndDIE(offset); + continue; + } + + const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num)); + const enum DwarfTag tag = abbrev.tag; + if (!handler_->StartDIE(absolute_offset, tag)) { + dieptr = SkipDIE(dieptr, abbrev); + } else { + dieptr = ProcessDIE(absolute_offset, dieptr, abbrev); + } + + if (abbrev.has_children) { + die_stack.push(absolute_offset); + } else { + handler_->EndDIE(absolute_offset); + } + } +} + +// Check for a valid ELF file and return the Address size. +// Returns 0 if not a valid ELF file. +inline int GetElfWidth(const ElfReader& elf) { + if (elf.IsElf32File()) + return 4; + if (elf.IsElf64File()) + return 8; + return 0; +} + +void CompilationUnit::ProcessSplitDwarf() { + struct stat statbuf; + if (!have_checked_for_dwp_) { + // Look for a .dwp file in the same directory as the executable. + have_checked_for_dwp_ = true; + string dwp_suffix(".dwp"); + dwp_path_ = path_ + dwp_suffix; + if (stat(dwp_path_.c_str(), &statbuf) != 0) { + // Fall back to a split .debug file in the same directory. + string debug_suffix(".debug"); + dwp_path_ = path_; + size_t found = path_.rfind(debug_suffix); + if (found + debug_suffix.length() == path_.length()) + dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix); + } + if (stat(dwp_path_.c_str(), &statbuf) == 0) { + ElfReader* elf = new ElfReader(dwp_path_); + int width = GetElfWidth(*elf); + if (width != 0) { + dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness())); + dwp_byte_reader_->SetAddressSize(width); + dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf)); + dwp_reader_->Initialize(); + } else { + delete elf; + } + } + } + bool found_in_dwp = false; + if (dwp_reader_) { + // If we have a .dwp file, read the debug sections for the requested CU. + SectionMap sections; + dwp_reader_->ReadDebugSectionsForCU(dwo_id_, §ions); + if (!sections.empty()) { + found_in_dwp = true; + CompilationUnit dwp_comp_unit(dwp_path_, sections, 0, + dwp_byte_reader_.get(), handler_); + dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_, + ranges_base_, dwo_id_); + dwp_comp_unit.Start(); + } + } + if (!found_in_dwp) { + // If no .dwp file, try to open the .dwo file. + if (stat(dwo_name_, &statbuf) == 0) { + ElfReader elf(dwo_name_); + int width = GetElfWidth(elf); + if (width != 0) { + ByteReader reader(ENDIANNESS_LITTLE); + reader.SetAddressSize(width); + SectionMap sections; + ReadDebugSectionsFromDwo(&elf, §ions); + CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader, + handler_); + dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, + addr_base_, ranges_base_, dwo_id_); + dwo_comp_unit.Start(); + } + } + } +} + +void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader, + SectionMap* sections) { + static const char* const section_names[] = { + ".debug_abbrev", + ".debug_info", + ".debug_str_offsets", + ".debug_str" + }; + for (unsigned int i = 0u; + i < sizeof(section_names)/sizeof(*(section_names)); ++i) { + string base_name = section_names[i]; + string dwo_name = base_name + ".dwo"; + size_t section_size; + const char* section_data = elf_reader->GetSectionByName(dwo_name, + §ion_size); + if (section_data != NULL) + sections->insert(std::make_pair( + base_name, std::make_pair( + reinterpret_cast<const uint8_t *>(section_data), + section_size))); + } +} + +DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader) + : elf_reader_(elf_reader), byte_reader_(byte_reader), + cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL), + string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0), + nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL), + offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL), + abbrev_size_(0), info_data_(NULL), info_size_(0), + str_offsets_data_(NULL), str_offsets_size_(0) {} + +DwpReader::~DwpReader() { + if (elf_reader_) delete elf_reader_; +} + +void DwpReader::Initialize() { + cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index", + &cu_index_size_); + if (cu_index_ == NULL) { + return; + } + // The .debug_str.dwo section is shared by all CUs in the file. + string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo", + &string_buffer_size_); + + version_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_)); + + if (version_ == 1) { + nslots_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + + 3 * sizeof(uint32)); + phash_ = cu_index_ + 4 * sizeof(uint32); + pindex_ = phash_ + nslots_ * sizeof(uint64); + shndx_pool_ = pindex_ + nslots_ * sizeof(uint32); + if (shndx_pool_ >= cu_index_ + cu_index_size_) { + version_ = 0; + } + } else if (version_ == 2) { + ncolumns_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + sizeof(uint32)); + nunits_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + 2 * sizeof(uint32)); + nslots_ = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(cu_index_) + 3 * sizeof(uint32)); + phash_ = cu_index_ + 4 * sizeof(uint32); + pindex_ = phash_ + nslots_ * sizeof(uint64); + offset_table_ = pindex_ + nslots_ * sizeof(uint32); + size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32); + abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo", + &abbrev_size_); + info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_); + str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo", + &str_offsets_size_); + if (size_table_ >= cu_index_ + cu_index_size_) { + version_ = 0; + } + } +} + +void DwpReader::ReadDebugSectionsForCU(uint64 dwo_id, + SectionMap* sections) { + if (version_ == 1) { + int slot = LookupCU(dwo_id); + if (slot == -1) { + return; + } + + // The index table points to the section index pool, where we + // can read a list of section indexes for the debug sections + // for the CU whose dwo_id we are looking for. + int index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(pindex_) + + slot * sizeof(uint32)); + const char* shndx_list = shndx_pool_ + index * sizeof(uint32); + for (;;) { + if (shndx_list >= cu_index_ + cu_index_size_) { + version_ = 0; + return; + } + unsigned int shndx = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(shndx_list)); + shndx_list += sizeof(uint32); + if (shndx == 0) + break; + const char* section_name = elf_reader_->GetSectionName(shndx); + size_t section_size; + const char* section_data; + // We're only interested in these four debug sections. + // The section names in the .dwo file end with ".dwo", but we + // add them to the sections table with their normal names. + if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_abbrev", + std::make_pair(reinterpret_cast<const uint8_t *> (section_data), + section_size))); + } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_info", + std::make_pair(reinterpret_cast<const uint8_t *> (section_data), + section_size))); + } else if (!strncmp(section_name, ".debug_str_offsets", + strlen(".debug_str_offsets"))) { + section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size); + sections->insert(std::make_pair( + ".debug_str_offsets", + std::make_pair(reinterpret_cast<const uint8_t *> (section_data), + section_size))); + } + } + sections->insert(std::make_pair( + ".debug_str", + std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_), + string_buffer_size_))); + } else if (version_ == 2) { + uint32 index = LookupCUv2(dwo_id); + if (index == 0) { + return; + } + + // The index points to a row in each of the section offsets table + // and the section size table, where we can read the offsets and sizes + // of the contributions to each debug section from the CU whose dwo_id + // we are looking for. Row 0 of the section offsets table has the + // section ids for each column of the table. The size table begins + // with row 1. + const char* id_row = offset_table_; + const char* offset_row = offset_table_ + + index * ncolumns_ * sizeof(uint32); + const char* size_row = + size_table_ + (index - 1) * ncolumns_ * sizeof(uint32); + if (size_row + ncolumns_ * sizeof(uint32) > cu_index_ + cu_index_size_) { + version_ = 0; + return; + } + for (unsigned int col = 0u; col < ncolumns_; ++col) { + uint32 section_id = + byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t *>(id_row) + + col * sizeof(uint32)); + uint32 offset = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(offset_row) + + col * sizeof(uint32)); + uint32 size = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(size_row) + col * sizeof(uint32)); + if (section_id == DW_SECT_ABBREV) { + sections->insert(std::make_pair( + ".debug_abbrev", + std::make_pair(reinterpret_cast<const uint8_t *> (abbrev_data_) + + offset, size))); + } else if (section_id == DW_SECT_INFO) { + sections->insert(std::make_pair( + ".debug_info", + std::make_pair(reinterpret_cast<const uint8_t *> (info_data_) + + offset, size))); + } else if (section_id == DW_SECT_STR_OFFSETS) { + sections->insert(std::make_pair( + ".debug_str_offsets", + std::make_pair(reinterpret_cast<const uint8_t *> (str_offsets_data_) + + offset, size))); + } + } + sections->insert(std::make_pair( + ".debug_str", + std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_), + string_buffer_size_))); + } +} + +int DwpReader::LookupCU(uint64 dwo_id) { + uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1); + uint64 probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + if (probe != 0 && probe != dwo_id) { + uint32 secondary_hash = + (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1; + do { + slot = (slot + secondary_hash) & (nslots_ - 1); + probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + } while (probe != 0 && probe != dwo_id); + } + if (probe == 0) + return -1; + return slot; +} + +uint32 DwpReader::LookupCUv2(uint64 dwo_id) { + uint32 slot = static_cast<uint32>(dwo_id) & (nslots_ - 1); + uint64 probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + uint32 index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32)); + if (index != 0 && probe != dwo_id) { + uint32 secondary_hash = + (static_cast<uint32>(dwo_id >> 32) & (nslots_ - 1)) | 1; + do { + slot = (slot + secondary_hash) & (nslots_ - 1); + probe = byte_reader_.ReadEightBytes( + reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64)); + index = byte_reader_.ReadFourBytes( + reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32)); + } while (index != 0 && probe != dwo_id); + } + return index; +} + +LineInfo::LineInfo(const uint8_t *buffer, uint64 buffer_length, + ByteReader* reader, LineInfoHandler* handler): + handler_(handler), reader_(reader), buffer_(buffer) { +#ifndef NDEBUG + buffer_length_ = buffer_length; +#endif + header_.std_opcode_lengths = NULL; +} + +uint64 LineInfo::Start() { + ReadHeader(); + ReadLines(); + return after_header_ - buffer_; +} + +// The header for a debug_line section is mildly complicated, because +// the line info is very tightly encoded. +void LineInfo::ReadHeader() { + const uint8_t *lineptr = buffer_; + size_t initial_length_size; + + const uint64 initial_length + = reader_->ReadInitialLength(lineptr, &initial_length_size); + + lineptr += initial_length_size; + header_.total_length = initial_length; + assert(buffer_ + initial_length_size + header_.total_length <= + buffer_ + buffer_length_); + + // Address size *must* be set by CU ahead of time. + assert(reader_->AddressSize() != 0); + + header_.version = reader_->ReadTwoBytes(lineptr); + lineptr += 2; + + header_.prologue_length = reader_->ReadOffset(lineptr); + lineptr += reader_->OffsetSize(); + + header_.min_insn_length = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.default_is_stmt = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.line_base = *reinterpret_cast<const int8*>(lineptr); + lineptr += 1; + + header_.line_range = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.opcode_base = reader_->ReadOneByte(lineptr); + lineptr += 1; + + header_.std_opcode_lengths = new std::vector<unsigned char>; + header_.std_opcode_lengths->resize(header_.opcode_base + 1); + (*header_.std_opcode_lengths)[0] = 0; + for (int i = 1; i < header_.opcode_base; i++) { + (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr); + lineptr += 1; + } + + // It is legal for the directory entry table to be empty. + if (*lineptr) { + uint32 dirindex = 1; + while (*lineptr) { + const char *dirname = reinterpret_cast<const char *>(lineptr); + handler_->DefineDir(dirname, dirindex); + lineptr += strlen(dirname) + 1; + dirindex++; + } + } + lineptr++; + + // It is also legal for the file entry table to be empty. + if (*lineptr) { + uint32 fileindex = 1; + size_t len; + while (*lineptr) { + const char *filename = reinterpret_cast<const char *>(lineptr); + lineptr += strlen(filename) + 1; + + uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + + uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len); + lineptr += len; + handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex), + mod_time, filelength); + fileindex++; + } + } + lineptr++; + + after_header_ = lineptr; +} + +/* static */ +bool LineInfo::ProcessOneOpcode(ByteReader* reader, + LineInfoHandler* handler, + const struct LineInfoHeader &header, + const uint8_t *start, + struct LineStateMachine* lsm, + size_t* len, + uintptr pc, + bool *lsm_passes_pc) { + size_t oplen = 0; + size_t templen; + uint8 opcode = reader->ReadOneByte(start); + oplen++; + start++; + + // If the opcode is great than the opcode_base, it is a special + // opcode. Most line programs consist mainly of special opcodes. + if (opcode >= header.opcode_base) { + opcode -= header.opcode_base; + const int64 advance_address = (opcode / header.line_range) + * header.min_insn_length; + const int32 advance_line = (opcode % header.line_range) + + header.line_base; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + lsm->line_num += advance_line; + lsm->basic_block = true; + *len = oplen; + return true; + } + + // Otherwise, we have the regular opcodes + switch (opcode) { + case DW_LNS_copy: { + lsm->basic_block = false; + *len = oplen; + return true; + } + + case DW_LNS_advance_pc: { + uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && lsm->address <= pc && + pc < lsm->address + header.min_insn_length * advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += header.min_insn_length * advance_address; + } + break; + case DW_LNS_advance_line: { + const int64 advance_line = reader->ReadSignedLEB128(start, &templen); + oplen += templen; + lsm->line_num += static_cast<int32>(advance_line); + + // With gcc 4.2.1, we can get the line_no here for the first time + // since DW_LNS_advance_line is called after DW_LNE_set_address is + // called. So we check if the lsm passes "pc" here, not in + // DW_LNE_set_address. + if (lsm_passes_pc && lsm->address == pc) { + *lsm_passes_pc = true; + } + } + break; + case DW_LNS_set_file: { + const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->file_num = static_cast<uint32>(fileno); + } + break; + case DW_LNS_set_column: { + const uint64 colno = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + lsm->column_num = static_cast<uint32>(colno); + } + break; + case DW_LNS_negate_stmt: { + lsm->is_stmt = !lsm->is_stmt; + } + break; + case DW_LNS_set_basic_block: { + lsm->basic_block = true; + } + break; + case DW_LNS_fixed_advance_pc: { + const uint16 advance_address = reader->ReadTwoBytes(start); + oplen += 2; + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_const_add_pc: { + const int64 advance_address = header.min_insn_length + * ((255 - header.opcode_base) + / header.line_range); + + // Check if the lsm passes "pc". If so, mark it as passed. + if (lsm_passes_pc && + lsm->address <= pc && pc < lsm->address + advance_address) { + *lsm_passes_pc = true; + } + + lsm->address += advance_address; + } + break; + case DW_LNS_extended_op: { + const uint64 extended_op_len = reader->ReadUnsignedLEB128(start, + &templen); + start += templen; + oplen += templen + extended_op_len; + + const uint64 extended_op = reader->ReadOneByte(start); + start++; + + switch (extended_op) { + case DW_LNE_end_sequence: { + lsm->end_sequence = true; + *len = oplen; + return true; + } + break; + case DW_LNE_set_address: { + // With gcc 4.2.1, we cannot tell the line_no here since + // DW_LNE_set_address is called before DW_LNS_advance_line is + // called. So we do not check if the lsm passes "pc" here. See + // also the comment in DW_LNS_advance_line. + uint64 address = reader->ReadAddress(start); + lsm->address = address; + } + break; + case DW_LNE_define_file: { + const char *filename = reinterpret_cast<const char *>(start); + + templen = strlen(filename) + 1; + start += templen; + + uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen); + oplen += templen; + + const uint64 mod_time = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + const uint64 filelength = reader->ReadUnsignedLEB128(start, + &templen); + oplen += templen; + + if (handler) { + handler->DefineFile(filename, -1, static_cast<uint32>(dirindex), + mod_time, filelength); + } + } + break; + } + } + break; + + default: { + // Ignore unknown opcode silently + if (header.std_opcode_lengths) { + for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) { + reader->ReadUnsignedLEB128(start, &templen); + start += templen; + oplen += templen; + } + } + } + break; + } + *len = oplen; + return false; +} + +void LineInfo::ReadLines() { + struct LineStateMachine lsm; + + // lengthstart is the place the length field is based on. + // It is the point in the header after the initial length field + const uint8_t *lengthstart = buffer_; + + // In 64 bit dwarf, the initial length is 12 bytes, because of the + // 0xffffffff at the start. + if (reader_->OffsetSize() == 8) + lengthstart += 12; + else + lengthstart += 4; + + const uint8_t *lineptr = after_header_; + lsm.Reset(header_.default_is_stmt); + + // The LineInfoHandler interface expects each line's length along + // with its address, but DWARF only provides addresses (sans + // length), and an end-of-sequence address; one infers the length + // from the next address. So we report a line only when we get the + // next line's address, or the end-of-sequence address. + bool have_pending_line = false; + uint64 pending_address = 0; + uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0; + + while (lineptr < lengthstart + header_.total_length) { + size_t oplength; + bool add_row = ProcessOneOpcode(reader_, handler_, header_, + lineptr, &lsm, &oplength, (uintptr)-1, + NULL); + if (add_row) { + if (have_pending_line) + handler_->AddLine(pending_address, lsm.address - pending_address, + pending_file_num, pending_line_num, + pending_column_num); + if (lsm.end_sequence) { + lsm.Reset(header_.default_is_stmt); + have_pending_line = false; + } else { + pending_address = lsm.address; + pending_file_num = lsm.file_num; + pending_line_num = lsm.line_num; + pending_column_num = lsm.column_num; + have_pending_line = true; + } + } + lineptr += oplength; + } + + after_header_ = lengthstart + header_.total_length; +} + +// A DWARF rule for recovering the address or value of a register, or +// computing the canonical frame address. There is one subclass of this for +// each '*Rule' member function in CallFrameInfo::Handler. +// +// It's annoying that we have to handle Rules using pointers (because +// the concrete instances can have an arbitrary size). They're small, +// so it would be much nicer if we could just handle them by value +// instead of fretting about ownership and destruction. +// +// It seems like all these could simply be instances of std::tr1::bind, +// except that we need instances to be EqualityComparable, too. +// +// This could logically be nested within State, but then the qualified names +// get horrendous. +class CallFrameInfo::Rule { + public: + virtual ~Rule() { } + + // Tell HANDLER that, at ADDRESS in the program, REGISTER can be + // recovered using this rule. If REGISTER is kCFARegister, then this rule + // describes how to compute the canonical frame address. Return what the + // HANDLER member function returned. + virtual bool Handle(Handler *handler, + uint64 address, int register) const = 0; + + // Equality on rules. We use these to decide which rules we need + // to report after a DW_CFA_restore_state instruction. + virtual bool operator==(const Rule &rhs) const = 0; + + bool operator!=(const Rule &rhs) const { return ! (*this == rhs); } + + // Return a pointer to a copy of this rule. + virtual Rule *Copy() const = 0; + + // If this is a base+offset rule, change its base register to REG. + // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.) + virtual void SetBaseRegister(unsigned reg) { } + + // If this is a base+offset rule, change its offset to OFFSET. Otherwise, + // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.) + virtual void SetOffset(long long offset) { } +}; + +// Rule: the value the register had in the caller cannot be recovered. +class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule { + public: + UndefinedRule() { } + ~UndefinedRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->UndefinedRule(address, reg); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs); + return (our_rhs != NULL); + } + Rule *Copy() const { return new UndefinedRule(*this); } +}; + +// Rule: the register's value is the same as that it had in the caller. +class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule { + public: + SameValueRule() { } + ~SameValueRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->SameValueRule(address, reg); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs); + return (our_rhs != NULL); + } + Rule *Copy() const { return new SameValueRule(*this); } +}; + +// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER +// may be CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule { + public: + OffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~OffsetRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->OffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs); + return (our_rhs && + base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule *Copy() const { return new OffsetRule(*this); } + // We don't actually need SetBaseRegister or SetOffset here, since they + // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it + // doesn't make sense to use OffsetRule for computing the CFA: it + // computes the address at which a register is saved, not a value. + private: + int base_register_; + long offset_; +}; + +// Rule: the value the register had in the caller is the value of +// BASE_REGISTER plus offset. BASE_REGISTER may be +// CallFrameInfo::Handler::kCFARegister. +class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule { + public: + ValOffsetRule(int base_register, long offset) + : base_register_(base_register), offset_(offset) { } + ~ValOffsetRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ValOffsetRule(address, reg, base_register_, offset_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs); + return (our_rhs && + base_register_ == our_rhs->base_register_ && + offset_ == our_rhs->offset_); + } + Rule *Copy() const { return new ValOffsetRule(*this); } + void SetBaseRegister(unsigned reg) { base_register_ = reg; } + void SetOffset(long long offset) { offset_ = offset; } + private: + int base_register_; + long offset_; +}; + +// Rule: the register has been saved in another register REGISTER_NUMBER_. +class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule { + public: + explicit RegisterRule(int register_number) + : register_number_(register_number) { } + ~RegisterRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->RegisterRule(address, reg, register_number_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs); + return (our_rhs && register_number_ == our_rhs->register_number_); + } + Rule *Copy() const { return new RegisterRule(*this); } + private: + int register_number_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. +class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule { + public: + explicit ExpressionRule(const string &expression) + : expression_(expression) { } + ~ExpressionRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ExpressionRule(address, reg, expression_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs); + return (our_rhs && expression_ == our_rhs->expression_); + } + Rule *Copy() const { return new ExpressionRule(*this); } + private: + string expression_; +}; + +// Rule: EXPRESSION evaluates to the address at which the register is saved. +class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule { + public: + explicit ValExpressionRule(const string &expression) + : expression_(expression) { } + ~ValExpressionRule() { } + bool Handle(Handler *handler, uint64 address, int reg) const { + return handler->ValExpressionRule(address, reg, expression_); + } + bool operator==(const Rule &rhs) const { + // dynamic_cast is allowed by the Google C++ Style Guide, if the use has + // been carefully considered; cheap RTTI-like workarounds are forbidden. + const ValExpressionRule *our_rhs = + dynamic_cast<const ValExpressionRule *>(&rhs); + return (our_rhs && expression_ == our_rhs->expression_); + } + Rule *Copy() const { return new ValExpressionRule(*this); } + private: + string expression_; +}; + +// A map from register numbers to rules. +class CallFrameInfo::RuleMap { + public: + RuleMap() : cfa_rule_(NULL) { } + RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; } + ~RuleMap() { Clear(); } + + RuleMap &operator=(const RuleMap &rhs); + + // Set the rule for computing the CFA to RULE. Take ownership of RULE. + void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; } + + // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains + // ownership of the rule. We use this for DW_CFA_def_cfa_offset and + // DW_CFA_def_cfa_register, and for detecting references to the CFA before + // a rule for it has been established. + Rule *CFARule() const { return cfa_rule_; } + + // Return the rule for REG, or NULL if there is none. The caller takes + // ownership of the result. + Rule *RegisterRule(int reg) const; + + // Set the rule for computing REG to RULE. Take ownership of RULE. + void SetRegisterRule(int reg, Rule *rule); + + // Make all the appropriate calls to HANDLER as if we were changing from + // this RuleMap to NEW_RULES at ADDRESS. We use this to implement + // DW_CFA_restore_state, where lots of rules can change simultaneously. + // Return true if all handlers returned true; otherwise, return false. + bool HandleTransitionTo(Handler *handler, uint64 address, + const RuleMap &new_rules) const; + + private: + // A map from register numbers to Rules. + typedef std::map<int, Rule *> RuleByNumber; + + // Remove all register rules and clear cfa_rule_. + void Clear(); + + // The rule for computing the canonical frame address. This RuleMap owns + // this rule. + Rule *cfa_rule_; + + // A map from register numbers to postfix expressions to recover + // their values. This RuleMap owns the Rules the map refers to. + RuleByNumber registers_; +}; + +CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) { + Clear(); + // Since each map owns the rules it refers to, assignment must copy them. + if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy(); + for (RuleByNumber::const_iterator it = rhs.registers_.begin(); + it != rhs.registers_.end(); it++) + registers_[it->first] = it->second->Copy(); + return *this; +} + +CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const { + assert(reg != Handler::kCFARegister); + RuleByNumber::const_iterator it = registers_.find(reg); + if (it != registers_.end()) + return it->second->Copy(); + else + return NULL; +} + +void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) { + assert(reg != Handler::kCFARegister); + assert(rule); + Rule **slot = ®isters_[reg]; + delete *slot; + *slot = rule; +} + +bool CallFrameInfo::RuleMap::HandleTransitionTo( + Handler *handler, + uint64 address, + const RuleMap &new_rules) const { + // Transition from cfa_rule_ to new_rules.cfa_rule_. + if (cfa_rule_ && new_rules.cfa_rule_) { + if (*cfa_rule_ != *new_rules.cfa_rule_ && + !new_rules.cfa_rule_->Handle(handler, address, + Handler::kCFARegister)) + return false; + } else if (cfa_rule_) { + // this RuleMap has a CFA rule but new_rules doesn't. + // CallFrameInfo::Handler has no way to handle this --- and shouldn't; + // it's garbage input. The instruction interpreter should have + // detected this and warned, so take no action here. + } else if (new_rules.cfa_rule_) { + // This shouldn't be possible: NEW_RULES is some prior state, and + // there's no way to remove entries. + assert(0); + } else { + // Both CFA rules are empty. No action needed. + } + + // Traverse the two maps in order by register number, and report + // whatever differences we find. + RuleByNumber::const_iterator old_it = registers_.begin(); + RuleByNumber::const_iterator new_it = new_rules.registers_.begin(); + while (old_it != registers_.end() && new_it != new_rules.registers_.end()) { + if (old_it->first < new_it->first) { + // This RuleMap has an entry for old_it->first, but NEW_RULES + // doesn't. + // + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } else if (old_it->first > new_it->first) { + // NEW_RULES has entry for new_it->first, but this RuleMap + // doesn't. This shouldn't be possible: NEW_RULES is some prior + // state, and there's no way to remove entries. + assert(0); + } else { + // Both maps have an entry for this register. Report the new + // rule if it is different. + if (*old_it->second != *new_it->second && + !new_it->second->Handle(handler, address, new_it->first)) + return false; + new_it++, old_it++; + } + } + // Finish off entries from this RuleMap with no counterparts in new_rules. + while (old_it != registers_.end()) { + if (!handler->SameValueRule(address, old_it->first)) + return false; + old_it++; + } + // Since we only make transitions from a rule set to some previously + // saved rule set, and we can only add rules to the map, NEW_RULES + // must have fewer rules than *this. + assert(new_it == new_rules.registers_.end()); + + return true; +} + +// Remove all register rules and clear cfa_rule_. +void CallFrameInfo::RuleMap::Clear() { + delete cfa_rule_; + cfa_rule_ = NULL; + for (RuleByNumber::iterator it = registers_.begin(); + it != registers_.end(); it++) + delete it->second; + registers_.clear(); +} + +// The state of the call frame information interpreter as it processes +// instructions from a CIE and FDE. +class CallFrameInfo::State { + public: + // Create a call frame information interpreter state with the given + // reporter, reader, handler, and initial call frame info address. + State(ByteReader *reader, Handler *handler, Reporter *reporter, + uint64 address) + : reader_(reader), handler_(handler), reporter_(reporter), + address_(address), entry_(NULL), cursor_(NULL) { } + + // Interpret instructions from CIE, save the resulting rule set for + // DW_CFA_restore instructions, and return true. On error, report + // the problem to reporter_ and return false. + bool InterpretCIE(const CIE &cie); + + // Interpret instructions from FDE, and return true. On error, + // report the problem to reporter_ and return false. + bool InterpretFDE(const FDE &fde); + + private: + // The operands of a CFI instruction, for ParseOperands. + struct Operands { + unsigned register_number; // A register number. + uint64 offset; // An offset or address. + long signed_offset; // A signed offset. + string expression; // A DWARF expression. + }; + + // Parse CFI instruction operands from STATE's instruction stream as + // described by FORMAT. On success, populate OPERANDS with the + // results, and return true. On failure, report the problem and + // return false. + // + // Each character of FORMAT should be one of the following: + // + // 'r' unsigned LEB128 register number (OPERANDS->register_number) + // 'o' unsigned LEB128 offset (OPERANDS->offset) + // 's' signed LEB128 offset (OPERANDS->signed_offset) + // 'a' machine-size address (OPERANDS->offset) + // (If the CIE has a 'z' augmentation string, 'a' uses the + // encoding specified by the 'R' argument.) + // '1' a one-byte offset (OPERANDS->offset) + // '2' a two-byte offset (OPERANDS->offset) + // '4' a four-byte offset (OPERANDS->offset) + // '8' an eight-byte offset (OPERANDS->offset) + // 'e' a DW_FORM_block holding a (OPERANDS->expression) + // DWARF expression + bool ParseOperands(const char *format, Operands *operands); + + // Interpret one CFI instruction from STATE's instruction stream, update + // STATE, report any rule changes to handler_, and return true. On + // failure, report the problem and return false. + bool DoInstruction(); + + // The following Do* member functions are subroutines of DoInstruction, + // factoring out the actual work of operations that have several + // different encodings. + + // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and + // return true. On failure, report and return false. (Used for + // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.) + bool DoDefCFA(unsigned base_register, long offset); + + // Change the offset of the CFA rule to OFFSET, and return true. On + // failure, report and return false. (Subroutine for + // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.) + bool DoDefCFAOffset(long offset); + + // Specify that REG can be recovered using RULE, and return true. On + // failure, report and return false. + bool DoRule(unsigned reg, Rule *rule); + + // Specify that REG can be found at OFFSET from the CFA, and return true. + // On failure, report and return false. (Subroutine for DW_CFA_offset, + // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.) + bool DoOffset(unsigned reg, long offset); + + // Specify that the caller's value for REG is the CFA plus OFFSET, + // and return true. On failure, report and return false. (Subroutine + // for DW_CFA_val_offset and DW_CFA_val_offset_sf.) + bool DoValOffset(unsigned reg, long offset); + + // Restore REG to the rule established in the CIE, and return true. On + // failure, report and return false. (Subroutine for DW_CFA_restore and + // DW_CFA_restore_extended.) + bool DoRestore(unsigned reg); + + // Return the section offset of the instruction at cursor. For use + // in error messages. + uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); } + + // Report that entry_ is incomplete, and return false. For brevity. + bool ReportIncomplete() { + reporter_->Incomplete(entry_->offset, entry_->kind); + return false; + } + + // For reading multi-byte values with the appropriate endianness. + ByteReader *reader_; + + // The handler to which we should report the data we find. + Handler *handler_; + + // For reporting problems in the info we're parsing. + Reporter *reporter_; + + // The code address to which the next instruction in the stream applies. + uint64 address_; + + // The entry whose instructions we are currently processing. This is + // first a CIE, and then an FDE. + const Entry *entry_; + + // The next instruction to process. + const uint8_t *cursor_; + + // The current set of rules. + RuleMap rules_; + + // The set of rules established by the CIE, used by DW_CFA_restore + // and DW_CFA_restore_extended. We set this after interpreting the + // CIE's instructions. + RuleMap cie_rules_; + + // A stack of saved states, for DW_CFA_remember_state and + // DW_CFA_restore_state. + std::stack<RuleMap> saved_rules_; +}; + +bool CallFrameInfo::State::InterpretCIE(const CIE &cie) { + entry_ = &cie; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + // Note the rules established by the CIE, for use by DW_CFA_restore + // and DW_CFA_restore_extended. + cie_rules_ = rules_; + return true; +} + +bool CallFrameInfo::State::InterpretFDE(const FDE &fde) { + entry_ = &fde; + cursor_ = entry_->instructions; + while (cursor_ < entry_->end) + if (!DoInstruction()) + return false; + return true; +} + +bool CallFrameInfo::State::ParseOperands(const char *format, + Operands *operands) { + size_t len; + const char *operand; + + for (operand = format; *operand; operand++) { + size_t bytes_left = entry_->end - cursor_; + switch (*operand) { + case 'r': + operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'o': + operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 's': + operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case 'a': + operands->offset = + reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding, + &len); + if (len > bytes_left) return ReportIncomplete(); + cursor_ += len; + break; + + case '1': + if (1 > bytes_left) return ReportIncomplete(); + operands->offset = static_cast<unsigned char>(*cursor_++); + break; + + case '2': + if (2 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadTwoBytes(cursor_); + cursor_ += 2; + break; + + case '4': + if (4 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadFourBytes(cursor_); + cursor_ += 4; + break; + + case '8': + if (8 > bytes_left) return ReportIncomplete(); + operands->offset = reader_->ReadEightBytes(cursor_); + cursor_ += 8; + break; + + case 'e': { + size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len); + if (len > bytes_left || expression_length > bytes_left - len) + return ReportIncomplete(); + cursor_ += len; + operands->expression = string(reinterpret_cast<const char *>(cursor_), + expression_length); + cursor_ += expression_length; + break; + } + + default: + assert(0); + } + } + + return true; +} + +bool CallFrameInfo::State::DoInstruction() { + CIE *cie = entry_->cie; + Operands ops; + + // Our entry's kind should have been set by now. + assert(entry_->kind != kUnknown); + + // We shouldn't have been invoked unless there were more + // instructions to parse. + assert(cursor_ < entry_->end); + + unsigned opcode = *cursor_++; + if ((opcode & 0xc0) != 0) { + switch (opcode & 0xc0) { + // Advance the address. + case DW_CFA_advance_loc: { + size_t code_offset = opcode & 0x3f; + address_ += code_offset * cie->code_alignment_factor; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset: + if (!ParseOperands("o", &ops) || + !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor)) + return false; + break; + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore: + if (!DoRestore(opcode & 0x3f)) return false; + break; + + // The 'if' above should have excluded this possibility. + default: + assert(0); + } + + // Return here, so the big switch below won't be indented. + return true; + } + + switch (opcode) { + // Set the address. + case DW_CFA_set_loc: + if (!ParseOperands("a", &ops)) return false; + address_ = ops.offset; + break; + + // Advance the address. + case DW_CFA_advance_loc1: + if (!ParseOperands("1", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc2: + if (!ParseOperands("2", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_advance_loc4: + if (!ParseOperands("4", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Advance the address. + case DW_CFA_MIPS_advance_loc8: + if (!ParseOperands("8", &ops)) return false; + address_ += ops.offset * cie->code_alignment_factor; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa: + if (!ParseOperands("ro", &ops) || + !DoDefCFA(ops.register_number, ops.offset)) + return false; + break; + + // Compute the CFA by adding an offset to a register. + case DW_CFA_def_cfa_sf: + if (!ParseOperands("rs", &ops) || + !DoDefCFA(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Change the base register used to compute the CFA. + case DW_CFA_def_cfa_register: { + if (!ParseOperands("r", &ops)) return false; + Rule *cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + if (!DoDefCFA(ops.register_number, ops.offset)) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } else { + cfa_rule->SetBaseRegister(ops.register_number); + if (!cfa_rule->Handle(handler_, address_, + Handler::kCFARegister)) + return false; + } + break; + } + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset: + if (!ParseOperands("o", &ops) || + !DoDefCFAOffset(ops.offset)) + return false; + break; + + // Change the offset used to compute the CFA. + case DW_CFA_def_cfa_offset_sf: + if (!ParseOperands("s", &ops) || + !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // Specify an expression whose value is the CFA. + case DW_CFA_def_cfa_expression: { + if (!ParseOperands("e", &ops)) + return false; + Rule *rule = new ValExpressionRule(ops.expression); + rules_.SetCFARule(rule); + if (!rule->Handle(handler_, address_, + Handler::kCFARegister)) + return false; + break; + } + + // The register's value cannot be recovered. + case DW_CFA_undefined: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new UndefinedRule())) + return false; + break; + } + + // The register's value is unchanged from its value in the caller. + case DW_CFA_same_value: { + if (!ParseOperands("r", &ops) || + !DoRule(ops.register_number, new SameValueRule())) + return false; + break; + } + + // Find a register at an offset from the CFA. + case DW_CFA_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_offset_extended_sf: + if (!ParseOperands("rs", &ops) || + !DoOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register is saved at an offset from the CFA. + case DW_CFA_GNU_negative_offset_extended: + if (!ParseOperands("ro", &ops) || + !DoOffset(ops.register_number, + -ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset: + if (!ParseOperands("ro", &ops) || + !DoValOffset(ops.register_number, + ops.offset * cie->data_alignment_factor)) + return false; + break; + + // The register's value is the sum of the CFA plus an offset. + case DW_CFA_val_offset_sf: + if (!ParseOperands("rs", &ops) || + !DoValOffset(ops.register_number, + ops.signed_offset * cie->data_alignment_factor)) + return false; + break; + + // The register has been saved in another register. + case DW_CFA_register: { + if (!ParseOperands("ro", &ops) || + !DoRule(ops.register_number, new RegisterRule(ops.offset))) + return false; + break; + } + + // An expression yields the address at which the register is saved. + case DW_CFA_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ExpressionRule(ops.expression))) + return false; + break; + } + + // An expression yields the caller's value for the register. + case DW_CFA_val_expression: { + if (!ParseOperands("re", &ops) || + !DoRule(ops.register_number, new ValExpressionRule(ops.expression))) + return false; + break; + } + + // Restore the rule established for a register by the CIE. + case DW_CFA_restore_extended: + if (!ParseOperands("r", &ops) || + !DoRestore( ops.register_number)) + return false; + break; + + // Save the current set of rules on a stack. + case DW_CFA_remember_state: + saved_rules_.push(rules_); + break; + + // Pop the current set of rules off the stack. + case DW_CFA_restore_state: { + if (saved_rules_.empty()) { + reporter_->EmptyStateStack(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + const RuleMap &new_rules = saved_rules_.top(); + if (rules_.CFARule() && !new_rules.CFARule()) { + reporter_->ClearingCFARule(entry_->offset, entry_->kind, + CursorOffset()); + return false; + } + rules_.HandleTransitionTo(handler_, address_, new_rules); + rules_ = new_rules; + saved_rules_.pop(); + break; + } + + // No operation. (Padding instruction.) + case DW_CFA_nop: + break; + + // A SPARC register window save: Registers 8 through 15 (%o0-%o7) + // are saved in registers 24 through 31 (%i0-%i7), and registers + // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets + // (0-15 * the register size). The register numbers must be + // hard-coded. A GNU extension, and not a pretty one. + case DW_CFA_GNU_window_save: { + // Save %o0-%o7 in %i0-%i7. + for (int i = 8; i < 16; i++) + if (!DoRule(i, new RegisterRule(i + 16))) + return false; + // Save %l0-%l7 and %i0-%i7 at the CFA. + for (int i = 16; i < 32; i++) + // Assume that the byte reader's address size is the same as + // the architecture's register size. !@#%*^ hilarious. + if (!DoRule(i, new OffsetRule(Handler::kCFARegister, + (i - 16) * reader_->AddressSize()))) + return false; + break; + } + + // I'm not sure what this is. GDB doesn't use it for unwinding. + case DW_CFA_GNU_args_size: + if (!ParseOperands("o", &ops)) return false; + break; + + // An opcode we don't recognize. + default: { + reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + } + + return true; +} + +bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) { + Rule *rule = new ValOffsetRule(base_register, offset); + rules_.SetCFARule(rule); + return rule->Handle(handler_, address_, + Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoDefCFAOffset(long offset) { + Rule *cfa_rule = rules_.CFARule(); + if (!cfa_rule) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + cfa_rule->SetOffset(offset); + return cfa_rule->Handle(handler_, address_, + Handler::kCFARegister); +} + +bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) { + rules_.SetRegisterRule(reg, rule); + return rule->Handle(handler_, address_, reg); +} + +bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new OffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) { + if (!rules_.CFARule()) { + reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset()); + return false; + } + return DoRule(reg, + new ValOffsetRule(Handler::kCFARegister, offset)); +} + +bool CallFrameInfo::State::DoRestore(unsigned reg) { + // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE. + if (entry_->kind == kCIE) { + reporter_->RestoreInCIE(entry_->offset, CursorOffset()); + return false; + } + Rule *rule = cie_rules_.RegisterRule(reg); + if (!rule) { + // This isn't really the right thing to do, but since CFI generally + // only mentions callee-saves registers, and GCC's convention for + // callee-saves registers is that they are unchanged, it's a good + // approximation. + rule = new SameValueRule(); + } + return DoRule(reg, rule); +} + +bool CallFrameInfo::ReadEntryPrologue(const uint8_t *cursor, Entry *entry) { + const uint8_t *buffer_end = buffer_ + buffer_length_; + + // Initialize enough of ENTRY for use in error reporting. + entry->offset = cursor - buffer_; + entry->start = cursor; + entry->kind = kUnknown; + entry->end = NULL; + + // Read the initial length. This sets reader_'s offset size. + size_t length_size; + uint64 length = reader_->ReadInitialLength(cursor, &length_size); + if (length_size > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + cursor += length_size; + + // In a .eh_frame section, a length of zero marks the end of the series + // of entries. + if (length == 0 && eh_frame_) { + entry->kind = kTerminator; + entry->end = cursor; + return true; + } + + // Validate the length. + if (length > size_t(buffer_end - cursor)) + return ReportIncomplete(entry); + + // The length is the number of bytes after the initial length field; + // we have that position handy at this point, so compute the end + // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine, + // and the length didn't fit in a size_t, we would have rejected it + // above.) + entry->end = cursor + length; + + // Parse the next field: either the offset of a CIE or a CIE id. + size_t offset_size = reader_->OffsetSize(); + if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry); + entry->id = reader_->ReadOffset(cursor); + + // Don't advance cursor past id field yet; in .eh_frame data we need + // the id's position to compute the section offset of an FDE's CIE. + + // Now we can decide what kind of entry this is. + if (eh_frame_) { + // In .eh_frame data, an ID of zero marks the entry as a CIE, and + // anything else is an offset from the id field of the FDE to the start + // of the CIE. + if (entry->id == 0) { + entry->kind = kCIE; + } else { + entry->kind = kFDE; + // Turn the offset from the id into an offset from the buffer's start. + entry->id = (cursor - buffer_) - entry->id; + } + } else { + // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the + // offset size for the entry) marks the entry as a CIE, and anything + // else is the offset of the CIE from the beginning of the section. + if (offset_size == 4) + entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE; + else { + assert(offset_size == 8); + entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE; + } + } + + // Now advance cursor past the id. + cursor += offset_size; + + // The fields specific to this kind of entry start here. + entry->fields = cursor; + + entry->cie = NULL; + + return true; +} + +bool CallFrameInfo::ReadCIEFields(CIE *cie) { + const uint8_t *cursor = cie->fields; + size_t len; + + assert(cie->kind == kCIE); + + // Prepare for early exit. + cie->version = 0; + cie->augmentation.clear(); + cie->code_alignment_factor = 0; + cie->data_alignment_factor = 0; + cie->return_address_register = 0; + cie->has_z_augmentation = false; + cie->pointer_encoding = DW_EH_PE_absptr; + cie->instructions = 0; + + // Parse the version number. + if (cie->end - cursor < 1) + return ReportIncomplete(cie); + cie->version = reader_->ReadOneByte(cursor); + cursor++; + + // If we don't recognize the version, we can't parse any more fields of the + // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a + // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well; + // the difference between those versions seems to be the same as for + // .debug_frame. + if (cie->version < 1 || cie->version > 3) { + reporter_->UnrecognizedVersion(cie->offset, cie->version); + return false; + } + + const uint8_t *augmentation_start = cursor; + const uint8_t *augmentation_end = + reinterpret_cast<const uint8_t *>(memchr(augmentation_start, '\0', + cie->end - augmentation_start)); + if (! augmentation_end) return ReportIncomplete(cie); + cursor = augmentation_end; + cie->augmentation = string(reinterpret_cast<const char *>(augmentation_start), + cursor - augmentation_start); + // Skip the terminating '\0'. + cursor++; + + // Is this CFI augmented? + if (!cie->augmentation.empty()) { + // Is it an augmentation we recognize? + if (cie->augmentation[0] == DW_Z_augmentation_start) { + // Linux C++ ABI 'z' augmentation, used for exception handling data. + cie->has_z_augmentation = true; + } else { + // Not an augmentation we recognize. Augmentations can have arbitrary + // effects on the form of rest of the content, so we have to give up. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + + // Parse the code alignment factor. + cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the data alignment factor. + cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + + // Parse the return address register. This is a ubyte in version 1, and + // a ULEB128 in version 3. + if (cie->version == 1) { + if (cursor >= cie->end) return ReportIncomplete(cie); + cie->return_address_register = uint8(*cursor++); + } else { + cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie); + cursor += len; + } + + // If we have a 'z' augmentation string, find the augmentation data and + // use the augmentation string to parse it. + if (cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len); + if (size_t(cie->end - cursor) < len + data_size) + return ReportIncomplete(cie); + cursor += len; + const uint8_t *data = cursor; + cursor += data_size; + const uint8_t *data_end = cursor; + + cie->has_z_lsda = false; + cie->has_z_personality = false; + cie->has_z_signal_frame = false; + + // Walk the augmentation string, and extract values from the + // augmentation data as the string directs. + for (size_t i = 1; i < cie->augmentation.size(); i++) { + switch (cie->augmentation[i]) { + case DW_Z_has_LSDA: + // The CIE's augmentation data holds the language-specific data + // area pointer's encoding, and the FDE's augmentation data holds + // the pointer itself. + cie->has_z_lsda = true; + // Fetch the LSDA encoding from the augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->lsda_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->lsda_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding); + return false; + } + // Don't check if the encoding is usable here --- we haven't + // read the FDE's fields yet, so we're not prepared for + // DW_EH_PE_funcrel, although that's a fine encoding for the + // LSDA to use, since it appears in the FDE. + break; + + case DW_Z_has_personality_routine: + // The CIE's augmentation data holds the personality routine + // pointer's encoding, followed by the pointer itself. + cie->has_z_personality = true; + // Fetch the personality routine pointer's encoding from the + // augmentation data. + if (data >= data_end) return ReportIncomplete(cie); + cie->personality_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->personality_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->personality_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->personality_encoding); + return false; + } + // Fetch the personality routine's pointer itself from the data. + cie->personality_address = + reader_->ReadEncodedPointer(data, cie->personality_encoding, + &len); + if (len > size_t(data_end - data)) + return ReportIncomplete(cie); + data += len; + break; + + case DW_Z_has_FDE_address_encoding: + // The CIE's augmentation data holds the pointer encoding to use + // for addresses in the FDE. + if (data >= data_end) return ReportIncomplete(cie); + cie->pointer_encoding = DwarfPointerEncoding(*data++); + if (!reader_->ValidEncoding(cie->pointer_encoding)) { + reporter_->InvalidPointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + if (!reader_->UsableEncoding(cie->pointer_encoding)) { + reporter_->UnusablePointerEncoding(cie->offset, + cie->pointer_encoding); + return false; + } + break; + + case DW_Z_is_signal_trampoline: + // Frames using this CIE are signal delivery frames. + cie->has_z_signal_frame = true; + break; + + default: + // An augmentation we don't recognize. + reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation); + return false; + } + } + } + + // The CIE's instructions start here. + cie->instructions = cursor; + + return true; +} + +bool CallFrameInfo::ReadFDEFields(FDE *fde) { + const uint8_t *cursor = fde->fields; + size_t size; + + fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding, + &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + reader_->SetFunctionBase(fde->address); + + // For the length, we strip off the upper nybble of the encoding used for + // the starting address. + DwarfPointerEncoding length_encoding = + DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f); + fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size); + if (size > size_t(fde->end - cursor)) + return ReportIncomplete(fde); + cursor += size; + + // If the CIE has a 'z' augmentation string, then augmentation data + // appears here. + if (fde->cie->has_z_augmentation) { + uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size); + if (size_t(fde->end - cursor) < size + data_size) + return ReportIncomplete(fde); + cursor += size; + + // In the abstract, we should walk the augmentation string, and extract + // items from the FDE's augmentation data as we encounter augmentation + // string characters that specify their presence: the ordering of items + // in the augmentation string determines the arrangement of values in + // the augmentation data. + // + // In practice, there's only ever one value in FDE augmentation data + // that we support --- the LSDA pointer --- and we have to bail if we + // see any unrecognized augmentation string characters. So if there is + // anything here at all, we know what it is, and where it starts. + if (fde->cie->has_z_lsda) { + // Check whether the LSDA's pointer encoding is usable now: only once + // we've parsed the FDE's starting address do we call reader_-> + // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes + // usable. + if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) { + reporter_->UnusablePointerEncoding(fde->cie->offset, + fde->cie->lsda_encoding); + return false; + } + + fde->lsda_address = + reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size); + if (size > data_size) + return ReportIncomplete(fde); + // Ideally, we would also complain here if there were unconsumed + // augmentation data. + } + + cursor += data_size; + } + + // The FDE's instructions start after those. + fde->instructions = cursor; + + return true; +} + +bool CallFrameInfo::Start() { + const uint8_t *buffer_end = buffer_ + buffer_length_; + const uint8_t *cursor; + bool all_ok = true; + const uint8_t *entry_end; + bool ok; + + // Traverse all the entries in buffer_, skipping CIEs and offering + // FDEs to the handler. + for (cursor = buffer_; cursor < buffer_end; + cursor = entry_end, all_ok = all_ok && ok) { + FDE fde; + + // Make it easy to skip this entry with 'continue': assume that + // things are not okay until we've checked all the data, and + // prepare the address of the next entry. + ok = false; + + // Read the entry's prologue. + if (!ReadEntryPrologue(cursor, &fde)) { + if (!fde.end) { + // If we couldn't even figure out this entry's extent, then we + // must stop processing entries altogether. + all_ok = false; + break; + } + entry_end = fde.end; + continue; + } + + // The next iteration picks up after this entry. + entry_end = fde.end; + + // Did we see an .eh_frame terminating mark? + if (fde.kind == kTerminator) { + // If there appears to be more data left in the section after the + // terminating mark, warn the user. But this is just a warning; + // we leave all_ok true. + if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset); + break; + } + + // In this loop, we skip CIEs. We only parse them fully when we + // parse an FDE that refers to them. This limits our memory + // consumption (beyond the buffer itself) to that needed to + // process the largest single entry. + if (fde.kind != kFDE) { + ok = true; + continue; + } + + // Validate the CIE pointer. + if (fde.id > buffer_length_) { + reporter_->CIEPointerOutOfRange(fde.offset, fde.id); + continue; + } + + CIE cie; + + // Parse this FDE's CIE header. + if (!ReadEntryPrologue(buffer_ + fde.id, &cie)) + continue; + // This had better be an actual CIE. + if (cie.kind != kCIE) { + reporter_->BadCIEId(fde.offset, fde.id); + continue; + } + if (!ReadCIEFields(&cie)) + continue; + + // We now have the values that govern both the CIE and the FDE. + cie.cie = &cie; + fde.cie = &cie; + + // Parse the FDE's header. + if (!ReadFDEFields(&fde)) + continue; + + // Call Entry to ask the consumer if they're interested. + if (!handler_->Entry(fde.offset, fde.address, fde.size, + cie.version, cie.augmentation, + cie.return_address_register)) { + // The handler isn't interested in this entry. That's not an error. + ok = true; + continue; + } + + if (cie.has_z_augmentation) { + // Report the personality routine address, if we have one. + if (cie.has_z_personality) { + if (!handler_ + ->PersonalityRoutine(cie.personality_address, + IsIndirectEncoding(cie.personality_encoding))) + continue; + } + + // Report the language-specific data area address, if we have one. + if (cie.has_z_lsda) { + if (!handler_ + ->LanguageSpecificDataArea(fde.lsda_address, + IsIndirectEncoding(cie.lsda_encoding))) + continue; + } + + // If this is a signal-handling frame, report that. + if (cie.has_z_signal_frame) { + if (!handler_->SignalHandler()) + continue; + } + } + + // Interpret the CIE's instructions, and then the FDE's instructions. + State state(reader_, handler_, reporter_, fde.address); + ok = state.InterpretCIE(cie) && state.InterpretFDE(fde); + + // Tell the ByteReader that the function start address from the + // FDE header is no longer valid. + reader_->ClearFunctionBase(); + + // Report the end of the entry. + handler_->End(); + } + + return all_ok; +} + +const char *CallFrameInfo::KindName(EntryKind kind) { + if (kind == CallFrameInfo::kUnknown) + return "entry"; + else if (kind == CallFrameInfo::kCIE) + return "common information entry"; + else if (kind == CallFrameInfo::kFDE) + return "frame description entry"; + else { + assert (kind == CallFrameInfo::kTerminator); + return ".eh_frame sequence terminator"; + } +} + +bool CallFrameInfo::ReportIncomplete(Entry *entry) { + reporter_->Incomplete(entry->offset, entry->kind); + return false; +} + +void CallFrameInfo::Reporter::Incomplete(uint64 offset, + CallFrameInfo::EntryKind kind) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str()); +} + +void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) { + fprintf(stderr, + "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker" + " before end of section contents\n", + filename_.c_str(), offset, section_.c_str()); +} + +void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset, + uint64 cie_offset) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer is out of range: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); +} + +void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE pointer does not point to a CIE: 0x%llx\n", + filename_.c_str(), offset, section_.c_str(), cie_offset); +} + +void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized version: %d\n", + filename_.c_str(), offset, section_.c_str(), version); +} + +void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset, + const string &aug) { + fprintf(stderr, + "%s: CFI frame description entry at offset 0x%llx in '%s':" + " CIE specifies unrecognized augmentation: '%s'\n", + filename_.c_str(), offset, section_.c_str(), aug.c_str()); +} + +void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset, + uint8 encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); +} + +void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset, + uint8 encoding) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " 'z' augmentation specifies a pointer encoding for which" + " we have no base address: 0x%02x\n", + filename_.c_str(), offset, section_.c_str(), encoding); +} + +void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) { + fprintf(stderr, + "%s: CFI common information entry at offset 0x%llx in '%s':" + " the DW_CFA_restore instruction at offset 0x%llx" + " cannot be used in a common information entry\n", + filename_.c_str(), offset, section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::BadInstruction(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx is unrecognized\n", + filename_.c_str(), CallFrameInfo::KindName(kind), + offset, section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::NoCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the instruction at offset 0x%llx assumes that a CFA rule has" + " been set, but none has been set\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " should pop a saved state from the stack, but the stack is empty\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset, + CallFrameInfo::EntryKind kind, + uint64 insn_offset) { + fprintf(stderr, + "%s: CFI %s at offset 0x%llx in section '%s':" + " the DW_CFA_restore_state instruction at offset 0x%llx" + " would clear the CFA rule in effect\n", + filename_.c_str(), CallFrameInfo::KindName(kind), offset, + section_.c_str(), insn_offset); +} + +} // namespace dwarf2reader |