diff options
Diffstat (limited to 'toolkit/crashreporter/google-breakpad/src/common/test_assembler.h')
-rw-r--r-- | toolkit/crashreporter/google-breakpad/src/common/test_assembler.h | 484 |
1 files changed, 484 insertions, 0 deletions
diff --git a/toolkit/crashreporter/google-breakpad/src/common/test_assembler.h b/toolkit/crashreporter/google-breakpad/src/common/test_assembler.h new file mode 100644 index 000000000..373dbebac --- /dev/null +++ b/toolkit/crashreporter/google-breakpad/src/common/test_assembler.h @@ -0,0 +1,484 @@ +// -*- mode: C++ -*- + +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> + +// test-assembler.h: interface to class for building complex binary streams. + +// To test the Breakpad symbol dumper and processor thoroughly, for +// all combinations of host system and minidump processor +// architecture, we need to be able to easily generate complex test +// data like debugging information and minidump files. +// +// For example, if we want our unit tests to provide full code +// coverage for stack walking, it may be difficult to persuade the +// compiler to generate every possible sort of stack walking +// information that we want to support; there are probably DWARF CFI +// opcodes that GCC never emits. Similarly, if we want to test our +// error handling, we will need to generate damaged minidumps or +// debugging information that (we hope) the client or compiler will +// never produce on its own. +// +// google_breakpad::TestAssembler provides a predictable and +// (relatively) simple way to generate complex formatted data streams +// like minidumps and CFI. Furthermore, because TestAssembler is +// portable, developers without access to (say) Visual Studio or a +// SPARC assembler can still work on test data for those targets. + +#ifndef PROCESSOR_TEST_ASSEMBLER_H_ +#define PROCESSOR_TEST_ASSEMBLER_H_ + +#include <list> +#include <vector> +#include <string> + +#include "common/using_std_string.h" +#include "google_breakpad/common/breakpad_types.h" + +namespace google_breakpad { + +using std::list; +using std::vector; + +namespace test_assembler { + +// A Label represents a value not yet known that we need to store in a +// section. As long as all the labels a section refers to are defined +// by the time we retrieve its contents as bytes, we can use undefined +// labels freely in that section's construction. +// +// A label can be in one of three states: +// - undefined, +// - defined as the sum of some other label and a constant, or +// - a constant. +// +// A label's value never changes, but it can accumulate constraints. +// Adding labels and integers is permitted, and yields a label. +// Subtracting a constant from a label is permitted, and also yields a +// label. Subtracting two labels that have some relationship to each +// other is permitted, and yields a constant. +// +// For example: +// +// Label a; // a's value is undefined +// Label b; // b's value is undefined +// { +// Label c = a + 4; // okay, even though a's value is unknown +// b = c + 4; // also okay; b is now a+8 +// } +// Label d = b - 2; // okay; d == a+6, even though c is gone +// d.Value(); // error: d's value is not yet known +// d - a; // is 6, even though their values are not known +// a = 12; // now b == 20, and d == 18 +// d.Value(); // 18: no longer an error +// b.Value(); // 20 +// d = 10; // error: d is already defined. +// +// Label objects' lifetimes are unconstrained: notice that, in the +// above example, even though a and b are only related through c, and +// c goes out of scope, the assignment to a sets b's value as well. In +// particular, it's not necessary to ensure that a Label lives beyond +// Sections that refer to it. +class Label { + public: + Label(); // An undefined label. + Label(uint64_t value); // A label with a fixed value + Label(const Label &value); // A label equal to another. + ~Label(); + + // Return this label's value; it must be known. + // + // Providing this as a cast operator is nifty, but the conversions + // happen in unexpected places. In particular, ISO C++ says that + // Label + size_t becomes ambigious, because it can't decide whether + // to convert the Label to a uint64_t and then to a size_t, or use + // the overloaded operator that returns a new label, even though the + // former could fail if the label is not yet defined and the latter won't. + uint64_t Value() const; + + Label &operator=(uint64_t value); + Label &operator=(const Label &value); + Label operator+(uint64_t addend) const; + Label operator-(uint64_t subtrahend) const; + uint64_t operator-(const Label &subtrahend) const; + + // We could also provide == and != that work on undefined, but + // related, labels. + + // Return true if this label's value is known. If VALUE_P is given, + // set *VALUE_P to the known value if returning true. + bool IsKnownConstant(uint64_t *value_p = NULL) const; + + // Return true if the offset from LABEL to this label is known. If + // OFFSET_P is given, set *OFFSET_P to the offset when returning true. + // + // You can think of l.KnownOffsetFrom(m, &d) as being like 'd = l-m', + // except that it also returns a value indicating whether the + // subtraction is possible given what we currently know of l and m. + // It can be possible even if we don't know l and m's values. For + // example: + // + // Label l, m; + // m = l + 10; + // l.IsKnownConstant(); // false + // m.IsKnownConstant(); // false + // uint64_t d; + // l.IsKnownOffsetFrom(m, &d); // true, and sets d to -10. + // l-m // -10 + // m-l // 10 + // m.Value() // error: m's value is not known + bool IsKnownOffsetFrom(const Label &label, uint64_t *offset_p = NULL) const; + + private: + // A label's value, or if that is not yet known, how the value is + // related to other labels' values. A binding may be: + // - a known constant, + // - constrained to be equal to some other binding plus a constant, or + // - unconstrained, and free to take on any value. + // + // Many labels may point to a single binding, and each binding may + // refer to another, so bindings and labels form trees whose leaves + // are labels, whose interior nodes (and roots) are bindings, and + // where links point from children to parents. Bindings are + // reference counted, allowing labels to be lightweight, copyable, + // assignable, placed in containers, and so on. + class Binding { + public: + Binding(); + Binding(uint64_t addend); + ~Binding(); + + // Increment our reference count. + void Acquire() { reference_count_++; }; + // Decrement our reference count, and return true if it is zero. + bool Release() { return --reference_count_ == 0; } + + // Set this binding to be equal to BINDING + ADDEND. If BINDING is + // NULL, then set this binding to the known constant ADDEND. + // Update every binding on this binding's chain to point directly + // to BINDING, or to be a constant, with addends adjusted + // appropriately. + void Set(Binding *binding, uint64_t value); + + // Return what we know about the value of this binding. + // - If this binding's value is a known constant, set BASE to + // NULL, and set ADDEND to its value. + // - If this binding is not a known constant but related to other + // bindings, set BASE to the binding at the end of the relation + // chain (which will always be unconstrained), and set ADDEND to the + // value to add to that binding's value to get this binding's + // value. + // - If this binding is unconstrained, set BASE to this, and leave + // ADDEND unchanged. + void Get(Binding **base, uint64_t *addend); + + private: + // There are three cases: + // + // - A binding representing a known constant value has base_ NULL, + // and addend_ equal to the value. + // + // - A binding representing a completely unconstrained value has + // base_ pointing to this; addend_ is unused. + // + // - A binding whose value is related to some other binding's + // value has base_ pointing to that other binding, and addend_ + // set to the amount to add to that binding's value to get this + // binding's value. We only represent relationships of the form + // x = y+c. + // + // Thus, the bind_ links form a chain terminating in either a + // known constant value or a completely unconstrained value. Most + // operations on bindings do path compression: they change every + // binding on the chain to point directly to the final value, + // adjusting addends as appropriate. + Binding *base_; + uint64_t addend_; + + // The number of Labels and Bindings pointing to this binding. + // (When a binding points to itself, indicating a completely + // unconstrained binding, that doesn't count as a reference.) + int reference_count_; + }; + + // This label's value. + Binding *value_; +}; + +inline Label operator+(uint64_t a, const Label &l) { return l + a; } +// Note that int-Label isn't defined, as negating a Label is not an +// operation we support. + +// Conventions for representing larger numbers as sequences of bytes. +enum Endianness { + kBigEndian, // Big-endian: the most significant byte comes first. + kLittleEndian, // Little-endian: the least significant byte comes first. + kUnsetEndian, // used internally +}; + +// A section is a sequence of bytes, constructed by appending bytes +// to the end. Sections have a convenient and flexible set of member +// functions for appending data in various formats: big-endian and +// little-endian signed and unsigned values of different sizes; +// LEB128 and ULEB128 values (see below), and raw blocks of bytes. +// +// If you need to append a value to a section that is not convenient +// to compute immediately, you can create a label, append the +// label's value to the section, and then set the label's value +// later, when it's convenient to do so. Once a label's value is +// known, the section class takes care of updating all previously +// appended references to it. +// +// Once all the labels to which a section refers have had their +// values determined, you can get a copy of the section's contents +// as a string. +// +// Note that there is no specified "start of section" label. This is +// because there are typically several different meanings for "the +// start of a section": the offset of the section within an object +// file, the address in memory at which the section's content appear, +// and so on. It's up to the code that uses the Section class to +// keep track of these explicitly, as they depend on the application. +class Section { + public: + Section(Endianness endianness = kUnsetEndian) + : endianness_(endianness) { }; + + // A base class destructor should be either public and virtual, + // or protected and nonvirtual. + virtual ~Section() { }; + + // Set the default endianness of this section to ENDIANNESS. This + // sets the behavior of the D<N> appending functions. If the + // assembler's default endianness was set, this is the + void set_endianness(Endianness endianness) { + endianness_ = endianness; + } + + // Return the default endianness of this section. + Endianness endianness() const { return endianness_; } + + // Append the SIZE bytes at DATA or the contents of STRING to the + // end of this section. Return a reference to this section. + Section &Append(const uint8_t *data, size_t size) { + contents_.append(reinterpret_cast<const char *>(data), size); + return *this; + }; + Section &Append(const string &data) { + contents_.append(data); + return *this; + }; + + // Append SIZE copies of BYTE to the end of this section. Return a + // reference to this section. + Section &Append(size_t size, uint8_t byte) { + contents_.append(size, (char) byte); + return *this; + } + + // Append NUMBER to this section. ENDIANNESS is the endianness to + // use to write the number. SIZE is the length of the number in + // bytes. Return a reference to this section. + Section &Append(Endianness endianness, size_t size, uint64_t number); + Section &Append(Endianness endianness, size_t size, const Label &label); + + // Append SECTION to the end of this section. The labels SECTION + // refers to need not be defined yet. + // + // Note that this has no effect on any Labels' values, or on + // SECTION. If placing SECTION within 'this' provides new + // constraints on existing labels' values, then it's up to the + // caller to fiddle with those labels as needed. + Section &Append(const Section §ion); + + // Append the contents of DATA as a series of bytes terminated by + // a NULL character. + Section &AppendCString(const string &data) { + Append(data); + contents_ += '\0'; + return *this; + } + + // Append at most SIZE bytes from DATA; if DATA is less than SIZE bytes + // long, pad with '\0' characters. + Section &AppendCString(const string &data, size_t size) { + contents_.append(data, 0, size); + if (data.size() < size) + Append(size - data.size(), 0); + return *this; + } + + // Append VALUE or LABEL to this section, with the given bit width and + // endianness. Return a reference to this section. + // + // The names of these functions have the form <ENDIANNESS><BITWIDTH>: + // <ENDIANNESS> is either 'L' (little-endian, least significant byte first), + // 'B' (big-endian, most significant byte first), or + // 'D' (default, the section's default endianness) + // <BITWIDTH> is 8, 16, 32, or 64. + // + // Since endianness doesn't matter for a single byte, all the + // <BITWIDTH>=8 functions are equivalent. + // + // These can be used to write both signed and unsigned values, as + // the compiler will properly sign-extend a signed value before + // passing it to the function, at which point the function's + // behavior is the same either way. + Section &L8(uint8_t value) { contents_ += value; return *this; } + Section &B8(uint8_t value) { contents_ += value; return *this; } + Section &D8(uint8_t value) { contents_ += value; return *this; } + Section &L16(uint16_t), &L32(uint32_t), &L64(uint64_t), + &B16(uint16_t), &B32(uint32_t), &B64(uint64_t), + &D16(uint16_t), &D32(uint32_t), &D64(uint64_t); + Section &L8(const Label &label), &L16(const Label &label), + &L32(const Label &label), &L64(const Label &label), + &B8(const Label &label), &B16(const Label &label), + &B32(const Label &label), &B64(const Label &label), + &D8(const Label &label), &D16(const Label &label), + &D32(const Label &label), &D64(const Label &label); + + // Append VALUE in a signed LEB128 (Little-Endian Base 128) form. + // + // The signed LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between -0x40 and 0x3f, then its signed LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its signed LEB128 representation is (N & 0x7f) | + // 0x80, followed by the signed LEB128 representation of N / 128, + // rounded towards negative infinity. + // + // In other words, we break VALUE into groups of seven bits, put + // them in little-endian order, and then write them as eight-bit + // bytes with the high bit on all but the last. + // + // Note that VALUE cannot be a Label (we would have to implement + // relaxation). + Section &LEB128(long long value); + + // Append VALUE in unsigned LEB128 (Little-Endian Base 128) form. + // + // The unsigned LEB128 representation of an integer N is a variable + // number of bytes: + // + // - If N is between 0 and 0x7f, then its unsigned LEB128 + // representation is a single byte whose value is N. + // + // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | + // 0x80, followed by the unsigned LEB128 representation of N / + // 128, rounded towards negative infinity. + // + // Note that VALUE cannot be a Label (we would have to implement + // relaxation). + Section &ULEB128(uint64_t value); + + // Jump to the next location aligned on an ALIGNMENT-byte boundary, + // relative to the start of the section. Fill the gap with PAD_BYTE. + // ALIGNMENT must be a power of two. Return a reference to this + // section. + Section &Align(size_t alignment, uint8_t pad_byte = 0); + + // Clear the contents of this section. + void Clear(); + + // Return the current size of the section. + size_t Size() const { return contents_.size(); } + + // Return a label representing the start of the section. + // + // It is up to the user whether this label represents the section's + // position in an object file, the section's address in memory, or + // what have you; some applications may need both, in which case + // this simple-minded interface won't be enough. This class only + // provides a single start label, for use with the Here and Mark + // member functions. + // + // Ideally, we'd provide this in a subclass that actually knows more + // about the application at hand and can provide an appropriate + // collection of start labels. But then the appending member + // functions like Append and D32 would return a reference to the + // base class, not the derived class, and the chaining won't work. + // Since the only value here is in pretty notation, that's a fatal + // flaw. + Label start() const { return start_; } + + // Return a label representing the point at which the next Appended + // item will appear in the section, relative to start(). + Label Here() const { return start_ + Size(); } + + // Set *LABEL to Here, and return a reference to this section. + Section &Mark(Label *label) { *label = Here(); return *this; } + + // If there are no undefined label references left in this + // section, set CONTENTS to the contents of this section, as a + // string, and clear this section. Return true on success, or false + // if there were still undefined labels. + bool GetContents(string *contents); + + private: + // Used internally. A reference to a label's value. + struct Reference { + Reference(size_t set_offset, Endianness set_endianness, size_t set_size, + const Label &set_label) + : offset(set_offset), endianness(set_endianness), size(set_size), + label(set_label) { } + + // The offset of the reference within the section. + size_t offset; + + // The endianness of the reference. + Endianness endianness; + + // The size of the reference. + size_t size; + + // The label to which this is a reference. + Label label; + }; + + // The default endianness of this section. + Endianness endianness_; + + // The contents of the section. + string contents_; + + // References to labels within those contents. + vector<Reference> references_; + + // A label referring to the beginning of the section. + Label start_; +}; + +} // namespace test_assembler +} // namespace google_breakpad + +#endif // PROCESSOR_TEST_ASSEMBLER_H_ |