diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /security/sandbox/chromium/base/strings | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'security/sandbox/chromium/base/strings')
24 files changed, 6360 insertions, 0 deletions
diff --git a/security/sandbox/chromium/base/strings/nullable_string16.cc b/security/sandbox/chromium/base/strings/nullable_string16.cc new file mode 100644 index 000000000..07f81d433 --- /dev/null +++ b/security/sandbox/chromium/base/strings/nullable_string16.cc @@ -0,0 +1,17 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/nullable_string16.h" + +#include <ostream> + +#include "base/strings/utf_string_conversions.h" + +namespace base { + +std::ostream& operator<<(std::ostream& out, const NullableString16& value) { + return value.is_null() ? out << "(null)" : out << UTF16ToUTF8(value.string()); +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/nullable_string16.h b/security/sandbox/chromium/base/strings/nullable_string16.h new file mode 100644 index 000000000..016c25c25 --- /dev/null +++ b/security/sandbox/chromium/base/strings/nullable_string16.h @@ -0,0 +1,46 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_NULLABLE_STRING16_H_ +#define BASE_STRINGS_NULLABLE_STRING16_H_ + +#include <iosfwd> + +#include "base/base_export.h" +#include "base/strings/string16.h" + +namespace base { + +// This class is a simple wrapper for string16 which also contains a null +// state. This should be used only where the difference between null and +// empty is meaningful. +class NullableString16 { + public: + NullableString16() : is_null_(true) { } + NullableString16(const string16& string, bool is_null) + : string_(string), is_null_(is_null) { + } + + const string16& string() const { return string_; } + bool is_null() const { return is_null_; } + + private: + string16 string_; + bool is_null_; +}; + +inline bool operator==(const NullableString16& a, const NullableString16& b) { + return a.is_null() == b.is_null() && a.string() == b.string(); +} + +inline bool operator!=(const NullableString16& a, const NullableString16& b) { + return !(a == b); +} + +BASE_EXPORT std::ostream& operator<<(std::ostream& out, + const NullableString16& value); + +} // namespace base + +#endif // BASE_STRINGS_NULLABLE_STRING16_H_ diff --git a/security/sandbox/chromium/base/strings/safe_sprintf.cc b/security/sandbox/chromium/base/strings/safe_sprintf.cc new file mode 100644 index 000000000..a51c77827 --- /dev/null +++ b/security/sandbox/chromium/base/strings/safe_sprintf.cc @@ -0,0 +1,686 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/safe_sprintf.h" + +#include <errno.h> +#include <string.h> + +#include <limits> + +#include "base/macros.h" +#include "build/build_config.h" + +#if !defined(NDEBUG) +// In debug builds, we use RAW_CHECK() to print useful error messages, if +// SafeSPrintf() is called with broken arguments. +// As our contract promises that SafeSPrintf() can be called from any +// restricted run-time context, it is not actually safe to call logging +// functions from it; and we only ever do so for debug builds and hope for the +// best. We should _never_ call any logging function other than RAW_CHECK(), +// and we should _never_ include any logging code that is active in production +// builds. Most notably, we should not include these logging functions in +// unofficial release builds, even though those builds would otherwise have +// DCHECKS() enabled. +// In other words; please do not remove the #ifdef around this #include. +// Instead, in production builds we opt for returning a degraded result, +// whenever an error is encountered. +// E.g. The broken function call +// SafeSPrintf("errno = %d (%x)", errno, strerror(errno)) +// will print something like +// errno = 13, (%x) +// instead of +// errno = 13 (Access denied) +// In most of the anticipated use cases, that's probably the preferred +// behavior. +#include "base/logging.h" +#define DEBUG_CHECK RAW_CHECK +#else +#define DEBUG_CHECK(x) do { if (x) { } } while (0) +#endif + +namespace base { +namespace strings { + +// The code in this file is extremely careful to be async-signal-safe. +// +// Most obviously, we avoid calling any code that could dynamically allocate +// memory. Doing so would almost certainly result in bugs and dead-locks. +// We also avoid calling any other STL functions that could have unintended +// side-effects involving memory allocation or access to other shared +// resources. +// +// But on top of that, we also avoid calling other library functions, as many +// of them have the side-effect of calling getenv() (in order to deal with +// localization) or accessing errno. The latter sounds benign, but there are +// several execution contexts where it isn't even possible to safely read let +// alone write errno. +// +// The stated design goal of the SafeSPrintf() function is that it can be +// called from any context that can safely call C or C++ code (i.e. anything +// that doesn't require assembly code). +// +// For a brief overview of some but not all of the issues with async-signal- +// safety, refer to: +// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html + +namespace { +const size_t kSSizeMaxConst = ((size_t)(ssize_t)-1) >> 1; + +const char kUpCaseHexDigits[] = "0123456789ABCDEF"; +const char kDownCaseHexDigits[] = "0123456789abcdef"; +} + +#if defined(NDEBUG) +// We would like to define kSSizeMax as std::numeric_limits<ssize_t>::max(), +// but C++ doesn't allow us to do that for constants. Instead, we have to +// use careful casting and shifting. We later use a static_assert to +// verify that this worked correctly. +namespace { +const size_t kSSizeMax = kSSizeMaxConst; +} +#else // defined(NDEBUG) +// For efficiency, we really need kSSizeMax to be a constant. But for unit +// tests, it should be adjustable. This allows us to verify edge cases without +// having to fill the entire available address space. As a compromise, we make +// kSSizeMax adjustable in debug builds, and then only compile that particular +// part of the unit test in debug builds. +namespace { +static size_t kSSizeMax = kSSizeMaxConst; +} + +namespace internal { +void SetSafeSPrintfSSizeMaxForTest(size_t max) { + kSSizeMax = max; +} + +size_t GetSafeSPrintfSSizeMaxForTest() { + return kSSizeMax; +} +} +#endif // defined(NDEBUG) + +namespace { +class Buffer { + public: + // |buffer| is caller-allocated storage that SafeSPrintf() writes to. It + // has |size| bytes of writable storage. It is the caller's responsibility + // to ensure that the buffer is at least one byte in size, so that it fits + // the trailing NUL that will be added by the destructor. The buffer also + // must be smaller or equal to kSSizeMax in size. + Buffer(char* buffer, size_t size) + : buffer_(buffer), + size_(size - 1), // Account for trailing NUL byte + count_(0) { +// MSVS2013's standard library doesn't mark max() as constexpr yet. cl.exe +// supports static_cast but doesn't really implement constexpr yet so it doesn't +// complain, but clang does. +#if __cplusplus >= 201103 && !(defined(__clang__) && defined(OS_WIN)) + static_assert(kSSizeMaxConst == + static_cast<size_t>(std::numeric_limits<ssize_t>::max()), + "kSSizeMaxConst should be the max value of an ssize_t"); +#endif + DEBUG_CHECK(size > 0); + DEBUG_CHECK(size <= kSSizeMax); + } + + ~Buffer() { + // The code calling the constructor guaranteed that there was enough space + // to store a trailing NUL -- and in debug builds, we are actually + // verifying this with DEBUG_CHECK()s in the constructor. So, we can + // always unconditionally write the NUL byte in the destructor. We do not + // need to adjust the count_, as SafeSPrintf() copies snprintf() in not + // including the NUL byte in its return code. + *GetInsertionPoint() = '\000'; + } + + // Returns true, iff the buffer is filled all the way to |kSSizeMax-1|. The + // caller can now stop adding more data, as GetCount() has reached its + // maximum possible value. + inline bool OutOfAddressableSpace() const { + return count_ == static_cast<size_t>(kSSizeMax - 1); + } + + // Returns the number of bytes that would have been emitted to |buffer_| + // if it was sized sufficiently large. This number can be larger than + // |size_|, if the caller provided an insufficiently large output buffer. + // But it will never be bigger than |kSSizeMax-1|. + inline ssize_t GetCount() const { + DEBUG_CHECK(count_ < kSSizeMax); + return static_cast<ssize_t>(count_); + } + + // Emits one |ch| character into the |buffer_| and updates the |count_| of + // characters that are currently supposed to be in the buffer. + // Returns "false", iff the buffer was already full. + // N.B. |count_| increases even if no characters have been written. This is + // needed so that GetCount() can return the number of bytes that should + // have been allocated for the |buffer_|. + inline bool Out(char ch) { + if (size_ >= 1 && count_ < size_) { + buffer_[count_] = ch; + return IncrementCountByOne(); + } + // |count_| still needs to be updated, even if the buffer has been + // filled completely. This allows SafeSPrintf() to return the number of + // bytes that should have been emitted. + IncrementCountByOne(); + return false; + } + + // Inserts |padding|-|len| bytes worth of padding into the |buffer_|. + // |count_| will also be incremented by the number of bytes that were meant + // to be emitted. The |pad| character is typically either a ' ' space + // or a '0' zero, but other non-NUL values are legal. + // Returns "false", iff the the |buffer_| filled up (i.e. |count_| + // overflowed |size_|) at any time during padding. + inline bool Pad(char pad, size_t padding, size_t len) { + DEBUG_CHECK(pad); + DEBUG_CHECK(padding <= kSSizeMax); + for (; padding > len; --padding) { + if (!Out(pad)) { + if (--padding) { + IncrementCount(padding-len); + } + return false; + } + } + return true; + } + + // POSIX doesn't define any async-signal-safe function for converting + // an integer to ASCII. Define our own version. + // + // This also gives us the ability to make the function a little more + // powerful and have it deal with |padding|, with truncation, and with + // predicting the length of the untruncated output. + // + // IToASCII() converts an integer |i| to ASCII. + // + // Unlike similar functions in the standard C library, it never appends a + // NUL character. This is left for the caller to do. + // + // While the function signature takes a signed int64_t, the code decides at + // run-time whether to treat the argument as signed (int64_t) or as unsigned + // (uint64_t) based on the value of |sign|. + // + // It supports |base|s 2 through 16. Only a |base| of 10 is allowed to have + // a |sign|. Otherwise, |i| is treated as unsigned. + // + // For bases larger than 10, |upcase| decides whether lower-case or upper- + // case letters should be used to designate digits greater than 10. + // + // Padding can be done with either '0' zeros or ' ' spaces. Padding has to + // be positive and will always be applied to the left of the output. + // + // Prepends a |prefix| to the number (e.g. "0x"). This prefix goes to + // the left of |padding|, if |pad| is '0'; and to the right of |padding| + // if |pad| is ' '. + // + // Returns "false", if the |buffer_| overflowed at any time. + bool IToASCII(bool sign, bool upcase, int64_t i, int base, + char pad, size_t padding, const char* prefix); + + private: + // Increments |count_| by |inc| unless this would cause |count_| to + // overflow |kSSizeMax-1|. Returns "false", iff an overflow was detected; + // it then clamps |count_| to |kSSizeMax-1|. + inline bool IncrementCount(size_t inc) { + // "inc" is either 1 or a "padding" value. Padding is clamped at + // run-time to at most kSSizeMax-1. So, we know that "inc" is always in + // the range 1..kSSizeMax-1. + // This allows us to compute "kSSizeMax - 1 - inc" without incurring any + // integer overflows. + DEBUG_CHECK(inc <= kSSizeMax - 1); + if (count_ > kSSizeMax - 1 - inc) { + count_ = kSSizeMax - 1; + return false; + } else { + count_ += inc; + return true; + } + } + + // Convenience method for the common case of incrementing |count_| by one. + inline bool IncrementCountByOne() { + return IncrementCount(1); + } + + // Return the current insertion point into the buffer. This is typically + // at |buffer_| + |count_|, but could be before that if truncation + // happened. It always points to one byte past the last byte that was + // successfully placed into the |buffer_|. + inline char* GetInsertionPoint() const { + size_t idx = count_; + if (idx > size_) { + idx = size_; + } + return buffer_ + idx; + } + + // User-provided buffer that will receive the fully formatted output string. + char* buffer_; + + // Number of bytes that are available in the buffer excluding the trailing + // NUL byte that will be added by the destructor. + const size_t size_; + + // Number of bytes that would have been emitted to the buffer, if the buffer + // was sufficiently big. This number always excludes the trailing NUL byte + // and it is guaranteed to never grow bigger than kSSizeMax-1. + size_t count_; + + DISALLOW_COPY_AND_ASSIGN(Buffer); +}; + + +bool Buffer::IToASCII(bool sign, bool upcase, int64_t i, int base, + char pad, size_t padding, const char* prefix) { + // Sanity check for parameters. None of these should ever fail, but see + // above for the rationale why we can't call CHECK(). + DEBUG_CHECK(base >= 2); + DEBUG_CHECK(base <= 16); + DEBUG_CHECK(!sign || base == 10); + DEBUG_CHECK(pad == '0' || pad == ' '); + DEBUG_CHECK(padding <= kSSizeMax); + DEBUG_CHECK(!(sign && prefix && *prefix)); + + // Handle negative numbers, if the caller indicated that |i| should be + // treated as a signed number; otherwise treat |i| as unsigned (even if the + // MSB is set!) + // Details are tricky, because of limited data-types, but equivalent pseudo- + // code would look like: + // if (sign && i < 0) + // prefix = "-"; + // num = abs(i); + int minint = 0; + uint64_t num; + if (sign && i < 0) { + prefix = "-"; + + // Turn our number positive. + if (i == std::numeric_limits<int64_t>::min()) { + // The most negative integer needs special treatment. + minint = 1; + num = static_cast<uint64_t>(-(i + 1)); + } else { + // "Normal" negative numbers are easy. + num = static_cast<uint64_t>(-i); + } + } else { + num = static_cast<uint64_t>(i); + } + + // If padding with '0' zero, emit the prefix or '-' character now. Otherwise, + // make the prefix accessible in reverse order, so that we can later output + // it right between padding and the number. + // We cannot choose the easier approach of just reversing the number, as that + // fails in situations where we need to truncate numbers that have padding + // and/or prefixes. + const char* reverse_prefix = NULL; + if (prefix && *prefix) { + if (pad == '0') { + while (*prefix) { + if (padding) { + --padding; + } + Out(*prefix++); + } + prefix = NULL; + } else { + for (reverse_prefix = prefix; *reverse_prefix; ++reverse_prefix) { + } + } + } else + prefix = NULL; + const size_t prefix_length = reverse_prefix - prefix; + + // Loop until we have converted the entire number. Output at least one + // character (i.e. '0'). + size_t start = count_; + size_t discarded = 0; + bool started = false; + do { + // Make sure there is still enough space left in our output buffer. + if (count_ >= size_) { + if (start < size_) { + // It is rare that we need to output a partial number. But if asked + // to do so, we will still make sure we output the correct number of + // leading digits. + // Since we are generating the digits in reverse order, we actually + // have to discard digits in the order that we have already emitted + // them. This is essentially equivalent to: + // memmove(buffer_ + start, buffer_ + start + 1, size_ - start - 1) + for (char* move = buffer_ + start, *end = buffer_ + size_ - 1; + move < end; + ++move) { + *move = move[1]; + } + ++discarded; + --count_; + } else if (count_ - size_ > 1) { + // Need to increment either |count_| or |discarded| to make progress. + // The latter is more efficient, as it eventually triggers fast + // handling of padding. But we have to ensure we don't accidentally + // change the overall state (i.e. switch the state-machine from + // discarding to non-discarding). |count_| needs to always stay + // bigger than |size_|. + --count_; + ++discarded; + } + } + + // Output the next digit and (if necessary) compensate for the most + // negative integer needing special treatment. This works because, + // no matter the bit width of the integer, the lowest-most decimal + // integer always ends in 2, 4, 6, or 8. + if (!num && started) { + if (reverse_prefix > prefix) { + Out(*--reverse_prefix); + } else { + Out(pad); + } + } else { + started = true; + Out((upcase ? kUpCaseHexDigits : kDownCaseHexDigits)[num%base + minint]); + } + + minint = 0; + num /= base; + + // Add padding, if requested. + if (padding > 0) { + --padding; + + // Performance optimization for when we are asked to output excessive + // padding, but our output buffer is limited in size. Even if we output + // a 64bit number in binary, we would never write more than 64 plus + // prefix non-padding characters. So, once this limit has been passed, + // any further state change can be computed arithmetically; we know that + // by this time, our entire final output consists of padding characters + // that have all already been output. + if (discarded > 8*sizeof(num) + prefix_length) { + IncrementCount(padding); + padding = 0; + } + } + } while (num || padding || (reverse_prefix > prefix)); + + // Conversion to ASCII actually resulted in the digits being in reverse + // order. We can't easily generate them in forward order, as we can't tell + // the number of characters needed until we are done converting. + // So, now, we reverse the string (except for the possible '-' sign). + char* front = buffer_ + start; + char* back = GetInsertionPoint(); + while (--back > front) { + char ch = *back; + *back = *front; + *front++ = ch; + } + + IncrementCount(discarded); + return !discarded; +} + +} // anonymous namespace + +namespace internal { + +ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt, const Arg* args, + const size_t max_args) { + // Make sure that at least one NUL byte can be written, and that the buffer + // never overflows kSSizeMax. Not only does that use up most or all of the + // address space, it also would result in a return code that cannot be + // represented. + if (static_cast<ssize_t>(sz) < 1) { + return -1; + } else if (sz > kSSizeMax) { + sz = kSSizeMax; + } + + // Iterate over format string and interpret '%' arguments as they are + // encountered. + Buffer buffer(buf, sz); + size_t padding; + char pad; + for (unsigned int cur_arg = 0; *fmt && !buffer.OutOfAddressableSpace(); ) { + if (*fmt++ == '%') { + padding = 0; + pad = ' '; + char ch = *fmt++; + format_character_found: + switch (ch) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // Found a width parameter. Convert to an integer value and store in + // "padding". If the leading digit is a zero, change the padding + // character from a space ' ' to a zero '0'. + pad = ch == '0' ? '0' : ' '; + for (;;) { + // The maximum allowed padding fills all the available address + // space and leaves just enough space to insert the trailing NUL. + const size_t max_padding = kSSizeMax - 1; + if (padding > max_padding/10 || + 10*padding > max_padding - (ch - '0')) { + DEBUG_CHECK(padding <= max_padding/10 && + 10*padding <= max_padding - (ch - '0')); + // Integer overflow detected. Skip the rest of the width until + // we find the format character, then do the normal error handling. + padding_overflow: + padding = max_padding; + while ((ch = *fmt++) >= '0' && ch <= '9') { + } + if (cur_arg < max_args) { + ++cur_arg; + } + goto fail_to_expand; + } + padding = 10*padding + ch - '0'; + if (padding > max_padding) { + // This doesn't happen for "sane" values of kSSizeMax. But once + // kSSizeMax gets smaller than about 10, our earlier range checks + // are incomplete. Unittests do trigger this artificial corner + // case. + DEBUG_CHECK(padding <= max_padding); + goto padding_overflow; + } + ch = *fmt++; + if (ch < '0' || ch > '9') { + // Reached the end of the width parameter. This is where the format + // character is found. + goto format_character_found; + } + } + break; + case 'c': { // Output an ASCII character. + // Check that there are arguments left to be inserted. + if (cur_arg >= max_args) { + DEBUG_CHECK(cur_arg < max_args); + goto fail_to_expand; + } + + // Check that the argument has the expected type. + const Arg& arg = args[cur_arg++]; + if (arg.type != Arg::INT && arg.type != Arg::UINT) { + DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT); + goto fail_to_expand; + } + + // Apply padding, if needed. + buffer.Pad(' ', padding, 1); + + // Convert the argument to an ASCII character and output it. + char as_char = static_cast<char>(arg.integer.i); + if (!as_char) { + goto end_of_output_buffer; + } + buffer.Out(as_char); + break; } + case 'd': // Output a possibly signed decimal value. + case 'o': // Output an unsigned octal value. + case 'x': // Output an unsigned hexadecimal value. + case 'X': + case 'p': { // Output a pointer value. + // Check that there are arguments left to be inserted. + if (cur_arg >= max_args) { + DEBUG_CHECK(cur_arg < max_args); + goto fail_to_expand; + } + + const Arg& arg = args[cur_arg++]; + int64_t i; + const char* prefix = NULL; + if (ch != 'p') { + // Check that the argument has the expected type. + if (arg.type != Arg::INT && arg.type != Arg::UINT) { + DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT); + goto fail_to_expand; + } + i = arg.integer.i; + + if (ch != 'd') { + // The Arg() constructor automatically performed sign expansion on + // signed parameters. This is great when outputting a %d decimal + // number, but can result in unexpected leading 0xFF bytes when + // outputting a %x hexadecimal number. Mask bits, if necessary. + // We have to do this here, instead of in the Arg() constructor, as + // the Arg() constructor cannot tell whether we will output a %d + // or a %x. Only the latter should experience masking. + if (arg.integer.width < sizeof(int64_t)) { + i &= (1LL << (8*arg.integer.width)) - 1; + } + } + } else { + // Pointer values require an actual pointer or a string. + if (arg.type == Arg::POINTER) { + i = reinterpret_cast<uintptr_t>(arg.ptr); + } else if (arg.type == Arg::STRING) { + i = reinterpret_cast<uintptr_t>(arg.str); + } else if (arg.type == Arg::INT && + arg.integer.width == sizeof(NULL) && + arg.integer.i == 0) { // Allow C++'s version of NULL + i = 0; + } else { + DEBUG_CHECK(arg.type == Arg::POINTER || arg.type == Arg::STRING); + goto fail_to_expand; + } + + // Pointers always include the "0x" prefix. + prefix = "0x"; + } + + // Use IToASCII() to convert to ASCII representation. For decimal + // numbers, optionally print a sign. For hexadecimal numbers, + // distinguish between upper and lower case. %p addresses are always + // printed as upcase. Supports base 8, 10, and 16. Prints padding + // and/or prefixes, if so requested. + buffer.IToASCII(ch == 'd' && arg.type == Arg::INT, + ch != 'x', i, + ch == 'o' ? 8 : ch == 'd' ? 10 : 16, + pad, padding, prefix); + break; } + case 's': { + // Check that there are arguments left to be inserted. + if (cur_arg >= max_args) { + DEBUG_CHECK(cur_arg < max_args); + goto fail_to_expand; + } + + // Check that the argument has the expected type. + const Arg& arg = args[cur_arg++]; + const char *s; + if (arg.type == Arg::STRING) { + s = arg.str ? arg.str : "<NULL>"; + } else if (arg.type == Arg::INT && arg.integer.width == sizeof(NULL) && + arg.integer.i == 0) { // Allow C++'s version of NULL + s = "<NULL>"; + } else { + DEBUG_CHECK(arg.type == Arg::STRING); + goto fail_to_expand; + } + + // Apply padding, if needed. This requires us to first check the + // length of the string that we are outputting. + if (padding) { + size_t len = 0; + for (const char* src = s; *src++; ) { + ++len; + } + buffer.Pad(' ', padding, len); + } + + // Printing a string involves nothing more than copying it into the + // output buffer and making sure we don't output more bytes than + // available space; Out() takes care of doing that. + for (const char* src = s; *src; ) { + buffer.Out(*src++); + } + break; } + case '%': + // Quoted percent '%' character. + goto copy_verbatim; + fail_to_expand: + // C++ gives us tools to do type checking -- something that snprintf() + // could never really do. So, whenever we see arguments that don't + // match up with the format string, we refuse to output them. But + // since we have to be extremely conservative about being async- + // signal-safe, we are limited in the type of error handling that we + // can do in production builds (in debug builds we can use + // DEBUG_CHECK() and hope for the best). So, all we do is pass the + // format string unchanged. That should eventually get the user's + // attention; and in the meantime, it hopefully doesn't lose too much + // data. + default: + // Unknown or unsupported format character. Just copy verbatim to + // output. + buffer.Out('%'); + DEBUG_CHECK(ch); + if (!ch) { + goto end_of_format_string; + } + buffer.Out(ch); + break; + } + } else { + copy_verbatim: + buffer.Out(fmt[-1]); + } + } + end_of_format_string: + end_of_output_buffer: + return buffer.GetCount(); +} + +} // namespace internal + +ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt) { + // Make sure that at least one NUL byte can be written, and that the buffer + // never overflows kSSizeMax. Not only does that use up most or all of the + // address space, it also would result in a return code that cannot be + // represented. + if (static_cast<ssize_t>(sz) < 1) { + return -1; + } else if (sz > kSSizeMax) { + sz = kSSizeMax; + } + + Buffer buffer(buf, sz); + + // In the slow-path, we deal with errors by copying the contents of + // "fmt" unexpanded. This means, if there are no arguments passed, the + // SafeSPrintf() function always degenerates to a version of strncpy() that + // de-duplicates '%' characters. + const char* src = fmt; + for (; *src; ++src) { + buffer.Out(*src); + DEBUG_CHECK(src[0] != '%' || src[1] == '%'); + if (src[0] == '%' && src[1] == '%') { + ++src; + } + } + return buffer.GetCount(); +} + +} // namespace strings +} // namespace base diff --git a/security/sandbox/chromium/base/strings/safe_sprintf.h b/security/sandbox/chromium/base/strings/safe_sprintf.h new file mode 100644 index 000000000..65524a50c --- /dev/null +++ b/security/sandbox/chromium/base/strings/safe_sprintf.h @@ -0,0 +1,246 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_SAFE_SPRINTF_H_ +#define BASE_STRINGS_SAFE_SPRINTF_H_ + +#include "build/build_config.h" + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#if defined(OS_POSIX) +// For ssize_t +#include <unistd.h> +#endif + +#include "base/base_export.h" + +namespace base { +namespace strings { + +#if defined(_MSC_VER) +// Define ssize_t inside of our namespace. +#if defined(_WIN64) +typedef __int64 ssize_t; +#else +typedef long ssize_t; +#endif +#endif + +// SafeSPrintf() is a type-safe and completely self-contained version of +// snprintf(). +// +// SafeSNPrintf() is an alternative function signature that can be used when +// not dealing with fixed-sized buffers. When possible, SafeSPrintf() should +// always be used instead of SafeSNPrintf() +// +// These functions allow for formatting complicated messages from contexts that +// require strict async-signal-safety. In fact, it is safe to call them from +// any low-level execution context, as they are guaranteed to make no library +// or system calls. It deliberately never touches "errno", either. +// +// The only exception to this rule is that in debug builds the code calls +// RAW_CHECK() to help diagnose problems when the format string does not +// match the rest of the arguments. In release builds, no CHECK()s are used, +// and SafeSPrintf() instead returns an output string that expands only +// those arguments that match their format characters. Mismatched arguments +// are ignored. +// +// The code currently only supports a subset of format characters: +// %c, %o, %d, %x, %X, %p, and %s. +// +// SafeSPrintf() aims to be as liberal as reasonably possible. Integer-like +// values of arbitrary width can be passed to all of the format characters +// that expect integers. Thus, it is explicitly legal to pass an "int" to +// "%c", and output will automatically look at the LSB only. It is also +// explicitly legal to pass either signed or unsigned values, and the format +// characters will automatically interpret the arguments accordingly. +// +// It is still not legal to mix-and-match integer-like values with pointer +// values. For instance, you cannot pass a pointer to %x, nor can you pass an +// integer to %p. +// +// The one exception is "0" zero being accepted by "%p". This works-around +// the problem of C++ defining NULL as an integer-like value. +// +// All format characters take an optional width parameter. This must be a +// positive integer. For %d, %o, %x, %X and %p, if the width starts with +// a leading '0', padding is done with '0' instead of ' ' characters. +// +// There are a few features of snprintf()-style format strings, that +// SafeSPrintf() does not support at this time. +// +// If an actual user showed up, there is no particularly strong reason they +// couldn't be added. But that assumes that the trade-offs between complexity +// and utility are favorable. +// +// For example, adding support for negative padding widths, and for %n are all +// likely to be viewed positively. They are all clearly useful, low-risk, easy +// to test, don't jeopardize the async-signal-safety of the code, and overall +// have little impact on other parts of SafeSPrintf() function. +// +// On the other hands, adding support for alternate forms, positional +// arguments, grouping, wide characters, localization or floating point numbers +// are all unlikely to ever be added. +// +// SafeSPrintf() and SafeSNPrintf() mimic the behavior of snprintf() and they +// return the number of bytes needed to store the untruncated output. This +// does *not* include the terminating NUL byte. +// +// They return -1, iff a fatal error happened. This typically can only happen, +// if the buffer size is a) negative, or b) zero (i.e. not even the NUL byte +// can be written). The return value can never be larger than SSIZE_MAX-1. +// This ensures that the caller can always add one to the signed return code +// in order to determine the amount of storage that needs to be allocated. +// +// While the code supports type checking and while it is generally very careful +// to avoid printing incorrect values, it tends to be conservative in printing +// as much as possible, even when given incorrect parameters. Typically, in +// case of an error, the format string will not be expanded. (i.e. something +// like SafeSPrintf(buf, "%p %d", 1, 2) results in "%p 2"). See above for +// the use of RAW_CHECK() in debug builds, though. +// +// Basic example: +// char buf[20]; +// base::strings::SafeSPrintf(buf, "The answer: %2d", 42); +// +// Example with dynamically sized buffer (async-signal-safe). This code won't +// work on Visual studio, as it requires dynamically allocating arrays on the +// stack. Consider picking a smaller value for |kMaxSize| if stack size is +// limited and known. On the other hand, if the parameters to SafeSNPrintf() +// are trusted and not controllable by the user, you can consider eliminating +// the check for |kMaxSize| altogether. The current value of SSIZE_MAX is +// essentially a no-op that just illustrates how to implement an upper bound: +// const size_t kInitialSize = 128; +// const size_t kMaxSize = std::numeric_limits<ssize_t>::max(); +// size_t size = kInitialSize; +// for (;;) { +// char buf[size]; +// size = SafeSNPrintf(buf, size, "Error message \"%s\"\n", err) + 1; +// if (sizeof(buf) < kMaxSize && size > kMaxSize) { +// size = kMaxSize; +// continue; +// } else if (size > sizeof(buf)) +// continue; +// write(2, buf, size-1); +// break; +// } + +namespace internal { +// Helpers that use C++ overloading, templates, and specializations to deduce +// and record type information from function arguments. This allows us to +// later write a type-safe version of snprintf(). + +struct Arg { + enum Type { INT, UINT, STRING, POINTER }; + + // Any integer-like value. + Arg(signed char c) : type(INT) { + integer.i = c; + integer.width = sizeof(char); + } + Arg(unsigned char c) : type(UINT) { + integer.i = c; + integer.width = sizeof(char); + } + Arg(signed short j) : type(INT) { + integer.i = j; + integer.width = sizeof(short); + } + Arg(unsigned short j) : type(UINT) { + integer.i = j; + integer.width = sizeof(short); + } + Arg(signed int j) : type(INT) { + integer.i = j; + integer.width = sizeof(int); + } + Arg(unsigned int j) : type(UINT) { + integer.i = j; + integer.width = sizeof(int); + } + Arg(signed long j) : type(INT) { + integer.i = j; + integer.width = sizeof(long); + } + Arg(unsigned long j) : type(UINT) { + integer.i = j; + integer.width = sizeof(long); + } + Arg(signed long long j) : type(INT) { + integer.i = j; + integer.width = sizeof(long long); + } + Arg(unsigned long long j) : type(UINT) { + integer.i = j; + integer.width = sizeof(long long); + } + + // A C-style text string. + Arg(const char* s) : str(s), type(STRING) { } + Arg(char* s) : str(s), type(STRING) { } + + // Any pointer value that can be cast to a "void*". + template<class T> Arg(T* p) : ptr((void*)p), type(POINTER) { } + + union { + // An integer-like value. + struct { + int64_t i; + unsigned char width; + } integer; + + // A C-style text string. + const char* str; + + // A pointer to an arbitrary object. + const void* ptr; + }; + const enum Type type; +}; + +// This is the internal function that performs the actual formatting of +// an snprintf()-style format string. +BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt, + const Arg* args, size_t max_args); + +#if !defined(NDEBUG) +// In debug builds, allow unit tests to artificially lower the kSSizeMax +// constant that is used as a hard upper-bound for all buffers. In normal +// use, this constant should always be std::numeric_limits<ssize_t>::max(). +BASE_EXPORT void SetSafeSPrintfSSizeMaxForTest(size_t max); +BASE_EXPORT size_t GetSafeSPrintfSSizeMaxForTest(); +#endif + +} // namespace internal + +template<typename... Args> +ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt, Args... args) { + // Use Arg() object to record type information and then copy arguments to an + // array to make it easier to iterate over them. + const internal::Arg arg_array[] = { args... }; + return internal::SafeSNPrintf(buf, N, fmt, arg_array, sizeof...(args)); +} + +template<size_t N, typename... Args> +ssize_t SafeSPrintf(char (&buf)[N], const char* fmt, Args... args) { + // Use Arg() object to record type information and then copy arguments to an + // array to make it easier to iterate over them. + const internal::Arg arg_array[] = { args... }; + return internal::SafeSNPrintf(buf, N, fmt, arg_array, sizeof...(args)); +} + +// Fast-path when we don't actually need to substitute any arguments. +BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt); +template<size_t N> +inline ssize_t SafeSPrintf(char (&buf)[N], const char* fmt) { + return SafeSNPrintf(buf, N, fmt); +} + +} // namespace strings +} // namespace base + +#endif // BASE_STRINGS_SAFE_SPRINTF_H_ diff --git a/security/sandbox/chromium/base/strings/safe_sprintf_unittest.cc b/security/sandbox/chromium/base/strings/safe_sprintf_unittest.cc new file mode 100644 index 000000000..931ace8b1 --- /dev/null +++ b/security/sandbox/chromium/base/strings/safe_sprintf_unittest.cc @@ -0,0 +1,763 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/safe_sprintf.h" + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <limits> + +#include "base/logging.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "build/build_config.h" +#include "testing/gtest/include/gtest/gtest.h" + +// Death tests on Android are currently very flaky. No need to add more flaky +// tests, as they just make it hard to spot real problems. +// TODO(markus): See if the restrictions on Android can eventually be lifted. +#if defined(GTEST_HAS_DEATH_TEST) && !defined(OS_ANDROID) +#define ALLOW_DEATH_TEST +#endif + +namespace base { +namespace strings { + +TEST(SafeSPrintfTest, Empty) { + char buf[2] = { 'X', 'X' }; + + // Negative buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, static_cast<size_t>(-1), "")); + EXPECT_EQ('X', buf[0]); + EXPECT_EQ('X', buf[1]); + + // Zero buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, 0, "")); + EXPECT_EQ('X', buf[0]); + EXPECT_EQ('X', buf[1]); + + // A one-byte buffer should always print a single NUL byte. + EXPECT_EQ(0, SafeSNPrintf(buf, 1, "")); + EXPECT_EQ(0, buf[0]); + EXPECT_EQ('X', buf[1]); + buf[0] = 'X'; + + // A larger buffer should leave the trailing bytes unchanged. + EXPECT_EQ(0, SafeSNPrintf(buf, 2, "")); + EXPECT_EQ(0, buf[0]); + EXPECT_EQ('X', buf[1]); + buf[0] = 'X'; + + // The same test using SafeSPrintf() instead of SafeSNPrintf(). + EXPECT_EQ(0, SafeSPrintf(buf, "")); + EXPECT_EQ(0, buf[0]); + EXPECT_EQ('X', buf[1]); + buf[0] = 'X'; +} + +TEST(SafeSPrintfTest, NoArguments) { + // Output a text message that doesn't require any substitutions. This + // is roughly equivalent to calling strncpy() (but unlike strncpy(), it does + // always add a trailing NUL; it always deduplicates '%' characters). + static const char text[] = "hello world"; + char ref[20], buf[20]; + memset(ref, 'X', sizeof(ref)); + memcpy(buf, ref, sizeof(buf)); + + // A negative buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, static_cast<size_t>(-1), text)); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // Zero buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, 0, text)); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // A one-byte buffer should always print a single NUL byte. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSNPrintf(buf, 1, text)); + EXPECT_EQ(0, buf[0]); + EXPECT_TRUE(!memcmp(buf+1, ref+1, sizeof(buf)-1)); + memcpy(buf, ref, sizeof(buf)); + + // A larger (but limited) buffer should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSNPrintf(buf, 2, text)); + EXPECT_EQ(text[0], buf[0]); + EXPECT_EQ(0, buf[1]); + EXPECT_TRUE(!memcmp(buf+2, ref+2, sizeof(buf)-2)); + memcpy(buf, ref, sizeof(buf)); + + // A unrestricted buffer length should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, sizeof(buf), text)); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // The same test using SafeSPrintf() instead of SafeSNPrintf(). + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSPrintf(buf, text)); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // Check for deduplication of '%' percent characters. + EXPECT_EQ(1, SafeSPrintf(buf, "%%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%%X")); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%%X")); +#if defined(NDEBUG) + EXPECT_EQ(1, SafeSPrintf(buf, "%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%X")); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%X")); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%X"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%X"), "src.1. == '%'"); +#endif +} + +TEST(SafeSPrintfTest, OneArgument) { + // Test basic single-argument single-character substitution. + const char text[] = "hello world"; + const char fmt[] = "hello%cworld"; + char ref[20], buf[20]; + memset(ref, 'X', sizeof(buf)); + memcpy(buf, ref, sizeof(buf)); + + // A negative buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, static_cast<size_t>(-1), fmt, ' ')); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // Zero buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, 0, fmt, ' ')); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // A one-byte buffer should always print a single NUL byte. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, 1, fmt, ' ')); + EXPECT_EQ(0, buf[0]); + EXPECT_TRUE(!memcmp(buf+1, ref+1, sizeof(buf)-1)); + memcpy(buf, ref, sizeof(buf)); + + // A larger (but limited) buffer should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, 2, fmt, ' ')); + EXPECT_EQ(text[0], buf[0]); + EXPECT_EQ(0, buf[1]); + EXPECT_TRUE(!memcmp(buf+2, ref+2, sizeof(buf)-2)); + memcpy(buf, ref, sizeof(buf)); + + // A unrestricted buffer length should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, sizeof(buf), fmt, ' ')); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // The same test using SafeSPrintf() instead of SafeSNPrintf(). + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSPrintf(buf, fmt, ' ')); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // Check for deduplication of '%' percent characters. + EXPECT_EQ(1, SafeSPrintf(buf, "%%", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%%", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%Y", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%Y", 0)); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%Y", 0)); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%%Y", 0)); +#if defined(NDEBUG) + EXPECT_EQ(1, SafeSPrintf(buf, "%", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%", 0)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%", 0), "ch"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%", 0), "ch"); +#endif +} + +TEST(SafeSPrintfTest, MissingArg) { +#if defined(NDEBUG) + char buf[20]; + EXPECT_EQ(3, SafeSPrintf(buf, "%c%c", 'A')); + EXPECT_EQ("A%c", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + char buf[20]; + EXPECT_DEATH(SafeSPrintf(buf, "%c%c", 'A'), "cur_arg < max_args"); +#endif +} + +TEST(SafeSPrintfTest, ASANFriendlyBufferTest) { + // Print into a buffer that is sized exactly to size. ASAN can verify that + // nobody attempts to write past the end of the buffer. + // There is a more complicated test in PrintLongString() that covers a lot + // more edge case, but it is also harder to debug in case of a failure. + const char kTestString[] = "This is a test"; + scoped_ptr<char[]> buf(new char[sizeof(kTestString)]); + EXPECT_EQ(static_cast<ssize_t>(sizeof(kTestString) - 1), + SafeSNPrintf(buf.get(), sizeof(kTestString), kTestString)); + EXPECT_EQ(std::string(kTestString), std::string(buf.get())); + EXPECT_EQ(static_cast<ssize_t>(sizeof(kTestString) - 1), + SafeSNPrintf(buf.get(), sizeof(kTestString), "%s", kTestString)); + EXPECT_EQ(std::string(kTestString), std::string(buf.get())); +} + +TEST(SafeSPrintfTest, NArgs) { + // Pre-C++11 compilers have a different code path, that can only print + // up to ten distinct arguments. + // We test both SafeSPrintf() and SafeSNPrintf(). This makes sure we don't + // have typos in the copy-n-pasted code that is needed to deal with various + // numbers of arguments. + char buf[12]; + EXPECT_EQ(1, SafeSPrintf(buf, "%c", 1)); + EXPECT_EQ("\1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%c%c", 1, 2)); + EXPECT_EQ("\1\2", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%c%c%c", 1, 2, 3)); + EXPECT_EQ("\1\2\3", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%c%c%c%c", 1, 2, 3, 4)); + EXPECT_EQ("\1\2\3\4", std::string(buf)); + EXPECT_EQ(5, SafeSPrintf(buf, "%c%c%c%c%c", 1, 2, 3, 4, 5)); + EXPECT_EQ("\1\2\3\4\5", std::string(buf)); + EXPECT_EQ(6, SafeSPrintf(buf, "%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6)); + EXPECT_EQ("\1\2\3\4\5\6", std::string(buf)); + EXPECT_EQ(7, SafeSPrintf(buf, "%c%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6, 7)); + EXPECT_EQ("\1\2\3\4\5\6\7", std::string(buf)); + EXPECT_EQ(8, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6, 7, 8)); + EXPECT_EQ("\1\2\3\4\5\6\7\10", std::string(buf)); + EXPECT_EQ(9, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11", std::string(buf)); + EXPECT_EQ(10, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + + // Repeat all the tests with SafeSNPrintf() instead of SafeSPrintf(). + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12", std::string(buf)); + EXPECT_EQ(1, SafeSNPrintf(buf, 11, "%c", 1)); + EXPECT_EQ("\1", std::string(buf)); + EXPECT_EQ(2, SafeSNPrintf(buf, 11, "%c%c", 1, 2)); + EXPECT_EQ("\1\2", std::string(buf)); + EXPECT_EQ(3, SafeSNPrintf(buf, 11, "%c%c%c", 1, 2, 3)); + EXPECT_EQ("\1\2\3", std::string(buf)); + EXPECT_EQ(4, SafeSNPrintf(buf, 11, "%c%c%c%c", 1, 2, 3, 4)); + EXPECT_EQ("\1\2\3\4", std::string(buf)); + EXPECT_EQ(5, SafeSNPrintf(buf, 11, "%c%c%c%c%c", 1, 2, 3, 4, 5)); + EXPECT_EQ("\1\2\3\4\5", std::string(buf)); + EXPECT_EQ(6, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6)); + EXPECT_EQ("\1\2\3\4\5\6", std::string(buf)); + EXPECT_EQ(7, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6, 7)); + EXPECT_EQ("\1\2\3\4\5\6\7", std::string(buf)); + EXPECT_EQ(8, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8)); + EXPECT_EQ("\1\2\3\4\5\6\7\10", std::string(buf)); + EXPECT_EQ(9, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11", std::string(buf)); + EXPECT_EQ(10, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12", std::string(buf)); + + EXPECT_EQ(11, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12\13", std::string(buf)); + EXPECT_EQ(11, SafeSNPrintf(buf, 12, "%c%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12\13", std::string(buf)); +} + +TEST(SafeSPrintfTest, DataTypes) { + char buf[40]; + + // Bytes + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint8_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%d", (uint8_t)-1)); + EXPECT_EQ("255", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int8_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int8_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%d", (int8_t)-128)); + EXPECT_EQ("-128", std::string(buf)); + + // Half-words + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint16_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(5, SafeSPrintf(buf, "%d", (uint16_t)-1)); + EXPECT_EQ("65535", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int16_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int16_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + EXPECT_EQ(6, SafeSPrintf(buf, "%d", (int16_t)-32768)); + EXPECT_EQ("-32768", std::string(buf)); + + // Words + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint32_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(10, SafeSPrintf(buf, "%d", (uint32_t)-1)); + EXPECT_EQ("4294967295", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int32_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int32_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + // Work-around for an limitation of C90 + EXPECT_EQ(11, SafeSPrintf(buf, "%d", (int32_t)-2147483647-1)); + EXPECT_EQ("-2147483648", std::string(buf)); + + // Quads + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint64_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(20, SafeSPrintf(buf, "%d", (uint64_t)-1)); + EXPECT_EQ("18446744073709551615", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int64_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int64_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + // Work-around for an limitation of C90 + EXPECT_EQ(20, SafeSPrintf(buf, "%d", (int64_t)-9223372036854775807LL-1)); + EXPECT_EQ("-9223372036854775808", std::string(buf)); + + // Strings (both const and mutable). + EXPECT_EQ(4, SafeSPrintf(buf, "test")); + EXPECT_EQ("test", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, buf)); + EXPECT_EQ("test", std::string(buf)); + + // Pointer + char addr[20]; + sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf); + SafeSPrintf(buf, "%p", buf); + EXPECT_EQ(std::string(addr), std::string(buf)); + SafeSPrintf(buf, "%p", (const char *)buf); + EXPECT_EQ(std::string(addr), std::string(buf)); + sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)sprintf); + SafeSPrintf(buf, "%p", sprintf); + EXPECT_EQ(std::string(addr), std::string(buf)); + + // Padding for pointers is a little more complicated because of the "0x" + // prefix. Padding with '0' zeros is relatively straight-forward, but + // padding with ' ' spaces requires more effort. + sprintf(addr, "0x%017llX", (unsigned long long)(uintptr_t)buf); + SafeSPrintf(buf, "%019p", buf); + EXPECT_EQ(std::string(addr), std::string(buf)); + sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf); + memset(addr, ' ', + (char*)memmove(addr + sizeof(addr) - strlen(addr) - 1, + addr, strlen(addr)+1) - addr); + SafeSPrintf(buf, "%19p", buf); + EXPECT_EQ(std::string(addr), std::string(buf)); +} + +namespace { +void PrintLongString(char* buf, size_t sz) { + // Output a reasonably complex expression into a limited-size buffer. + // At least one byte is available for writing the NUL character. + CHECK_GT(sz, static_cast<size_t>(0)); + + // Allocate slightly more space, so that we can verify that SafeSPrintf() + // never writes past the end of the buffer. + scoped_ptr<char[]> tmp(new char[sz+2]); + memset(tmp.get(), 'X', sz+2); + + // Use SafeSPrintf() to output a complex list of arguments: + // - test padding and truncating %c single characters. + // - test truncating %s simple strings. + // - test mismatching arguments and truncating (for %d != %s). + // - test zero-padding and truncating %x hexadecimal numbers. + // - test outputting and truncating %d MININT. + // - test outputting and truncating %p arbitrary pointer values. + // - test outputting, padding and truncating NULL-pointer %s strings. + char* out = tmp.get(); + size_t out_sz = sz; + size_t len; + for (scoped_ptr<char[]> perfect_buf;;) { + size_t needed = SafeSNPrintf(out, out_sz, +#if defined(NDEBUG) + "A%2cong %s: %d %010X %d %p%7s", 'l', "string", "", +#else + "A%2cong %s: %%d %010X %d %p%7s", 'l', "string", +#endif + 0xDEADBEEF, std::numeric_limits<intptr_t>::min(), + PrintLongString, static_cast<char*>(NULL)) + 1; + + // Various sanity checks: + // The numbered of characters needed to print the full string should always + // be bigger or equal to the bytes that have actually been output. + len = strlen(tmp.get()); + CHECK_GE(needed, len+1); + + // The number of characters output should always fit into the buffer that + // was passed into SafeSPrintf(). + CHECK_LT(len, out_sz); + + // The output is always terminated with a NUL byte (actually, this test is + // always going to pass, as strlen() already verified this) + EXPECT_FALSE(tmp[len]); + + // ASAN can check that we are not overwriting buffers, iff we make sure the + // buffer is exactly the size that we are expecting to be written. After + // running SafeSNPrintf() the first time, it is possible to compute the + // correct buffer size for this test. So, allocate a second buffer and run + // the exact same SafeSNPrintf() command again. + if (!perfect_buf.get()) { + out_sz = std::min(needed, sz); + out = new char[out_sz]; + perfect_buf.reset(out); + } else { + break; + } + } + + // All trailing bytes are unchanged. + for (size_t i = len+1; i < sz+2; ++i) + EXPECT_EQ('X', tmp[i]); + + // The text that was generated by SafeSPrintf() should always match the + // equivalent text generated by sprintf(). Please note that the format + // string for sprintf() is not complicated, as it does not have the + // benefit of getting type information from the C++ compiler. + // + // N.B.: It would be so much cleaner to use snprintf(). But unfortunately, + // Visual Studio doesn't support this function, and the work-arounds + // are all really awkward. + char ref[256]; + CHECK_LE(sz, sizeof(ref)); + sprintf(ref, "A long string: %%d 00DEADBEEF %lld 0x%llX <NULL>", + static_cast<long long>(std::numeric_limits<intptr_t>::min()), + static_cast<unsigned long long>( + reinterpret_cast<uintptr_t>(PrintLongString))); + ref[sz-1] = '\000'; + +#if defined(NDEBUG) + const size_t kSSizeMax = std::numeric_limits<ssize_t>::max(); +#else + const size_t kSSizeMax = internal::GetSafeSPrintfSSizeMaxForTest(); +#endif + + // Compare the output from SafeSPrintf() to the one from sprintf(). + EXPECT_EQ(std::string(ref).substr(0, kSSizeMax-1), std::string(tmp.get())); + + // We allocated a slightly larger buffer, so that we could perform some + // extra sanity checks. Now that the tests have all passed, we copy the + // data to the output buffer that the caller provided. + memcpy(buf, tmp.get(), len+1); +} + +#if !defined(NDEBUG) +class ScopedSafeSPrintfSSizeMaxSetter { + public: + ScopedSafeSPrintfSSizeMaxSetter(size_t sz) { + old_ssize_max_ = internal::GetSafeSPrintfSSizeMaxForTest(); + internal::SetSafeSPrintfSSizeMaxForTest(sz); + } + + ~ScopedSafeSPrintfSSizeMaxSetter() { + internal::SetSafeSPrintfSSizeMaxForTest(old_ssize_max_); + } + + private: + size_t old_ssize_max_; + + DISALLOW_COPY_AND_ASSIGN(ScopedSafeSPrintfSSizeMaxSetter); +}; +#endif + +} // anonymous namespace + +TEST(SafeSPrintfTest, Truncation) { + // We use PrintLongString() to print a complex long string and then + // truncate to all possible lengths. This ends up exercising a lot of + // different code paths in SafeSPrintf() and IToASCII(), as truncation can + // happen in a lot of different states. + char ref[256]; + PrintLongString(ref, sizeof(ref)); + for (size_t i = strlen(ref)+1; i; --i) { + char buf[sizeof(ref)]; + PrintLongString(buf, i); + EXPECT_EQ(std::string(ref, i - 1), std::string(buf)); + } + + // When compiling in debug mode, we have the ability to fake a small + // upper limit for the maximum value that can be stored in an ssize_t. + // SafeSPrintf() uses this upper limit to determine how many bytes it will + // write to the buffer, even if the caller claimed a bigger buffer size. + // Repeat the truncation test and verify that this other code path in + // SafeSPrintf() works correctly, too. +#if !defined(NDEBUG) + for (size_t i = strlen(ref)+1; i > 1; --i) { + ScopedSafeSPrintfSSizeMaxSetter ssize_max_setter(i); + char buf[sizeof(ref)]; + PrintLongString(buf, sizeof(buf)); + EXPECT_EQ(std::string(ref, i - 1), std::string(buf)); + } + + // kSSizeMax is also used to constrain the maximum amount of padding, before + // SafeSPrintf() detects an error in the format string. + ScopedSafeSPrintfSSizeMaxSetter ssize_max_setter(100); + char buf[256]; + EXPECT_EQ(99, SafeSPrintf(buf, "%99c", ' ')); + EXPECT_EQ(std::string(99, ' '), std::string(buf)); + *buf = '\000'; +#if defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%100c", ' '), "padding <= max_padding"); +#endif + EXPECT_EQ(0, *buf); +#endif +} + +TEST(SafeSPrintfTest, Padding) { + char buf[40], fmt[40]; + + // Chars %c + EXPECT_EQ(1, SafeSPrintf(buf, "%c", 'A')); + EXPECT_EQ("A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2c", 'A')); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02c", 'A')); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2c", 'A')); + EXPECT_EQ("%-2c", std::string(buf)); + SafeSPrintf(fmt, "%%%dc", std::numeric_limits<ssize_t>::max() - 1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, SafeSPrintf(buf, fmt, 'A')); + SafeSPrintf(fmt, "%%%dc", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 'A')); + EXPECT_EQ("%c", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 'A'), "padding <= max_padding"); +#endif + + // Octal %o + EXPECT_EQ(1, SafeSPrintf(buf, "%o", 1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2o", 1)); + EXPECT_EQ(" 1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02o", 1)); + EXPECT_EQ("01", std::string(buf)); + EXPECT_EQ(12, SafeSPrintf(buf, "%12o", -1)); + EXPECT_EQ(" 37777777777", std::string(buf)); + EXPECT_EQ(12, SafeSPrintf(buf, "%012o", -1)); + EXPECT_EQ("037777777777", std::string(buf)); + EXPECT_EQ(23, SafeSPrintf(buf, "%23o", -1LL)); + EXPECT_EQ(" 1777777777777777777777", std::string(buf)); + EXPECT_EQ(23, SafeSPrintf(buf, "%023o", -1LL)); + EXPECT_EQ("01777777777777777777777", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2o", 0111)); + EXPECT_EQ("111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2o", 1)); + EXPECT_EQ("%-2o", std::string(buf)); + SafeSPrintf(fmt, "%%%do", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%do", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ("000", std::string(buf)); + SafeSPrintf(fmt, "%%%do", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%o", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // Decimals %d + EXPECT_EQ(1, SafeSPrintf(buf, "%d", 1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2d", 1)); + EXPECT_EQ(" 1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02d", 1)); + EXPECT_EQ("01", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%3d", -1)); + EXPECT_EQ(" -1", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%03d", -1)); + EXPECT_EQ("-01", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2d", 111)); + EXPECT_EQ("111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%2d", -111)); + EXPECT_EQ("-111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2d", 1)); + EXPECT_EQ("%-2d", std::string(buf)); + SafeSPrintf(fmt, "%%%dd", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%dd", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ("000", std::string(buf)); + SafeSPrintf(fmt, "%%%dd", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%d", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // Hex %X + EXPECT_EQ(1, SafeSPrintf(buf, "%X", 1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2X", 1)); + EXPECT_EQ(" 1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02X", 1)); + EXPECT_EQ("01", std::string(buf)); + EXPECT_EQ(9, SafeSPrintf(buf, "%9X", -1)); + EXPECT_EQ(" FFFFFFFF", std::string(buf)); + EXPECT_EQ(9, SafeSPrintf(buf, "%09X", -1)); + EXPECT_EQ("0FFFFFFFF", std::string(buf)); + EXPECT_EQ(17, SafeSPrintf(buf, "%17X", -1LL)); + EXPECT_EQ(" FFFFFFFFFFFFFFFF", std::string(buf)); + EXPECT_EQ(17, SafeSPrintf(buf, "%017X", -1LL)); + EXPECT_EQ("0FFFFFFFFFFFFFFFF", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2X", 0x111)); + EXPECT_EQ("111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2X", 1)); + EXPECT_EQ("%-2X", std::string(buf)); + SafeSPrintf(fmt, "%%%dX", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%dX", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ("000", std::string(buf)); + SafeSPrintf(fmt, "%%%dX", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%X", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // Pointer %p + EXPECT_EQ(3, SafeSPrintf(buf, "%p", (void*)1)); + EXPECT_EQ("0x1", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%4p", (void*)1)); + EXPECT_EQ(" 0x1", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%04p", (void*)1)); + EXPECT_EQ("0x01", std::string(buf)); + EXPECT_EQ(5, SafeSPrintf(buf, "%4p", (void*)0x111)); + EXPECT_EQ("0x111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2p", (void*)1)); + EXPECT_EQ("%-2p", std::string(buf)); + SafeSPrintf(fmt, "%%%dp", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, (void*)1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%dp", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, (void*)1)); + EXPECT_EQ("0x0", std::string(buf)); + SafeSPrintf(fmt, "%%%dp", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%p", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // String + EXPECT_EQ(1, SafeSPrintf(buf, "%s", "A")); + EXPECT_EQ("A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2s", "A")); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02s", "A")); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2s", "AAA")); + EXPECT_EQ("AAA", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2s", "A")); + EXPECT_EQ("%-2s", std::string(buf)); + SafeSPrintf(fmt, "%%%ds", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, "A")); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%ds", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, "A")); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%%ds", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, "A")); + EXPECT_EQ("%s", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, "A"), "padding <= max_padding"); +#endif +} + +TEST(SafeSPrintfTest, EmbeddedNul) { + char buf[] = { 'X', 'X', 'X', 'X' }; + EXPECT_EQ(2, SafeSPrintf(buf, "%3c", 0)); + EXPECT_EQ(' ', buf[0]); + EXPECT_EQ(' ', buf[1]); + EXPECT_EQ(0, buf[2]); + EXPECT_EQ('X', buf[3]); + + // Check handling of a NUL format character. N.B. this takes two different + // code paths depending on whether we are actually passing arguments. If + // we don't have any arguments, we are running in the fast-path code, that + // looks (almost) like a strncpy(). +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, "%%%")); + EXPECT_EQ("%%", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%", 0)); + EXPECT_EQ("%%", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%%%"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%", 0), "ch"); +#endif +} + +TEST(SafeSPrintfTest, EmitNULL) { + char buf[40]; +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion-null" +#endif + EXPECT_EQ(1, SafeSPrintf(buf, "%d", NULL)); + EXPECT_EQ("0", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%p", NULL)); + EXPECT_EQ("0x0", std::string(buf)); + EXPECT_EQ(6, SafeSPrintf(buf, "%s", NULL)); + EXPECT_EQ("<NULL>", std::string(buf)); +#if defined(__GCC__) +#pragma GCC diagnostic pop +#endif +} + +TEST(SafeSPrintfTest, PointerSize) { + // The internal data representation is a 64bit value, independent of the + // native word size. We want to perform sign-extension for signed integers, + // but we want to avoid doing so for pointer types. This could be a + // problem on systems, where pointers are only 32bit. This tests verifies + // that there is no such problem. + char *str = reinterpret_cast<char *>(0x80000000u); + void *ptr = str; + char buf[40]; + EXPECT_EQ(10, SafeSPrintf(buf, "%p", str)); + EXPECT_EQ("0x80000000", std::string(buf)); + EXPECT_EQ(10, SafeSPrintf(buf, "%p", ptr)); + EXPECT_EQ("0x80000000", std::string(buf)); +} + +} // namespace strings +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string16.cc b/security/sandbox/chromium/base/strings/string16.cc new file mode 100644 index 000000000..f4c8cf746 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string16.cc @@ -0,0 +1,82 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string16.h" + +#if defined(WCHAR_T_IS_UTF16) + +#error This file should not be used on 2-byte wchar_t systems +// If this winds up being needed on 2-byte wchar_t systems, either the +// definitions below can be used, or the host system's wide character +// functions like wmemcmp can be wrapped. + +#elif defined(WCHAR_T_IS_UTF32) + +#include <ostream> + +#include "base/strings/utf_string_conversions.h" + +namespace base { + +int c16memcmp(const char16* s1, const char16* s2, size_t n) { + // We cannot call memcmp because that changes the semantics. + while (n-- > 0) { + if (*s1 != *s2) { + // We cannot use (*s1 - *s2) because char16 is unsigned. + return ((*s1 < *s2) ? -1 : 1); + } + ++s1; + ++s2; + } + return 0; +} + +size_t c16len(const char16* s) { + const char16 *s_orig = s; + while (*s) { + ++s; + } + return s - s_orig; +} + +const char16* c16memchr(const char16* s, char16 c, size_t n) { + while (n-- > 0) { + if (*s == c) { + return s; + } + ++s; + } + return 0; +} + +char16* c16memmove(char16* s1, const char16* s2, size_t n) { + return static_cast<char16*>(memmove(s1, s2, n * sizeof(char16))); +} + +char16* c16memcpy(char16* s1, const char16* s2, size_t n) { + return static_cast<char16*>(memcpy(s1, s2, n * sizeof(char16))); +} + +char16* c16memset(char16* s, char16 c, size_t n) { + char16 *s_orig = s; + while (n-- > 0) { + *s = c; + ++s; + } + return s_orig; +} + +std::ostream& operator<<(std::ostream& out, const string16& str) { + return out << UTF16ToUTF8(str); +} + +void PrintTo(const string16& str, std::ostream* out) { + *out << str; +} + +} // namespace base + +template class std::basic_string<base::char16, base::string16_char_traits>; + +#endif // WCHAR_T_IS_UTF32 diff --git a/security/sandbox/chromium/base/strings/string16.h b/security/sandbox/chromium/base/strings/string16.h new file mode 100644 index 000000000..e47669c1b --- /dev/null +++ b/security/sandbox/chromium/base/strings/string16.h @@ -0,0 +1,187 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING16_H_ +#define BASE_STRINGS_STRING16_H_ + +// WHAT: +// A version of std::basic_string that provides 2-byte characters even when +// wchar_t is not implemented as a 2-byte type. You can access this class as +// string16. We also define char16, which string16 is based upon. +// +// WHY: +// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 +// data. Plenty of existing code operates on strings encoded as UTF-16. +// +// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make +// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails +// at run time, because it calls some functions (like wcslen) that come from +// the system's native C library -- which was built with a 4-byte wchar_t! +// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's +// entirely improper on those systems where the encoding of wchar_t is defined +// as UTF-32. +// +// Here, we define string16, which is similar to std::wstring but replaces all +// libc functions with custom, 2-byte-char compatible routines. It is capable +// of carrying UTF-16-encoded data. + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string> + +#include "base/base_export.h" +#include "build/build_config.h" + +#if defined(WCHAR_T_IS_UTF16) + +namespace base { + +typedef wchar_t char16; +typedef std::wstring string16; +typedef std::char_traits<wchar_t> string16_char_traits; + +} // namespace base + +#elif defined(WCHAR_T_IS_UTF32) + +namespace base { + +typedef uint16_t char16; + +// char16 versions of the functions required by string16_char_traits; these +// are based on the wide character functions of similar names ("w" or "wcs" +// instead of "c16"). +BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n); +BASE_EXPORT size_t c16len(const char16* s); +BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n); +BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n); +BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n); +BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n); + +struct string16_char_traits { + typedef char16 char_type; + typedef int int_type; + + // int_type needs to be able to hold each possible value of char_type, and in + // addition, the distinct value of eof(). + static_assert(sizeof(int_type) > sizeof(char_type), + "int must be larger than 16 bits wide"); + + typedef std::streamoff off_type; + typedef mbstate_t state_type; + typedef std::fpos<state_type> pos_type; + + static void assign(char_type& c1, const char_type& c2) { + c1 = c2; + } + + static bool eq(const char_type& c1, const char_type& c2) { + return c1 == c2; + } + static bool lt(const char_type& c1, const char_type& c2) { + return c1 < c2; + } + + static int compare(const char_type* s1, const char_type* s2, size_t n) { + return c16memcmp(s1, s2, n); + } + + static size_t length(const char_type* s) { + return c16len(s); + } + + static const char_type* find(const char_type* s, size_t n, + const char_type& a) { + return c16memchr(s, a, n); + } + + static char_type* move(char_type* s1, const char_type* s2, size_t n) { + return c16memmove(s1, s2, n); + } + + static char_type* copy(char_type* s1, const char_type* s2, size_t n) { + return c16memcpy(s1, s2, n); + } + + static char_type* assign(char_type* s, size_t n, char_type a) { + return c16memset(s, a, n); + } + + static int_type not_eof(const int_type& c) { + return eq_int_type(c, eof()) ? 0 : c; + } + + static char_type to_char_type(const int_type& c) { + return char_type(c); + } + + static int_type to_int_type(const char_type& c) { + return int_type(c); + } + + static bool eq_int_type(const int_type& c1, const int_type& c2) { + return c1 == c2; + } + + static int_type eof() { + return static_cast<int_type>(EOF); + } +}; + +typedef std::basic_string<char16, base::string16_char_traits> string16; + +BASE_EXPORT extern std::ostream& operator<<(std::ostream& out, + const string16& str); + +// This is required by googletest to print a readable output on test failures. +BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out); + +} // namespace base + +// The string class will be explicitly instantiated only once, in string16.cc. +// +// std::basic_string<> in GNU libstdc++ contains a static data member, +// _S_empty_rep_storage, to represent empty strings. When an operation such +// as assignment or destruction is performed on a string, causing its existing +// data member to be invalidated, it must not be freed if this static data +// member is being used. Otherwise, it counts as an attempt to free static +// (and not allocated) data, which is a memory error. +// +// Generally, due to C++ template magic, _S_empty_rep_storage will be marked +// as a coalesced symbol, meaning that the linker will combine multiple +// instances into a single one when generating output. +// +// If a string class is used by multiple shared libraries, a problem occurs. +// Each library will get its own copy of _S_empty_rep_storage. When strings +// are passed across a library boundary for alteration or destruction, memory +// errors will result. GNU libstdc++ contains a configuration option, +// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which +// disables the static data member optimization, but it's a good optimization +// and non-STL code is generally at the mercy of the system's STL +// configuration. Fully-dynamic strings are not the default for GNU libstdc++ +// libstdc++ itself or for the libstdc++ installations on the systems we care +// about, such as Mac OS X and relevant flavors of Linux. +// +// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 . +// +// To avoid problems, string classes need to be explicitly instantiated only +// once, in exactly one library. All other string users see it via an "extern" +// declaration. This is precisely how GNU libstdc++ handles +// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring). +// +// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2), +// in which the linker does not fully coalesce symbols when dead code +// stripping is enabled. This bug causes the memory errors described above +// to occur even when a std::basic_string<> does not cross shared library +// boundaries, such as in statically-linked executables. +// +// TODO(mark): File this bug with Apple and update this note with a bug number. + +extern template +class BASE_EXPORT std::basic_string<base::char16, base::string16_char_traits>; + +#endif // WCHAR_T_IS_UTF32 + +#endif // BASE_STRINGS_STRING16_H_ diff --git a/security/sandbox/chromium/base/strings/string_number_conversions.cc b/security/sandbox/chromium/base/strings/string_number_conversions.cc new file mode 100644 index 000000000..07248501e --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_number_conversions.cc @@ -0,0 +1,485 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_number_conversions.h" + +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include <wctype.h> + +#include <limits> + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "base/numerics/safe_math.h" +#include "base/scoped_clear_errno.h" +#include "base/strings/utf_string_conversions.h" +#include "base/third_party/dmg_fp/dmg_fp.h" + +namespace base { + +namespace { + +template <typename STR, typename INT> +struct IntToStringT { + static STR IntToString(INT value) { + // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. + // So round up to allocate 3 output characters per byte, plus 1 for '-'. + const size_t kOutputBufSize = + 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed; + + // Create the string in a temporary buffer, write it back to front, and + // then return the substr of what we ended up using. + using CHR = typename STR::value_type; + CHR outbuf[kOutputBufSize]; + + // The ValueOrDie call below can never fail, because UnsignedAbs is valid + // for all valid inputs. + auto res = CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie(); + + CHR* end = outbuf + kOutputBufSize; + CHR* i = end; + do { + --i; + DCHECK(i != outbuf); + *i = static_cast<CHR>((res % 10) + '0'); + res /= 10; + } while (res != 0); + if (IsValueNegative(value)) { + --i; + DCHECK(i != outbuf); + *i = static_cast<CHR>('-'); + } + return STR(i, end); + } +}; + +// Utility to convert a character to a digit in a given base +template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit { +}; + +// Faster specialization for bases <= 10 +template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> { + public: + static bool Convert(CHAR c, uint8_t* digit) { + if (c >= '0' && c < '0' + BASE) { + *digit = static_cast<uint8_t>(c - '0'); + return true; + } + return false; + } +}; + +// Specialization for bases where 10 < base <= 36 +template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> { + public: + static bool Convert(CHAR c, uint8_t* digit) { + if (c >= '0' && c <= '9') { + *digit = c - '0'; + } else if (c >= 'a' && c < 'a' + BASE - 10) { + *digit = c - 'a' + 10; + } else if (c >= 'A' && c < 'A' + BASE - 10) { + *digit = c - 'A' + 10; + } else { + return false; + } + return true; + } +}; + +template <int BASE, typename CHAR> +bool CharToDigit(CHAR c, uint8_t* digit) { + return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit); +} + +// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it +// is locale independent, whereas the functions we are replacing were +// locale-dependent. TBD what is desired, but for the moment let's not +// introduce a change in behaviour. +template<typename CHAR> class WhitespaceHelper { +}; + +template<> class WhitespaceHelper<char> { + public: + static bool Invoke(char c) { + return 0 != isspace(static_cast<unsigned char>(c)); + } +}; + +template<> class WhitespaceHelper<char16> { + public: + static bool Invoke(char16 c) { + return 0 != iswspace(c); + } +}; + +template<typename CHAR> bool LocalIsWhitespace(CHAR c) { + return WhitespaceHelper<CHAR>::Invoke(c); +} + +// IteratorRangeToNumberTraits should provide: +// - a typedef for iterator_type, the iterator type used as input. +// - a typedef for value_type, the target numeric type. +// - static functions min, max (returning the minimum and maximum permitted +// values) +// - constant kBase, the base in which to interpret the input +template<typename IteratorRangeToNumberTraits> +class IteratorRangeToNumber { + public: + typedef IteratorRangeToNumberTraits traits; + typedef typename traits::iterator_type const_iterator; + typedef typename traits::value_type value_type; + + // Generalized iterator-range-to-number conversion. + // + static bool Invoke(const_iterator begin, + const_iterator end, + value_type* output) { + bool valid = true; + + while (begin != end && LocalIsWhitespace(*begin)) { + valid = false; + ++begin; + } + + if (begin != end && *begin == '-') { + if (!std::numeric_limits<value_type>::is_signed) { + valid = false; + } else if (!Negative::Invoke(begin + 1, end, output)) { + valid = false; + } + } else { + if (begin != end && *begin == '+') { + ++begin; + } + if (!Positive::Invoke(begin, end, output)) { + valid = false; + } + } + + return valid; + } + + private: + // Sign provides: + // - a static function, CheckBounds, that determines whether the next digit + // causes an overflow/underflow + // - a static function, Increment, that appends the next digit appropriately + // according to the sign of the number being parsed. + template<typename Sign> + class Base { + public: + static bool Invoke(const_iterator begin, const_iterator end, + typename traits::value_type* output) { + *output = 0; + + if (begin == end) { + return false; + } + + // Note: no performance difference was found when using template + // specialization to remove this check in bases other than 16 + if (traits::kBase == 16 && end - begin > 2 && *begin == '0' && + (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { + begin += 2; + } + + for (const_iterator current = begin; current != end; ++current) { + uint8_t new_digit = 0; + + if (!CharToDigit<traits::kBase>(*current, &new_digit)) { + return false; + } + + if (current != begin) { + if (!Sign::CheckBounds(output, new_digit)) { + return false; + } + *output *= traits::kBase; + } + + Sign::Increment(new_digit, output); + } + return true; + } + }; + + class Positive : public Base<Positive> { + public: + static bool CheckBounds(value_type* output, uint8_t new_digit) { + if (*output > static_cast<value_type>(traits::max() / traits::kBase) || + (*output == static_cast<value_type>(traits::max() / traits::kBase) && + new_digit > traits::max() % traits::kBase)) { + *output = traits::max(); + return false; + } + return true; + } + static void Increment(uint8_t increment, value_type* output) { + *output += increment; + } + }; + + class Negative : public Base<Negative> { + public: + static bool CheckBounds(value_type* output, uint8_t new_digit) { + if (*output < traits::min() / traits::kBase || + (*output == traits::min() / traits::kBase && + new_digit > 0 - traits::min() % traits::kBase)) { + *output = traits::min(); + return false; + } + return true; + } + static void Increment(uint8_t increment, value_type* output) { + *output -= increment; + } + }; +}; + +template<typename ITERATOR, typename VALUE, int BASE> +class BaseIteratorRangeToNumberTraits { + public: + typedef ITERATOR iterator_type; + typedef VALUE value_type; + static value_type min() { + return std::numeric_limits<value_type>::min(); + } + static value_type max() { + return std::numeric_limits<value_type>::max(); + } + static const int kBase = BASE; +}; + +template<typename ITERATOR> +class BaseHexIteratorRangeToIntTraits + : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> { +}; + +template <typename ITERATOR> +class BaseHexIteratorRangeToUIntTraits + : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {}; + +template <typename ITERATOR> +class BaseHexIteratorRangeToInt64Traits + : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {}; + +template <typename ITERATOR> +class BaseHexIteratorRangeToUInt64Traits + : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {}; + +typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator> + HexIteratorRangeToIntTraits; + +typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator> + HexIteratorRangeToUIntTraits; + +typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator> + HexIteratorRangeToInt64Traits; + +typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator> + HexIteratorRangeToUInt64Traits; + +template <typename STR> +bool HexStringToBytesT(const STR& input, std::vector<uint8_t>* output) { + DCHECK_EQ(output->size(), 0u); + size_t count = input.size(); + if (count == 0 || (count % 2) != 0) + return false; + for (uintptr_t i = 0; i < count / 2; ++i) { + uint8_t msb = 0; // most significant 4 bits + uint8_t lsb = 0; // least significant 4 bits + if (!CharToDigit<16>(input[i * 2], &msb) || + !CharToDigit<16>(input[i * 2 + 1], &lsb)) + return false; + output->push_back((msb << 4) | lsb); + } + return true; +} + +template <typename VALUE, int BASE> +class StringPieceToNumberTraits + : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator, + VALUE, + BASE> { +}; + +template <typename VALUE> +bool StringToIntImpl(const StringPiece& input, VALUE* output) { + return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke( + input.begin(), input.end(), output); +} + +template <typename VALUE, int BASE> +class StringPiece16ToNumberTraits + : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator, + VALUE, + BASE> { +}; + +template <typename VALUE> +bool String16ToIntImpl(const StringPiece16& input, VALUE* output) { + return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke( + input.begin(), input.end(), output); +} + +} // namespace + +std::string IntToString(int value) { + return IntToStringT<std::string, int>::IntToString(value); +} + +string16 IntToString16(int value) { + return IntToStringT<string16, int>::IntToString(value); +} + +std::string UintToString(unsigned int value) { + return IntToStringT<std::string, unsigned int>::IntToString(value); +} + +string16 UintToString16(unsigned int value) { + return IntToStringT<string16, unsigned int>::IntToString(value); +} + +std::string Int64ToString(int64_t value) { + return IntToStringT<std::string, int64_t>::IntToString(value); +} + +string16 Int64ToString16(int64_t value) { + return IntToStringT<string16, int64_t>::IntToString(value); +} + +std::string Uint64ToString(uint64_t value) { + return IntToStringT<std::string, uint64_t>::IntToString(value); +} + +string16 Uint64ToString16(uint64_t value) { + return IntToStringT<string16, uint64_t>::IntToString(value); +} + +std::string SizeTToString(size_t value) { + return IntToStringT<std::string, size_t>::IntToString(value); +} + +string16 SizeTToString16(size_t value) { + return IntToStringT<string16, size_t>::IntToString(value); +} + +std::string DoubleToString(double value) { + // According to g_fmt.cc, it is sufficient to declare a buffer of size 32. + char buffer[32]; + dmg_fp::g_fmt(buffer, value); + return std::string(buffer); +} + +bool StringToInt(const StringPiece& input, int* output) { + return StringToIntImpl(input, output); +} + +bool StringToInt(const StringPiece16& input, int* output) { + return String16ToIntImpl(input, output); +} + +bool StringToUint(const StringPiece& input, unsigned* output) { + return StringToIntImpl(input, output); +} + +bool StringToUint(const StringPiece16& input, unsigned* output) { + return String16ToIntImpl(input, output); +} + +bool StringToInt64(const StringPiece& input, int64_t* output) { + return StringToIntImpl(input, output); +} + +bool StringToInt64(const StringPiece16& input, int64_t* output) { + return String16ToIntImpl(input, output); +} + +bool StringToUint64(const StringPiece& input, uint64_t* output) { + return StringToIntImpl(input, output); +} + +bool StringToUint64(const StringPiece16& input, uint64_t* output) { + return String16ToIntImpl(input, output); +} + +bool StringToSizeT(const StringPiece& input, size_t* output) { + return StringToIntImpl(input, output); +} + +bool StringToSizeT(const StringPiece16& input, size_t* output) { + return String16ToIntImpl(input, output); +} + +bool StringToDouble(const std::string& input, double* output) { + // Thread-safe? It is on at least Mac, Linux, and Windows. + ScopedClearErrno clear_errno; + + char* endptr = NULL; + *output = dmg_fp::strtod(input.c_str(), &endptr); + + // Cases to return false: + // - If errno is ERANGE, there was an overflow or underflow. + // - If the input string is empty, there was nothing to parse. + // - If endptr does not point to the end of the string, there are either + // characters remaining in the string after a parsed number, or the string + // does not begin with a parseable number. endptr is compared to the + // expected end given the string's stated length to correctly catch cases + // where the string contains embedded NUL characters. + // - If the first character is a space, there was leading whitespace + return errno == 0 && + !input.empty() && + input.c_str() + input.length() == endptr && + !isspace(input[0]); +} + +// Note: if you need to add String16ToDouble, first ask yourself if it's +// really necessary. If it is, probably the best implementation here is to +// convert to 8-bit and then use the 8-bit version. + +// Note: if you need to add an iterator range version of StringToDouble, first +// ask yourself if it's really necessary. If it is, probably the best +// implementation here is to instantiate a string and use the string version. + +std::string HexEncode(const void* bytes, size_t size) { + static const char kHexChars[] = "0123456789ABCDEF"; + + // Each input byte creates two output hex characters. + std::string ret(size * 2, '\0'); + + for (size_t i = 0; i < size; ++i) { + char b = reinterpret_cast<const char*>(bytes)[i]; + ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; + ret[(i * 2) + 1] = kHexChars[b & 0xf]; + } + return ret; +} + +bool HexStringToInt(const StringPiece& input, int* output) { + return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke( + input.begin(), input.end(), output); +} + +bool HexStringToUInt(const StringPiece& input, uint32_t* output) { + return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke( + input.begin(), input.end(), output); +} + +bool HexStringToInt64(const StringPiece& input, int64_t* output) { + return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke( + input.begin(), input.end(), output); +} + +bool HexStringToUInt64(const StringPiece& input, uint64_t* output) { + return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke( + input.begin(), input.end(), output); +} + +bool HexStringToBytes(const std::string& input, std::vector<uint8_t>* output) { + return HexStringToBytesT(input, output); +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_number_conversions.h b/security/sandbox/chromium/base/strings/string_number_conversions.h new file mode 100644 index 000000000..1265f0dcb --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_number_conversions.h @@ -0,0 +1,137 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_ +#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <string> +#include <vector> + +#include "base/base_export.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" + +// ---------------------------------------------------------------------------- +// IMPORTANT MESSAGE FROM YOUR SPONSOR +// +// This file contains no "wstring" variants. New code should use string16. If +// you need to make old code work, use the UTF8 version and convert. Please do +// not add wstring variants. +// +// Please do not add "convenience" functions for converting strings to integers +// that return the value and ignore success/failure. That encourages people to +// write code that doesn't properly handle the error conditions. +// ---------------------------------------------------------------------------- + +namespace base { + +// Number -> string conversions ------------------------------------------------ + +BASE_EXPORT std::string IntToString(int value); +BASE_EXPORT string16 IntToString16(int value); + +BASE_EXPORT std::string UintToString(unsigned value); +BASE_EXPORT string16 UintToString16(unsigned value); + +BASE_EXPORT std::string Int64ToString(int64_t value); +BASE_EXPORT string16 Int64ToString16(int64_t value); + +BASE_EXPORT std::string Uint64ToString(uint64_t value); +BASE_EXPORT string16 Uint64ToString16(uint64_t value); + +BASE_EXPORT std::string SizeTToString(size_t value); +BASE_EXPORT string16 SizeTToString16(size_t value); + +// DoubleToString converts the double to a string format that ignores the +// locale. If you want to use locale specific formatting, use ICU. +BASE_EXPORT std::string DoubleToString(double value); + +// String -> number conversions ------------------------------------------------ + +// Perform a best-effort conversion of the input string to a numeric type, +// setting |*output| to the result of the conversion. Returns true for +// "perfect" conversions; returns false in the following cases: +// - Overflow. |*output| will be set to the maximum value supported +// by the data type. +// - Underflow. |*output| will be set to the minimum value supported +// by the data type. +// - Trailing characters in the string after parsing the number. |*output| +// will be set to the value of the number that was parsed. +// - Leading whitespace in the string before parsing the number. |*output| will +// be set to the value of the number that was parsed. +// - No characters parseable as a number at the beginning of the string. +// |*output| will be set to 0. +// - Empty string. |*output| will be set to 0. +// WARNING: Will write to |output| even when returning false. +// Read the comments above carefully. +BASE_EXPORT bool StringToInt(const StringPiece& input, int* output); +BASE_EXPORT bool StringToInt(const StringPiece16& input, int* output); + +BASE_EXPORT bool StringToUint(const StringPiece& input, unsigned* output); +BASE_EXPORT bool StringToUint(const StringPiece16& input, unsigned* output); + +BASE_EXPORT bool StringToInt64(const StringPiece& input, int64_t* output); +BASE_EXPORT bool StringToInt64(const StringPiece16& input, int64_t* output); + +BASE_EXPORT bool StringToUint64(const StringPiece& input, uint64_t* output); +BASE_EXPORT bool StringToUint64(const StringPiece16& input, uint64_t* output); + +BASE_EXPORT bool StringToSizeT(const StringPiece& input, size_t* output); +BASE_EXPORT bool StringToSizeT(const StringPiece16& input, size_t* output); + +// For floating-point conversions, only conversions of input strings in decimal +// form are defined to work. Behavior with strings representing floating-point +// numbers in hexadecimal, and strings representing non-finite values (such as +// NaN and inf) is undefined. Otherwise, these behave the same as the integral +// variants. This expects the input string to NOT be specific to the locale. +// If your input is locale specific, use ICU to read the number. +// WARNING: Will write to |output| even when returning false. +// Read the comments here and above StringToInt() carefully. +BASE_EXPORT bool StringToDouble(const std::string& input, double* output); + +// Hex encoding ---------------------------------------------------------------- + +// Returns a hex string representation of a binary buffer. The returned hex +// string will be in upper case. This function does not check if |size| is +// within reasonable limits since it's written with trusted data in mind. If +// you suspect that the data you want to format might be large, the absolute +// max size for |size| should be is +// std::numeric_limits<size_t>::max() / 2 +BASE_EXPORT std::string HexEncode(const void* bytes, size_t size); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// -0x80000000 < |input| < 0x7FFFFFFF. +BASE_EXPORT bool HexStringToInt(const StringPiece& input, int* output); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// 0x00000000 < |input| < 0xFFFFFFFF. +// The string is not required to start with 0x. +BASE_EXPORT bool HexStringToUInt(const StringPiece& input, uint32_t* output); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// -0x8000000000000000 < |input| < 0x7FFFFFFFFFFFFFFF. +BASE_EXPORT bool HexStringToInt64(const StringPiece& input, int64_t* output); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// 0x0000000000000000 < |input| < 0xFFFFFFFFFFFFFFFF. +// The string is not required to start with 0x. +BASE_EXPORT bool HexStringToUInt64(const StringPiece& input, uint64_t* output); + +// Similar to the previous functions, except that output is a vector of bytes. +// |*output| will contain as many bytes as were successfully parsed prior to the +// error. There is no overflow, but input.size() must be evenly divisible by 2. +// Leading 0x or +/- are not allowed. +BASE_EXPORT bool HexStringToBytes(const std::string& input, + std::vector<uint8_t>* output); + +} // namespace base + +#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_ diff --git a/security/sandbox/chromium/base/strings/string_piece.cc b/security/sandbox/chromium/base/strings/string_piece.cc new file mode 100644 index 000000000..c26bb3652 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_piece.cc @@ -0,0 +1,452 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// Copied from strings/stringpiece.cc with modifications + +#include "base/strings/string_piece.h" + +#include <limits.h> + +#include <algorithm> +#include <ostream> + +#include "base/logging.h" + +namespace base { +namespace { + +// For each character in characters_wanted, sets the index corresponding +// to the ASCII code of that character to 1 in table. This is used by +// the find_.*_of methods below to tell whether or not a character is in +// the lookup table in constant time. +// The argument `table' must be an array that is large enough to hold all +// the possible values of an unsigned char. Thus it should be be declared +// as follows: +// bool table[UCHAR_MAX + 1] +inline void BuildLookupTable(const StringPiece& characters_wanted, + bool* table) { + const size_t length = characters_wanted.length(); + const char* const data = characters_wanted.data(); + for (size_t i = 0; i < length; ++i) { + table[static_cast<unsigned char>(data[i])] = true; + } +} + +} // namespace + +// MSVC doesn't like complex extern templates and DLLs. +#if !defined(COMPILER_MSVC) +template class BasicStringPiece<std::string>; +template class BasicStringPiece<string16>; +#endif + +bool operator==(const StringPiece& x, const StringPiece& y) { + if (x.size() != y.size()) + return false; + + return StringPiece::wordmemcmp(x.data(), y.data(), x.size()) == 0; +} + +std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { + o.write(piece.data(), static_cast<std::streamsize>(piece.size())); + return o; +} + +namespace internal { + +template<typename STR> +void CopyToStringT(const BasicStringPiece<STR>& self, STR* target) { + if (self.empty()) + target->clear(); + else + target->assign(self.data(), self.size()); +} + +void CopyToString(const StringPiece& self, std::string* target) { + CopyToStringT(self, target); +} + +void CopyToString(const StringPiece16& self, string16* target) { + CopyToStringT(self, target); +} + +template<typename STR> +void AppendToStringT(const BasicStringPiece<STR>& self, STR* target) { + if (!self.empty()) + target->append(self.data(), self.size()); +} + +void AppendToString(const StringPiece& self, std::string* target) { + AppendToStringT(self, target); +} + +void AppendToString(const StringPiece16& self, string16* target) { + AppendToStringT(self, target); +} + +template<typename STR> +size_t copyT(const BasicStringPiece<STR>& self, + typename STR::value_type* buf, + size_t n, + size_t pos) { + size_t ret = std::min(self.size() - pos, n); + memcpy(buf, self.data() + pos, ret * sizeof(typename STR::value_type)); + return ret; +} + +size_t copy(const StringPiece& self, char* buf, size_t n, size_t pos) { + return copyT(self, buf, n, pos); +} + +size_t copy(const StringPiece16& self, char16* buf, size_t n, size_t pos) { + return copyT(self, buf, n, pos); +} + +template<typename STR> +size_t findT(const BasicStringPiece<STR>& self, + const BasicStringPiece<STR>& s, + size_t pos) { + if (pos > self.size()) + return BasicStringPiece<STR>::npos; + + typename BasicStringPiece<STR>::const_iterator result = + std::search(self.begin() + pos, self.end(), s.begin(), s.end()); + const size_t xpos = + static_cast<size_t>(result - self.begin()); + return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos; +} + +size_t find(const StringPiece& self, const StringPiece& s, size_t pos) { + return findT(self, s, pos); +} + +size_t find(const StringPiece16& self, const StringPiece16& s, size_t pos) { + return findT(self, s, pos); +} + +template<typename STR> +size_t findT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (pos >= self.size()) + return BasicStringPiece<STR>::npos; + + typename BasicStringPiece<STR>::const_iterator result = + std::find(self.begin() + pos, self.end(), c); + return result != self.end() ? + static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; +} + +size_t find(const StringPiece& self, char c, size_t pos) { + return findT(self, c, pos); +} + +size_t find(const StringPiece16& self, char16 c, size_t pos) { + return findT(self, c, pos); +} + +template<typename STR> +size_t rfindT(const BasicStringPiece<STR>& self, + const BasicStringPiece<STR>& s, + size_t pos) { + if (self.size() < s.size()) + return BasicStringPiece<STR>::npos; + + if (s.empty()) + return std::min(self.size(), pos); + + typename BasicStringPiece<STR>::const_iterator last = + self.begin() + std::min(self.size() - s.size(), pos) + s.size(); + typename BasicStringPiece<STR>::const_iterator result = + std::find_end(self.begin(), last, s.begin(), s.end()); + return result != last ? + static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; +} + +size_t rfind(const StringPiece& self, const StringPiece& s, size_t pos) { + return rfindT(self, s, pos); +} + +size_t rfind(const StringPiece16& self, const StringPiece16& s, size_t pos) { + return rfindT(self, s, pos); +} + +template<typename STR> +size_t rfindT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (self.size() == 0) + return BasicStringPiece<STR>::npos; + + for (size_t i = std::min(pos, self.size() - 1); ; + --i) { + if (self.data()[i] == c) + return i; + if (i == 0) + break; + } + return BasicStringPiece<STR>::npos; +} + +size_t rfind(const StringPiece& self, char c, size_t pos) { + return rfindT(self, c, pos); +} + +size_t rfind(const StringPiece16& self, char16 c, size_t pos) { + return rfindT(self, c, pos); +} + +// 8-bit version using lookup table. +size_t find_first_of(const StringPiece& self, + const StringPiece& s, + size_t pos) { + if (self.size() == 0 || s.size() == 0) + return StringPiece::npos; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return find(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (size_t i = pos; i < self.size(); ++i) { + if (lookup[static_cast<unsigned char>(self.data()[i])]) { + return i; + } + } + return StringPiece::npos; +} + +// 16-bit brute force version. +size_t find_first_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + StringPiece16::const_iterator found = + std::find_first_of(self.begin() + pos, self.end(), s.begin(), s.end()); + if (found == self.end()) + return StringPiece16::npos; + return found - self.begin(); +} + +// 8-bit version using lookup table. +size_t find_first_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos) { + if (self.size() == 0) + return StringPiece::npos; + + if (s.size() == 0) + return 0; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return find_first_not_of(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (size_t i = pos; i < self.size(); ++i) { + if (!lookup[static_cast<unsigned char>(self.data()[i])]) { + return i; + } + } + return StringPiece::npos; +} + +// 16-bit brute-force version. +BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + if (self.size() == 0) + return StringPiece16::npos; + + for (size_t self_i = pos; self_i < self.size(); ++self_i) { + bool found = false; + for (size_t s_i = 0; s_i < s.size(); ++s_i) { + if (self[self_i] == s[s_i]) { + found = true; + break; + } + } + if (!found) + return self_i; + } + return StringPiece16::npos; +} + +template<typename STR> +size_t find_first_not_ofT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (self.size() == 0) + return BasicStringPiece<STR>::npos; + + for (; pos < self.size(); ++pos) { + if (self.data()[pos] != c) { + return pos; + } + } + return BasicStringPiece<STR>::npos; +} + +size_t find_first_not_of(const StringPiece& self, + char c, + size_t pos) { + return find_first_not_ofT(self, c, pos); +} + +size_t find_first_not_of(const StringPiece16& self, + char16 c, + size_t pos) { + return find_first_not_ofT(self, c, pos); +} + +// 8-bit version using lookup table. +size_t find_last_of(const StringPiece& self, const StringPiece& s, size_t pos) { + if (self.size() == 0 || s.size() == 0) + return StringPiece::npos; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return rfind(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (size_t i = std::min(pos, self.size() - 1); ; --i) { + if (lookup[static_cast<unsigned char>(self.data()[i])]) + return i; + if (i == 0) + break; + } + return StringPiece::npos; +} + +// 16-bit brute-force version. +size_t find_last_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + if (self.size() == 0) + return StringPiece16::npos; + + for (size_t self_i = std::min(pos, self.size() - 1); ; + --self_i) { + for (size_t s_i = 0; s_i < s.size(); s_i++) { + if (self.data()[self_i] == s[s_i]) + return self_i; + } + if (self_i == 0) + break; + } + return StringPiece16::npos; +} + +// 8-bit version using lookup table. +size_t find_last_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos) { + if (self.size() == 0) + return StringPiece::npos; + + size_t i = std::min(pos, self.size() - 1); + if (s.size() == 0) + return i; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return find_last_not_of(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (; ; --i) { + if (!lookup[static_cast<unsigned char>(self.data()[i])]) + return i; + if (i == 0) + break; + } + return StringPiece::npos; +} + +// 16-bit brute-force version. +size_t find_last_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + if (self.size() == 0) + return StringPiece::npos; + + for (size_t self_i = std::min(pos, self.size() - 1); ; --self_i) { + bool found = false; + for (size_t s_i = 0; s_i < s.size(); s_i++) { + if (self.data()[self_i] == s[s_i]) { + found = true; + break; + } + } + if (!found) + return self_i; + if (self_i == 0) + break; + } + return StringPiece16::npos; +} + +template<typename STR> +size_t find_last_not_ofT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (self.size() == 0) + return BasicStringPiece<STR>::npos; + + for (size_t i = std::min(pos, self.size() - 1); ; --i) { + if (self.data()[i] != c) + return i; + if (i == 0) + break; + } + return BasicStringPiece<STR>::npos; +} + +size_t find_last_not_of(const StringPiece& self, + char c, + size_t pos) { + return find_last_not_ofT(self, c, pos); +} + +size_t find_last_not_of(const StringPiece16& self, + char16 c, + size_t pos) { + return find_last_not_ofT(self, c, pos); +} + +template<typename STR> +BasicStringPiece<STR> substrT(const BasicStringPiece<STR>& self, + size_t pos, + size_t n) { + if (pos > self.size()) pos = self.size(); + if (n > self.size() - pos) n = self.size() - pos; + return BasicStringPiece<STR>(self.data() + pos, n); +} + +StringPiece substr(const StringPiece& self, + size_t pos, + size_t n) { + return substrT(self, pos, n); +} + +StringPiece16 substr(const StringPiece16& self, + size_t pos, + size_t n) { + return substrT(self, pos, n); +} + +#if DCHECK_IS_ON() +void AssertIteratorsInOrder(std::string::const_iterator begin, + std::string::const_iterator end) { + DCHECK(begin <= end) << "StringPiece iterators swapped or invalid."; +} +void AssertIteratorsInOrder(string16::const_iterator begin, + string16::const_iterator end) { + DCHECK(begin <= end) << "StringPiece iterators swapped or invalid."; +} +#endif + +} // namespace internal +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_piece.h b/security/sandbox/chromium/base/strings/string_piece.h new file mode 100644 index 000000000..31e7596d1 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_piece.h @@ -0,0 +1,469 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// Copied from strings/stringpiece.h with modifications +// +// A string-like object that points to a sized piece of memory. +// +// You can use StringPiece as a function or method parameter. A StringPiece +// parameter can receive a double-quoted string literal argument, a "const +// char*" argument, a string argument, or a StringPiece argument with no data +// copying. Systematic use of StringPiece for arguments reduces data +// copies and strlen() calls. +// +// Prefer passing StringPieces by value: +// void MyFunction(StringPiece arg); +// If circumstances require, you may also pass by const reference: +// void MyFunction(const StringPiece& arg); // not preferred +// Both of these have the same lifetime semantics. Passing by value +// generates slightly smaller code. For more discussion, Googlers can see +// the thread go/stringpiecebyvalue on c-users. + +#ifndef BASE_STRINGS_STRING_PIECE_H_ +#define BASE_STRINGS_STRING_PIECE_H_ + +#include <stddef.h> + +#include <iosfwd> +#include <string> + +#include "base/base_export.h" +#include "base/containers/hash_tables.h" +#include "base/logging.h" +#include "base/strings/string16.h" + +namespace base { + +template <typename STRING_TYPE> class BasicStringPiece; +typedef BasicStringPiece<std::string> StringPiece; +typedef BasicStringPiece<string16> StringPiece16; + +// internal -------------------------------------------------------------------- + +// Many of the StringPiece functions use different implementations for the +// 8-bit and 16-bit versions, and we don't want lots of template expansions in +// this (very common) header that will slow down compilation. +// +// So here we define overloaded functions called by the StringPiece template. +// For those that share an implementation, the two versions will expand to a +// template internal to the .cc file. +namespace internal { + +BASE_EXPORT void CopyToString(const StringPiece& self, std::string* target); +BASE_EXPORT void CopyToString(const StringPiece16& self, string16* target); + +BASE_EXPORT void AppendToString(const StringPiece& self, std::string* target); +BASE_EXPORT void AppendToString(const StringPiece16& self, string16* target); + +BASE_EXPORT size_t copy(const StringPiece& self, + char* buf, + size_t n, + size_t pos); +BASE_EXPORT size_t copy(const StringPiece16& self, + char16* buf, + size_t n, + size_t pos); + +BASE_EXPORT size_t find(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t find(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t rfind(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t rfind(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t rfind(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t rfind(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t find_first_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_first_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); + +BASE_EXPORT size_t find_first_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find_first_not_of(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t find_last_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_last_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find_last_of(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t find_last_of(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t find_last_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_last_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find_last_not_of(const StringPiece16& self, + char16 c, + size_t pos); +BASE_EXPORT size_t find_last_not_of(const StringPiece& self, + char c, + size_t pos); + +BASE_EXPORT StringPiece substr(const StringPiece& self, + size_t pos, + size_t n); +BASE_EXPORT StringPiece16 substr(const StringPiece16& self, + size_t pos, + size_t n); + +#if DCHECK_IS_ON() +// Asserts that begin <= end to catch some errors with iterator usage. +BASE_EXPORT void AssertIteratorsInOrder(std::string::const_iterator begin, + std::string::const_iterator end); +BASE_EXPORT void AssertIteratorsInOrder(string16::const_iterator begin, + string16::const_iterator end); +#endif + +} // namespace internal + +// BasicStringPiece ------------------------------------------------------------ + +// Defines the types, methods, operators, and data members common to both +// StringPiece and StringPiece16. Do not refer to this class directly, but +// rather to BasicStringPiece, StringPiece, or StringPiece16. +// +// This is templatized by string class type rather than character type, so +// BasicStringPiece<std::string> or BasicStringPiece<base::string16>. +template <typename STRING_TYPE> class BasicStringPiece { + public: + // Standard STL container boilerplate. + typedef size_t size_type; + typedef typename STRING_TYPE::value_type value_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef const value_type& const_reference; + typedef ptrdiff_t difference_type; + typedef const value_type* const_iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + + static const size_type npos; + + public: + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected (likewise for char16, string16, StringPiece16). + BasicStringPiece() : ptr_(NULL), length_(0) {} + BasicStringPiece(const value_type* str) + : ptr_(str), + length_((str == NULL) ? 0 : STRING_TYPE::traits_type::length(str)) {} + BasicStringPiece(const STRING_TYPE& str) + : ptr_(str.data()), length_(str.size()) {} + BasicStringPiece(const value_type* offset, size_type len) + : ptr_(offset), length_(len) {} + BasicStringPiece(const typename STRING_TYPE::const_iterator& begin, + const typename STRING_TYPE::const_iterator& end) { +#if DCHECK_IS_ON() + // This assertion is done out-of-line to avoid bringing in logging.h and + // instantiating logging macros for every instantiation. + internal::AssertIteratorsInOrder(begin, end); +#endif + length_ = static_cast<size_t>(std::distance(begin, end)); + + // The length test before assignment is to avoid dereferencing an iterator + // that may point to the end() of a string. + ptr_ = length_ > 0 ? &*begin : nullptr; + } + + // data() may return a pointer to a buffer with embedded NULs, and the + // returned buffer may or may not be null terminated. Therefore it is + // typically a mistake to pass data() to a routine that expects a NUL + // terminated string. + const value_type* data() const { return ptr_; } + size_type size() const { return length_; } + size_type length() const { return length_; } + bool empty() const { return length_ == 0; } + + void clear() { + ptr_ = NULL; + length_ = 0; + } + void set(const value_type* data, size_type len) { + ptr_ = data; + length_ = len; + } + void set(const value_type* str) { + ptr_ = str; + length_ = str ? STRING_TYPE::traits_type::length(str) : 0; + } + + value_type operator[](size_type i) const { return ptr_[i]; } + + void remove_prefix(size_type n) { + ptr_ += n; + length_ -= n; + } + + void remove_suffix(size_type n) { + length_ -= n; + } + + int compare(const BasicStringPiece<STRING_TYPE>& x) const { + int r = wordmemcmp( + ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_)); + if (r == 0) { + if (length_ < x.length_) r = -1; + else if (length_ > x.length_) r = +1; + } + return r; + } + + STRING_TYPE as_string() const { + // std::string doesn't like to take a NULL pointer even with a 0 size. + return empty() ? STRING_TYPE() : STRING_TYPE(data(), size()); + } + + const_iterator begin() const { return ptr_; } + const_iterator end() const { return ptr_ + length_; } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(ptr_ + length_); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(ptr_); + } + + size_type max_size() const { return length_; } + size_type capacity() const { return length_; } + + static int wordmemcmp(const value_type* p, + const value_type* p2, + size_type N) { + return STRING_TYPE::traits_type::compare(p, p2, N); + } + + // Sets the value of the given string target type to be the current string. + // This saves a temporary over doing |a = b.as_string()| + void CopyToString(STRING_TYPE* target) const { + internal::CopyToString(*this, target); + } + + void AppendToString(STRING_TYPE* target) const { + internal::AppendToString(*this, target); + } + + size_type copy(value_type* buf, size_type n, size_type pos = 0) const { + return internal::copy(*this, buf, n, pos); + } + + // Does "this" start with "x" + bool starts_with(const BasicStringPiece& x) const { + return ((this->length_ >= x.length_) && + (wordmemcmp(this->ptr_, x.ptr_, x.length_) == 0)); + } + + // Does "this" end with "x" + bool ends_with(const BasicStringPiece& x) const { + return ((this->length_ >= x.length_) && + (wordmemcmp(this->ptr_ + (this->length_-x.length_), + x.ptr_, x.length_) == 0)); + } + + // find: Search for a character or substring at a given offset. + size_type find(const BasicStringPiece<STRING_TYPE>& s, + size_type pos = 0) const { + return internal::find(*this, s, pos); + } + size_type find(value_type c, size_type pos = 0) const { + return internal::find(*this, c, pos); + } + + // rfind: Reverse find. + size_type rfind(const BasicStringPiece& s, + size_type pos = BasicStringPiece::npos) const { + return internal::rfind(*this, s, pos); + } + size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const { + return internal::rfind(*this, c, pos); + } + + // find_first_of: Find the first occurence of one of a set of characters. + size_type find_first_of(const BasicStringPiece& s, + size_type pos = 0) const { + return internal::find_first_of(*this, s, pos); + } + size_type find_first_of(value_type c, size_type pos = 0) const { + return find(c, pos); + } + + // find_first_not_of: Find the first occurence not of a set of characters. + size_type find_first_not_of(const BasicStringPiece& s, + size_type pos = 0) const { + return internal::find_first_not_of(*this, s, pos); + } + size_type find_first_not_of(value_type c, size_type pos = 0) const { + return internal::find_first_not_of(*this, c, pos); + } + + // find_last_of: Find the last occurence of one of a set of characters. + size_type find_last_of(const BasicStringPiece& s, + size_type pos = BasicStringPiece::npos) const { + return internal::find_last_of(*this, s, pos); + } + size_type find_last_of(value_type c, + size_type pos = BasicStringPiece::npos) const { + return rfind(c, pos); + } + + // find_last_not_of: Find the last occurence not of a set of characters. + size_type find_last_not_of(const BasicStringPiece& s, + size_type pos = BasicStringPiece::npos) const { + return internal::find_last_not_of(*this, s, pos); + } + size_type find_last_not_of(value_type c, + size_type pos = BasicStringPiece::npos) const { + return internal::find_last_not_of(*this, c, pos); + } + + // substr. + BasicStringPiece substr(size_type pos, + size_type n = BasicStringPiece::npos) const { + return internal::substr(*this, pos, n); + } + + protected: + const value_type* ptr_; + size_type length_; +}; + +template <typename STRING_TYPE> +const typename BasicStringPiece<STRING_TYPE>::size_type +BasicStringPiece<STRING_TYPE>::npos = + typename BasicStringPiece<STRING_TYPE>::size_type(-1); + +// MSVC doesn't like complex extern templates and DLLs. +#if !defined(COMPILER_MSVC) +extern template class BASE_EXPORT BasicStringPiece<std::string>; +extern template class BASE_EXPORT BasicStringPiece<string16>; +#endif + +// StingPiece operators -------------------------------------------------------- + +BASE_EXPORT bool operator==(const StringPiece& x, const StringPiece& y); + +inline bool operator!=(const StringPiece& x, const StringPiece& y) { + return !(x == y); +} + +inline bool operator<(const StringPiece& x, const StringPiece& y) { + const int r = StringPiece::wordmemcmp( + x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size())); + return ((r < 0) || ((r == 0) && (x.size() < y.size()))); +} + +inline bool operator>(const StringPiece& x, const StringPiece& y) { + return y < x; +} + +inline bool operator<=(const StringPiece& x, const StringPiece& y) { + return !(x > y); +} + +inline bool operator>=(const StringPiece& x, const StringPiece& y) { + return !(x < y); +} + +// StringPiece16 operators ----------------------------------------------------- + +inline bool operator==(const StringPiece16& x, const StringPiece16& y) { + if (x.size() != y.size()) + return false; + + return StringPiece16::wordmemcmp(x.data(), y.data(), x.size()) == 0; +} + +inline bool operator!=(const StringPiece16& x, const StringPiece16& y) { + return !(x == y); +} + +inline bool operator<(const StringPiece16& x, const StringPiece16& y) { + const int r = StringPiece16::wordmemcmp( + x.data(), y.data(), (x.size() < y.size() ? x.size() : y.size())); + return ((r < 0) || ((r == 0) && (x.size() < y.size()))); +} + +inline bool operator>(const StringPiece16& x, const StringPiece16& y) { + return y < x; +} + +inline bool operator<=(const StringPiece16& x, const StringPiece16& y) { + return !(x > y); +} + +inline bool operator>=(const StringPiece16& x, const StringPiece16& y) { + return !(x < y); +} + +BASE_EXPORT std::ostream& operator<<(std::ostream& o, + const StringPiece& piece); + +} // namespace base + +// Hashing --------------------------------------------------------------------- + +// We provide appropriate hash functions so StringPiece and StringPiece16 can +// be used as keys in hash sets and maps. + +// This hash function is copied from base/containers/hash_tables.h. We don't +// use the ones already defined for string and string16 directly because it +// would require the string constructors to be called, which we don't want. +#define HASH_STRING_PIECE(StringPieceType, string_piece) \ + std::size_t result = 0; \ + for (StringPieceType::const_iterator i = string_piece.begin(); \ + i != string_piece.end(); ++i) \ + result = (result * 131) + *i; \ + return result; \ + +namespace BASE_HASH_NAMESPACE { + +template<> +struct hash<base::StringPiece> { + std::size_t operator()(const base::StringPiece& sp) const { + HASH_STRING_PIECE(base::StringPiece, sp); + } +}; +template<> +struct hash<base::StringPiece16> { + std::size_t operator()(const base::StringPiece16& sp16) const { + HASH_STRING_PIECE(base::StringPiece16, sp16); + } +}; + +} // namespace BASE_HASH_NAMESPACE + +#endif // BASE_STRINGS_STRING_PIECE_H_ diff --git a/security/sandbox/chromium/base/strings/string_split.cc b/security/sandbox/chromium/base/strings/string_split.cc new file mode 100644 index 000000000..6c949b989 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_split.cc @@ -0,0 +1,264 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_split.h" + +#include <stddef.h> + +#include "base/logging.h" +#include "base/strings/string_util.h" +#include "base/third_party/icu/icu_utf.h" + +namespace base { + +namespace { + +// PieceToOutputType converts a StringPiece as needed to a given output type, +// which is either the same type of StringPiece (a NOP) or the corresponding +// non-piece string type. +// +// The default converter is a NOP, it works when the OutputType is the +// correct StringPiece. +template<typename Str, typename OutputType> +OutputType PieceToOutputType(BasicStringPiece<Str> piece) { + return piece; +} +template<> // Convert StringPiece to std::string +std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { + return piece.as_string(); +} +template<> // Convert StringPiece16 to string16. +string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { + return piece.as_string(); +} + +// Returns either the ASCII or UTF-16 whitespace. +template<typename Str> BasicStringPiece<Str> WhitespaceForType(); +template<> StringPiece16 WhitespaceForType<string16>() { + return kWhitespaceUTF16; +} +template<> StringPiece WhitespaceForType<std::string>() { + return kWhitespaceASCII; +} + +// Optimize the single-character case to call find() on the string instead, +// since this is the common case and can be made faster. This could have been +// done with template specialization too, but would have been less clear. +// +// There is no corresponding FindFirstNotOf because StringPiece already +// implements these different versions that do the optimized searching. +size_t FindFirstOf(StringPiece piece, char c, size_t pos) { + return piece.find(c, pos); +} +size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { + return piece.find(c, pos); +} +size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { + return piece.find_first_of(one_of, pos); +} +size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { + return piece.find_first_of(one_of, pos); +} + +// General string splitter template. Can take 8- or 16-bit input, can produce +// the corresponding string or StringPiece output, and can take single- or +// multiple-character delimiters. +// +// DelimiterType is either a character (Str::value_type) or a string piece of +// multiple characters (BasicStringPiece<Str>). StringPiece has a version of +// find for both of these cases, and the single-character version is the most +// common and can be implemented faster, which is why this is a template. +template<typename Str, typename OutputStringType, typename DelimiterType> +static std::vector<OutputStringType> SplitStringT( + BasicStringPiece<Str> str, + DelimiterType delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + std::vector<OutputStringType> result; + if (str.empty()) + return result; + + size_t start = 0; + while (start != Str::npos) { + size_t end = FindFirstOf(str, delimiter, start); + + BasicStringPiece<Str> piece; + if (end == Str::npos) { + piece = str.substr(start); + start = Str::npos; + } else { + piece = str.substr(start, end - start); + start = end + 1; + } + + if (whitespace == TRIM_WHITESPACE) + piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !piece.empty()) + result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); + } + return result; +} + +bool AppendStringKeyValue(StringPiece input, + char delimiter, + StringPairs* result) { + // Always append a new item regardless of success (it might be empty). The + // below code will copy the strings directly into the result pair. + result->resize(result->size() + 1); + auto& result_pair = result->back(); + + // Find the delimiter. + size_t end_key_pos = input.find_first_of(delimiter); + if (end_key_pos == std::string::npos) { + DVLOG(1) << "cannot find delimiter in: " << input; + return false; // No delimiter. + } + input.substr(0, end_key_pos).CopyToString(&result_pair.first); + + // Find the value string. + StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos); + size_t begin_value_pos = remains.find_first_not_of(delimiter); + if (begin_value_pos == StringPiece::npos) { + DVLOG(1) << "cannot parse value from input: " << input; + return false; // No value. + } + remains.substr(begin_value_pos, remains.size() - begin_value_pos) + .CopyToString(&result_pair.second); + + return true; +} + +template <typename Str, typename OutputStringType> +void SplitStringUsingSubstrT(BasicStringPiece<Str> input, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type, + std::vector<OutputStringType>* result) { + using Piece = BasicStringPiece<Str>; + using size_type = typename Piece::size_type; + + result->clear(); + for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; + begin_index = end_index + delimiter.size()) { + end_index = input.find(delimiter, begin_index); + Piece term = end_index == Piece::npos + ? input.substr(begin_index) + : input.substr(begin_index, end_index - begin_index); + + if (whitespace == TRIM_WHITESPACE) + term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !term.empty()) + result->push_back(PieceToOutputType<Str, OutputStringType>(term)); + } +} + +} // namespace + +std::vector<std::string> SplitString(StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + if (separators.size() == 1) { + return SplitStringT<std::string, std::string, char>( + input, separators[0], whitespace, result_type); + } + return SplitStringT<std::string, std::string, StringPiece>( + input, separators, whitespace, result_type); +} + +std::vector<string16> SplitString(StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + if (separators.size() == 1) { + return SplitStringT<string16, string16, char16>( + input, separators[0], whitespace, result_type); + } + return SplitStringT<string16, string16, StringPiece16>( + input, separators, whitespace, result_type); +} + +std::vector<StringPiece> SplitStringPiece(StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + if (separators.size() == 1) { + return SplitStringT<std::string, StringPiece, char>( + input, separators[0], whitespace, result_type); + } + return SplitStringT<std::string, StringPiece, StringPiece>( + input, separators, whitespace, result_type); +} + +std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + if (separators.size() == 1) { + return SplitStringT<string16, StringPiece16, char16>( + input, separators[0], whitespace, result_type); + } + return SplitStringT<string16, StringPiece16, StringPiece16>( + input, separators, whitespace, result_type); +} + +bool SplitStringIntoKeyValuePairs(StringPiece input, + char key_value_delimiter, + char key_value_pair_delimiter, + StringPairs* key_value_pairs) { + key_value_pairs->clear(); + + std::vector<StringPiece> pairs = SplitStringPiece( + input, std::string(1, key_value_pair_delimiter), + TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); + key_value_pairs->reserve(pairs.size()); + + bool success = true; + for (const StringPiece& pair : pairs) { + if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) { + // Don't return here, to allow for pairs without associated + // value or key; just record that the split failed. + success = false; + } + } + return success; +} + +void SplitStringUsingSubstr(StringPiece16 input, + StringPiece16 delimiter, + std::vector<string16>* result) { + SplitStringUsingSubstrT(input, delimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL, + result); +} + +void SplitStringUsingSubstr(StringPiece input, + StringPiece delimiter, + std::vector<std::string>* result) { + SplitStringUsingSubstrT(input, delimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL, + result); +} + +std::vector<StringPiece16> SplitStringPieceUsingSubstr( + StringPiece16 input, + StringPiece16 delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + std::vector<StringPiece16> result; + SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); + return result; +} + +std::vector<StringPiece> SplitStringPieceUsingSubstr( + StringPiece input, + StringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + std::vector<StringPiece> result; + SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); + return result; +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_split.h b/security/sandbox/chromium/base/strings/string_split.h new file mode 100644 index 000000000..ec9f24604 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_split.h @@ -0,0 +1,129 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_SPLIT_H_ +#define BASE_STRINGS_STRING_SPLIT_H_ + +#include <string> +#include <utility> +#include <vector> + +#include "base/base_export.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" + +namespace base { + +enum WhitespaceHandling { + KEEP_WHITESPACE, + TRIM_WHITESPACE, +}; + +enum SplitResult { + // Strictly return all results. + // + // If the input is ",," and the separator is ',' this will return a + // vector of three empty strings. + SPLIT_WANT_ALL, + + // Only nonempty results will be added to the results. Multiple separators + // will be coalesced. Separators at the beginning and end of the input will + // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped. + // + // If the input is ",," and the separator is ',', this will return an empty + // vector. + SPLIT_WANT_NONEMPTY, +}; + +// Split the given string on ANY of the given separators, returning copies of +// the result. +// +// To split on either commas or semicolons, keeping all whitespace: +// +// std::vector<std::string> tokens = base::SplitString( +// input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); +BASE_EXPORT std::vector<std::string> SplitString( + StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type); +BASE_EXPORT std::vector<string16> SplitString( + StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type); + +// Like SplitString above except it returns a vector of StringPieces which +// reference the original buffer without copying. Although you have to be +// careful to keep the original string unmodified, this provides an efficient +// way to iterate through tokens in a string. +// +// To iterate through all whitespace-separated tokens in an input string: +// +// for (const auto& cur : +// base::SplitStringPiece(input, base::kWhitespaceASCII, +// base::KEEP_WHITESPACE, +// base::SPLIT_WANT_NONEMPTY)) { +// ... +BASE_EXPORT std::vector<StringPiece> SplitStringPiece( + StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type); +BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( + StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type); + +using StringPairs = std::vector<std::pair<std::string, std::string>>; + +// Splits |line| into key value pairs according to the given delimiters and +// removes whitespace leading each key and trailing each value. Returns true +// only if each pair has a non-empty key and value. |key_value_pairs| will +// include ("","") pairs for entries without |key_value_delimiter|. +BASE_EXPORT bool SplitStringIntoKeyValuePairs(StringPiece input, + char key_value_delimiter, + char key_value_pair_delimiter, + StringPairs* key_value_pairs); + +// Similar to SplitString, but use a substring delimiter instead of a list of +// characters that are all possible delimiters. +// +// TODO(brettw) this should probably be changed and expanded to provide a +// mirror of the SplitString[Piece] API above, just with the different +// delimiter handling. +BASE_EXPORT void SplitStringUsingSubstr(StringPiece16 input, + StringPiece16 delimiter, + std::vector<string16>* result); +BASE_EXPORT void SplitStringUsingSubstr(StringPiece input, + StringPiece delimiter, + std::vector<std::string>* result); + +// Like SplitStringUsingSubstr above except it returns a vector of StringPieces +// which reference the original buffer without copying. Although you have to be +// careful to keep the original string unmodified, this provides an efficient +// way to iterate through tokens in a string. +// +// To iterate through all newline-separated tokens in an input string: +// +// for (const auto& cur : +// base::SplitStringUsingSubstr(input, "\r\n", +// base::KEEP_WHITESPACE, +// base::SPLIT_WANT_NONEMPTY)) { +// ... +BASE_EXPORT std::vector<StringPiece16> SplitStringPieceUsingSubstr( + StringPiece16 input, + StringPiece16 delimiter, + WhitespaceHandling whitespace, + SplitResult result_type); +BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr( + StringPiece input, + StringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type); + +} // namespace base + +#endif // BASE_STRINGS_STRING_SPLIT_H_ diff --git a/security/sandbox/chromium/base/strings/string_util.cc b/security/sandbox/chromium/base/strings/string_util.cc new file mode 100644 index 000000000..e8000abd4 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util.cc @@ -0,0 +1,1001 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_util.h" + +#include <ctype.h> +#include <errno.h> +#include <math.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <wchar.h> +#include <wctype.h> + +#include <algorithm> +#include <limits> +#include <vector> + +#include "base/logging.h" +#include "base/macros.h" +#include "base/memory/singleton.h" +#include "base/strings/string_split.h" +#include "base/strings/utf_string_conversion_utils.h" +#include "base/strings/utf_string_conversions.h" +#include "base/third_party/icu/icu_utf.h" +#include "build/build_config.h" + +namespace base { + +namespace { + +// Force the singleton used by EmptyString[16] to be a unique type. This +// prevents other code that might accidentally use Singleton<string> from +// getting our internal one. +struct EmptyStrings { + EmptyStrings() {} + const std::string s; + const string16 s16; + + static EmptyStrings* GetInstance() { + return Singleton<EmptyStrings>::get(); + } +}; + +// Used by ReplaceStringPlaceholders to track the position in the string of +// replaced parameters. +struct ReplacementOffset { + ReplacementOffset(uintptr_t parameter, size_t offset) + : parameter(parameter), + offset(offset) {} + + // Index of the parameter. + uintptr_t parameter; + + // Starting position in the string. + size_t offset; +}; + +static bool CompareParameter(const ReplacementOffset& elem1, + const ReplacementOffset& elem2) { + return elem1.parameter < elem2.parameter; +} + +// Assuming that a pointer is the size of a "machine word", then +// uintptr_t is an integer type that is also a machine word. +typedef uintptr_t MachineWord; +const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1; + +inline bool IsAlignedToMachineWord(const void* pointer) { + return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask); +} + +template<typename T> inline T* AlignToMachineWord(T* pointer) { + return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) & + ~kMachineWordAlignmentMask); +} + +template<size_t size, typename CharacterType> struct NonASCIIMask; +template<> struct NonASCIIMask<4, char16> { + static inline uint32_t value() { return 0xFF80FF80U; } +}; +template<> struct NonASCIIMask<4, char> { + static inline uint32_t value() { return 0x80808080U; } +}; +template<> struct NonASCIIMask<8, char16> { + static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } +}; +template<> struct NonASCIIMask<8, char> { + static inline uint64_t value() { return 0x8080808080808080ULL; } +}; +#if defined(WCHAR_T_IS_UTF32) +template<> struct NonASCIIMask<4, wchar_t> { + static inline uint32_t value() { return 0xFFFFFF80U; } +}; +template<> struct NonASCIIMask<8, wchar_t> { + static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; } +}; +#endif // WCHAR_T_IS_UTF32 + +} // namespace + +bool IsWprintfFormatPortable(const wchar_t* format) { + for (const wchar_t* position = format; *position != '\0'; ++position) { + if (*position == '%') { + bool in_specification = true; + bool modifier_l = false; + while (in_specification) { + // Eat up characters until reaching a known specifier. + if (*++position == '\0') { + // The format string ended in the middle of a specification. Call + // it portable because no unportable specifications were found. The + // string is equally broken on all platforms. + return true; + } + + if (*position == 'l') { + // 'l' is the only thing that can save the 's' and 'c' specifiers. + modifier_l = true; + } else if (((*position == 's' || *position == 'c') && !modifier_l) || + *position == 'S' || *position == 'C' || *position == 'F' || + *position == 'D' || *position == 'O' || *position == 'U') { + // Not portable. + return false; + } + + if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { + // Portable, keep scanning the rest of the format string. + in_specification = false; + } + } + } + } + + return true; +} + +namespace { + +template<typename StringType> +StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToLowerASCII(str[i])); + return ret; +} + +template<typename StringType> +StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToUpperASCII(str[i])); + return ret; +} + +} // namespace + +std::string ToLowerASCII(StringPiece str) { + return ToLowerASCIIImpl<std::string>(str); +} + +string16 ToLowerASCII(StringPiece16 str) { + return ToLowerASCIIImpl<string16>(str); +} + +std::string ToUpperASCII(StringPiece str) { + return ToUpperASCIIImpl<std::string>(str); +} + +string16 ToUpperASCII(StringPiece16 str) { + return ToUpperASCIIImpl<string16>(str); +} + +template<class StringType> +int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, + BasicStringPiece<StringType> b) { + // Find the first characters that aren't equal and compare them. If the end + // of one of the strings is found before a nonequal character, the lengths + // of the strings are compared. + size_t i = 0; + while (i < a.length() && i < b.length()) { + typename StringType::value_type lower_a = ToLowerASCII(a[i]); + typename StringType::value_type lower_b = ToLowerASCII(b[i]); + if (lower_a < lower_b) + return -1; + if (lower_a > lower_b) + return 1; + i++; + } + + // End of one string hit before finding a different character. Expect the + // common case to be "strings equal" at this point so check that first. + if (a.length() == b.length()) + return 0; + + if (a.length() < b.length()) + return -1; + return 1; +} + +int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) { + return CompareCaseInsensitiveASCIIT<std::string>(a, b); +} + +int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { + return CompareCaseInsensitiveASCIIT<string16>(a, b); +} + +bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { + if (a.length() != b.length()) + return false; + return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; +} + +bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { + if (a.length() != b.length()) + return false; + return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0; +} + +const std::string& EmptyString() { + return EmptyStrings::GetInstance()->s; +} + +const string16& EmptyString16() { + return EmptyStrings::GetInstance()->s16; +} + +template<typename STR> +bool ReplaceCharsT(const STR& input, + const STR& replace_chars, + const STR& replace_with, + STR* output) { + bool removed = false; + size_t replace_length = replace_with.length(); + + *output = input; + + size_t found = output->find_first_of(replace_chars); + while (found != STR::npos) { + removed = true; + output->replace(found, 1, replace_with); + found = output->find_first_of(replace_chars, found + replace_length); + } + + return removed; +} + +bool ReplaceChars(const string16& input, + const StringPiece16& replace_chars, + const string16& replace_with, + string16* output) { + return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); +} + +bool ReplaceChars(const std::string& input, + const StringPiece& replace_chars, + const std::string& replace_with, + std::string* output) { + return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); +} + +bool RemoveChars(const string16& input, + const StringPiece16& remove_chars, + string16* output) { + return ReplaceChars(input, remove_chars.as_string(), string16(), output); +} + +bool RemoveChars(const std::string& input, + const StringPiece& remove_chars, + std::string* output) { + return ReplaceChars(input, remove_chars.as_string(), std::string(), output); +} + +template<typename Str> +TrimPositions TrimStringT(const Str& input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions, + Str* output) { + // Find the edges of leading/trailing whitespace as desired. Need to use + // a StringPiece version of input to be able to call find* on it with the + // StringPiece version of trim_chars (normally the trim_chars will be a + // constant so avoid making a copy). + BasicStringPiece<Str> input_piece(input); + const size_t last_char = input.length() - 1; + const size_t first_good_char = (positions & TRIM_LEADING) ? + input_piece.find_first_not_of(trim_chars) : 0; + const size_t last_good_char = (positions & TRIM_TRAILING) ? + input_piece.find_last_not_of(trim_chars) : last_char; + + // When the string was all trimmed, report that we stripped off characters + // from whichever position the caller was interested in. For empty input, we + // stripped no characters, but we still need to clear |output|. + if (input.empty() || + (first_good_char == Str::npos) || (last_good_char == Str::npos)) { + bool input_was_empty = input.empty(); // in case output == &input + output->clear(); + return input_was_empty ? TRIM_NONE : positions; + } + + // Trim. + *output = + input.substr(first_good_char, last_good_char - first_good_char + 1); + + // Return where we trimmed from. + return static_cast<TrimPositions>( + ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | + ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); +} + +bool TrimString(const string16& input, + StringPiece16 trim_chars, + string16* output) { + return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; +} + +bool TrimString(const std::string& input, + StringPiece trim_chars, + std::string* output) { + return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; +} + +template<typename Str> +BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions) { + size_t begin = (positions & TRIM_LEADING) ? + input.find_first_not_of(trim_chars) : 0; + size_t end = (positions & TRIM_TRAILING) ? + input.find_last_not_of(trim_chars) + 1 : input.size(); + return input.substr(begin, end - begin); +} + +StringPiece16 TrimString(StringPiece16 input, + const StringPiece16& trim_chars, + TrimPositions positions) { + return TrimStringPieceT(input, trim_chars, positions); +} + +StringPiece TrimString(StringPiece input, + const StringPiece& trim_chars, + TrimPositions positions) { + return TrimStringPieceT(input, trim_chars, positions); +} + +void TruncateUTF8ToByteSize(const std::string& input, + const size_t byte_size, + std::string* output) { + DCHECK(output); + if (byte_size > input.length()) { + *output = input; + return; + } + DCHECK_LE(byte_size, + static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); + // Note: This cast is necessary because CBU8_NEXT uses int32_ts. + int32_t truncation_length = static_cast<int32_t>(byte_size); + int32_t char_index = truncation_length - 1; + const char* data = input.data(); + + // Using CBU8, we will move backwards from the truncation point + // to the beginning of the string looking for a valid UTF8 + // character. Once a full UTF8 character is found, we will + // truncate the string to the end of that character. + while (char_index >= 0) { + int32_t prev = char_index; + base_icu::UChar32 code_point = 0; + CBU8_NEXT(data, char_index, truncation_length, code_point); + if (!IsValidCharacter(code_point) || + !IsValidCodepoint(code_point)) { + char_index = prev - 1; + } else { + break; + } + } + + if (char_index >= 0 ) + *output = input.substr(0, char_index); + else + output->clear(); +} + +TrimPositions TrimWhitespace(const string16& input, + TrimPositions positions, + string16* output) { + return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); +} + +StringPiece16 TrimWhitespace(StringPiece16 input, + TrimPositions positions) { + return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions); +} + +TrimPositions TrimWhitespaceASCII(const std::string& input, + TrimPositions positions, + std::string* output) { + return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); +} + +StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) { + return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions); +} + +template<typename STR> +STR CollapseWhitespaceT(const STR& text, + bool trim_sequences_with_line_breaks) { + STR result; + result.resize(text.size()); + + // Set flags to pretend we're already in a trimmed whitespace sequence, so we + // will trim any leading whitespace. + bool in_whitespace = true; + bool already_trimmed = true; + + int chars_written = 0; + for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { + if (IsUnicodeWhitespace(*i)) { + if (!in_whitespace) { + // Reduce all whitespace sequences to a single space. + in_whitespace = true; + result[chars_written++] = L' '; + } + if (trim_sequences_with_line_breaks && !already_trimmed && + ((*i == '\n') || (*i == '\r'))) { + // Whitespace sequences containing CR or LF are eliminated entirely. + already_trimmed = true; + --chars_written; + } + } else { + // Non-whitespace chracters are copied straight across. + in_whitespace = false; + already_trimmed = false; + result[chars_written++] = *i; + } + } + + if (in_whitespace && !already_trimmed) { + // Any trailing whitespace is eliminated. + --chars_written; + } + + result.resize(chars_written); + return result; +} + +string16 CollapseWhitespace(const string16& text, + bool trim_sequences_with_line_breaks) { + return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); +} + +std::string CollapseWhitespaceASCII(const std::string& text, + bool trim_sequences_with_line_breaks) { + return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); +} + +bool ContainsOnlyChars(const StringPiece& input, + const StringPiece& characters) { + return input.find_first_not_of(characters) == StringPiece::npos; +} + +bool ContainsOnlyChars(const StringPiece16& input, + const StringPiece16& characters) { + return input.find_first_not_of(characters) == StringPiece16::npos; +} + +template <class Char> +inline bool DoIsStringASCII(const Char* characters, size_t length) { + MachineWord all_char_bits = 0; + const Char* end = characters + length; + + // Prologue: align the input. + while (!IsAlignedToMachineWord(characters) && characters != end) { + all_char_bits |= *characters; + ++characters; + } + + // Compare the values of CPU word size. + const Char* word_end = AlignToMachineWord(end); + const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); + while (characters < word_end) { + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); + characters += loop_increment; + } + + // Process the remaining bytes. + while (characters != end) { + all_char_bits |= *characters; + ++characters; + } + + MachineWord non_ascii_bit_mask = + NonASCIIMask<sizeof(MachineWord), Char>::value(); + return !(all_char_bits & non_ascii_bit_mask); +} + +bool IsStringASCII(const StringPiece& str) { + return DoIsStringASCII(str.data(), str.length()); +} + +bool IsStringASCII(const StringPiece16& str) { + return DoIsStringASCII(str.data(), str.length()); +} + +bool IsStringASCII(const string16& str) { + return DoIsStringASCII(str.data(), str.length()); +} + +#if defined(WCHAR_T_IS_UTF32) +bool IsStringASCII(const std::wstring& str) { + return DoIsStringASCII(str.data(), str.length()); +} +#endif + +bool IsStringUTF8(const StringPiece& str) { + const char *src = str.data(); + int32_t src_len = static_cast<int32_t>(str.length()); + int32_t char_index = 0; + + while (char_index < src_len) { + int32_t code_point; + CBU8_NEXT(src, char_index, src_len, code_point); + if (!IsValidCharacter(code_point)) + return false; + } + return true; +} + +// Implementation note: Normally this function will be called with a hardcoded +// constant for the lowercase_ascii parameter. Constructing a StringPiece from +// a C constant requires running strlen, so the result will be two passes +// through the buffers, one to file the length of lowercase_ascii, and one to +// compare each letter. +// +// This function could have taken a const char* to avoid this and only do one +// pass through the string. But the strlen is faster than the case-insensitive +// compares and lets us early-exit in the case that the strings are different +// lengths (will often be the case for non-matches). So whether one approach or +// the other will be faster depends on the case. +// +// The hardcoded strings are typically very short so it doesn't matter, and the +// string piece gives additional flexibility for the caller (doesn't have to be +// null terminated) so we choose the StringPiece route. +template<typename Str> +static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, + StringPiece lowercase_ascii) { + if (str.size() != lowercase_ascii.size()) + return false; + for (size_t i = 0; i < str.size(); i++) { + if (ToLowerASCII(str[i]) != lowercase_ascii[i]) + return false; + } + return true; +} + +bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) { + return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii); +} + +bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) { + return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii); +} + +bool EqualsASCII(StringPiece16 str, StringPiece ascii) { + if (str.length() != ascii.length()) + return false; + return std::equal(ascii.begin(), ascii.end(), str.begin()); +} + +template<typename Str> +bool StartsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = str.substr(0, search_for.size()); + + switch (case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + search_for.begin(), search_for.end(), + source.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + NOTREACHED(); + return false; + } +} + +bool StartsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity) { + return StartsWithT<std::string>(str, search_for, case_sensitivity); +} + +bool StartsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity) { + return StartsWithT<string16>(str, search_for, case_sensitivity); +} + +template <typename Str> +bool EndsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(), + search_for.size()); + + switch (case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + source.begin(), source.end(), + search_for.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + NOTREACHED(); + return false; + } +} + +bool EndsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity) { + return EndsWithT<std::string>(str, search_for, case_sensitivity); +} + +bool EndsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity) { + return EndsWithT<string16>(str, search_for, case_sensitivity); +} + +char HexDigitToInt(wchar_t c) { + DCHECK(IsHexDigit(c)); + if (c >= '0' && c <= '9') + return static_cast<char>(c - '0'); + if (c >= 'A' && c <= 'F') + return static_cast<char>(c - 'A' + 10); + if (c >= 'a' && c <= 'f') + return static_cast<char>(c - 'a' + 10); + return 0; +} + +bool IsUnicodeWhitespace(wchar_t c) { + // kWhitespaceWide is a NULL-terminated string + for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) { + if (*cur == c) + return true; + } + return false; +} + +static const char* const kByteStringsUnlocalized[] = { + " B", + " kB", + " MB", + " GB", + " TB", + " PB" +}; + +string16 FormatBytesUnlocalized(int64_t bytes) { + double unit_amount = static_cast<double>(bytes); + size_t dimension = 0; + const int kKilo = 1024; + while (unit_amount >= kKilo && + dimension < arraysize(kByteStringsUnlocalized) - 1) { + unit_amount /= kKilo; + dimension++; + } + + char buf[64]; + if (bytes != 0 && dimension > 0 && unit_amount < 100) { + base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, + kByteStringsUnlocalized[dimension]); + } else { + base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, + kByteStringsUnlocalized[dimension]); + } + + return ASCIIToUTF16(buf); +} + +// Runs in O(n) time in the length of |str|. +template<class StringType> +void DoReplaceSubstringsAfterOffset(StringType* str, + size_t offset, + BasicStringPiece<StringType> find_this, + BasicStringPiece<StringType> replace_with, + bool replace_all) { + DCHECK(!find_this.empty()); + + // If the find string doesn't appear, there's nothing to do. + offset = str->find(find_this.data(), offset, find_this.size()); + if (offset == StringType::npos) + return; + + // If we're only replacing one instance, there's no need to do anything + // complicated. + size_t find_length = find_this.length(); + if (!replace_all) { + str->replace(offset, find_length, replace_with.data(), replace_with.size()); + return; + } + + // If the find and replace strings are the same length, we can simply use + // replace() on each instance, and finish the entire operation in O(n) time. + size_t replace_length = replace_with.length(); + if (find_length == replace_length) { + do { + str->replace(offset, find_length, + replace_with.data(), replace_with.size()); + offset = str->find(find_this.data(), offset + replace_length, + find_this.size()); + } while (offset != StringType::npos); + return; + } + + // Since the find and replace strings aren't the same length, a loop like the + // one above would be O(n^2) in the worst case, as replace() will shift the + // entire remaining string each time. We need to be more clever to keep + // things O(n). + // + // If we're shortening the string, we can alternate replacements with shifting + // forward the intervening characters using memmove(). + size_t str_length = str->length(); + if (find_length > replace_length) { + size_t write_offset = offset; + do { + if (replace_length) { + str->replace(write_offset, replace_length, + replace_with.data(), replace_with.size()); + write_offset += replace_length; + } + size_t read_offset = offset + find_length; + offset = std::min( + str->find(find_this.data(), read_offset, find_this.size()), + str_length); + size_t length = offset - read_offset; + if (length) { + memmove(&(*str)[write_offset], &(*str)[read_offset], + length * sizeof(typename StringType::value_type)); + write_offset += length; + } + } while (offset < str_length); + str->resize(write_offset); + return; + } + + // We're lengthening the string. We can use alternating replacements and + // memmove() calls like above, but we need to precalculate the final string + // length and then expand from back-to-front to avoid overwriting the string + // as we're reading it, needing to shift, or having to copy to a second string + // temporarily. + size_t first_match = offset; + + // First, calculate the final length and resize the string. + size_t final_length = str_length; + size_t expansion = replace_length - find_length; + size_t current_match; + do { + final_length += expansion; + // Minor optimization: save this offset into |current_match|, so that on + // exit from the loop, |current_match| will point at the last instance of + // the find string, and we won't need to find() it again immediately. + current_match = offset; + offset = str->find(find_this.data(), offset + find_length, + find_this.size()); + } while (offset != StringType::npos); + str->resize(final_length); + + // Now do the replacement loop, working backwards through the string. + for (size_t prev_match = str_length, write_offset = final_length; ; + current_match = str->rfind(find_this.data(), current_match - 1, + find_this.size())) { + size_t read_offset = current_match + find_length; + size_t length = prev_match - read_offset; + if (length) { + write_offset -= length; + memmove(&(*str)[write_offset], &(*str)[read_offset], + length * sizeof(typename StringType::value_type)); + } + write_offset -= replace_length; + str->replace(write_offset, replace_length, + replace_with.data(), replace_with.size()); + if (current_match == first_match) + return; + prev_match = current_match; + } +} + +void ReplaceFirstSubstringAfterOffset(string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with) { + DoReplaceSubstringsAfterOffset<string16>( + str, start_offset, find_this, replace_with, false); // Replace first. +} + +void ReplaceFirstSubstringAfterOffset(std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with) { + DoReplaceSubstringsAfterOffset<std::string>( + str, start_offset, find_this, replace_with, false); // Replace first. +} + +void ReplaceSubstringsAfterOffset(string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with) { + DoReplaceSubstringsAfterOffset<string16>( + str, start_offset, find_this, replace_with, true); // Replace all. +} + +void ReplaceSubstringsAfterOffset(std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with) { + DoReplaceSubstringsAfterOffset<std::string>( + str, start_offset, find_this, replace_with, true); // Replace all. +} + +template <class string_type> +inline typename string_type::value_type* WriteIntoT(string_type* str, + size_t length_with_null) { + DCHECK_GT(length_with_null, 1u); + str->reserve(length_with_null); + str->resize(length_with_null - 1); + return &((*str)[0]); +} + +char* WriteInto(std::string* str, size_t length_with_null) { + return WriteIntoT(str, length_with_null); +} + +char16* WriteInto(string16* str, size_t length_with_null) { + return WriteIntoT(str, length_with_null); +} + +template<typename STR> +static STR JoinStringT(const std::vector<STR>& parts, + BasicStringPiece<STR> sep) { + if (parts.empty()) + return STR(); + + STR result(parts[0]); + auto iter = parts.begin(); + ++iter; + + for (; iter != parts.end(); ++iter) { + sep.AppendToString(&result); + result += *iter; + } + + return result; +} + +std::string JoinString(const std::vector<std::string>& parts, + StringPiece separator) { + return JoinStringT(parts, separator); +} + +string16 JoinString(const std::vector<string16>& parts, + StringPiece16 separator) { + return JoinStringT(parts, separator); +} + +template<class FormatStringType, class OutStringType> +OutStringType DoReplaceStringPlaceholders( + const FormatStringType& format_string, + const std::vector<OutStringType>& subst, + std::vector<size_t>* offsets) { + size_t substitutions = subst.size(); + + size_t sub_length = 0; + for (const auto& cur : subst) + sub_length += cur.length(); + + OutStringType formatted; + formatted.reserve(format_string.length() + sub_length); + + std::vector<ReplacementOffset> r_offsets; + for (auto i = format_string.begin(); i != format_string.end(); ++i) { + if ('$' == *i) { + if (i + 1 != format_string.end()) { + ++i; + DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; + if ('$' == *i) { + while (i != format_string.end() && '$' == *i) { + formatted.push_back('$'); + ++i; + } + --i; + } else { + uintptr_t index = 0; + while (i != format_string.end() && '0' <= *i && *i <= '9') { + index *= 10; + index += *i - '0'; + ++i; + } + --i; + index -= 1; + if (offsets) { + ReplacementOffset r_offset(index, + static_cast<int>(formatted.size())); + r_offsets.insert(std::lower_bound(r_offsets.begin(), + r_offsets.end(), + r_offset, + &CompareParameter), + r_offset); + } + if (index < substitutions) + formatted.append(subst.at(index)); + } + } + } else { + formatted.push_back(*i); + } + } + if (offsets) { + for (const auto& cur : r_offsets) + offsets->push_back(cur.offset); + } + return formatted; +} + +string16 ReplaceStringPlaceholders(const string16& format_string, + const std::vector<string16>& subst, + std::vector<size_t>* offsets) { + return DoReplaceStringPlaceholders(format_string, subst, offsets); +} + +std::string ReplaceStringPlaceholders(const StringPiece& format_string, + const std::vector<std::string>& subst, + std::vector<size_t>* offsets) { + return DoReplaceStringPlaceholders(format_string, subst, offsets); +} + +string16 ReplaceStringPlaceholders(const string16& format_string, + const string16& a, + size_t* offset) { + std::vector<size_t> offsets; + std::vector<string16> subst; + subst.push_back(a); + string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); + + DCHECK_EQ(1U, offsets.size()); + if (offset) + *offset = offsets[0]; + return result; +} + +// The following code is compatible with the OpenBSD lcpy interface. See: +// http://www.gratisoft.us/todd/papers/strlcpy.html +// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c + +namespace { + +template <typename CHAR> +size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. + return i; + } + + // We were left off at dst_size. We over copied 1 byte. Null terminate. + if (dst_size != 0) + dst[dst_size - 1] = 0; + + // Count the rest of the |src|, and return it's length in characters. + while (src[dst_size]) ++dst_size; + return dst_size; +} + +} // namespace + +size_t strlcpy(char* dst, const char* src, size_t dst_size) { + return lcpyT<char>(dst, src, dst_size); +} +size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { + return lcpyT<wchar_t>(dst, src, dst_size); +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_util.h b/security/sandbox/chromium/base/strings/string_util.h new file mode 100644 index 000000000..e369f294d --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util.h @@ -0,0 +1,461 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// This file defines utility functions for working with strings. + +#ifndef BASE_STRINGS_STRING_UTIL_H_ +#define BASE_STRINGS_STRING_UTIL_H_ + +#include <ctype.h> +#include <stdarg.h> // va_list +#include <stddef.h> +#include <stdint.h> + +#include <string> +#include <vector> + +#include "base/base_export.h" +#include "base/compiler_specific.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" // For implicit conversions. +#include "build/build_config.h" + +namespace base { + +// C standard-library functions that aren't cross-platform are provided as +// "base::...", and their prototypes are listed below. These functions are +// then implemented as inline calls to the platform-specific equivalents in the +// platform-specific headers. + +// Wrapper for vsnprintf that always null-terminates and always returns the +// number of characters that would be in an untruncated formatted +// string, even when truncation occurs. +int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) + PRINTF_FORMAT(3, 0); + +// Some of these implementations need to be inlined. + +// We separate the declaration from the implementation of this inline +// function just so the PRINTF_FORMAT works. +inline int snprintf(char* buffer, + size_t size, + _Printf_format_string_ const char* format, + ...) PRINTF_FORMAT(3, 4); +inline int snprintf(char* buffer, + size_t size, + _Printf_format_string_ const char* format, + ...) { + va_list arguments; + va_start(arguments, format); + int result = vsnprintf(buffer, size, format, arguments); + va_end(arguments); + return result; +} + +// BSD-style safe and consistent string copy functions. +// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. +// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as +// long as |dst_size| is not 0. Returns the length of |src| in characters. +// If the return value is >= dst_size, then the output was truncated. +// NOTE: All sizes are in number of characters, NOT in bytes. +BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); +BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); + +// Scan a wprintf format string to determine whether it's portable across a +// variety of systems. This function only checks that the conversion +// specifiers used by the format string are supported and have the same meaning +// on a variety of systems. It doesn't check for other errors that might occur +// within a format string. +// +// Nonportable conversion specifiers for wprintf are: +// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char +// data on all systems except Windows, which treat them as wchar_t data. +// Use %ls and %lc for wchar_t data instead. +// - 'S' and 'C', which operate on wchar_t data on all systems except Windows, +// which treat them as char data. Use %ls and %lc for wchar_t data +// instead. +// - 'F', which is not identified by Windows wprintf documentation. +// - 'D', 'O', and 'U', which are deprecated and not available on all systems. +// Use %ld, %lo, and %lu instead. +// +// Note that there is no portable conversion specifier for char data when +// working with wprintf. +// +// This function is intended to be called from base::vswprintf. +BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); + +// ASCII-specific tolower. The standard library's tolower is locale sensitive, +// so we don't want to use it here. +inline char ToLowerASCII(char c) { + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; +} +inline char16 ToLowerASCII(char16 c) { + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; +} + +// ASCII-specific toupper. The standard library's toupper is locale sensitive, +// so we don't want to use it here. +inline char ToUpperASCII(char c) { + return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; +} +inline char16 ToUpperASCII(char16 c) { + return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; +} + +// Converts the given string to it's ASCII-lowercase equivalent. +BASE_EXPORT std::string ToLowerASCII(StringPiece str); +BASE_EXPORT string16 ToLowerASCII(StringPiece16 str); + +// Converts the given string to it's ASCII-uppercase equivalent. +BASE_EXPORT std::string ToUpperASCII(StringPiece str); +BASE_EXPORT string16 ToUpperASCII(StringPiece16 str); + +// Functor for case-insensitive ASCII comparisons for STL algorithms like +// std::search. +// +// Note that a full Unicode version of this functor is not possible to write +// because case mappings might change the number of characters, depend on +// context (combining accents), and require handling UTF-16. If you need +// proper Unicode support, use base::i18n::ToLower/FoldCase and then just +// use a normal operator== on the result. +template<typename Char> struct CaseInsensitiveCompareASCII { + public: + bool operator()(Char x, Char y) const { + return ToLowerASCII(x) == ToLowerASCII(y); + } +}; + +// Like strcasecmp for case-insensitive ASCII characters only. Returns: +// -1 (a < b) +// 0 (a == b) +// 1 (a > b) +// (unlike strcasecmp which can return values greater or less than 1/-1). For +// full Unicode support, use base::i18n::ToLower or base::i18h::FoldCase +// and then just call the normal string operators on the result. +BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b); +BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b); + +// Equality for ASCII case-insensitive comparisons. For full Unicode support, +// use base::i18n::ToLower or base::i18h::FoldCase and then compare with either +// == or !=. +BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b); +BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b); + +// These threadsafe functions return references to globally unique empty +// strings. +// +// It is likely faster to construct a new empty string object (just a few +// instructions to set the length to 0) than to get the empty string singleton +// returned by these functions (which requires threadsafe singleton access). +// +// Therefore, DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT +// CONSTRUCTORS. There is only one case where you should use these: functions +// which need to return a string by reference (e.g. as a class member +// accessor), and don't have an empty string to use (e.g. in an error case). +// These should not be used as initializers, function arguments, or return +// values for functions which return by value or outparam. +BASE_EXPORT const std::string& EmptyString(); +BASE_EXPORT const string16& EmptyString16(); + +// Contains the set of characters representing whitespace in the corresponding +// encoding. Null-terminated. The ASCII versions are the whitespaces as defined +// by HTML5, and don't include control characters. +BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode. +BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode. +BASE_EXPORT extern const char kWhitespaceASCII[]; +BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode. + +// Null-terminated string representing the UTF-8 byte order mark. +BASE_EXPORT extern const char kUtf8ByteOrderMark[]; + +// Removes characters in |remove_chars| from anywhere in |input|. Returns true +// if any characters were removed. |remove_chars| must be null-terminated. +// NOTE: Safe to use the same variable for both |input| and |output|. +BASE_EXPORT bool RemoveChars(const string16& input, + const StringPiece16& remove_chars, + string16* output); +BASE_EXPORT bool RemoveChars(const std::string& input, + const StringPiece& remove_chars, + std::string* output); + +// Replaces characters in |replace_chars| from anywhere in |input| with +// |replace_with|. Each character in |replace_chars| will be replaced with +// the |replace_with| string. Returns true if any characters were replaced. +// |replace_chars| must be null-terminated. +// NOTE: Safe to use the same variable for both |input| and |output|. +BASE_EXPORT bool ReplaceChars(const string16& input, + const StringPiece16& replace_chars, + const string16& replace_with, + string16* output); +BASE_EXPORT bool ReplaceChars(const std::string& input, + const StringPiece& replace_chars, + const std::string& replace_with, + std::string* output); + +enum TrimPositions { + TRIM_NONE = 0, + TRIM_LEADING = 1 << 0, + TRIM_TRAILING = 1 << 1, + TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, +}; + +// Removes characters in |trim_chars| from the beginning and end of |input|. +// The 8-bit version only works on 8-bit characters, not UTF-8. +// +// It is safe to use the same variable for both |input| and |output| (this is +// the normal usage to trim in-place). +BASE_EXPORT bool TrimString(const string16& input, + StringPiece16 trim_chars, + string16* output); +BASE_EXPORT bool TrimString(const std::string& input, + StringPiece trim_chars, + std::string* output); + +// StringPiece versions of the above. The returned pieces refer to the original +// buffer. +BASE_EXPORT StringPiece16 TrimString(StringPiece16 input, + const StringPiece16& trim_chars, + TrimPositions positions); +BASE_EXPORT StringPiece TrimString(StringPiece input, + const StringPiece& trim_chars, + TrimPositions positions); + +// Truncates a string to the nearest UTF-8 character that will leave +// the string less than or equal to the specified byte size. +BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, + const size_t byte_size, + std::string* output); + +// Trims any whitespace from either end of the input string. +// +// The StringPiece versions return a substring referencing the input buffer. +// The ASCII versions look only for ASCII whitespace. +// +// The std::string versions return where whitespace was found. +// NOTE: Safe to use the same variable for both input and output. +BASE_EXPORT TrimPositions TrimWhitespace(const string16& input, + TrimPositions positions, + string16* output); +BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input, + TrimPositions positions); +BASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, + TrimPositions positions, + std::string* output); +BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input, + TrimPositions positions); + +// Searches for CR or LF characters. Removes all contiguous whitespace +// strings that contain them. This is useful when trying to deal with text +// copied from terminals. +// Returns |text|, with the following three transformations: +// (1) Leading and trailing whitespace is trimmed. +// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace +// sequences containing a CR or LF are trimmed. +// (3) All other whitespace sequences are converted to single spaces. +BASE_EXPORT string16 CollapseWhitespace( + const string16& text, + bool trim_sequences_with_line_breaks); +BASE_EXPORT std::string CollapseWhitespaceASCII( + const std::string& text, + bool trim_sequences_with_line_breaks); + +// Returns true if |input| is empty or contains only characters found in +// |characters|. +BASE_EXPORT bool ContainsOnlyChars(const StringPiece& input, + const StringPiece& characters); +BASE_EXPORT bool ContainsOnlyChars(const StringPiece16& input, + const StringPiece16& characters); + +// Returns true if the specified string matches the criteria. How can a wide +// string be 8-bit or UTF8? It contains only characters that are < 256 (in the +// first case) or characters that use only 8-bits and whose 8-bit +// representation looks like a UTF-8 string (the second case). +// +// Note that IsStringUTF8 checks not only if the input is structurally +// valid but also if it doesn't contain any non-character codepoint +// (e.g. U+FFFE). It's done on purpose because all the existing callers want +// to have the maximum 'discriminating' power from other encodings. If +// there's a use case for just checking the structural validity, we have to +// add a new function for that. +// +// IsStringASCII assumes the input is likely all ASCII, and does not leave early +// if it is not the case. +BASE_EXPORT bool IsStringUTF8(const StringPiece& str); +BASE_EXPORT bool IsStringASCII(const StringPiece& str); +BASE_EXPORT bool IsStringASCII(const StringPiece16& str); +// A convenience adaptor for WebStrings, as they don't convert into +// StringPieces directly. +BASE_EXPORT bool IsStringASCII(const string16& str); +#if defined(WCHAR_T_IS_UTF32) +BASE_EXPORT bool IsStringASCII(const std::wstring& str); +#endif + +// Compare the lower-case form of the given string against the given +// previously-lower-cased ASCII string (typically a constant). +BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str, + StringPiece lowecase_ascii); +BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str, + StringPiece lowecase_ascii); + +// Performs a case-sensitive string compare of the given 16-bit string against +// the given 8-bit ASCII string (typically a constant). The behavior is +// undefined if the |ascii| string is not ASCII. +BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii); + +// Indicates case sensitivity of comparisons. Only ASCII case insensitivity +// is supported. Full Unicode case-insensitive conversions would need to go in +// base/i18n so it can use ICU. +// +// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's +// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see +// base/i18n/case_conversion.h for usage advice) on the arguments, and then use +// the results to a case-sensitive comparison. +enum class CompareCase { + SENSITIVE, + INSENSITIVE_ASCII, +}; + +BASE_EXPORT bool StartsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity); +BASE_EXPORT bool StartsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity); +BASE_EXPORT bool EndsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity); +BASE_EXPORT bool EndsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity); + +// Determines the type of ASCII character, independent of locale (the C +// library versions will change based on locale). +template <typename Char> +inline bool IsAsciiWhitespace(Char c) { + return c == ' ' || c == '\r' || c == '\n' || c == '\t'; +} +template <typename Char> +inline bool IsAsciiAlpha(Char c) { + return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); +} +template <typename Char> +inline bool IsAsciiDigit(Char c) { + return c >= '0' && c <= '9'; +} + +template <typename Char> +inline bool IsHexDigit(Char c) { + return (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f'); +} + +// Returns the integer corresponding to the given hex character. For example: +// '4' -> 4 +// 'a' -> 10 +// 'B' -> 11 +// Assumes the input is a valid hex character. DCHECKs in debug builds if not. +BASE_EXPORT char HexDigitToInt(wchar_t c); + +// Returns true if it's a Unicode whitespace character. +BASE_EXPORT bool IsUnicodeWhitespace(wchar_t c); + +// Return a byte string in human-readable format with a unit suffix. Not +// appropriate for use in any UI; use of FormatBytes and friends in ui/base is +// highly recommended instead. TODO(avi): Figure out how to get callers to use +// FormatBytes instead; remove this. +BASE_EXPORT string16 FormatBytesUnlocalized(int64_t bytes); + +// Starting at |start_offset| (usually 0), replace the first instance of +// |find_this| with |replace_with|. +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( + base::string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with); +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( + std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with); + +// Starting at |start_offset| (usually 0), look through |str| and replace all +// instances of |find_this| with |replace_with|. +// +// This does entire substrings; use std::replace in <algorithm> for single +// characters, for example: +// std::replace(str.begin(), str.end(), 'a', 'b'); +BASE_EXPORT void ReplaceSubstringsAfterOffset( + string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with); +BASE_EXPORT void ReplaceSubstringsAfterOffset( + std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with); + +// Reserves enough memory in |str| to accommodate |length_with_null| characters, +// sets the size of |str| to |length_with_null - 1| characters, and returns a +// pointer to the underlying contiguous array of characters. This is typically +// used when calling a function that writes results into a character array, but +// the caller wants the data to be managed by a string-like object. It is +// convenient in that is can be used inline in the call, and fast in that it +// avoids copying the results of the call from a char* into a string. +// +// |length_with_null| must be at least 2, since otherwise the underlying string +// would have size 0, and trying to access &((*str)[0]) in that case can result +// in a number of problems. +// +// Internally, this takes linear time because the resize() call 0-fills the +// underlying array for potentially all +// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we +// could avoid this aspect of the resize() call, as we expect the caller to +// immediately write over this memory, but there is no other way to set the size +// of the string, and not doing that will mean people who access |str| rather +// than str.c_str() will get back a string of whatever size |str| had on entry +// to this function (probably 0). +BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null); +BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); +#ifndef OS_WIN +BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null); +#endif + +// Does the opposite of SplitString(). +BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts, + StringPiece separator); +BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, + StringPiece16 separator); + +// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. +// Additionally, any number of consecutive '$' characters is replaced by that +// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be +// NULL. This only allows you to use up to nine replacements. +BASE_EXPORT string16 ReplaceStringPlaceholders( + const string16& format_string, + const std::vector<string16>& subst, + std::vector<size_t>* offsets); + +BASE_EXPORT std::string ReplaceStringPlaceholders( + const StringPiece& format_string, + const std::vector<std::string>& subst, + std::vector<size_t>* offsets); + +// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. +BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, + const string16& a, + size_t* offset); + +} // namespace base + +#if defined(OS_WIN) +#include "base/strings/string_util_win.h" +#elif defined(OS_POSIX) +#include "base/strings/string_util_posix.h" +#else +#error Define string operations appropriately for your platform +#endif + +#endif // BASE_STRINGS_STRING_UTIL_H_ diff --git a/security/sandbox/chromium/base/strings/string_util_constants.cc b/security/sandbox/chromium/base/strings/string_util_constants.cc new file mode 100644 index 000000000..aba1b12b8 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util_constants.cc @@ -0,0 +1,67 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_util.h" + +namespace base { + +#define WHITESPACE_UNICODE \ + 0x0009, /* CHARACTER TABULATION */ \ + 0x000A, /* LINE FEED (LF) */ \ + 0x000B, /* LINE TABULATION */ \ + 0x000C, /* FORM FEED (FF) */ \ + 0x000D, /* CARRIAGE RETURN (CR) */ \ + 0x0020, /* SPACE */ \ + 0x0085, /* NEXT LINE (NEL) */ \ + 0x00A0, /* NO-BREAK SPACE */ \ + 0x1680, /* OGHAM SPACE MARK */ \ + 0x2000, /* EN QUAD */ \ + 0x2001, /* EM QUAD */ \ + 0x2002, /* EN SPACE */ \ + 0x2003, /* EM SPACE */ \ + 0x2004, /* THREE-PER-EM SPACE */ \ + 0x2005, /* FOUR-PER-EM SPACE */ \ + 0x2006, /* SIX-PER-EM SPACE */ \ + 0x2007, /* FIGURE SPACE */ \ + 0x2008, /* PUNCTUATION SPACE */ \ + 0x2009, /* THIN SPACE */ \ + 0x200A, /* HAIR SPACE */ \ + 0x2028, /* LINE SEPARATOR */ \ + 0x2029, /* PARAGRAPH SEPARATOR */ \ + 0x202F, /* NARROW NO-BREAK SPACE */ \ + 0x205F, /* MEDIUM MATHEMATICAL SPACE */ \ + 0x3000, /* IDEOGRAPHIC SPACE */ \ + 0 + +const wchar_t kWhitespaceWide[] = { + WHITESPACE_UNICODE +}; + +const char16 kWhitespaceUTF16[] = { + WHITESPACE_UNICODE +}; + +const char kWhitespaceASCII[] = { + 0x09, // CHARACTER TABULATION + 0x0A, // LINE FEED (LF) + 0x0B, // LINE TABULATION + 0x0C, // FORM FEED (FF) + 0x0D, // CARRIAGE RETURN (CR) + 0x20, // SPACE + 0 +}; + +const char16 kWhitespaceASCIIAs16[] = { + 0x09, // CHARACTER TABULATION + 0x0A, // LINE FEED (LF) + 0x0B, // LINE TABULATION + 0x0C, // FORM FEED (FF) + 0x0D, // CARRIAGE RETURN (CR) + 0x20, // SPACE + 0 +}; + +const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_util_posix.h b/security/sandbox/chromium/base/strings/string_util_posix.h new file mode 100644 index 000000000..8299118e1 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util_posix.h @@ -0,0 +1,37 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_POSIX_H_ +#define BASE_STRINGS_STRING_UTIL_POSIX_H_ + +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "base/logging.h" + +namespace base { + +// Chromium code style is to not use malloc'd strings; this is only for use +// for interaction with APIs that require it. +inline char* strdup(const char* str) { + return ::strdup(str); +} + +inline int vsnprintf(char* buffer, size_t size, + const char* format, va_list arguments) { + return ::vsnprintf(buffer, size, format, arguments); +} + +inline int vswprintf(wchar_t* buffer, size_t size, + const wchar_t* format, va_list arguments) { + DCHECK(IsWprintfFormatPortable(format)); + return ::vswprintf(buffer, size, format, arguments); +} + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_POSIX_H_ diff --git a/security/sandbox/chromium/base/strings/string_util_win.h b/security/sandbox/chromium/base/strings/string_util_win.h new file mode 100644 index 000000000..7f260bfc8 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util_win.h @@ -0,0 +1,44 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_WIN_H_ +#define BASE_STRINGS_STRING_UTIL_WIN_H_ + +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "base/logging.h" + +namespace base { + +// Chromium code style is to not use malloc'd strings; this is only for use +// for interaction with APIs that require it. +inline char* strdup(const char* str) { + return _strdup(str); +} + +inline int vsnprintf(char* buffer, size_t size, + const char* format, va_list arguments) { + int length = vsnprintf_s(buffer, size, size - 1, format, arguments); + if (length < 0) + return _vscprintf(format, arguments); + return length; +} + +inline int vswprintf(wchar_t* buffer, size_t size, + const wchar_t* format, va_list arguments) { + DCHECK(IsWprintfFormatPortable(format)); + + int length = _vsnwprintf_s(buffer, size, size - 1, format, arguments); + if (length < 0) + return _vscwprintf(format, arguments); + return length; +} + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_WIN_H_ diff --git a/security/sandbox/chromium/base/strings/stringprintf.cc b/security/sandbox/chromium/base/strings/stringprintf.cc new file mode 100644 index 000000000..415845d61 --- /dev/null +++ b/security/sandbox/chromium/base/strings/stringprintf.cc @@ -0,0 +1,189 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/stringprintf.h" + +#include <errno.h> +#include <stddef.h> + +#include <vector> + +#include "base/macros.h" +#include "base/scoped_clear_errno.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "build/build_config.h" + +namespace base { + +namespace { + +// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter +// is the size of the buffer. These return the number of characters in the +// formatted string excluding the NUL terminator. If the buffer is not +// large enough to accommodate the formatted string without truncation, they +// return the number of characters that would be in the fully-formatted string +// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms). +inline int vsnprintfT(char* buffer, + size_t buf_size, + const char* format, + va_list argptr) { + return base::vsnprintf(buffer, buf_size, format, argptr); +} + +#if defined(OS_WIN) +inline int vsnprintfT(wchar_t* buffer, + size_t buf_size, + const wchar_t* format, + va_list argptr) { + return base::vswprintf(buffer, buf_size, format, argptr); +} +#endif + +// Templatized backend for StringPrintF/StringAppendF. This does not finalize +// the va_list, the caller is expected to do that. +template <class StringType> +static void StringAppendVT(StringType* dst, + const typename StringType::value_type* format, + va_list ap) { + // First try with a small fixed size buffer. + // This buffer size should be kept in sync with StringUtilTest.GrowBoundary + // and StringUtilTest.StringPrintfBounds. + typename StringType::value_type stack_buf[1024]; + + va_list ap_copy; + va_copy(ap_copy, ap); + +#if !defined(OS_WIN) + ScopedClearErrno clear_errno; +#endif + int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, ap_copy); + va_end(ap_copy); + + if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) { + // It fit. + dst->append(stack_buf, result); + return; + } + + // Repeatedly increase buffer size until it fits. + int mem_length = arraysize(stack_buf); + while (true) { + if (result < 0) { +#if defined(OS_WIN) + // On Windows, vsnprintfT always returns the number of characters in a + // fully-formatted string, so if we reach this point, something else is + // wrong and no amount of buffer-doubling is going to fix it. + return; +#else + if (errno != 0 && errno != EOVERFLOW) + return; + // Try doubling the buffer size. + mem_length *= 2; +#endif + } else { + // We need exactly "result + 1" characters. + mem_length = result + 1; + } + + if (mem_length > 32 * 1024 * 1024) { + // That should be plenty, don't try anything larger. This protects + // against huge allocations when using vsnprintfT implementations that + // return -1 for reasons other than overflow without setting errno. + DLOG(WARNING) << "Unable to printf the requested string due to size."; + return; + } + + std::vector<typename StringType::value_type> mem_buf(mem_length); + + // NOTE: You can only use a va_list once. Since we're in a while loop, we + // need to make a new copy each time so we don't use up the original. + va_copy(ap_copy, ap); + result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy); + va_end(ap_copy); + + if ((result >= 0) && (result < mem_length)) { + // It fit. + dst->append(&mem_buf[0], result); + return; + } + } +} + +} // namespace + +std::string StringPrintf(const char* format, ...) { + va_list ap; + va_start(ap, format); + std::string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + +#if defined(OS_WIN) +std::wstring StringPrintf(const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + std::wstring result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} +#endif + +std::string StringPrintV(const char* format, va_list ap) { + std::string result; + StringAppendV(&result, format, ap); + return result; +} + +const std::string& SStringPrintf(std::string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} + +#if defined(OS_WIN) +const std::wstring& SStringPrintf(std::wstring* dst, + const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} +#endif + +void StringAppendF(std::string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} + +#if defined(OS_WIN) +void StringAppendF(std::wstring* dst, const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} +#endif + +void StringAppendV(std::string* dst, const char* format, va_list ap) { + StringAppendVT(dst, format, ap); +} + +#if defined(OS_WIN) +void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) { + StringAppendVT(dst, format, ap); +} +#endif + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/stringprintf.h b/security/sandbox/chromium/base/strings/stringprintf.h new file mode 100644 index 000000000..7a75d89e1 --- /dev/null +++ b/security/sandbox/chromium/base/strings/stringprintf.h @@ -0,0 +1,66 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRINGPRINTF_H_ +#define BASE_STRINGS_STRINGPRINTF_H_ + +#include <stdarg.h> // va_list + +#include <string> + +#include "base/base_export.h" +#include "base/compiler_specific.h" +#include "build/build_config.h" + +namespace base { + +// Return a C++ string given printf-like input. +BASE_EXPORT std::string StringPrintf(_Printf_format_string_ const char* format, + ...) + PRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; +#if defined(OS_WIN) +BASE_EXPORT std::wstring StringPrintf( + _Printf_format_string_ const wchar_t* format, + ...) WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; +#endif + +// Return a C++ string given vprintf-like input. +BASE_EXPORT std::string StringPrintV(const char* format, va_list ap) + PRINTF_FORMAT(1, 0) WARN_UNUSED_RESULT; + +// Store result into a supplied string and return it. +BASE_EXPORT const std::string& SStringPrintf( + std::string* dst, + _Printf_format_string_ const char* format, + ...) PRINTF_FORMAT(2, 3); +#if defined(OS_WIN) +BASE_EXPORT const std::wstring& SStringPrintf( + std::wstring* dst, + _Printf_format_string_ const wchar_t* format, + ...) WPRINTF_FORMAT(2, 3); +#endif + +// Append result to a supplied string. +BASE_EXPORT void StringAppendF(std::string* dst, + _Printf_format_string_ const char* format, + ...) PRINTF_FORMAT(2, 3); +#if defined(OS_WIN) +BASE_EXPORT void StringAppendF(std::wstring* dst, + _Printf_format_string_ const wchar_t* format, + ...) WPRINTF_FORMAT(2, 3); +#endif + +// Lower-level routine that takes a va_list and appends to a specified +// string. All other routines are just convenience wrappers around it. +BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap) + PRINTF_FORMAT(2, 0); +#if defined(OS_WIN) +BASE_EXPORT void StringAppendV(std::wstring* dst, + const wchar_t* format, va_list ap) + WPRINTF_FORMAT(2, 0); +#endif + +} // namespace base + +#endif // BASE_STRINGS_STRINGPRINTF_H_ diff --git a/security/sandbox/chromium/base/strings/utf_string_conversion_utils.cc b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.cc new file mode 100644 index 000000000..3101a6028 --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.cc @@ -0,0 +1,148 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/utf_string_conversion_utils.h" + +#include "base/third_party/icu/icu_utf.h" + +namespace base { + +// ReadUnicodeCharacter -------------------------------------------------------- + +bool ReadUnicodeCharacter(const char* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point_out) { + // U8_NEXT expects to be able to use -1 to signal an error, so we must + // use a signed type for code_point. But this function returns false + // on error anyway, so code_point_out is unsigned. + int32_t code_point; + CBU8_NEXT(src, *char_index, src_len, code_point); + *code_point_out = static_cast<uint32_t>(code_point); + + // The ICU macro above moves to the next char, we want to point to the last + // char consumed. + (*char_index)--; + + // Validate the decoded value. + return IsValidCodepoint(code_point); +} + +bool ReadUnicodeCharacter(const char16* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point) { + if (CBU16_IS_SURROGATE(src[*char_index])) { + if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) || + *char_index + 1 >= src_len || + !CBU16_IS_TRAIL(src[*char_index + 1])) { + // Invalid surrogate pair. + return false; + } + + // Valid surrogate pair. + *code_point = CBU16_GET_SUPPLEMENTARY(src[*char_index], + src[*char_index + 1]); + (*char_index)++; + } else { + // Not a surrogate, just one 16-bit word. + *code_point = src[*char_index]; + } + + return IsValidCodepoint(*code_point); +} + +#if defined(WCHAR_T_IS_UTF32) +bool ReadUnicodeCharacter(const wchar_t* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point) { + // Conversion is easy since the source is 32-bit. + *code_point = src[*char_index]; + + // Validate the value. + return IsValidCodepoint(*code_point); +} +#endif // defined(WCHAR_T_IS_UTF32) + +// WriteUnicodeCharacter ------------------------------------------------------- + +size_t WriteUnicodeCharacter(uint32_t code_point, std::string* output) { + if (code_point <= 0x7f) { + // Fast path the common case of one byte. + output->push_back(static_cast<char>(code_point)); + return 1; + } + + + // CBU8_APPEND_UNSAFE can append up to 4 bytes. + size_t char_offset = output->length(); + size_t original_char_offset = char_offset; + output->resize(char_offset + CBU8_MAX_LENGTH); + + CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); + + // CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so + // it will represent the new length of the string. + output->resize(char_offset); + return char_offset - original_char_offset; +} + +size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) { + if (CBU16_LENGTH(code_point) == 1) { + // Thie code point is in the Basic Multilingual Plane (BMP). + output->push_back(static_cast<char16>(code_point)); + return 1; + } + // Non-BMP characters use a double-character encoding. + size_t char_offset = output->length(); + output->resize(char_offset + CBU16_MAX_LENGTH); + CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); + return CBU16_MAX_LENGTH; +} + +// Generalized Unicode converter ----------------------------------------------- + +template<typename CHAR> +void PrepareForUTF8Output(const CHAR* src, + size_t src_len, + std::string* output) { + output->clear(); + if (src_len == 0) + return; + if (src[0] < 0x80) { + // Assume that the entire input will be ASCII. + output->reserve(src_len); + } else { + // Assume that the entire input is non-ASCII and will have 3 bytes per char. + output->reserve(src_len * 3); + } +} + +// Instantiate versions we know callers will need. +template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*); +template void PrepareForUTF8Output(const char16*, size_t, std::string*); + +template<typename STRING> +void PrepareForUTF16Or32Output(const char* src, + size_t src_len, + STRING* output) { + output->clear(); + if (src_len == 0) + return; + if (static_cast<unsigned char>(src[0]) < 0x80) { + // Assume the input is all ASCII, which means 1:1 correspondence. + output->reserve(src_len); + } else { + // Otherwise assume that the UTF-8 sequences will have 2 bytes for each + // character. + output->reserve(src_len / 2); + } +} + +// Instantiate versions we know callers will need. +template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*); +template void PrepareForUTF16Or32Output(const char*, size_t, string16*); + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/utf_string_conversion_utils.h b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.h new file mode 100644 index 000000000..c71640453 --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.h @@ -0,0 +1,99 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ +#define BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ + +// This should only be used by the various UTF string conversion files. + +#include <stddef.h> +#include <stdint.h> + +#include "base/base_export.h" +#include "base/strings/string16.h" + +namespace base { + +inline bool IsValidCodepoint(uint32_t code_point) { + // Excludes the surrogate code points ([0xD800, 0xDFFF]) and + // codepoints larger than 0x10FFFF (the highest codepoint allowed). + // Non-characters and unassigned codepoints are allowed. + return code_point < 0xD800u || + (code_point >= 0xE000u && code_point <= 0x10FFFFu); +} + +inline bool IsValidCharacter(uint32_t code_point) { + // Excludes non-characters (U+FDD0..U+FDEF, and all codepoints ending in + // 0xFFFE or 0xFFFF) from the set of valid code points. + return code_point < 0xD800u || (code_point >= 0xE000u && + code_point < 0xFDD0u) || (code_point > 0xFDEFu && + code_point <= 0x10FFFFu && (code_point & 0xFFFEu) != 0xFFFEu); +} + +// ReadUnicodeCharacter -------------------------------------------------------- + +// Reads a UTF-8 stream, placing the next code point into the given output +// |*code_point|. |src| represents the entire string to read, and |*char_index| +// is the character offset within the string to start reading at. |*char_index| +// will be updated to index the last character read, such that incrementing it +// (as in a for loop) will take the reader to the next character. +// +// Returns true on success. On false, |*code_point| will be invalid. +BASE_EXPORT bool ReadUnicodeCharacter(const char* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point_out); + +// Reads a UTF-16 character. The usage is the same as the 8-bit version above. +BASE_EXPORT bool ReadUnicodeCharacter(const char16* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point); + +#if defined(WCHAR_T_IS_UTF32) +// Reads UTF-32 character. The usage is the same as the 8-bit version above. +BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point); +#endif // defined(WCHAR_T_IS_UTF32) + +// WriteUnicodeCharacter ------------------------------------------------------- + +// Appends a UTF-8 character to the given 8-bit string. Returns the number of +// bytes written. +BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, + std::string* output); + +// Appends the given code point as a UTF-16 character to the given 16-bit +// string. Returns the number of 16-bit values written. +BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, string16* output); + +#if defined(WCHAR_T_IS_UTF32) +// Appends the given UTF-32 character to the given 32-bit string. Returns the +// number of 32-bit values written. +inline size_t WriteUnicodeCharacter(uint32_t code_point, std::wstring* output) { + // This is the easy case, just append the character. + output->push_back(code_point); + return 1; +} +#endif // defined(WCHAR_T_IS_UTF32) + +// Generalized Unicode converter ----------------------------------------------- + +// Guesses the length of the output in UTF-8 in bytes, clears that output +// string, and reserves that amount of space. We assume that the input +// character types are unsigned, which will be true for UTF-16 and -32 on our +// systems. +template<typename CHAR> +void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output); + +// Prepares an output buffer (containing either UTF-16 or -32 data) given some +// UTF-8 input that will be converted to it. See PrepareForUTF8Output(). +template<typename STRING> +void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output); + +} // namespace base + +#endif // BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ diff --git a/security/sandbox/chromium/base/strings/utf_string_conversions.cc b/security/sandbox/chromium/base/strings/utf_string_conversions.cc new file mode 100644 index 000000000..6b17eacd6 --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversions.cc @@ -0,0 +1,231 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/utf_string_conversions.h" + +#include <stdint.h> + +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversion_utils.h" +#include "build/build_config.h" + +namespace base { + +namespace { + +// Generalized Unicode converter ----------------------------------------------- + +// Converts the given source Unicode character type to the given destination +// Unicode character type as a STL string. The given input buffer and size +// determine the source, and the given output STL string will be replaced by +// the result. +template<typename SRC_CHAR, typename DEST_STRING> +bool ConvertUnicode(const SRC_CHAR* src, + size_t src_len, + DEST_STRING* output) { + // ICU requires 32-bit numbers. + bool success = true; + int32_t src_len32 = static_cast<int32_t>(src_len); + for (int32_t i = 0; i < src_len32; i++) { + uint32_t code_point; + if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { + WriteUnicodeCharacter(code_point, output); + } else { + WriteUnicodeCharacter(0xFFFD, output); + success = false; + } + } + + return success; +} + +} // namespace + +// UTF-8 <-> Wide -------------------------------------------------------------- + +bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { + if (IsStringASCII(std::wstring(src, src_len))) { + output->assign(src, src + src_len); + return true; + } else { + PrepareForUTF8Output(src, src_len, output); + return ConvertUnicode(src, src_len, output); + } +} + +std::string WideToUTF8(const std::wstring& wide) { + if (IsStringASCII(wide)) { + return std::string(wide.data(), wide.data() + wide.length()); + } + + std::string ret; + PrepareForUTF8Output(wide.data(), wide.length(), &ret); + ConvertUnicode(wide.data(), wide.length(), &ret); + return ret; +} + +bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { + if (IsStringASCII(StringPiece(src, src_len))) { + output->assign(src, src + src_len); + return true; + } else { + PrepareForUTF16Or32Output(src, src_len, output); + return ConvertUnicode(src, src_len, output); + } +} + +std::wstring UTF8ToWide(StringPiece utf8) { + if (IsStringASCII(utf8)) { + return std::wstring(utf8.begin(), utf8.end()); + } + + std::wstring ret; + PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); + ConvertUnicode(utf8.data(), utf8.length(), &ret); + return ret; +} + +// UTF-16 <-> Wide ------------------------------------------------------------- + +#if defined(WCHAR_T_IS_UTF16) + +// When wide == UTF-16, then conversions are a NOP. +bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { + output->assign(src, src_len); + return true; +} + +string16 WideToUTF16(const std::wstring& wide) { + return wide; +} + +bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { + output->assign(src, src_len); + return true; +} + +std::wstring UTF16ToWide(const string16& utf16) { + return utf16; +} + +#elif defined(WCHAR_T_IS_UTF32) + +bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { + output->clear(); + // Assume that normally we won't have any non-BMP characters so the counts + // will be the same. + output->reserve(src_len); + return ConvertUnicode(src, src_len, output); +} + +string16 WideToUTF16(const std::wstring& wide) { + string16 ret; + WideToUTF16(wide.data(), wide.length(), &ret); + return ret; +} + +bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { + output->clear(); + // Assume that normally we won't have any non-BMP characters so the counts + // will be the same. + output->reserve(src_len); + return ConvertUnicode(src, src_len, output); +} + +std::wstring UTF16ToWide(const string16& utf16) { + std::wstring ret; + UTF16ToWide(utf16.data(), utf16.length(), &ret); + return ret; +} + +#endif // defined(WCHAR_T_IS_UTF32) + +// UTF16 <-> UTF8 -------------------------------------------------------------- + +#if defined(WCHAR_T_IS_UTF32) + +bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { + if (IsStringASCII(StringPiece(src, src_len))) { + output->assign(src, src + src_len); + return true; + } else { + PrepareForUTF16Or32Output(src, src_len, output); + return ConvertUnicode(src, src_len, output); + } +} + +string16 UTF8ToUTF16(StringPiece utf8) { + if (IsStringASCII(utf8)) { + return string16(utf8.begin(), utf8.end()); + } + + string16 ret; + PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + ConvertUnicode(utf8.data(), utf8.length(), &ret); + return ret; +} + +bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { + if (IsStringASCII(StringPiece16(src, src_len))) { + output->assign(src, src + src_len); + return true; + } else { + PrepareForUTF8Output(src, src_len, output); + return ConvertUnicode(src, src_len, output); + } +} + +std::string UTF16ToUTF8(StringPiece16 utf16) { + if (IsStringASCII(utf16)) { + return std::string(utf16.begin(), utf16.end()); + } + + std::string ret; + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + UTF16ToUTF8(utf16.data(), utf16.length(), &ret); + return ret; +} + +#elif defined(WCHAR_T_IS_UTF16) +// Easy case since we can use the "wide" versions we already wrote above. + +bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { + return UTF8ToWide(src, src_len, output); +} + +string16 UTF8ToUTF16(StringPiece utf8) { + return UTF8ToWide(utf8); +} + +bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { + return WideToUTF8(src, src_len, output); +} + +std::string UTF16ToUTF8(StringPiece16 utf16) { + if (IsStringASCII(utf16)) + return std::string(utf16.data(), utf16.data() + utf16.length()); + + std::string ret; + PrepareForUTF8Output(utf16.data(), utf16.length(), &ret); + ConvertUnicode(utf16.data(), utf16.length(), &ret); + return ret; +} + +#endif + +string16 ASCIIToUTF16(StringPiece ascii) { + DCHECK(IsStringASCII(ascii)) << ascii; + return string16(ascii.begin(), ascii.end()); +} + +std::string UTF16ToASCII(StringPiece16 utf16) { + DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); + return std::string(utf16.begin(), utf16.end()); +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/utf_string_conversions.h b/security/sandbox/chromium/base/strings/utf_string_conversions.h new file mode 100644 index 000000000..2995f4cbc --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversions.h @@ -0,0 +1,54 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ +#define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ + +#include <stddef.h> + +#include <string> + +#include "base/base_export.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" + +namespace base { + +// These convert between UTF-8, -16, and -32 strings. They are potentially slow, +// so avoid unnecessary conversions. The low-level versions return a boolean +// indicating whether the conversion was 100% valid. In this case, it will still +// do the best it can and put the result in the output buffer. The versions that +// return strings ignore this error and just return the best conversion +// possible. +BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len, + std::string* output); +BASE_EXPORT std::string WideToUTF8(const std::wstring& wide); +BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len, + std::wstring* output); +BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8); + +BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len, + string16* output); +BASE_EXPORT string16 WideToUTF16(const std::wstring& wide); +BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len, + std::wstring* output); +BASE_EXPORT std::wstring UTF16ToWide(const string16& utf16); + +BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output); +BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8); +BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len, + std::string* output); +BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16); + +// This converts an ASCII string, typically a hardcoded constant, to a UTF16 +// string. +BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii); + +// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII +// beforehand. +BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16); + +} // namespace base + +#endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ |