diff options
Diffstat (limited to 'ipc/chromium/src/base/string_util.cc')
-rw-r--r-- | ipc/chromium/src/base/string_util.cc | 780 |
1 files changed, 780 insertions, 0 deletions
diff --git a/ipc/chromium/src/base/string_util.cc b/ipc/chromium/src/base/string_util.cc new file mode 100644 index 000000000..339b12271 --- /dev/null +++ b/ipc/chromium/src/base/string_util.cc @@ -0,0 +1,780 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/string_util.h" + +#include "build/build_config.h" + +#include <ctype.h> +#include <errno.h> +#include <math.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <wchar.h> +#include <wctype.h> + +#include <algorithm> +#include <vector> + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/singleton.h" + +namespace { + +// Force the singleton used by Empty[W]String[16] to be a unique type. This +// prevents other code that might accidentally use Singleton<string> from +// getting our internal one. +struct EmptyStrings { + EmptyStrings() {} + const std::string s; + const std::wstring ws; + const string16 s16; +}; + +// Hack to convert any char-like type to its unsigned counterpart. +// For example, it will convert char, signed char and unsigned char to unsigned +// char. +template<typename T> +struct ToUnsigned { + typedef T Unsigned; +}; + +template<> +struct ToUnsigned<char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<signed char> { + typedef unsigned char Unsigned; +}; +template<> +struct ToUnsigned<wchar_t> { +#if defined(WCHAR_T_IS_UTF16) + typedef unsigned short Unsigned; +#elif defined(WCHAR_T_IS_UTF32) + typedef uint32_t Unsigned; +#endif +}; +template<> +struct ToUnsigned<short> { + typedef unsigned short Unsigned; +}; + +// Generalized string-to-number conversion. +// +// StringToNumberTraits should provide: +// - a typedef for string_type, the STL string type used as input. +// - a typedef for value_type, the target numeric type. +// - a static function, convert_func, which dispatches to an appropriate +// strtol-like function and returns type value_type. +// - a static function, valid_func, which validates |input| and returns a bool +// indicating whether it is in proper form. This is used to check for +// conditions that convert_func tolerates but should result in +// StringToNumber returning false. For strtol-like funtions, valid_func +// should check for leading whitespace. +template<typename StringToNumberTraits> +bool StringToNumber(const typename StringToNumberTraits::string_type& input, + typename StringToNumberTraits::value_type* output) { + typedef StringToNumberTraits traits; + + errno = 0; // Thread-safe? It is on at least Mac, Linux, and Windows. + typename traits::string_type::value_type* endptr = NULL; + typename traits::value_type value = traits::convert_func(input.c_str(), + &endptr); + *output = value; + + // Cases to return false: + // - If errno is ERANGE, there was an overflow or underflow. + // - If the input string is empty, there was nothing to parse. + // - If endptr does not point to the end of the string, there are either + // characters remaining in the string after a parsed number, or the string + // does not begin with a parseable number. endptr is compared to the + // expected end given the string's stated length to correctly catch cases + // where the string contains embedded NUL characters. + // - valid_func determines that the input is not in preferred form. + return errno == 0 && + !input.empty() && + input.c_str() + input.length() == endptr && + traits::valid_func(input); +} + +class StringToLongTraits { + public: + typedef std::string string_type; + typedef long value_type; + static const int kBase = 10; + static inline value_type convert_func(const string_type::value_type* str, + string_type::value_type** endptr) { + return strtol(str, endptr, kBase); + } + static inline bool valid_func(const string_type& str) { + return !str.empty() && !isspace(str[0]); + } +}; + +class String16ToLongTraits { + public: + typedef string16 string_type; + typedef long value_type; + static const int kBase = 10; + static inline value_type convert_func(const string_type::value_type* str, + string_type::value_type** endptr) { +#if defined(WCHAR_T_IS_UTF16) + return wcstol(str, endptr, kBase); +#elif defined(WCHAR_T_IS_UTF32) + std::string ascii_string = UTF16ToASCII(string16(str)); + char* ascii_end = NULL; + value_type ret = strtol(ascii_string.c_str(), &ascii_end, kBase); + if (ascii_string.c_str() + ascii_string.length() == ascii_end) { + *endptr = + const_cast<string_type::value_type*>(str) + ascii_string.length(); + } + return ret; +#endif + } + static inline bool valid_func(const string_type& str) { + return !str.empty() && !iswspace(str[0]); + } +}; + +class StringToInt64Traits { + public: + typedef std::string string_type; + typedef int64_t value_type; + static const int kBase = 10; + static inline value_type convert_func(const string_type::value_type* str, + string_type::value_type** endptr) { +#ifdef OS_WIN + return _strtoi64(str, endptr, kBase); +#else // assume OS_POSIX + return strtoll(str, endptr, kBase); +#endif + } + static inline bool valid_func(const string_type& str) { + return !str.empty() && !isspace(str[0]); + } +}; + +class String16ToInt64Traits { + public: + typedef string16 string_type; + typedef int64_t value_type; + static const int kBase = 10; + static inline value_type convert_func(const string_type::value_type* str, + string_type::value_type** endptr) { +#ifdef OS_WIN + return _wcstoi64(str, endptr, kBase); +#else // assume OS_POSIX + std::string ascii_string = UTF16ToASCII(string16(str)); + char* ascii_end = NULL; + value_type ret = strtoll(ascii_string.c_str(), &ascii_end, kBase); + if (ascii_string.c_str() + ascii_string.length() == ascii_end) { + *endptr = + const_cast<string_type::value_type*>(str) + ascii_string.length(); + } + return ret; +#endif + } + static inline bool valid_func(const string_type& str) { + return !str.empty() && !iswspace(str[0]); + } +}; + +} // namespace + + +namespace base { + +bool IsWprintfFormatPortable(const wchar_t* format) { + for (const wchar_t* position = format; *position != '\0'; ++position) { + + if (*position == '%') { + bool in_specification = true; + bool modifier_l = false; + while (in_specification) { + // Eat up characters until reaching a known specifier. + if (*++position == '\0') { + // The format string ended in the middle of a specification. Call + // it portable because no unportable specifications were found. The + // string is equally broken on all platforms. + return true; + } + + if (*position == 'l') { + // 'l' is the only thing that can save the 's' and 'c' specifiers. + modifier_l = true; + } else if (((*position == 's' || *position == 'c') && !modifier_l) || + *position == 'S' || *position == 'C' || *position == 'F' || + *position == 'D' || *position == 'O' || *position == 'U') { + // Not portable. + return false; + } + + if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { + // Portable, keep scanning the rest of the format string. + in_specification = false; + } + } + } + + } + + return true; +} + + +} // namespace base + +static const wchar_t kWhitespaceWide[] = { + 0x0009, // <control-0009> to <control-000D> + 0x000A, + 0x000B, + 0x000C, + 0x000D, + 0x0020, // Space + 0x0085, // <control-0085> + 0x00A0, // No-Break Space + 0x1680, // Ogham Space Mark + 0x180E, // Mongolian Vowel Separator + 0x2000, // En Quad to Hair Space + 0x2001, + 0x2002, + 0x2003, + 0x2004, + 0x2005, + 0x2006, + 0x2007, + 0x2008, + 0x2009, + 0x200A, + 0x200C, // Zero Width Non-Joiner + 0x2028, // Line Separator + 0x2029, // Paragraph Separator + 0x202F, // Narrow No-Break Space + 0x205F, // Medium Mathematical Space + 0x3000, // Ideographic Space + 0 +}; +static const char kWhitespaceASCII[] = { + 0x09, // <control-0009> to <control-000D> + 0x0A, + 0x0B, + 0x0C, + 0x0D, + 0x20, // Space + 0 +}; + +template<typename STR> +TrimPositions TrimStringT(const STR& input, + const typename STR::value_type trim_chars[], + TrimPositions positions, + STR* output) { + // Find the edges of leading/trailing whitespace as desired. + const typename STR::size_type last_char = input.length() - 1; + const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? + input.find_first_not_of(trim_chars) : 0; + const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? + input.find_last_not_of(trim_chars) : last_char; + + // When the string was all whitespace, report that we stripped off whitespace + // from whichever position the caller was interested in. For empty input, we + // stripped no whitespace, but we still need to clear |output|. + if (input.empty() || + (first_good_char == STR::npos) || (last_good_char == STR::npos)) { + bool input_was_empty = input.empty(); // in case output == &input + output->clear(); + return input_was_empty ? TRIM_NONE : positions; + } + + // Trim the whitespace. + *output = + input.substr(first_good_char, last_good_char - first_good_char + 1); + + // Return where we trimmed from. + return static_cast<TrimPositions>( + ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | + ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); +} + +TrimPositions TrimWhitespace(const std::wstring& input, + TrimPositions positions, + std::wstring* output) { + return TrimStringT(input, kWhitespaceWide, positions, output); +} + +TrimPositions TrimWhitespaceASCII(const std::string& input, + TrimPositions positions, + std::string* output) { + return TrimStringT(input, kWhitespaceASCII, positions, output); +} + +// This function is only for backward-compatibility. +// To be removed when all callers are updated. +TrimPositions TrimWhitespace(const std::string& input, + TrimPositions positions, + std::string* output) { + return TrimWhitespaceASCII(input, positions, output); +} + +std::string WideToASCII(const std::wstring& wide) { + DCHECK(IsStringASCII(wide)); + return std::string(wide.begin(), wide.end()); +} + +std::wstring ASCIIToWide(const std::string& ascii) { + DCHECK(IsStringASCII(ascii)); + return std::wstring(ascii.begin(), ascii.end()); +} + +std::string UTF16ToASCII(const string16& utf16) { + DCHECK(IsStringASCII(utf16)); + return std::string(utf16.begin(), utf16.end()); +} + +string16 ASCIIToUTF16(const std::string& ascii) { + DCHECK(IsStringASCII(ascii)); + return string16(ascii.begin(), ascii.end()); +} + +template<class STR> +static bool DoIsStringASCII(const STR& str) { + for (size_t i = 0; i < str.length(); i++) { + typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; + if (c > 0x7F) + return false; + } + return true; +} + +bool IsStringASCII(const std::wstring& str) { + return DoIsStringASCII(str); +} + +#if !defined(WCHAR_T_IS_UTF16) +bool IsStringASCII(const string16& str) { + return DoIsStringASCII(str); +} +#endif + +bool IsStringASCII(const std::string& str) { + return DoIsStringASCII(str); +} + +// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter +// is the size of the buffer. These return the number of characters in the +// formatted string excluding the NUL terminator. If the buffer is not +// large enough to accommodate the formatted string without truncation, they +// return the number of characters that would be in the fully-formatted string +// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms). +inline int vsnprintfT(char* buffer, + size_t buf_size, + const char* format, + va_list argptr) { + return base::vsnprintf(buffer, buf_size, format, argptr); +} + +inline int vsnprintfT(wchar_t* buffer, + size_t buf_size, + const wchar_t* format, + va_list argptr) { + return base::vswprintf(buffer, buf_size, format, argptr); +} + +// Templatized backend for StringPrintF/StringAppendF. This does not finalize +// the va_list, the caller is expected to do that. +template <class StringType> +static void StringAppendVT(StringType* dst, + const typename StringType::value_type* format, + va_list ap) { + // First try with a small fixed size buffer. + // This buffer size should be kept in sync with StringUtilTest.GrowBoundary + // and StringUtilTest.StringPrintfBounds. + typename StringType::value_type stack_buf[1024]; + + va_list backup_ap; + base_va_copy(backup_ap, ap); + +#if !defined(OS_WIN) + errno = 0; +#endif + int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, backup_ap); + va_end(backup_ap); + + if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) { + // It fit. + dst->append(stack_buf, result); + return; + } + + // Repeatedly increase buffer size until it fits. + int mem_length = arraysize(stack_buf); + while (true) { + if (result < 0) { +#if !defined(OS_WIN) + // On Windows, vsnprintfT always returns the number of characters in a + // fully-formatted string, so if we reach this point, something else is + // wrong and no amount of buffer-doubling is going to fix it. + if (errno != 0 && errno != EOVERFLOW) +#endif + { + // If an error other than overflow occurred, it's never going to work. + DLOG(WARNING) << "Unable to printf the requested string due to error."; + return; + } + // Try doubling the buffer size. + mem_length *= 2; + } else { + // We need exactly "result + 1" characters. + mem_length = result + 1; + } + + if (mem_length > 32 * 1024 * 1024) { + // That should be plenty, don't try anything larger. This protects + // against huge allocations when using vsnprintfT implementations that + // return -1 for reasons other than overflow without setting errno. + DLOG(WARNING) << "Unable to printf the requested string due to size."; + return; + } + + std::vector<typename StringType::value_type> mem_buf(mem_length); + + // Restore the va_list before we use it again. + base_va_copy(backup_ap, ap); + + result = vsnprintfT(&mem_buf[0], mem_length, format, ap); + va_end(backup_ap); + + if ((result >= 0) && (result < mem_length)) { + // It fit. + dst->append(&mem_buf[0], result); + return; + } + } +} + +namespace { + +template <typename STR, typename INT, typename UINT, bool NEG> +struct IntToStringT { + + // This is to avoid a compiler warning about unary minus on unsigned type. + // For example, say you had the following code: + // template <typename INT> + // INT abs(INT value) { return value < 0 ? -value : value; } + // Even though if INT is unsigned, it's impossible for value < 0, so the + // unary minus will never be taken, the compiler will still generate a + // warning. We do a little specialization dance... + template <typename INT2, typename UINT2, bool NEG2> + struct ToUnsignedT { }; + + template <typename INT2, typename UINT2> + struct ToUnsignedT<INT2, UINT2, false> { + static UINT2 ToUnsigned(INT2 value) { + return static_cast<UINT2>(value); + } + }; + + template <typename INT2, typename UINT2> + struct ToUnsignedT<INT2, UINT2, true> { + static UINT2 ToUnsigned(INT2 value) { + return static_cast<UINT2>(value < 0 ? -value : value); + } + }; + + // This set of templates is very similar to the above templates, but + // for testing whether an integer is negative. + template <typename INT2, bool NEG2> + struct TestNegT {}; + template <typename INT2> + struct TestNegT<INT2, false> { + static bool TestNeg(INT2 value) { + // value is unsigned, and can never be negative. + return false; + } + }; + template <typename INT2> + struct TestNegT<INT2, true> { + static bool TestNeg(INT2 value) { + return value < 0; + } + }; + + static STR IntToString(INT value) { + // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. + // So round up to allocate 3 output characters per byte, plus 1 for '-'. + const int kOutputBufSize = 3 * sizeof(INT) + 1; + + // Allocate the whole string right away, we will right back to front, and + // then return the substr of what we ended up using. + STR outbuf(kOutputBufSize, 0); + + bool is_neg = TestNegT<INT, NEG>::TestNeg(value); + // Even though is_neg will never be true when INT is parameterized as + // unsigned, even the presence of the unary operation causes a warning. + UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value); + + for (typename STR::iterator it = outbuf.end();;) { + --it; + DCHECK(it != outbuf.begin()); + *it = static_cast<typename STR::value_type>((res % 10) + '0'); + res /= 10; + + // We're done.. + if (res == 0) { + if (is_neg) { + --it; + DCHECK(it != outbuf.begin()); + *it = static_cast<typename STR::value_type>('-'); + } + return STR(it, outbuf.end()); + } + } + NOTREACHED(); + return STR(); + } +}; + +} + +std::string IntToString(int value) { + return IntToStringT<std::string, int, unsigned int, true>:: + IntToString(value); +} +std::wstring IntToWString(int value) { + return IntToStringT<std::wstring, int, unsigned int, true>:: + IntToString(value); +} +std::string UintToString(unsigned int value) { + return IntToStringT<std::string, unsigned int, unsigned int, false>:: + IntToString(value); +} +std::wstring UintToWString(unsigned int value) { + return IntToStringT<std::wstring, unsigned int, unsigned int, false>:: + IntToString(value); +} +std::string Int64ToString(int64_t value) { + return IntToStringT<std::string, int64_t, uint64_t, true>:: + IntToString(value); +} +std::wstring Int64ToWString(int64_t value) { + return IntToStringT<std::wstring, int64_t, uint64_t, true>:: + IntToString(value); +} +std::string Uint64ToString(uint64_t value) { + return IntToStringT<std::string, uint64_t, uint64_t, false>:: + IntToString(value); +} +std::wstring Uint64ToWString(uint64_t value) { + return IntToStringT<std::wstring, uint64_t, uint64_t, false>:: + IntToString(value); +} + +// Lower-level routine that takes a va_list and appends to a specified +// string. All other routines are just convenience wrappers around it. +static void StringAppendV(std::string* dst, const char* format, va_list ap) { + StringAppendVT(dst, format, ap); +} + +static void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) { + StringAppendVT(dst, format, ap); +} + +std::string StringPrintf(const char* format, ...) { + va_list ap; + va_start(ap, format); + std::string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + +std::wstring StringPrintf(const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + std::wstring result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + +const std::string& SStringPrintf(std::string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} + +const std::wstring& SStringPrintf(std::wstring* dst, + const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} + +void StringAppendF(std::string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} + +void StringAppendF(std::wstring* dst, const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} + +template<typename STR> +static void SplitStringT(const STR& str, + const typename STR::value_type s, + bool trim_whitespace, + std::vector<STR>* r) { + size_t last = 0; + size_t i; + size_t c = str.size(); + for (i = 0; i <= c; ++i) { + if (i == c || str[i] == s) { + size_t len = i - last; + STR tmp = str.substr(last, len); + if (trim_whitespace) { + STR t_tmp; + TrimWhitespace(tmp, TRIM_ALL, &t_tmp); + r->push_back(t_tmp); + } else { + r->push_back(tmp); + } + last = i + 1; + } + } +} + +void SplitString(const std::wstring& str, + wchar_t s, + std::vector<std::wstring>* r) { + SplitStringT(str, s, true, r); +} + +void SplitString(const std::string& str, + char s, + std::vector<std::string>* r) { + SplitStringT(str, s, true, r); +} + +// For the various *ToInt conversions, there are no *ToIntTraits classes to use +// because there's no such thing as strtoi. Use *ToLongTraits through a cast +// instead, requiring that long and int are compatible and equal-width. They +// are on our target platforms. + +// XXX Sigh. + +#if !defined(ARCH_CPU_64_BITS) +bool StringToInt(const std::string& input, int* output) { + COMPILE_ASSERT(sizeof(int) == sizeof(long), cannot_strtol_to_int); + return StringToNumber<StringToLongTraits>(input, + reinterpret_cast<long*>(output)); +} + +bool StringToInt(const string16& input, int* output) { + COMPILE_ASSERT(sizeof(int) == sizeof(long), cannot_wcstol_to_int); + return StringToNumber<String16ToLongTraits>(input, + reinterpret_cast<long*>(output)); +} + +#else +bool StringToInt(const std::string& input, int* output) { + long tmp; + bool ok = StringToNumber<StringToLongTraits>(input, &tmp); + if (!ok || tmp > kint32max) { + return false; + } + *output = static_cast<int>(tmp); + return true; +} + +bool StringToInt(const string16& input, int* output) { + long tmp; + bool ok = StringToNumber<String16ToLongTraits>(input, &tmp); + if (!ok || tmp > kint32max) { + return false; + } + *output = static_cast<int>(tmp); + return true; +} +#endif // !defined(ARCH_CPU_64_BITS) + +bool StringToInt64(const std::string& input, int64_t* output) { + return StringToNumber<StringToInt64Traits>(input, output); +} + +bool StringToInt64(const string16& input, int64_t* output) { + return StringToNumber<String16ToInt64Traits>(input, output); +} + +int StringToInt(const std::string& value) { + int result; + StringToInt(value, &result); + return result; +} + +int StringToInt(const string16& value) { + int result; + StringToInt(value, &result); + return result; +} + +int64_t StringToInt64(const std::string& value) { + int64_t result; + StringToInt64(value, &result); + return result; +} + +int64_t StringToInt64(const string16& value) { + int64_t result; + StringToInt64(value, &result); + return result; +} + +// The following code is compatible with the OpenBSD lcpy interface. See: +// http://www.gratisoft.us/todd/papers/strlcpy.html +// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c + +namespace { + +template <typename CHAR> +size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. + return i; + } + + // We were left off at dst_size. We over copied 1 byte. Null terminate. + if (dst_size != 0) + dst[dst_size - 1] = 0; + + // Count the rest of the |src|, and return it's length in characters. + while (src[dst_size]) ++dst_size; + return dst_size; +} + +} // namespace + +size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { + return lcpyT<char>(dst, src, dst_size); +} +size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { + return lcpyT<wchar_t>(dst, src, dst_size); +} |