diff options
Diffstat (limited to 'xpcom/ds/nsCharSeparatedTokenizer.h')
-rw-r--r-- | xpcom/ds/nsCharSeparatedTokenizer.h | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/xpcom/ds/nsCharSeparatedTokenizer.h b/xpcom/ds/nsCharSeparatedTokenizer.h new file mode 100644 index 000000000..0e24d9d3e --- /dev/null +++ b/xpcom/ds/nsCharSeparatedTokenizer.h @@ -0,0 +1,200 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __nsCharSeparatedTokenizer_h +#define __nsCharSeparatedTokenizer_h + +#include "mozilla/RangedPtr.h" + +#include "nsDependentSubstring.h" +#include "nsCRT.h" + +/** + * This parses a SeparatorChar-separated string into tokens. + * Whitespace surrounding tokens is not treated as part of tokens, however + * whitespace inside a token is. If the final token is the empty string, it is + * not returned. + * + * Some examples, with SeparatorChar = ',': + * + * "foo, bar, baz" -> "foo" "bar" "baz" + * "foo,bar,baz" -> "foo" "bar" "baz" + * "foo , bar hi , baz" -> "foo" "bar hi" "baz" + * "foo, ,bar,baz" -> "foo" "" "bar" "baz" + * "foo,,bar,baz" -> "foo" "" "bar" "baz" + * "foo,bar,baz," -> "foo" "bar" "baz" + * + * The function used for whitespace detection is a template argument. + * By default, it is NS_IsAsciiWhitespace. + */ +template<typename DependentSubstringType, bool IsWhitespace(char16_t)> +class nsTCharSeparatedTokenizer +{ + typedef typename DependentSubstringType::char_type CharType; + typedef typename DependentSubstringType::substring_type SubstringType; + +public: + // Flags -- only one for now. If we need more, they should be defined to + // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) + enum + { + SEPARATOR_OPTIONAL = 1 + }; + + nsTCharSeparatedTokenizer(const SubstringType& aSource, + CharType aSeparatorChar, + uint32_t aFlags = 0) + : mIter(aSource.Data(), aSource.Length()) + , mEnd(aSource.Data() + aSource.Length(), aSource.Data(), + aSource.Length()) + , mSeparatorChar(aSeparatorChar) + , mWhitespaceBeforeFirstToken(false) + , mWhitespaceAfterCurrentToken(false) + , mSeparatorAfterCurrentToken(false) + , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) + { + // Skip initial whitespace + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceBeforeFirstToken = true; + ++mIter; + } + } + + /** + * Checks if any more tokens are available. + */ + bool hasMoreTokens() const + { + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + return mIter < mEnd; + } + + /* + * Returns true if there is whitespace prior to the first token. + */ + bool whitespaceBeforeFirstToken() const + { + return mWhitespaceBeforeFirstToken; + } + + /* + * Returns true if there is a separator after the current token. + * Useful if you want to check whether the last token has a separator + * after it which may not be valid. + */ + bool separatorAfterCurrentToken() const + { + return mSeparatorAfterCurrentToken; + } + + /* + * Returns true if there is any whitespace after the current token. + */ + bool whitespaceAfterCurrentToken() const + { + return mWhitespaceAfterCurrentToken; + } + + /** + * Returns the next token. + */ + const DependentSubstringType nextToken() + { + mozilla::RangedPtr<const CharType> tokenStart = mIter; + mozilla::RangedPtr<const CharType> tokenEnd = mIter; + + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + // Search until we hit separator or end (or whitespace, if a separator + // isn't required -- see clause with 'break' below). + while (mIter < mEnd && *mIter != mSeparatorChar) { + // Skip to end of the current word. + while (mIter < mEnd && + !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { + ++mIter; + } + tokenEnd = mIter; + + // Skip whitespace after the current word. + mWhitespaceAfterCurrentToken = false; + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + if (mSeparatorOptional) { + // We've hit (and skipped) whitespace, and that's sufficient to end + // our token, regardless of whether we've reached a SeparatorChar. + break; + } // (else, we'll keep looping until we hit mEnd or SeparatorChar) + } + + mSeparatorAfterCurrentToken = (mIter != mEnd && + *mIter == mSeparatorChar); + MOZ_ASSERT(mSeparatorOptional || + (mSeparatorAfterCurrentToken == (mIter < mEnd)), + "If we require a separator and haven't hit the end of " + "our string, then we shouldn't have left the loop " + "unless we hit a separator"); + + // Skip separator (and any whitespace after it), if we're at one. + if (mSeparatorAfterCurrentToken) { + ++mIter; + + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + } + + return Substring(tokenStart.get(), tokenEnd.get()); + } + +private: + mozilla::RangedPtr<const CharType> mIter; + const mozilla::RangedPtr<const CharType> mEnd; + CharType mSeparatorChar; + bool mWhitespaceBeforeFirstToken; + bool mWhitespaceAfterCurrentToken; + bool mSeparatorAfterCurrentToken; + bool mSeparatorOptional; +}; + +template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> +class nsCharSeparatedTokenizerTemplate + : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace> +{ +public: + nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource, + char16_t aSeparatorChar, + uint32_t aFlags = 0) + : nsTCharSeparatedTokenizer<nsDependentSubstring, + IsWhitespace>(aSource, aSeparatorChar, aFlags) + { + } +}; + +typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer; + +template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> +class nsCCharSeparatedTokenizerTemplate + : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace> +{ +public: + nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource, + char aSeparatorChar, + uint32_t aFlags = 0) + : nsTCharSeparatedTokenizer<nsDependentCSubstring, + IsWhitespace>(aSource, aSeparatorChar, aFlags) + { + } +}; + +typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer; + +#endif /* __nsCharSeparatedTokenizer_h */ |