summaryrefslogtreecommitdiffstats
path: root/xpcom/ds/nsCharSeparatedTokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'xpcom/ds/nsCharSeparatedTokenizer.h')
-rw-r--r--xpcom/ds/nsCharSeparatedTokenizer.h200
1 files changed, 200 insertions, 0 deletions
diff --git a/xpcom/ds/nsCharSeparatedTokenizer.h b/xpcom/ds/nsCharSeparatedTokenizer.h
new file mode 100644
index 000000000..0e24d9d3e
--- /dev/null
+++ b/xpcom/ds/nsCharSeparatedTokenizer.h
@@ -0,0 +1,200 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __nsCharSeparatedTokenizer_h
+#define __nsCharSeparatedTokenizer_h
+
+#include "mozilla/RangedPtr.h"
+
+#include "nsDependentSubstring.h"
+#include "nsCRT.h"
+
+/**
+ * This parses a SeparatorChar-separated string into tokens.
+ * Whitespace surrounding tokens is not treated as part of tokens, however
+ * whitespace inside a token is. If the final token is the empty string, it is
+ * not returned.
+ *
+ * Some examples, with SeparatorChar = ',':
+ *
+ * "foo, bar, baz" -> "foo" "bar" "baz"
+ * "foo,bar,baz" -> "foo" "bar" "baz"
+ * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
+ * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
+ * "foo,,bar,baz" -> "foo" "" "bar" "baz"
+ * "foo,bar,baz," -> "foo" "bar" "baz"
+ *
+ * The function used for whitespace detection is a template argument.
+ * By default, it is NS_IsAsciiWhitespace.
+ */
+template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
+class nsTCharSeparatedTokenizer
+{
+  typedef typename DependentSubstringType::char_type CharType;
+  typedef typename DependentSubstringType::substring_type SubstringType;
+
+public:
+  // Flags -- only one for now. If we need more, they should be defined to
+  // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
+  enum
+  {
+    // A token may end at whitespace as well as at the separator character;
+    // see nextToken().
+    SEPARATOR_OPTIONAL = 1
+  };
+
+  /**
+   * Prepares to tokenize aSource and skips any leading whitespace, so that
+   * the first token (if there is one) starts at the current position.
+   *
+   * Only pointers into aSource's character buffer are stored -- nothing is
+   * copied -- so that buffer needs to outlive the tokenizer and every token
+   * it hands out.
+   *
+   * @param aSource        the string to split into tokens
+   * @param aSeparatorChar the character that separates tokens
+   * @param aFlags         0, or SEPARATOR_OPTIONAL
+   */
+  nsTCharSeparatedTokenizer(const SubstringType& aSource,
+                            CharType aSeparatorChar,
+                            uint32_t aFlags = 0)
+    : mIter(aSource.Data(), aSource.Length())
+    , mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
+           aSource.Length())
+    , mSeparatorChar(aSeparatorChar)
+    , mWhitespaceBeforeFirstToken(false)
+    , mWhitespaceAfterCurrentToken(false)
+    , mSeparatorAfterCurrentToken(false)
+    , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
+  {
+    // Skip initial whitespace, remembering whether there was any.
+    mWhitespaceBeforeFirstToken = skipWhitespace();
+  }
+
+  /**
+   * Checks if any more tokens are available, i.e. whether the current
+   * position is still short of the end of the source string.
+   */
+  bool hasMoreTokens() const
+  {
+    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
+               "Should be at beginning of token if there is one");
+
+    return mIter < mEnd;
+  }
+
+  /*
+   * Returns true if there is whitespace prior to the first token.
+   */
+  bool whitespaceBeforeFirstToken() const
+  {
+    return mWhitespaceBeforeFirstToken;
+  }
+
+  /*
+   * Returns true if there is a separator after the current token (the one
+   * most recently returned by nextToken()).
+   * Useful if you want to check whether the last token has a separator
+   * after it which may not be valid.
+   */
+  bool separatorAfterCurrentToken() const
+  {
+    return mSeparatorAfterCurrentToken;
+  }
+
+  /*
+   * Returns true if there is any whitespace after the current token (the
+   * one most recently returned by nextToken()), either directly behind it
+   * or behind the separator that follows it.
+   */
+  bool whitespaceAfterCurrentToken() const
+  {
+    return mWhitespaceAfterCurrentToken;
+  }
+
+  /**
+   * Returns the next token and moves on to the start of the one after it,
+   * stepping over trailing whitespace, the separator (if present) and any
+   * whitespace behind that separator.
+   *
+   * The returned token is empty if the current position is a separator
+   * (e.g. two adjacent separators) or the source is exhausted. Each call
+   * recomputes what separatorAfterCurrentToken() and
+   * whitespaceAfterCurrentToken() report.
+   */
+  const DependentSubstringType nextToken()
+  {
+    mozilla::RangedPtr<const CharType> tokenStart = mIter;
+    mozilla::RangedPtr<const CharType> tokenEnd = mIter;
+
+    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
+               "Should be at beginning of token if there is one");
+
+    // An empty token never enters the loop below, so clear the flag up
+    // front; otherwise it would keep reporting the previous token's state.
+    mWhitespaceAfterCurrentToken = false;
+
+    // Search until we hit separator or end (or whitespace, if a separator
+    // isn't required -- see clause with 'break' below).
+    while (mIter < mEnd && *mIter != mSeparatorChar) {
+      // Skip to end of the current word.
+      while (mIter < mEnd &&
+             !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
+        ++mIter;
+      }
+      // The token extends to the end of the last word seen so far, which
+      // keeps inner whitespace but drops trailing whitespace.
+      tokenEnd = mIter;
+
+      // Skip whitespace after the current word. This is an assignment on
+      // purpose: only whitespace behind the token's final word counts.
+      mWhitespaceAfterCurrentToken = skipWhitespace();
+      if (mSeparatorOptional) {
+        // We've reached the end of a word (and skipped any whitespace
+        // behind it), and that's sufficient to end our token, regardless
+        // of whether we've reached a SeparatorChar.
+        break;
+      } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
+    }
+
+    mSeparatorAfterCurrentToken = (mIter != mEnd &&
+                                   *mIter == mSeparatorChar);
+    MOZ_ASSERT(mSeparatorOptional ||
+               (mSeparatorAfterCurrentToken == (mIter < mEnd)),
+               "If we require a separator and haven't hit the end of "
+               "our string, then we shouldn't have left the loop "
+               "unless we hit a separator");
+
+    // Skip separator (and any whitespace after it), if we're at one.
+    if (mSeparatorAfterCurrentToken) {
+      ++mIter;
+
+      if (skipWhitespace()) {
+        mWhitespaceAfterCurrentToken = true;
+      }
+    }
+
+    return Substring(tokenStart.get(), tokenEnd.get());
+  }
+
+private:
+  // Advances mIter past a run of whitespace (never beyond mEnd) and returns
+  // true if at least one character was skipped.
+  bool skipWhitespace()
+  {
+    bool skipped = false;
+    while (mIter < mEnd && IsWhitespace(*mIter)) {
+      skipped = true;
+      ++mIter;
+    }
+    return skipped;
+  }
+
+  // Current read position. Between calls it never rests on whitespace: it
+  // is either at the start of the next token or equal to mEnd.
+  mozilla::RangedPtr<const CharType> mIter;
+  // One past the last character of the source string.
+  const mozilla::RangedPtr<const CharType> mEnd;
+  CharType mSeparatorChar;
+  bool mWhitespaceBeforeFirstToken;
+  bool mWhitespaceAfterCurrentToken;
+  bool mSeparatorAfterCurrentToken;
+  // True if SEPARATOR_OPTIONAL was set in the constructor's aFlags.
+  bool mSeparatorOptional;
+};
+
+/**
+ * Tokenizer over an nsSubstring, split on a char16_t separator.
+ * The whitespace predicate is the template argument IsWhitespace, which
+ * defaults to NS_IsAsciiWhitespace. This class only supplies a constructor;
+ * everything else is inherited from nsTCharSeparatedTokenizer.
+ */
+template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
+class nsCharSeparatedTokenizerTemplate
+  : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
+{
+  typedef nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace> Base;
+
+public:
+  // Forwards all three arguments unchanged to the base tokenizer.
+  nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
+                                   char16_t aSeparatorChar,
+                                   uint32_t aFlags = 0)
+    : Base(aSource, aSeparatorChar, aFlags)
+  {
+  }
+};
+
+// nsSubstring tokenizer using the default whitespace predicate.
+typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;
+
+/**
+ * Tokenizer over an nsCSubstring, split on a char separator.
+ * The whitespace predicate is the template argument IsWhitespace, which
+ * defaults to NS_IsAsciiWhitespace. This class only supplies a constructor;
+ * everything else is inherited from nsTCharSeparatedTokenizer.
+ */
+template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
+class nsCCharSeparatedTokenizerTemplate
+  : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
+{
+  typedef nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace> Base;
+
+public:
+  // Forwards all three arguments unchanged to the base tokenizer.
+  nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource,
+                                    char aSeparatorChar,
+                                    uint32_t aFlags = 0)
+    : Base(aSource, aSeparatorChar, aFlags)
+  {
+  }
+};
+
+// nsCSubstring tokenizer using the default whitespace predicate.
+typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;
+
+#endif /* __nsCharSeparatedTokenizer_h */