summaryrefslogtreecommitdiffstats
path: root/xpcom/tests/gtest/TestTokenizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'xpcom/tests/gtest/TestTokenizer.cpp')
-rw-r--r--xpcom/tests/gtest/TestTokenizer.cpp1134
1 files changed, 1134 insertions, 0 deletions
diff --git a/xpcom/tests/gtest/TestTokenizer.cpp b/xpcom/tests/gtest/TestTokenizer.cpp
new file mode 100644
index 000000000..283bbd3b8
--- /dev/null
+++ b/xpcom/tests/gtest/TestTokenizer.cpp
@@ -0,0 +1,1134 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Tokenizer.h"
+#include "mozilla/IncrementalTokenizer.h"
+#include "mozilla/Unused.h"
+#include "gtest/gtest.h"
+
+using namespace mozilla;
+
+static bool IsOperator(char const c)
+{
+ return c == '+' || c == '*';
+}
+
+static bool HttpHeaderCharacter(char const c)
+{
+ return (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '_') ||
+ (c == '-');
+}
+
+TEST(Tokenizer, HTTPResponse)
+{
+ Tokenizer::Token t;
+
+ // Real life test, HTTP response
+
+ Tokenizer p(NS_LITERAL_CSTRING(
+ "HTTP/1.0 304 Not modified\r\n"
+ "ETag: hallo\r\n"
+ "Content-Length: 16\r\n"
+ "\r\n"
+ "This is the body"));
+
+ EXPECT_TRUE(p.CheckWord("HTTP"));
+ EXPECT_TRUE(p.CheckChar('/'));
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
+ EXPECT_TRUE(t.AsInteger() == 1);
+ EXPECT_TRUE(p.CheckChar('.'));
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
+ EXPECT_TRUE(t.AsInteger() == 0);
+ p.SkipWhites();
+
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
+ EXPECT_TRUE(t.AsInteger() == 304);
+ p.SkipWhites();
+
+ p.Record();
+ while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOL);
+ EXPECT_FALSE(p.HasFailed());
+ nsAutoCString h;
+ p.Claim(h);
+ EXPECT_TRUE(h == "Not modified");
+
+ p.Record();
+ while (p.CheckChar(HttpHeaderCharacter));
+ p.Claim(h, Tokenizer::INCLUDE_LAST);
+ EXPECT_TRUE(h == "ETag");
+ p.SkipWhites();
+ EXPECT_TRUE(p.CheckChar(':'));
+ p.SkipWhites();
+ p.Record();
+ while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOL);
+ EXPECT_FALSE(p.HasFailed());
+ p.Claim(h);
+ EXPECT_TRUE(h == "hallo");
+
+ p.Record();
+ while (p.CheckChar(HttpHeaderCharacter));
+ p.Claim(h, Tokenizer::INCLUDE_LAST);
+ EXPECT_TRUE(h == "Content-Length");
+ p.SkipWhites();
+ EXPECT_TRUE(p.CheckChar(':'));
+ p.SkipWhites();
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
+ EXPECT_TRUE(t.AsInteger() == 16);
+ EXPECT_TRUE(p.CheckEOL());
+
+ EXPECT_TRUE(p.CheckEOL());
+
+ p.Record();
+ while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOF);
+ nsAutoCString b;
+ p.Claim(b);
+ EXPECT_TRUE(b == "This is the body");
+}
+
+TEST(Tokenizer, Main)
+{
+ Tokenizer::Token t;
+
+ // Synthetic code-specific test
+
+ Tokenizer p(NS_LITERAL_CSTRING("test123 ,15 \t*\r\n%xx,-15\r\r"));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_WORD);
+ EXPECT_TRUE(t.AsString() == "test123");
+
+ Tokenizer::Token u;
+ EXPECT_FALSE(p.Check(u));
+
+ EXPECT_FALSE(p.CheckChar('!'));
+
+ EXPECT_FALSE(p.Check(Tokenizer::Token::Number(123)));
+
+ EXPECT_TRUE(p.CheckWhite());
+
+ EXPECT_TRUE(p.CheckChar(','));
+
+ EXPECT_TRUE(p.Check(Tokenizer::Token::Number(15)));
+
+ p.Rollback();
+ EXPECT_TRUE(p.Check(Tokenizer::Token::Number(15)));
+
+ p.Rollback();
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
+ EXPECT_TRUE(t.AsInteger() == 15);
+
+ EXPECT_FALSE(p.CheckChar(IsOperator));
+
+ EXPECT_TRUE(p.CheckWhite());
+
+ p.SkipWhites();
+
+ EXPECT_FALSE(p.CheckWhite());
+
+ p.Rollback();
+
+ EXPECT_TRUE(p.CheckWhite());
+ EXPECT_TRUE(p.CheckWhite());
+
+ p.Record(Tokenizer::EXCLUDE_LAST);
+
+ EXPECT_TRUE(p.CheckChar(IsOperator));
+
+ p.Rollback();
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR);
+ EXPECT_TRUE(t.AsChar() == '*');
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL);
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR);
+ EXPECT_TRUE(t.AsChar() == '%');
+
+ nsAutoCString claim;
+ p.Claim(claim, Tokenizer::EXCLUDE_LAST);
+ EXPECT_TRUE(claim == "*\r\n");
+ p.Claim(claim, Tokenizer::INCLUDE_LAST);
+ EXPECT_TRUE(claim == "*\r\n%");
+
+ p.Rollback();
+ EXPECT_TRUE(p.CheckChar('%'));
+
+ p.Record(Tokenizer::INCLUDE_LAST);
+
+ EXPECT_FALSE(p.CheckWord("xy"));
+
+ EXPECT_TRUE(p.CheckWord("xx"));
+
+
+ p.Claim(claim, Tokenizer::INCLUDE_LAST);
+ EXPECT_TRUE(claim == "%xx");
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR);
+ EXPECT_TRUE(t.AsChar() == ',');
+
+ EXPECT_TRUE(p.CheckChar('-'));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
+ EXPECT_TRUE(t.AsInteger() == 15);
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL);
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL);
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOF);
+
+ EXPECT_FALSE(p.Next(t));
+
+ p.Rollback();
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOF);
+
+ EXPECT_FALSE(p.Next(t));
+
+ p.Rollback();
+ EXPECT_TRUE(p.CheckEOF());
+
+ EXPECT_FALSE(p.CheckEOF());
+}
+
+TEST(Tokenizer, SingleWord)
+{
+ // Single word with numbers in it test
+
+ Tokenizer p(NS_LITERAL_CSTRING("test123"));
+
+ EXPECT_TRUE(p.CheckWord("test123"));
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, EndingAfterNumber)
+{
+ // An end handling after a number
+
+ Tokenizer p(NS_LITERAL_CSTRING("123"));
+
+ EXPECT_FALSE(p.CheckWord("123"));
+ EXPECT_TRUE(p.Check(Tokenizer::Token::Number(123)));
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, BadInteger)
+{
+ Tokenizer::Token t;
+
+ // A bad integer test
+
+ Tokenizer p(NS_LITERAL_CSTRING("189234891274981758617846178651647620587135"));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_ERROR);
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, CheckExpectedTokenValue)
+{
+ Tokenizer::Token t;
+
+ // Check expected token value test
+
+ Tokenizer p(NS_LITERAL_CSTRING("blue velvet"));
+
+ EXPECT_FALSE(p.Check(Tokenizer::TOKEN_INTEGER, t));
+
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t));
+ EXPECT_TRUE(t.AsString() == "blue");
+
+ EXPECT_FALSE(p.Check(Tokenizer::TOKEN_WORD, t));
+
+ EXPECT_TRUE(p.CheckWhite());
+
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t));
+ EXPECT_TRUE(t.AsString() == "velvet");
+
+ EXPECT_TRUE(p.CheckEOF());
+
+ EXPECT_FALSE(p.Next(t));
+}
+
+TEST(Tokenizer, HasFailed)
+{
+ Tokenizer::Token t;
+
+ // HasFailed test
+
+ Tokenizer p1(NS_LITERAL_CSTRING("a b"));
+
+ while (p1.Next(t) && t.Type() != Tokenizer::TOKEN_CHAR);
+ EXPECT_TRUE(p1.HasFailed());
+
+
+ Tokenizer p2(NS_LITERAL_CSTRING("a b ?!c"));
+
+ EXPECT_FALSE(p2.CheckChar('c'));
+ EXPECT_TRUE(p2.HasFailed());
+ EXPECT_TRUE(p2.CheckChar(HttpHeaderCharacter));
+ EXPECT_FALSE(p2.HasFailed());
+ p2.SkipWhites();
+ EXPECT_FALSE(p2.HasFailed());
+ EXPECT_FALSE(p2.CheckChar('c'));
+ EXPECT_TRUE(p2.HasFailed());
+ EXPECT_TRUE(p2.Next(t));
+ EXPECT_FALSE(p2.HasFailed());
+ EXPECT_TRUE(p2.Next(t));
+ EXPECT_FALSE(p2.HasFailed());
+ EXPECT_FALSE(p2.CheckChar('c'));
+ EXPECT_TRUE(p2.HasFailed());
+ EXPECT_TRUE(p2.Check(Tokenizer::TOKEN_CHAR, t));
+ EXPECT_FALSE(p2.HasFailed());
+ EXPECT_FALSE(p2.CheckChar('#'));
+ EXPECT_TRUE(p2.HasFailed());
+ t = Tokenizer::Token::Char('!');
+ EXPECT_TRUE(p2.Check(t));
+ EXPECT_FALSE(p2.HasFailed());
+
+ while (p2.Next(t) && t.Type() != Tokenizer::TOKEN_CHAR);
+ EXPECT_TRUE(p2.HasFailed());
+}
+
+TEST(Tokenizer, Construction)
+{
+ {
+ nsCString a("test");
+ Tokenizer p1(a);
+ EXPECT_TRUE(p1.CheckWord("test"));
+ EXPECT_TRUE(p1.CheckEOF());
+ }
+
+ {
+ nsAutoCString a("test");
+ Tokenizer p1(a);
+ EXPECT_TRUE(p1.CheckWord("test"));
+ EXPECT_TRUE(p1.CheckEOF());
+ }
+
+ {
+ static const char _a[] = "test";
+ nsDependentCString a(_a);
+ Tokenizer p1(a);
+ EXPECT_TRUE(p1.CheckWord("test"));
+ EXPECT_TRUE(p1.CheckEOF());
+ }
+
+ {
+ static const char* _a = "test";
+ nsDependentCString a(_a);
+ Tokenizer p1(a);
+ EXPECT_TRUE(p1.CheckWord("test"));
+ EXPECT_TRUE(p1.CheckEOF());
+ }
+
+ {
+ Tokenizer p1(nsDependentCString("test"));
+ EXPECT_TRUE(p1.CheckWord("test"));
+ EXPECT_TRUE(p1.CheckEOF());
+ }
+
+ {
+ Tokenizer p1(NS_LITERAL_CSTRING("test"));
+ EXPECT_TRUE(p1.CheckWord("test"));
+ EXPECT_TRUE(p1.CheckEOF());
+ }
+
+ {
+ Tokenizer p1("test");
+ EXPECT_TRUE(p1.CheckWord("test"));
+ EXPECT_TRUE(p1.CheckEOF());
+ }
+}
+
+TEST(Tokenizer, Customization)
+{
+ Tokenizer p1(NS_LITERAL_CSTRING("test-custom*words and\tdefault-whites"), nullptr, "-*");
+ EXPECT_TRUE(p1.CheckWord("test-custom*words"));
+ EXPECT_TRUE(p1.CheckWhite());
+ EXPECT_TRUE(p1.CheckWord("and"));
+ EXPECT_TRUE(p1.CheckWhite());
+ EXPECT_TRUE(p1.CheckWord("default-whites"));
+
+ Tokenizer p2(NS_LITERAL_CSTRING("test, custom,whites"), ", ");
+ EXPECT_TRUE(p2.CheckWord("test"));
+ EXPECT_TRUE(p2.CheckWhite());
+ EXPECT_TRUE(p2.CheckWhite());
+ EXPECT_TRUE(p2.CheckWord("custom"));
+ EXPECT_TRUE(p2.CheckWhite());
+ EXPECT_TRUE(p2.CheckWord("whites"));
+
+ Tokenizer p3(NS_LITERAL_CSTRING("test, custom, whites-and#word-chars"), ",", "-#");
+ EXPECT_TRUE(p3.CheckWord("test"));
+ EXPECT_TRUE(p3.CheckWhite());
+ EXPECT_FALSE(p3.CheckWhite());
+ EXPECT_TRUE(p3.CheckChar(' '));
+ EXPECT_TRUE(p3.CheckWord("custom"));
+ EXPECT_TRUE(p3.CheckWhite());
+ EXPECT_FALSE(p3.CheckWhite());
+ EXPECT_TRUE(p3.CheckChar(' '));
+ EXPECT_TRUE(p3.CheckWord("whites-and#word-chars"));
+}
+
+TEST(Tokenizer, ShortcutChecks)
+{
+ Tokenizer p("test1 test2,123");
+
+ nsAutoCString test1;
+ nsDependentCSubstring test2;
+ char comma;
+ uint32_t integer;
+
+ EXPECT_TRUE(p.ReadWord(test1));
+ EXPECT_TRUE(test1 == "test1");
+ p.SkipWhites();
+ EXPECT_TRUE(p.ReadWord(test2));
+ EXPECT_TRUE(test2 == "test2");
+ EXPECT_TRUE(p.ReadChar(&comma));
+ EXPECT_TRUE(comma == ',');
+ EXPECT_TRUE(p.ReadInteger(&integer));
+ EXPECT_TRUE(integer == 123);
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+static bool ABChar(const char aChar)
+{
+ return aChar == 'a' || aChar == 'b';
+}
+
+TEST(Tokenizer, ReadCharClassified)
+{
+ Tokenizer p("abc");
+
+ char c;
+ EXPECT_TRUE(p.ReadChar(ABChar, &c));
+ EXPECT_TRUE(c == 'a');
+ EXPECT_TRUE(p.ReadChar(ABChar, &c));
+ EXPECT_TRUE(c == 'b');
+ EXPECT_FALSE(p.ReadChar(ABChar, &c));
+ nsDependentCSubstring w;
+ EXPECT_TRUE(p.ReadWord(w));
+ EXPECT_TRUE(w == "c");
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, ClaimSubstring)
+{
+ Tokenizer p(" abc ");
+
+ EXPECT_TRUE(p.CheckWhite());
+
+ p.Record();
+ EXPECT_TRUE(p.CheckWord("abc"));
+ nsDependentCSubstring v;
+ p.Claim(v, Tokenizer::INCLUDE_LAST);
+ EXPECT_TRUE(v == "abc");
+ EXPECT_TRUE(p.CheckWhite());
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, Fragment)
+{
+ const char str[] = "ab;cd:10 ";
+ Tokenizer p(str);
+ nsDependentCSubstring f;
+
+ Tokenizer::Token t1, t2;
+
+ EXPECT_TRUE(p.Next(t1));
+ EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_WORD);
+ EXPECT_TRUE(t1.Fragment() == "ab");
+ EXPECT_TRUE(t1.Fragment().BeginReading() == &str[0]);
+
+ p.Rollback();
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t2));
+ EXPECT_TRUE(t2.Fragment() == "ab");
+ EXPECT_TRUE(t2.Fragment().BeginReading() == &str[0]);
+
+
+ EXPECT_TRUE(p.Next(t1));
+ EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_CHAR);
+ EXPECT_TRUE(t1.Fragment() == ";");
+ EXPECT_TRUE(t1.Fragment().BeginReading() == &str[2]);
+
+ p.Rollback();
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_CHAR, t2));
+ EXPECT_TRUE(t2.Fragment() == ";");
+ EXPECT_TRUE(t2.Fragment().BeginReading() == &str[2]);
+
+
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t2));
+ EXPECT_TRUE(t2.Fragment() == "cd");
+ EXPECT_TRUE(t2.Fragment().BeginReading() == &str[3]);
+
+ p.Rollback();
+ EXPECT_TRUE(p.Next(t1));
+ EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_WORD);
+ EXPECT_TRUE(t1.Fragment() == "cd");
+ EXPECT_TRUE(t1.Fragment().BeginReading() == &str[3]);
+
+
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_CHAR, t2));
+ EXPECT_TRUE(t2.Fragment() == ":");
+ EXPECT_TRUE(t2.Fragment().BeginReading() == &str[5]);
+
+ p.Rollback();
+ EXPECT_TRUE(p.Next(t1));
+ EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_CHAR);
+ EXPECT_TRUE(t1.Fragment() == ":");
+ EXPECT_TRUE(t1.Fragment().BeginReading() == &str[5]);
+
+
+ EXPECT_TRUE(p.Next(t1));
+ EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_INTEGER);
+ EXPECT_TRUE(t1.Fragment() == "10");
+ EXPECT_TRUE(t1.Fragment().BeginReading() == &str[6]);
+
+
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WS, t2));
+ EXPECT_TRUE(t2.Fragment() == " ");
+ EXPECT_TRUE(t2.Fragment().BeginReading() == &str[8]);
+
+
+ EXPECT_TRUE(p.Check(Tokenizer::TOKEN_EOF, t1));
+ EXPECT_TRUE(t1.Fragment() == "");
+ EXPECT_TRUE(t1.Fragment().BeginReading() == &str[9]);
+}
+
+TEST(Tokenizer, SkipWhites)
+{
+ Tokenizer p("Text1 \nText2 \nText3\n Text4\n ");
+
+ EXPECT_TRUE(p.CheckWord("Text1"));
+ p.SkipWhites();
+ EXPECT_TRUE(p.CheckEOL());
+
+ EXPECT_TRUE(p.CheckWord("Text2"));
+ p.SkipWhites(Tokenizer::INCLUDE_NEW_LINE);
+
+ EXPECT_TRUE(p.CheckWord("Text3"));
+ p.SkipWhites();
+ EXPECT_TRUE(p.CheckEOL());
+ p.SkipWhites();
+
+ EXPECT_TRUE(p.CheckWord("Text4"));
+ p.SkipWhites(Tokenizer::INCLUDE_NEW_LINE);
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, SkipCustomWhites)
+{
+ Tokenizer p("Text1 \n\r\t.Text2 \n\r\t.", " \n\r\t.");
+
+ EXPECT_TRUE(p.CheckWord("Text1"));
+ p.SkipWhites();
+ EXPECT_TRUE(p.CheckWord("Text2"));
+ EXPECT_TRUE(p.CheckWhite());
+ EXPECT_TRUE(p.CheckWhite());
+ EXPECT_TRUE(p.CheckWhite());
+ EXPECT_TRUE(p.CheckWhite());
+ EXPECT_TRUE(p.CheckWhite());
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, IntegerReading)
+{
+#define INT_6_BITS 64U
+#define INT_30_BITS 1073741824UL
+#define INT_32_BITS 4294967295UL
+#define INT_50_BITS 1125899906842624ULL
+#define STR_INT_MORE_THAN_64_BITS "922337203685477580899"
+
+ {
+ Tokenizer p(NS_STRINGIFY(INT_6_BITS));
+ uint8_t u8;
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+ EXPECT_TRUE(p.ReadInteger(&u8));
+ EXPECT_TRUE(u8 == INT_6_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&u16));
+ EXPECT_TRUE(u16 == INT_6_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&u32));
+ EXPECT_TRUE(u32 == INT_6_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&u64));
+ EXPECT_TRUE(u64 == INT_6_BITS);
+
+ p.Rollback();
+
+ int8_t s8;
+ int16_t s16;
+ int32_t s32;
+ int64_t s64;
+ EXPECT_TRUE(p.ReadInteger(&s8));
+ EXPECT_TRUE(s8 == INT_6_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&s16));
+ EXPECT_TRUE(s16 == INT_6_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&s32));
+ EXPECT_TRUE(s32 == INT_6_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&s64));
+ EXPECT_TRUE(s64 == INT_6_BITS);
+
+ EXPECT_TRUE(p.CheckWord("U"));
+ EXPECT_TRUE(p.CheckEOF());
+ }
+
+ {
+ Tokenizer p(NS_STRINGIFY(INT_30_BITS));
+ uint8_t u8;
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+ EXPECT_FALSE(p.ReadInteger(&u8));
+ EXPECT_FALSE(p.ReadInteger(&u16));
+ EXPECT_TRUE(p.ReadInteger(&u32));
+ EXPECT_TRUE(u32 == INT_30_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&u64));
+ EXPECT_TRUE(u64 == INT_30_BITS);
+
+ p.Rollback();
+
+ int8_t s8;
+ int16_t s16;
+ int32_t s32;
+ int64_t s64;
+ EXPECT_FALSE(p.ReadInteger(&s8));
+ EXPECT_FALSE(p.ReadInteger(&s16));
+ EXPECT_TRUE(p.ReadInteger(&s32));
+ EXPECT_TRUE(s32 == INT_30_BITS);
+ p.Rollback();
+ EXPECT_TRUE(p.ReadInteger(&s64));
+ EXPECT_TRUE(s64 == INT_30_BITS);
+ EXPECT_TRUE(p.CheckWord("UL"));
+ EXPECT_TRUE(p.CheckEOF());
+ }
+
+ {
+ Tokenizer p(NS_STRINGIFY(INT_32_BITS));
+ uint32_t u32;
+ int32_t s32;
+ EXPECT_FALSE(p.ReadInteger(&s32));
+ EXPECT_TRUE(p.ReadInteger(&u32));
+ EXPECT_TRUE(u32 == INT_32_BITS);
+ EXPECT_TRUE(p.CheckWord("UL"));
+ EXPECT_TRUE(p.CheckEOF());
+ }
+
+ {
+ Tokenizer p(NS_STRINGIFY(INT_50_BITS));
+ uint8_t u8;
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+ EXPECT_FALSE(p.ReadInteger(&u8));
+ EXPECT_FALSE(p.ReadInteger(&u16));
+ EXPECT_FALSE(p.ReadInteger(&u32));
+ EXPECT_TRUE(p.ReadInteger(&u64));
+ EXPECT_TRUE(u64 == INT_50_BITS);
+ EXPECT_TRUE(p.CheckWord("ULL"));
+ EXPECT_TRUE(p.CheckEOF());
+ }
+
+ {
+ Tokenizer p(STR_INT_MORE_THAN_64_BITS);
+ int64_t i;
+ EXPECT_FALSE(p.ReadInteger(&i));
+ uint64_t u;
+ EXPECT_FALSE(p.ReadInteger(&u));
+ EXPECT_FALSE(p.CheckEOF());
+ }
+}
+
+TEST(Tokenizer, ReadUntil)
+{
+ Tokenizer p("Hello;test 4,");
+ nsDependentCSubstring f;
+ EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(';'), f));
+ EXPECT_TRUE(f == "Hello");
+ p.Rollback();
+
+ EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(';'), f, Tokenizer::INCLUDE_LAST));
+ EXPECT_TRUE(f == "Hello;");
+ p.Rollback();
+
+ EXPECT_FALSE(p.ReadUntil(Tokenizer::Token::Char('!'), f));
+ EXPECT_TRUE(f == "Hello;test 4,");
+ p.Rollback();
+
+ EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Word(NS_LITERAL_CSTRING("test")), f));
+ EXPECT_TRUE(f == "Hello;");
+ p.Rollback();
+
+ EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Word(NS_LITERAL_CSTRING("test")), f, Tokenizer::INCLUDE_LAST));
+ EXPECT_TRUE(f == "Hello;test");
+ EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(','), f));
+ EXPECT_TRUE(f == " 4");
+}
+
+TEST(Tokenizer, SkipUntil)
+{
+ {
+ Tokenizer p("test1,test2,,,test3");
+
+ p.SkipUntil(Tokenizer::Token::Char(','));
+ EXPECT_TRUE(p.CheckChar(','));
+ EXPECT_TRUE(p.CheckWord("test2"));
+
+ p.SkipUntil(Tokenizer::Token::Char(',')); // must not move
+ EXPECT_TRUE(p.CheckChar(',')); // check the first comma of the ',,,' string
+
+ p.Rollback(); // moves cursor back to the first comma of the ',,,' string
+
+ p.SkipUntil(Tokenizer::Token::Char(',')); // must not move, we are on the ',' char
+ EXPECT_TRUE(p.CheckChar(','));
+ EXPECT_TRUE(p.CheckChar(','));
+ EXPECT_TRUE(p.CheckChar(','));
+ EXPECT_TRUE(p.CheckWord("test3"));
+ p.Rollback();
+
+ p.SkipUntil(Tokenizer::Token::Char(','));
+ EXPECT_TRUE(p.CheckEOF());
+ }
+
+ {
+ Tokenizer p("test0,test1,test2");
+
+ p.SkipUntil(Tokenizer::Token::Char(','));
+ EXPECT_TRUE(p.CheckChar(','));
+
+ p.SkipUntil(Tokenizer::Token::Char(','));
+ p.Rollback();
+
+ EXPECT_TRUE(p.CheckWord("test1"));
+ EXPECT_TRUE(p.CheckChar(','));
+
+ p.SkipUntil(Tokenizer::Token::Char(','));
+ p.Rollback();
+
+ EXPECT_TRUE(p.CheckWord("test2"));
+ EXPECT_TRUE(p.CheckEOF());
+ }
+}
+
+TEST(Tokenizer, Custom)
+{
+ Tokenizer p("aaaaaacustom-1\r,custom-1,Custom-1,Custom-1,00custom-2xxxx,CUSTOM-2");
+
+ Tokenizer::Token c1 = p.AddCustomToken("custom-1", Tokenizer::CASE_INSENSITIVE);
+ Tokenizer::Token c2 = p.AddCustomToken("custom-2", Tokenizer::CASE_SENSITIVE);
+
+ // It's expected to NOT FIND the custom token if it's not on an edge
+ // between other recognizable tokens.
+ EXPECT_TRUE(p.CheckWord("aaaaaacustom"));
+ EXPECT_TRUE(p.CheckChar('-'));
+ EXPECT_TRUE(p.Check(Tokenizer::Token::Number(1)));
+ EXPECT_TRUE(p.CheckEOL());
+ EXPECT_TRUE(p.CheckChar(','));
+
+ EXPECT_TRUE(p.Check(c1));
+ EXPECT_TRUE(p.CheckChar(','));
+
+ EXPECT_TRUE(p.Check(c1));
+ EXPECT_TRUE(p.CheckChar(','));
+
+ p.EnableCustomToken(c1, false);
+ EXPECT_TRUE(p.CheckWord("Custom"));
+ EXPECT_TRUE(p.CheckChar('-'));
+ EXPECT_TRUE(p.Check(Tokenizer::Token::Number(1)));
+ EXPECT_TRUE(p.CheckChar(','));
+
+ EXPECT_TRUE(p.Check(Tokenizer::Token::Number(0)));
+ EXPECT_TRUE(p.Check(c2));
+ EXPECT_TRUE(p.CheckWord("xxxx"));
+ EXPECT_TRUE(p.CheckChar(','));
+
+ EXPECT_TRUE(p.CheckWord("CUSTOM"));
+ EXPECT_TRUE(p.CheckChar('-'));
+ EXPECT_TRUE(p.Check(Tokenizer::Token::Number(2)));
+
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, CustomRaw)
+{
+ Tokenizer p("aaaaaacustom-1\r,custom-1,Custom-1,Custom-1,00custom-2xxxx,CUSTOM-2");
+
+ Tokenizer::Token c1 = p.AddCustomToken("custom-1", Tokenizer::CASE_INSENSITIVE);
+ Tokenizer::Token c2 = p.AddCustomToken("custom-2", Tokenizer::CASE_SENSITIVE);
+
+ // In this mode it's expected to find all custom tokens among any kind of input.
+ p.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);
+
+ Tokenizer::Token t;
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
+ EXPECT_TRUE(t.Fragment().EqualsLiteral("aaaaaa"));
+
+ EXPECT_TRUE(p.Check(c1));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
+ EXPECT_TRUE(t.Fragment().EqualsLiteral("\r,"));
+
+ EXPECT_TRUE(p.Check(c1));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
+ EXPECT_TRUE(t.Fragment().EqualsLiteral(","));
+
+ EXPECT_TRUE(p.Check(c1));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
+ EXPECT_TRUE(t.Fragment().EqualsLiteral(","));
+
+ EXPECT_TRUE(p.Check(c1));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
+ EXPECT_TRUE(t.Fragment().EqualsLiteral(",00"));
+
+ EXPECT_TRUE(p.Check(c2));
+
+ EXPECT_TRUE(p.Next(t));
+ EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
+ EXPECT_TRUE(t.Fragment().EqualsLiteral("xxxx,CUSTOM-2"));
+
+ EXPECT_TRUE(p.CheckEOF());
+}
+
+TEST(Tokenizer, Incremental)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ switch (++test) {
+ case 1: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test1")))); break;
+ case 2: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 3: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test2")))); break;
+ case 4: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 5: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 6: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 7: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test3")))); break;
+ case 8: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
+ }
+
+ return NS_OK;
+ });
+
+ NS_NAMED_LITERAL_CSTRING(input, "test1,test2,,,test3");
+ auto cur = input.BeginReading();
+ auto end = input.EndReading();
+ for (; cur < end; ++cur) {
+ i.FeedInput(nsDependentCSubstring(cur, 1));
+ }
+
+ EXPECT_TRUE(test == 6);
+ i.FinishInput();
+ EXPECT_TRUE(test == 8);
+}
+
+TEST(Tokenizer, IncrementalRollback)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ switch (++test) {
+ case 1: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test1")))); break;
+ case 2: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 3: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test2"))));
+ i.Rollback(); // so that we get the token again
+ break;
+ case 4: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test2")))); break;
+ case 5: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 6: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 7: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
+ case 8: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test3")))); break;
+ case 9: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
+ }
+
+ return NS_OK;
+ });
+
+ NS_NAMED_LITERAL_CSTRING(input, "test1,test2,,,test3");
+ auto cur = input.BeginReading();
+ auto end = input.EndReading();
+ for (; cur < end; ++cur) {
+ i.FeedInput(nsDependentCSubstring(cur, 1));
+ }
+
+ EXPECT_TRUE(test == 7);
+ i.FinishInput();
+ EXPECT_TRUE(test == 9);
+}
+
+TEST(Tokenizer, IncrementalNeedMoreInput)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ Token t2;
+ switch (++test) {
+ case 1:
+ EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("a"))));
+ break;
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ EXPECT_TRUE(t.Equals(Token::Whitespace()));
+ if (i.Next(t2)) {
+ EXPECT_TRUE(test == 5);
+ EXPECT_TRUE(t2.Equals(Token::Word(NS_LITERAL_CSTRING("bb"))));
+ } else {
+ EXPECT_TRUE(test < 5);
+ i.NeedMoreInput();
+ }
+ break;
+ case 6:
+ EXPECT_TRUE(t.Equals(Token::Char(',')));
+ break;
+ case 7:
+ EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("c"))));
+ return NS_ERROR_FAILURE;
+ default:
+ EXPECT_TRUE(false);
+ break;
+ }
+
+ return NS_OK;
+ });
+
+ NS_NAMED_LITERAL_CSTRING(input, "a bb,c");
+ auto cur = input.BeginReading();
+ auto end = input.EndReading();
+
+ nsresult rv;
+ for (; cur < end; ++cur) {
+ rv = i.FeedInput(nsDependentCSubstring(cur, 1));
+ if (NS_FAILED(rv)) {
+ break;
+ }
+ }
+
+ EXPECT_TRUE(rv == NS_OK);
+ EXPECT_TRUE(test == 6);
+
+ rv = i.FinishInput();
+ EXPECT_TRUE(rv == NS_ERROR_FAILURE);
+ EXPECT_TRUE(test == 7);
+}
+
+TEST(Tokenizer, IncrementalCustom)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ Token custom;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ switch (++test) {
+ case 1: EXPECT_TRUE(t.Equals(custom)); break;
+ case 2: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("bla")))); break;
+ case 3: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
+ }
+
+ return NS_OK;
+ }, nullptr, "-");
+
+ custom = i.AddCustomToken("some-test", Tokenizer::CASE_SENSITIVE);
+ i.FeedInput(NS_LITERAL_CSTRING("some-"));
+ EXPECT_TRUE(test == 0);
+ i.FeedInput(NS_LITERAL_CSTRING("tes"));
+ EXPECT_TRUE(test == 0);
+ i.FeedInput(NS_LITERAL_CSTRING("tbla"));
+ EXPECT_TRUE(test == 1);
+ i.FinishInput();
+ EXPECT_TRUE(test == 3);
+}
+
+TEST(Tokenizer, IncrementalCustomRaw)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ Token custom;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ switch (++test) {
+ case 1: EXPECT_TRUE(t.Fragment().EqualsLiteral("test1,")); break;
+ case 2: EXPECT_TRUE(t.Equals(custom)); break;
+ case 3: EXPECT_TRUE(t.Fragment().EqualsLiteral("!,,test3"));
+ i.Rollback();
+ i.SetTokenizingMode(Tokenizer::Mode::FULL);
+ break;
+ case 4: EXPECT_TRUE(t.Equals(Token::Char('!')));
+ i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);
+ break;
+ case 5: EXPECT_TRUE(t.Fragment().EqualsLiteral(",,test3")); break;
+ case 6: EXPECT_TRUE(t.Equals(custom)); break;
+ case 7: EXPECT_TRUE(t.Fragment().EqualsLiteral("tes")); break;
+ case 8: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
+ }
+
+ return NS_OK;
+ });
+
+ custom = i.AddCustomToken("test2", Tokenizer::CASE_SENSITIVE);
+ i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);
+
+ NS_NAMED_LITERAL_CSTRING(input, "test1,test2!,,test3test2tes");
+ auto cur = input.BeginReading();
+ auto end = input.EndReading();
+ for (; cur < end; ++cur) {
+ i.FeedInput(nsDependentCSubstring(cur, 1));
+ }
+
+ EXPECT_TRUE(test == 6);
+ i.FinishInput();
+ EXPECT_TRUE(test == 8);
+}
+
+TEST(Tokenizer, IncrementalCustomRemove)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ Token custom;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ switch (++test) {
+ case 1: EXPECT_TRUE(t.Equals(custom));
+ i.RemoveCustomToken(custom);
+ break;
+ case 2: EXPECT_FALSE(t.Equals(custom)); break;
+ case 3: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
+ }
+
+ return NS_OK;
+ });
+
+ custom = i.AddCustomToken("custom1", Tokenizer::CASE_SENSITIVE);
+
+ NS_NAMED_LITERAL_CSTRING(input, "custom1custom1");
+ i.FeedInput(input);
+ EXPECT_TRUE(test == 1);
+ i.FinishInput();
+ EXPECT_TRUE(test == 3);
+}
+
+TEST(Tokenizer, IncrementalBuffering1)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ Token custom;
+ nsDependentCSubstring observedFragment;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ switch (++test) {
+ case 1: EXPECT_TRUE(t.Fragment().EqualsLiteral("012")); break;
+ case 2: EXPECT_TRUE(t.Fragment().EqualsLiteral("3456789")); break;
+ case 3: EXPECT_TRUE(t.Equals(custom)); break;
+ case 4: EXPECT_TRUE(t.Fragment().EqualsLiteral("qwe")); break;
+ case 5: EXPECT_TRUE(t.Fragment().EqualsLiteral("rt")); break;
+ case 6: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
+ }
+
+ observedFragment.Rebind(t.Fragment().BeginReading(),
+ t.Fragment().Length());
+ return NS_OK;
+ }, nullptr, nullptr, 3);
+
+ custom = i.AddCustomToken("aaa", Tokenizer::CASE_SENSITIVE);
+ // This externally unused token is added only to check the internal algorithm
+ // does work correctly as expected when there are two different length tokens.
+ Unused << i.AddCustomToken("bb", Tokenizer::CASE_SENSITIVE);
+ i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);
+
+ i.FeedInput(NS_LITERAL_CSTRING("01234"));
+ EXPECT_TRUE(test == 1);
+ EXPECT_TRUE(observedFragment.EqualsLiteral("012"));
+
+ i.FeedInput(NS_LITERAL_CSTRING("5"));
+ EXPECT_TRUE(test == 1);
+ i.FeedInput(NS_LITERAL_CSTRING("6789aa"));
+ EXPECT_TRUE(test == 2);
+ EXPECT_TRUE(observedFragment.EqualsLiteral("3456789"));
+
+ i.FeedInput(NS_LITERAL_CSTRING("aqwert"));
+ EXPECT_TRUE(test == 4);
+ EXPECT_TRUE(observedFragment.EqualsLiteral("qwe"));
+
+ i.FinishInput();
+ EXPECT_TRUE(test == 6);
+}
+
+TEST(Tokenizer, IncrementalBuffering2)
+{
+ typedef TokenizerBase::Token Token;
+
+ int test = 0;
+ Token custom;
+ IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
+ {
+ switch (++test) {
+ case 1: EXPECT_TRUE(t.Fragment().EqualsLiteral("01")); break;
+ case 2: EXPECT_TRUE(t.Fragment().EqualsLiteral("234567")); break;
+ case 3: EXPECT_TRUE(t.Fragment().EqualsLiteral("89")); break;
+ case 4: EXPECT_TRUE(t.Equals(custom)); break;
+ case 5: EXPECT_TRUE(t.Fragment().EqualsLiteral("qwert")); break;
+ case 6: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
+ }
+ return NS_OK;
+ }, nullptr, nullptr, 3);
+
+ custom = i.AddCustomToken("aaa", Tokenizer::CASE_SENSITIVE);
+ // This externally unused token is added only to check the internal algorithm
+ // does work correctly as expected when there are two different length tokens.
+ Unused << i.AddCustomToken("bbbbb", Tokenizer::CASE_SENSITIVE);
+ i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);
+
+ i.FeedInput(NS_LITERAL_CSTRING("01234"));
+ EXPECT_TRUE(test == 0);
+ i.FeedInput(NS_LITERAL_CSTRING("5"));
+ EXPECT_TRUE(test == 1);
+ i.FeedInput(NS_LITERAL_CSTRING("6789aa"));
+ EXPECT_TRUE(test == 2);
+ i.FeedInput(NS_LITERAL_CSTRING("aqwert"));
+ EXPECT_TRUE(test == 4);
+ i.FinishInput();
+ EXPECT_TRUE(test == 6);
+}