/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozilla/Tokenizer.h"
#include "mozilla/IncrementalTokenizer.h"
#include "mozilla/Unused.h"
#include "gtest/gtest.h"

using namespace mozilla;

// Character classifier handed to Tokenizer::CheckChar() in the tests below:
// accepts only the '+' and '*' characters.
static bool IsOperator(char const c)
{
  switch (c) {
    case '+':
    case '*':
      return true;
    default:
      return false;
  }
}

// Character classifier handed to Tokenizer::CheckChar() in the tests below:
// accepts ASCII letters, ASCII digits, '_' and '-'.
static bool HttpHeaderCharacter(char const c)
{
  // The two punctuation characters allowed besides alphanumerics.
  if (c == '_' || c == '-') {
    return true;
  }

  const bool isLower = 'a' <= c && c <= 'z';
  const bool isUpper = 'A' <= c && c <= 'Z';
  const bool isDigit = '0' <= c && c <= '9';
  return isLower || isUpper || isDigit;
}

// Walks a canned HTTP response with the Tokenizer step by step: the status
// line (protocol, version, status code, reason phrase), two header lines,
// the empty separator line and finally the body.
TEST(Tokenizer, HTTPResponse)
{
  Tokenizer::Token t;

  // Real life test, HTTP response

  Tokenizer p(NS_LITERAL_CSTRING(
    "HTTP/1.0 304 Not modified\r\n"
    "ETag: hallo\r\n"
    "Content-Length: 16\r\n"
    "\r\n"
    "This is the body"));

  // Protocol and version: "HTTP/1.0" is expected to tokenize as
  // word, '/', integer 1, '.', integer 0.
  EXPECT_TRUE(p.CheckWord("HTTP"));
  EXPECT_TRUE(p.CheckChar('/'));
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
  EXPECT_TRUE(t.AsInteger() == 1);
  EXPECT_TRUE(p.CheckChar('.'));
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
  EXPECT_TRUE(t.AsInteger() == 0);
  p.SkipWhites();

  // Status code.
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
  EXPECT_TRUE(t.AsInteger() == 304);
  p.SkipWhites();

  // Reason phrase: record everything up to the end of the line. The claimed
  // text is expected not to contain the terminating EOL token.
  p.Record();
  while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOL);
  EXPECT_FALSE(p.HasFailed());
  nsAutoCString h;
  p.Claim(h);
  EXPECT_TRUE(h == "Not modified");

  // First header name: consume header-name characters one at a time until
  // the classifier rejects one (the ':'), then claim with INCLUDE_LAST.
  p.Record();
  while (p.CheckChar(HttpHeaderCharacter));
  p.Claim(h, Tokenizer::INCLUDE_LAST);
  EXPECT_TRUE(h == "ETag");
  p.SkipWhites();
  EXPECT_TRUE(p.CheckChar(':'));
  p.SkipWhites();
  // First header value: everything up to the end of the line.
  p.Record();
  while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOL);
  EXPECT_FALSE(p.HasFailed());
  p.Claim(h);
  EXPECT_TRUE(h == "hallo");

  // Second header: the name is read the same way, the value is an integer.
  p.Record();
  while (p.CheckChar(HttpHeaderCharacter));
  p.Claim(h, Tokenizer::INCLUDE_LAST);
  EXPECT_TRUE(h == "Content-Length");
  p.SkipWhites();
  EXPECT_TRUE(p.CheckChar(':'));
  p.SkipWhites();
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_INTEGER, t));
  EXPECT_TRUE(t.AsInteger() == 16);
  // End of the Content-Length line.
  EXPECT_TRUE(p.CheckEOL());

  // The empty line separating the headers from the body.
  EXPECT_TRUE(p.CheckEOL());

  // Body: record everything up to the end of the input.
  p.Record();
  while (p.Next(t) && t.Type() != Tokenizer::TOKEN_EOF);
  nsAutoCString b;
  p.Claim(b);
  EXPECT_TRUE(b == "This is the body");
}

// Synthetic walk over a short input that mixes a word, integers, whitespace,
// single characters and line ends. Exercises Next(), the Check*() family,
// Rollback(), SkipWhites() and Record()/Claim() with both claim modes.
TEST(Tokenizer, Main)
{
  Tokenizer::Token t;

  // Synthetic code-specific test

  Tokenizer p(NS_LITERAL_CSTRING("test123 ,15  \t*\r\n%xx,-15\r\r"));

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_WORD);
  EXPECT_TRUE(t.AsString() == "test123");

  // A default-constructed token is expected not to match the input.
  Tokenizer::Token u;
  EXPECT_FALSE(p.Check(u));

  // The next input character is a space, so these checks are expected to fail.
  EXPECT_FALSE(p.CheckChar('!'));

  EXPECT_FALSE(p.Check(Tokenizer::Token::Number(123)));

  EXPECT_TRUE(p.CheckWhite());

  EXPECT_TRUE(p.CheckChar(','));

  EXPECT_TRUE(p.Check(Tokenizer::Token::Number(15)));

  // After Rollback() the same integer is expected to be readable again,
  // both through Check() and through Next().
  p.Rollback();
  EXPECT_TRUE(p.Check(Tokenizer::Token::Number(15)));

  p.Rollback();
  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
  EXPECT_TRUE(t.AsInteger() == 15);

  // Three whitespace characters ("  \t") follow the number.
  EXPECT_FALSE(p.CheckChar(IsOperator));

  EXPECT_TRUE(p.CheckWhite());

  p.SkipWhites();

  EXPECT_FALSE(p.CheckWhite());

  // The two CheckWhite() calls below are expected to succeed, i.e. this
  // Rollback() brings back the two whitespace characters skipped above.
  p.Rollback();

  EXPECT_TRUE(p.CheckWhite());
  EXPECT_TRUE(p.CheckWhite());

  // Start recording here; the claims further below are expected to begin
  // with the '*' character.
  p.Record(Tokenizer::EXCLUDE_LAST);

  EXPECT_TRUE(p.CheckChar(IsOperator));

  p.Rollback();

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR);
  EXPECT_TRUE(t.AsChar() == '*');

  // "\r\n" is expected to come out as a single EOL token.
  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL);

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR);
  EXPECT_TRUE(t.AsChar() == '%');

  // EXCLUDE_LAST leaves the most recently read token ('%') out of the claim,
  // INCLUDE_LAST includes it.
  nsAutoCString claim;
  p.Claim(claim, Tokenizer::EXCLUDE_LAST);
  EXPECT_TRUE(claim == "*\r\n");
  p.Claim(claim, Tokenizer::INCLUDE_LAST);
  EXPECT_TRUE(claim == "*\r\n%");

  p.Rollback();
  EXPECT_TRUE(p.CheckChar('%'));

  // Recording started with INCLUDE_LAST is expected to contain the '%' that
  // has just been read (see the "%xx" claim below).
  p.Record(Tokenizer::INCLUDE_LAST);

  EXPECT_FALSE(p.CheckWord("xy"));

  EXPECT_TRUE(p.CheckWord("xx"));


  p.Claim(claim, Tokenizer::INCLUDE_LAST);
  EXPECT_TRUE(claim == "%xx");

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_CHAR);
  EXPECT_TRUE(t.AsChar() == ',');

  // The minus sign is expected as a separate character token followed by
  // the integer 15.
  EXPECT_TRUE(p.CheckChar('-'));

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_INTEGER);
  EXPECT_TRUE(t.AsInteger() == 15);

  // Each of the two trailing '\r' characters is expected as its own EOL.
  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL);

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOL);

  // EOF is expected to be delivered exactly once; reading past it fails.
  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOF);

  EXPECT_FALSE(p.Next(t));

  // After Rollback() the EOF token is expected to be readable once more.
  p.Rollback();
  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_EOF);

  EXPECT_FALSE(p.Next(t));

  p.Rollback();
  EXPECT_TRUE(p.CheckEOF());

  EXPECT_FALSE(p.CheckEOF());
}

// A word that contains digits is expected to be read as one word token,
// directly followed by EOF.
TEST(Tokenizer, SingleWord)
{
  // Single word with numbers in it test

  Tokenizer p(NS_LITERAL_CSTRING("test123"));

  EXPECT_TRUE(p.CheckWord("test123"));
  EXPECT_TRUE(p.CheckEOF());
}

// Input consisting only of digits: it is expected not to match as a word,
// to match as the integer 123, and to be directly followed by EOF.
TEST(Tokenizer, EndingAfterNumber)
{
  // An end handling after a number

  Tokenizer p(NS_LITERAL_CSTRING("123"));

  EXPECT_FALSE(p.CheckWord("123"));
  EXPECT_TRUE(p.Check(Tokenizer::Token::Number(123)));
  EXPECT_TRUE(p.CheckEOF());
}

// A very long digit sequence is expected to be returned by Next() as a single
// TOKEN_ERROR token, with the whole sequence consumed (EOF follows).
TEST(Tokenizer, BadInteger)
{
  Tokenizer::Token t;

  // A bad integer test

  Tokenizer p(NS_LITERAL_CSTRING("189234891274981758617846178651647620587135"));

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_ERROR);
  EXPECT_TRUE(p.CheckEOF());
}

// Exercises Check(type, token): it is expected to succeed only when the next
// token has the requested type, and to leave the input available for the
// following check when it fails.
TEST(Tokenizer, CheckExpectedTokenValue)
{
  Tokenizer::Token t;

  // Check expected token value test

  Tokenizer p(NS_LITERAL_CSTRING("blue velvet"));

  // The input starts with a word, so asking for an integer fails.
  EXPECT_FALSE(p.Check(Tokenizer::TOKEN_INTEGER, t));

  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t));
  EXPECT_TRUE(t.AsString() == "blue");

  // A space follows, so asking for another word fails.
  EXPECT_FALSE(p.Check(Tokenizer::TOKEN_WORD, t));

  EXPECT_TRUE(p.CheckWhite());

  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t));
  EXPECT_TRUE(t.AsString() == "velvet");

  EXPECT_TRUE(p.CheckEOF());

  // Nothing can be read once EOF has been consumed.
  EXPECT_FALSE(p.Next(t));
}

// Exercises HasFailed(): the expectations below show it reporting true after
// an unsuccessful Check*()/Next() call and false again after a successful
// Check*(), Next() or a SkipWhites() call.
TEST(Tokenizer, HasFailed)
{
  Tokenizer::Token t;

  // HasFailed test

  Tokenizer p1(NS_LITERAL_CSTRING("a b"));

  // "a b" contains no TOKEN_CHAR token, so the loop only ends when Next()
  // itself fails, leaving the tokenizer in the failed state.
  while (p1.Next(t) && t.Type() != Tokenizer::TOKEN_CHAR);
  EXPECT_TRUE(p1.HasFailed());


  Tokenizer p2(NS_LITERAL_CSTRING("a b ?!c"));

  // Alternate failing and succeeding operations and check the flag after
  // each one.
  EXPECT_FALSE(p2.CheckChar('c'));
  EXPECT_TRUE(p2.HasFailed());
  EXPECT_TRUE(p2.CheckChar(HttpHeaderCharacter));
  EXPECT_FALSE(p2.HasFailed());
  p2.SkipWhites();
  EXPECT_FALSE(p2.HasFailed());
  EXPECT_FALSE(p2.CheckChar('c'));
  EXPECT_TRUE(p2.HasFailed());
  EXPECT_TRUE(p2.Next(t));
  EXPECT_FALSE(p2.HasFailed());
  EXPECT_TRUE(p2.Next(t));
  EXPECT_FALSE(p2.HasFailed());
  EXPECT_FALSE(p2.CheckChar('c'));
  EXPECT_TRUE(p2.HasFailed());
  EXPECT_TRUE(p2.Check(Tokenizer::TOKEN_CHAR, t));
  EXPECT_FALSE(p2.HasFailed());
  EXPECT_FALSE(p2.CheckChar('#'));
  EXPECT_TRUE(p2.HasFailed());
  t = Tokenizer::Token::Char('!');
  EXPECT_TRUE(p2.Check(t));
  EXPECT_FALSE(p2.HasFailed());

  // Only the word "c" and EOF remain, neither of which is a TOKEN_CHAR, so
  // the loop again runs until Next() fails.
  while (p2.Next(t) && t.Type() != Tokenizer::TOKEN_CHAR);
  EXPECT_TRUE(p2.HasFailed());
}

// Constructs a Tokenizer from several different string sources and checks
// in each case that the input "test" is read as one word followed by EOF.
TEST(Tokenizer, Construction)
{
  // From an nsCString variable.
  {
    nsCString a("test");
    Tokenizer p1(a);
    EXPECT_TRUE(p1.CheckWord("test"));
    EXPECT_TRUE(p1.CheckEOF());
  }

  // From an nsAutoCString variable.
  {
    nsAutoCString a("test");
    Tokenizer p1(a);
    EXPECT_TRUE(p1.CheckWord("test"));
    EXPECT_TRUE(p1.CheckEOF());
  }

  // From an nsDependentCString wrapping a static char array.
  {
    static const char _a[] = "test";
    nsDependentCString a(_a);
    Tokenizer p1(a);
    EXPECT_TRUE(p1.CheckWord("test"));
    EXPECT_TRUE(p1.CheckEOF());
  }

  // From an nsDependentCString wrapping a static char pointer.
  {
    static const char* _a = "test";
    nsDependentCString a(_a);
    Tokenizer p1(a);
    EXPECT_TRUE(p1.CheckWord("test"));
    EXPECT_TRUE(p1.CheckEOF());
  }

  // From a temporary nsDependentCString.
  {
    Tokenizer p1(nsDependentCString("test"));
    EXPECT_TRUE(p1.CheckWord("test"));
    EXPECT_TRUE(p1.CheckEOF());
  }

  // From an NS_LITERAL_CSTRING.
  {
    Tokenizer p1(NS_LITERAL_CSTRING("test"));
    EXPECT_TRUE(p1.CheckWord("test"));
    EXPECT_TRUE(p1.CheckEOF());
  }

  // Directly from a C string literal.
  {
    Tokenizer p1("test");
    EXPECT_TRUE(p1.CheckWord("test"));
    EXPECT_TRUE(p1.CheckEOF());
  }
}

// Exercises the two optional constructor arguments that follow the input.
// Judging by the expectations below, the first one replaces the set of
// whitespace characters (nullptr keeps the default) and the second one adds
// characters that are accepted as part of a word.
TEST(Tokenizer, Customization)
{
  // Default whitespace, '-' and '*' additionally allowed inside words.
  Tokenizer p1(NS_LITERAL_CSTRING("test-custom*words and\tdefault-whites"), nullptr, "-*");
  EXPECT_TRUE(p1.CheckWord("test-custom*words"));
  EXPECT_TRUE(p1.CheckWhite());
  EXPECT_TRUE(p1.CheckWord("and"));
  EXPECT_TRUE(p1.CheckWhite());
  EXPECT_TRUE(p1.CheckWord("default-whites"));

  // Both ',' and ' ' are treated as whitespace.
  Tokenizer p2(NS_LITERAL_CSTRING("test, custom,whites"), ", ");
  EXPECT_TRUE(p2.CheckWord("test"));
  EXPECT_TRUE(p2.CheckWhite());
  EXPECT_TRUE(p2.CheckWhite());
  EXPECT_TRUE(p2.CheckWord("custom"));
  EXPECT_TRUE(p2.CheckWhite());
  EXPECT_TRUE(p2.CheckWord("whites"));

  // Only ',' is whitespace here, so ' ' is expected to be an ordinary
  // character; '-' and '#' are additionally allowed inside words.
  Tokenizer p3(NS_LITERAL_CSTRING("test, custom, whites-and#word-chars"), ",", "-#");
  EXPECT_TRUE(p3.CheckWord("test"));
  EXPECT_TRUE(p3.CheckWhite());
  EXPECT_FALSE(p3.CheckWhite());
  EXPECT_TRUE(p3.CheckChar(' '));
  EXPECT_TRUE(p3.CheckWord("custom"));
  EXPECT_TRUE(p3.CheckWhite());
  EXPECT_FALSE(p3.CheckWhite());
  EXPECT_TRUE(p3.CheckChar(' '));
  EXPECT_TRUE(p3.CheckWord("whites-and#word-chars"));
}

// Exercises the Read*() shortcuts, which hand the value of the next token
// back through an out argument: ReadWord() (into both an owning string and a
// dependent substring), ReadChar() and ReadInteger().
TEST(Tokenizer, ShortcutChecks)
{
  Tokenizer p("test1 test2,123");

  nsAutoCString test1;
  nsDependentCSubstring test2;
  char comma;
  uint32_t integer;

  EXPECT_TRUE(p.ReadWord(test1));
  EXPECT_TRUE(test1 == "test1");
  p.SkipWhites();
  EXPECT_TRUE(p.ReadWord(test2));
  EXPECT_TRUE(test2 == "test2");
  EXPECT_TRUE(p.ReadChar(&comma));
  EXPECT_TRUE(comma == ',');
  EXPECT_TRUE(p.ReadInteger(&integer));
  EXPECT_TRUE(integer == 123);
  EXPECT_TRUE(p.CheckEOF());
}

// Character classifier handed to Tokenizer::ReadChar() in the test below:
// accepts only 'a' and 'b'.
static bool ABChar(const char aChar)
{
  if (aChar == 'a') {
    return true;
  }
  return aChar == 'b';
}

// Exercises ReadChar() with a classifier function: characters are expected
// to be read one at a time while the classifier accepts them, and a rejected
// character is expected to stay in the input for the next read.
TEST(Tokenizer, ReadCharClassified)
{
  Tokenizer p("abc");

  char c;
  EXPECT_TRUE(p.ReadChar(ABChar, &c));
  EXPECT_TRUE(c == 'a');
  EXPECT_TRUE(p.ReadChar(ABChar, &c));
  EXPECT_TRUE(c == 'b');
  // 'c' is rejected by ABChar...
  EXPECT_FALSE(p.ReadChar(ABChar, &c));
  // ...and is then read as a word of its own.
  nsDependentCSubstring w;
  EXPECT_TRUE(p.ReadWord(w));
  EXPECT_TRUE(w == "c");
  EXPECT_TRUE(p.CheckEOF());
}

// Exercises Claim() into an nsDependentCSubstring: recording starts after
// the leading whitespace and the claim with INCLUDE_LAST is expected to hold
// exactly the word that was read.
TEST(Tokenizer, ClaimSubstring)
{
  Tokenizer p(" abc ");

  EXPECT_TRUE(p.CheckWhite());

  p.Record();
  EXPECT_TRUE(p.CheckWord("abc"));
  nsDependentCSubstring v;
  p.Claim(v, Tokenizer::INCLUDE_LAST);
  EXPECT_TRUE(v == "abc");
  EXPECT_TRUE(p.CheckWhite());
  EXPECT_TRUE(p.CheckEOF());
}

// Exercises Token::Fragment(): for every token the fragment is expected to
// equal the token's text and to start at the matching offset inside the
// original `str` buffer. Each token is read through both Next() and
// Check(type, token), with a Rollback() in between, so that both ways of
// obtaining a token are covered.
TEST(Tokenizer, Fragment)
{
  const char str[] = "ab;cd:10 ";
  Tokenizer p(str);

  Tokenizer::Token t1, t2;

  // "ab" at offset 0: Next() first, then Check() after a rollback.
  EXPECT_TRUE(p.Next(t1));
  EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_WORD);
  EXPECT_TRUE(t1.Fragment() == "ab");
  EXPECT_TRUE(t1.Fragment().BeginReading() == &str[0]);

  p.Rollback();
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t2));
  EXPECT_TRUE(t2.Fragment() == "ab");
  EXPECT_TRUE(t2.Fragment().BeginReading() == &str[0]);


  // ";" at offset 2: Next() first, then Check() after a rollback.
  EXPECT_TRUE(p.Next(t1));
  EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_CHAR);
  EXPECT_TRUE(t1.Fragment() == ";");
  EXPECT_TRUE(t1.Fragment().BeginReading() == &str[2]);

  p.Rollback();
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_CHAR, t2));
  EXPECT_TRUE(t2.Fragment() == ";");
  EXPECT_TRUE(t2.Fragment().BeginReading() == &str[2]);


  // "cd" at offset 3: Check() first, then Next() after a rollback.
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WORD, t2));
  EXPECT_TRUE(t2.Fragment() == "cd");
  EXPECT_TRUE(t2.Fragment().BeginReading() == &str[3]);

  p.Rollback();
  EXPECT_TRUE(p.Next(t1));
  EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_WORD);
  EXPECT_TRUE(t1.Fragment() == "cd");
  EXPECT_TRUE(t1.Fragment().BeginReading() == &str[3]);


  // ":" at offset 5: Check() first, then Next() after a rollback.
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_CHAR, t2));
  EXPECT_TRUE(t2.Fragment() == ":");
  EXPECT_TRUE(t2.Fragment().BeginReading() == &str[5]);

  p.Rollback();
  EXPECT_TRUE(p.Next(t1));
  EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_CHAR);
  EXPECT_TRUE(t1.Fragment() == ":");
  EXPECT_TRUE(t1.Fragment().BeginReading() == &str[5]);


  // "10" at offset 6.
  EXPECT_TRUE(p.Next(t1));
  EXPECT_TRUE(t1.Type() == Tokenizer::TOKEN_INTEGER);
  EXPECT_TRUE(t1.Fragment() == "10");
  EXPECT_TRUE(t1.Fragment().BeginReading() == &str[6]);


  // The trailing space at offset 8.
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_WS, t2));
  EXPECT_TRUE(t2.Fragment() == " ");
  EXPECT_TRUE(t2.Fragment().BeginReading() == &str[8]);


  // EOF: an empty fragment positioned at the terminating null of `str`.
  EXPECT_TRUE(p.Check(Tokenizer::TOKEN_EOF, t1));
  EXPECT_TRUE(t1.Fragment() == "");
  EXPECT_TRUE(t1.Fragment().BeginReading() == &str[9]);
}

// Exercises SkipWhites() in both modes: called without arguments it is
// expected to stop in front of a line end (CheckEOL() succeeds afterwards),
// while with INCLUDE_NEW_LINE it is expected to skip line ends as well.
TEST(Tokenizer, SkipWhites)
{
  Tokenizer p("Text1 \nText2 \nText3\n Text4\n ");

  EXPECT_TRUE(p.CheckWord("Text1"));
  p.SkipWhites();
  EXPECT_TRUE(p.CheckEOL());

  EXPECT_TRUE(p.CheckWord("Text2"));
  // Skips " \n" so the next token is directly the following word.
  p.SkipWhites(Tokenizer::INCLUDE_NEW_LINE);

  EXPECT_TRUE(p.CheckWord("Text3"));
  // No whitespace precedes the line end here; the call must be harmless.
  p.SkipWhites();
  EXPECT_TRUE(p.CheckEOL());
  p.SkipWhites();

  EXPECT_TRUE(p.CheckWord("Text4"));
  // Skips the trailing "\n " up to the end of the input.
  p.SkipWhites(Tokenizer::INCLUDE_NEW_LINE);
  EXPECT_TRUE(p.CheckEOF());
}

// Exercises SkipWhites() with a custom whitespace set that also contains
// '\n', '\r' and '.': a plain SkipWhites() is expected to skip all five
// characters between the words, and each of the five trailing characters is
// expected to be accepted individually by CheckWhite().
TEST(Tokenizer, SkipCustomWhites)
{
  Tokenizer p("Text1 \n\r\t.Text2 \n\r\t.", " \n\r\t.");

  EXPECT_TRUE(p.CheckWord("Text1"));
  p.SkipWhites();
  EXPECT_TRUE(p.CheckWord("Text2"));
  EXPECT_TRUE(p.CheckWhite());
  EXPECT_TRUE(p.CheckWhite());
  EXPECT_TRUE(p.CheckWhite());
  EXPECT_TRUE(p.CheckWhite());
  EXPECT_TRUE(p.CheckWhite());
  EXPECT_TRUE(p.CheckEOF());
}

// Exercises ReadInteger() with every fixed-width signed and unsigned target
// type. A read is expected to succeed only when the parsed value fits into
// the target type; a failed read is expected to leave the input in place so
// that a wider type can be tried next.
//
// The inputs are produced by stringifying the integer literals below, so each
// number is followed by its literal suffix ("U", "UL", "ULL"), which the
// tests then consume as a word.
TEST(Tokenizer, IntegerReading)
{
#define INT_6_BITS                 64U
#define INT_30_BITS                1073741824UL
#define INT_32_BITS                4294967295UL
#define INT_50_BITS                1125899906842624ULL
#define STR_INT_MORE_THAN_64_BITS "922337203685477580899"

  // 64 fits into all eight target types; Rollback() re-reads the same number.
  {
    Tokenizer p(NS_STRINGIFY(INT_6_BITS));
    uint8_t u8;
    uint16_t u16;
    uint32_t u32;
    uint64_t u64;
    EXPECT_TRUE(p.ReadInteger(&u8));
    EXPECT_TRUE(u8 == INT_6_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&u16));
    EXPECT_TRUE(u16 == INT_6_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&u32));
    EXPECT_TRUE(u32 == INT_6_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&u64));
    EXPECT_TRUE(u64 == INT_6_BITS);

    p.Rollback();

    int8_t s8;
    int16_t s16;
    int32_t s32;
    int64_t s64;
    EXPECT_TRUE(p.ReadInteger(&s8));
    EXPECT_TRUE(s8 == INT_6_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&s16));
    EXPECT_TRUE(s16 == INT_6_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&s32));
    EXPECT_TRUE(s32 == INT_6_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&s64));
    EXPECT_TRUE(s64 == INT_6_BITS);

    // The literal suffix left over from stringification.
    EXPECT_TRUE(p.CheckWord("U"));
    EXPECT_TRUE(p.CheckEOF());
  }

  // 1073741824 does not fit into 8 or 16 bits, but fits into 32 and 64 bits,
  // both signed and unsigned.
  {
    Tokenizer p(NS_STRINGIFY(INT_30_BITS));
    uint8_t u8;
    uint16_t u16;
    uint32_t u32;
    uint64_t u64;
    EXPECT_FALSE(p.ReadInteger(&u8));
    EXPECT_FALSE(p.ReadInteger(&u16));
    EXPECT_TRUE(p.ReadInteger(&u32));
    EXPECT_TRUE(u32 == INT_30_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&u64));
    EXPECT_TRUE(u64 == INT_30_BITS);

    p.Rollback();

    int8_t s8;
    int16_t s16;
    int32_t s32;
    int64_t s64;
    EXPECT_FALSE(p.ReadInteger(&s8));
    EXPECT_FALSE(p.ReadInteger(&s16));
    EXPECT_TRUE(p.ReadInteger(&s32));
    EXPECT_TRUE(s32 == INT_30_BITS);
    p.Rollback();
    EXPECT_TRUE(p.ReadInteger(&s64));
    EXPECT_TRUE(s64 == INT_30_BITS);
    EXPECT_TRUE(p.CheckWord("UL"));
    EXPECT_TRUE(p.CheckEOF());
  }

  // 4294967295 fits into uint32_t but not into int32_t.
  {
    Tokenizer p(NS_STRINGIFY(INT_32_BITS));
    uint32_t u32;
    int32_t s32;
    EXPECT_FALSE(p.ReadInteger(&s32));
    EXPECT_TRUE(p.ReadInteger(&u32));
    EXPECT_TRUE(u32 == INT_32_BITS);
    EXPECT_TRUE(p.CheckWord("UL"));
    EXPECT_TRUE(p.CheckEOF());
  }

  // 1125899906842624 only fits into 64 bits.
  {
    Tokenizer p(NS_STRINGIFY(INT_50_BITS));
    uint8_t u8;
    uint16_t u16;
    uint32_t u32;
    uint64_t u64;
    EXPECT_FALSE(p.ReadInteger(&u8));
    EXPECT_FALSE(p.ReadInteger(&u16));
    EXPECT_FALSE(p.ReadInteger(&u32));
    EXPECT_TRUE(p.ReadInteger(&u64));
    EXPECT_TRUE(u64 == INT_50_BITS);
    EXPECT_TRUE(p.CheckWord("ULL"));
    EXPECT_TRUE(p.CheckEOF());
  }

  // A number too large even for 64 bits: both reads are expected to fail and
  // the input is expected not to be consumed (EOF is not reached).
  {
    Tokenizer p(STR_INT_MORE_THAN_64_BITS);
    int64_t i;
    EXPECT_FALSE(p.ReadInteger(&i));
    uint64_t u;
    EXPECT_FALSE(p.ReadInteger(&u));
    EXPECT_FALSE(p.CheckEOF());
  }

  // Macros ignore block scope; undefine them so these test-local helpers do
  // not leak into whatever follows in the same translation unit.
#undef STR_INT_MORE_THAN_64_BITS
#undef INT_50_BITS
#undef INT_32_BITS
#undef INT_30_BITS
#undef INT_6_BITS
}

// Exercises ReadUntil(): it hands back the input in front of the requested
// token, or including that token when INCLUDE_LAST is passed. Rollback() is
// used to restart from the same position between the individual checks.
TEST(Tokenizer, ReadUntil)
{
  Tokenizer p("Hello;test 4,");
  nsDependentCSubstring f;
  EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(';'), f));
  EXPECT_TRUE(f == "Hello");
  p.Rollback();

  EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(';'), f, Tokenizer::INCLUDE_LAST));
  EXPECT_TRUE(f == "Hello;");
  p.Rollback();

  // The token is not present: the call is expected to return false and to
  // hand back the whole remaining input.
  EXPECT_FALSE(p.ReadUntil(Tokenizer::Token::Char('!'), f));
  EXPECT_TRUE(f == "Hello;test 4,");
  p.Rollback();

  // Searching for a word token works the same way as for a character.
  EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Word(NS_LITERAL_CSTRING("test")), f));
  EXPECT_TRUE(f == "Hello;");
  p.Rollback();

  EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Word(NS_LITERAL_CSTRING("test")), f, Tokenizer::INCLUDE_LAST));
  EXPECT_TRUE(f == "Hello;test");
  // No rollback this time: the next read continues right after "test".
  EXPECT_TRUE(p.ReadUntil(Tokenizer::Token::Char(','), f));
  EXPECT_TRUE(f == " 4");
}

// Exercises SkipUntil(): the cursor is expected to stop in front of the
// requested token (so the token itself can still be checked), not to move
// when it already stands on that token, and to reach the end of the input
// when the token is not found.
TEST(Tokenizer, SkipUntil)
{
  {
    Tokenizer p("test1,test2,,,test3");

    p.SkipUntil(Tokenizer::Token::Char(','));
    EXPECT_TRUE(p.CheckChar(','));
    EXPECT_TRUE(p.CheckWord("test2"));

    p.SkipUntil(Tokenizer::Token::Char(',')); // must not move
    EXPECT_TRUE(p.CheckChar(',')); // check the first comma of the ',,,' string

    p.Rollback(); // moves cursor back to the first comma of the ',,,' string

    p.SkipUntil(Tokenizer::Token::Char(',')); // must not move, we are on the ',' char
    EXPECT_TRUE(p.CheckChar(','));
    EXPECT_TRUE(p.CheckChar(','));
    EXPECT_TRUE(p.CheckChar(','));
    EXPECT_TRUE(p.CheckWord("test3"));
    p.Rollback();

    // No comma is left in the input, so the skip is expected to run to EOF.
    p.SkipUntil(Tokenizer::Token::Char(','));
    EXPECT_TRUE(p.CheckEOF());
  }

  {
    Tokenizer p("test0,test1,test2");

    p.SkipUntil(Tokenizer::Token::Char(','));
    EXPECT_TRUE(p.CheckChar(','));

    // Rolling back a SkipUntil() is expected to return the cursor to where
    // the skip started, so the skipped word can still be read.
    p.SkipUntil(Tokenizer::Token::Char(','));
    p.Rollback();

    EXPECT_TRUE(p.CheckWord("test1"));
    EXPECT_TRUE(p.CheckChar(','));

    // The same holds when the skip found no comma before the end of input.
    p.SkipUntil(Tokenizer::Token::Char(','));
    p.Rollback();

    EXPECT_TRUE(p.CheckWord("test2"));
    EXPECT_TRUE(p.CheckEOF());
  }
}

// Exercises custom tokens in the default tokenizing mode: a case-insensitive
// token ("custom-1") and a case-sensitive one ("custom-2") are registered and
// then looked for among the ordinary tokens.
TEST(Tokenizer, Custom)
{
  Tokenizer p("aaaaaacustom-1\r,custom-1,Custom-1,Custom-1,00custom-2xxxx,CUSTOM-2");

  Tokenizer::Token c1 = p.AddCustomToken("custom-1", Tokenizer::CASE_INSENSITIVE);
  Tokenizer::Token c2 = p.AddCustomToken("custom-2", Tokenizer::CASE_SENSITIVE);

  // It's expected to NOT FIND the custom token if it's not on an edge
  // between other recognizable tokens.
  EXPECT_TRUE(p.CheckWord("aaaaaacustom"));
  EXPECT_TRUE(p.CheckChar('-'));
  EXPECT_TRUE(p.Check(Tokenizer::Token::Number(1)));
  EXPECT_TRUE(p.CheckEOL());
  EXPECT_TRUE(p.CheckChar(','));

  // Exact-case occurrence of the case-insensitive token.
  EXPECT_TRUE(p.Check(c1));
  EXPECT_TRUE(p.CheckChar(','));

  // "Custom-1" is expected to match too, because c1 is case-insensitive.
  EXPECT_TRUE(p.Check(c1));
  EXPECT_TRUE(p.CheckChar(','));

  // With c1 disabled, the same text is expected to tokenize as ordinary
  // word, '-', integer.
  p.EnableCustomToken(c1, false);
  EXPECT_TRUE(p.CheckWord("Custom"));
  EXPECT_TRUE(p.CheckChar('-'));
  EXPECT_TRUE(p.Check(Tokenizer::Token::Number(1)));
  EXPECT_TRUE(p.CheckChar(','));

  // c2 is expected to be found between the integer "00" and the word "xxxx".
  EXPECT_TRUE(p.Check(Tokenizer::Token::Number(0)));
  EXPECT_TRUE(p.Check(c2));
  EXPECT_TRUE(p.CheckWord("xxxx"));
  EXPECT_TRUE(p.CheckChar(','));

  // "CUSTOM-2" is expected not to match the case-sensitive c2 and to come
  // out as ordinary tokens.
  EXPECT_TRUE(p.CheckWord("CUSTOM"));
  EXPECT_TRUE(p.CheckChar('-'));
  EXPECT_TRUE(p.Check(Tokenizer::Token::Number(2)));

  EXPECT_TRUE(p.CheckEOF());
}

// Same input and custom tokens as the Custom test, but tokenized in
// CUSTOM_ONLY mode: everything between custom tokens is expected to be
// delivered as TOKEN_RAW fragments.
TEST(Tokenizer, CustomRaw)
{
  Tokenizer p("aaaaaacustom-1\r,custom-1,Custom-1,Custom-1,00custom-2xxxx,CUSTOM-2");

  Tokenizer::Token c1 = p.AddCustomToken("custom-1", Tokenizer::CASE_INSENSITIVE);
  Tokenizer::Token c2 = p.AddCustomToken("custom-2", Tokenizer::CASE_SENSITIVE);

  // In this mode it's expected to find all custom tokens among any kind of input.
  p.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);

  Tokenizer::Token t;

  // The custom token is found even though it directly follows "aaaaaa".
  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
  EXPECT_TRUE(t.Fragment().EqualsLiteral("aaaaaa"));

  EXPECT_TRUE(p.Check(c1));

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
  EXPECT_TRUE(t.Fragment().EqualsLiteral("\r,"));

  EXPECT_TRUE(p.Check(c1));

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
  EXPECT_TRUE(t.Fragment().EqualsLiteral(","));

  // "Custom-1" matches the case-insensitive c1.
  EXPECT_TRUE(p.Check(c1));

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
  EXPECT_TRUE(t.Fragment().EqualsLiteral(","));

  EXPECT_TRUE(p.Check(c1));

  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
  EXPECT_TRUE(t.Fragment().EqualsLiteral(",00"));

  EXPECT_TRUE(p.Check(c2));

  // "CUSTOM-2" does not match the case-sensitive c2, so it is expected to
  // stay part of the trailing raw fragment.
  EXPECT_TRUE(p.Next(t));
  EXPECT_TRUE(t.Type() == Tokenizer::TOKEN_RAW);
  EXPECT_TRUE(t.Fragment().EqualsLiteral("xxxx,CUSTOM-2"));

  EXPECT_TRUE(p.CheckEOF());
}

// Feeds the input to an IncrementalTokenizer one character at a time and
// checks the sequence of tokens handed to the consumer callback. `test`
// counts the callback invocations and selects the token expected for each.
TEST(Tokenizer, Incremental)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    switch (++test) {
      case 1: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test1")))); break;
      case 2: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 3: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test2")))); break;
      case 4: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 5: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 6: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 7: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test3")))); break;
      case 8: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
    }

    return NS_OK;
  });

  NS_NAMED_LITERAL_CSTRING(input, "test1,test2,,,test3");
  auto cur = input.BeginReading();
  auto end = input.EndReading();
  for (; cur < end; ++cur) {
    i.FeedInput(nsDependentCSubstring(cur, 1));
  }

  // The trailing word "test3" is expected to be delivered, together with
  // EOF, only once the input is finished.
  EXPECT_TRUE(test == 6);
  i.FinishInput();
  EXPECT_TRUE(test == 8);
}

// Like the Incremental test, but the consumer callback calls Rollback() on
// one token and expects that same token to be delivered a second time.
TEST(Tokenizer, IncrementalRollback)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    switch (++test) {
      case 1: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test1")))); break;
      case 2: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 3: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test2"))));
        i.Rollback(); // so that we get the token again
        break;
      case 4: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test2")))); break;
      case 5: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 6: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 7: EXPECT_TRUE(t.Equals(Token::Char(','))); break;
      case 8: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("test3")))); break;
      case 9: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
    }

    return NS_OK;
  });

  NS_NAMED_LITERAL_CSTRING(input, "test1,test2,,,test3");
  auto cur = input.BeginReading();
  auto end = input.EndReading();
  for (; cur < end; ++cur) {
    i.FeedInput(nsDependentCSubstring(cur, 1));
  }

  // One more callback than in the Incremental test because of the repeated
  // "test2" token; "test3" and EOF again arrive only on FinishInput().
  EXPECT_TRUE(test == 7);
  i.FinishInput();
  EXPECT_TRUE(test == 9);
}

// Exercises NeedMoreInput() and error propagation in IncrementalTokenizer.
// When handed the whitespace token, the consumer callback tries to read the
// token that follows it. While that is not possible yet it asks for more
// input, and the whitespace token is expected to be handed to the callback
// again (callback invocations 2 to 5). The last invocation returns a
// failure code, which FinishInput() is expected to pass on to its caller.
TEST(Tokenizer, IncrementalNeedMoreInput)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    Token t2;
    switch (++test) {
    case 1:
      EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("a"))));
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      EXPECT_TRUE(t.Equals(Token::Whitespace()));
      if (i.Next(t2)) {
        // Only on the fifth invocation is the following word expected to be
        // available.
        EXPECT_TRUE(test == 5);
        EXPECT_TRUE(t2.Equals(Token::Word(NS_LITERAL_CSTRING("bb"))));
      } else {
        EXPECT_TRUE(test < 5);
        i.NeedMoreInput();
      }
      break;
    case 6:
      EXPECT_TRUE(t.Equals(Token::Char(',')));
      break;
    case 7:
      EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("c"))));
      return NS_ERROR_FAILURE;
    default:
      // The callback must not be invoked more than seven times.
      EXPECT_TRUE(false);
      break;
    }

    return NS_OK;
  });

  NS_NAMED_LITERAL_CSTRING(input, "a bb,c");
  auto cur = input.BeginReading();
  auto end = input.EndReading();

  // Initialized so that the check after the loop never reads an
  // indeterminate value, regardless of how many iterations run.
  nsresult rv = NS_OK;
  for (; cur < end; ++cur) {
    rv = i.FeedInput(nsDependentCSubstring(cur, 1));
    if (NS_FAILED(rv)) {
      break;
    }
  }

  EXPECT_TRUE(rv == NS_OK);
  EXPECT_TRUE(test == 6);

  // The trailing word "c" is delivered on FinishInput(); the callback fails
  // on it and that failure is expected as the result.
  rv = i.FinishInput();
  EXPECT_TRUE(rv == NS_ERROR_FAILURE);
  EXPECT_TRUE(test == 7);
}

// Exercises a custom token whose text arrives split across several
// FeedInput() calls: no token is expected to be delivered until the custom
// token is complete.
TEST(Tokenizer, IncrementalCustom)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  Token custom;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    switch (++test) {
      case 1: EXPECT_TRUE(t.Equals(custom)); break;
      case 2: EXPECT_TRUE(t.Equals(Token::Word(NS_LITERAL_CSTRING("bla")))); break;
      case 3: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
    }

    return NS_OK;
  }, nullptr, "-");

  custom = i.AddCustomToken("some-test", Tokenizer::CASE_SENSITIVE);
  // "some-" and "tes" are only a prefix of the custom token: nothing is
  // expected to be delivered yet.
  i.FeedInput(NS_LITERAL_CSTRING("some-"));
  EXPECT_TRUE(test == 0);
  i.FeedInput(NS_LITERAL_CSTRING("tes"));
  EXPECT_TRUE(test == 0);
  // The final 't' completes the custom token; the trailing "bla" is
  // delivered, together with EOF, only on FinishInput().
  i.FeedInput(NS_LITERAL_CSTRING("tbla"));
  EXPECT_TRUE(test == 1);
  i.FinishInput();
  EXPECT_TRUE(test == 3);
}

// Exercises CUSTOM_ONLY mode in an IncrementalTokenizer fed one character at
// a time, including a temporary switch to FULL mode from inside the consumer
// callback after rolling back a raw fragment.
TEST(Tokenizer, IncrementalCustomRaw)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  Token custom;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    switch (++test) {
      case 1: EXPECT_TRUE(t.Fragment().EqualsLiteral("test1,")); break;
      case 2: EXPECT_TRUE(t.Equals(custom)); break;
      // Roll the raw fragment back and re-tokenize it in FULL mode...
      case 3: EXPECT_TRUE(t.Fragment().EqualsLiteral("!,,test3"));
        i.Rollback();
        i.SetTokenizingMode(Tokenizer::Mode::FULL);
        break;
      // ...which yields the single '!' character; then switch back so that
      // the rest is delivered as a raw fragment again.
      case 4: EXPECT_TRUE(t.Equals(Token::Char('!')));
        i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);
        break;
      case 5: EXPECT_TRUE(t.Fragment().EqualsLiteral(",,test3")); break;
      case 6: EXPECT_TRUE(t.Equals(custom)); break;
      // "tes" is only a prefix of the custom token at the end of the input.
      case 7: EXPECT_TRUE(t.Fragment().EqualsLiteral("tes")); break;
      case 8: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
    }

    return NS_OK;
  });

  custom = i.AddCustomToken("test2", Tokenizer::CASE_SENSITIVE);
  i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);

  NS_NAMED_LITERAL_CSTRING(input, "test1,test2!,,test3test2tes");
  auto cur = input.BeginReading();
  auto end = input.EndReading();
  for (; cur < end; ++cur) {
    i.FeedInput(nsDependentCSubstring(cur, 1));
  }

  // The trailing "tes" and EOF are expected only after FinishInput().
  EXPECT_TRUE(test == 6);
  i.FinishInput();
  EXPECT_TRUE(test == 8);
}

// Exercises RemoveCustomToken() called from inside the consumer callback:
// once the custom token has been removed, the second occurrence of its text
// is expected no longer to be reported as that custom token.
TEST(Tokenizer, IncrementalCustomRemove)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  Token custom;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    switch (++test) {
      case 1: EXPECT_TRUE(t.Equals(custom));
        i.RemoveCustomToken(custom);
        break;
      case 2: EXPECT_FALSE(t.Equals(custom)); break;
      case 3: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
    }

    return NS_OK;
  });

  custom = i.AddCustomToken("custom1", Tokenizer::CASE_SENSITIVE);

  NS_NAMED_LITERAL_CSTRING(input, "custom1custom1");
  i.FeedInput(input);
  // Only the first occurrence has been delivered so far; the remainder and
  // EOF follow on FinishInput().
  EXPECT_TRUE(test == 1);
  i.FinishInput();
  EXPECT_TRUE(test == 3);
}

// Exercises raw-fragment delivery in CUSTOM_ONLY mode for an
// IncrementalTokenizer constructed with a fourth argument of 3 (presumably a
// limit on how much raw input is buffered - TODO confirm against
// IncrementalTokenizer.h). The expectations pin exactly how the raw input is
// split into fragments around the custom token "aaa".
TEST(Tokenizer, IncrementalBuffering1)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  Token custom;
  nsDependentCSubstring observedFragment;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    switch (++test) {
      case 1: EXPECT_TRUE(t.Fragment().EqualsLiteral("012")); break;
      case 2: EXPECT_TRUE(t.Fragment().EqualsLiteral("3456789")); break;
      case 3: EXPECT_TRUE(t.Equals(custom)); break;
      case 4: EXPECT_TRUE(t.Fragment().EqualsLiteral("qwe")); break;
      case 5: EXPECT_TRUE(t.Fragment().EqualsLiteral("rt")); break;
      case 6: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
    }

    // Remember the most recent fragment so the test body can also inspect it
    // after FeedInput() has returned.
    observedFragment.Rebind(t.Fragment().BeginReading(),
                            t.Fragment().Length());
    return NS_OK;
  }, nullptr, nullptr, 3);

  custom = i.AddCustomToken("aaa", Tokenizer::CASE_SENSITIVE);
  // This externally unused token is added only to check the internal algorithm
  // does work correctly as expected when there are two different length tokens.
  Unused << i.AddCustomToken("bb", Tokenizer::CASE_SENSITIVE);
  i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);

  i.FeedInput(NS_LITERAL_CSTRING("01234"));
  EXPECT_TRUE(test == 1);
  EXPECT_TRUE(observedFragment.EqualsLiteral("012"));

  i.FeedInput(NS_LITERAL_CSTRING("5"));
  EXPECT_TRUE(test == 1);
  i.FeedInput(NS_LITERAL_CSTRING("6789aa"));
  EXPECT_TRUE(test == 2);
  EXPECT_TRUE(observedFragment.EqualsLiteral("3456789"));

  // The leading 'a' completes the custom token "aaa" started in the
  // previous chunk.
  i.FeedInput(NS_LITERAL_CSTRING("aqwert"));
  EXPECT_TRUE(test == 4);
  EXPECT_TRUE(observedFragment.EqualsLiteral("qwe"));

  i.FinishInput();
  EXPECT_TRUE(test == 6);
}

// Variant of IncrementalBuffering1 in which the second, otherwise unused
// custom token is five characters long ("bbbbb") instead of two. With the
// same input chunks, the raw fragments are expected to be split differently
// (see the per-call expectations in the callback).
TEST(Tokenizer, IncrementalBuffering2)
{
  typedef TokenizerBase::Token Token;

  int test = 0;
  Token custom;
  IncrementalTokenizer i([&](Token const& t, IncrementalTokenizer& i) -> nsresult
  {
    switch (++test) {
      case 1: EXPECT_TRUE(t.Fragment().EqualsLiteral("01")); break;
      case 2: EXPECT_TRUE(t.Fragment().EqualsLiteral("234567")); break;
      case 3: EXPECT_TRUE(t.Fragment().EqualsLiteral("89")); break;
      case 4: EXPECT_TRUE(t.Equals(custom)); break;
      case 5: EXPECT_TRUE(t.Fragment().EqualsLiteral("qwert")); break;
      case 6: EXPECT_TRUE(t.Equals(Token::EndOfFile())); break;
    }
    return NS_OK;
  }, nullptr, nullptr, 3);

  custom = i.AddCustomToken("aaa", Tokenizer::CASE_SENSITIVE);
  // This externally unused token is added only to check the internal algorithm
  // does work correctly as expected when there are two different length tokens.
  Unused << i.AddCustomToken("bbbbb", Tokenizer::CASE_SENSITIVE);
  i.SetTokenizingMode(Tokenizer::Mode::CUSTOM_ONLY);

  // Unlike in IncrementalBuffering1, nothing is expected to be delivered
  // after the first chunk.
  i.FeedInput(NS_LITERAL_CSTRING("01234"));
  EXPECT_TRUE(test == 0);
  i.FeedInput(NS_LITERAL_CSTRING("5"));
  EXPECT_TRUE(test == 1);
  i.FeedInput(NS_LITERAL_CSTRING("6789aa"));
  EXPECT_TRUE(test == 2);
  // The leading 'a' completes the custom token "aaa" started in the
  // previous chunk.
  i.FeedInput(NS_LITERAL_CSTRING("aqwert"));
  EXPECT_TRUE(test == 4);
  i.FinishInput();
  EXPECT_TRUE(test == 6);
}