/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "VariableLengthPrefixSet.h"
#include "nsUrlClassifierPrefixSet.h"
#include "nsPrintfCString.h"
#include "nsThreadUtils.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Unused.h"
#include <algorithm>

// MOZ_LOG=UrlClassifierPrefixSet:5
static mozilla::LazyLogModule gUrlClassifierPrefixSetLog("UrlClassifierPrefixSet");
#define LOG(args) MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)

namespace mozilla {
namespace safebrowsing {

#define PREFIX_SIZE_FIXED 4

NS_IMPL_ISUPPORTS(VariableLengthPrefixSet, nsIMemoryReporter)

// Definition required due to std::max<>()
const uint32_t VariableLengthPrefixSet::MAX_BUFFER_SIZE;

// This class will process prefix size between 4~32. But for 4 bytes prefixes,
// they will be passed to nsUrlClassifierPrefixSet because of better optimization.
VariableLengthPrefixSet::VariableLengthPrefixSet()
  : mLock("VariableLengthPrefixSet.mLock")
  , mMemoryReportPath()
{
  mFixedPrefixSet = new nsUrlClassifierPrefixSet();
}

NS_IMETHODIMP
VariableLengthPrefixSet::Init(const nsACString& aName)
{
  mMemoryReportPath =
    nsPrintfCString(
      "explicit/storage/prefix-set/%s",
      (!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")
    );

  RegisterWeakMemoryReporter(this);

  return NS_OK;
}

VariableLengthPrefixSet::~VariableLengthPrefixSet()
{
  UnregisterWeakMemoryReporter(this);
}

NS_IMETHODIMP
VariableLengthPrefixSet::SetPrefixes(const PrefixStringMap& aPrefixMap)
{
  MutexAutoLock lock(mLock);

  // Prefix size should not less than 4-bytes or greater than 32-bytes
  for (auto iter = aPrefixMap.ConstIter(); !iter.Done(); iter.Next()) {
    if (iter.Key() < PREFIX_SIZE_FIXED ||
        iter.Key() > COMPLETE_SIZE) {
      return NS_ERROR_FAILURE;
    }
  }

  // Clear old prefixSet before setting new one.
  mFixedPrefixSet->SetPrefixes(nullptr, 0);
  mVLPrefixSet.Clear();

  // 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
  nsCString* prefixes = aPrefixMap.Get(PREFIX_SIZE_FIXED);
  if (prefixes) {
    NS_ENSURE_TRUE(prefixes->Length() % PREFIX_SIZE_FIXED == 0, NS_ERROR_FAILURE);

    uint32_t numPrefixes = prefixes->Length() / PREFIX_SIZE_FIXED;

#if MOZ_BIG_ENDIAN
    const uint32_t* arrayPtr = reinterpret_cast<const uint32_t*>(prefixes->BeginReading());
#else
    FallibleTArray<uint32_t> array;
    // Prefixes are lexicographically-sorted, so the interger array
    // passed to nsUrlClassifierPrefixSet should also follow the same order.
    // To make sure of that, we convert char array to integer with Big-Endian
    // instead of casting to integer directly.
    if (!array.SetCapacity(numPrefixes, fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    const char* begin = prefixes->BeginReading();
    const char* end = prefixes->EndReading();

    while (begin != end) {
      array.AppendElement(BigEndian::readUint32(begin), fallible);
      begin += sizeof(uint32_t);
    }

    const uint32_t* arrayPtr = array.Elements();
#endif

    nsresult rv = mFixedPrefixSet->SetPrefixes(arrayPtr, numPrefixes);
    NS_ENSURE_SUCCESS(rv, rv);
  }

  // 5~32 bytes prefixes are stored in mVLPrefixSet.
  for (auto iter = aPrefixMap.ConstIter(); !iter.Done(); iter.Next()) {
    // Skip 4bytes prefixes because it is already stored in mFixedPrefixSet.
    if (iter.Key() == PREFIX_SIZE_FIXED) {
      continue;
    }

    mVLPrefixSet.Put(iter.Key(), new nsCString(*iter.Data()));
  }

  return NS_OK;
}

nsresult
VariableLengthPrefixSet::GetPrefixes(PrefixStringMap& aPrefixMap)
{
  MutexAutoLock lock(mLock);

  // 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
  FallibleTArray<uint32_t> array;
  nsresult rv = mFixedPrefixSet->GetPrefixesNative(array);
  NS_ENSURE_SUCCESS(rv, rv);

  size_t count = array.Length();
  if (count) {
    nsCString* prefixes = new nsCString();
    prefixes->SetLength(PREFIX_SIZE_FIXED * count);

    // Writing integer array to character array
    uint32_t* begin = reinterpret_cast<uint32_t*>(prefixes->BeginWriting());
    for (uint32_t i = 0; i < count; i++) {
      begin[i] = NativeEndian::swapToBigEndian(array[i]);
    }

    aPrefixMap.Put(PREFIX_SIZE_FIXED, prefixes);
  }

  // Copy variable-length prefix set
  for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
    aPrefixMap.Put(iter.Key(), new nsCString(*iter.Data()));
  }

  return NS_OK;
}

// It should never be the case that more than one hash prefixes match a given
// full hash. However, if that happens, this method returns any one of them.
// It does not guarantee which one of those will be returned.
NS_IMETHODIMP
VariableLengthPrefixSet::Matches(const nsACString& aFullHash, uint32_t* aLength)
{
  MutexAutoLock lock(mLock);

  // Only allow full-length hash to check if match any of the prefix
  MOZ_ASSERT(aFullHash.Length() == COMPLETE_SIZE);
  NS_ENSURE_ARG_POINTER(aLength);

  *aLength = 0;

  // Check if it matches 4-bytes prefixSet first
  const uint32_t* hash = reinterpret_cast<const uint32_t*>(aFullHash.BeginReading());
  uint32_t value = BigEndian::readUint32(hash);

  bool found = false;
  nsresult rv = mFixedPrefixSet->Contains(value, &found);
  NS_ENSURE_SUCCESS(rv, rv);

  if (found) {
    *aLength = PREFIX_SIZE_FIXED;
    return NS_OK;
  }

  for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
    if (BinarySearch(aFullHash, *iter.Data(), iter.Key())) {
      *aLength = iter.Key();
      return NS_OK;
    }
  }

  return NS_OK;
}

NS_IMETHODIMP
VariableLengthPrefixSet::IsEmpty(bool* aEmpty)
{
  MutexAutoLock lock(mLock);

  NS_ENSURE_ARG_POINTER(aEmpty);

  mFixedPrefixSet->IsEmpty(aEmpty);
  *aEmpty = *aEmpty && mVLPrefixSet.IsEmpty();

  return NS_OK;
}

NS_IMETHODIMP
VariableLengthPrefixSet::LoadFromFile(nsIFile* aFile)
{
  MutexAutoLock lock(mLock);

  NS_ENSURE_ARG_POINTER(aFile);

  nsCOMPtr<nsIInputStream> localInFile;
  nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
                                           PR_RDONLY | nsIFile::OS_READAHEAD);
  NS_ENSURE_SUCCESS(rv, rv);

  // Calculate how big the file is, make sure our read buffer isn't bigger
  // than the file itself which is just wasting memory.
  int64_t fileSize;
  rv = aFile->GetFileSize(&fileSize);
  NS_ENSURE_SUCCESS(rv, rv);

  if (fileSize < 0 || fileSize > UINT32_MAX) {
    return NS_ERROR_FAILURE;
  }

  uint32_t bufferSize = std::min<uint32_t>(static_cast<uint32_t>(fileSize),
                                           MAX_BUFFER_SIZE);

  // Convert to buffered stream
  nsCOMPtr<nsIInputStream> in = NS_BufferInputStream(localInFile, bufferSize);

  rv = mFixedPrefixSet->LoadPrefixes(in);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = LoadPrefixes(in);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;;
}

NS_IMETHODIMP
VariableLengthPrefixSet::StoreToFile(nsIFile* aFile)
{
  NS_ENSURE_ARG_POINTER(aFile);

  MutexAutoLock lock(mLock);

  nsCOMPtr<nsIOutputStream> localOutFile;
  nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
                                            PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
  NS_ENSURE_SUCCESS(rv, rv);

  uint32_t fileSize = 0;
  // Preallocate the file storage
  nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));

  fileSize += mFixedPrefixSet->CalculatePreallocateSize();
  fileSize += CalculatePreallocateSize();

  Unused << fos->Preallocate(fileSize);

  // Convert to buffered stream
  nsCOMPtr<nsIOutputStream> out =
    NS_BufferOutputStream(localOutFile, std::min(fileSize, MAX_BUFFER_SIZE));

  rv = mFixedPrefixSet->WritePrefixes(out);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = WritePrefixes(out);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
VariableLengthPrefixSet::LoadPrefixes(nsIInputStream* in)
{
  uint32_t magic;
  uint32_t read;

  nsresult rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);

  if (magic != PREFIXSET_VERSION_MAGIC) {
    LOG(("Version magic mismatch, not loading"));
    return NS_ERROR_FILE_CORRUPTED;
  }

  mVLPrefixSet.Clear();

  uint32_t count;
  rv = in->Read(reinterpret_cast<char*>(&count), sizeof(uint32_t), &read);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);

  for(;count > 0; count--) {
    uint8_t prefixSize;
    rv = in->Read(reinterpret_cast<char*>(&prefixSize), sizeof(uint8_t), &read);
    NS_ENSURE_SUCCESS(rv, rv);
    NS_ENSURE_TRUE(read == sizeof(uint8_t), NS_ERROR_FAILURE);

    uint32_t stringLength;
    rv = in->Read(reinterpret_cast<char*>(&stringLength), sizeof(uint32_t), &read);
    NS_ENSURE_SUCCESS(rv, rv);
    NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);

    nsCString* vlPrefixes = new nsCString();
    if (!vlPrefixes->SetLength(stringLength, fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }

    rv = in->Read(reinterpret_cast<char*>(vlPrefixes->BeginWriting()), stringLength, &read);
    NS_ENSURE_SUCCESS(rv, rv);
    NS_ENSURE_TRUE(read == stringLength, NS_ERROR_FAILURE);

    mVLPrefixSet.Put(prefixSize, vlPrefixes);
  }

  return NS_OK;
}

uint32_t
VariableLengthPrefixSet::CalculatePreallocateSize()
{
  uint32_t fileSize = 0;

  // Store how many prefix string.
  fileSize += sizeof(uint32_t);

  for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
    // Store prefix size, prefix string length, and prefix string.
    fileSize += sizeof(uint8_t);
    fileSize += sizeof(uint32_t);
    fileSize += iter.Data()->Length();
  }
  return fileSize;
}

nsresult
VariableLengthPrefixSet::WritePrefixes(nsIOutputStream* out)
{
  uint32_t written;
  uint32_t writelen = sizeof(uint32_t);
  uint32_t magic = PREFIXSET_VERSION_MAGIC;
  nsresult rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);

  uint32_t count = mVLPrefixSet.Count();
  rv = out->Write(reinterpret_cast<char*>(&count), writelen, &written);
  NS_ENSURE_SUCCESS(rv, rv);
  NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);

  // Store PrefixSize, Length of Prefix String and then Prefix String
  for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
    const nsCString& vlPrefixes = *iter.Data();

    uint8_t prefixSize = iter.Key();
    writelen = sizeof(uint8_t);
    rv = out->Write(reinterpret_cast<char*>(&prefixSize), writelen, &written);
    NS_ENSURE_SUCCESS(rv, rv);
    NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);

    uint32_t stringLength = vlPrefixes.Length();
    writelen = sizeof(uint32_t);
    rv = out->Write(reinterpret_cast<char*>(&stringLength), writelen, &written);
    NS_ENSURE_SUCCESS(rv, rv);
    NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);

    rv = out->Write(const_cast<char*>(vlPrefixes.BeginReading()),
                    stringLength, &written);
    NS_ENSURE_SUCCESS(rv, rv);
    NS_ENSURE_TRUE(stringLength == written, NS_ERROR_FAILURE);
  }

  return NS_OK;
}

bool
VariableLengthPrefixSet::BinarySearch(const nsACString& aFullHash,
                                         const nsACString& aPrefixes,
                                         uint32_t aPrefixSize)
{
  const char* fullhash = aFullHash.BeginReading();
  const char* prefixes = aPrefixes.BeginReading();
  int32_t begin = 0, end = aPrefixes.Length() / aPrefixSize;

  while (end > begin) {
    int32_t mid = (begin + end) >> 1;
    int cmp = memcmp(fullhash, prefixes + mid*aPrefixSize, aPrefixSize);
    if (cmp < 0) {
      end = mid;
    } else if (cmp > 0) {
      begin = mid + 1;
    } else {
      return true;
    }
  }
  return false;
}

MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)

NS_IMETHODIMP
VariableLengthPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
                                           nsISupports* aData, bool aAnonymize)
{
  MOZ_ASSERT(NS_IsMainThread());

  size_t amount = SizeOfIncludingThis(UrlClassifierMallocSizeOf);

  return aHandleReport->Callback(
    EmptyCString(), mMemoryReportPath, KIND_HEAP, UNITS_BYTES, amount,
    NS_LITERAL_CSTRING("Memory used by the variable-length prefix set for a URL classifier."),
    aData);
}

size_t
VariableLengthPrefixSet::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
{
  MutexAutoLock lock(mLock);

  size_t n = 0;
  n += aMallocSizeOf(this);
  n += mFixedPrefixSet->SizeOfIncludingThis(moz_malloc_size_of) - aMallocSizeOf(mFixedPrefixSet);

  n += mVLPrefixSet.ShallowSizeOfExcludingThis(aMallocSizeOf);
  for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
    n += iter.Data()->SizeOfExcludingThisIfUnshared(aMallocSizeOf);
  }

  return n;
}

} // namespace safebrowsing
} // namespace mozilla