1 files changed, 599 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/LookupCache.cpp b/toolkit/components/url-classifier/LookupCache.cpp
new file mode 100644
index 000000000..5a3b1e36d
--- /dev/null
+++ b/toolkit/components/url-classifier/LookupCache.cpp
@@ -0,0 +1,599 @@
+//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LookupCache.h"
+#include "HashStore.h"
+#include "nsISeekableStream.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/Logging.h"
+#include "nsNetUtil.h"
+#include "prprf.h"
+#include "Classifier.h"
+
+// We act as the main entry point for all the real lookups,
+// so note that those are not done to the actual HashStore.
+// The latter solely exists to store the data needed to handle
+// the updates from the protocol.
+
+// This module provides a front for PrefixSet, mUpdateCompletions,
+// and mGetHashCache, which together contain everything needed to
+// provide a classification as long as the data is up to date.
+
+// PrefixSet stores and provides lookups for 4-byte prefixes.
+// mUpdateCompletions contains 32-byte completions which were
+// contained in updates. They are retrieved from HashStore/.sbstore
+// on startup.
+// mGetHashCache contains 32-byte completions which were
+// returned from the gethash server. They are not serialized,
+// only cached until the next update.
+
+// Name of the persistent PrefixSet storage
+#define PREFIXSET_SUFFIX  ".pset"
+
+// MOZ_LOG=UrlClassifierDbService:5
+extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
+#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
+#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
+
+namespace mozilla {
+namespace safebrowsing {
+
+const int LookupCacheV2::VER = 2;  // NOTE(review): presumably the version tag of V2 caches -- confirm in LookupCache.h
+
+// Stores the table name, provider and root store directory, then resolves the
+// table's private store directory. A new cache always starts out unprimed.
+LookupCache::LookupCache(const nsACString& aTableName,
+                         const nsACString& aProvider,
+                         nsIFile* aRootStoreDir)
+  : mPrimed(false), mTableName(aTableName), mProvider(aProvider),
+    mRootStoreDirectory(aRootStoreDir)
+{
+  UpdateRootDirHandle(mRootStoreDirectory);  // return value is not checked
+}
+
+// Opens the cache by loading the persisted PrefixSet (if any) from disk.
+// Propagates the failure code of LoadPrefixSet(); NS_OK otherwise.
+nsresult LookupCache::Open()
+{
+  LOG(("Loading PrefixSet"));
+  const nsresult rv = LoadPrefixSet();
+  NS_ENSURE_SUCCESS(rv, rv);
+  return NS_OK;
+}
+
+// Points the cache at a (possibly new) root store directory and re-resolves
+// the private store directory used for this table/provider pair.
+// If the private directory cannot be resolved, the root directory is used as
+// the store directory, but the failure code is still returned to the caller.
+nsresult LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
+{
+  // Only clone when the caller hands us a handle other than the one we hold.
+  if (aNewRootStoreDirectory != mRootStoreDirectory) {
+    nsresult rv =
+      aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
+    NS_ENSURE_SUCCESS(rv, rv);
+  }
+
+  nsresult rv = Classifier::GetPrivateStoreDirectory(
+    mRootStoreDirectory, mTableName, mProvider,
+    getter_AddRefs(mStoreDirectory));
+  if (NS_FAILED(rv)) {
+    LOG(("Failed to get private store directory for %s", mTableName.get()));
+    mStoreDirectory = mRootStoreDirectory;
+  }
+
+  if (LOG_ENABLED()) {
+    nsString storePath;
+    mStoreDirectory->GetPath(storePath);
+    LOG(("Private store directory for %s is %s", mTableName.get(),
+         NS_ConvertUTF16toUTF8(storePath).get()));
+  }
+  return rv;
+}
+
+// Deletes the on-disk PrefixSet file for this table and then drops all
+// in-memory state. If any file operation fails, the error is returned and
+// the in-memory state is left untouched.
+nsresult LookupCache::Reset()
+{
+  LOG(("LookupCache resetting"));
+
+  // Build "<store dir>/<table name>" + PREFIXSET_SUFFIX.
+  nsCOMPtr<nsIFile> psFile;
+  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = psFile->Remove(false);  // non-recursive removal
+  NS_ENSURE_SUCCESS(rv, rv);
+  ClearAll();
+  return NS_OK;
+}
+
+// Merge gethash completions into mGetHashCache without duplicates. Inserting
+// in sorted position keeps the array valid for BinaryIndexOf on every pass.
+nsresult LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
+{
+  for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
+    const Completion& hash = aAddCompletes[i].CompleteHash();
+    if (mGetHashCache.BinaryIndexOf(hash) == mGetHashCache.NoIndex) {
+      mGetHashCache.InsertElementSorted(hash);
+    }
+  }
+  return NS_OK;
+}
+
+#if defined(DEBUG)
+// Debug helper: logs each cached gethash completion in hex (if logging is on).
+void LookupCache::DumpCache()
+{
+  if (LOG_ENABLED()) {
+    const uint32_t count = mGetHashCache.Length();
+    for (uint32_t idx = 0; idx < count; ++idx) {
+      nsAutoCString hex;
+      mGetHashCache[idx].ToHexString(hex);
+      LOG(("Caches: %s", hex.get()));
+    }
+  }
+}
+#endif
+
+// Persists the prefix set to "<store dir>/<table name>" + PREFIXSET_SUFFIX.
+// Aborts early with NS_ERROR_ABORT once shutdown has started. Failures while
+// building the file path are returned to the caller.
+// NOTE(review): a StoreToFile() failure only triggers a warning and NS_OK is
+// still returned -- confirm callers really want best-effort semantics here.
+nsresult LookupCache::WriteFile()
+{
+  if (nsUrlClassifierDBService::ShutdownHasStarted()) {
+    return NS_ERROR_ABORT;
+  }
+  nsCOMPtr<nsIFile> psFile;
+  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = StoreToFile(psFile);
+  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");
+  return NS_OK;
+}
+
+// Drops the gethash cache and the prefixes and marks the cache as unprimed.
+void LookupCache::ClearAll()
+{
+  ClearCache();
+  ClearPrefixes();  // result is not checked
+  mPrimed = false;
+}
+
+// Empties mGetHashCache, the array of cached gethash completions.
+void LookupCache::ClearCache()
+{
+  mGetHashCache.Clear();
+}
+
+// True iff aHost scans as exactly four '.'-separated unsigned numbers, each
+// <= 255; any character after the fourth number matches %c and is rejected.
+/* static */ bool LookupCache::IsCanonicalizedIP(const nsACString& aHost)
+{
+  uint32_t octet1, octet2, octet3, octet4;
+  char trailing;
+  const int32_t scanned =
+    PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
+              &octet1, &octet2, &octet3, &octet4, &trailing);
+  if (scanned != 4) {
+    return false;
+  }
+  return octet1 <= 0xFF && octet2 <= 0xFF && octet3 <= 0xFF && octet4 <= 0xFF;
+}
+
+/* static */ nsresult
+LookupCache::GetLookupFragments(const nsACString& aSpec,
+                                nsTArray<nsCString>* aFragments)
+// Fills aFragments (cleared first) with "host/path" keys derived from aSpec.
+{
+  aFragments->Clear();
+
+  nsACString::const_iterator begin, end, iter;
+  aSpec.BeginReading(begin);
+  aSpec.EndReading(end);
+  // A spec without any '/' yields no fragments at all.
+  iter = begin;
+  if (!FindCharInReadable('/', iter, end)) {
+    return NS_OK;
+  }
+  // Host is everything before the first '/'; iter++ then steps past that '/'.
+  const nsCSubstring& host = Substring(begin, iter++);
+  nsAutoCString path;
+  path.Assign(Substring(iter, end));
+
+  /**
+   * From the protocol doc:
+   * For the hostname, the client will try at most 5 different strings.  They
+   * are:
+   * a) The exact hostname of the url
+   * b) The 4 hostnames formed by starting with the last 5 components and
+   *    successively removing the leading component.  The top-level component
+   *    can be skipped. This is not done if the hostname is a numerical IP.
+   */
+  nsTArray<nsCString> hosts;
+  hosts.AppendElement(host);
+
+  if (!IsCanonicalizedIP(host)) {
+    host.BeginReading(begin);
+    host.EndReading(end);
+    int numHostComponents = 0;
+    while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
+           numHostComponents < MAX_HOST_COMPONENTS) {
+      // don't bother checking toplevel domains
+      if (++numHostComponents >= 2) {
+        host.EndReading(iter);
+        hosts.AppendElement(Substring(end, iter));
+      }
+      end = begin;  // keep searching to the left of the '.' just found
+      host.BeginReading(begin);
+    }
+  }
+
+  /**
+   * From the protocol doc:
+   * For the path, the client will also try at most 6 different strings.
+   * They are:
+   * a) the exact path of the url, including query parameters
+   * b) the exact path of the url, without query parameters
+   * c) the 4 paths formed by starting at the root (/) and
+   *    successively appending path components, including a trailing
+   *    slash.  This behavior should only extend up to the next-to-last
+   *    path component, that is, a trailing slash should never be
+   *    appended that was not present in the original url.
+   */
+  nsTArray<nsCString> paths;
+  nsAutoCString pathToAdd;
+  // If there is a query string, also try the path without it; '?' ends the scan.
+  path.BeginReading(begin);
+  path.EndReading(end);
+  iter = begin;
+  if (FindCharInReadable('?', iter, end)) {
+    pathToAdd = Substring(begin, iter);
+    paths.AppendElement(pathToAdd);
+    end = iter;
+  }
+  // Add each leading "dir/" prefix, up to MAX_PATH_COMPONENTS - 1 of them.
+  int numPathComponents = 1;
+  iter = begin;
+  while (FindCharInReadable('/', iter, end) &&
+         numPathComponents < MAX_PATH_COMPONENTS) {
+    iter++;
+    pathToAdd.Assign(Substring(begin, iter));
+    paths.AppendElement(pathToAdd);
+    numPathComponents++;
+  }
+
+  // If we haven't already done so, add the full path
+  if (!pathToAdd.Equals(path)) {
+    paths.AppendElement(path);
+  }
+  // Check an empty path (for whole-domain blacklist entries)
+  paths.AppendElement(EmptyCString());
+  // Emit one key per (host, path) combination.
+  for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
+    for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
+      nsCString key;
+      key.Assign(hosts[hostIndex]);
+      key.Append('/');
+      key.Append(paths[pathIndex]);
+      LOG(("Checking fragment %s", key.get()));
+
+      aFragments->AppendElement(key);
+    }
+  }
+
+  return NS_OK;
+}
+
+// Appends lookup host keys for aSpec to aHostKeys (existing entries are kept).
+// A canonical IP host yields "<ip>/". Any other host yields its last two
+// components as "<b>.<c>/" and, if it has more, also "<a>.<b>.<c>/".
+// Nothing is appended when aSpec has no '/' or the host has < 2 components.
+/* static */ nsresult
+LookupCache::GetHostKeys(const nsACString& aSpec,
+                         nsTArray<nsCString>* aHostKeys)
+{
+  nsACString::const_iterator begin, end, iter;
+  aSpec.BeginReading(begin);
+  aSpec.EndReading(end);
+  iter = begin;
+  if (!FindCharInReadable('/', iter, end)) {
+    return NS_OK;
+  }
+
+  const nsCSubstring& host = Substring(begin, iter);
+
+  if (IsCanonicalizedIP(host)) {
+    nsCString *key = aHostKeys->AppendElement();
+    if (!key)
+      return NS_ERROR_OUT_OF_MEMORY;
+    key->Assign(host);
+    key->Append("/");
+    return NS_OK;
+  }
+
+  nsTArray<nsCString> hostComponents;
+  ParseString(PromiseFlatCString(host), '.', hostComponents);
+
+  if (hostComponents.Length() < 2) {
+    // no host or toplevel host, this won't match anything in the db
+    return NS_OK;
+  }
+
+  // Build the two-component key in a local: a pointer to its array element
+  // would dangle if the AppendElement() further down reallocates the array.
+  int32_t last = int32_t(hostComponents.Length()) - 1;
+  nsAutoCString twoComponentKey(hostComponents[last - 1]);
+  twoComponentKey.Append(".");
+  twoComponentKey.Append(hostComponents[last]);
+  twoComponentKey.Append("/");
+  if (!aHostKeys->AppendElement(twoComponentKey))
+    return NS_ERROR_OUT_OF_MEMORY;
+
+  // Now check with three domain components
+  if (hostComponents.Length() > 2) {
+    nsCString *lookupHost2 = aHostKeys->AppendElement();
+    if (!lookupHost2)
+      return NS_ERROR_OUT_OF_MEMORY;
+    lookupHost2->Assign(hostComponents[last - 2]);
+    lookupHost2->Append(".");
+    lookupHost2->Append(twoComponentKey);
+  }
+  return NS_OK;
+}
+
+// Loads the persisted PrefixSet for this table, if the file exists.
+// A load failure is returned (a corrupted file additionally triggers
+// Reset()). A missing file is not an error; mPrimed is simply not set.
+nsresult LookupCache::LoadPrefixSet()
+{
+  nsCOMPtr<nsIFile> psFile;
+  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
+  NS_ENSURE_SUCCESS(rv, rv);
+  bool exists;
+  rv = psFile->Exists(&exists);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  if (!exists) {
+    LOG(("no (usable) stored PrefixSet found"));
+  } else {
+    LOG(("stored PrefixSet exists, loading from disk"));
+    rv = LoadFromFile(psFile);
+    if (rv == NS_ERROR_FILE_CORRUPTED) {
+      Reset();  // result is not checked; the load error is what gets returned
+    }
+    if (NS_FAILED(rv)) {
+      return rv;
+    }
+    mPrimed = true;
+  }
+
+#ifdef DEBUG
+  if (mPrimed) {
+    uint32_t size = SizeOfPrefixSet();
+    LOG(("SB tree done, size = %d bytes\n", size));
+  }
+#endif
+
+  return NS_OK;
+}
+
+// Creates the backing nsUrlClassifierPrefixSet and initialises it with the
+// table name. Propagates the prefix set's Init() failure, if any.
+nsresult LookupCacheV2::Init()
+{
+  mPrefixSet = new nsUrlClassifierPrefixSet();
+  const nsresult rv = mPrefixSet->Init(mTableName);
+  NS_ENSURE_SUCCESS(rv, rv);
+  return NS_OK;
+}
+
+// Opens the base cache (prefix set) first, then loads the update completions.
+// The error code of the first failing step is returned.
+nsresult LookupCacheV2::Open()
+{
+  nsresult rv = LookupCache::Open();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  LOG(("Reading Completions"));
+  rv = ReadCompletions();
+  NS_ENSURE_SUCCESS(rv, rv);
+  return NS_OK;
+}
+
+// Clears the base-class state and then the update completions.
+void LookupCacheV2::ClearAll()
+{
+  LookupCache::ClearAll();
+  mUpdateCompletions.Clear();
+}
+
+// Looks aCompletion up in this table. Both outputs start out false.
+// *aHas is set when the 32-bit prefix is in the prefix set or the full hash
+// is a known completion; *aComplete only when the full hash is found in
+// mGetHashCache or mUpdateCompletions. A Contains() failure is returned.
+nsresult
+LookupCacheV2::Has(const Completion& aCompletion,
+                   bool* aHas, bool* aComplete)
+{
+  *aHas = *aComplete = false;
+
+  const uint32_t prefix = aCompletion.ToUint32();
+  bool found;
+  nsresult rv = mPrefixSet->Contains(prefix, &found);
+  NS_ENSURE_SUCCESS(rv, rv);
+  LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));
+  *aHas = found;
+
+  const bool inGetHashCache =
+    mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex;
+  if (inGetHashCache ||
+      mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex) {
+    LOG(("Complete in %s", mTableName.get()));
+    *aComplete = true;
+    *aHas = true;
+  }
+  return NS_OK;
+}
+
+// Rebuilds the cache: mUpdateCompletions becomes the sorted full hashes from
+// aAddCompletes (which is emptied), then the prefix set is reconstructed.
+nsresult
+LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
+                     AddCompleteArray& aAddCompletes)
+{
+  const uint32_t completeCount = static_cast<uint32_t>(aAddCompletes.Length());
+  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS, completeCount);
+
+  mUpdateCompletions.Clear();
+  mUpdateCompletions.SetCapacity(completeCount);
+  for (uint32_t idx = 0; idx < completeCount; ++idx) {
+    mUpdateCompletions.AppendElement(aAddCompletes[idx].CompleteHash());
+  }
+  aAddCompletes.Clear();
+  mUpdateCompletions.Sort();
+
+  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
+                        static_cast<uint32_t>(aAddPrefixes.Length()));
+  nsresult rv = ConstructPrefixSet(aAddPrefixes);
+  NS_ENSURE_SUCCESS(rv, rv);
+  mPrimed = true;
+  return NS_OK;
+}
+
+// Asks the prefix set to fill aAddPrefixes. An unprimed cache (e.g. a
+// brand-new table) is not an error: aAddPrefixes is simply left untouched.
+nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
+{
+  if (mPrimed) {
+    return mPrefixSet->GetPrefixesNative(aAddPrefixes);
+  }
+  LOG(("GetPrefixes from empty LookupCache"));
+  return NS_OK;
+}
+
+// Reloads mUpdateCompletions from the table's HashStore. The array is sorted
+// afterwards, as in Build(), because Has() binary-searches it.
+nsresult LookupCacheV2::ReadCompletions()
+{
+  HashStore store(mTableName, mProvider, mRootStoreDirectory);
+  nsresult rv = store.Open();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  const AddCompleteArray& addComplete = store.AddCompletes();
+  mUpdateCompletions.Clear();
+  mUpdateCompletions.SetCapacity(addComplete.Length());
+  for (uint32_t i = 0; i < addComplete.Length(); i++) {
+    mUpdateCompletions.AppendElement(addComplete[i].complete);
+  }
+  mUpdateCompletions.Sort();
+  return NS_OK;
+}
+
+// Resets the prefix set by handing it a null, zero-length prefix list.
+nsresult LookupCacheV2::ClearPrefixes()
+{
+  return mPrefixSet->SetPrefixes(nullptr, 0);
+}
+
+// Delegates to mPrefixSet->StoreToFile(aFile) and returns its result.
+nsresult LookupCacheV2::StoreToFile(nsIFile* aFile)
+{
+  return mPrefixSet->StoreToFile(aFile);
+}
+
+// Delegates to mPrefixSet->LoadFromFile(aFile) and returns its result.
+nsresult LookupCacheV2::LoadFromFile(nsIFile* aFile)
+{
+  return mPrefixSet->LoadFromFile(aFile);
+}
+
+// Returns mPrefixSet->SizeOfIncludingThis() measured with moz_malloc_size_of.
+size_t LookupCacheV2::SizeOfPrefixSet()
+{
+  return mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
+}
+
+#ifdef DEBUG
+// Debug-only sanity check: asserts that each element of aArray is <= the
+// element that follows it. Arrays with fewer than two elements trivially
+// pass.
+template <class T>
+static void EnsureSorted(T* aArray)
+{
+  typedef typename T::elem_type Elem;
+  Elem* const end = aArray->Elements() + aArray->Length();
+  Elem* previous = aArray->Elements();
+  if (previous == end) {
+    return;
+  }
+
+  for (Elem* iter = previous + 1; iter != end; previous = iter, ++iter) {
+    MOZ_ASSERT(*previous <= *iter);
+  }
+}
+#endif
+
+// Replaces the contents of mPrefixSet with the 32-bit prefixes taken from
+// aAddPrefixes, which is emptied once its values have been copied out.
+// Debug builds assert the incoming order is sorted. Sets mPrimed on success.
+nsresult LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
+{
+  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
+
+  // Reserve fallibly: an allocation failure becomes NS_ERROR_OUT_OF_MEMORY.
+  nsTArray<uint32_t> prefixes;
+  if (!prefixes.SetCapacity(aAddPrefixes.Length(), fallible)) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+  for (uint32_t idx = 0; idx < aAddPrefixes.Length(); ++idx) {
+    prefixes.AppendElement(aAddPrefixes[idx].PrefixHash().ToUint32());
+  }
+  aAddPrefixes.Clear();
+
+#ifdef DEBUG
+  // PrefixSet requires sorted order
+  EnsureSorted(&prefixes);
+#endif
+
+  // construct new one, replace old entries
+  nsresult rv = mPrefixSet->SetPrefixes(prefixes.Elements(), prefixes.Length());
+  NS_ENSURE_SUCCESS(rv, rv);
+
+#ifdef DEBUG
+  uint32_t size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
+  LOG(("SB tree done, size = %d bytes\n", size));
+#endif
+
+  mPrimed = true;
+  return NS_OK;
+}
+
+#if defined(DEBUG)
+// Debug helper: logs each update completion in hex (if logging is enabled).
+void LookupCacheV2::DumpCompletions()
+{
+  if (LOG_ENABLED()) {
+    const uint32_t count = mUpdateCompletions.Length();
+    for (uint32_t idx = 0; idx < count; ++idx) {
+      nsAutoCString hex;
+      mUpdateCompletions[idx].ToHexString(hex);
+      LOG(("Update: %s", hex.get()));
+    }
+  }
+}
+#endif
+
+} // namespace safebrowsing
+} // namespace mozilla