Diffstat (limited to 'toolkit/components/url-classifier/LookupCache.cpp')
-rw-r--r--  toolkit/components/url-classifier/LookupCache.cpp  599
1 file changed, 599 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/LookupCache.cpp b/toolkit/components/url-classifier/LookupCache.cpp
new file mode 100644
index 000000000..5a3b1e36d
--- /dev/null
+++ b/toolkit/components/url-classifier/LookupCache.cpp
@@ -0,0 +1,599 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LookupCache.h"
+#include "HashStore.h"
+#include "nsISeekableStream.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/Logging.h"
+#include "nsNetUtil.h"
+#include "prprf.h"
+#include "Classifier.h"
+
+// This class is the main entry point for all real lookups; note that lookups
+// are not performed against the actual HashStore. The HashStore exists solely
+// to store the data needed to handle updates from the protocol.
+
+// This module provides a front for PrefixSet, mUpdateCompletions,
+// and mGetHashCache, which together contain everything needed to
+// provide a classification as long as the data is up to date.
+
+// PrefixSet stores and provides lookups for 4-byte prefixes.
+// mUpdateCompletions contains 32-byte completions which were
+// contained in updates. They are retrieved from the HashStore (.sbstore)
+// on startup.
+// mGetHashCache contains 32-byte completions which were
+// returned from the gethash server. They are not serialized,
+// only cached until the next update.
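+//
+// A rough sketch of the typical flow (see the V2 methods below):
+//   Init()  - create the underlying nsUrlClassifierPrefixSet
+//   Open()  - load the serialized PrefixSet and, for V2, the completions
+//             stored in the HashStore
+//   Has()   - check a Completion: its 4-byte prefix against the PrefixSet,
+//             and the full hash against mUpdateCompletions/mGetHashCache
+//   Build() - rebuild the PrefixSet and mUpdateCompletions from update data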
+
+// Name of the persistent PrefixSet storage
+#define PREFIXSET_SUFFIX ".pset"
+
+// MOZ_LOG=UrlClassifierDbService:5
+extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
+#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
+#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
+
+namespace mozilla {
+namespace safebrowsing {
+
+const int LookupCacheV2::VER = 2;
+
+LookupCache::LookupCache(const nsACString& aTableName,
+ const nsACString& aProvider,
+ nsIFile* aRootStoreDir)
+ : mPrimed(false)
+ , mTableName(aTableName)
+ , mProvider(aProvider)
+ , mRootStoreDirectory(aRootStoreDir)
+{
+ UpdateRootDirHandle(mRootStoreDirectory);
+}
+
+nsresult
+LookupCache::Open()
+{
+ LOG(("Loading PrefixSet"));
+ nsresult rv = LoadPrefixSet();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+nsresult
+LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
+{
+ nsresult rv;
+
+ if (aNewRootStoreDirectory != mRootStoreDirectory) {
+ rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory,
+ mTableName,
+ mProvider,
+ getter_AddRefs(mStoreDirectory));
+
+ if (NS_FAILED(rv)) {
+ LOG(("Failed to get private store directory for %s", mTableName.get()));
+ mStoreDirectory = mRootStoreDirectory;
+ }
+
+ if (LOG_ENABLED()) {
+ nsString path;
+ mStoreDirectory->GetPath(path);
+ LOG(("Private store directory for %s is %s", mTableName.get(),
+ NS_ConvertUTF16toUTF8(path).get()));
+ }
+
+ return rv;
+}
+
+nsresult
+LookupCache::Reset()
+{
+ LOG(("LookupCache resetting"));
+
+ nsCOMPtr<nsIFile> prefixsetFile;
+ nsresult rv = mStoreDirectory->Clone(getter_AddRefs(prefixsetFile));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = prefixsetFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = prefixsetFile->Remove(false);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ ClearAll();
+
+ return NS_OK;
+}
+
+nsresult
+LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
+{
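+  // mGetHashCache is kept sorted so that Has() can search it with
+  // BinaryIndexOf(); re-sort after appending the new completions below.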
+ for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
+ if (mGetHashCache.BinaryIndexOf(aAddCompletes[i].CompleteHash()) == mGetHashCache.NoIndex) {
+ mGetHashCache.AppendElement(aAddCompletes[i].CompleteHash());
+ }
+ }
+ mGetHashCache.Sort();
+
+ return NS_OK;
+}
+
+#if defined(DEBUG)
+void
+LookupCache::DumpCache()
+{
+ if (!LOG_ENABLED())
+ return;
+
+ for (uint32_t i = 0; i < mGetHashCache.Length(); i++) {
+ nsAutoCString str;
+ mGetHashCache[i].ToHexString(str);
+ LOG(("Caches: %s", str.get()));
+ }
+}
+#endif
+
+nsresult
+LookupCache::WriteFile()
+{
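+  // Serialize the PrefixSet to "<table name>.pset" in the private store
+  // directory. A failure to store is only warned about; NS_OK is returned
+  // either way.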
+ if (nsUrlClassifierDBService::ShutdownHasStarted()) {
+ return NS_ERROR_ABORT;
+ }
+
+ nsCOMPtr<nsIFile> psFile;
+ nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = StoreToFile(psFile);
+ NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");
+
+ return NS_OK;
+}
+
+void
+LookupCache::ClearAll()
+{
+ ClearCache();
+ ClearPrefixes();
+ mPrimed = false;
+}
+
+void
+LookupCache::ClearCache()
+{
+ mGetHashCache.Clear();
+}
+
+/* static */ bool
+LookupCache::IsCanonicalizedIP(const nsACString& aHost)
+{
+ // The canonicalization process will have left IP addresses in dotted
+ // decimal with no surprises.
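+  // The trailing %c makes PR_sscanf return 5 rather than 4 when anything
+  // follows the last octet (e.g. "1.2.3.4x"), so requiring exactly 4
+  // conversions below rejects hosts that are not a bare dotted quad.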
+ uint32_t i1, i2, i3, i4;
+ char c;
+ if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
+ &i1, &i2, &i3, &i4, &c) == 4) {
+ return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
+ }
+
+ return false;
+}
+
+/* static */ nsresult
+LookupCache::GetLookupFragments(const nsACString& aSpec,
+ nsTArray<nsCString>* aFragments)
+
+{
+ aFragments->Clear();
+
+ nsACString::const_iterator begin, end, iter;
+ aSpec.BeginReading(begin);
+ aSpec.EndReading(end);
+
+ iter = begin;
+ if (!FindCharInReadable('/', iter, end)) {
+ return NS_OK;
+ }
+
+ const nsCSubstring& host = Substring(begin, iter++);
+ nsAutoCString path;
+ path.Assign(Substring(iter, end));
+
+ /**
+ * From the protocol doc:
+ * For the hostname, the client will try at most 5 different strings. They
+ * are:
+ * a) The exact hostname of the url
+ * b) The 4 hostnames formed by starting with the last 5 components and
+   *    successively removing the leading component. The top-level component
+ * can be skipped. This is not done if the hostname is a numerical IP.
+ */
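+  // For example (assuming MAX_HOST_COMPONENTS is 5, matching the protocol
+  // text above), the host "a.b.c.d.e.f.com" yields the host fragments
+  // "a.b.c.d.e.f.com", "f.com", "e.f.com", "d.e.f.com" and "c.d.e.f.com".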
+ nsTArray<nsCString> hosts;
+ hosts.AppendElement(host);
+
+ if (!IsCanonicalizedIP(host)) {
+ host.BeginReading(begin);
+ host.EndReading(end);
+ int numHostComponents = 0;
+ while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
+ numHostComponents < MAX_HOST_COMPONENTS) {
+ // don't bother checking toplevel domains
+ if (++numHostComponents >= 2) {
+ host.EndReading(iter);
+ hosts.AppendElement(Substring(end, iter));
+ }
+ end = begin;
+ host.BeginReading(begin);
+ }
+ }
+
+ /**
+ * From the protocol doc:
+ * For the path, the client will also try at most 6 different strings.
+ * They are:
+ * a) the exact path of the url, including query parameters
+ * b) the exact path of the url, without query parameters
+ * c) the 4 paths formed by starting at the root (/) and
+ * successively appending path components, including a trailing
+ * slash. This behavior should only extend up to the next-to-last
+ * path component, that is, a trailing slash should never be
+ * appended that was not present in the original url.
+ */
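+  // For example, a path of "1/2.html?param=1" (the part after "host/")
+  // produces the path fragments "1/2.html?param=1", "1/2.html", "1/" and "",
+  // the last of which matches whole-domain entries.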
+ nsTArray<nsCString> paths;
+ nsAutoCString pathToAdd;
+
+ path.BeginReading(begin);
+ path.EndReading(end);
+ iter = begin;
+ if (FindCharInReadable('?', iter, end)) {
+ pathToAdd = Substring(begin, iter);
+ paths.AppendElement(pathToAdd);
+ end = iter;
+ }
+
+ int numPathComponents = 1;
+ iter = begin;
+ while (FindCharInReadable('/', iter, end) &&
+ numPathComponents < MAX_PATH_COMPONENTS) {
+ iter++;
+ pathToAdd.Assign(Substring(begin, iter));
+ paths.AppendElement(pathToAdd);
+ numPathComponents++;
+ }
+
+ // If we haven't already done so, add the full path
+ if (!pathToAdd.Equals(path)) {
+ paths.AppendElement(path);
+ }
+ // Check an empty path (for whole-domain blacklist entries)
+ paths.AppendElement(EmptyCString());
+
+ for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
+ for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
+ nsCString key;
+ key.Assign(hosts[hostIndex]);
+ key.Append('/');
+ key.Append(paths[pathIndex]);
+ LOG(("Checking fragment %s", key.get()));
+
+ aFragments->AppendElement(key);
+ }
+ }
+
+ return NS_OK;
+}
+
+/* static */ nsresult
+LookupCache::GetHostKeys(const nsACString& aSpec,
+ nsTArray<nsCString>* aHostKeys)
+{
+ nsACString::const_iterator begin, end, iter;
+ aSpec.BeginReading(begin);
+ aSpec.EndReading(end);
+
+ iter = begin;
+ if (!FindCharInReadable('/', iter, end)) {
+ return NS_OK;
+ }
+
+ const nsCSubstring& host = Substring(begin, iter);
+
+ if (IsCanonicalizedIP(host)) {
+ nsCString *key = aHostKeys->AppendElement();
+ if (!key)
+ return NS_ERROR_OUT_OF_MEMORY;
+
+ key->Assign(host);
+ key->Append("/");
+ return NS_OK;
+ }
+
+ nsTArray<nsCString> hostComponents;
+ ParseString(PromiseFlatCString(host), '.', hostComponents);
+
+ if (hostComponents.Length() < 2) {
+ // no host or toplevel host, this won't match anything in the db
+ return NS_OK;
+ }
+
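+  // For example, "a.b.c.example.com" produces the host keys "example.com/"
+  // and "c.example.com/".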
+ // First check with two domain components
+ int32_t last = int32_t(hostComponents.Length()) - 1;
+ nsCString *lookupHost = aHostKeys->AppendElement();
+ if (!lookupHost)
+ return NS_ERROR_OUT_OF_MEMORY;
+
+ lookupHost->Assign(hostComponents[last - 1]);
+ lookupHost->Append(".");
+ lookupHost->Append(hostComponents[last]);
+ lookupHost->Append("/");
+
+ // Now check with three domain components
+ if (hostComponents.Length() > 2) {
+ nsCString *lookupHost2 = aHostKeys->AppendElement();
+ if (!lookupHost2)
+ return NS_ERROR_OUT_OF_MEMORY;
+ lookupHost2->Assign(hostComponents[last - 2]);
+ lookupHost2->Append(".");
+ lookupHost2->Append(*lookupHost);
+ }
+
+ return NS_OK;
+}
+
+nsresult
+LookupCache::LoadPrefixSet()
+{
+ nsCOMPtr<nsIFile> psFile;
+ nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ bool exists;
+ rv = psFile->Exists(&exists);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (exists) {
+ LOG(("stored PrefixSet exists, loading from disk"));
+ rv = LoadFromFile(psFile);
+ if (NS_FAILED(rv)) {
+ if (rv == NS_ERROR_FILE_CORRUPTED) {
+ Reset();
+ }
+ return rv;
+ }
+ mPrimed = true;
+ } else {
+ LOG(("no (usable) stored PrefixSet found"));
+ }
+
+#ifdef DEBUG
+ if (mPrimed) {
+ uint32_t size = SizeOfPrefixSet();
+ LOG(("SB tree done, size = %d bytes\n", size));
+ }
+#endif
+
+ return NS_OK;
+}
+
+nsresult
+LookupCacheV2::Init()
+{
+ mPrefixSet = new nsUrlClassifierPrefixSet();
+ nsresult rv = mPrefixSet->Init(mTableName);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+nsresult
+LookupCacheV2::Open()
+{
+ nsresult rv = LookupCache::Open();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ LOG(("Reading Completions"));
+ rv = ReadCompletions();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+void
+LookupCacheV2::ClearAll()
+{
+ LookupCache::ClearAll();
+ mUpdateCompletions.Clear();
+}
+
+nsresult
+LookupCacheV2::Has(const Completion& aCompletion,
+ bool* aHas, bool* aComplete)
+{
+ *aHas = *aComplete = false;
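+  // *aHas: the 4-byte prefix of aCompletion is in the PrefixSet.
+  // *aComplete: the full 32-byte hash is known locally, either from an
+  // update (mUpdateCompletions) or from an earlier gethash response
+  // (mGetHashCache); in that case *aHas is set as well.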
+
+ uint32_t prefix = aCompletion.ToUint32();
+
+ bool found;
+ nsresult rv = mPrefixSet->Contains(prefix, &found);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));
+
+ if (found) {
+ *aHas = true;
+ }
+
+ if ((mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex) ||
+ (mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex)) {
+ LOG(("Complete in %s", mTableName.get()));
+ *aComplete = true;
+ *aHas = true;
+ }
+
+ return NS_OK;
+}
+
+nsresult
+LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
+ AddCompleteArray& aAddCompletes)
+{
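+  // Consume the update data: the 32-byte completions are copied into
+  // mUpdateCompletions and the 4-byte prefixes are handed to
+  // ConstructPrefixSet(); both input arrays are cleared in the process.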
+ Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
+ static_cast<uint32_t>(aAddCompletes.Length()));
+
+ mUpdateCompletions.Clear();
+ mUpdateCompletions.SetCapacity(aAddCompletes.Length());
+ for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
+ mUpdateCompletions.AppendElement(aAddCompletes[i].CompleteHash());
+ }
+ aAddCompletes.Clear();
+ mUpdateCompletions.Sort();
+
+ Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
+ static_cast<uint32_t>(aAddPrefixes.Length()));
+
+ nsresult rv = ConstructPrefixSet(aAddPrefixes);
+ NS_ENSURE_SUCCESS(rv, rv);
+ mPrimed = true;
+
+ return NS_OK;
+}
+
+nsresult
+LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
+{
+ if (!mPrimed) {
+    // This can happen if it's a new table, so it is not an error.
+ LOG(("GetPrefixes from empty LookupCache"));
+ return NS_OK;
+ }
+ return mPrefixSet->GetPrefixesNative(aAddPrefixes);
+}
+
+nsresult
+LookupCacheV2::ReadCompletions()
+{
+ HashStore store(mTableName, mProvider, mRootStoreDirectory);
+
+ nsresult rv = store.Open();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ mUpdateCompletions.Clear();
+
+ const AddCompleteArray& addComplete = store.AddCompletes();
+ for (uint32_t i = 0; i < addComplete.Length(); i++) {
+ mUpdateCompletions.AppendElement(addComplete[i].complete);
+ }
+
+ return NS_OK;
+}
+
+nsresult
+LookupCacheV2::ClearPrefixes()
+{
+ return mPrefixSet->SetPrefixes(nullptr, 0);
+}
+
+nsresult
+LookupCacheV2::StoreToFile(nsIFile* aFile)
+{
+ return mPrefixSet->StoreToFile(aFile);
+}
+
+nsresult
+LookupCacheV2::LoadFromFile(nsIFile* aFile)
+{
+ return mPrefixSet->LoadFromFile(aFile);
+}
+
+size_t
+LookupCacheV2::SizeOfPrefixSet()
+{
+ return mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
+}
+
+#ifdef DEBUG
+template <class T>
+static void EnsureSorted(T* aArray)
+{
+ typename T::elem_type* start = aArray->Elements();
+ typename T::elem_type* end = aArray->Elements() + aArray->Length();
+ typename T::elem_type* iter = start;
+ typename T::elem_type* previous = start;
+
+ while (iter != end) {
+ previous = iter;
+ ++iter;
+ if (iter != end) {
+ MOZ_ASSERT(*previous <= *iter);
+ }
+ }
+ return;
+}
+#endif
+
+nsresult
+LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
+{
+ Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
+
+ nsTArray<uint32_t> array;
+ if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ for (uint32_t i = 0; i < aAddPrefixes.Length(); i++) {
+ array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
+ }
+ aAddPrefixes.Clear();
+
+#ifdef DEBUG
+ // PrefixSet requires sorted order
+ EnsureSorted(&array);
+#endif
+
+ // construct new one, replace old entries
+ nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
+ NS_ENSURE_SUCCESS(rv, rv);
+
+#ifdef DEBUG
+ uint32_t size;
+ size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
+ LOG(("SB tree done, size = %d bytes\n", size));
+#endif
+
+ mPrimed = true;
+
+ return NS_OK;
+}
+
+#if defined(DEBUG)
+void
+LookupCacheV2::DumpCompletions()
+{
+ if (!LOG_ENABLED())
+ return;
+
+ for (uint32_t i = 0; i < mUpdateCompletions.Length(); i++) {
+ nsAutoCString str;
+ mUpdateCompletions[i].ToHexString(str);
+ LOG(("Update: %s", str.get()));
+ }
+}
+#endif
+
+} // namespace safebrowsing
+} // namespace mozilla