diff options
Diffstat (limited to 'toolkit/components/url-classifier/LookupCacheV4.cpp')
-rw-r--r-- | toolkit/components/url-classifier/LookupCacheV4.cpp | 584 |
1 files changed, 584 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/LookupCacheV4.cpp b/toolkit/components/url-classifier/LookupCacheV4.cpp new file mode 100644 index 000000000..7258ae358 --- /dev/null +++ b/toolkit/components/url-classifier/LookupCacheV4.cpp @@ -0,0 +1,584 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LookupCacheV4.h" +#include "HashStore.h" +#include "mozilla/Unused.h" +#include <string> + +// MOZ_LOG=UrlClassifierDbService:5 +extern mozilla::LazyLogModule gUrlClassifierDbServiceLog; +#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args) +#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug) + +#define METADATA_SUFFIX NS_LITERAL_CSTRING(".metadata") + +namespace mozilla { +namespace safebrowsing { + +const int LookupCacheV4::VER = 4; + +// Prefixes coming from updates and VLPrefixSet are both stored in the HashTable +// where the (key, value) pair is a prefix size and a lexicographic-sorted string. +// The difference is prefixes from updates use std:string(to avoid additional copies) +// and prefixes from VLPrefixSet use nsCString. +// This class provides a common interface for the partial update algorithm to make it +// easier to operate on two different kind prefix string map.. +class VLPrefixSet +{ +public: + explicit VLPrefixSet(const PrefixStringMap& aMap); + explicit VLPrefixSet(const TableUpdateV4::PrefixStdStringMap& aMap); + + // This function will merge the prefix map in VLPrefixSet to aPrefixMap. + void Merge(PrefixStringMap& aPrefixMap); + + // Find the smallest string from the map in VLPrefixSet. + bool GetSmallestPrefix(nsDependentCSubstring& aOutString); + + // Return the number of prefixes in the map + uint32_t Count() const { return mCount; } + +private: + // PrefixString structure contains a lexicographic-sorted string with + // a |pos| variable to indicate which substring we are pointing to right now. + // |pos| increases each time GetSmallestPrefix finds the smallest string. + struct PrefixString { + PrefixString(const nsACString& aStr, uint32_t aSize) + : pos(0) + , size(aSize) + { + data.Rebind(aStr.BeginReading(), aStr.Length()); + } + + const char* get() { + return pos < data.Length() ? data.BeginReading() + pos : nullptr; + } + void next() { pos += size; } + uint32_t remaining() { return data.Length() - pos; } + + nsDependentCSubstring data; + uint32_t pos; + uint32_t size; + }; + + nsClassHashtable<nsUint32HashKey, PrefixString> mMap; + uint32_t mCount; +}; + +nsresult +LookupCacheV4::Init() +{ + mVLPrefixSet = new VariableLengthPrefixSet(); + nsresult rv = mVLPrefixSet->Init(mTableName); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult +LookupCacheV4::Has(const Completion& aCompletion, + bool* aHas, bool* aComplete) +{ + *aHas = false; + + uint32_t length = 0; + nsDependentCSubstring fullhash; + fullhash.Rebind((const char *)aCompletion.buf, COMPLETE_SIZE); + + nsresult rv = mVLPrefixSet->Matches(fullhash, &length); + NS_ENSURE_SUCCESS(rv, rv); + + *aHas = length >= PREFIX_SIZE; + *aComplete = length == COMPLETE_SIZE; + + if (LOG_ENABLED()) { + uint32_t prefix = aCompletion.ToUint32(); + LOG(("Probe in V4 %s: %X, found %d, complete %d", mTableName.get(), + prefix, *aHas, *aComplete)); + } + + return NS_OK; +} + +nsresult +LookupCacheV4::Build(PrefixStringMap& aPrefixMap) +{ + return mVLPrefixSet->SetPrefixes(aPrefixMap); +} + +nsresult +LookupCacheV4::GetPrefixes(PrefixStringMap& aPrefixMap) +{ + return mVLPrefixSet->GetPrefixes(aPrefixMap); +} + +nsresult +LookupCacheV4::ClearPrefixes() +{ + // Clear by seting a empty map + PrefixStringMap map; + return mVLPrefixSet->SetPrefixes(map); +} + +nsresult +LookupCacheV4::StoreToFile(nsIFile* aFile) +{ + return mVLPrefixSet->StoreToFile(aFile); +} + +nsresult +LookupCacheV4::LoadFromFile(nsIFile* aFile) +{ + nsresult rv = mVLPrefixSet->LoadFromFile(aFile); + if (NS_FAILED(rv)) { + return rv; + } + + nsCString state, checksum; + rv = LoadMetadata(state, checksum); + if (NS_FAILED(rv)) { + return rv; + } + + rv = VerifyChecksum(checksum); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_VLPS_LOAD_CORRUPT, + rv == NS_ERROR_FILE_CORRUPTED); + + return rv; +} + +size_t +LookupCacheV4::SizeOfPrefixSet() +{ + return mVLPrefixSet->SizeOfIncludingThis(moz_malloc_size_of); +} + +static void +AppendPrefixToMap(PrefixStringMap& prefixes, nsDependentCSubstring& prefix) +{ + if (!prefix.Length()) { + return; + } + + nsCString* prefixString = prefixes.LookupOrAdd(prefix.Length()); + prefixString->Append(prefix.BeginReading(), prefix.Length()); +} + +// Read prefix into a buffer and also update the hash which +// keeps track of the checksum +static void +UpdateChecksum(nsICryptoHash* aCrypto, const nsACString& aPrefix) +{ + MOZ_ASSERT(aCrypto); + aCrypto->Update(reinterpret_cast<uint8_t*>(const_cast<char*>( + aPrefix.BeginReading())), + aPrefix.Length()); +} + +// Please see https://bug1287058.bmoattachments.org/attachment.cgi?id=8795366 +// for detail about partial update algorithm. +nsresult +LookupCacheV4::ApplyUpdate(TableUpdateV4* aTableUpdate, + PrefixStringMap& aInputMap, + PrefixStringMap& aOutputMap) +{ + MOZ_ASSERT(aOutputMap.IsEmpty()); + + nsCOMPtr<nsICryptoHash> crypto; + nsresult rv = InitCrypto(crypto); + if (NS_FAILED(rv)) { + return rv; + } + + // oldPSet contains prefixes we already have or we just merged last round. + // addPSet contains prefixes stored in tableUpdate which should be merged with oldPSet. + VLPrefixSet oldPSet(aInputMap); + VLPrefixSet addPSet(aTableUpdate->Prefixes()); + + // RemovalIndiceArray is a sorted integer array indicating the index of prefix we should + // remove from the old prefix set(according to lexigraphic order). + // |removalIndex| is the current index of RemovalIndiceArray. + // |numOldPrefixPicked| is used to record how many prefixes we picked from the old map. + TableUpdateV4::RemovalIndiceArray& removalArray = aTableUpdate->RemovalIndices(); + uint32_t removalIndex = 0; + int32_t numOldPrefixPicked = -1; + + nsDependentCSubstring smallestOldPrefix; + nsDependentCSubstring smallestAddPrefix; + + bool isOldMapEmpty = false, isAddMapEmpty = false; + + // This is used to avoid infinite loop for partial update algorithm. + // The maximum loops will be the number of old prefixes plus the number of add prefixes. + int32_t index = oldPSet.Count() + addPSet.Count() + 1; + for(;index > 0; index--) { + // Get smallest prefix from the old prefix set if we don't have one + if (smallestOldPrefix.IsEmpty() && !isOldMapEmpty) { + isOldMapEmpty = !oldPSet.GetSmallestPrefix(smallestOldPrefix); + } + + // Get smallest prefix from add prefix set if we don't have one + if (smallestAddPrefix.IsEmpty() && !isAddMapEmpty) { + isAddMapEmpty = !addPSet.GetSmallestPrefix(smallestAddPrefix); + } + + bool pickOld; + + // If both prefix sets are not empty, then compare to find the smaller one. + if (!isOldMapEmpty && !isAddMapEmpty) { + if (smallestOldPrefix == smallestAddPrefix) { + LOG(("Add prefix should not exist in the original prefix set.")); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_UPDATE_ERROR_TYPE, + DUPLICATE_PREFIX); + return NS_ERROR_FAILURE; + } + + // Compare the smallest string in old prefix set and add prefix set, + // merge the smaller one into new map to ensure merged string still + // follows lexigraphic order. + pickOld = smallestOldPrefix < smallestAddPrefix; + } else if (!isOldMapEmpty && isAddMapEmpty) { + pickOld = true; + } else if (isOldMapEmpty && !isAddMapEmpty) { + pickOld = false; + // If both maps are empty, then partial update is complete. + } else { + break; + } + + if (pickOld) { + numOldPrefixPicked++; + + // If the number of picks from old map matches the removalIndex, then this prefix + // will be removed by not merging it to new map. + if (removalIndex < removalArray.Length() && + numOldPrefixPicked == removalArray[removalIndex]) { + removalIndex++; + } else { + AppendPrefixToMap(aOutputMap, smallestOldPrefix); + UpdateChecksum(crypto, smallestOldPrefix); + } + smallestOldPrefix.SetLength(0); + } else { + AppendPrefixToMap(aOutputMap, smallestAddPrefix); + UpdateChecksum(crypto, smallestAddPrefix); + + smallestAddPrefix.SetLength(0); + } + } + + // We expect index will be greater to 0 because max number of runs will be + // the number of original prefix plus add prefix. + if (index <= 0) { + LOG(("There are still prefixes remaining after reaching maximum runs.")); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_UPDATE_ERROR_TYPE, + INFINITE_LOOP); + return NS_ERROR_FAILURE; + } + + if (removalIndex < removalArray.Length()) { + LOG(("There are still prefixes to remove after exhausting the old PrefixSet.")); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_UPDATE_ERROR_TYPE, + WRONG_REMOVAL_INDICES); + return NS_ERROR_FAILURE; + } + + nsAutoCString checksum; + crypto->Finish(false, checksum); + if (aTableUpdate->Checksum().IsEmpty()) { + LOG(("Update checksum missing.")); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_UPDATE_ERROR_TYPE, + MISSING_CHECKSUM); + + // Generate our own checksum to tableUpdate to ensure there is always + // checksum in .metadata + std::string stdChecksum(checksum.BeginReading(), checksum.Length()); + aTableUpdate->NewChecksum(stdChecksum); + + } else if (aTableUpdate->Checksum() != checksum){ + LOG(("Checksum mismatch after applying partial update")); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_UPDATE_ERROR_TYPE, + CHECKSUM_MISMATCH); + return NS_ERROR_FAILURE; + } + + return NS_OK; +} + +nsresult +LookupCacheV4::InitCrypto(nsCOMPtr<nsICryptoHash>& aCrypto) +{ + nsresult rv; + aCrypto = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv); + if (NS_WARN_IF(NS_FAILED(rv))) { + return rv; + } + + rv = aCrypto->Init(nsICryptoHash::SHA256); + NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "InitCrypto failed"); + + return rv; +} + +nsresult +LookupCacheV4::VerifyChecksum(const nsACString& aChecksum) +{ + nsCOMPtr<nsICryptoHash> crypto; + nsresult rv = InitCrypto(crypto); + if (NS_FAILED(rv)) { + return rv; + } + + PrefixStringMap map; + mVLPrefixSet->GetPrefixes(map); + + VLPrefixSet loadPSet(map); + uint32_t index = loadPSet.Count() + 1; + for(;index > 0; index--) { + nsDependentCSubstring prefix; + if (!loadPSet.GetSmallestPrefix(prefix)) { + break; + } + UpdateChecksum(crypto, prefix); + } + + nsAutoCString checksum; + crypto->Finish(false, checksum); + + if (checksum != aChecksum) { + LOG(("Checksum mismatch when loading prefixes from file.")); + return NS_ERROR_FILE_CORRUPTED; + } + + return NS_OK; +} + +////////////////////////////////////////////////////////////////////////// +// A set of lightweight functions for reading/writing value from/to file. + +namespace { + +template<typename T> +struct ValueTraits +{ + static uint32_t Length(const T& aValue) { return sizeof(T); } + static char* WritePtr(T& aValue, uint32_t aLength) { return (char*)&aValue; } + static const char* ReadPtr(const T& aValue) { return (char*)&aValue; } + static bool IsFixedLength() { return true; } +}; + +template<> +struct ValueTraits<nsACString> +{ + static bool IsFixedLength() { return false; } + + static uint32_t Length(const nsACString& aValue) + { + return aValue.Length(); + } + + static char* WritePtr(nsACString& aValue, uint32_t aLength) + { + aValue.SetLength(aLength); + return aValue.BeginWriting(); + } + + static const char* ReadPtr(const nsACString& aValue) + { + return aValue.BeginReading(); + } +}; + +template<typename T> static nsresult +WriteValue(nsIOutputStream *aOutputStream, const T& aValue) +{ + uint32_t writeLength = ValueTraits<T>::Length(aValue); + if (!ValueTraits<T>::IsFixedLength()) { + // We need to write out the variable value length. + nsresult rv = WriteValue(aOutputStream, writeLength); + NS_ENSURE_SUCCESS(rv, rv); + } + + // Write out the value. + auto valueReadPtr = ValueTraits<T>::ReadPtr(aValue); + uint32_t written; + nsresult rv = aOutputStream->Write(valueReadPtr, writeLength, &written); + if (NS_FAILED(rv) || written != writeLength) { + LOG(("Failed to write the value.")); + return NS_FAILED(rv) ? rv : NS_ERROR_FAILURE; + } + + return rv; +} + +template<typename T> static nsresult +ReadValue(nsIInputStream* aInputStream, T& aValue) +{ + nsresult rv; + + uint32_t readLength; + if (ValueTraits<T>::IsFixedLength()) { + readLength = ValueTraits<T>::Length(aValue); + } else { + // Read the variable value length from file. + nsresult rv = ReadValue(aInputStream, readLength); + NS_ENSURE_SUCCESS(rv, rv); + } + + // Read the value. + uint32_t read; + auto valueWritePtr = ValueTraits<T>::WritePtr(aValue, readLength); + rv = aInputStream->Read(valueWritePtr, readLength, &read); + if (NS_FAILED(rv) || read != readLength) { + LOG(("Failed to read the value.")); + return NS_FAILED(rv) ? rv : NS_ERROR_FAILURE; + } + + return rv; +} + +} // end of unnamed namespace. +//////////////////////////////////////////////////////////////////////// + +nsresult +LookupCacheV4::WriteMetadata(TableUpdateV4* aTableUpdate) +{ + NS_ENSURE_ARG_POINTER(aTableUpdate); + if (nsUrlClassifierDBService::ShutdownHasStarted()) { + return NS_ERROR_ABORT; + } + + nsCOMPtr<nsIFile> metaFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(metaFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = metaFile->AppendNative(mTableName + METADATA_SUFFIX); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr<nsIOutputStream> outputStream; + rv = NS_NewLocalFileOutputStream(getter_AddRefs(outputStream), metaFile, + PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE); + if (!NS_SUCCEEDED(rv)) { + LOG(("Unable to create file to store metadata.")); + return rv; + } + + // Write the state. + rv = WriteValue(outputStream, aTableUpdate->ClientState()); + if (NS_FAILED(rv)) { + LOG(("Failed to write the list state.")); + return rv; + } + + // Write the checksum. + rv = WriteValue(outputStream, aTableUpdate->Checksum()); + if (NS_FAILED(rv)) { + LOG(("Failed to write the list checksum.")); + return rv; + } + + return rv; +} + +nsresult +LookupCacheV4::LoadMetadata(nsACString& aState, nsACString& aChecksum) +{ + nsCOMPtr<nsIFile> metaFile; + nsresult rv = mStoreDirectory->Clone(getter_AddRefs(metaFile)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = metaFile->AppendNative(mTableName + METADATA_SUFFIX); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr<nsIInputStream> localInFile; + rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), metaFile, + PR_RDONLY | nsIFile::OS_READAHEAD); + if (NS_FAILED(rv)) { + LOG(("Unable to open metadata file.")); + return rv; + } + + // Read the list state. + rv = ReadValue(localInFile, aState); + if (NS_FAILED(rv)) { + LOG(("Failed to read state.")); + return rv; + } + + // Read the checksum. + rv = ReadValue(localInFile, aChecksum); + if (NS_FAILED(rv)) { + LOG(("Failed to read checksum.")); + return rv; + } + + return rv; +} + +VLPrefixSet::VLPrefixSet(const PrefixStringMap& aMap) + : mCount(0) +{ + for (auto iter = aMap.ConstIter(); !iter.Done(); iter.Next()) { + uint32_t size = iter.Key(); + mMap.Put(size, new PrefixString(*iter.Data(), size)); + mCount += iter.Data()->Length() / size; + } +} + +VLPrefixSet::VLPrefixSet(const TableUpdateV4::PrefixStdStringMap& aMap) + : mCount(0) +{ + for (auto iter = aMap.ConstIter(); !iter.Done(); iter.Next()) { + uint32_t size = iter.Key(); + mMap.Put(size, new PrefixString(iter.Data()->GetPrefixString(), size)); + mCount += iter.Data()->GetPrefixString().Length() / size; + } +} + +void +VLPrefixSet::Merge(PrefixStringMap& aPrefixMap) { + for (auto iter = mMap.ConstIter(); !iter.Done(); iter.Next()) { + nsCString* prefixString = aPrefixMap.LookupOrAdd(iter.Key()); + PrefixString* str = iter.Data(); + + if (str->get()) { + prefixString->Append(str->get(), str->remaining()); + } + } +} + +bool +VLPrefixSet::GetSmallestPrefix(nsDependentCSubstring& aOutString) { + PrefixString* pick = nullptr; + for (auto iter = mMap.ConstIter(); !iter.Done(); iter.Next()) { + PrefixString* str = iter.Data(); + + if (!str->get()) { + continue; + } + + if (aOutString.IsEmpty()) { + aOutString.Rebind(str->get(), iter.Key()); + pick = str; + continue; + } + + nsDependentCSubstring cur(str->get(), iter.Key()); + if (cur < aOutString) { + aOutString.Rebind(str->get(), iter.Key()); + pick = str; + } + } + + if (pick) { + pick->next(); + } + + return pick != nullptr; +} + +} // namespace safebrowsing +} // namespace mozilla |