diff options
Diffstat (limited to 'toolkit/components/url-classifier/HashStore.h')
-rw-r--r-- | toolkit/components/url-classifier/HashStore.h | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/HashStore.h b/toolkit/components/url-classifier/HashStore.h new file mode 100644 index 000000000..3473b2f02 --- /dev/null +++ b/toolkit/components/url-classifier/HashStore.h @@ -0,0 +1,314 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef HashStore_h__ +#define HashStore_h__ + +#include "Entries.h" +#include "ChunkSet.h" + +#include "nsString.h" +#include "nsTArray.h" +#include "nsIFile.h" +#include "nsIFileStreams.h" +#include "nsCOMPtr.h" +#include "nsClassHashtable.h" +#include "safebrowsing.pb.h" +#include <string> + +namespace mozilla { +namespace safebrowsing { + +// The abstract class of TableUpdateV2 and TableUpdateV4. This +// is convenient for passing the TableUpdate* around associated +// with v2 and v4 instance. +class TableUpdate { +public: + TableUpdate(const nsACString& aTable) + : mTable(aTable) + { + } + + virtual ~TableUpdate() {} + + // To be overriden. + virtual bool Empty() const = 0; + + // Common interfaces. + const nsCString& TableName() const { return mTable; } + + template<typename T> + static T* Cast(TableUpdate* aThat) { + return (T::TAG == aThat->Tag() ? reinterpret_cast<T*>(aThat) : nullptr); + } + +private: + virtual int Tag() const = 0; + + nsCString mTable; +}; + +// A table update is built from a single update chunk from the server. As the +// protocol parser processes each chunk, it constructs a table update with the +// new hashes. +class TableUpdateV2 : public TableUpdate { +public: + explicit TableUpdateV2(const nsACString& aTable) + : TableUpdate(aTable) {} + + bool Empty() const override { + return mAddChunks.Length() == 0 && + mSubChunks.Length() == 0 && + mAddExpirations.Length() == 0 && + mSubExpirations.Length() == 0 && + mAddPrefixes.Length() == 0 && + mSubPrefixes.Length() == 0 && + mAddCompletes.Length() == 0 && + mSubCompletes.Length() == 0; + } + + // Throughout, uint32_t aChunk refers only to the chunk number. Chunk data is + // stored in the Prefix structures. + MOZ_MUST_USE nsresult NewAddChunk(uint32_t aChunk) { + return mAddChunks.Set(aChunk); + }; + MOZ_MUST_USE nsresult NewSubChunk(uint32_t aChunk) { + return mSubChunks.Set(aChunk); + }; + MOZ_MUST_USE nsresult NewAddExpiration(uint32_t aChunk) { + return mAddExpirations.Set(aChunk); + }; + MOZ_MUST_USE nsresult NewSubExpiration(uint32_t aChunk) { + return mSubExpirations.Set(aChunk); + }; + MOZ_MUST_USE nsresult NewAddPrefix(uint32_t aAddChunk, const Prefix& aPrefix); + MOZ_MUST_USE nsresult NewSubPrefix(uint32_t aAddChunk, + const Prefix& aPrefix, + uint32_t aSubChunk); + MOZ_MUST_USE nsresult NewAddComplete(uint32_t aChunk, + const Completion& aCompletion); + MOZ_MUST_USE nsresult NewSubComplete(uint32_t aAddChunk, + const Completion& aCompletion, + uint32_t aSubChunk); + + ChunkSet& AddChunks() { return mAddChunks; } + ChunkSet& SubChunks() { return mSubChunks; } + + // Expirations for chunks. + ChunkSet& AddExpirations() { return mAddExpirations; } + ChunkSet& SubExpirations() { return mSubExpirations; } + + // Hashes associated with this chunk. + AddPrefixArray& AddPrefixes() { return mAddPrefixes; } + SubPrefixArray& SubPrefixes() { return mSubPrefixes; } + AddCompleteArray& AddCompletes() { return mAddCompletes; } + SubCompleteArray& SubCompletes() { return mSubCompletes; } + + // For downcasting. + static const int TAG = 2; + +private: + + // The list of chunk numbers that we have for each of the type of chunks. + ChunkSet mAddChunks; + ChunkSet mSubChunks; + ChunkSet mAddExpirations; + ChunkSet mSubExpirations; + + // 4-byte sha256 prefixes. + AddPrefixArray mAddPrefixes; + SubPrefixArray mSubPrefixes; + + // 32-byte hashes. + AddCompleteArray mAddCompletes; + SubCompleteArray mSubCompletes; + + virtual int Tag() const override { return TAG; } +}; + +// Structure for DBService/HashStore/Classifiers to update. +// It would contain the prefixes (both fixed and variable length) +// for addition and indices to removal. See Bug 1283009. +class TableUpdateV4 : public TableUpdate { +public: + struct PrefixStdString { + private: + std::string mStorage; + nsDependentCSubstring mString; + + public: + explicit PrefixStdString(std::string& aString) + { + aString.swap(mStorage); + mString.Rebind(mStorage.data(), mStorage.size()); + }; + + const nsACString& GetPrefixString() const { return mString; }; + }; + + typedef nsClassHashtable<nsUint32HashKey, PrefixStdString> PrefixStdStringMap; + typedef nsTArray<int32_t> RemovalIndiceArray; + +public: + explicit TableUpdateV4(const nsACString& aTable) + : TableUpdate(aTable) + , mFullUpdate(false) + { + } + + bool Empty() const override + { + return mPrefixesMap.IsEmpty() && mRemovalIndiceArray.IsEmpty(); + } + + bool IsFullUpdate() const { return mFullUpdate; } + PrefixStdStringMap& Prefixes() { return mPrefixesMap; } + RemovalIndiceArray& RemovalIndices() { return mRemovalIndiceArray; } + const nsACString& ClientState() const { return mClientState; } + const nsACString& Checksum() const { return mChecksum; } + + // For downcasting. + static const int TAG = 4; + + void SetFullUpdate(bool aIsFullUpdate) { mFullUpdate = aIsFullUpdate; } + void NewPrefixes(int32_t aSize, std::string& aPrefixes); + void NewRemovalIndices(const uint32_t* aIndices, size_t aNumOfIndices); + void SetNewClientState(const nsACString& aState) { mClientState = aState; } + void NewChecksum(const std::string& aChecksum); + +private: + virtual int Tag() const override { return TAG; } + + bool mFullUpdate; + PrefixStdStringMap mPrefixesMap; + RemovalIndiceArray mRemovalIndiceArray; + nsCString mClientState; + nsCString mChecksum; +}; + +// There is one hash store per table. +class HashStore { +public: + HashStore(const nsACString& aTableName, + const nsACString& aProvider, + nsIFile* aRootStoreFile); + ~HashStore(); + + const nsCString& TableName() const { return mTableName; } + + nsresult Open(); + // Add Prefixes are stored partly in the PrefixSet (contains the + // Prefix data organized for fast lookup/low RAM usage) and partly in the + // HashStore (Add Chunk numbers - only used for updates, slow retrieval). + // AugmentAdds function joins the separate datasets into one complete + // prefixes+chunknumbers dataset. + nsresult AugmentAdds(const nsTArray<uint32_t>& aPrefixes); + + ChunkSet& AddChunks(); + ChunkSet& SubChunks(); + AddPrefixArray& AddPrefixes() { return mAddPrefixes; } + SubPrefixArray& SubPrefixes() { return mSubPrefixes; } + AddCompleteArray& AddCompletes(); + SubCompleteArray& SubCompletes(); + + // ======= + // Updates + // ======= + // Begin the update process. Reads the store into memory. + nsresult BeginUpdate(); + + // Imports the data from a TableUpdate. + nsresult ApplyUpdate(TableUpdate &aUpdate); + + // Process expired chunks + nsresult Expire(); + + // Rebuild the store, Incorporating all the applied updates. + nsresult Rebuild(); + + // Write the current state of the store to disk. + // If you call between ApplyUpdate() and Rebuild(), you'll + // have a mess on your hands. + nsresult WriteFile(); + + // Wipe out all Completes. + void ClearCompletes(); + +private: + nsresult Reset(); + + nsresult ReadHeader(); + nsresult SanityCheck(); + nsresult CalculateChecksum(nsAutoCString& aChecksum, uint32_t aFileSize, + bool aChecksumPresent); + nsresult CheckChecksum(uint32_t aFileSize); + void UpdateHeader(); + + nsresult ReadCompletions(); + nsresult ReadChunkNumbers(); + nsresult ReadHashes(); + + nsresult ReadAddPrefixes(); + nsresult ReadSubPrefixes(); + + nsresult WriteAddPrefixes(nsIOutputStream* aOut); + nsresult WriteSubPrefixes(nsIOutputStream* aOut); + + nsresult ProcessSubs(); + + nsresult PrepareForUpdate(); + + bool AlreadyReadChunkNumbers(); + bool AlreadyReadCompletions(); + + // This is used for checking that the database is correct and for figuring out + // the number of chunks, etc. to read from disk on restart. + struct Header { + uint32_t magic; + uint32_t version; + uint32_t numAddChunks; + uint32_t numSubChunks; + uint32_t numAddPrefixes; + uint32_t numSubPrefixes; + uint32_t numAddCompletes; + uint32_t numSubCompletes; + }; + + Header mHeader; + + // The name of the table (must end in -shavar or -digest256, or evidently + // -simple for unittesting. + nsCString mTableName; + nsCOMPtr<nsIFile> mStoreDirectory; + + bool mInUpdate; + + nsCOMPtr<nsIInputStream> mInputStream; + + // Chunk numbers, stored as uint32_t arrays. + ChunkSet mAddChunks; + ChunkSet mSubChunks; + + ChunkSet mAddExpirations; + ChunkSet mSubExpirations; + + // Chunk data for shavar tables. See Entries.h for format. + AddPrefixArray mAddPrefixes; + SubPrefixArray mSubPrefixes; + + // See bug 806422 for background. We must be able to distinguish between + // updates from the completion server and updates from the regular server. + AddCompleteArray mAddCompletes; + SubCompleteArray mSubCompletes; + + uint32_t mFileSize; + + // For gtest to inspect private members. + friend class PerProviderDirectoryTestUtils; +}; + +} // namespace safebrowsing +} // namespace mozilla + +#endif |