diff options
Diffstat (limited to 'netwerk/dns/nsEffectiveTLDService.cpp')
-rw-r--r-- | netwerk/dns/nsEffectiveTLDService.cpp | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/netwerk/dns/nsEffectiveTLDService.cpp b/netwerk/dns/nsEffectiveTLDService.cpp new file mode 100644 index 000000000..4dab2178f --- /dev/null +++ b/netwerk/dns/nsEffectiveTLDService.cpp @@ -0,0 +1,368 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This service reads a file of rules describing TLD-like domain names. For a +// complete description of the expected file format and parsing rules, see +// http://wiki.mozilla.org/Gecko:Effective_TLD_Service + +#include "mozilla/ArrayUtils.h" +#include "mozilla/MemoryReporting.h" + +#include "nsEffectiveTLDService.h" +#include "nsIIDNService.h" +#include "nsNetUtil.h" +#include "prnetdb.h" +#include "nsIURI.h" +#include "nsNetCID.h" +#include "nsServiceManagerUtils.h" + +using namespace mozilla; + +NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, + nsIMemoryReporter) + +// ---------------------------------------------------------------------- + +#define ETLD_STR_NUM_1(line) str##line +#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line) +#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__)) + +const ETLDEntry ETLDEntry::entries[] = { +#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild }, +#include "etld_data.inc" +#undef ETLD_ENTRY +}; + +const union ETLDEntry::etld_strings ETLDEntry::strings = { + { +#define ETLD_ENTRY(name, ex, wild) name, +#include "etld_data.inc" +#undef ETLD_ENTRY + } +}; + +/* static */ const ETLDEntry* +ETLDEntry::GetEntry(const char* aDomain) +{ + size_t i; + if (BinarySearchIf(entries, 0, ArrayLength(ETLDEntry::entries), + Cmp(aDomain), &i)) { + return &entries[i]; + } + return nullptr; +} + +// Dummy function to statically ensure that our indices don't overflow +// the storage provided for them. +void +ETLDEntry::FuncForStaticAsserts(void) +{ +#define ETLD_ENTRY(name, ex, wild) \ + static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \ + "invalid strtab index"); +#include "etld_data.inc" +#undef ETLD_ENTRY +} + +#undef ETLD_ENTRY_OFFSET +#undef ETLD_STR_NUM +#undef ETLD_STR_NUM1 + +// ---------------------------------------------------------------------- + +static nsEffectiveTLDService *gService = nullptr; + +nsEffectiveTLDService::nsEffectiveTLDService() +{ +} + +nsresult +nsEffectiveTLDService::Init() +{ + nsresult rv; + mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); + if (NS_FAILED(rv)) return rv; + +#ifdef DEBUG + // Sanity-check the eTLD entries. + for (uint32_t i = 0; i < ArrayLength(ETLDEntry::entries); i++) { + const char* domain = ETLDEntry::entries[i].GetEffectiveTLDName(); + nsDependentCString name(domain); + nsAutoCString normalizedName(domain); + MOZ_ASSERT(NS_SUCCEEDED(NormalizeHostname(normalizedName)), + "normalization failure!"); + MOZ_ASSERT(name.Equals(normalizedName), "domain not normalized!"); + + // Domains must be in sorted order for binary search to work. + if (i > 0) { + const char* domain0 = ETLDEntry::entries[i - 1].GetEffectiveTLDName(); + MOZ_ASSERT(strcmp(domain0, domain) < 0, "domains not in sorted order!"); + } + } +#endif + + MOZ_ASSERT(!gService); + gService = this; + RegisterWeakMemoryReporter(this); + + return NS_OK; +} + +nsEffectiveTLDService::~nsEffectiveTLDService() +{ + UnregisterWeakMemoryReporter(this); + gService = nullptr; +} + +MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf) + +// The amount of heap memory measured here is tiny. It used to be bigger when +// nsEffectiveTLDService used a separate hash table instead of binary search. +// Nonetheless, we keep this code here in anticipation of bug 1083971 which will +// change ETLDEntries::entries to a heap-allocated array modifiable at runtime. +NS_IMETHODIMP +nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport, + nsISupports* aData, bool aAnonymize) +{ + MOZ_COLLECT_REPORT( + "explicit/network/effective-TLD-service", KIND_HEAP, UNITS_BYTES, + SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf), + "Memory used by the effective TLD service."); + + return NS_OK; +} + +size_t +nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) +{ + size_t n = aMallocSizeOf(this); + + // Measurement of the following members may be added later if DMD finds it is + // worthwhile: + // - mIDNService + + return n; +} + +// External function for dealing with URI's correctly. +// Pulls out the host portion from an nsIURI, and calls through to +// GetPublicSuffixFromHost(). +NS_IMETHODIMP +nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI, + nsACString &aPublicSuffix) +{ + NS_ENSURE_ARG_POINTER(aURI); + + nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); + NS_ENSURE_ARG_POINTER(innerURI); + + nsAutoCString host; + nsresult rv = innerURI->GetAsciiHost(host); + if (NS_FAILED(rv)) return rv; + + return GetBaseDomainInternal(host, 0, aPublicSuffix); +} + +// External function for dealing with URI's correctly. +// Pulls out the host portion from an nsIURI, and calls through to +// GetBaseDomainFromHost(). +NS_IMETHODIMP +nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI, + uint32_t aAdditionalParts, + nsACString &aBaseDomain) +{ + NS_ENSURE_ARG_POINTER(aURI); + NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); + + nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); + NS_ENSURE_ARG_POINTER(innerURI); + + nsAutoCString host; + nsresult rv = innerURI->GetAsciiHost(host); + if (NS_FAILED(rv)) return rv; + + return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain); +} + +// External function for dealing with a host string directly: finds the public +// suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal(). +NS_IMETHODIMP +nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname, + nsACString &aPublicSuffix) +{ + // Create a mutable copy of the hostname and normalize it to ACE. + // This will fail if the hostname includes invalid characters. + nsAutoCString normHostname(aHostname); + nsresult rv = NormalizeHostname(normHostname); + if (NS_FAILED(rv)) return rv; + + return GetBaseDomainInternal(normHostname, 0, aPublicSuffix); +} + +// External function for dealing with a host string directly: finds the base +// domain (e.g. www.co.uk) for the given hostname and number of subdomain parts +// requested. See GetBaseDomainInternal(). +NS_IMETHODIMP +nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname, + uint32_t aAdditionalParts, + nsACString &aBaseDomain) +{ + NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); + + // Create a mutable copy of the hostname and normalize it to ACE. + // This will fail if the hostname includes invalid characters. + nsAutoCString normHostname(aHostname); + nsresult rv = NormalizeHostname(normHostname); + if (NS_FAILED(rv)) return rv; + + return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain); +} + +NS_IMETHODIMP +nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname, + nsACString& aBaseDomain) +{ + // Create a mutable copy of the hostname and normalize it to ACE. + // This will fail if the hostname includes invalid characters. + nsAutoCString normHostname(aHostname); + nsresult rv = NormalizeHostname(normHostname); + NS_ENSURE_SUCCESS(rv, rv); + + return GetBaseDomainInternal(normHostname, -1, aBaseDomain); +} + +// Finds the base domain for a host, with requested number of additional parts. +// This will fail, generating an error, if the host is an IPv4/IPv6 address, +// if more subdomain parts are requested than are available, or if the hostname +// includes characters that are not valid in a URL. Normalization is performed +// on the host string and the result will be in UTF8. +nsresult +nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, + int32_t aAdditionalParts, + nsACString &aBaseDomain) +{ + if (aHostname.IsEmpty()) + return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; + + // chomp any trailing dot, and keep track of it for later + bool trailingDot = aHostname.Last() == '.'; + if (trailingDot) + aHostname.Truncate(aHostname.Length() - 1); + + // check the edge cases of the host being '.' or having a second trailing '.', + // since subsequent checks won't catch it. + if (aHostname.IsEmpty() || aHostname.Last() == '.') + return NS_ERROR_INVALID_ARG; + + // Check if we're dealing with an IPv4/IPv6 hostname, and return + PRNetAddr addr; + PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr); + if (result == PR_SUCCESS) + return NS_ERROR_HOST_IS_IP_ADDRESS; + + // Walk up the domain tree, most specific to least specific, + // looking for matches at each level. Note that a given level may + // have multiple attributes (e.g. IsWild() and IsNormal()). + const char *prevDomain = nullptr; + const char *currDomain = aHostname.get(); + const char *nextDot = strchr(currDomain, '.'); + const char *end = currDomain + aHostname.Length(); + // Default value of *eTLD is currDomain as set in the while loop below + const char *eTLD = nullptr; + while (true) { + // sanity check the string we're about to look up: it should not begin with + // a '.'; this would mean the hostname began with a '.' or had an + // embedded '..' sequence. + if (*currDomain == '.') + return NS_ERROR_INVALID_ARG; + + // Perform the lookup. + const ETLDEntry* entry = ETLDEntry::GetEntry(currDomain); + if (entry) { + if (entry->IsWild() && prevDomain) { + // wildcard rules imply an eTLD one level inferior to the match. + eTLD = prevDomain; + break; + + } else if (entry->IsNormal() || !nextDot) { + // specific match, or we've hit the top domain level + eTLD = currDomain; + break; + + } else if (entry->IsException()) { + // exception rules imply an eTLD one level superior to the match. + eTLD = nextDot + 1; + break; + } + } + + if (!nextDot) { + // we've hit the top domain level; use it by default. + eTLD = currDomain; + break; + } + + prevDomain = currDomain; + currDomain = nextDot + 1; + nextDot = strchr(currDomain, '.'); + } + + const char *begin, *iter; + if (aAdditionalParts < 0) { + NS_ASSERTION(aAdditionalParts == -1, + "aAdditionalParts can't be negative and different from -1"); + + for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++); + + if (iter != eTLD) { + iter++; + } + if (iter != eTLD) { + aAdditionalParts = 0; + } + } else { + // count off the number of requested domains. + begin = aHostname.get(); + iter = eTLD; + + while (true) { + if (iter == begin) + break; + + if (*(--iter) == '.' && aAdditionalParts-- == 0) { + ++iter; + ++aAdditionalParts; + break; + } + } + } + + if (aAdditionalParts != 0) + return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; + + aBaseDomain = Substring(iter, end); + // add on the trailing dot, if applicable + if (trailingDot) + aBaseDomain.Append('.'); + + return NS_OK; +} + +// Normalizes the given hostname, component by component. ASCII/ACE +// components are lower-cased, and UTF-8 components are normalized per +// RFC 3454 and converted to ACE. +nsresult +nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname) +{ + if (!IsASCII(aHostname)) { + nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname); + if (NS_FAILED(rv)) + return rv; + } + + ToLowerCase(aHostname); + return NS_OK; +} |