summaryrefslogtreecommitdiffstats
path: root/mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java
diff options
context:
space:
mode:
Diffstat (limited to 'mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java')
-rw-r--r--mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java121
1 files changed, 121 insertions, 0 deletions
diff --git a/mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java b/mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java
new file mode 100644
index 000000000..6a146cfcf
--- /dev/null
+++ b/mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java
@@ -0,0 +1,121 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package org.mozilla.gecko.util.publicsuffix;
+
+import android.content.Context;
+import android.support.annotation.NonNull;
+import android.support.annotation.WorkerThread;
+
+import org.mozilla.gecko.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Helper methods for the public suffix part of a domain.
+ *
+ * A "public suffix" is one under which Internet users can (or historically could) directly register
+ * names. Some examples of public suffixes are .com, .co.uk and pvt.k12.ma.us.
+ *
+ * https://publicsuffix.org/
+ *
+ * Some parts of the implementation of this class are based on InternetDomainName class of the Guava
+ * project: https://github.com/google/guava
+ */
+public class PublicSuffix {
+ /**
+ * Strip the public suffix from the domain. Returns the original domain if no public suffix
+ * could be found.
+ *
+ * www.mozilla.org -> www.mozilla
+ * independent.co.uk -> independent
+ */
+ @NonNull
+ @WorkerThread // This method might need to load data from disk
+ public static String stripPublicSuffix(Context context, @NonNull String domain) {
+ if (domain.length() == 0) {
+ return domain;
+ }
+
+ final int index = findPublicSuffixIndex(context, domain);
+ if (index == -1) {
+ return domain;
+ }
+
+ return domain.substring(0, index);
+ }
+
+ /**
+ * Returns the index of the leftmost part of the public suffix, or -1 if not found.
+ */
+ @WorkerThread
+ private static int findPublicSuffixIndex(Context context, String domain) {
+ final List<String> parts = normalizeAndSplit(domain);
+ final int partsSize = parts.size();
+ final Set<String> exact = PublicSuffixPatterns.getExactSet(context);
+
+ for (int i = 0; i < partsSize; i++) {
+ String ancestorName = StringUtils.join(".", parts.subList(i, partsSize));
+
+ if (exact.contains(ancestorName)) {
+ return joinIndex(parts, i);
+ }
+
+ // Excluded domains (e.g. !nhs.uk) use the next highest
+ // domain as the effective public suffix (e.g. uk).
+ if (PublicSuffixPatterns.EXCLUDED.contains(ancestorName)) {
+ return joinIndex(parts, i + 1);
+ }
+
+ if (matchesWildcardPublicSuffix(ancestorName)) {
+ return joinIndex(parts, i);
+ }
+ }
+
+ return -1;
+ }
+
+ /**
+ * Normalize domain and split into domain parts (www.mozilla.org -> [www, mozilla, org]).
+ */
+ private static List<String> normalizeAndSplit(String domain) {
+ domain = domain.replaceAll("[.\u3002\uFF0E\uFF61]", "."); // All dot-like characters to '.'
+ domain = domain.toLowerCase();
+
+ if (domain.endsWith(".")) {
+ domain = domain.substring(0, domain.length() - 1); // Strip trailing '.'
+ }
+
+ List<String> parts = new ArrayList<>();
+ Collections.addAll(parts, domain.split("\\."));
+
+ return parts;
+ }
+
+ /**
+ * Translate the index of the leftmost part of the public suffix to the index of the domain string.
+ *
+ * [www, mozilla, org] and 2 => 12 (www.mozilla)
+ */
+ private static int joinIndex(List<String> parts, int index) {
+ int actualIndex = parts.get(0).length();
+
+ for (int i = 1; i < index; i++) {
+ actualIndex += parts.get(i).length() + 1; // Add one for the "." that is not part of the list elements
+ }
+
+ return actualIndex;
+ }
+
+ /**
+ * Does the domain name match one of the "wildcard" patterns (e.g. {@code "*.ar"})?
+ */
+ private static boolean matchesWildcardPublicSuffix(String domain) {
+ final String[] pieces = domain.split("\\.", 2);
+ return pieces.length == 2 && PublicSuffixPatterns.UNDER.contains(pieces[1]);
+ }
+}