1 files changed, 121 insertions, 0 deletions
diff --git a/mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java b/mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java
new file mode 100644
index 000000000..6a146cfcf
--- /dev/null
+++ b/mobile/android/geckoview/src/main/java/org/mozilla/gecko/util/publicsuffix/PublicSuffix.java
@@ -0,0 +1,121 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package org.mozilla.gecko.util.publicsuffix;
+
+import android.content.Context;
+import android.support.annotation.NonNull;
+import android.support.annotation.WorkerThread;
+
+import org.mozilla.gecko.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Helper methods for the public suffix part of a domain.
+ *
+ * A "public suffix" is one under which Internet users can (or historically could) directly register
+ * names. Some examples of public suffixes are .com, .co.uk and pvt.k12.ma.us.
+ *
+ * https://publicsuffix.org/
+ *
+ * Some parts of the implementation of this class are based on InternetDomainName class of the Guava
+ * project: https://github.com/google/guava
+ */
+public class PublicSuffix {
+    /**
+     * Strip the public suffix from the domain. Returns the original domain if no public suffix
+     * could be found.
+     *
+     * www.mozilla.org -> www.mozilla
+     * independent.co.uk -> independent
+     */
+    @NonNull
+    @WorkerThread // This method might need to load data from disk
+    public static String stripPublicSuffix(Context context, @NonNull String domain) {
+        if (domain.length() == 0) {
+            return domain;
+        }
+
+        final int index = findPublicSuffixIndex(context, domain);
+        if (index == -1) {
+            return domain;
+        }
+
+        return domain.substring(0, index);
+    }
+
+    /**
+     * Returns the index of the leftmost part of the public suffix, or -1 if not found.
+     */
+    @WorkerThread
+    private static int findPublicSuffixIndex(Context context, String domain) {
+        final List<String> parts = normalizeAndSplit(domain);
+        final int partsSize = parts.size();
+        final Set<String> exact = PublicSuffixPatterns.getExactSet(context);
+
+        for (int i = 0; i < partsSize; i++) {
+            String ancestorName = StringUtils.join(".", parts.subList(i, partsSize));
+
+            if (exact.contains(ancestorName)) {
+                return joinIndex(parts, i);
+            }
+
+            // Excluded domains (e.g. !nhs.uk) use the next highest
+            // domain as the effective public suffix (e.g. uk).
+            if (PublicSuffixPatterns.EXCLUDED.contains(ancestorName)) {
+                return joinIndex(parts, i + 1);
+            }
+
+            if (matchesWildcardPublicSuffix(ancestorName)) {
+                return joinIndex(parts, i);
+            }
+        }
+
+        return -1;
+    }
+
+    /**
+     * Normalize domain and split into domain parts (www.mozilla.org -> [www, mozilla, org]).
+     */
+    private static List<String> normalizeAndSplit(String domain) {
+        domain = domain.replaceAll("[.\u3002\uFF0E\uFF61]", "."); // All dot-like characters to '.'
+        domain = domain.toLowerCase();
+
+        if (domain.endsWith(".")) {
+            domain = domain.substring(0, domain.length() - 1); // Strip trailing '.'
+        }
+
+        List<String> parts = new ArrayList<>();
+        Collections.addAll(parts, domain.split("\\."));
+
+        return parts;
+    }
+
+    /**
+     * Translate the index of the leftmost part of the public suffix to the index of the domain string.
+     *
+     * [www, mozilla, org] and 2 => 12 (www.mozilla)
+     */
+    private static int joinIndex(List<String> parts, int index) {
+        int actualIndex = parts.get(0).length();
+
+        for (int i = 1; i < index; i++) {
+            actualIndex += parts.get(i).length() + 1; // Add one for the "." that is not part of the list elements
+        }
+
+        return actualIndex;
+    }
+
+    /**
+     * Does the domain name match one of the "wildcard" patterns (e.g. {@code "*.ar"})?
+     */
+    private static boolean matchesWildcardPublicSuffix(String domain) {
+        final String[] pieces = domain.split("\\.", 2);
+        return pieces.length == 2 && PublicSuffixPatterns.UNDER.contains(pieces[1]);
+    }
+}