1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
package org.mozilla.gecko.util.publicsuffix;
import android.content.Context;
import android.support.annotation.NonNull;
import android.support.annotation.WorkerThread;
import org.mozilla.gecko.util.StringUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
/**
* Helper methods for the public suffix part of a domain.
*
* A "public suffix" is one under which Internet users can (or historically could) directly register
* names. Some examples of public suffixes are .com, .co.uk and pvt.k12.ma.us.
*
* https://publicsuffix.org/
*
* Some parts of the implementation of this class are based on InternetDomainName class of the Guava
* project: https://github.com/google/guava
*/
public class PublicSuffix {
/**
* Strip the public suffix from the domain. Returns the original domain if no public suffix
* could be found.
*
* www.mozilla.org -> www.mozilla
* independent.co.uk -> independent
*/
@NonNull
@WorkerThread // This method might need to load data from disk
public static String stripPublicSuffix(Context context, @NonNull String domain) {
if (domain.length() == 0) {
return domain;
}
final int index = findPublicSuffixIndex(context, domain);
if (index == -1) {
return domain;
}
return domain.substring(0, index);
}
/**
* Returns the index of the leftmost part of the public suffix, or -1 if not found.
*/
@WorkerThread
private static int findPublicSuffixIndex(Context context, String domain) {
final List<String> parts = normalizeAndSplit(domain);
final int partsSize = parts.size();
final Set<String> exact = PublicSuffixPatterns.getExactSet(context);
for (int i = 0; i < partsSize; i++) {
String ancestorName = StringUtils.join(".", parts.subList(i, partsSize));
if (exact.contains(ancestorName)) {
return joinIndex(parts, i);
}
// Excluded domains (e.g. !nhs.uk) use the next highest
// domain as the effective public suffix (e.g. uk).
if (PublicSuffixPatterns.EXCLUDED.contains(ancestorName)) {
return joinIndex(parts, i + 1);
}
if (matchesWildcardPublicSuffix(ancestorName)) {
return joinIndex(parts, i);
}
}
return -1;
}
/**
* Normalize domain and split into domain parts (www.mozilla.org -> [www, mozilla, org]).
*/
private static List<String> normalizeAndSplit(String domain) {
domain = domain.replaceAll("[.\u3002\uFF0E\uFF61]", "."); // All dot-like characters to '.'
domain = domain.toLowerCase();
if (domain.endsWith(".")) {
domain = domain.substring(0, domain.length() - 1); // Strip trailing '.'
}
List<String> parts = new ArrayList<>();
Collections.addAll(parts, domain.split("\\."));
return parts;
}
/**
* Translate the index of the leftmost part of the public suffix to the index of the domain string.
*
* [www, mozilla, org] and 2 => 12 (www.mozilla)
*/
private static int joinIndex(List<String> parts, int index) {
int actualIndex = parts.get(0).length();
for (int i = 1; i < index; i++) {
actualIndex += parts.get(i).length() + 1; // Add one for the "." that is not part of the list elements
}
return actualIndex;
}
/**
* Does the domain name match one of the "wildcard" patterns (e.g. {@code "*.ar"})?
*/
private static boolean matchesWildcardPublicSuffix(String domain) {
final String[] pieces = domain.split("\\.", 2);
return pieces.length == 2 && PublicSuffixPatterns.UNDER.contains(pieces[1]);
}
}
|