summary refs log tree commit diff stats
path: root/mobile/android/modules/WebsiteMetadata.jsm
diff options
context:
space:
mode:
Diffstat (limited to 'mobile/android/modules/WebsiteMetadata.jsm')
-rw-r--r-- mobile/android/modules/WebsiteMetadata.jsm 475
1 files changed, 0 insertions, 475 deletions
diff --git a/mobile/android/modules/WebsiteMetadata.jsm b/mobile/android/modules/WebsiteMetadata.jsm
deleted file mode 100644
index 39af9ddeb..000000000
--- a/mobile/android/modules/WebsiteMetadata.jsm
+++ /dev/null
@@ -1,475 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-'use strict';
-
-const { classes: Cc, interfaces: Ci, utils: Cu } = Components;
-
-this.EXPORTED_SYMBOLS = ["WebsiteMetadata"];
-
-Cu.import("resource://gre/modules/XPCOMUtils.jsm");
-
-XPCOMUtils.defineLazyModuleGetter(this, "Messaging", "resource://gre/modules/Messaging.jsm");
-XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
-
-var WebsiteMetadata = {
-  /**
-   * Asynchronously parse the document and extract its metadata. If any
-   * metadata was found, a 'Website:Metadata' event carrying it is sent.
-   *
-   * Only the 'image_url' rule set is evaluated.
-   *
-   * @param doc  Document to inspect; its location.href is used both as the
-   *             URL handed to getMetadata() and as the reported location.
-   */
-  parseAsynchronously: function(doc) {
-    Task.spawn(function() {
-      let metadata = getMetadata(doc, doc.location.href, {
-        image_url: metadataRules['image_url']
-      });
-
-      // getMetadata() stores a key for every requested rule, with the value
-      // undefined when nothing matched, so counting keys cannot detect an
-      // empty result. Look for at least one defined value instead.
-      let hasMetadata = Object.keys(metadata).some(key => metadata[key] !== undefined);
-
-      // No metadata was extracted, so don't bother sending it.
-      if (!hasMetadata) {
-        return;
-      }
-
-      let msg = {
-        type: 'Website:Metadata',
-        location: doc.location.href,
-        metadata: metadata,
-      };
-
-      Messaging.sendRequest(msg);
-    });
-  }
-};
-
-// #################################################################################################
-// # Modified version of makeUrlAbsolute() to not import url parser library (and dependencies)
-// #################################################################################################
-
-// Assign `relative` to the href of a new <a> element created by context.doc
-// and return the href read back from that element; the anchor element is
-// relied on to do the actual URL resolution.
-function makeUrlAbsolute(context, relative) {
-  const anchor = context.doc.createElement('a');
-  anchor.href = relative;
-  return anchor.href;
-}
-
-// #################################################################################################
-// # page-metadata-parser
-// # https://github.com/mozilla/page-metadata-parser/
-// # 61c58cbd0f0bf2153df832a388a79c66b288b98c
-// #################################################################################################
-
-// Build an extractor for one metadata field.
-//
-// name:       flavor name used to tag facts and to query the knowledgebase.
-// rules:      iterable of [selector, handler] pairs. The list is reversed and
-//             each rule's score is its index in the reversed list, so pairs
-//             listed earlier receive larger scores.
-// processors: optional array of functions (value, context) => value applied
-//             in order to the extracted value.
-//
-// Returns a function (doc, context) that yields the processed value of the
-// best-scoring node (trimmed when it has a trim method), or undefined when
-// nothing usable was found.
-function buildRuleset(name, rules, processors) {
-  const reversedRules = Array.from(rules).reverse();
-  const builtRuleset = ruleset(...reversedRules.map(([query, handler], order) => rule(
-    dom(query),
-    node => [{
-      score: order,
-      flavor: name,
-      notes: handler(node),
-    }]
-  )));
-
-  return (doc, context) => {
-    const kb = builtRuleset.score(doc);
-    const maxNode = kb.max(name);
-
-    if (!maxNode) {
-      return undefined;
-    }
-
-    let value = maxNode.flavors.get(name);
-
-    // A matched element may lack the attribute the handler reads, in which
-    // case the note is null. Processors expect a real value (they call string
-    // methods on it or resolve it as a URL), so stop here.
-    if (value === null || value === undefined) {
-      return undefined;
-    }
-
-    if (processors) {
-      for (const processor of processors) {
-        value = processor(value, context);
-      }
-    }
-
-    if (!value) {
-      return undefined;
-    }
-
-    return value.trim ? value.trim() : value;
-  };
-}
-
-// Extraction rules keyed by metadata field. Each entry has `rules`, a list of
-// [CSS selector, handler] pairs where the handler receives a knowledgebase
-// node and returns the raw value, and optionally `processors`, functions
-// (value, context) applied to the extracted value.
-const metadataRules = (() => {
-  // Handlers shared by the selectors below.
-  const contentAttr = node => node.element.getAttribute('content');
-  const hrefAttr = node => node.element.getAttribute('href');
-
-  // Processor that resolves an extracted URL through makeUrlAbsolute().
-  const toAbsoluteUrl = (value, context) => makeUrlAbsolute(context, value);
-
-  return {
-    description: {
-      rules: [
-        ['meta[property="og:description"]', contentAttr],
-        ['meta[name="description"]', contentAttr],
-      ],
-    },
-
-    icon_url: {
-      rules: [
-        ['link[rel="apple-touch-icon"]', hrefAttr],
-        ['link[rel="apple-touch-icon-precomposed"]', hrefAttr],
-        ['link[rel="icon"]', hrefAttr],
-        ['link[rel="fluid-icon"]', hrefAttr],
-        ['link[rel="shortcut icon"]', hrefAttr],
-        ['link[rel="Shortcut Icon"]', hrefAttr],
-        ['link[rel="mask-icon"]', hrefAttr],
-      ],
-      processors: [toAbsoluteUrl],
-    },
-
-    image_url: {
-      rules: [
-        ['meta[property="og:image:secure_url"]', contentAttr],
-        ['meta[property="og:image:url"]', contentAttr],
-        ['meta[property="og:image"]', contentAttr],
-        ['meta[property="twitter:image"]', contentAttr],
-        ['meta[name="thumbnail"]', contentAttr],
-      ],
-      processors: [toAbsoluteUrl],
-    },
-
-    keywords: {
-      rules: [
-        ['meta[name="keywords"]', contentAttr],
-      ],
-      // Split the comma-separated list and strip whitespace around each item.
-      processors: [
-        list => list.split(',').map(item => item.trim()),
-      ],
-    },
-
-    title: {
-      rules: [
-        ['meta[property="og:title"]', contentAttr],
-        ['meta[property="twitter:title"]', contentAttr],
-        ['meta[name="hdl"]', contentAttr],
-        ['title', node => node.element.text],
-      ],
-    },
-
-    type: {
-      rules: [
-        ['meta[property="og:type"]', contentAttr],
-      ],
-    },
-
-    url: {
-      rules: [
-        ['meta[property="og:url"]', contentAttr],
-        ['link[rel="canonical"]', hrefAttr],
-      ],
-    },
-  };
-})();
-
-// Evaluate a set of metadata rules against `doc` and return an object with one
-// property per rule key. `rules` defaults to metadataRules when falsy. An entry
-// whose `rules` property is an array is run through buildRuleset(); any other
-// entry is treated as a nested group and evaluated recursively.
-function getMetadata(doc, url, rules) {
-  const context = {url, doc};
-  const activeRules = rules || metadataRules;
-  const metadata = {};
-
-  for (const metadataKey of Object.keys(activeRules)) {
-    const entry = activeRules[metadataKey];
-
-    if (!Array.isArray(entry.rules)) {
-      // Nested group of rules: recurse and store the resulting object.
-      metadata[metadataKey] = getMetadata(doc, url, entry);
-      continue;
-    }
-
-    const extract = buildRuleset(metadataKey, entry.rules, entry.processors);
-    metadata[metadataKey] = extract(doc, context);
-  }
-
-  return metadata;
-}
-
-// #################################################################################################
-// # Fathom dependencies resolved
-// #################################################################################################
-
-// const {forEach} = require('wu');
-// Call fn once for every value produced by iterating obj, in iteration order.
-function forEach(fn, obj) {
-  for (const entry of obj) {
-    fn(entry);
-  }
-}
-
-// Return the item of `iterable` whose key, as computed by by(item), wins the
-// isBetter(key, bestKeySoFar) comparison. The first item is always accepted,
-// and a later item replaces the current winner only when isBetter returns a
-// truthy value. Throws if `iterable` yields nothing.
-function best(iterable, by, isBetter) {
-  let winner;
-  let winnerKey;
-  let sawAny = false;
-
-  for (const candidate of iterable) {
-    const candidateKey = by(candidate);
-    if (isBetter(candidateKey, winnerKey) || !sawAny) {
-      winner = candidate;
-      winnerKey = candidateKey;
-      sawAny = true;
-    }
-  }
-
-  if (!sawAny) {
-    throw new Error('Tried to call best() on empty iterable');
-  }
-  return winner;
-}
-
-// const {max} = require('./utils');
-// Return the item of `iterable` with the greatest key, where the key of an
-// item is by(item). Keys are compared with `>`, so on ties the earliest item
-// wins. `by` defaults to using the item itself as its key; the default is
-// written inline because this module defines no `identity` helper.
-// Throws (via best()) when `iterable` is empty.
-function max(iterable, by = item => item) {
-  return best(iterable, by, (a, b) => a > b);
-}
-
-// #################################################################################################
-// # Fathom
-// # https://github.com/mozilla/fathom
-// # cac59e470816f17fc1efd4a34437b585e3e451cd
-// #################################################################################################
-
-// Look up `key` in `map`. When the key is absent, call defaultMaker(), store
-// its result under the key, and return that stored value.
-function getDefault(map, key, defaultMaker) {
-  if (!map.has(key)) {
-    map.set(key, defaultMaker());
-  }
-  return map.get(key);
-}
-
-
-// Construct a filtration network of rules.
-//
-// rules: objects exposing `source.inputFlavor` and usable by resultsOf() (the
-// shape produced by rule() combined with dom() or flavor()).
-//
-// Returns an object with a single method, score(tree), which runs the rules
-// over `tree` and returns the populated knowledgebase.
-function ruleset(...rules) {
- const rulesByInputFlavor = new Map(); // [someInputFlavor: [rule, ...]]
-
- // File each rule under its input flavor:
- forEach(rule => getDefault(rulesByInputFlavor, rule.source.inputFlavor, () => []).push(rule),
- rules);
-
- return {
- // Iterate over a DOM tree or subtree, building up a knowledgebase, a
- // data structure holding scores and annotations for interesting
- // elements. Return the knowledgebase.
- //
- // This is the "rank" portion of the rank-and-yank algorithm.
- score: function (tree) {
- const kb = knowledgebase();
-
- // Introduce the whole DOM into the KB as flavor 'dom' to get
- // things started:
- const nonterminals = [[{tree}, 'dom']]; // [[node, flavor], [node, flavor], ...]
-
- // While there are new facts, run the applicable rules over them to
- // generate even newer facts. Repeat until everything's fully
- // digested. Rules run in no particular guaranteed order.
- while (nonterminals.length) {
- // pop() takes the most recently pushed fact, so the work list is
- // processed last-in, first-out.
- const [inNode, inFlavor] = nonterminals.pop();
- for (let rule of getDefault(rulesByInputFlavor, inFlavor, () => [])) {
- // resultsOf() hands back a generator, so facts are produced
- // one at a time while the loop below updates the KB.
- const outFacts = resultsOf(rule, inNode, inFlavor, kb);
- for (let fact of outFacts) {
- // Fetches the KB node for the fact's element, creating it on
- // first use.
- const outNode = kb.nodeForElement(fact.element);
-
- // No matter whether or not this flavor has been
- // emitted before for this node, we multiply the score.
- // We want to be able to add rules that refine the
- // scoring of a node, without having to rewire the path
- // of flavors that winds through the ruleset.
- //
- // 1 score per Node is plenty. That simplifies our
- // data, our rankers, our flavor system (since we don't
- // need to represent score axes), and our engine. If
- // somebody wants more score axes, they can fake it
- // themselves with notes, thus paying only for what
- // they eat. (We can even provide functions that help
- // with that.) Most rulesets will probably be concerned
- // with scoring only 1 thing at a time anyway. So,
- // rankers return a score multiplier + 0 or more new
- // flavors with optional notes. Facts can never be
- // deleted from the KB by rankers (or order would start
- // to matter); after all, they're *facts*.
- outNode.score *= fact.score;
-
- // Add a new annotation to a node--but only if there
- // wasn't one of the given flavor already there;
- // otherwise there's no point.
- //
- // You might argue that we might want to modify an
- // existing note here, but that would be a bad
- // idea. Notes of a given flavor should be
- // considered immutable once laid down. Otherwise, the
- // order of execution of same-flavored rules could
- // matter, hurting pluggability. Emit a new flavor and
- // a new note if you want to do that.
- //
- // Also, choosing not to add a new fact to nonterminals
- // when we're not adding a new flavor saves the work of
- // running the rules against it, which would be
- // entirely redundant and perform no new work (unless
- // the rankers were nondeterministic, but don't do
- // that).
- if (!outNode.flavors.has(fact.flavor)) {
- outNode.flavors.set(fact.flavor, fact.notes);
- kb.indexNodeByFlavor(outNode, fact.flavor); // TODO: better encapsulation rather than indexing explicitly
- nonterminals.push([outNode, fact.flavor]);
- }
- }
- }
- }
- return kb;
- }
- };
-}
-
-
-// Construct a container for storing and querying facts, where a fact has a
-// flavor (used to dispatch further rules upon), a corresponding DOM element, a
-// score, and some other arbitrary notes opaque to fathom.
-//
-// The returned object exposes nodeForElement(), max(), indexNodeByFlavor() and
-// nodesOfFlavor().
-function knowledgebase() {
-  // Flavor name -> array of nodes carrying that flavor, e.g.
-  //   Map{'texty' -> [NodeA],
-  //       'spiffy' -> [NodeA, NodeB]}
-  // where
-  //   NodeA = {element: <someElement>,
-  //
-  //            // Global nodewide score. Add custom ones with notes if
-  //            // you want.
-  //            score: 8,
-  //
-  //            // Flavors is a map of flavor names to notes:
-  //            flavors: Map{'texty' -> {ownText: 'blah',
-  //                                     someOtherNote: 'foo',
-  //                                     someCustomScore: 10},
-  //                         // This is an empty note:
-  //                         'fluffy' -> undefined}}
-  const nodesByFlavor = new Map();
-
-  // DOM element -> node, so that every element maps to a single node object.
-  const nodesByElement = new Map();
-
-  return {
-    // Return the "node" (our own data structure that we control) that
-    // corresponds to a given DOM element, creating one if necessary.
-    // New nodes start with score 1 and no flavors.
-    nodeForElement: function (element) {
-      return getDefault(nodesByElement,
-                        element,
-                        () => ({element,
-                                score: 1,
-                                flavors: new Map()}));
-    },
-
-    // Return the highest-scored node of the given flavor, undefined if
-    // there is none.
-    max: function (flavor) {
-      const nodes = nodesByFlavor.get(flavor);
-
-      // nodesOfFlavor() leaves an empty array behind for a flavor that was
-      // only queried. Treat that like a missing flavor, because the
-      // module-level max() throws on an empty iterable.
-      if (nodes === undefined || nodes.length === 0) {
-        return undefined;
-      }
-      return max(nodes, node => node.score);
-    },
-
-    // Let the KB know that a new flavor has been added to an element.
-    indexNodeByFlavor: function (node, flavor) {
-      getDefault(nodesByFlavor, flavor, () => []).push(node);
-    },
-
-    // Return the array of nodes indexed under the given flavor. An empty
-    // array is created and stored if the flavor has not been seen yet.
-    nodesOfFlavor: function (flavor) {
-      return getDefault(nodesByFlavor, flavor, () => []);
-    }
-  };
-}
-
-
-// Run one rule (as built by rule()) against a fact and return the new facts
-// it produces. Rules whose source flavor is 'flavor' go to
-// resultsOfFlavorRule(); every other rule is handled as a DOM rule.
-function resultsOf(rule, node, flavor, kb) {
-  // If more types of rule pop up someday, do fancier dispatching here.
-  if (rule.source.flavor === 'flavor') {
-    return resultsOfFlavorRule(rule, node, flavor);
-  }
-  return resultsOfDomRule(rule, node, kb);
-}
-
-
-// Generator for dom() rules: query the tree stored on the special root "dom"
-// fact with the rule's selector, run the ranker on the KB node of every match,
-// and yield each resulting fact. A fact without an element gets the matched
-// element; a fact without a flavor is an error.
-function *resultsOfDomRule(rule, specialDomNode, kb) {
-  // Use the special "tree" property of the special starting node:
-  const matches = specialDomNode.tree.querySelectorAll(rule.source.selector);
-
-  // matches is a NodeList, which doesn't conform to iterator protocol, so
-  // walk it by index.
-  let index = 0;
-  while (index < matches.length) {
-    const element = matches[index];
-    index += 1;
-
-    for (const fact of explicitFacts(rule.ranker(kb.nodeForElement(element)))) {
-      if (fact.element === undefined) {
-        fact.element = element;
-      }
-      if (fact.flavor === undefined) {
-        throw new Error('Rankers of dom() rules must return a flavor in each fact. Otherwise, there is no way for that fact to be used later.');
-      }
-      yield fact;
-    }
-  }
-}
-
-
-// Generator for flavor() rules: run the ranker on `node` and yield each
-// resulting fact, filling in defaults for anything the ranker left out.
-function *resultsOfFlavorRule(rule, node, flavor) {
-  for (const fact of explicitFacts(rule.ranker(node))) {
-    // If the ranker didn't specify a different element, assume it's
-    // talking about the one we passed in:
-    if (fact.element === undefined) {
-      fact.element = node.element;
-    }
-    // Likewise, a fact without a flavor keeps the input flavor.
-    if (fact.flavor === undefined) {
-      fact.flavor = flavor;
-    }
-    yield fact;
-  }
-}
-
-
-// Normalize whatever a ranker returned into a sequence of facts that all have
-// a score.
-//
-// Rankers can return undefined, which means "no facts", a single fact, or an
-// array of facts. Facts lacking a score are given a score of 1.
-function *explicitFacts(rankerResult) {
-  let facts;
-  if (rankerResult === undefined) {
-    facts = [];
-  } else if (Array.isArray(rankerResult)) {
-    facts = rankerResult;
-  } else {
-    facts = [rankerResult];
-  }
-
-  for (const fact of facts) {
-    if (fact.score === undefined) {
-      fact.score = 1;
-    }
-    yield fact;
-  }
-}
-
-
-// TODO: For the moment, a lot of responsibility is on the rankers to return a
-// pretty big data structure of up to 4 properties. This is a bit verbose for
-// an arrow function (as I hope we can use most of the time) and the usual case
-// will probably be returning just a score multiplier. Make that case more
-// concise.
-
-// TODO: It is likely that rankers should receive the notes of their input type
-// as a 2nd arg, for brevity.
-
-
-// Return a condition that uses a DOM selector to find its matches from the
-// original DOM tree.
-//
-// For consistency, Nodes will still be delivered to the transformers, but
-// they'll have empty flavors and score = 1.
-//
-// Condition constructors like dom() and flavor() build stupid, introspectable
-// objects that the query engine can read. They don't actually do the query
-// themselves. That way, the query planner can be smarter than them, figuring
-// out which indices to use based on all of them. (We'll probably keep a heap
-// by each dimension's score and a hash by flavor name, for starters.) Someday,
-// fancy things like this may be possible: rule(and(tag('p'), klass('snork')),
-// ...)
-function dom(selector) {
-  // Plain descriptor only: it records the selector and marks both its own
-  // flavor and its input flavor as 'dom'. No query is run here.
-  const condition = {
-    flavor: 'dom',
-    inputFlavor: 'dom',
-    selector,
-  };
-  return condition;
-}
-
-
-// Build a condition that selects knowledgebase nodes by flavor. The result is
-// a plain descriptor whose own flavor is 'flavor' and whose inputFlavor is the
-// flavor name passed in.
-function flavor(inputFlavor) {
-  const condition = {
-    flavor: 'flavor',
-    inputFlavor,
-  };
-  return condition;
-}
-
-
-// Pair a condition (`source`, e.g. the result of dom() or flavor()) with the
-// ranker function to run on the facts that condition selects.
-function rule(source, ranker) {
-  const built = {source, ranker};
-  return built;
-}