summaryrefslogtreecommitdiffstats
path: root/mailnews/db/gloda/modules/facet.js
diff options
context:
space:
mode:
Diffstat (limited to 'mailnews/db/gloda/modules/facet.js')
-rw-r--r--mailnews/db/gloda/modules/facet.js582
1 files changed, 582 insertions, 0 deletions
diff --git a/mailnews/db/gloda/modules/facet.js b/mailnews/db/gloda/modules/facet.js
new file mode 100644
index 000000000..0f73d3d5b
--- /dev/null
+++ b/mailnews/db/gloda/modules/facet.js
@@ -0,0 +1,582 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This file provides faceting logic.
+ */
+
+var EXPORTED_SYMBOLS = ["FacetDriver", "FacetUtils"];
+
+var Cc = Components.classes;
+var Ci = Components.interfaces;
+var Cr = Components.results;
+var Cu = Components.utils;
+
+Cu.import("resource:///modules/gloda/public.js");
+
+/**
+ * Decides the appropriate faceters for the noun type and drives the faceting
+ * process. This class and the faceters are intended to be reusable so that
+ * you only need one instance per faceting session. (Although each faceting
+ * pass is accordingly destructive to previous results.)
+ *
+ * Our strategy for faceting is to process one attribute at a time across all
+ * the items in the provided set. The alternative would be to iterate over
+ * the items and then iterate over the attributes on each item. While both
+ * approaches have caching downsides
+ */
+function FacetDriver(aNounDef, aWindow) {
+ this.nounDef = aNounDef;
+ this._window = aWindow;
+
+ this._makeFaceters();
+}
+FacetDriver.prototype = {
+ /**
+ * Populate |this.faceters| with a set of faceters appropriate to the noun
+ * definition associated with this instance.
+ */
+ _makeFaceters: function() {
+ let faceters = this.faceters = [];
+
+ function makeFaceter(aAttrDef, aFacetDef) {
+ let facetType = aFacetDef.type;
+
+ if (aAttrDef.singular) {
+ if (facetType == "date")
+ faceters.push(new DateFaceter(aAttrDef, aFacetDef));
+ else
+ faceters.push(new DiscreteFaceter(aAttrDef, aFacetDef));
+ }
+ else {
+ if (facetType == "nonempty?")
+ faceters.push(new NonEmptySetFaceter(aAttrDef, aFacetDef));
+ else
+ faceters.push(new DiscreteSetFaceter(aAttrDef, aFacetDef));
+ }
+ }
+
+ for (let key in this.nounDef.attribsByBoundName) {
+ let attrDef = this.nounDef.attribsByBoundName[key];
+ // ignore attributes that do not want to be faceted
+ if (!attrDef.facet)
+ continue;
+
+ makeFaceter(attrDef, attrDef.facet);
+
+ if ("extraFacets" in attrDef) {
+ for (let facetDef of attrDef.extraFacets) {
+ makeFaceter(attrDef, facetDef);
+ }
+ }
+ }
+ },
+ /**
+ * Asynchronously facet the provided items, calling the provided callback when
+ * completed.
+ */
+ go: function FacetDriver_go(aItems, aCallback, aCallbackThis) {
+ this.items = aItems;
+ this.callback = aCallback;
+ this.callbackThis = aCallbackThis;
+
+ this._nextFaceter = 0;
+ this._drive();
+ },
+
+ _MAX_FACETING_TIMESLICE_MS: 100,
+ _FACETING_YIELD_DURATION_MS: 0,
+ _driveWrapper: function(aThis) {
+ aThis._drive();
+ },
+ _drive: function() {
+ let start = Date.now();
+
+ while (this._nextFaceter < this.faceters.length) {
+ let faceter = this.faceters[this._nextFaceter++];
+ // for now we facet in one go, but the long-term plan allows for them to
+ // be generators.
+ faceter.facetItems(this.items);
+
+ let delta = Date.now() - start;
+ if (delta > this._MAX_FACETING_TIMESLICE_MS) {
+ this._window.setTimeout(this._driveWrapper,
+ this._FACETING_YIELD_DURATION_MS,
+ this);
+ return;
+ }
+ }
+
+ // we only get here once we are done with the faceters
+ this.callback.call(this.callbackThis);
+ }
+};
+
+var FacetUtils = {
+ _groupSizeComparator: function(a, b) {
+ return b[1].length - a[1].length;
+ },
+
+ /**
+ * Given a list where each entry is a tuple of [group object, list of items
+ * belonging to that group], produce a new list of the top grouped items. We
+ * used to also produce an "other" aggregation, but that turned out to be
+ * conceptually difficult to deal with, so that's gone, leaving this method
+ * with much less to do.
+ *
+ * @param aAttrDef The attribute for the facet we are working with.
+ * @param aGroups The list of groups built for the facet.
+ * @param aMaxCount The number of result rows you want back.
+ */
+ makeTopGroups: function FacetUtils_makeTopGroups(aAttrDef, aGroups,
+ aMaxCount) {
+ let nounDef = aAttrDef.objectNounDef;
+ let realGroupsToUse = aMaxCount;
+
+ let orderedBySize = aGroups.concat();
+ orderedBySize.sort(this._groupSizeComparator);
+
+ // - get the real groups to use and order them by the attribute comparator
+ let outGroups = orderedBySize.slice(0, realGroupsToUse);
+ let comparator = nounDef.comparator;
+ function comparatorHelper(a, b) {
+ return comparator(a[0], b[0]);
+ }
+ outGroups.sort(comparatorHelper);
+
+ return outGroups;
+ }
+};
+
+/**
+ * Facet discrete things like message authors, boolean values, etc. Only
+ * appropriate for use on singular values. Use |DiscreteSetFaceter| for
+ * non-singular values.
+ */
+function DiscreteFaceter(aAttrDef, aFacetDef) {
+ this.attrDef = aAttrDef;
+ this.facetDef = aFacetDef;
+}
+DiscreteFaceter.prototype = {
+ type: "discrete",
+ /**
+ * Facet the given set of items, deferring to the appropriate helper method
+ */
+ facetItems: function(aItems) {
+ if (this.attrDef.objectNounDef.isPrimitive)
+ return this.facetPrimitiveItems(aItems);
+ else
+ return this.facetComplexItems(aItems);
+ },
+ /**
+ * Facet an attribute whose value is primitive, meaning that it is a raw
+ * numeric value or string, rather than a complex object.
+ */
+ facetPrimitiveItems: function(aItems) {
+ let attrKey = this.attrDef.boundName;
+ let nounDef = this.attrDef.objectNounDef;
+ let filter = this.facetDef.filter;
+
+ let valStrToVal = {};
+ let groups = this.groups = {};
+ this.groupCount = 0;
+
+ for (let item of aItems) {
+ let val = (attrKey in item) ? item[attrKey] : null;
+ if (val === Gloda.IGNORE_FACET)
+ continue;
+
+ // skip items the filter tells us to ignore
+ if (filter && !filter(val))
+ continue;
+
+ // We need to use hasOwnProperty because we cannot guarantee that the
+ // contents of val won't collide with the attributes in Object.prototype.
+ if (groups.hasOwnProperty(val))
+ groups[val].push(item);
+ else {
+ groups[val] = [item];
+ valStrToVal[val] = val;
+ this.groupCount++;
+ }
+ }
+
+ let orderedGroups = Object.keys(groups).
+ map(key => [valStrToVal[key], groups[key]]);
+ let comparator = this.facetDef.groupComparator;
+ function comparatorHelper(a, b) {
+ return comparator(a[0], b[0]);
+ }
+ orderedGroups.sort(comparatorHelper);
+ this.orderedGroups = orderedGroups;
+ },
+ /**
+ * Facet an attribute whose value is a complex object that can be identified
+ * by its 'id' attribute. This is the case where the value is itself a noun
+ * instance.
+ */
+ facetComplexItems: function(aItems) {
+ let attrKey = this.attrDef.boundName;
+ let nounDef = this.attrDef.objectNounDef;
+ let filter = this.facetDef.filter;
+ let idAttr = this.facetDef.groupIdAttr;
+
+ let groups = this.groups = {};
+ let groupMap = this.groupMap = {};
+ this.groupCount = 0;
+
+ for (let item of aItems) {
+ let val = (attrKey in item) ? item[attrKey] : null;
+ if (val === Gloda.IGNORE_FACET)
+ continue;
+
+ // skip items the filter tells us to ignore
+ if (filter && !filter(val))
+ continue;
+
+ let valId = (val == null) ? null : val[idAttr];
+ // We need to use hasOwnProperty because tag nouns are complex objects
+ // with id's that are non-numeric and so can collide with the contents
+ // of Object.prototype. (Note: the "tags" attribute is actually handled
+ // by the DiscreteSetFaceter.)
+ if (groupMap.hasOwnProperty(valId)) {
+ groups[valId].push(item);
+ }
+ else {
+ groupMap[valId] = val;
+ groups[valId] = [item];
+ this.groupCount++;
+ }
+ }
+
+ let orderedGroups = Object.keys(groups).
+ map(key => [groupMap[key], groups[key]]);
+ let comparator = this.facetDef.groupComparator;
+ function comparatorHelper(a, b) {
+ return comparator(a[0], b[0]);
+ }
+ orderedGroups.sort(comparatorHelper);
+ this.orderedGroups = orderedGroups;
+ },
+};
+
+/**
+ * Facet sets of discrete items. For example, tags applied to messages.
+ *
+ * The main differences between us and |DiscreteFaceter| are:
+ * - The empty set is notable.
+ * - Specific set configurations could be interesting, but are not low-hanging
+ * fruit.
+ */
+function DiscreteSetFaceter(aAttrDef, aFacetDef) {
+ this.attrDef = aAttrDef;
+ this.facetDef = aFacetDef;
+}
+DiscreteSetFaceter.prototype = {
+ type: "discrete",
+ /**
+ * Facet the given set of items, deferring to the appropriate helper method
+ */
+ facetItems: function(aItems) {
+ if (this.attrDef.objectNounDef.isPrimitive)
+ return this.facetPrimitiveItems(aItems);
+ else
+ return this.facetComplexItems(aItems);
+ },
+ /**
+ * Facet an attribute whose value is primitive, meaning that it is a raw
+ * numeric value or string, rather than a complex object.
+ */
+ facetPrimitiveItems: function(aItems) {
+ let attrKey = this.attrDef.boundName;
+ let nounDef = this.attrDef.objectNounDef;
+ let filter = this.facetDef.filter;
+
+ let groups = this.groups = {};
+ let valStrToVal = {};
+ this.groupCount = 0;
+
+ for (let item of aItems) {
+ let vals = (attrKey in item) ? item[attrKey] : null;
+ if (vals === Gloda.IGNORE_FACET)
+ continue;
+
+ if (vals == null || vals.length == 0) {
+ vals = [null];
+ }
+ for (let val of vals) {
+ // skip items the filter tells us to ignore
+ if (filter && !filter(val))
+ continue;
+
+ // We need to use hasOwnProperty because we cannot guarantee that the
+ // contents of val won't collide with the attributes in
+ // Object.prototype.
+ if (groups.hasOwnProperty(val))
+ groups[val].push(item);
+ else {
+ groups[val] = [item];
+ valStrToVal[val] = val;
+ this.groupCount++;
+ }
+ }
+ }
+
+ let orderedGroups = Object.keys(groups).
+ map(key => [valStrToVal[key], groups[key]]);
+ let comparator = this.facetDef.groupComparator;
+ function comparatorHelper(a, b) {
+ return comparator(a[0], b[0]);
+ }
+ orderedGroups.sort(comparatorHelper);
+ this.orderedGroups = orderedGroups;
+ },
+ /**
+ * Facet an attribute whose value is a complex object that can be identified
+ * by its 'id' attribute. This is the case where the value is itself a noun
+ * instance.
+ */
+ facetComplexItems: function(aItems) {
+ let attrKey = this.attrDef.boundName;
+ let nounDef = this.attrDef.objectNounDef;
+ let filter = this.facetDef.filter;
+ let idAttr = this.facetDef.groupIdAttr;
+
+ let groups = this.groups = {};
+ let groupMap = this.groupMap = {};
+ this.groupCount = 0;
+
+ for (let item of aItems) {
+ let vals = (attrKey in item) ? item[attrKey] : null;
+ if (vals === Gloda.IGNORE_FACET)
+ continue;
+
+ if (vals == null || vals.length == 0) {
+ vals = [null];
+ }
+ for (let val of vals) {
+ // skip items the filter tells us to ignore
+ if (filter && !filter(val))
+ continue;
+
+ let valId = (val == null) ? null : val[idAttr];
+ // We need to use hasOwnProperty because tag nouns are complex objects
+ // with id's that are non-numeric and so can collide with the contents
+ // of Object.prototype.
+ if (groupMap.hasOwnProperty(valId)) {
+ groups[valId].push(item);
+ }
+ else {
+ groupMap[valId] = val;
+ groups[valId] = [item];
+ this.groupCount++;
+ }
+ }
+ }
+
+ let orderedGroups = Object.keys(groups).
+ map(key => [groupMap[key], groups[key]]);
+ let comparator = this.facetDef.groupComparator;
+ function comparatorHelper(a, b) {
+ return comparator(a[0], b[0]);
+ }
+ orderedGroups.sort(comparatorHelper);
+ this.orderedGroups = orderedGroups;
+ },
+};
+
+/**
+ * Given a non-singular attribute, facet it as if it were a boolean based on
+ * whether there is anything in the list (set).
+ */
+function NonEmptySetFaceter(aAttrDef, aFacetDef) {
+ this.attrDef = aAttrDef;
+ this.facetDef = aFacetDef;
+}
+NonEmptySetFaceter.prototype = {
+ type: "boolean",
+ /**
+ * Facet the given set of items, deferring to the appropriate helper method
+ */
+ facetItems: function(aItems) {
+ let attrKey = this.attrDef.boundName;
+ let nounDef = this.attrDef.objectNounDef;
+
+ let trueValues = [];
+ let falseValues = [];
+
+ let groups = this.groups = {};
+ this.groupCount = 0;
+
+ for (let item of aItems) {
+ let vals = (attrKey in item) ? item[attrKey] : null;
+ if (vals == null || vals.length == 0)
+ falseValues.push(item);
+ else
+ trueValues.push(item);
+ }
+
+ this.orderedGroups = [];
+ if (trueValues.length)
+ this.orderedGroups.push([true, trueValues]);
+ if (falseValues.length)
+ this.orderedGroups.push([false, falseValues]);
+ this.groupCount = this.orderedGroups.length;
+ },
+ makeQuery: function(aGroupValues, aInclusive) {
+ let query = this.query = Gloda.newQuery(Gloda.NOUN_MESSAGE);
+
+ let constraintFunc = query[this.attrDef.boundName];
+ constraintFunc.call(query);
+
+ // Our query is always for non-empty lists (at this time), so we want to
+ // invert if they're excluding 'true' or including 'false', which means !=.
+ let invert = aGroupValues[0] != aInclusive;
+
+ return [query, invert];
+ }
+};
+
+
+/**
+ * Facet dates. We build a hierarchical nested structure of year, month, and
+ * day nesting levels. This decision was made speculatively in the hopes that
+ * it would allow us to do clustered analysis and that there might be a benefit
+ * for that. For example, if you search for "Christmas", we might notice
+ * clusters of messages around December of each year. We could then present
+ * these in a list as likely candidates, rather than a graphical timeline.
+ * Alternately, it could be used to inform a non-linear visualization. As it
+ * stands (as of this writing), it's just a complicating factor.
+ */
+function DateFaceter(aAttrDef, aFacetDef) {
+ this.attrDef = aAttrDef;
+ this.facetDef = aFacetDef;
+}
+DateFaceter.prototype = {
+ type: "date",
+ /**
+ *
+ */
+ facetItems: function(aItems) {
+ let attrKey = this.attrDef.boundName;
+ let nounDef = this.attrDef.objectNounDef;
+
+ let years = this.years = {_subCount: 0};
+ // generally track the time range
+ let oldest = null, newest = null;
+
+ let validItems = this.validItems = [];
+
+ // just cheat and put us at the front...
+ this.groupCount = aItems.length ? 1000 : 0;
+ this.orderedGroups = null;
+
+ /** The number of items with a null/missing attribute. */
+ this.missing = 0;
+
+ /**
+ * The number of items with a date that is unreasonably far in the past or
+ * in the future. Old-wise, we are concerned about incorrectly formatted
+ * messages (spam) that end up placed around the UNIX epoch. New-wise,
+ * we are concerned about messages that can't be explained by users who
+ * don't know how to set their clocks (both the current user and people
+ * sending them mail), mainly meaning spam.
+ * We want to avoid having our clever time-scale logic being made useless by
+ * these unreasonable messages.
+ */
+ this.unreasonable = 0;
+ // feb 1, 1970
+ let tooOld = new Date(1970, 1, 1);
+ // 3 days from now
+ let tooNew = new Date(Date.now() + 3 * 24 * 60 * 60 * 1000);
+
+ for (let item of aItems) {
+ let val = (attrKey in item) ? item[attrKey] : null;
+ // -- missing
+ if (val == null) {
+ this.missing++;
+ continue;
+ }
+
+ // -- unreasonable
+ if (val < tooOld || val > tooNew) {
+ this.unreasonable++;
+ continue;
+ }
+
+ this.validItems.push(item);
+
+ // -- time range
+ if (oldest == null)
+ oldest = newest = val;
+ else if (val < oldest)
+ oldest = val;
+ else if (val > newest)
+ newest = val;
+
+ // -- bucket
+ // - year
+ let year, valYear = val.getYear();
+ if (valYear in years) {
+ year = years[valYear];
+ year._dateCount++;
+ }
+ else {
+ year = years[valYear] = {
+ _dateCount: 1,
+ _subCount: 0
+ };
+ years._subCount++;
+ }
+
+ // - month
+ let month, valMonth = val.getMonth();
+ if (valMonth in year) {
+ month = year[valMonth];
+ month._dateCount++;
+ }
+ else {
+ month = year[valMonth] = {
+ _dateCount: 1,
+ _subCount: 0
+ };
+ year._subCount++;
+ }
+
+ // - day
+ let valDate = val.getDate();
+ if (valDate in month) {
+ month[valDate].push(item);
+ }
+ else {
+ month[valDate] = [item];
+ }
+ }
+
+ this.oldest = oldest;
+ this.newest = newest;
+ },
+
+ _unionMonth: function(aMonthObj) {
+ let dayItemLists = [];
+ for (let key in aMonthObj) {
+ let dayItemList = aMonthObj[key];
+ if (typeof(key) == "string" && key.startsWith('_'))
+ continue;
+ dayItemLists.push(dayItemList);
+ }
+ return Array.concat.apply([], dayItemLists);
+ },
+
+ _unionYear: function(aYearObj) {
+ let monthItemLists = [];
+ for (let key in aYearObj) {
+ let monthObj = aYearObj[key];
+ if (typeof(key) == "string" && key.startsWith('_'))
+ continue;
+ monthItemLists.push(this._unionMonth(monthObj));
+ }
+ return Array.concat.apply([], monthItemLists);
+ }
+};