diff options
Diffstat (limited to 'mailnews/db/gloda/modules/facet.js')
-rw-r--r-- | mailnews/db/gloda/modules/facet.js | 582 |
1 files changed, 582 insertions, 0 deletions
diff --git a/mailnews/db/gloda/modules/facet.js b/mailnews/db/gloda/modules/facet.js new file mode 100644 index 000000000..0f73d3d5b --- /dev/null +++ b/mailnews/db/gloda/modules/facet.js @@ -0,0 +1,582 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file provides faceting logic. + */ + +var EXPORTED_SYMBOLS = ["FacetDriver", "FacetUtils"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/public.js"); + +/** + * Decides the appropriate faceters for the noun type and drives the faceting + * process. This class and the faceters are intended to be reusable so that + * you only need one instance per faceting session. (Although each faceting + * pass is accordingly destructive to previous results.) + * + * Our strategy for faceting is to process one attribute at a time across all + * the items in the provided set. The alternative would be to iterate over + * the items and then iterate over the attributes on each item. While both + * approaches have caching downsides + */ +function FacetDriver(aNounDef, aWindow) { + this.nounDef = aNounDef; + this._window = aWindow; + + this._makeFaceters(); +} +FacetDriver.prototype = { + /** + * Populate |this.faceters| with a set of faceters appropriate to the noun + * definition associated with this instance. + */ + _makeFaceters: function() { + let faceters = this.faceters = []; + + function makeFaceter(aAttrDef, aFacetDef) { + let facetType = aFacetDef.type; + + if (aAttrDef.singular) { + if (facetType == "date") + faceters.push(new DateFaceter(aAttrDef, aFacetDef)); + else + faceters.push(new DiscreteFaceter(aAttrDef, aFacetDef)); + } + else { + if (facetType == "nonempty?") + faceters.push(new NonEmptySetFaceter(aAttrDef, aFacetDef)); + else + faceters.push(new DiscreteSetFaceter(aAttrDef, aFacetDef)); + } + } + + for (let key in this.nounDef.attribsByBoundName) { + let attrDef = this.nounDef.attribsByBoundName[key]; + // ignore attributes that do not want to be faceted + if (!attrDef.facet) + continue; + + makeFaceter(attrDef, attrDef.facet); + + if ("extraFacets" in attrDef) { + for (let facetDef of attrDef.extraFacets) { + makeFaceter(attrDef, facetDef); + } + } + } + }, + /** + * Asynchronously facet the provided items, calling the provided callback when + * completed. + */ + go: function FacetDriver_go(aItems, aCallback, aCallbackThis) { + this.items = aItems; + this.callback = aCallback; + this.callbackThis = aCallbackThis; + + this._nextFaceter = 0; + this._drive(); + }, + + _MAX_FACETING_TIMESLICE_MS: 100, + _FACETING_YIELD_DURATION_MS: 0, + _driveWrapper: function(aThis) { + aThis._drive(); + }, + _drive: function() { + let start = Date.now(); + + while (this._nextFaceter < this.faceters.length) { + let faceter = this.faceters[this._nextFaceter++]; + // for now we facet in one go, but the long-term plan allows for them to + // be generators. + faceter.facetItems(this.items); + + let delta = Date.now() - start; + if (delta > this._MAX_FACETING_TIMESLICE_MS) { + this._window.setTimeout(this._driveWrapper, + this._FACETING_YIELD_DURATION_MS, + this); + return; + } + } + + // we only get here once we are done with the faceters + this.callback.call(this.callbackThis); + } +}; + +var FacetUtils = { + _groupSizeComparator: function(a, b) { + return b[1].length - a[1].length; + }, + + /** + * Given a list where each entry is a tuple of [group object, list of items + * belonging to that group], produce a new list of the top grouped items. We + * used to also produce an "other" aggregation, but that turned out to be + * conceptually difficult to deal with, so that's gone, leaving this method + * with much less to do. + * + * @param aAttrDef The attribute for the facet we are working with. + * @param aGroups The list of groups built for the facet. + * @param aMaxCount The number of result rows you want back. + */ + makeTopGroups: function FacetUtils_makeTopGroups(aAttrDef, aGroups, + aMaxCount) { + let nounDef = aAttrDef.objectNounDef; + let realGroupsToUse = aMaxCount; + + let orderedBySize = aGroups.concat(); + orderedBySize.sort(this._groupSizeComparator); + + // - get the real groups to use and order them by the attribute comparator + let outGroups = orderedBySize.slice(0, realGroupsToUse); + let comparator = nounDef.comparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + outGroups.sort(comparatorHelper); + + return outGroups; + } +}; + +/** + * Facet discrete things like message authors, boolean values, etc. Only + * appropriate for use on singular values. Use |DiscreteSetFaceter| for + * non-singular values. + */ +function DiscreteFaceter(aAttrDef, aFacetDef) { + this.attrDef = aAttrDef; + this.facetDef = aFacetDef; +} +DiscreteFaceter.prototype = { + type: "discrete", + /** + * Facet the given set of items, deferring to the appropriate helper method + */ + facetItems: function(aItems) { + if (this.attrDef.objectNounDef.isPrimitive) + return this.facetPrimitiveItems(aItems); + else + return this.facetComplexItems(aItems); + }, + /** + * Facet an attribute whose value is primitive, meaning that it is a raw + * numeric value or string, rather than a complex object. + */ + facetPrimitiveItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + let filter = this.facetDef.filter; + + let valStrToVal = {}; + let groups = this.groups = {}; + this.groupCount = 0; + + for (let item of aItems) { + let val = (attrKey in item) ? item[attrKey] : null; + if (val === Gloda.IGNORE_FACET) + continue; + + // skip items the filter tells us to ignore + if (filter && !filter(val)) + continue; + + // We need to use hasOwnProperty because we cannot guarantee that the + // contents of val won't collide with the attributes in Object.prototype. + if (groups.hasOwnProperty(val)) + groups[val].push(item); + else { + groups[val] = [item]; + valStrToVal[val] = val; + this.groupCount++; + } + } + + let orderedGroups = Object.keys(groups). + map(key => [valStrToVal[key], groups[key]]); + let comparator = this.facetDef.groupComparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + orderedGroups.sort(comparatorHelper); + this.orderedGroups = orderedGroups; + }, + /** + * Facet an attribute whose value is a complex object that can be identified + * by its 'id' attribute. This is the case where the value is itself a noun + * instance. + */ + facetComplexItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + let filter = this.facetDef.filter; + let idAttr = this.facetDef.groupIdAttr; + + let groups = this.groups = {}; + let groupMap = this.groupMap = {}; + this.groupCount = 0; + + for (let item of aItems) { + let val = (attrKey in item) ? item[attrKey] : null; + if (val === Gloda.IGNORE_FACET) + continue; + + // skip items the filter tells us to ignore + if (filter && !filter(val)) + continue; + + let valId = (val == null) ? null : val[idAttr]; + // We need to use hasOwnProperty because tag nouns are complex objects + // with id's that are non-numeric and so can collide with the contents + // of Object.prototype. (Note: the "tags" attribute is actually handled + // by the DiscreteSetFaceter.) + if (groupMap.hasOwnProperty(valId)) { + groups[valId].push(item); + } + else { + groupMap[valId] = val; + groups[valId] = [item]; + this.groupCount++; + } + } + + let orderedGroups = Object.keys(groups). + map(key => [groupMap[key], groups[key]]); + let comparator = this.facetDef.groupComparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + orderedGroups.sort(comparatorHelper); + this.orderedGroups = orderedGroups; + }, +}; + +/** + * Facet sets of discrete items. For example, tags applied to messages. + * + * The main differences between us and |DiscreteFaceter| are: + * - The empty set is notable. + * - Specific set configurations could be interesting, but are not low-hanging + * fruit. + */ +function DiscreteSetFaceter(aAttrDef, aFacetDef) { + this.attrDef = aAttrDef; + this.facetDef = aFacetDef; +} +DiscreteSetFaceter.prototype = { + type: "discrete", + /** + * Facet the given set of items, deferring to the appropriate helper method + */ + facetItems: function(aItems) { + if (this.attrDef.objectNounDef.isPrimitive) + return this.facetPrimitiveItems(aItems); + else + return this.facetComplexItems(aItems); + }, + /** + * Facet an attribute whose value is primitive, meaning that it is a raw + * numeric value or string, rather than a complex object. + */ + facetPrimitiveItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + let filter = this.facetDef.filter; + + let groups = this.groups = {}; + let valStrToVal = {}; + this.groupCount = 0; + + for (let item of aItems) { + let vals = (attrKey in item) ? item[attrKey] : null; + if (vals === Gloda.IGNORE_FACET) + continue; + + if (vals == null || vals.length == 0) { + vals = [null]; + } + for (let val of vals) { + // skip items the filter tells us to ignore + if (filter && !filter(val)) + continue; + + // We need to use hasOwnProperty because we cannot guarantee that the + // contents of val won't collide with the attributes in + // Object.prototype. + if (groups.hasOwnProperty(val)) + groups[val].push(item); + else { + groups[val] = [item]; + valStrToVal[val] = val; + this.groupCount++; + } + } + } + + let orderedGroups = Object.keys(groups). + map(key => [valStrToVal[key], groups[key]]); + let comparator = this.facetDef.groupComparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + orderedGroups.sort(comparatorHelper); + this.orderedGroups = orderedGroups; + }, + /** + * Facet an attribute whose value is a complex object that can be identified + * by its 'id' attribute. This is the case where the value is itself a noun + * instance. + */ + facetComplexItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + let filter = this.facetDef.filter; + let idAttr = this.facetDef.groupIdAttr; + + let groups = this.groups = {}; + let groupMap = this.groupMap = {}; + this.groupCount = 0; + + for (let item of aItems) { + let vals = (attrKey in item) ? item[attrKey] : null; + if (vals === Gloda.IGNORE_FACET) + continue; + + if (vals == null || vals.length == 0) { + vals = [null]; + } + for (let val of vals) { + // skip items the filter tells us to ignore + if (filter && !filter(val)) + continue; + + let valId = (val == null) ? null : val[idAttr]; + // We need to use hasOwnProperty because tag nouns are complex objects + // with id's that are non-numeric and so can collide with the contents + // of Object.prototype. + if (groupMap.hasOwnProperty(valId)) { + groups[valId].push(item); + } + else { + groupMap[valId] = val; + groups[valId] = [item]; + this.groupCount++; + } + } + } + + let orderedGroups = Object.keys(groups). + map(key => [groupMap[key], groups[key]]); + let comparator = this.facetDef.groupComparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + orderedGroups.sort(comparatorHelper); + this.orderedGroups = orderedGroups; + }, +}; + +/** + * Given a non-singular attribute, facet it as if it were a boolean based on + * whether there is anything in the list (set). + */ +function NonEmptySetFaceter(aAttrDef, aFacetDef) { + this.attrDef = aAttrDef; + this.facetDef = aFacetDef; +} +NonEmptySetFaceter.prototype = { + type: "boolean", + /** + * Facet the given set of items, deferring to the appropriate helper method + */ + facetItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + + let trueValues = []; + let falseValues = []; + + let groups = this.groups = {}; + this.groupCount = 0; + + for (let item of aItems) { + let vals = (attrKey in item) ? item[attrKey] : null; + if (vals == null || vals.length == 0) + falseValues.push(item); + else + trueValues.push(item); + } + + this.orderedGroups = []; + if (trueValues.length) + this.orderedGroups.push([true, trueValues]); + if (falseValues.length) + this.orderedGroups.push([false, falseValues]); + this.groupCount = this.orderedGroups.length; + }, + makeQuery: function(aGroupValues, aInclusive) { + let query = this.query = Gloda.newQuery(Gloda.NOUN_MESSAGE); + + let constraintFunc = query[this.attrDef.boundName]; + constraintFunc.call(query); + + // Our query is always for non-empty lists (at this time), so we want to + // invert if they're excluding 'true' or including 'false', which means !=. + let invert = aGroupValues[0] != aInclusive; + + return [query, invert]; + } +}; + + +/** + * Facet dates. We build a hierarchical nested structure of year, month, and + * day nesting levels. This decision was made speculatively in the hopes that + * it would allow us to do clustered analysis and that there might be a benefit + * for that. For example, if you search for "Christmas", we might notice + * clusters of messages around December of each year. We could then present + * these in a list as likely candidates, rather than a graphical timeline. + * Alternately, it could be used to inform a non-linear visualization. As it + * stands (as of this writing), it's just a complicating factor. + */ +function DateFaceter(aAttrDef, aFacetDef) { + this.attrDef = aAttrDef; + this.facetDef = aFacetDef; +} +DateFaceter.prototype = { + type: "date", + /** + * + */ + facetItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + + let years = this.years = {_subCount: 0}; + // generally track the time range + let oldest = null, newest = null; + + let validItems = this.validItems = []; + + // just cheat and put us at the front... + this.groupCount = aItems.length ? 1000 : 0; + this.orderedGroups = null; + + /** The number of items with a null/missing attribute. */ + this.missing = 0; + + /** + * The number of items with a date that is unreasonably far in the past or + * in the future. Old-wise, we are concerned about incorrectly formatted + * messages (spam) that end up placed around the UNIX epoch. New-wise, + * we are concerned about messages that can't be explained by users who + * don't know how to set their clocks (both the current user and people + * sending them mail), mainly meaning spam. + * We want to avoid having our clever time-scale logic being made useless by + * these unreasonable messages. + */ + this.unreasonable = 0; + // feb 1, 1970 + let tooOld = new Date(1970, 1, 1); + // 3 days from now + let tooNew = new Date(Date.now() + 3 * 24 * 60 * 60 * 1000); + + for (let item of aItems) { + let val = (attrKey in item) ? item[attrKey] : null; + // -- missing + if (val == null) { + this.missing++; + continue; + } + + // -- unreasonable + if (val < tooOld || val > tooNew) { + this.unreasonable++; + continue; + } + + this.validItems.push(item); + + // -- time range + if (oldest == null) + oldest = newest = val; + else if (val < oldest) + oldest = val; + else if (val > newest) + newest = val; + + // -- bucket + // - year + let year, valYear = val.getYear(); + if (valYear in years) { + year = years[valYear]; + year._dateCount++; + } + else { + year = years[valYear] = { + _dateCount: 1, + _subCount: 0 + }; + years._subCount++; + } + + // - month + let month, valMonth = val.getMonth(); + if (valMonth in year) { + month = year[valMonth]; + month._dateCount++; + } + else { + month = year[valMonth] = { + _dateCount: 1, + _subCount: 0 + }; + year._subCount++; + } + + // - day + let valDate = val.getDate(); + if (valDate in month) { + month[valDate].push(item); + } + else { + month[valDate] = [item]; + } + } + + this.oldest = oldest; + this.newest = newest; + }, + + _unionMonth: function(aMonthObj) { + let dayItemLists = []; + for (let key in aMonthObj) { + let dayItemList = aMonthObj[key]; + if (typeof(key) == "string" && key.startsWith('_')) + continue; + dayItemLists.push(dayItemList); + } + return Array.concat.apply([], dayItemLists); + }, + + _unionYear: function(aYearObj) { + let monthItemLists = []; + for (let key in aYearObj) { + let monthObj = aYearObj[key]; + if (typeof(key) == "string" && key.startsWith('_')) + continue; + monthItemLists.push(this._unionMonth(monthObj)); + } + return Array.concat.apply([], monthItemLists); + } +}; |