/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * This file provides faceting logic. */ var EXPORTED_SYMBOLS = ["FacetDriver", "FacetUtils"]; var Cc = Components.classes; var Ci = Components.interfaces; var Cr = Components.results; var Cu = Components.utils; Cu.import("resource:///modules/gloda/public.js"); /** * Decides the appropriate faceters for the noun type and drives the faceting * process. This class and the faceters are intended to be reusable so that * you only need one instance per faceting session. (Although each faceting * pass is accordingly destructive to previous results.) * * Our strategy for faceting is to process one attribute at a time across all * the items in the provided set. The alternative would be to iterate over * the items and then iterate over the attributes on each item. While both * approaches have caching downsides */ function FacetDriver(aNounDef, aWindow) { this.nounDef = aNounDef; this._window = aWindow; this._makeFaceters(); } FacetDriver.prototype = { /** * Populate |this.faceters| with a set of faceters appropriate to the noun * definition associated with this instance. */ _makeFaceters: function() { let faceters = this.faceters = []; function makeFaceter(aAttrDef, aFacetDef) { let facetType = aFacetDef.type; if (aAttrDef.singular) { if (facetType == "date") faceters.push(new DateFaceter(aAttrDef, aFacetDef)); else faceters.push(new DiscreteFaceter(aAttrDef, aFacetDef)); } else { if (facetType == "nonempty?") faceters.push(new NonEmptySetFaceter(aAttrDef, aFacetDef)); else faceters.push(new DiscreteSetFaceter(aAttrDef, aFacetDef)); } } for (let key in this.nounDef.attribsByBoundName) { let attrDef = this.nounDef.attribsByBoundName[key]; // ignore attributes that do not want to be faceted if (!attrDef.facet) continue; makeFaceter(attrDef, attrDef.facet); if ("extraFacets" in attrDef) { for (let facetDef of attrDef.extraFacets) { makeFaceter(attrDef, facetDef); } } } }, /** * Asynchronously facet the provided items, calling the provided callback when * completed. */ go: function FacetDriver_go(aItems, aCallback, aCallbackThis) { this.items = aItems; this.callback = aCallback; this.callbackThis = aCallbackThis; this._nextFaceter = 0; this._drive(); }, _MAX_FACETING_TIMESLICE_MS: 100, _FACETING_YIELD_DURATION_MS: 0, _driveWrapper: function(aThis) { aThis._drive(); }, _drive: function() { let start = Date.now(); while (this._nextFaceter < this.faceters.length) { let faceter = this.faceters[this._nextFaceter++]; // for now we facet in one go, but the long-term plan allows for them to // be generators. faceter.facetItems(this.items); let delta = Date.now() - start; if (delta > this._MAX_FACETING_TIMESLICE_MS) { this._window.setTimeout(this._driveWrapper, this._FACETING_YIELD_DURATION_MS, this); return; } } // we only get here once we are done with the faceters this.callback.call(this.callbackThis); } }; var FacetUtils = { _groupSizeComparator: function(a, b) { return b[1].length - a[1].length; }, /** * Given a list where each entry is a tuple of [group object, list of items * belonging to that group], produce a new list of the top grouped items. We * used to also produce an "other" aggregation, but that turned out to be * conceptually difficult to deal with, so that's gone, leaving this method * with much less to do. * * @param aAttrDef The attribute for the facet we are working with. * @param aGroups The list of groups built for the facet. * @param aMaxCount The number of result rows you want back. */ makeTopGroups: function FacetUtils_makeTopGroups(aAttrDef, aGroups, aMaxCount) { let nounDef = aAttrDef.objectNounDef; let realGroupsToUse = aMaxCount; let orderedBySize = aGroups.concat(); orderedBySize.sort(this._groupSizeComparator); // - get the real groups to use and order them by the attribute comparator let outGroups = orderedBySize.slice(0, realGroupsToUse); let comparator = nounDef.comparator; function comparatorHelper(a, b) { return comparator(a[0], b[0]); } outGroups.sort(comparatorHelper); return outGroups; } }; /** * Facet discrete things like message authors, boolean values, etc. Only * appropriate for use on singular values. Use |DiscreteSetFaceter| for * non-singular values. */ function DiscreteFaceter(aAttrDef, aFacetDef) { this.attrDef = aAttrDef; this.facetDef = aFacetDef; } DiscreteFaceter.prototype = { type: "discrete", /** * Facet the given set of items, deferring to the appropriate helper method */ facetItems: function(aItems) { if (this.attrDef.objectNounDef.isPrimitive) return this.facetPrimitiveItems(aItems); else return this.facetComplexItems(aItems); }, /** * Facet an attribute whose value is primitive, meaning that it is a raw * numeric value or string, rather than a complex object. */ facetPrimitiveItems: function(aItems) { let attrKey = this.attrDef.boundName; let nounDef = this.attrDef.objectNounDef; let filter = this.facetDef.filter; let valStrToVal = {}; let groups = this.groups = {}; this.groupCount = 0; for (let item of aItems) { let val = (attrKey in item) ? item[attrKey] : null; if (val === Gloda.IGNORE_FACET) continue; // skip items the filter tells us to ignore if (filter && !filter(val)) continue; // We need to use hasOwnProperty because we cannot guarantee that the // contents of val won't collide with the attributes in Object.prototype. if (groups.hasOwnProperty(val)) groups[val].push(item); else { groups[val] = [item]; valStrToVal[val] = val; this.groupCount++; } } let orderedGroups = Object.keys(groups). map(key => [valStrToVal[key], groups[key]]); let comparator = this.facetDef.groupComparator; function comparatorHelper(a, b) { return comparator(a[0], b[0]); } orderedGroups.sort(comparatorHelper); this.orderedGroups = orderedGroups; }, /** * Facet an attribute whose value is a complex object that can be identified * by its 'id' attribute. This is the case where the value is itself a noun * instance. */ facetComplexItems: function(aItems) { let attrKey = this.attrDef.boundName; let nounDef = this.attrDef.objectNounDef; let filter = this.facetDef.filter; let idAttr = this.facetDef.groupIdAttr; let groups = this.groups = {}; let groupMap = this.groupMap = {}; this.groupCount = 0; for (let item of aItems) { let val = (attrKey in item) ? item[attrKey] : null; if (val === Gloda.IGNORE_FACET) continue; // skip items the filter tells us to ignore if (filter && !filter(val)) continue; let valId = (val == null) ? null : val[idAttr]; // We need to use hasOwnProperty because tag nouns are complex objects // with id's that are non-numeric and so can collide with the contents // of Object.prototype. (Note: the "tags" attribute is actually handled // by the DiscreteSetFaceter.) if (groupMap.hasOwnProperty(valId)) { groups[valId].push(item); } else { groupMap[valId] = val; groups[valId] = [item]; this.groupCount++; } } let orderedGroups = Object.keys(groups). map(key => [groupMap[key], groups[key]]); let comparator = this.facetDef.groupComparator; function comparatorHelper(a, b) { return comparator(a[0], b[0]); } orderedGroups.sort(comparatorHelper); this.orderedGroups = orderedGroups; }, }; /** * Facet sets of discrete items. For example, tags applied to messages. * * The main differences between us and |DiscreteFaceter| are: * - The empty set is notable. * - Specific set configurations could be interesting, but are not low-hanging * fruit. */ function DiscreteSetFaceter(aAttrDef, aFacetDef) { this.attrDef = aAttrDef; this.facetDef = aFacetDef; } DiscreteSetFaceter.prototype = { type: "discrete", /** * Facet the given set of items, deferring to the appropriate helper method */ facetItems: function(aItems) { if (this.attrDef.objectNounDef.isPrimitive) return this.facetPrimitiveItems(aItems); else return this.facetComplexItems(aItems); }, /** * Facet an attribute whose value is primitive, meaning that it is a raw * numeric value or string, rather than a complex object. */ facetPrimitiveItems: function(aItems) { let attrKey = this.attrDef.boundName; let nounDef = this.attrDef.objectNounDef; let filter = this.facetDef.filter; let groups = this.groups = {}; let valStrToVal = {}; this.groupCount = 0; for (let item of aItems) { let vals = (attrKey in item) ? item[attrKey] : null; if (vals === Gloda.IGNORE_FACET) continue; if (vals == null || vals.length == 0) { vals = [null]; } for (let val of vals) { // skip items the filter tells us to ignore if (filter && !filter(val)) continue; // We need to use hasOwnProperty because we cannot guarantee that the // contents of val won't collide with the attributes in // Object.prototype. if (groups.hasOwnProperty(val)) groups[val].push(item); else { groups[val] = [item]; valStrToVal[val] = val; this.groupCount++; } } } let orderedGroups = Object.keys(groups). map(key => [valStrToVal[key], groups[key]]); let comparator = this.facetDef.groupComparator; function comparatorHelper(a, b) { return comparator(a[0], b[0]); } orderedGroups.sort(comparatorHelper); this.orderedGroups = orderedGroups; }, /** * Facet an attribute whose value is a complex object that can be identified * by its 'id' attribute. This is the case where the value is itself a noun * instance. */ facetComplexItems: function(aItems) { let attrKey = this.attrDef.boundName; let nounDef = this.attrDef.objectNounDef; let filter = this.facetDef.filter; let idAttr = this.facetDef.groupIdAttr; let groups = this.groups = {}; let groupMap = this.groupMap = {}; this.groupCount = 0; for (let item of aItems) { let vals = (attrKey in item) ? item[attrKey] : null; if (vals === Gloda.IGNORE_FACET) continue; if (vals == null || vals.length == 0) { vals = [null]; } for (let val of vals) { // skip items the filter tells us to ignore if (filter && !filter(val)) continue; let valId = (val == null) ? null : val[idAttr]; // We need to use hasOwnProperty because tag nouns are complex objects // with id's that are non-numeric and so can collide with the contents // of Object.prototype. if (groupMap.hasOwnProperty(valId)) { groups[valId].push(item); } else { groupMap[valId] = val; groups[valId] = [item]; this.groupCount++; } } } let orderedGroups = Object.keys(groups). map(key => [groupMap[key], groups[key]]); let comparator = this.facetDef.groupComparator; function comparatorHelper(a, b) { return comparator(a[0], b[0]); } orderedGroups.sort(comparatorHelper); this.orderedGroups = orderedGroups; }, }; /** * Given a non-singular attribute, facet it as if it were a boolean based on * whether there is anything in the list (set). */ function NonEmptySetFaceter(aAttrDef, aFacetDef) { this.attrDef = aAttrDef; this.facetDef = aFacetDef; } NonEmptySetFaceter.prototype = { type: "boolean", /** * Facet the given set of items, deferring to the appropriate helper method */ facetItems: function(aItems) { let attrKey = this.attrDef.boundName; let nounDef = this.attrDef.objectNounDef; let trueValues = []; let falseValues = []; let groups = this.groups = {}; this.groupCount = 0; for (let item of aItems) { let vals = (attrKey in item) ? item[attrKey] : null; if (vals == null || vals.length == 0) falseValues.push(item); else trueValues.push(item); } this.orderedGroups = []; if (trueValues.length) this.orderedGroups.push([true, trueValues]); if (falseValues.length) this.orderedGroups.push([false, falseValues]); this.groupCount = this.orderedGroups.length; }, makeQuery: function(aGroupValues, aInclusive) { let query = this.query = Gloda.newQuery(Gloda.NOUN_MESSAGE); let constraintFunc = query[this.attrDef.boundName]; constraintFunc.call(query); // Our query is always for non-empty lists (at this time), so we want to // invert if they're excluding 'true' or including 'false', which means !=. let invert = aGroupValues[0] != aInclusive; return [query, invert]; } }; /** * Facet dates. We build a hierarchical nested structure of year, month, and * day nesting levels. This decision was made speculatively in the hopes that * it would allow us to do clustered analysis and that there might be a benefit * for that. For example, if you search for "Christmas", we might notice * clusters of messages around December of each year. We could then present * these in a list as likely candidates, rather than a graphical timeline. * Alternately, it could be used to inform a non-linear visualization. As it * stands (as of this writing), it's just a complicating factor. */ function DateFaceter(aAttrDef, aFacetDef) { this.attrDef = aAttrDef; this.facetDef = aFacetDef; } DateFaceter.prototype = { type: "date", /** * */ facetItems: function(aItems) { let attrKey = this.attrDef.boundName; let nounDef = this.attrDef.objectNounDef; let years = this.years = {_subCount: 0}; // generally track the time range let oldest = null, newest = null; let validItems = this.validItems = []; // just cheat and put us at the front... this.groupCount = aItems.length ? 1000 : 0; this.orderedGroups = null; /** The number of items with a null/missing attribute. */ this.missing = 0; /** * The number of items with a date that is unreasonably far in the past or * in the future. Old-wise, we are concerned about incorrectly formatted * messages (spam) that end up placed around the UNIX epoch. New-wise, * we are concerned about messages that can't be explained by users who * don't know how to set their clocks (both the current user and people * sending them mail), mainly meaning spam. * We want to avoid having our clever time-scale logic being made useless by * these unreasonable messages. */ this.unreasonable = 0; // feb 1, 1970 let tooOld = new Date(1970, 1, 1); // 3 days from now let tooNew = new Date(Date.now() + 3 * 24 * 60 * 60 * 1000); for (let item of aItems) { let val = (attrKey in item) ? item[attrKey] : null; // -- missing if (val == null) { this.missing++; continue; } // -- unreasonable if (val < tooOld || val > tooNew) { this.unreasonable++; continue; } this.validItems.push(item); // -- time range if (oldest == null) oldest = newest = val; else if (val < oldest) oldest = val; else if (val > newest) newest = val; // -- bucket // - year let year, valYear = val.getYear(); if (valYear in years) { year = years[valYear]; year._dateCount++; } else { year = years[valYear] = { _dateCount: 1, _subCount: 0 }; years._subCount++; } // - month let month, valMonth = val.getMonth(); if (valMonth in year) { month = year[valMonth]; month._dateCount++; } else { month = year[valMonth] = { _dateCount: 1, _subCount: 0 }; year._subCount++; } // - day let valDate = val.getDate(); if (valDate in month) { month[valDate].push(item); } else { month[valDate] = [item]; } } this.oldest = oldest; this.newest = newest; }, _unionMonth: function(aMonthObj) { let dayItemLists = []; for (let key in aMonthObj) { let dayItemList = aMonthObj[key]; if (typeof(key) == "string" && key.startsWith('_')) continue; dayItemLists.push(dayItemList); } return Array.concat.apply([], dayItemLists); }, _unionYear: function(aYearObj) { let monthItemLists = []; for (let key in aYearObj) { let monthObj = aYearObj[key]; if (typeof(key) == "string" && key.startsWith('_')) continue; monthItemLists.push(this._unionMonth(monthObj)); } return Array.concat.apply([], monthItemLists); } };