diff options
Diffstat (limited to 'mailnews/db/gloda/modules')
26 files changed, 19293 insertions, 0 deletions
diff --git a/mailnews/db/gloda/modules/collection.js b/mailnews/db/gloda/modules/collection.js new file mode 100644 index 000000000..466d7df1a --- /dev/null +++ b/mailnews/db/gloda/modules/collection.js @@ -0,0 +1,772 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ['GlodaCollection', 'GlodaCollectionManager']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); + +var LOG = Log4Moz.repository.getLogger("gloda.collection"); + +/** + * @namespace Central registry and logic for all collections. + * + * The collection manager is a singleton that has the following tasks: + * - Let views of objects (nouns) know when their objects have changed. For + * example, an attribute has changed due to user action. + * - Let views of objects based on queries know when new objects match their + * query, or when their existing objects no longer match due to changes. + * - Caching/object-identity maintenance. It is ideal if we only ever have + * one instance of an object at a time. (More specifically, only one instance + * per database row 'id'.) The collection mechanism lets us find existing + * instances to this end. Caching can be directly integrated by being treated + * as a special collection. + */ +var GlodaCollectionManager = { + _collectionsByNoun: {}, + _cachesByNoun: {}, + + /** + * Registers the existence of a collection with the collection manager. This + * is done using a weak reference so that the collection can go away if it + * wants to. 
+ */ + registerCollection: function gloda_colm_registerCollection(aCollection) { + let collections; + let nounID = aCollection.query._nounDef.id; + if (!(nounID in this._collectionsByNoun)) + collections = this._collectionsByNoun[nounID] = []; + else { + // purge dead weak references while we're at it + collections = this._collectionsByNoun[nounID].filter(function (aRef) { + return aRef.get(); }); + this._collectionsByNoun[nounID] = collections; + } + collections.push(Cu.getWeakReference(aCollection)); + }, + + getCollectionsForNounID: function gloda_colm_getCollectionsForNounID(aNounID){ + if (!(aNounID in this._collectionsByNoun)) + return []; + + // generator would be nice, but I suspect get() is too expensive to use + // twice (guard/predicate and value) + let weakCollections = this._collectionsByNoun[aNounID]; + let collections = []; + for (let iColl = 0; iColl < weakCollections.length; iColl++) { + let collection = weakCollections[iColl].get(); + if (collection) + collections.push(collection); + } + return collections; + }, + + defineCache: function gloda_colm_defineCache(aNounDef, aCacheSize) { + this._cachesByNoun[aNounDef.id] = new GlodaLRUCacheCollection(aNounDef, + aCacheSize); + }, + + /** + * Attempt to locate an instance of the object of the given noun type with the + * given id. Counts as a cache hit if found. (And if it was't in a cache, + * but rather a collection, it is added to the cache.) 
+ */ + cacheLookupOne: function gloda_colm_cacheLookupOne(aNounID, aID, aDoCache) { + let cache = this._cachesByNoun[aNounID]; + + if (cache) { + if (aID in cache._idMap) { + let item = cache._idMap[aID]; + return cache.hit(item); + } + } + + if (aDoCache === false) + cache = null; + + for (let collection of this.getCollectionsForNounID(aNounID)) { + if (aID in collection._idMap) { + let item = collection._idMap[aID]; + if (cache) + cache.add([item]); + return item; + } + } + + return null; + }, + + /** + * Lookup multiple nouns by ID from the cache/existing collections. + * + * @param aNounID The kind of noun identified by its ID. + * @param aIDMap A dictionary/map whose keys must be gloda noun ids for the + * given noun type and whose values are ignored. + * @param aTargetMap An object to hold the noun id's (key) and noun instances + * (value) for the noun instances that were found available in memory + * because they were cached or in existing query collections. + * @param [aDoCache=true] Should we add any items to the cache that we found + * in collections that were in memory but not in the cache? You would + * likely want to pass false if you are only updating in-memory + * representations rather than performing a new query. + * + * @return [The number that were found, the number that were not found, + * a dictionary whose keys are the ids of noun instances that + * were not found.] 
+ */ + cacheLookupMany: function gloda_colm_cacheLookupMany(aNounID, aIDMap, + aTargetMap, aDoCache) { + let foundCount = 0, notFoundCount = 0, notFound = {}; + + let cache = this._cachesByNoun[aNounID]; + + if (cache) { + for (let key in aIDMap) { + let cacheValue = cache._idMap[key]; + if (cacheValue === undefined) { + notFoundCount++; + notFound[key] = null; + } + else { + foundCount++; + aTargetMap[key] = cacheValue; + cache.hit(cacheValue); + } + } + } + + if (aDoCache === false) + cache = null; + + for (let collection of this.getCollectionsForNounID(aNounID)) { + for (let key in notFound) { + let collValue = collection._idMap[key]; + if (collValue !== undefined) { + aTargetMap[key] = collValue; + delete notFound[key]; + foundCount++; + notFoundCount--; + if (cache) + cache.add([collValue]); + } + } + } + + return [foundCount, notFoundCount, notFound]; + }, + + /** + * Friendlier version of |cacheLookupMany|; takes a list of ids and returns + * an object whose keys and values are the gloda id's and instances of the + * instances that were found. We don't tell you who we didn't find. The + * assumption is this is being used for in-memory updates where we only need + * to tweak what is in memory. + */ + cacheLookupManyList: function gloda_colm_cacheLookupManyList(aNounID, aIds) { + let checkMap = {}, targetMap = {}; + for (let id of aIds) { + checkMap[id] = null; + } + // do not promote found items into the cache + this.cacheLookupMany(aNounID, checkMap, targetMap, false); + return targetMap; + }, + + /** + * Attempt to locate an instance of the object of the given noun type with the + * given id. Counts as a cache hit if found. (And if it was't in a cache, + * but rather a collection, it is added to the cache.) 
+ */ + cacheLookupOneByUniqueValue: + function gloda_colm_cacheLookupOneByUniqueValue(aNounID, aUniqueValue, + aDoCache) { + let cache = this._cachesByNoun[aNounID]; + + if (cache) { + if (aUniqueValue in cache._uniqueValueMap) { + let item = cache._uniqueValueMap[aUniqueValue]; + return cache.hit(item); + } + } + + if (aDoCache === false) + cache = null; + + for (let collection of this.getCollectionsForNounID(aNounID)) { + if (aUniqueValue in collection._uniqueValueMap) { + let item = collection._uniqueValueMap[aUniqueValue]; + if (cache) + cache.add([item]); + return item; + } + } + + return null; + }, + + /** + * Checks whether the provided item with the given id is actually a duplicate + * of an instance that already exists in the cache/a collection. If it is, + * the pre-existing instance is returned and counts as a cache hit. If it + * is not, the passed-in instance is added to the cache and returned. + */ + cacheLoadUnifyOne: function gloda_colm_cacheLoadUnifyOne(aItem) { + let items = [aItem]; + this.cacheLoadUnify(aItem.NOUN_ID, items); + return items[0]; + }, + + /** + * Given a list of items, check if any of them already have duplicate, + * canonical, instances in the cache or collections. Items with pre-existing + * instances are replaced by those instances in the provided list, and each + * counts as a cache hit. Items without pre-existing instances are added + * to the cache and left intact. + */ + cacheLoadUnify: function gloda_colm_cacheLoadUnify(aNounID, aItems, + aCacheIfMissing) { + let cache = this._cachesByNoun[aNounID]; + if (aCacheIfMissing === undefined) + aCacheIfMissing = true; + + // track the items we haven't yet found in a cache/collection (value) and + // their index in aItems (key). We're somewhat abusing the dictionary + // metaphor with the intent of storing tuples here. We also do it because + // it allows random-access deletion theoretically without cost. 
(Since + // we delete during iteration, that may be wrong, but it sounds like the + // semantics still work?) + let unresolvedIndexToItem = {}; + let numUnresolved = 0; + + if (cache) { + for (let iItem = 0; iItem < aItems.length; iItem++) { + let item = aItems[iItem]; + + if (item.id in cache._idMap) { + let realItem = cache._idMap[item.id]; + // update the caller's array with the reference to the 'real' item + aItems[iItem] = realItem; + cache.hit(realItem); + } + else { + unresolvedIndexToItem[iItem] = item; + numUnresolved++; + } + } + + // we're done if everyone was a hit. + if (numUnresolved == 0) + return; + } + else { + for (let iItem = 0; iItem < aItems.length; iItem++) { + unresolvedIndexToItem[iItem] = aItems[iItem]; + } + numUnresolved = aItems.length; + } + + let needToCache = []; + // next, let's fall back to our collections + for (let collection of this.getCollectionsForNounID(aNounID)) { + for (let [iItem, item] in Iterator(unresolvedIndexToItem)) { + if (item.id in collection._idMap) { + let realItem = collection._idMap[item.id]; + // update the caller's array to now have the 'real' object + aItems[iItem] = realItem; + // flag that we need to cache this guy (we use an inclusive cache) + needToCache.push(realItem); + // we no longer need to resolve this item... + delete unresolvedIndexToItem[iItem]; + // stop checking collections if we got everybody + if (--numUnresolved == 0) + break; + } + } + } + + // anything left in unresolvedIndexToItem should be added to the cache + // unless !aCacheIfMissing. plus, we already have 'needToCache' + if (cache && aCacheIfMissing) { + cache.add(needToCache.concat(Object.keys(unresolvedIndexToItem). 
+ map(key => unresolvedIndexToItem[key]))); + } + + return aItems; + }, + + cacheCommitDirty: function glod_colm_cacheCommitDirty() { + for (let id in this._cachesByNoun) { + let cache = this._cachesByNoun[id]; + cache.commitDirty(); + } + }, + + /** + * Notifies the collection manager that an item has been loaded and should + * be cached, assuming caching is active. + */ + itemLoaded: function gloda_colm_itemsLoaded(aItem) { + let cache = this._cachesByNoun[aItem.NOUN_ID]; + if (cache) { + cache.add([aItem]); + } + }, + + /** + * Notifies the collection manager that multiple items has been loaded and + * should be cached, assuming caching is active. + */ + itemsLoaded: function gloda_colm_itemsLoaded(aNounID, aItems) { + let cache = this._cachesByNoun[aNounID]; + if (cache) { + cache.add(aItems); + } + }, + + /** + * This should be called when items are added to the global database. This + * should generally mean during indexing by indexers or an attribute + * provider. + * We walk all existing collections for the given noun type and add the items + * to the collection if the item meets the query that defines the collection. + */ + itemsAdded: function gloda_colm_itemsAdded(aNounID, aItems) { + let cache = this._cachesByNoun[aNounID]; + if (cache) { + cache.add(aItems); + } + + for (let collection of this.getCollectionsForNounID(aNounID)) { + let addItems = aItems.filter(item => collection.query.test(item)); + if (addItems.length) + collection._onItemsAdded(addItems); + } + }, + /** + * This should be called when items in the global database are modified. For + * example, as a result of indexing. This should generally only be called + * by indexers or by attribute providers. + * We walk all existing collections for the given noun type. For items + * currently included in each collection but should no longer be (per the + * collection's defining query) we generate onItemsRemoved events. 
For items + * not currently included in the collection but should now be, we generate + * onItemsAdded events. For items included that still match the query, we + * generate onItemsModified events. + */ + itemsModified: function gloda_colm_itemsModified(aNounID, aItems) { + for (let collection of this.getCollectionsForNounID(aNounID)) { + let added = [], modified = [], removed = []; + for (let item of aItems) { + if (item.id in collection._idMap) { + // currently in... but should it still be there? + if (collection.query.test(item)) + modified.push(item); // yes, keep it + // oy, so null queries really don't want any notifications, and they + // sorta fit into our existing model, except for the removal bit. + // so we need a specialized check for them, and we're using the + // frozen attribute to this end. + else if (!collection.query.frozen) + removed.push(item); // no, bin it + } + else if (collection.query.test(item)) // not in, should it be? + added.push(item); // yep, add it + } + if (added.length) + collection._onItemsAdded(added); + if (modified.length) + collection._onItemsModified(modified); + if (removed.length) + collection._onItemsRemoved(removed); + } + }, + /** + * This should be called when items in the global database are permanently-ish + * deleted. (This is distinct from concepts like message deletion which may + * involved trash folders or other modified forms of existence. Deleted + * means the data is gone and if it were to come back, it would come back + * via an itemsAdded event.) + * We walk all existing collections for the given noun type. For items + * currently in the collection, we generate onItemsRemoved events. + * + * @param aItemIds A list of item ids that are being deleted. 
+ */ + itemsDeleted: function gloda_colm_itemsDeleted(aNounID, aItemIds) { + // cache + let cache = this._cachesByNoun[aNounID]; + if (cache) { + for (let itemId of aItemIds) { + if (itemId in cache._idMap) + cache.deleted(cache._idMap[itemId]); + } + } + + // collections + for (let collection of this.getCollectionsForNounID(aNounID)) { + let removeItems = aItemIds.filter(itemId => itemId in collection._idMap). + map(itemId => collection._idMap[itemId]); + if (removeItems.length) + collection._onItemsRemoved(removeItems); + } + }, + /** + * Like |itemsDeleted| but for the case where the deletion is based on an + * attribute that SQLite can more efficiently check than we can and where the + * cost of scanning the in-memory items is presumably much cheaper than + * trying to figure out what actually got deleted. + * + * Since we are doing an in-memory walk, this is obviously O(n) where n is the + * number of noun instances of a given type in-memory. We are assuming this + * is a reasonable number of things and that this type of deletion call is + * not going to happen all that frequently. If these assumptions are wrong, + * callers are advised to re-think the whole situation. + * + * @param aNounID Type of noun we are talking about here. + * @param aFilter A filter function that returns true when the item should be + * thought of as deleted, or false if the item is still good. Screw this + * up and you will get some seriously wacky bugs, yo. 
+ */ + itemsDeletedByAttribute: function gloda_colm_itemsDeletedByAttribute( + aNounID, aFilter) { + // cache + let cache = this._cachesByNoun[aNounID]; + if (cache) { + for (let id in cache._idMap) { + let item = cache._idMap[id]; + if (aFilter(item)) + cache.deleted(item); + } + } + + // collections + for (let collection of this.getCollectionsForNounID(aNounID)) { + let removeItems = collection.items.filter(aFilter); + if (removeItems.length) + collection._onItemsRemoved(removeItems); + } + }, +}; + +/** + * @class A current view of the set of first-class nouns meeting a given query. + * Assuming a listener is present, events are + * generated when new objects meet the query, existing objects no longer meet + * the query, or existing objects have experienced a change in attributes that + * does not affect their ability to be present (but the listener may care about + * because it is exposing those attributes). + * @constructor + */ +function GlodaCollection(aNounDef, aItems, aQuery, aListener, + aMasterCollection) { + // if aNounDef is null, we are just being invoked for subclassing + if (aNounDef === undefined) + return; + + this._nounDef = aNounDef; + // should we also maintain a unique value mapping... + if (this._nounDef.usesUniqueValue) + this._uniqueValueMap = {}; + + this.pendingItems = []; + this._pendingIdMap = {}; + this.items = []; + this._idMap = {}; + + // force the listener to null for our call to _onItemsAdded; no events for + // the initial load-out. + this._listener = null; + if (aItems && aItems.length) + this._onItemsAdded(aItems); + + this.query = aQuery || null; + if (this.query) { + this.query.collection = this; + if (this.query.options.stashColumns) + this.stashedColumns = {}; + } + this._listener = aListener || null; + + this.deferredCount = 0; + this.resolvedCount = 0; + + if (aMasterCollection) { + this.masterCollection = aMasterCollection.masterCollection; + } + else { + this.masterCollection = this; + /** a dictionary of dictionaries. 
at the top level, the keys are noun IDs. + * each of these sub-dictionaries maps the IDs of desired noun instances to + * the actual instance, or null if it has not yet been loaded. + */ + this.referencesByNounID = {}; + /** + * a dictionary of dictionaries. at the top level, the keys are noun IDs. + * each of the sub-dictionaries maps the IDs of the _recognized parent + * noun_ to the list of children, or null if the list has not yet been + * populated. + * + * So if we have a noun definition A with ID 1 who is the recognized parent + * noun of noun definition B with ID 2, AND we have an instance A(1) with + * two children B(10), B(11), then an example might be: {2: {1: [10, 11]}}. + */ + this.inverseReferencesByNounID = {}; + this.subCollections = {}; + } +} + +GlodaCollection.prototype = { + get listener() { return this._listener; }, + set listener(aListener) { this._listener = aListener; }, + + /** + * If this collection still has a query associated with it, drop the query + * and replace it with an 'explicit query'. This means that the Collection + * Manager will not attempt to match new items indexed to the system against + * our query criteria. + * Once you call this method, your collection's listener will no longer + * receive onItemsAdded notifications that are not the result of your + * initial database query. It will, however, receive onItemsModified + * notifications if items in the collection are re-indexed. + */ + becomeExplicit: function gloda_coll_becomeExplicit() { + if (!(this.query instanceof this._nounDef.explicitQueryClass)) { + this.query = new this._nounDef.explicitQueryClass(this); + } + }, + + /** + * Clear the contents of this collection. This only makes sense for explicit + * collections or wildcard collections. (Actual query-based collections + * should represent the state of the query, so unless we're going to delete + * all the items, clearing the collection would violate that constraint.) 
+ */ + clear: function gloda_coll_clear() { + this._idMap = {}; + if (this._uniqueValueMap) + this._uniqueValueMap = {}; + this.items = []; + }, + + _onItemsAdded: function gloda_coll_onItemsAdded(aItems) { + this.items.push.apply(this.items, aItems); + if (this._uniqueValueMap) { + for (let item of this.items) { + this._idMap[item.id] = item; + this._uniqueValueMap[item.uniqueValue] = item; + } + } + else { + for (let item of this.items) { + this._idMap[item.id] = item; + } + } + if (this._listener) { + try { + this._listener.onItemsAdded(aItems, this); + } + catch (ex) { + LOG.error("caught exception from listener in onItemsAdded: " + + ex.fileName + ":" + ex.lineNumber + ": " + ex); + } + } + }, + + _onItemsModified: function gloda_coll_onItemsModified(aItems) { + if (this._listener) { + try { + this._listener.onItemsModified(aItems, this); + } + catch (ex) { + LOG.error("caught exception from listener in onItemsModified: " + + ex.fileName + ":" + ex.lineNumber + ": " + ex); + } + } + }, + + /** + * Given a list of items that definitely no longer belong in this collection, + * remove them from the collection and notify the listener. The 'tricky' + * part is that we need to remove the deleted items from our list of items. + */ + _onItemsRemoved: function gloda_coll_onItemsRemoved(aItems) { + // we want to avoid the O(n^2) deletion performance case, and deletion + // should be rare enough that the extra cost of building the deletion map + // should never be a real problem. + let deleteMap = {}; + // build the delete map while also nuking from our id map/unique value map + for (let item of aItems) { + deleteMap[item.id] = true; + delete this._idMap[item.id]; + if (this._uniqueValueMap) + delete this._uniqueValueMap[item.uniqueValue]; + } + let items = this.items; + // in-place filter. probably needless optimization. 
+ let iWrite=0; + for (let iRead = 0; iRead < items.length; iRead++) { + let item = items[iRead]; + if (!(item.id in deleteMap)) + items[iWrite++] = item; + } + items.splice(iWrite); + + if (this._listener) { + try { + this._listener.onItemsRemoved(aItems, this); + } + catch (ex) { + LOG.error("caught exception from listener in onItemsRemoved: " + + ex.fileName + ":" + ex.lineNumber + ": " + ex); + } + } + }, + + _onQueryCompleted: function gloda_coll_onQueryCompleted() { + this.query.completed = true; + if (this._listener && this._listener.onQueryCompleted) + this._listener.onQueryCompleted(this); + } +}; + +/** + * Create an LRU cache collection for the given noun with the given size. + * @constructor + */ +function GlodaLRUCacheCollection(aNounDef, aCacheSize) { + GlodaCollection.call(this, aNounDef, null, null, null); + + this._head = null; // aka oldest! + this._tail = null; // aka newest! + this._size = 0; + // let's keep things sane, and simplify our logic a little... + if (aCacheSize < 32) + aCacheSize = 32; + this._maxCacheSize = aCacheSize; +} +/** + * @class A LRU-discard cache. We use a doubly linked-list for the eviction + * tracking. Since we require that there is at most one LRU-discard cache per + * noun class, we simplify our lives by adding our own attributes to the + * cached objects. + * @augments GlodaCollection + */ +GlodaLRUCacheCollection.prototype = new GlodaCollection; +GlodaLRUCacheCollection.prototype.add = function cache_add(aItems) { + for (let item of aItems) { + if (item.id in this._idMap) { + // DEBUGME so, we're dealing with this, but it shouldn't happen. need + // trace-debuggage. 
+ continue; + } + this._idMap[item.id] = item; + if (this._uniqueValueMap) + this._uniqueValueMap[item.uniqueValue] = item; + + item._lruPrev = this._tail; + // we do have to make sure that we will set _head the first time we insert + // something + if (this._tail !== null) + this._tail._lruNext = item; + else + this._head = item; + item._lruNext = null; + this._tail = item; + + this._size++; + } + + while (this._size > this._maxCacheSize) { + let item = this._head; + + // we never have to deal with the possibility of needing to make _head/_tail + // null. + this._head = item._lruNext; + this._head._lruPrev = null; + // (because we are nice, we will delete the properties...) + delete item._lruNext; + delete item._lruPrev; + + // nuke from our id map + delete this._idMap[item.id]; + if (this._uniqueValueMap) + delete this._uniqueValueMap[item.uniqueValue]; + + // flush dirty items to disk (they may not have this attribute, in which + // case, this returns false, which is fine.) + if (item.dirty) { + this._nounDef.objUpdate.call(this._nounDef.datastore, item); + delete item.dirty; + } + + this._size--; + } +}; + +GlodaLRUCacheCollection.prototype.hit = function cache_hit(aItem) { + // don't do anything in the 0 or 1 items case, or if we're already + // the last item + if ((this._head === this._tail) || (this._tail === aItem)) + return aItem; + + // - unlink the item + if (aItem._lruPrev !== null) + aItem._lruPrev._lruNext = aItem._lruNext; + else + this._head = aItem._lruNext; + // (_lruNext cannot be null) + aItem._lruNext._lruPrev = aItem._lruPrev; + // - link it in to the end + this._tail._lruNext = aItem; + aItem._lruPrev = this._tail; + aItem._lruNext = null; + // update tail tracking + this._tail = aItem; + + return aItem; +}; + +GlodaLRUCacheCollection.prototype.deleted = function cache_deleted(aItem) { + // unlink the item + if (aItem._lruPrev !== null) + aItem._lruPrev._lruNext = aItem._lruNext; + else + this._head = aItem._lruNext; + if (aItem._lruNext !== 
null) + aItem._lruNext._lruPrev = aItem._lruPrev; + else + this._tail = aItem._lruPrev; + + // (because we are nice, we will delete the properties...) + delete aItem._lruNext; + delete aItem._lruPrev; + + // nuke from our id map + delete this._idMap[aItem.id]; + if (this._uniqueValueMap) + delete this._uniqueValueMap[aItem.uniqueValue]; + + this._size--; +}; + +/** + * If any of the cached items are dirty, commit them, and make them no longer + * dirty. + */ +GlodaLRUCacheCollection.prototype.commitDirty = function cache_commitDirty() { + // we can only do this if there is an update method available... + if (!this._nounDef.objUpdate) + return; + + for (let iItem in this._idMap) { + let item = this._idMap[iItem]; + if (item.dirty) { + LOG.debug("flushing dirty: " + item); + this._nounDef.objUpdate.call(this._nounDef.datastore, item); + delete item.dirty; + } + } +}; diff --git a/mailnews/db/gloda/modules/connotent.js b/mailnews/db/gloda/modules/connotent.js new file mode 100644 index 000000000..4ef424d43 --- /dev/null +++ b/mailnews/db/gloda/modules/connotent.js @@ -0,0 +1,273 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ['GlodaContent', 'whittlerRegistry', + 'mimeMsgToContentAndMeta', 'mimeMsgToContentSnippetAndMeta']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); + +var LOG = Log4Moz.repository.getLogger("gloda.connotent"); + + + +/** + * Given a MimeMsg and the corresponding folder, return the GlodaContent object. 
+ * + * @param aMimeMsg: the MimeMessage instance + * @param folder: the nsIMsgDBFolder + * @return an array containing the GlodaContent instance, and the meta dictionary + * that the Gloda content providers may have filled with useful data. + */ + +function mimeMsgToContentAndMeta(aMimeMsg, folder) { + let content = new GlodaContent(); + let meta = {subject: aMimeMsg.get("subject")}; + let bodyLines = aMimeMsg.coerceBodyToPlaintext(folder).split(/\r?\n/); + + for (let whittler of whittlerRegistry.getWhittlers()) + whittler.contentWhittle(meta, bodyLines, content); + + return [content, meta]; +} + + +/** + * Given a MimeMsg, return the whittled content string, suitable for summarizing + * a message. + * + * @param aMimeMsg: the MimeMessage instance + * @param folder: the nsIMsgDBFolder + * @param length: optional number of characters to trim the whittled content. + * If the actual length of the message is greater than |length|, then the return + * value is the first (length-1) characters with an ellipsis appended. + * @return an array containing the text of the snippet, and the meta dictionary + * that the Gloda content providers may have filled with useful data. + */ + +function mimeMsgToContentSnippetAndMeta(aMimeMsg, folder, length) { + let [content, meta] = mimeMsgToContentAndMeta(aMimeMsg, folder); + + let text = content.getContentSnippet(length + 1); + if (length && text.length > length) + text = text.substring(0, length-1) + "\u2026"; // ellipsis + + return [text, meta]; +} + + +/** + * A registry of gloda providers that have contentWhittle() functions. + * used by mimeMsgToContentSnippet, but populated by the Gloda object as it's + * processing providers. + */ +function WhittlerRegistry() { + this._whittlers = []; +} + +WhittlerRegistry.prototype = { + /** + * Add a provider as a content whittler. 
+ */ + registerWhittler: function whittler_registry_registerWhittler(provider) { + this._whittlers.push(provider); + }, + /** + * get the list of content whittlers, sorted from the most specific to + * the most generic + */ + getWhittlers: function whittler_registry_getWhittlers() { + // Use the concat() trick to avoid mutating the internal object and + // leaking an internal representation. + return this._whittlers.concat().reverse(); + } +} + +this.whittlerRegistry = new WhittlerRegistry(); + +function GlodaContent() { + this._contentPriority = null; + this._producing = false; + this._hunks = []; +} + +GlodaContent.prototype = { + kPriorityBase: 0, + kPriorityPerfect: 100, + + kHunkMeta: 1, + kHunkQuoted: 2, + kHunkContent: 3, + + _resetContent: function gloda_content__resetContent() { + this._keysAndValues = []; + this._keysAndDeltaValues = []; + this._hunks = []; + this._curHunk = null; + }, + + /* ===== Consumer API ===== */ + hasContent: function gloda_content_hasContent() { + return (this._contentPriority != null); + }, + + /** + * Return content suitable for snippet display. This means that no quoting + * or meta-data should be returned. + * + * @param aMaxLength The maximum snippet length desired. + */ + getContentSnippet: function gloda_content_getContentSnippet(aMaxLength) { + let content = this.getContentString(); + if (aMaxLength) + content = content.substring(0, aMaxLength); + return content; + }, + + getContentString: function gloda_content_getContent(aIndexingPurposes) { + let data = ""; + for (let hunk of this._hunks) { + if (hunk.hunkType == this.kHunkContent) { + if (data) + data += "\n" + hunk.data; + else + data = hunk.data; + } + } + + if (aIndexingPurposes) { + // append the values for indexing. we assume the keywords are cruft. + // this may be crazy, but things that aren't a science aren't an exact + // science. 
+ for (let kv of this._keysAndValues) { + data += "\n" + kv[1]; + } + for (let kon of this._keysAndValues) { + data += "\n" + kon[1] + "\n" + kon[2]; + } + } + + return data; + }, + + /* ===== Producer API ===== */ + /** + * Called by a producer with the priority they believe their interpretation + * of the content comes in at. + * + * @returns true if we believe the producer's interpretation will be + * interesting and they should go ahead and generate events. We return + * false if we don't think they are interesting, in which case they should + * probably not issue calls to us, although we don't care. (We will + * ignore their calls if we return false, this allows the simplification + * of code that needs to run anyways.) + */ + volunteerContent: function gloda_content_volunteerContent(aPriority) { + if (this._contentPriority === null || this._contentPriority < aPriority) { + this._contentPriority = aPriority; + this._resetContent(); + this._producing = true; + return true; + } + this._producing = false; + return false; + }, + + keyValue: function gloda_content_keyValue(aKey, aValue) { + if (!this._producing) + return; + + this._keysAndValues.push([aKey, aValue]); + }, + keyValueDelta: function gloda_content_keyValueDelta (aKey, aOldValue, + aNewValue) { + if (!this._producing) + return; + + this._keysAndDeltaValues.push([aKey, aOldValue, aNewValue]); + }, + + /** + * Meta lines are lines that have to do with the content but are not the + * content and can generally be related to an attribute that has been derived + * and stored on the item. + * For example, a bugzilla bug may note that an attachment was created; this + * is not content and wouldn't be desired in a snippet, but is still + * potentially interesting meta-data. + * + * @param aLineOrLines The line or list of lines that are meta-data. + * @param aAttr The attribute this meta-data is associated with. 
   * @param aIndex If the attribute is non-singular, indicate the specific
   *     index of the item in the attribute's bound list that the meta-data
   *     is associated with.
   */
  meta: function gloda_content_meta(aLineOrLines, aAttr, aIndex) {
    if (!this._producing)
      return;

    // accept a single string or a list of lines
    let data;
    if (typeof(aLineOrLines) == "string")
      data = aLineOrLines;
    else
      data = aLineOrLines.join("\n");

    // meta hunks are never coalesced; every call starts a fresh hunk
    this._curHunk = {hunkType: this.kHunkMeta, attr: aAttr, index: aIndex,
                     data: data};
    this._hunks.push(this._curHunk);
  },
  /**
   * Quoted lines reference previous messages or what not.
   *
   * @param aLineOrLines The line or list of lines that are quoted.
   * @param aDepth The depth of the quoting.
   * @param aOrigin The item that originated the original content, if known.
   *     For example, perhaps a GlodaMessage?
   * @param aTarget A reference to the location in the original content, if
   *     known.  For example, the index of a line in a message or something?
   */
  quoted: function gloda_content_quoted(aLineOrLines, aDepth, aOrigin,
                                        aTarget) {
    if (!this._producing)
      return;

    // accept a single string or a list of lines
    let data;
    if (typeof(aLineOrLines) == "string")
      data = aLineOrLines;
    else
      data = aLineOrLines.join("\n");

    // coalesce with the current hunk only when it is a quote hunk with the
    // same depth/origin/target; otherwise start a new quote hunk
    if (!this._curHunk ||
        this._curHunk.hunkType != this.kHunkQuoted ||
        this._curHunk.depth != aDepth ||
        this._curHunk.origin != aOrigin || this._curHunk.target != aTarget) {
      this._curHunk = {hunkType: this.kHunkQuoted, data: data,
                       depth: aDepth, origin: aOrigin, target: aTarget};
      this._hunks.push(this._curHunk);
    }
    else
      this._curHunk.data += "\n" + data;
  },

  /**
   * Actual content lines.  Adjacent content is appended to the current
   * content hunk; anything else starts a new content hunk.
   */
  content: function gloda_content_content(aLineOrLines) {
    if (!this._producing)
      return;

    // accept a single string or a list of lines
    let data;
    if (typeof(aLineOrLines) == "string")
      data = aLineOrLines;
    else
      data = aLineOrLines.join("\n");

    if (!this._curHunk || this._curHunk.hunkType != this.kHunkContent) {
      this._curHunk = {hunkType: this.kHunkContent, data: data};
      this._hunks.push(this._curHunk);
    }
    else
      this._curHunk.data
+= "\n" + data; + }, +}; diff --git a/mailnews/db/gloda/modules/databind.js b/mailnews/db/gloda/modules/databind.js new file mode 100644 index 000000000..a2ecf1773 --- /dev/null +++ b/mailnews/db/gloda/modules/databind.js @@ -0,0 +1,194 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ["GlodaDatabind"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); + +var DBC_LOG = Log4Moz.repository.getLogger("gloda.ds.dbc"); + +function GlodaDatabind(aNounDef, aDatastore) { + this._nounDef = aNounDef; + this._tableName = aNounDef.tableName; + this._tableDef = aNounDef.schema; + this._datastore = aDatastore; + this._log = Log4Moz.repository.getLogger("gloda.databind." + this._tableName); + + // process the column definitions and make sure they have an attribute mapping + for (let [iColDef, coldef] of this._tableDef.columns.entries()) { + // default to the other dude's thing. + if (coldef.length < 3) + coldef[2] = coldef[0]; + if (coldef[0] == "id") + this._idAttr = coldef[2]; + // colDef[3] is the index of us in our SQL bindings, storage-numbering + coldef[3] = iColDef; + } + + // XXX This is obviously synchronous and not perfectly async. Since we are + // doing this, we don't actually need to move to ordinal binding below + // since we could just as well compel creation of the name map and thereby + // avoid ever acquiring the mutex after bootstrap. + // However, this specific check can be cleverly avoided with future work. + // Namely, at startup we can scan for extension-defined tables and get their + // maximum id so that we don't need to do it here. 
The table will either + // be brand new and thus have a maximum id of 1 or we will already know it + // because of that scan. + this._nextId = 1; + let stmt = this._datastore._createSyncStatement( + "SELECT MAX(id) FROM " + this._tableName, true); + if (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call + this._nextId = stmt.getInt64(0) + 1; + } + stmt.finalize(); + + let insertColumns = []; + let insertValues = []; + let updateItems = []; + for (let [iColDef, coldef] of this._tableDef.columns.entries()) { + let column = coldef[0]; + let placeholder = "?" + (iColDef + 1); + insertColumns.push(column); + insertValues.push(placeholder); + if (column != "id") { + updateItems.push(column + " = " + placeholder); + } + } + + let insertSql = "INSERT INTO " + this._tableName + " (" + + insertColumns.join(", ") + ") VALUES (" + insertValues.join(", ") + ")"; + + // For the update, we want the 'id' to be a constraint and not a value + // that gets set... + let updateSql = "UPDATE " + this._tableName + " SET " + + updateItems.join(", ") + " WHERE id = ?1"; + this._insertStmt = aDatastore._createAsyncStatement(insertSql); + this._updateStmt = aDatastore._createAsyncStatement(updateSql); + + if (this._tableDef.fulltextColumns) { + for (let [iColDef, coldef] of this._tableDef.fulltextColumns.entries()) { + if (coldef.length < 3) + coldef[2] = coldef[0]; + // colDef[3] is the index of us in our SQL bindings, storage-numbering + coldef[3] = iColDef + 1; + } + + let insertColumns = []; + let insertValues = []; + let updateItems = []; + for (var [iColDef, coldef] of this._tableDef.fulltextColumns.entries()) { + let column = coldef[0]; + // +2 instead of +1 because docid is implied + let placeholder = "?" 
+ (iColDef + 2); + insertColumns.push(column); + insertValues.push(placeholder); + if (column != "id") { + updateItems.push(column + " = " + placeholder); + } + } + + let insertFulltextSql = "INSERT INTO " + this._tableName + "Text (docid," + + insertColumns.join(", ") + ") VALUES (?1," + insertValues.join(", ") + + ")"; + + // For the update, we want the 'id' to be a constraint and not a value + // that gets set... + let updateFulltextSql = "UPDATE " + this._tableName + "Text SET " + + updateItems.join(", ") + " WHERE docid = ?1"; + + this._insertFulltextStmt = + aDatastore._createAsyncStatement(insertFulltextSql); + this._updateFulltextStmt = + aDatastore._createAsyncStatement(updateFulltextSql); + } +} + +GlodaDatabind.prototype = { + /** + * Perform appropriate binding coercion based on the schema provided to us. + * Although we end up effectively coercing JS Date objects to numeric values, + * we should not be provided with JS Date objects! There is no way for us + * to know to turn them back into JS Date objects on the way out. + * Additionally, there is the small matter of storage's bias towards + * PRTime representations which may not always be desirable. 
   *
   * @param aStmt The storage statement (sync or async) to bind into.
   * @param aColDef Column definition tuple; [1] is the schema type name and
   *     [3] is the 0-based parameter index to bind at.
   * @param aValue The value to bind; null/undefined binds SQL NULL.
   */
  bindByType: function(aStmt, aColDef, aValue) {
    if (aValue == null)
      aStmt.bindNullParameter(aColDef[3]);
    else if (aColDef[1] == "STRING" || aColDef[1] == "TEXT")
      aStmt.bindStringParameter(aColDef[3], aValue);
    else
      // everything else is stored as an int64 (numbers, booleans, dates
      // already coerced to numeric form by the caller)
      aStmt.bindInt64Parameter(aColDef[3], aValue);
  },

  /**
   * Deserialize a database row into a new instance of this noun's class,
   * mapping each column in schema order onto the attribute named by
   * colDef[2].
   */
  objFromRow: function(aRow) {
    let getVariant = this._datastore._getVariant;
    let obj = new this._nounDef.class();
    for (let [iCol, colDef] of this._tableDef.columns.entries()) {
      obj[colDef[2]] = getVariant(aRow, iCol);
    }
    return obj;
  },

  /**
   * Asynchronously INSERT the object into our table (and into the fulltext
   * companion table, if this noun has one).  Allocates an id from our
   * in-memory counter if the object does not already have one.
   */
  objInsert: function(aThing) {
    // bindByType does not touch |this|, so calling it unbound is safe
    let bindByType = this.bindByType;
    if (!aThing[this._idAttr])
      aThing[this._idAttr] = this._nextId++;

    let stmt = this._insertStmt;
    for (let colDef of this._tableDef.columns) {
      bindByType(stmt, colDef, aThing[colDef[2]]);
    }

    stmt.executeAsync(this._datastore.trackAsync());

    if (this._insertFulltextStmt) {
      stmt = this._insertFulltextStmt;
      // docid (parameter slot 0, i.e. ?1) is not part of fulltextColumns,
      // so it must be bound explicitly
      stmt.bindInt64Parameter(0, aThing[this._idAttr]);
      for (let colDef of this._tableDef.fulltextColumns) {
        bindByType(stmt, colDef, aThing[colDef[2]]);
      }
      stmt.executeAsync(this._datastore.trackAsync());
    }
  },

  /**
   * Asynchronously UPDATE the database row for the object (and its fulltext
   * row, if applicable), keyed by its id.
   */
  objUpdate: function(aThing) {
    let bindByType = this.bindByType;
    let stmt = this._updateStmt;
    // the 'id' column binds to slot 0 (?1), which in the UPDATE statement is
    // the WHERE constraint rather than a SET value, so the same bind loop
    // used for INSERT works unmodified here
    for (let colDef of this._tableDef.columns) {
      bindByType(stmt, colDef, aThing[colDef[2]]);
    }
    stmt.executeAsync(this._datastore.trackAsync());

    if (this._updateFulltextStmt) {
      stmt = this._updateFulltextStmt;
      // fulltextColumns doesn't include id/docid, need to explicitly set it
      stmt.bindInt64Parameter(0, aThing[this._idAttr]);
      for (let colDef of this._tableDef.fulltextColumns) {
        bindByType(stmt, colDef, aThing[colDef[2]]);
      }
      stmt.executeAsync(this._datastore.trackAsync());
    }
  },

  adjustAttributes: function() {
    // just proxy the call over to the datastore...
we have to do this for + // 'this' reasons. we don't refactor things to avoid this because it does + // make some sense to have all the methods exposed from a single object, + // even if the implementation does live elsewhere. + return this._datastore.adjustAttributes.apply(this._datastore, arguments); + }, + + // also proxied... + queryFromQuery: function() { + return this._datastore.queryFromQuery.apply(this._datastore, arguments); + } +}; diff --git a/mailnews/db/gloda/modules/datamodel.js b/mailnews/db/gloda/modules/datamodel.js new file mode 100644 index 000000000..1bdd6d01d --- /dev/null +++ b/mailnews/db/gloda/modules/datamodel.js @@ -0,0 +1,907 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ["GlodaAttributeDBDef", "GlodaAccount", + "GlodaConversation", "GlodaFolder", "GlodaMessage", + "GlodaContact", "GlodaIdentity", "GlodaAttachment"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/mailServices.js"); + +Cu.import("resource:///modules/gloda/log4moz.js"); +var LOG = Log4Moz.repository.getLogger("gloda.datamodel"); + +Cu.import("resource:///modules/gloda/utils.js"); + +// Make it lazy. +var gMessenger; +function getMessenger () { + if (!gMessenger) + gMessenger = Cc["@mozilla.org/messenger;1"].createInstance(Ci.nsIMessenger); + return gMessenger; +} + +/** + * @class Represents a gloda attribute definition's DB form. This class + * stores the information in the database relating to this attribute + * definition. Access its attrDef attribute to get at the realy juicy data. + * This main interesting thing this class does is serve as the keeper of the + * mapping from parameters to attribute ids in the database if this is a + * parameterized attribute. 
 */
function GlodaAttributeDBDef(aDatastore, aID, aCompoundName, aAttrType,
                             aPluginName, aAttrName) {
  // _datastore is now set on the prototype by GlodaDatastore
  this._id = aID;
  // compound name (plugin + attribute); used by toString
  this._compoundName = aCompoundName;
  this._attrType = aAttrType;
  this._pluginName = aPluginName;
  this._attrName = aAttrName;

  // the rich in-memory attribute definition; not set at construction time
  // (assigned later by whoever owns this DB definition)
  this.attrDef = null;

  /** Map parameter values to the underlying database id. */
  this._parameterBindings = {};
}

GlodaAttributeDBDef.prototype = {
  // set by GlodaDatastore
  _datastore: null,
  get id() { return this._id; },
  get attributeName() { return this._attrName; },

  get parameterBindings() { return this._parameterBindings; },

  /**
   * Bind a parameter value to the attribute definition, allowing use of the
   * attribute-parameter as an attribute.
   *
   * @return The database attribute id for the given parameter value,
   *     creating (and persisting) a new one on first use; our own id when
   *     aValue is null/undefined.
   */
  bindParameter: function gloda_attr_bindParameter(aValue) {
    // people probably shouldn't call us with null, but handle it
    if (aValue == null) {
      return this._id;
    }
    // cached binding from an earlier call?
    if (aValue in this._parameterBindings) {
      return this._parameterBindings[aValue];
    }
    // no database entry exists if we are here, so we must create it...
    let id = this._datastore._createAttributeDef(this._attrType,
        this._pluginName, this._attrName, aValue);
    this._parameterBindings[aValue] = id;
    // let the datastore know so other cached defs stay consistent
    this._datastore.reportBinding(id, this, aValue);
    return id;
  },

  /**
   * Given a list of values, return a list (regardless of plurality) of
   * database-ready [attribute id, value] tuples.  This is intended to be used
   * to directly convert the value of a property on an object that corresponds
   * to a bound attribute.
   *
   * @param {Array} aInstanceValues An array of instance values regardless of
   *     whether or not the attribute is singular.
+ */ + convertValuesToDBAttributes: + function gloda_attr_convertValuesToDBAttributes(aInstanceValues) { + let nounDef = this.attrDef.objectNounDef; + let dbAttributes = []; + if (nounDef.usesParameter) { + for (let instanceValue of aInstanceValues) { + let [param, dbValue] = nounDef.toParamAndValue(instanceValue); + dbAttributes.push([this.bindParameter(param), dbValue]); + } + } + else { + // Not generating any attributes is ok. This basically means the noun is + // just an informative property on the Gloda Message and has no real + // indexing purposes. + if ("toParamAndValue" in nounDef) { + for (let instanceValue of aInstanceValues) { + dbAttributes.push([this._id, + nounDef.toParamAndValue(instanceValue)[1]]); + } + } + } + return dbAttributes; + }, + + toString: function() { + return this._compoundName; + } +}; + +var GlodaHasAttributesMixIn = { + enumerateAttributes: function* gloda_attrix_enumerateAttributes() { + let nounDef = this.NOUN_DEF; + for (let key in this) { + let value = this[key]; + let attrDef = nounDef.attribsByBoundName[key]; + // we expect to not have attributes for underscore prefixed values (those + // are managed by the instance's logic. we also want to not explode + // should someone crap other values in there, we get both birds with this + // one stone. 
+ if (attrDef === undefined) + continue; + if (attrDef.singular) { + // ignore attributes with null values + if (value != null) + yield [attrDef, [value]]; + } + else { + // ignore attributes with no values + if (value.length) + yield [attrDef, value]; + } + } + }, + + domContribute: function gloda_attrix_domContribute(aDomNode) { + let nounDef = this.NOUN_DEF; + for (let attrName in nounDef.domExposeAttribsByBoundName) { + let attr = nounDef.domExposeAttribsByBoundName[attrName]; + if (this[attrName]) + aDomNode.setAttribute(attr.domExpose, this[attrName]); + } + }, +}; + +function MixIn(aConstructor, aMixIn) { + let proto = aConstructor.prototype; + for (let [name, func] in Iterator(aMixIn)) { + if (name.startsWith("get_")) + proto.__defineGetter__(name.substring(4), func); + else + proto[name] = func; + } +} + +/** + * @class A gloda wrapper around nsIMsgIncomingServer. + */ +function GlodaAccount(aIncomingServer) { + this._incomingServer = aIncomingServer; +} + +GlodaAccount.prototype = { + NOUN_ID: 106, + get id() { return this._incomingServer.key; }, + get name() { return this._incomingServer.prettyName; }, + get incomingServer() { return this._incomingServer; }, + toString: function gloda_account_toString() { + return "Account: " + this.id; + }, + + toLocaleString: function gloda_account_toLocaleString() { + return this.name; + } +}; + +/** + * @class A gloda conversation (thread) exists so that messages can belong. 
+ */ +function GlodaConversation(aDatastore, aID, aSubject, aOldestMessageDate, + aNewestMessageDate) { + // _datastore is now set on the prototype by GlodaDatastore + this._id = aID; + this._subject = aSubject; + this._oldestMessageDate = aOldestMessageDate; + this._newestMessageDate = aNewestMessageDate; +} + +GlodaConversation.prototype = { + NOUN_ID: 101, + // set by GlodaDatastore + _datastore: null, + get id() { return this._id; }, + get subject() { return this._subject; }, + get oldestMessageDate() { return this._oldestMessageDate; }, + get newestMessageDate() { return this._newestMessageDate; }, + + getMessagesCollection: function gloda_conversation_getMessagesCollection( + aListener, aData) { + let query = new GlodaMessage.prototype.NOUN_DEF.queryClass(); + query.conversation(this._id).orderBy("date"); + return query.getCollection(aListener, aData); + }, + + toString: function gloda_conversation_toString() { + return "Conversation:" + this._id; + }, + + toLocaleString: function gloda_conversation_toLocaleString() { + return this._subject; + } +}; + +function GlodaFolder(aDatastore, aID, aURI, aDirtyStatus, aPrettyName, + aIndexingPriority) { + // _datastore is now set by GlodaDatastore + this._id = aID; + this._uri = aURI; + this._dirtyStatus = aDirtyStatus; + this._prettyName = aPrettyName; + this._xpcomFolder = null; + this._account = null; + this._activeIndexing = false; + this._activeHeaderRetrievalLastStamp = 0; + this._indexingPriority = aIndexingPriority; + this._deleted = false; + this._compacting = false; +} + +GlodaFolder.prototype = { + NOUN_ID: 100, + // set by GlodaDatastore + _datastore: null, + + /** The folder is believed to be up-to-date */ + kFolderClean: 0, + /** The folder has some un-indexed or dirty messages */ + kFolderDirty: 1, + /** The folder needs to be entirely re-indexed, regardless of the flags on + * the messages in the folder. 
This state will be downgraded to dirty */ + kFolderFilthy: 2, + + _kFolderDirtyStatusMask: 0x7, + /** + * The (local) folder has been compacted and all of its message keys are + * potentially incorrect. This is not a possible state for IMAP folders + * because their message keys are based on UIDs rather than offsets into + * the mbox file. + */ + _kFolderCompactedFlag: 0x8, + + /** The folder should never be indexed. */ + kIndexingNeverPriority: -1, + /** The lowest priority assigned to a folder. */ + kIndexingLowestPriority: 0, + /** The highest priority assigned to a folder. */ + kIndexingHighestPriority: 100, + + /** The indexing priority for a folder if no other priority is assigned. */ + kIndexingDefaultPriority: 20, + /** Folders marked check new are slightly more important I guess. */ + kIndexingCheckNewPriority: 30, + /** Favorite folders are more interesting to the user, presumably. */ + kIndexingFavoritePriority: 40, + /** The indexing priority for inboxes. */ + kIndexingInboxPriority: 50, + /** The indexing priority for sent mail folders. */ + kIndexingSentMailPriority: 60, + + get id() { return this._id; }, + get uri() { return this._uri; }, + get dirtyStatus() { + return this._dirtyStatus & this._kFolderDirtyStatusMask; + }, + /** + * Mark a folder as dirty if it was clean. Do nothing if it was already dirty + * or filthy. For use by GlodaMsgIndexer only. And maybe rkent and his + * marvelous extensions. + */ + _ensureFolderDirty: function gloda_folder__markFolderDirty() { + if (this.dirtyStatus == this.kFolderClean) { + this._dirtyStatus = (this.kFolderDirty & this._kFolderDirtyStatusMask) | + (this._dirtyStatus & ~this._kFolderDirtyStatusMask); + this._datastore.updateFolderDirtyStatus(this); + } + }, + /** + * Definitely for use only by GlodaMsgIndexer to downgrade the dirty status of + * a folder. 
+ */ + _downgradeDirtyStatus: function gloda_folder__downgradeDirtyStatus( + aNewStatus) { + if (this.dirtyStatus != aNewStatus) { + this._dirtyStatus = (aNewStatus & this._kFolderDirtyStatusMask) | + (this._dirtyStatus & ~this._kFolderDirtyStatusMask); + this._datastore.updateFolderDirtyStatus(this); + } + }, + /** + * Indicate whether this folder is currently being compacted. The + * |GlodaMsgIndexer| keeps this in-memory-only value up-to-date. + */ + get compacting() { + return this._compacting; + }, + /** + * Set whether this folder is currently being compacted. This is really only + * for the |GlodaMsgIndexer| to set. + */ + set compacting(aCompacting) { + this._compacting = aCompacting; + }, + /** + * Indicate whether this folder was compacted and has not yet been + * compaction processed. + */ + get compacted() { + return Boolean(this._dirtyStatus & this._kFolderCompactedFlag); + }, + /** + * For use only by GlodaMsgIndexer to set/clear the compaction state of this + * folder. + */ + _setCompactedState: function gloda_folder__clearCompactedState(aCompacted) { + if (this.compacted != aCompacted) { + if (aCompacted) + this._dirtyStatus |= this._kFolderCompactedFlag; + else + this._dirtyStatus &= ~this._kFolderCompactedFlag; + this._datastore.updateFolderDirtyStatus(this); + } + }, + + get name() { return this._prettyName; }, + toString: function gloda_folder_toString() { + return "Folder:" + this._id; + }, + + toLocaleString: function gloda_folder_toLocaleString() { + let xpcomFolder = this.getXPCOMFolder(this.kActivityFolderOnlyNoData); + if (!xpcomFolder) + return this._prettyName; + return xpcomFolder.prettiestName + + " (" + xpcomFolder.rootFolder.prettiestName + ")"; + }, + + get indexingPriority() { + return this._indexingPriority; + }, + + /** We are going to index this folder. */ + kActivityIndexing: 0, + /** Asking for the folder to perform header retrievals. 
*/ + kActivityHeaderRetrieval: 1, + /** We only want the folder for its metadata but are not going to open it. */ + kActivityFolderOnlyNoData: 2, + + + /** Is this folder known to be actively used for indexing? */ + _activeIndexing: false, + /** Get our indexing status. */ + get indexing() { + return this._activeIndexing; + }, + /** + * Set our indexing status. Normally, this will be enabled through passing + * an activity type of kActivityIndexing (which will set us), but we will + * still need to be explicitly disabled by the indexing code. + * When disabling indexing, we will call forgetFolderIfUnused to take care of + * shutting things down. + * We are not responsible for committing changes to the message database! + * That is on you! + */ + set indexing(aIndexing) { + this._activeIndexing = aIndexing; + if (!aIndexing) + this.forgetFolderIfUnused(); + }, + /** When was this folder last used for header retrieval purposes? */ + _activeHeaderRetrievalLastStamp: 0, + + /** + * Retrieve the nsIMsgFolder instance corresponding to this folder, providing + * an explanation of why you are requesting it for tracking/cleanup purposes. + * + * @param aActivity One of the kActivity* constants. If you pass + * kActivityIndexing, we will set indexing for you, but you will need to + * clear it when you are done. + * @return The nsIMsgFolder if available, null on failure. + */ + getXPCOMFolder: function gloda_folder_getXPCOMFolder(aActivity) { + if (!this._xpcomFolder) { + let rdfService = Cc['@mozilla.org/rdf/rdf-service;1'] + .getService(Ci.nsIRDFService); + this._xpcomFolder = rdfService.GetResource(this.uri) + .QueryInterface(Ci.nsIMsgFolder); + } + switch (aActivity) { + case this.kActivityIndexing: + // mark us as indexing, but don't bother with live tracking. we do + // that independently and only for header retrieval. 
+ this.indexing = true; + break; + case this.kActivityHeaderRetrieval: + if (this._activeHeaderRetrievalLastStamp === 0) + this._datastore.markFolderLive(this); + this._activeHeaderRetrievalLastStamp = Date.now(); + break; + case this.kActivityFolderOnlyNoData: + // we don't have to do anything here. + break; + } + + return this._xpcomFolder; + }, + + /** + * Retrieve a GlodaAccount instance corresponding to this folder. + * + * @return The GlodaAccount instance. + */ + getAccount: function gloda_folder_getAccount() { + if (!this._account) { + let msgFolder = this.getXPCOMFolder(this.kActivityFolderOnlyNoData); + this._account = new GlodaAccount(msgFolder.server); + } + return this._account; + }, + + /** + * How many milliseconds must a folder have not had any header retrieval + * activity before it's okay to lose the database reference? + */ + ACCEPTABLY_OLD_THRESHOLD: 10000, + + /** + * Cleans up our nsIMsgFolder reference if we have one and it's not "in use". + * In use, from our perspective, means that it is not being used for indexing + * and some arbitrary interval of time has elapsed since it was last + * retrieved for header retrieval reasons. The time interval is because if + * we have one GlodaMessage requesting a header, there's a high probability + * that another message will request a header in the near future. + * Because setting indexing to false disables us, we are written in an + * idempotent fashion. (It is possible for disabling indexing's call to us + * to cause us to return true but for the datastore's timer call to have not + * yet triggered.) + * + * @returns true if we are cleaned up and can be considered 'dead', false if + * we should still be considered alive and this method should be called + * again in the future. + */ + forgetFolderIfUnused: function gloda_folder_forgetFolderIfUnused() { + // we are not cleaning/cleaned up if we are indexing + if (this._activeIndexing) + return false; + + // set a point in the past as the threshold. 
the timestamp must be older + // than this to be eligible for cleanup. + let acceptablyOld = Date.now() - this.ACCEPTABLY_OLD_THRESHOLD; + // we are not cleaning/cleaned up if we have retrieved a header more + // recently than the acceptably old threshold. + if (this._activeHeaderRetrievalLastStamp > acceptablyOld) + return false; + + if (this._xpcomFolder) { + // This is the key action we take; the nsIMsgFolder will continue to + // exist, but we want it to forget about its database so that it can + // be closed and its memory can be reclaimed. + this._xpcomFolder.msgDatabase = null; + this._xpcomFolder = null; + // since the last retrieval time tracks whether we have marked live or + // not, this needs to be reset to 0 too. + this._activeHeaderRetrievalLastStamp = 0; + } + + return true; + } +}; + +/** + * @class A message representation. + */ +function GlodaMessage(aDatastore, aID, aFolderID, aMessageKey, + aConversationID, aConversation, aDate, + aHeaderMessageID, aDeleted, aJsonText, + aNotability, + aSubject, aIndexedBodyText, aAttachmentNames) { + // _datastore is now set on the prototype by GlodaDatastore + this._id = aID; + this._folderID = aFolderID; + this._messageKey = aMessageKey; + this._conversationID = aConversationID; + this._conversation = aConversation; + this._date = aDate; + this._headerMessageID = aHeaderMessageID; + this._jsonText = aJsonText; + this._notability = aNotability; + this._subject = aSubject; + this._indexedBodyText = aIndexedBodyText; + this._attachmentNames = aAttachmentNames; + + // only set _deleted if we're deleted, otherwise the undefined does our + // speaking for us. 
+ if (aDeleted) + this._deleted = aDeleted; +} + +GlodaMessage.prototype = { + NOUN_ID: 102, + // set by GlodaDatastore + _datastore: null, + get id() { return this._id; }, + get folderID() { return this._folderID; }, + get messageKey() { return this._messageKey; }, + get conversationID() { return this._conversationID; }, + // conversation is special + get headerMessageID() { return this._headerMessageID; }, + get notability() { return this._notability; }, + set notability(aNotability) { this._notability = aNotability; }, + + get subject() { return this._subject; }, + get indexedBodyText() { return this._indexedBodyText; }, + get attachmentNames() { return this._attachmentNames; }, + + get date() { return this._date; }, + set date(aNewDate) { this._date = aNewDate; }, + + get folder() { + // XXX due to a deletion bug it is currently possible to get in a state + // where we have an illegal folderID value. This will result in an + // exception. As a workaround, let's just return null in that case. + try { + if (this._folderID != null) + return this._datastore._mapFolderID(this._folderID); + } + catch (ex) { + } + return null; + }, + get folderURI() { + // XXX just like for folder, handle mapping failures and return null + try { + if (this._folderID != null) + return this._datastore._mapFolderID(this._folderID).uri; + } + catch (ex) { + } + return null; + }, + get account() { + // XXX due to a deletion bug it is currently possible to get in a state + // where we have an illegal folderID value. This will result in an + // exception. As a workaround, let's just return null in that case. + try { + if (this._folderID == null) + return null; + let folder = this._datastore._mapFolderID(this._folderID); + return folder.getAccount(); + } + catch (ex) { } + return null; + }, + get conversation() { + return this._conversation; + }, + + toString: function gloda_message_toString() { + // uh, this is a tough one... 
+ return "Message:" + this._id; + }, + + _clone: function gloda_message_clone() { + return new GlodaMessage(/* datastore */ null, this._id, this._folderID, + this._messageKey, this._conversationID, this._conversation, this._date, + this._headerMessageID, "_deleted" in this ? this._deleted : undefined, + "_jsonText" in this ? this._jsonText : undefined, this._notability, + this._subject, this._indexedBodyText, this._attachmentNames); + }, + + /** + * Provide a means of propagating changed values on our clone back to + * ourselves. This is required because of an object identity trick gloda + * does; when indexing an already existing object, all mutations happen on + * a clone of the existing object so that + */ + _declone: function gloda_message_declone(aOther) { + if ("_content" in aOther) + this._content = aOther._content; + + // The _indexedAuthor/_indexedRecipients fields don't get updated on + // fulltext update so we don't need to propagate. + this._indexedBodyText = aOther._indexedBodyText; + this._attachmentNames = aOther._attachmentNames; + }, + + /** + * Mark this message as a ghost. Ghosts are characterized by having no folder + * id and no message key. They also are not deleted or they would be of + * absolutely no use to us. + * + * These changes are suitable for persistence. + */ + _ghost: function gloda_message_ghost() { + this._folderID = null; + this._messageKey = null; + if ("_deleted" in this) + delete this._deleted; + }, + + /** + * Are we a ghost (which implies not deleted)? We are not a ghost if we have + * a definite folder location (we may not know our message key in the case + * of IMAP moves not fully completed) and are not deleted. + */ + get _isGhost() { + return this._folderID == null && !this._isDeleted; + }, + + /** + * If we were dead, un-dead us. + */ + _ensureNotDeleted: function gloda_message__ensureNotDeleted() { + if ("_deleted" in this) + delete this._deleted; + }, + + /** + * Are we deleted? 
This is private because deleted gloda messages are not + * visible to non-core-gloda code. + */ + get _isDeleted() { + return ("_deleted" in this) && this._deleted; + }, + + /** + * Trash this message's in-memory representation because it should no longer + * be reachable by any code. The database record is gone, it's not coming + * back. + */ + _objectPurgedMakeYourselfUnpleasant: function gloda_message_nuke() { + this._id = null; + this._folderID = null; + this._messageKey = null; + this._conversationID = null; + this._conversation = null; + this.date = null; + this._headerMessageID = null; + }, + + /** + * Return the underlying nsIMsgDBHdr from the folder storage for this, or + * null if the message does not exist for one reason or another. We may log + * to our logger in the failure cases. + * + * This method no longer caches the result, so if you need to hold onto it, + * hold onto it. + * + * In the process of retrieving the underlying message header, we may have to + * open the message header database associated with the folder. This may + * result in blocking while the load happens, so you may want to try and find + * an alternate way to initiate the load before calling us. + * We provide hinting to the GlodaDatastore via the GlodaFolder so that it + * knows when it's a good time for it to go and detach from the database. + * + * @returns The nsIMsgDBHdr associated with this message if available, null on + * failure. 
+ */ + get folderMessage() { + if (this._folderID === null || this._messageKey === null) + return null; + + // XXX like for folder and folderURI, return null if we can't map the folder + let glodaFolder; + try { + glodaFolder = this._datastore._mapFolderID(this._folderID); + } + catch (ex) { + return null; + } + let folder = glodaFolder.getXPCOMFolder( + glodaFolder.kActivityHeaderRetrieval); + if (folder) { + let folderMessage; + try { + folderMessage = folder.GetMessageHeader(this._messageKey); + } + catch (ex) { + folderMessage = null; + } + if (folderMessage !== null) { + // verify the message-id header matches what we expect... + if (folderMessage.messageId != this._headerMessageID) { + LOG.info("Message with message key " + this._messageKey + + " in folder '" + folder.URI + "' does not match expected " + + "header! (" + this._headerMessageID + " expected, got " + + folderMessage.messageId + ")"); + folderMessage = null; + } + } + return folderMessage; + } + + // this only gets logged if things have gone very wrong. we used to throw + // here, but it's unlikely our caller can do anything more meaningful than + // treating this as a disappeared message. + LOG.info("Unable to locate folder message for: " + this._folderID + ":" + + this._messageKey); + return null; + }, + get folderMessageURI() { + let folderMessage = this.folderMessage; + if (folderMessage) + return folderMessage.folder.getUriForMsg(folderMessage); + else + return null; + } +}; +MixIn(GlodaMessage, GlodaHasAttributesMixIn); + +/** + * @class Contacts correspond to people (one per person), and may own multiple + * identities (e-mail address, IM account, etc.) 
+ */ +function GlodaContact(aDatastore, aID, aDirectoryUUID, aContactUUID, aName, + aPopularity, aFrecency, aJsonText) { + // _datastore set on the prototype by GlodaDatastore + this._id = aID; + this._directoryUUID = aDirectoryUUID; + this._contactUUID = aContactUUID; + this._name = aName; + this._popularity = aPopularity; + this._frecency = aFrecency; + if (aJsonText) + this._jsonText = aJsonText; + + this._identities = null; +} + +GlodaContact.prototype = { + NOUN_ID: 103, + // set by GlodaDatastore + _datastore: null, + + get id() { return this._id; }, + get directoryUUID() { return this._directoryUUID; }, + get contactUUID() { return this._contactUUID; }, + get name() { return this._name; }, + set name(aName) { this._name = aName; }, + + get popularity() { return this._popularity; }, + set popularity(aPopularity) { + this._popularity = aPopularity; + this.dirty = true; + }, + + get frecency() { return this._frecency; }, + set frecency(aFrecency) { + this._frecency = aFrecency; + this.dirty = true; + }, + + get identities() { + return this._identities; + }, + + toString: function gloda_contact_toString() { + return "Contact:" + this._id; + }, + + get accessibleLabel() { + return "Contact: " + this._name; + }, + + _clone: function gloda_contact_clone() { + return new GlodaContact(/* datastore */ null, this._id, this._directoryUUID, + this._contactUUID, this._name, this._popularity, this._frecency); + }, +}; +MixIn(GlodaContact, GlodaHasAttributesMixIn); + + +/** + * @class A specific means of communication for a contact. + */ +function GlodaIdentity(aDatastore, aID, aContactID, aContact, aKind, aValue, + aDescription, aIsRelay) { + // _datastore set on the prototype by GlodaDatastore + this._id = aID; + this._contactID = aContactID; + this._contact = aContact; + this._kind = aKind; + this._value = aValue; + this._description = aDescription; + this._isRelay = aIsRelay; + /// Cached indication of whether there is an address book card for this + /// identity. 
We keep this up-to-date via address book listener + /// notifications in |GlodaABIndexer|. + this._hasAddressBookCard = undefined; +} + +GlodaIdentity.prototype = { + NOUN_ID: 104, + // set by GlodaDatastore + _datastore: null, + get id() { return this._id; }, + get contactID() { return this._contactID; }, + get contact() { return this._contact; }, + get kind() { return this._kind; }, + get value() { return this._value; }, + get description() { return this._description; }, + get isRelay() { return this._isRelay; }, + + get uniqueValue() { + return this._kind + "@" + this._value; + }, + + toString: function gloda_identity_toString() { + return "Identity:" + this._kind + ":" + this._value; + }, + + toLocaleString: function gloda_identity_toLocaleString() { + if (this.contact.name == this.value) + return this.value; + return this.contact.name + " : " + this.value; + }, + + get abCard() { + // for our purposes, the address book only speaks email + if (this._kind != "email") + return false; + let card = GlodaUtils.getCardForEmail(this._value); + this._hasAddressBookCard = (card != null); + return card; + }, + + /** + * Indicates whether we have an address book card for this identity. This + * value is cached once looked-up and kept up-to-date by |GlodaABIndexer| + * and its notifications. + */ + get inAddressBook() { + if (this._hasAddressBookCard !== undefined) + return this._hasAddressBookCard; + return (this.abCard && true) || false; + }, + + pictureURL: function(aSize) { + if (this.inAddressBook) { + // XXX should get the photo if we have it. 
+ } + return ""; + } +}; + + +/** + * An attachment, with as much information as we can gather on it + */ +function GlodaAttachment(aGlodaMessage, aName, aContentType, aSize, aPart, aExternalUrl, aIsExternal) { + // _datastore set on the prototype by GlodaDatastore + this._glodaMessage = aGlodaMessage; + this._name = aName; + this._contentType = aContentType; + this._size = aSize; + this._part = aPart; + this._externalUrl = aExternalUrl; + this._isExternal = aIsExternal; +} + +GlodaAttachment.prototype = { + NOUN_ID: 105, + // set by GlodaDatastore + get name() { return this._name; }, + get contentType() { return this._contentType; }, + get size() { return this._size; }, + get url() { + if (this.isExternal) + return this._externalUrl; + else { + let uri = this._glodaMessage.folderMessageURI; + if (!uri) + throw new Error("The message doesn't exist anymore, unable to rebuild attachment URL"); + let neckoURL = {}; + let msgService = getMessenger().messageServiceFromURI(uri); + msgService.GetUrlForUri(uri, neckoURL, null); + let url = neckoURL.value.spec; + let hasParamAlready = url.match(/\?[a-z]+=[^\/]+$/); + let sep = hasParamAlready ? "&" : "?"; + return url+sep+"part="+this._part+"&filename="+encodeURIComponent(this._name); + } + }, + get isExternal() { return this._isExternal; }, + + toString: function gloda_attachment_toString() { + return "attachment: " + this._name + ":" + this._contentType; + }, + +}; diff --git a/mailnews/db/gloda/modules/datastore.js b/mailnews/db/gloda/modules/datastore.js new file mode 100644 index 000000000..70bbdc6a7 --- /dev/null +++ b/mailnews/db/gloda/modules/datastore.js @@ -0,0 +1,3989 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* This file looks to Myk Melez <myk@mozilla.org>'s Mozilla Labs snowl + * project's (http://hg.mozilla.org/labs/snowl/) modules/datastore.js + * for inspiration and idioms (and also a name :). + */ + +this.EXPORTED_SYMBOLS = ["GlodaDatastore"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/IOUtils.js"); +Cu.import("resource://gre/modules/Services.jsm"); + +Cu.import("resource:///modules/gloda/log4moz.js"); + +Cu.import("resource:///modules/gloda/datamodel.js"); +Cu.import("resource:///modules/gloda/databind.js"); +Cu.import("resource:///modules/gloda/collection.js"); + +var MIN_CACHE_SIZE = 8 * 1048576; +var MAX_CACHE_SIZE = 64 * 1048576; +var MEMSIZE_FALLBACK_BYTES = 256 * 1048576; + +var PCH_LOG = Log4Moz.repository.getLogger("gloda.ds.pch"); + +/** + * Commit async handler; hands off the notification to + * |GlodaDatastore._asyncCompleted|. + */ +function PostCommitHandler(aCallbacks) { + this.callbacks = aCallbacks; + GlodaDatastore._pendingAsyncStatements++; +} + +PostCommitHandler.prototype = { + handleResult: function gloda_ds_pch_handleResult(aResultSet) { + }, + + handleError: function gloda_ds_pch_handleError(aError) { + PCH_LOG.error("database error:" + aError); + }, + + handleCompletion: function gloda_ds_pch_handleCompletion(aReason) { + // just outright bail if we are shutdown + if (GlodaDatastore.datastoreIsShutdown) + return; + + if (aReason == Ci.mozIStorageStatementCallback.REASON_FINISHED) { + for (let callback of this.callbacks) { + try { + callback(); + } + catch (ex) { + PCH_LOG.error("PostCommitHandler callback (" + ex.fileName + ":" + + ex.lineNumber + ") threw: " + ex); + } + } + } + try { + GlodaDatastore._asyncCompleted(); + } + catch (e) { + PCH_LOG.error("Exception in handleCompletion:", e); + } + + } +}; + +var QFQ_LOG = Log4Moz.repository.getLogger("gloda.ds.qfq"); + +/** + * Singleton collection listener used by 
|QueryFromQueryCallback| to assist in + * the loading of referenced noun instances. Which is to say, messages have + * identities (specific e-mail addresses) associated with them via attributes. + * And these identities in turn reference / are referenced by contacts (the + * notion of a person). + * + * This listener is primarily concerned with fixing up the references in each + * noun instance to its referenced instances once they have been loaded. It + * also deals with caching so that our identity invariant is maintained: user + * code should only ever see one distinct instance of a thing at a time. + */ +var QueryFromQueryResolver = { + onItemsAdded: function(aIgnoredItems, aCollection, aFake) { + let originColl = aCollection.dataStack ? aCollection.dataStack.pop() + : aCollection.data; + //QFQ_LOG.debug("QFQR: originColl: " + originColl); + if (aCollection.completionShifter) + aCollection.completionShifter.push(originColl); + else + aCollection.completionShifter = [originColl]; + + if (!aFake) { + originColl.deferredCount--; + originColl.resolvedCount++; + } + + // bail if we are still pending on some other load completion + if (originColl.deferredCount > 0) { + //QFQ_LOG.debug("QFQR: bailing " + originColl._nounDef.name); + return; + } + + let referencesByNounID = originColl.masterCollection.referencesByNounID; + let inverseReferencesByNounID = + originColl.masterCollection.inverseReferencesByNounID; + + if (originColl.pendingItems) { + for (let [, item] in Iterator(originColl.pendingItems)) { + //QFQ_LOG.debug("QFQR: loading deferred " + item.NOUN_ID + ":" + item.id); + GlodaDatastore.loadNounDeferredDeps(item, referencesByNounID, + inverseReferencesByNounID); + } + + // we need to consider the possibility that we are racing a collection very + // much like our own. as such, this means we need to perform cache + // unification as our last step. 
+ GlodaCollectionManager.cacheLoadUnify(originColl._nounDef.id, + originColl.pendingItems, false); + + // just directly tell the collection about the items. we know the query + // matches (at least until we introduce predicates that we cannot express + // in SQL.) + //QFQ_LOG.debug(" QFQR: about to trigger listener: " + originColl._listener + + // "with collection: " + originColl._nounDef.name); + originColl._onItemsAdded(originColl.pendingItems); + delete originColl.pendingItems; + delete originColl._pendingIdMap; + } + }, + onItemsModified: function() { + }, + onItemsRemoved: function() { + }, + onQueryCompleted: function(aCollection) { + let originColl = aCollection.completionShifter ? + aCollection.completionShifter.shift() : aCollection.data; + //QFQ_LOG.debug(" QFQR about to trigger completion with collection: " + + // originColl._nounDef.name); + if (originColl.deferredCount <= 0) { + originColl._onQueryCompleted(); + } + }, +}; + +/** + * Handles the results from a GlodaDatastore.queryFromQuery call in cooperation + * with the |QueryFromQueryResolver| collection listener. We do a lot of + * legwork related to satisfying references to other noun instances on the + * noun instances the user directy queried. Messages reference identities + * reference contacts which in turn (implicitly) reference identities again. + * We have to spin up those other queries and stitch things together. + * + * While the code is generally up to the existing set of tasks it is called to + * handle, I would not be surprised for it to fall down if things get more + * complex. Some of the logic here 'evolved' a bit and could benefit from + * additional documentation and a fresh go-through. 
+ */ +function QueryFromQueryCallback(aStatement, aNounDef, aCollection) { + this.statement = aStatement; + this.nounDef = aNounDef; + this.collection = aCollection; + + //QFQ_LOG.debug("Creating QFQCallback for noun: " + aNounDef.name); + + // the master collection holds the referencesByNounID + this.referencesByNounID = {}; + this.masterReferencesByNounID = + this.collection.masterCollection.referencesByNounID; + this.inverseReferencesByNounID = {}; + this.masterInverseReferencesByNounID = + this.collection.masterCollection.inverseReferencesByNounID; + // we need to contribute our references as we load things; we need this + // because of the potential for circular dependencies and our inability to + // put things into the caching layer (or collection's _idMap) until we have + // fully resolved things. + if (this.nounDef.id in this.masterReferencesByNounID) + this.selfReferences = this.masterReferencesByNounID[this.nounDef.id]; + else + this.selfReferences = this.masterReferencesByNounID[this.nounDef.id] = {}; + if (this.nounDef.parentColumnAttr) { + if (this.nounDef.id in this.masterInverseReferencesByNounID) + this.selfInverseReferences = + this.masterInverseReferencesByNounID[this.nounDef.id]; + else + this.selfInverseReferences = + this.masterInverseReferencesByNounID[this.nounDef.id] = {}; + } + + this.needsLoads = false; + + GlodaDatastore._pendingAsyncStatements++; +} + +QueryFromQueryCallback.prototype = { + handleResult: function gloda_ds_qfq_handleResult(aResultSet) { + try { + // just outright bail if we are shutdown + if (GlodaDatastore.datastoreIsShutdown) + return; + + let pendingItems = this.collection.pendingItems; + let pendingIdMap = this.collection._pendingIdMap; + let row; + let nounDef = this.nounDef; + let nounID = nounDef.id; + while ((row = aResultSet.getNextRow())) { + let item = nounDef.objFromRow.call(nounDef.datastore, row); + if (this.collection.stashedColumns) { + let stashed = this.collection.stashedColumns[item.id] = []; + for (let 
[,iCol] in + Iterator(this.collection.query.options.stashColumns)) { + stashed.push(GlodaDatastore._getVariant(row, iCol)); + } + } + // try and replace the item with one from the cache, if we can + let cachedItem = GlodaCollectionManager.cacheLookupOne(nounID, item.id, + false); + + // if we already have a copy in the pending id map, skip it + if (item.id in pendingIdMap) + continue; + + //QFQ_LOG.debug("loading item " + nounDef.id + ":" + item.id + " existing: " + + // this.selfReferences[item.id] + " cached: " + cachedItem); + if (cachedItem) + item = cachedItem; + // we may already have been loaded by this process + else if (this.selfReferences[item.id] != null) + item = this.selfReferences[item.id]; + // perform loading logic which may produce reference dependencies + else + this.needsLoads = + GlodaDatastore.loadNounItem(item, this.referencesByNounID, + this.inverseReferencesByNounID) || + this.needsLoads; + + // add ourself to the references by our id + // QFQ_LOG.debug("saving item " + nounDef.id + ":" + item.id + " to self-refs"); + this.selfReferences[item.id] = item; + + // if we're tracking it, add ourselves to our parent's list of children + // too + if (this.selfInverseReferences) { + let parentID = item[nounDef.parentColumnAttr.idStorageAttributeName]; + let childrenList = this.selfInverseReferences[parentID]; + if (childrenList === undefined) + childrenList = this.selfInverseReferences[parentID] = []; + childrenList.push(item); + } + + pendingItems.push(item); + pendingIdMap[item.id] = item; + } + } + catch (e) { + GlodaDatastore._log.error("Exception in handleResult:", e); + } + }, + + handleError: function gloda_ds_qfq_handleError(aError) { + GlodaDatastore._log.error("Async queryFromQuery error: " + + aError.result + ": " + aError.message); + }, + + handleCompletion: function gloda_ds_qfq_handleCompletion(aReason) { + try { + try { + this.statement.finalize(); + this.statement = null; + + // just outright bail if we are shutdown + if 
(GlodaDatastore.datastoreIsShutdown) + return; + + //QFQ_LOG.debug("handleCompletion: " + this.collection._nounDef.name); + + if (this.needsLoads) { + for (let nounID in this.referencesByNounID) { + let references = this.referencesByNounID[nounID]; + if (nounID == this.nounDef.id) + continue; + let nounDef = GlodaDatastore._nounIDToDef[nounID]; + //QFQ_LOG.debug(" have references for noun: " + nounDef.name); + // try and load them out of the cache/existing collections. items in the + // cache will be fully formed, which is nice for us. + // XXX this mechanism will get dubious when we have multiple paths to a + // single noun-type. For example, a -> b -> c, a-> c; two paths to c + // and we're looking at issuing two requests to c, the latter of which + // will be a superset of the first one. This does not currently pose + // a problem because we only have a -> b -> c -> b, and sequential + // processing means no alarms and no surprises. + let masterReferences = this.masterReferencesByNounID[nounID]; + if (masterReferences === undefined) + masterReferences = this.masterReferencesByNounID[nounID] = {}; + let outReferences; + if (nounDef.parentColumnAttr) + outReferences = {}; + else + outReferences = masterReferences; + let [foundCount, notFoundCount, notFound] = + GlodaCollectionManager.cacheLookupMany(nounDef.id, references, + outReferences); + + if (nounDef.parentColumnAttr) { + let inverseReferences; + if (nounDef.id in this.masterInverseReferencesByNounID) + inverseReferences = + this.masterInverseReferencesByNounID[nounDef.id]; + else + inverseReferences = + this.masterInverseReferencesByNounID[nounDef.id] = {}; + + for (let key in outReferences) { + let item = outReferences[key]; + masterReferences[item.id] = item; + let parentID = item[nounDef.parentColumnAttr.idStorageAttributeName]; + let childrenList = inverseReferences[parentID]; + if (childrenList === undefined) + childrenList = inverseReferences[parentID] = []; + childrenList.push(item); + } + } + + 
//QFQ_LOG.debug(" found: " + foundCount + " not found: " + notFoundCount); + if (notFoundCount === 0) { + this.collection.resolvedCount++; + } + else { + this.collection.deferredCount++; + let query = new nounDef.queryClass(); + query.id.apply(query, Object.keys(notFound)); + + this.collection.masterCollection.subCollections[nounDef.id] = + GlodaDatastore.queryFromQuery(query, QueryFromQueryResolver, + this.collection, + // we fully expect/allow for there being no such subcollection yet. + this.collection.masterCollection.subCollections[nounDef.id], + this.collection.masterCollection, + {becomeExplicit: true}); + } + } + + for (let nounID in this.inverseReferencesByNounID) { + let inverseReferences = this.inverseReferencesByNounID[nounID]; + this.collection.deferredCount++; + let nounDef = GlodaDatastore._nounIDToDef[nounID]; + + //QFQ_LOG.debug("Want to load inverse via " + nounDef.parentColumnAttr.boundName); + + let query = new nounDef.queryClass(); + // we want to constrain using the parent column + let queryConstrainer = query[nounDef.parentColumnAttr.boundName]; + queryConstrainer.apply(query, Object.keys(inverseReferences)); + this.collection.masterCollection.subCollections[nounDef.id] = + GlodaDatastore.queryFromQuery(query, QueryFromQueryResolver, + this.collection, + // we fully expect/allow for there being no such subcollection yet. + this.collection.masterCollection.subCollections[nounDef.id], + this.collection.masterCollection, + {becomeExplicit: true}); + } + } + else { + this.collection.deferredCount--; + this.collection.resolvedCount++; + } + + //QFQ_LOG.debug(" defer: " + this.collection.deferredCount + + // " resolved: " + this.collection.resolvedCount); + + // process immediately and kick-up to the master collection... 
+ if (this.collection.deferredCount <= 0) { + // this guy will resolve everyone using referencesByNounID and issue the + // call to this.collection._onItemsAdded to propagate things to the + // next concerned subCollection or the actual listener if this is the + // master collection. (Also, call _onQueryCompleted). + QueryFromQueryResolver.onItemsAdded(null, {data: this.collection}, true); + QueryFromQueryResolver.onQueryCompleted({data: this.collection}); + } + } + catch (e) { + Components.utils.reportError(e); + QFQ_LOG.error("Exception:", e); + } + } + finally { + GlodaDatastore._asyncCompleted(); + } + } +}; + +/** + * Used by |GlodaDatastore.folderCompactionPassBlockFetch| to accumulate the + * results and pass them back in to the compaction process in + * |GlodaMsgIndexer._worker_folderCompactionPass|. + */ +function CompactionBlockFetcherHandler(aCallback) { + this.callback = aCallback; + this.idsAndMessageKeys = []; + GlodaDatastore._pendingAsyncStatements++; +} +CompactionBlockFetcherHandler.prototype = { + handleResult: function gloda_ds_cbfh_handleResult(aResultSet) { + let row; + while ((row = aResultSet.getNextRow())) { + this.idsAndMessageKeys.push([ + row.getInt64(0), // id + row.getInt64(1), // messageKey + row.getString(2), // headerMessageID + ]); + } + }, + handleError: function gloda_ds_cbfh_handleError(aError) { + GlodaDatastore._log.error("CompactionBlockFetcherHandler error: " + + aError.result + ": " + aError.message); + }, + handleCompletion: function gloda_ds_cbfh_handleCompletion(aReason) { + GlodaDatastore._asyncCompleted(); + this.callback(this.idsAndMessageKeys); + } +}; + +/** + * Use this as the callback handler when you have a SQL query that returns a + * single row with a single integer column value, like a COUNT() query. 
+ */ +function SingletonResultValueHandler(aCallback) { + this.callback = aCallback; + this.result = null; + GlodaDatastore._pendingAsyncStatements++; +} +SingletonResultValueHandler.prototype = { + handleResult: function gloda_ds_cbfh_handleResult(aResultSet) { + let row; + while ((row = aResultSet.getNextRow())) { + this.result = row.getInt64(0); + } + }, + handleError: function gloda_ds_cbfh_handleError(aError) { + GlodaDatastore._log.error("SingletonResultValueHandler error: " + + aError.result + ": " + aError.message); + }, + handleCompletion: function gloda_ds_cbfh_handleCompletion(aReason) { + GlodaDatastore._asyncCompleted(); + this.callback(this.result); + } +}; + +/** + * Wrapper that duplicates actions taken on a real statement to an explain + * statement. Currently only fires an explain statement once. + */ +function ExplainedStatementWrapper(aRealStatement, aExplainStatement, + aSQLString, aExplainHandler) { + this.real = aRealStatement; + this.explain = aExplainStatement; + this.sqlString = aSQLString; + this.explainHandler = aExplainHandler; + this.done = false; +} +ExplainedStatementWrapper.prototype = { + bindNullParameter: function(aColIndex) { + this.real.bindNullParameter(aColIndex); + if (!this.done) + this.explain.bindNullParameter(aColIndex); + }, + bindStringParameter: function(aColIndex, aValue) { + this.real.bindStringParameter(aColIndex, aValue); + if (!this.done) + this.explain.bindStringParameter(aColIndex, aValue); + }, + bindInt64Parameter: function(aColIndex, aValue) { + this.real.bindInt64Parameter(aColIndex, aValue); + if (!this.done) + this.explain.bindInt64Parameter(aColIndex, aValue); + }, + bindDoubleParameter: function(aColIndex, aValue) { + this.real.bindDoubleParameter(aColIndex, aValue); + if (!this.done) + this.explain.bindDoubleParameter(aColIndex, aValue); + }, + executeAsync: function wrapped_executeAsync(aCallback) { + if (!this.done) { + this.explainHandler.sqlEnRoute(this.sqlString); + 
this.explain.executeAsync(this.explainHandler); + this.explain.finalize(); + this.done = true; + } + return this.real.executeAsync(aCallback); + }, + finalize: function wrapped_finalize() { + if (!this.done) + this.explain.finalize(); + this.real.finalize(); + }, +}; + +/** + * Writes a single JSON document to the provide file path in a streaming + * fashion. At startup we open an array to place the queries in and at + * shutdown we close it. + */ +function ExplainedStatementProcessor(aDumpPath) { + Services.obs.addObserver(this, "quit-application", false); + + this._sqlStack = []; + this._curOps = []; + this._objsWritten = 0; + + let filePath = Cc["@mozilla.org/file/local;1"] + .createInstance(Ci.nsILocalFile); + filePath.initWithPath(aDumpPath); + + this._ostream = Cc["@mozilla.org/network/file-output-stream;1"] + .createInstance(Ci.nsIFileOutputStream); + this._ostream.init(filePath, -1, -1, 0); + + let s = '{"queries": ['; + this._ostream.write(s, s.length); +} +ExplainedStatementProcessor.prototype = { + sqlEnRoute: function esp_sqlEnRoute(aSQLString) { + this._sqlStack.push(aSQLString); + }, + handleResult: function esp_handleResult(aResultSet) { + let row; + // addr opcode (s) p1 p2 p3 p4 (s) p5 comment (s) + while ((row = aResultSet.getNextRow())) { + this._curOps.push([ + row.getInt64(0), // addr + row.getString(1), // opcode + row.getInt64(2), // p1 + row.getInt64(3), // p2 + row.getInt64(4), // p3 + row.getString(5), // p4 + row.getString(6), // p5 + row.getString(7) // comment + ]); + } + }, + handleError: function esp_handleError(aError) { + Cu.reportError("Unexpected error in EXPLAIN handler: " + aError); + }, + handleCompletion: function esp_handleCompletion(aReason) { + let obj = { + sql: this._sqlStack.shift(), + operations: this._curOps, + }; + let s = (this._objsWritten++ ? 
", " : "") + JSON.stringify(obj, null, 2); + this._ostream.write(s, s.length); + + this._curOps = []; + }, + + observe: function esp_observe(aSubject, aTopic, aData) { + if (aTopic == "quit-application") + this.shutdown(); + }, + + shutdown: function esp_shutdown() { + let s = "]}"; + this._ostream.write(s, s.length); + this._ostream.close(); + + Services.obs.removeObserver(this, "quit-application"); + } +}; + +// See the documentation on GlodaDatastore._schemaVersion to understand these: +var DB_SCHEMA_ACCEPT_LEAVE_LOW = 31, + DB_SCHEMA_ACCEPT_LEAVE_HIGH = 34, + DB_SCHEMA_ACCEPT_DOWNGRADE_LOW = 35, + DB_SCHEMA_ACCEPT_DOWNGRADE_HIGH = 39, + DB_SCHEMA_DOWNGRADE_DELTA = 5; + +/** + * Database abstraction layer. Contains explicit SQL schemas for our + * fundamental representations (core 'nouns', if you will) as well as + * specialized functions for then dealing with each type of object. At the + * same time, we are beginning to support extension-provided tables, which + * call into question whether we really need our hand-rolled code, or could + * simply improve the extension-provided table case to work for most of our + * hand-rolled cases. + * For now, the argument can probably be made that our explicit schemas and code + * is readable/intuitive (not magic) and efficient (although generic stuff + * could also be made efficient, if slightly evil through use of eval or some + * other code generation mechanism.) + * + * === Data Model Interaction / Dependencies + * + * Dependent on and assumes limited knowledge of the datamodel.js + * implementations. datamodel.js actually has an implicit dependency on + * our implementation, reaching back into the datastore via the _datastore + * attribute which we pass into every instance we create. + * We pass a reference to ourself as we create the datamodel.js instances (and + * they store it as _datastore) because of a half-implemented attempt to make + * it possible to live in a world where we have multiple datastores. 
This
 * would be desirable in the cases where we are dealing with multiple SQLite
 * databases.  This could be because of per-account global databases or
 * some other segmentation.  This was abandoned when the importance of
 * per-account databases was diminished following public discussion, at least
 * for the short-term, but no attempt was made to excise the feature or
 * preclude it.  (Merely a recognition that it's too much to try and implement
 * it correctly right now, especially because our solution might just be another
 * (aggregating) layer on top of things, rather than complicating the lower
 * levels.)
 *
 * === Object Identity / Caching
 *
 * The issue of object identity is handled by integration with the collection.js
 * provided GlodaCollectionManager.  By "Object Identity", I mean that we only
 * should ever have one object instance alive at a time that corresponds to
 * an underlying database row in the database.  Where possible we avoid
 * performing database look-ups when we can check if the object is already
 * present in memory; in practice, this means when we are asking for an object
 * by ID.  When we cannot avoid a database query, we attempt to make sure that
 * we do not return a duplicate object instance, instead replacing it with the
 * 'live' copy of the object.  (Ideally, we would avoid any redundant
 * construction costs, but that is not currently the case.)
 * Although you should consult the GlodaCollectionManager for details, the
 * general idea is that we have 'collections' which represent views of the
 * database (based on a query) which use a single mechanism for double duty.
 * The collections are registered with the collection manager via weak
 * reference.  The first 'duty' is that since the collections may be desired
 * to be 'live views' of the data, we want them to update as changes occur.
 * The weak reference allows the collection manager to track the 'live'
 * collections and update them.
The second 'duty' is the caching/object + * identity duty. In theory, every live item should be referenced by at least + * one collection, making it reachable for object identity/caching purposes. + * There is also an explicit (inclusive) caching layer present to both try and + * avoid poor performance from some of the costs of this strategy, as well as + * to try and keep track of objects that are being worked with that are not + * (yet) tracked by a collection. Using a size-bounded cache is clearly not + * a guarantee of correctness for this, but is suspected will work quite well. + * (Well enough to be dangerous because the inevitable failure case will not be + * expected.) + * + * The current strategy may not be the optimal one, feel free to propose and/or + * implement better ones, especially if you have numbers. + * The current strategy is not fully implemented in this file, but the common + * cases are believed to be covered. (Namely, we fail to purge items from the + * cache as they are purged from the database.) + * + * === Things That May Not Be Obvious (Gotchas) + * + * Although the schema includes "triggers", they are currently not used + * and were added when thinking about implementing the feature. We will + * probably implement this feature at some point, which is why they are still + * in there. + * + * We, and the layers above us, are not sufficiently thorough at cleaning out + * data from the database, and may potentially orphan it _as new functionality + * is added in the future at layers above us_. That is, currently we should + * not be leaking database rows, but we may in the future. This is because + * we/the layers above us lack a mechanism to track dependencies based on + * attributes. Say a plugin exists that extracts recipes from messages and + * relates them via an attribute. To do so, it must create new recipe rows + * in its own table as new recipes are discovered. 
No automatic mechanism + * will purge recipes as their source messages are purged, nor does any + * event-driven mechanism explicitly inform the plugin. (It could infer + * such an event from the indexing/attribute-providing process, or poll the + * states of attributes to accomplish this, but that is not desirable.) This + * needs to be addressed, and may be best addressed at layers above + * datastore.js. + * @namespace + */ +var GlodaDatastore = { + _log: null, + + /* see Gloda's documentation for these constants */ + kSpecialNotAtAll: 0, + kSpecialColumn: 16, + kSpecialColumnChildren: 16|1, + kSpecialColumnParent: 16|2, + kSpecialString: 32, + kSpecialFulltext: 64, + IGNORE_FACET: {}, + + kConstraintIdIn: 0, + kConstraintIn: 1, + kConstraintRanges: 2, + kConstraintEquals: 3, + kConstraintStringLike: 4, + kConstraintFulltext: 5, + + /* ******************* SCHEMA ******************* */ + + /** + * Schema version policy. IMPORTANT! We expect the following potential things + * to happen in the life of gloda that can impact our schema and the ability + * to move between different versions of Thunderbird: + * + * - Fundamental changes to the schema so that two versions of Thunderbird + * cannot use the same global database. To wit, Thunderbird N+1 needs to + * blow away the database of Thunderbird N and reindex from scratch. + * Likewise, Thunderbird N will need to blow away Thunderbird N+1's + * database because it can't understand it. And we can't simply use a + * different file because there would be fatal bookkeeping losses. + * + * - Bidirectional minor schema changes (rare). + * Thunderbird N+1 does something that does not affect Thunderbird N's use + * of the database, and a user switching back to Thunderbird N will not be + * negatively impacted. It will also be fine when they go back to N+1 and + * N+1 will not be missing any vital data. The historic example of this is + * when we added a missing index that was important for performance. 
In + * that case, Thunderbird N could have potentially left the schema revision + * intact (if there was a safe revision), rather than swapping it on the + * downgrade, compelling N+1 to redo the transform on upgrade. + * + * - Backwards compatible, upgrade-transition minor schema changes. + * Thunderbird N+1 does something that does not require nuking the + * database / a full re-index, but does require processing on upgrade from + * a version of the database previously used by Thunderbird. These changes + * do not impact N's ability to use the database. For example, adding a + * new indexed attribute that affects a small number of messages could be + * handled by issuing a query on upgrade to dirty/index those messages. + * However, if the user goes back to N from N+1, when they upgrade to N+1 + * again, we need to re-index. In this case N would need to have downgrade + * the schema revision. + * + * - Backwards incompatible, minor schema changes. + * Thunderbird N+1 does something that does not require nuking the database + * but will break Thunderbird N's ability to use the database. + * + * - Regression fixes. Sometimes we may land something that screws up + * databases, or the platform changes in a way that breaks our code and we + * had insufficient unit test coverage and so don't detect it until some + * databases have gotten messed up. + * + * Accordingly, every version of Thunderbird has a concept of potential schema + * versions with associated semantics to prepare for the minor schema upgrade + * cases were inter-op is possible. These ranges and their semantics are: + * - accepts and leaves intact. Covers: + * - regression fixes that no longer exist with the landing of the upgrade + * code as long as users never go back a build in the given channel. + * - bidirectional minor schema changes. + * - accepts but downgrades version to self. Covers: + * - backwards compatible, upgrade-transition minor schema changes. 
+ * - nuke range (anything beyond a specific revision needs to be nuked): + * - backwards incompatible, minor scheme changes + * - fundamental changes + * + * + * SO, YOU WANT TO CHANGE THE SCHEMA? + * + * Use the ranges below for Thunderbird 11 as a guide, bumping things as little + * as possible. If we start to use up the "accepts and leaves intact" range + * without majorly changing things up, re-do the numbering acceptance range + * to give us additional runway. + * + * Also, if we keep needing non-nuking upgrades, consider adding an additional + * table to the database that can tell older versions of Thunderbird what to + * do when confronted with a newer database and where it can set flags to tell + * the newer Thunderbird what the older Thunderbird got up to. For example, + * it would be much easier if we just tell Thunderbird N what to do when it's + * confronted with the database. + * + * + * CURRENT STATE OF THE MIGRATION LOGIC: + * + * Thunderbird 11: uses 30 (regression fix from 26) + * - accepts and leaves intact: 31-34 + * - accepts and downgrades by 5: 35-39 + * - nukes: 40+ + */ + _schemaVersion: 30, + // what is the schema in the database right now? 
+ _actualSchemaVersion: 0, + _schema: { + tables: { + + // ----- Messages + folderLocations: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["folderURI", "TEXT NOT NULL"], + ["dirtyStatus", "INTEGER NOT NULL"], + ["name", "TEXT NOT NULL"], + ["indexingPriority", "INTEGER NOT NULL"], + ], + + triggers: { + delete: "DELETE from messages WHERE folderID = OLD.id", + }, + }, + + conversations: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["subject", "TEXT NOT NULL"], + ["oldestMessageDate", "INTEGER"], + ["newestMessageDate", "INTEGER"], + ], + + indices: { + subject: ['subject'], + oldestMessageDate: ['oldestMessageDate'], + newestMessageDate: ['newestMessageDate'], + }, + + fulltextColumns: [ + ["subject", "TEXT"], + ], + + triggers: { + delete: "DELETE from messages WHERE conversationID = OLD.id", + }, + }, + + /** + * A message record correspond to an actual message stored in a folder + * somewhere, or is a ghost record indicating a message that we know + * should exist, but which we have not seen (and which we may never see). + * We represent these ghost messages by storing NULL values in the + * folderID and messageKey fields; this may need to change to other + * sentinel values if this somehow impacts performance. + */ + messages: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["folderID", "INTEGER"], + ["messageKey", "INTEGER"], + // conversationID used to have a REFERENCES but I'm losing it for + // presumed performance reasons and it doesn't do anything for us. + ["conversationID", "INTEGER NOT NULL"], + ["date", "INTEGER"], + // we used to have the parentID, but because of the very real + // possibility of multiple copies of a message with a given + // message-id, the parentID concept is unreliable. 
+ ["headerMessageID", "TEXT"], + ["deleted", "INTEGER NOT NULL default 0"], + ["jsonAttributes", "TEXT"], + // Notability attempts to capture the static 'interestingness' of a + // message as a result of being starred/flagged, labeled, read + // multiple times, authored by someone in your address book or that + // you converse with a lot, etc. + ["notability", "INTEGER NOT NULL default 0"], + ], + + indices: { + messageLocation: ['folderID', 'messageKey'], + headerMessageID: ['headerMessageID'], + conversationID: ['conversationID'], + date: ['date'], + deleted: ['deleted'], + }, + + // note: if reordering the columns, you need to change this file's + // row-loading logic, msg_search.js's ranking usages and also the + // column saturations in nsGlodaRankerFunction + fulltextColumns: [ + ["body", "TEXT"], + ["subject", "TEXT"], + ["attachmentNames", "TEXT"], + ["author", "TEXT"], + ["recipients", "TEXT"], + ], + + triggers: { + delete: "DELETE FROM messageAttributes WHERE messageID = OLD.id", + }, + }, + + // ----- Attributes + attributeDefinitions: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["attributeType", "INTEGER NOT NULL"], + ["extensionName", "TEXT NOT NULL"], + ["name", "TEXT NOT NULL"], + ["parameter", "BLOB"], + ], + + triggers: { + delete: "DELETE FROM messageAttributes WHERE attributeID = OLD.id", + }, + }, + + messageAttributes: { + columns: [ + // conversationID and messageID used to have REFERENCES back to their + // appropriate types. I removed it when removing attributeID for + // better reasons and because the code is not capable of violating + // this constraint, so the check is just added cost. (And we have + // unit tests that sanity check my assertions.) 
+ ["conversationID", "INTEGER NOT NULL"], + ["messageID", "INTEGER NOT NULL"], + // This used to be REFERENCES attributeDefinitions(id) but then we + // introduced sentinel values and it's hard to justify the effort + // to compel injection of the record or the overhead to do the + // references checking. + ["attributeID", "INTEGER NOT NULL"], + ["value", "NUMERIC"], + ], + + indices: { + attribQuery: [ + "attributeID", "value", + /* covering: */ "conversationID", "messageID"], + // This is required for deletion of a message's attributes to be + // performant. We could optimize this index away if we changed our + // deletion logic to issue specific attribute deletions based on the + // information it already has available in the message's JSON blob. + // The rub there is that if we screwed up we could end up leaking + // attributes and there is a non-trivial performance overhead to + // the many requests it would cause (which can also be reduced in + // the future by changing our SQL dispatch code.) + messageAttribFastDeletion: [ + "messageID"], + }, + }, + + // ----- Contacts / Identities + + /** + * Corresponds to a human being and roughly to an address book entry. + * Constrast with an identity, which is a specific e-mail address, IRC + * nick, etc. Identities belong to contacts, and this relationship is + * expressed on the identityAttributes table. 
+ */ + contacts: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["directoryUUID", "TEXT"], + ["contactUUID", "TEXT"], + ["popularity", "INTEGER"], + ["frecency", "INTEGER"], + ["name", "TEXT"], + ["jsonAttributes", "TEXT"], + ], + indices: { + popularity: ["popularity"], + frecency: ["frecency"], + }, + }, + + contactAttributes: { + columns: [ + ["contactID", "INTEGER NOT NULL"], + ["attributeID", + "INTEGER NOT NULL"], + ["value", "NUMERIC"] + ], + indices: { + contactAttribQuery: [ + "attributeID", "value", + /* covering: */ "contactID"], + } + }, + + /** + * Identities correspond to specific e-mail addresses, IRC nicks, etc. + */ + identities: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["contactID", "INTEGER NOT NULL"], + ["kind", "TEXT NOT NULL"], // ex: email, irc, etc. + ["value", "TEXT NOT NULL"], // ex: e-mail address, irc nick/handle... + ["description", "NOT NULL"], // what makes this identity different + // from the others? (ex: home, work, etc.) + ["relay", "INTEGER NOT NULL"], // is the identity just a relay + // mechanism? (ex: mailing list, twitter 'bouncer', IRC gateway, etc.) + ], + + indices: { + contactQuery: ["contactID"], + valueQuery: ["kind", "value"] + } + }, + }, + }, + + + /* ******************* LOGIC ******************* */ + /** + * We only have one connection; this name exists for legacy reasons but helps + * track when we are intentionally doing synchronous things during startup. + * We do nothing synchronous once our setup has completed. + */ + syncConnection: null, + /** + * We only have one connection and we only do asynchronous things after setup; + * this name still exists mainly for legacy reasons. + */ + asyncConnection: null, + + /** + * Our "mailnews.database.global.datastore." preferences branch for debug + * notification handling. We register as an observer against this. + */ + _prefBranch: null, + + /** + * The unique ID assigned to an index when it has been built. 
This value + * changes once the index has been rebuilt. + */ + _datastoreID: null, + + /** + * Initialize logging, create the database if it doesn't exist, "upgrade" it + * if it does and it's not up-to-date, fill our authoritative folder uri/id + * mapping. + */ + _init: function gloda_ds_init(aNounIDToDef) { + this._log = Log4Moz.repository.getLogger("gloda.datastore"); + this._log.debug("Beginning datastore initialization."); + + this._nounIDToDef = aNounIDToDef; + + let branch = Services.prefs.getBranch("mailnews.database.global.datastore."); + this._prefBranch = branch; + + // Not sure the weak reference really makes a difference given that we are a + // GC root. + branch.addObserver("", this, false); + // claim the pref changed so we can centralize our logic there. + this.observe(null, "nsPref:changed", "explainToPath"); + + // Get the path to our global database + var dbFile = Services.dirsvc.get("ProfD", Ci.nsIFile); + dbFile.append("global-messages-db.sqlite"); + + var dbConnection; + + // Report about the size of the database through telemetry (if there's a + // database, naturally). + if (dbFile.exists()) { + try { + let h = Services.telemetry.getHistogramById("THUNDERBIRD_GLODA_SIZE_MB"); + h.add(dbFile.fileSize/1048576); + } catch (e) { + this._log.warn("Couldn't report telemetry", e); + } + } + + // Create the file if it does not exist + if (!dbFile.exists()) { + this._log.debug("Creating database because it doesn't exist."); + dbConnection = this._createDB(dbFile); + } + // It does exist, but we (someday) might need to upgrade the schema + else { + // (Exceptions may be thrown if the database is corrupt) + try { + dbConnection = Services.storage.openUnsharedDatabase(dbFile); + let cacheSize = this._determineCachePages(dbConnection); + // see _createDB... 
+ dbConnection.executeSimpleSQL("PRAGMA cache_size = "+cacheSize); + dbConnection.executeSimpleSQL("PRAGMA synchronous = FULL"); + + // Register custom tokenizer to index all language text + var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"]. + getService(Ci.nsIFts3Tokenizer); + tokenizer.registerTokenizer(dbConnection); + + // -- database schema changes + let dbSchemaVersion = this._actualSchemaVersion = + dbConnection.schemaVersion; + // - database from the future! + if (dbSchemaVersion > this._schemaVersion) { + if (dbSchemaVersion >= DB_SCHEMA_ACCEPT_LEAVE_LOW && + dbSchemaVersion <= DB_SCHEMA_ACCEPT_LEAVE_HIGH) { + this._log.debug("db from the future in acceptable range; leaving " + + "version at: " + dbSchemaVersion); + } + else if (dbSchemaVersion >= DB_SCHEMA_ACCEPT_DOWNGRADE_LOW && + dbSchemaVersion <= DB_SCHEMA_ACCEPT_DOWNGRADE_HIGH) { + let newVersion = dbSchemaVersion - DB_SCHEMA_DOWNGRADE_DELTA; + this._log.debug("db from the future in downgrade range; setting " + + "version to " + newVersion + " down from " + + dbSchemaVersion); + dbConnection.schemaVersion = this._actualSchemaVersion = newVersion; + } + // too far from the future, nuke it. + else { + dbConnection = this._nukeMigration(dbFile, dbConnection); + } + } + // - database from the past! migrate it, possibly. + else if (dbSchemaVersion < this._schemaVersion) { + this._log.debug("Need to migrate database. (DB version: " + + this._actualSchemaVersion + " desired version: " + + this._schemaVersion); + dbConnection = this._migrate(dbFile, + dbConnection, + this._actualSchemaVersion, + this._schemaVersion); + this._log.debug("Migration call completed."); + } + // else: this database is juuust right. + + // If we never had a datastore ID, make sure to create one now. 
+ if (!this._prefBranch.prefHasUserValue("id")) { + this._datastoreID = this._generateDatastoreID(); + this._prefBranch.setCharPref("id", this._datastoreID); + } else { + this._datastoreID = this._prefBranch.getCharPref("id"); + } + } + // Handle corrupt databases, other oddities + catch (ex) { + if (ex.result == Cr.NS_ERROR_FILE_CORRUPTED) { + this._log.warn("Database was corrupt, removing the old one."); + dbFile.remove(false); + this._log.warn("Removed old database, creating a new one."); + dbConnection = this._createDB(dbFile); + } + else { + this._log.error("Unexpected error when trying to open the database:", + ex); + throw ex; + } + } + } + + this.syncConnection = dbConnection; + this.asyncConnection = dbConnection; + + this._log.debug("Initializing folder mappings."); + this._getAllFolderMappings(); + // we need to figure out the next id's for all of the tables where we + // manage that. + this._log.debug("Populating managed id counters."); + this._populateAttributeDefManagedId(); + this._populateConversationManagedId(); + this._populateMessageManagedId(); + this._populateContactManagedId(); + this._populateIdentityManagedId(); + + // create the timer we use to periodically drop our references to folders + // we no longer need XPCOM references to (or more significantly, their + // message databases.) + this._folderCleanupTimer = + Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer); + + this._log.debug("Completed datastore initialization."); + }, + + observe: function gloda_ds_observe(aSubject, aTopic, aData) { + if(aTopic != "nsPref:changed") + return; + + if (aData == "explainToPath") { + let explainToPath = null; + try { + explainToPath = this._prefBranch.getCharPref("explainToPath"); + if (explainToPath.trim() == "") + explainToPath = null; + } + catch (ex) { + // don't care if the pref is not there. + } + + // It is conceivable that the name is changing and this isn't a boolean + // toggle, so always clean out the explain processor. 
+ if (this._explainProcessor) { + this._explainProcessor.shutdown(); + this._explainProcessor = null; + } + + if (explainToPath) { + this._createAsyncStatement = this._createExplainedAsyncStatement; + this._explainProcessor = new ExplainedStatementProcessor( + explainToPath); + } + else { + this._createAsyncStatement = this._realCreateAsyncStatement; + } + } + }, + + datastoreIsShutdown: false, + + /** + * Perform datastore shutdown. + */ + shutdown: function gloda_ds_shutdown() { + // Clear out any pending transaction by committing it. + // The indexer has been shutdown by this point; it no longer has any active + // indexing logic and it no longer has active event listeners capable of + // generating new activity. + // Semantic consistency of the database is guaranteed by the indexer's + // strategy of only yielding control at coherent times. Although it takes + // multiple calls and multiple SQL operations to update the state of our + // database representations, the generator does not yield until it has + // issued all the database statements required for said update. As such, + // this commit will leave us in a good way (and the commit will happen + // because closing the connection will drain the async execution queue.) + while (this._transactionDepth) { + this._log.info("Closing pending transaction out for shutdown."); + // just schedule this function to be run again once the transaction has + // been closed out. + this._commitTransaction(); + } + + this.datastoreIsShutdown = true; + + // shutdown our folder cleanup timer, if active and null it out. + if (this._folderCleanupActive) + this._folderCleanupTimer.cancel(); + this._folderCleanupTimer = null; + + this._log.info("Closing db connection"); + + // we do not expect exceptions, but it's a good idea to avoid having our + // shutdown process explode. 
+ try { + this._cleanupAsyncStatements(); + this._cleanupSyncStatements(); + } + catch (ex) { + this._log.debug("Unexpected exception during statement cleanup: " + ex); + } + + // it's conceivable we might get a spurious exception here, but we really + // shouldn't get one. again, we want to ensure shutdown runs to completion + // and doesn't break our caller. + try { + // This currently causes all pending asynchronous operations to be run to + // completion. this simplifies things from a correctness perspective, + // and, honestly, is a lot easier than us tracking all of the async + // event tasks so that we can explicitly cancel them. + // This is a reasonable thing to do because we don't actually ever have + // a huge number of statements outstanding. The indexing process needs + // to issue async requests periodically, so the most we have in-flight + // from a write perspective is strictly less than the work required to + // update the database state for a single message. + // However, the potential for multiple pending expensive queries does + // exist, and it may be advisable to attempt to track and cancel those. + // For simplicity we don't currently do this, and I expect this should + // not pose a major problem, but those are famous last words. + // Note: asyncClose does not spin a nested event loop, but the thread + // manager shutdown code will spin the async thread's event loop, so it + // nets out to be the same. + this.asyncConnection.asyncClose(); + } + catch (ex) { + this._log.debug("Potentially expected exception during connection " + + "closure: " + ex); + } + + this.asyncConnection = null; + this.syncConnection = null; + }, + + /** + * Generates and returns a UUID. 
+ * + * @return a UUID as a string, ex: "c4dd0159-9287-480f-a648-a4613e147fdb" + */ + _generateDatastoreID: function gloda_ds_generateDatastoreID() { + let uuidGen = Cc["@mozilla.org/uuid-generator;1"] + .getService(Ci.nsIUUIDGenerator); + let uuid = uuidGen.generateUUID().toString(); + // We snip off the { and } from each end of the UUID. + return uuid.substring(1, uuid.length - 2); + }, + + _determineCachePages: function gloda_ds_determineCachePages(aDBConn) { + try { + // For the details of the computations, one should read + // nsNavHistory::InitDB. We're slightly diverging from them in the sense + // that we won't allow gloda to use insane amounts of memory cache, and + // we start with 1% instead of 6% like them. + let pageStmt = aDBConn.createStatement("PRAGMA page_size"); + pageStmt.executeStep(); + let pageSize = pageStmt.row.page_size; + pageStmt.finalize(); + let cachePermillage = this._prefBranch + .getIntPref("cache_to_memory_permillage"); + cachePermillage = Math.min(cachePermillage, 50); + cachePermillage = Math.max(cachePermillage, 0); + let physMem = IOUtils.getPhysicalMemorySize(); + if (physMem == 0) + physMem = MEMSIZE_FALLBACK_BYTES; + let cacheSize = Math.round(physMem * cachePermillage / 1000); + cacheSize = Math.max(cacheSize, MIN_CACHE_SIZE); + cacheSize = Math.min(cacheSize, MAX_CACHE_SIZE); + let cachePages = Math.round(cacheSize / pageSize); + return cachePages; + } catch (ex) { + this._log.warn("Error determining cache size: " + ex); + // A little bit lower than on my personal machine, will result in ~40M. + return 1000; + } + }, + + /** + * Create our database; basically a wrapper around _createSchema. + */ + _createDB: function gloda_ds_createDB(aDBFile) { + var dbConnection = Services.storage.openUnsharedDatabase(aDBFile); + // We now follow the Firefox strategy for places, which mainly consists in + // picking a default 32k page size, and then figuring out the amount of + // cache accordingly. 
The default 32k comes from
+ this._datastoreID = this._generateDatastoreID(); + this._prefBranch.setCharPref("id", this._datastoreID); + + dbConnection.beginTransaction(); + try { + this._createSchema(dbConnection); + dbConnection.commitTransaction(); + } + catch(ex) { + dbConnection.rollbackTransaction(); + throw ex; + } + + return dbConnection; + }, + + _createTableSchema: function gloda_ds_createTableSchema(aDBConnection, + aTableName, aTableDef) { + // - Create the table + this._log.info("Creating table: " + aTableName); + let columnDefs = []; + for (let [column, type] of aTableDef.columns) { + columnDefs.push(column + " " + type); + } + aDBConnection.createTable(aTableName, columnDefs.join(", ")); + + // - Create the fulltext table if applicable + if (aTableDef.fulltextColumns) { + let columnDefs = []; + for (let [column, type] of aTableDef.fulltextColumns) { + columnDefs.push(column + " " + type); + } + let createFulltextSQL = "CREATE VIRTUAL TABLE " + aTableName + "Text" + + " USING fts3(tokenize mozporter, " + columnDefs.join(", ") + ")"; + this._log.info("Creating fulltext table: " + createFulltextSQL); + aDBConnection.executeSimpleSQL(createFulltextSQL); + } + + // - Create its indices + if (aTableDef.indices) { + for (let indexName in aTableDef.indices) { + let indexColumns = aTableDef.indices[indexName]; + aDBConnection.executeSimpleSQL( + "CREATE INDEX " + indexName + " ON " + aTableName + + "(" + indexColumns.join(", ") + ")"); + } + } + + // - Create the attributes table if applicable + if (aTableDef.genericAttributes) { + aTableDef.genericAttributes = { + columns: [ + ["nounID", "INTEGER NOT NULL"], + ["attributeID", "INTEGER NOT NULL"], + ["value", "NUMERIC"] + ], + indices: {} + }; + aTableDef.genericAttributes.indices[aTableName + "AttribQuery"] = + ["attributeID", "value", /* covering: */ "nounID"]; + // let's use this very function! (since we created genericAttributes, + // explodey recursion is avoided.) 
+ this._createTableSchema(aDBConnection, aTableName + "Attributes", + aTableDef.genericAttributes); + } + }, + + /** + * Create our database schema assuming a newly created database. This + * comes down to creating normal tables, their full-text variants (if + * applicable), and their indices. + */ + _createSchema: function gloda_ds_createSchema(aDBConnection) { + // -- For each table... + for (let tableName in this._schema.tables) { + let tableDef = this._schema.tables[tableName]; + this._createTableSchema(aDBConnection, tableName, tableDef); + } + + aDBConnection.schemaVersion = this._actualSchemaVersion = + this._schemaVersion; + }, + + /** + * Create a table for a noun, replete with data binding. + */ + createNounTable: function gloda_ds_createTableIfNotExists(aNounDef) { + // give it a _jsonText attribute if appropriate... + if (aNounDef.allowsArbitraryAttrs) + aNounDef.schema.columns.push(['jsonAttributes', 'STRING', '_jsonText']); + // check if the table exists + if (!this.asyncConnection.tableExists(aNounDef.tableName)) { + // it doesn't! 
create it (and its potentially many variants) + try { + this._createTableSchema(this.asyncConnection, aNounDef.tableName, + aNounDef.schema); + } + catch (ex) { + this._log.error("Problem creating table " + aNounDef.tableName + " " + + "because: " + ex + " at " + ex.fileName + ":" + ex.lineNumber); + return; + } + } + + aNounDef._dataBinder = new GlodaDatabind(aNounDef, this); + aNounDef.datastore = aNounDef._dataBinder; + aNounDef.objFromRow = aNounDef._dataBinder.objFromRow; + aNounDef.objInsert = aNounDef._dataBinder.objInsert; + aNounDef.objUpdate = aNounDef._dataBinder.objUpdate; + aNounDef.dbAttribAdjuster = aNounDef._dataBinder.adjustAttributes; + + if (aNounDef.schema.genericAttributes) { + aNounDef.attrTableName = aNounDef.tableName + "Attributes"; + aNounDef.attrIDColumnName = "nounID"; + } + }, + + _nukeMigration: function gloda_ds_nukeMigration(aDBFile, aDBConnection) { + aDBConnection.close(); + aDBFile.remove(false); + this._log.warn("Global database has been purged due to schema change. " + + "old version was " + this._actualSchemaVersion + + ", new version is: " + this._schemaVersion); + return this._createDB(aDBFile); + }, + + /** + * Migrate the database _to the latest version_ from an older version. We + * only keep enough logic around to get us to the recent version. This code + * is not a time machine! If we need to blow away the database to get to the + * most recent version, then that's the sum total of the migration! + */ + _migrate: function gloda_ds_migrate(aDBFile, aDBConnection, + aCurVersion, aNewVersion) { + + // version 12: + // - notability column added + // version 13: + // - we are adding a new fulltext index column. blow away! + // - note that I screwed up and failed to mark the schema change; apparently + // no database will claim to be version 13... 
+ // version 14ish, still labeled 13?: + // - new attributes: forwarded, repliedTo, bcc, recipients + // - altered fromMeTo and fromMeCc to fromMe + // - altered toMe and ccMe to just be toMe + // - exposes bcc to cc-related attributes + // - MIME type DB schema overhaul + // version 15ish, still labeled 13: + // - change tokenizer to mozporter to support CJK + // (We are slip-streaming this so that only people who want to test CJK + // have to test it. We will properly bump the schema revision when the + // gloda correctness patch lands.) + // version 16ish, labeled 14 and now 16 + // - gloda message id's start from 32 now + // - all kinds of correctness changes (blow away) + // version 17 + // - more correctness fixes. (blow away) + // version 18 + // - significant empty set support (blow away) + // version 19 + // - there was a typo that was resulting in deleted getting set to the + // numeric value of the javascript undefined value. (migrate-able) + // version 20 + // - tokenizer changes to provide for case/accent-folding. (blow away) + // version 21 + // - add the messagesAttribFastDeletion index we thought was already covered + // by an index we removed a while ago (migrate-able) + // version 26 + // - bump page size and also cache size (blow away) + // version 30 + // - recover from bug 732372 that affected TB 11 beta / TB 12 alpha / TB 13 + // trunk. The fix is bug 734507. The revision bump happens + // asynchronously. (migrate-able) + + // nuke if prior to 26 + if (aCurVersion < 26) + return this._nukeMigration(aDBFile, aDBConnection); + + // They must be desiring our "a.contact is undefined" fix! + // This fix runs asynchronously as the first indexing job the indexer ever + // performs. It is scheduled by the enabling of the message indexer and + // it is the one that updates the schema version when done. + + // return the same DB connection since we didn't create a new one or do + // anything. 
    return aDBConnection;
  },

  /**
   * Asynchronously update the schema version; only for use by in-tree callers
   *  who asynchronously perform migration work triggered by their initial
   *  indexing sweep and who have properly updated the schema version in all
   *  the appropriate locations in this file.
   *
   * This is done without doing anything about the current transaction state,
   *  which is desired.
   */
  _updateSchemaVersion: function(newSchemaVersion) {
    this._actualSchemaVersion = newSchemaVersion;
    let stmt = this._createAsyncStatement(
      // we need to concat; pragmas don't like "?1" binds
      "PRAGMA user_version = " + newSchemaVersion, true);
    stmt.executeAsync(this.trackAsync());
    stmt.finalize();
  },

  // Async statements created without aWillFinalize=true accumulate here so
  //  that _cleanupAsyncStatements can finalize them all at shutdown.
  _outstandingAsyncStatements: [],

  /**
   * Unless debugging, this is just _realCreateAsyncStatement, but in some
   *  debugging modes this is instead the helpful wrapper
   *  _createExplainedAsyncStatement.
   */
  _createAsyncStatement: null,

  /**
   * Create an async statement on the async connection.
   *
   * @param aSQLString The SQL to prepare.
   * @param aWillFinalize If true, the caller promises to finalize the
   *     statement itself; otherwise we track it for cleanup at shutdown.
   * @returns The prepared mozIStorage async statement.
   * @throws Error (wrapping the connection's last error) if preparation fails.
   */
  _realCreateAsyncStatement: function gloda_ds_createAsyncStatement(aSQLString,
                                                                    aWillFinalize) {
    let statement = null;
    try {
      statement = this.asyncConnection.createAsyncStatement(aSQLString);
    }
    catch(ex) {
      throw new Error("error creating async statement " + aSQLString + " - " +
                      this.asyncConnection.lastError + ": " +
                      this.asyncConnection.lastErrorString + " - " + ex);
    }

    if (!aWillFinalize)
      this._outstandingAsyncStatements.push(statement);

    return statement;
  },

  /**
   * The ExplainedStatementProcessor instance used by
   *  _createExplainedAsyncStatement.  This will be null if
   *  _createExplainedAsyncStatement is not being used as _createAsyncStatement.
   */
  _explainProcessor: null,

  /**
   * Wrapped version of _createAsyncStatement that EXPLAINs the statement.  When
   *  used this decorates _createAsyncStatement, in which case we are found at
   *  that name and the original is at _orig_createAsyncStatement.  This is
   *  controlled by the explainToPath preference (see |_init|).
   */
  _createExplainedAsyncStatement:
    function gloda_ds__createExplainedAsyncStatement(aSQLString,
                                                     aWillFinalize) {
    let realStatement = this._realCreateAsyncStatement(aSQLString,
                                                       aWillFinalize);
    // don't wrap transaction control statements.
    if (aSQLString == "COMMIT" ||
        aSQLString == "BEGIN TRANSACTION" ||
        aSQLString == "ROLLBACK")
      return realStatement;

    let explainSQL = "EXPLAIN " + aSQLString;
    let explainStatement = this._realCreateAsyncStatement(explainSQL);

    return new ExplainedStatementWrapper(realStatement, explainStatement,
                                         aSQLString, this._explainProcessor);
  },

  // Finalize every tracked async statement; called during shutdown.
  _cleanupAsyncStatements: function gloda_ds_cleanupAsyncStatements() {
    this._outstandingAsyncStatements.forEach(stmt => stmt.finalize());
  },

  // Sync statements created without aWillFinalize=true, finalized by
  //  _cleanupSyncStatements.
  _outstandingSyncStatements: [],

  /**
   * Create a synchronous statement on the sync connection; see
   *  _realCreateAsyncStatement for the aWillFinalize contract.
   * @throws Error (wrapping the connection's last error) if preparation fails.
   */
  _createSyncStatement: function gloda_ds_createSyncStatement(aSQLString,
                                                              aWillFinalize) {
    let statement = null;
    try {
      statement = this.syncConnection.createStatement(aSQLString);
    }
    catch(ex) {
      throw new Error("error creating sync statement " + aSQLString + " - " +
                      this.syncConnection.lastError + ": " +
                      this.syncConnection.lastErrorString + " - " + ex);
    }

    if (!aWillFinalize)
      this._outstandingSyncStatements.push(statement);

    return statement;
  },

  // Finalize every tracked sync statement; called during shutdown.
  _cleanupSyncStatements: function gloda_ds_cleanupSyncStatements() {
    this._outstandingSyncStatements.forEach(stmt => stmt.finalize());
  },

  /**
   * Perform a synchronous executeStep on the statement, handling any
   *  SQLITE_BUSY fallout that could conceivably happen from a collision on our
   *  read with the async writes.
   * Basically we keep trying until we succeed or run out of tries.
   * We believe this to be a reasonable course of action because we don't
   *  expect this to happen much.
   *
   * NOTE: if we exhaust all tries we log an error and implicitly return
   *  undefined (callers treat that like "no row").
   */
  _syncStep: function gloda_ds_syncStep(aStatement) {
    let tries = 0;
    while (tries < 32000) {
      try {
        return aStatement.executeStep();
      }
      catch (e) {
        // SQLITE_BUSY becomes NS_ERROR_FAILURE
        if (e.result == 0x80004005) {
          tries++;
          // we really need to delay here, somehow.  unfortunately, we can't
          //  allow event processing to happen, and most of the things we could
          //  do to delay ourselves result in event processing happening.  (Use
          //  of a timer, a synchronous dispatch, etc.)
          // in theory, nsIThreadEventFilter could allow us to stop other events
          //  that aren't our timer from happening, but it seems slightly
          //  dangerous and 'notxpcom' suggests it ain't happening anyways...
          // so, let's just be dumb and hope that the underlying file I/O going
          //  on makes us more likely to yield to the other thread so it can
          //  finish what it is doing...
        } else {
          throw e;
        }
      }
    }
    this._log.error("Synchronous step gave up after " + tries + " tries.");
  },

  /**
   * Helper to bind based on the actual type of the javascript value.  Note
   *  that we always use int64 because under the hood sqlite just promotes the
   *  normal 'int' call to 'int64' anyways.
   * @throws Error for any value that is not null/undefined/string/number.
   */
  _bindVariant: function gloda_ds_bindBlob(aStatement, aIndex, aVariant) {
    if (aVariant == null) // catch both null and undefined
      aStatement.bindNullParameter(aIndex);
    else if (typeof aVariant == "string")
      aStatement.bindStringParameter(aIndex, aVariant);
    else if (typeof aVariant == "number") {
      // we differentiate for storage representation reasons only.
      if (Math.floor(aVariant) === aVariant)
        aStatement.bindInt64Parameter(aIndex, aVariant);
      else
        aStatement.bindDoubleParameter(aIndex, aVariant);
    }
    else
      throw new Error("Attempt to bind variant with unsupported type: " +
                      (typeof aVariant));
  },

  /**
   * Helper that uses the appropriate getter given the data type; should be
   *  mooted once we move to 1.9.2 and can use built-in variant support.
   */
  _getVariant: function gloda_ds_getBlob(aRow, aIndex) {
    let typeOfIndex = aRow.getTypeOfIndex(aIndex);
    if (typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_NULL)
      return null;
    // XPConnect would just end up going through an intermediary double stage
    //  for the int64 case anyways...
    else if (typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_INTEGER ||
             typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_DOUBLE)
      return aRow.getDouble(aIndex);
    else // typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_TEXT
      return aRow.getString(aIndex);
  },

  /** Simple nested transaction support as a performance optimization. */
  _transactionDepth: 0,
  // false once any nested transaction has requested a rollback.
  _transactionGood: false,

  /**
   * Self-memoizing BEGIN TRANSACTION statement.
   */
  get _beginTransactionStatement() {
    let statement = this._createAsyncStatement("BEGIN TRANSACTION");
    this.__defineGetter__("_beginTransactionStatement", () => statement);
    return this._beginTransactionStatement;
  },

  /**
   * Self-memoizing COMMIT statement.
   */
  get _commitTransactionStatement() {
    let statement = this._createAsyncStatement("COMMIT");
    this.__defineGetter__("_commitTransactionStatement", () => statement);
    return this._commitTransactionStatement;
  },

  /**
   * Self-memoizing ROLLBACK statement.
   */
  get _rollbackTransactionStatement() {
    let statement = this._createAsyncStatement("ROLLBACK");
    this.__defineGetter__("_rollbackTransactionStatement", () => statement);
    return this._rollbackTransactionStatement;
  },

  // Callbacks queued by runPostCommit; reset at each outermost BEGIN.
  _pendingPostCommitCallbacks: null,
  /**
   * Register a callback to be invoked when the current transaction's commit
   *  completes.
   */
  runPostCommit: function gloda_ds_runPostCommit(aCallback) {
    this._pendingPostCommitCallbacks.push(aCallback);
  },

  /**
   * Begin a potentially nested transaction; only the outermost transaction gets
   *  to be an actual transaction, and the failure of any nested transaction
   *  results in a rollback of the entire outer transaction.  If you really
   *  need an atomic transaction
   */
  _beginTransaction: function gloda_ds_beginTransaction() {
    if (this._transactionDepth == 0) {
      this._pendingPostCommitCallbacks = [];
      this._beginTransactionStatement.executeAsync(this.trackAsync());
      this._transactionGood = true;
    }
    this._transactionDepth++;
  },
  /**
   * Commit a potentially nested transaction; if we are the outer-most
   *  transaction and no sub-transaction issues a rollback
   *  (via _rollbackTransaction) then we commit, otherwise we rollback.
   */
  _commitTransaction: function gloda_ds_commitTransaction() {
    this._transactionDepth--;
    if (this._transactionDepth == 0) {
      try {
        if (this._transactionGood)
          this._commitTransactionStatement.executeAsync(
            new PostCommitHandler(this._pendingPostCommitCallbacks));
        else
          this._rollbackTransactionStatement.executeAsync(this.trackAsync());
      }
      catch (ex) {
        this._log.error("Commit problem:", ex);
      }
      this._pendingPostCommitCallbacks = [];
    }
  },
  /**
   * Abort the commit of the potentially nested transaction.  If we are not the
   *  outermost transaction, we set a flag that tells the outermost transaction
   *  that it must roll back.
   */
  _rollbackTransaction: function gloda_ds_rollbackTransaction() {
    this._transactionDepth--;
    this._transactionGood = false;
    if (this._transactionDepth == 0) {
      try {
        this._rollbackTransactionStatement.executeAsync(this.trackAsync());
      }
      catch (ex) {
        this._log.error("Rollback problem:", ex);
      }
    }
  },

  // Count of async statements executed but not yet completed; see trackAsync.
  _pendingAsyncStatements: 0,
  /**
   * The function to call, if any, when we hit 0 pending async statements.
+ */ + _pendingAsyncCompletedListener: null, + _asyncCompleted: function () { + if (--this._pendingAsyncStatements == 0) { + if (this._pendingAsyncCompletedListener !== null) { + this._pendingAsyncCompletedListener(); + this._pendingAsyncCompletedListener = null; + } + } + }, + _asyncTrackerListener: { + handleResult: function () {}, + handleError: function(aError) { + GlodaDatastore._log.error("got error in _asyncTrackerListener.handleError(): " + + aError.result + ": " + aError.message); + }, + handleCompletion: function () { + try { + // the helper method exists because the other classes need to call it too + GlodaDatastore._asyncCompleted(); + } + catch (e) { + this._log.error("Exception in handleCompletion:", e); + } + } + }, + /** + * Increments _pendingAsyncStatements and returns a listener that will + * decrement the value when the statement completes. + */ + trackAsync: function() { + this._pendingAsyncStatements++; + return this._asyncTrackerListener; + }, + + /* ********** Attribute Definitions ********** */ + /** Maps (attribute def) compound names to the GlodaAttributeDBDef objects. */ + _attributeDBDefs: {}, + /** Map attribute ID to the definition and parameter value that produce it. */ + _attributeIDToDBDefAndParam: {}, + + /** + * This attribute id indicates that we are encoding that a non-singular + * attribute has an empty set. The value payload that goes with this should + * the attribute id of the attribute we are talking about. + */ + kEmptySetAttrId: 1, + + /** + * We maintain the attributeDefinitions next id counter mainly because we can. + * Since we mediate the access, there's no real risk to doing so, and it + * allows us to keep the writes on the async connection without having to + * wait for a completion notification. + * + * Start from 32 so we can have a number of sentinel values. 
   */
  _nextAttributeId: 32,

  // Seed _nextAttributeId from the highest id already present in the
  //  attributeDefinitions table; called synchronously during _init.
  _populateAttributeDefManagedId: function () {
    let stmt = this._createSyncStatement(
      "SELECT MAX(id) FROM attributeDefinitions", true);
    if (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call
      // 0 gets returned even if there are no messages...
      let highestSeen = stmt.getInt64(0);
      if (highestSeen != 0)
        this._nextAttributeId = highestSeen + 1;
    }
    stmt.finalize();
  },

  /** Self-memoizing INSERT statement for attribute definitions. */
  get _insertAttributeDefStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO attributeDefinitions (id, attributeType, extensionName, \
                                         name, parameter) \
              VALUES (?1, ?2, ?3, ?4, ?5)");
    this.__defineGetter__("_insertAttributeDefStatement", () => statement);
    return this._insertAttributeDefStatement;
  },

  /**
   * Create an attribute definition and return the row ID.  Special/atypical
   *  in that it doesn't directly return a GlodaAttributeDBDef; we leave that up
   *  to the caller since they know much more than actually needs to go in the
   *  database.
   *
   * @return The attribute id allocated to this attribute.
   */
  _createAttributeDef: function gloda_ds_createAttributeDef(aAttrType,
      aExtensionName, aAttrName, aParameter) {
    let attributeId = this._nextAttributeId++;

    let iads = this._insertAttributeDefStatement;
    iads.bindInt64Parameter(0, attributeId);
    iads.bindInt64Parameter(1, aAttrType);
    iads.bindStringParameter(2, aExtensionName);
    iads.bindStringParameter(3, aAttrName);
    this._bindVariant(iads, 4, aParameter);

    iads.executeAsync(this.trackAsync());

    return attributeId;
  },

  /**
   * Sync-ly look-up all the attribute definitions, populating our authoritative
   *  _attributeDBDefss and _attributeIDToDBDefAndParam maps.  (In other words,
   *  once this method is called, those maps should always be in sync with the
   *  underlying database.)
   */
  getAllAttributes: function gloda_ds_getAllAttributes() {
    let stmt = this._createSyncStatement(
      "SELECT id, attributeType, extensionName, name, parameter \
         FROM attributeDefinitions", true);

    // map compound name to the attribute
    let attribs = {};
    // map the attribute id to [attribute, parameter] where parameter is null
    //  in cases where parameter is unused.
    let idToAttribAndParam = {};

    this._log.info("loading all attribute defs");

    while (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call
      let rowId = stmt.getInt64(0);
      let rowAttributeType = stmt.getInt64(1);
      let rowExtensionName = stmt.getString(2);
      let rowName = stmt.getString(3);
      let rowParameter = this._getVariant(stmt, 4);

      let compoundName = rowExtensionName + ":" + rowName;

      let attrib;
      if (compoundName in attribs) {
        attrib = attribs[compoundName];
      } else {
        // the definition is shared by the primary row and all of its
        //  parameter-binding rows; create it on first sight.
        attrib = new GlodaAttributeDBDef(this, /* aID */ null,
          compoundName, rowAttributeType, rowExtensionName, rowName);
        attribs[compoundName] = attrib;
      }
      // if the parameter is null, the id goes on the attribute def, otherwise
      //  it is a parameter binding and goes in the binding map.
      if (rowParameter == null) {
        this._log.debug(compoundName + " primary: " + rowId);
        attrib._id = rowId;
        idToAttribAndParam[rowId] = [attrib, null];
      } else {
        this._log.debug(compoundName + " binding: " + rowParameter +
                        " = " + rowId);
        attrib._parameterBindings[rowParameter] = rowId;
        idToAttribAndParam[rowId] = [attrib, rowParameter];
      }
    }
    stmt.finalize();

    this._log.info("done loading all attribute defs");

    this._attributeDBDefs = attribs;
    this._attributeIDToDBDefAndParam = idToAttribAndParam;
  },

  /**
   * Helper method for GlodaAttributeDBDef to tell us when their bindParameter
   *  method is called and they have created a new binding (using
   *  GlodaDatastore._createAttributeDef).  In theory, that method could take
   *  an additional argument and obviate the need for this method.
   */
  reportBinding: function gloda_ds_reportBinding(aID, aAttrDef, aParamValue) {
    this._attributeIDToDBDefAndParam[aID] = [aAttrDef, aParamValue];
  },

  /* ********** Folders ********** */
  /** next folder (row) id to issue, populated by _getAllFolderMappings. */
  _nextFolderId: 1,

  /** Self-memoizing INSERT statement for folderLocations rows. */
  get _insertFolderLocationStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO folderLocations (id, folderURI, dirtyStatus, name, \
                                    indexingPriority) VALUES \
                                    (?1, ?2, ?3, ?4, ?5)");
    this.__defineGetter__("_insertFolderLocationStatement",
      () => statement);
    return this._insertFolderLocationStatement;
  },

  /**
   * Authoritative map from folder URI to folder ID.  (Authoritative in the
   *  sense that this map exactly represents the state of the underlying
   *  database.  If it does not, it's a bug in updating the database.)
   */
  _folderByURI: {},
  /** Authoritative map from folder ID to folder URI */
  _folderByID: {},

  /** Initialize our _folderByURI/_folderByID mappings, called by _init(). */
  _getAllFolderMappings: function gloda_ds_getAllFolderMappings() {
    let stmt = this._createSyncStatement(
      "SELECT id, folderURI, dirtyStatus, name, indexingPriority \
        FROM folderLocations", true);

    while (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call
      let folderID = stmt.getInt64(0);
      let folderURI = stmt.getString(1);
      let dirtyStatus = stmt.getInt32(2);
      let folderName = stmt.getString(3);
      let indexingPriority = stmt.getInt32(4);

      let folder = new GlodaFolder(this, folderID, folderURI, dirtyStatus,
                                   folderName, indexingPriority);

      this._folderByURI[folderURI] = folder;
      this._folderByID[folderID] = folder;

      // keep the id allocator ahead of everything we have seen
      if (folderID >= this._nextFolderId)
        this._nextFolderId = folderID + 1;
    }
    stmt.finalize();
  },

  // Is this nsIMsgFolder already known to (mapped by) gloda?
  _folderKnown: function gloda_ds_folderKnown(aFolder) {
    let folderURI = aFolder.URI;
    return folderURI in this._folderByURI;
  },

  // Is this gloda folder id currently mapped?
  _folderIdKnown: function gloda_ds_folderIdKnown(aFolderID) {
    return (aFolderID in this._folderByID);
  },

  /**
   * Return the default indexing priority for a folder of this type, based
   * on the folder's flags. If aAllowSpecialFolderIndexing is true, then
   * folders such as Trash and Junk will be indexed.
   *
   * @param {nsIMsgFolder} aFolder
   * @param {boolean} aAllowSpecialFolderIndexing
   * @returns {Number}
   */
  getDefaultIndexingPriority: function gloda_ds_getDefaultIndexingPriority(aFolder, aAllowSpecialFolderIndexing) {

    let indexingPriority = GlodaFolder.prototype.kIndexingDefaultPriority;
    // Do not walk into trash/junk folders, unless the user is explicitly
    //  telling us to do so.
    let specialFolderFlags = Ci.nsMsgFolderFlags.Trash | Ci.nsMsgFolderFlags.Junk;
    if (aFolder.isSpecialFolder(specialFolderFlags, true))
      indexingPriority = aAllowSpecialFolderIndexing ?
                           GlodaFolder.prototype.kIndexingDefaultPriority :
                           GlodaFolder.prototype.kIndexingNeverPriority;
    // Queue folders should always be ignored just because messages should not
    //  spend much time in there.
    // We hate newsgroups, and public IMAP folders are similar.
    // Other user IMAP folders should be ignored because it's not this user's
    //  mail.
    else if (aFolder.flags & (Ci.nsMsgFolderFlags.Queue
                              | Ci.nsMsgFolderFlags.Newsgroup
                              // In unit testing at least folders can be
                              // confusingly labeled ImapPublic when they
                              // should not be.  Or at least I don't think they
                              // should be.  So they're legit for now.
                              //| Ci.nsMsgFolderFlags.ImapPublic
                              //| Ci.nsMsgFolderFlags.ImapOtherUser
                             ))
      indexingPriority = GlodaFolder.prototype.kIndexingNeverPriority;
    else if (aFolder.flags & Ci.nsMsgFolderFlags.Inbox)
      indexingPriority = GlodaFolder.prototype.kIndexingInboxPriority;
    else if (aFolder.flags & Ci.nsMsgFolderFlags.SentMail)
      indexingPriority = GlodaFolder.prototype.kIndexingSentMailPriority;
    else if (aFolder.flags & Ci.nsMsgFolderFlags.Favorite)
      indexingPriority = GlodaFolder.prototype.kIndexingFavoritePriority;
    else if (aFolder.flags & Ci.nsMsgFolderFlags.CheckNew)
      indexingPriority = GlodaFolder.prototype.kIndexingCheckNewPriority;

    return indexingPriority;
  },

  /**
   * Map a folder URI to a GlodaFolder instance, creating the mapping if it does
   *  not yet exist.
   *
   * @param aFolder The nsIMsgFolder instance you would like the GlodaFolder
   *     instance for.
   * @returns The existing or newly created GlodaFolder instance.
+ */ + _mapFolder: function gloda_ds_mapFolderURI(aFolder) { + let folderURI = aFolder.URI; + if (folderURI in this._folderByURI) { + return this._folderByURI[folderURI]; + } + + let folderID = this._nextFolderId++; + + // if there's an indexingPriority stored on the folder, just use that + let indexingPriority; + let stringPrio = aFolder.getStringProperty("indexingPriority"); + if (stringPrio.length) + indexingPriority = parseInt(stringPrio); + else + // otherwise, fall back to the default for folders of this type + indexingPriority = this.getDefaultIndexingPriority(aFolder); + + // If there are messages in the folder, it is filthy. If there are no + // messages, it can be clean. + let dirtyStatus = aFolder.getTotalMessages(false) ? + GlodaFolder.prototype.kFolderFilthy : + GlodaFolder.prototype.kFolderClean; + let folder = new GlodaFolder(this, folderID, folderURI, dirtyStatus, + aFolder.prettiestName, indexingPriority); + + this._insertFolderLocationStatement.bindInt64Parameter(0, folder.id); + this._insertFolderLocationStatement.bindStringParameter(1, folder.uri); + this._insertFolderLocationStatement.bindInt64Parameter(2, + folder.dirtyStatus); + this._insertFolderLocationStatement.bindStringParameter(3, folder.name); + this._insertFolderLocationStatement.bindInt64Parameter( + 4, folder.indexingPriority); + this._insertFolderLocationStatement.executeAsync(this.trackAsync()); + + this._folderByURI[folderURI] = folder; + this._folderByID[folderID] = folder; + this._log.debug("!! mapped " + folder.id + " from " + folderURI); + return folder; + }, + + /** + * Map an integer gloda folder ID to the corresponding GlodaFolder instance. + * + * @param aFolderID The known valid gloda folder ID for which you would like + * a GlodaFolder instance. + * @return The GlodaFolder instance with the given id. If no such instance + * exists, we will throw an exception. 
+ */ + _mapFolderID: function gloda_ds_mapFolderID(aFolderID) { + if (aFolderID === null) + return null; + if (aFolderID in this._folderByID) + return this._folderByID[aFolderID]; + throw new Error("Got impossible folder ID: " + aFolderID); + }, + + /** + * Mark the gloda folder as deleted for any outstanding references to it and + * remove it from our tables so we don't hand out any new references. The + * latter is especially important in the case a folder with the same name + * is created afterwards; we don't want to confuse the new one with the old + * one! + */ + _killGlodaFolderIntoTombstone: + function gloda_ds__killGlodaFolderIntoTombstone(aGlodaFolder) { + aGlodaFolder._deleted = true; + delete this._folderByURI[aGlodaFolder.uri]; + delete this._folderByID[aGlodaFolder.id]; + }, + + get _updateFolderDirtyStatusStatement() { + let statement = this._createAsyncStatement( + "UPDATE folderLocations SET dirtyStatus = ?1 \ + WHERE id = ?2"); + this.__defineGetter__("_updateFolderDirtyStatusStatement", + () => statement); + return this._updateFolderDirtyStatusStatement; + }, + + updateFolderDirtyStatus: function gloda_ds_updateFolderDirtyStatus(aFolder) { + let ufds = this._updateFolderDirtyStatusStatement; + ufds.bindInt64Parameter(1, aFolder.id); + ufds.bindInt64Parameter(0, aFolder.dirtyStatus); + ufds.executeAsync(this.trackAsync()); + }, + + get _updateFolderIndexingPriorityStatement() { + let statement = this._createAsyncStatement( + "UPDATE folderLocations SET indexingPriority = ?1 \ + WHERE id = ?2"); + this.__defineGetter__("_updateFolderIndexingPriorityStatement", + () => statement); + return this._updateFolderIndexingPriorityStatement; + }, + + updateFolderIndexingPriority: function gloda_ds_updateFolderIndexingPriority(aFolder) { + let ufip = this._updateFolderIndexingPriorityStatement; + ufip.bindInt64Parameter(1, aFolder.id); + ufip.bindInt64Parameter(0, aFolder.indexingPriority); + ufip.executeAsync(this.trackAsync()); + }, + + get 
_updateFolderLocationStatement() { + let statement = this._createAsyncStatement( + "UPDATE folderLocations SET folderURI = ?1 \ + WHERE id = ?2"); + this.__defineGetter__("_updateFolderLocationStatement", + () => statement); + return this._updateFolderLocationStatement; + }, + + /** + * Non-recursive asynchronous folder renaming based on the URI. + * + * @TODO provide a mechanism for recursive folder renames or have a higher + * layer deal with it and remove this note. + */ + renameFolder: function gloda_ds_renameFolder(aOldFolder, aNewURI) { + if (!(aOldFolder.URI in this._folderByURI)) + return; + let folder = this._mapFolder(aOldFolder); // ensure the folder is mapped + let oldURI = folder.uri; + this._folderByURI[aNewURI] = folder; + folder._uri = aNewURI; + this._log.info("renaming folder URI " + oldURI + " to " + aNewURI); + this._updateFolderLocationStatement.bindStringParameter(1, folder.id); + this._updateFolderLocationStatement.bindStringParameter(0, aNewURI); + this._updateFolderLocationStatement.executeAsync(this.trackAsync()); + + delete this._folderByURI[oldURI]; + }, + + get _deleteFolderByIDStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM folderLocations WHERE id = ?1"); + this.__defineGetter__("_deleteFolderByIDStatement", + () => statement); + return this._deleteFolderByIDStatement; + }, + + deleteFolderByID: function gloda_ds_deleteFolder(aFolderID) { + let dfbis = this._deleteFolderByIDStatement; + dfbis.bindInt64Parameter(0, aFolderID); + dfbis.executeAsync(this.trackAsync()); + }, + + /** + * This timer drives our folder cleanup logic that is in charge of dropping + * our folder references and more importantly the folder's msgDatabase + * reference, but only if they are no longer in use. + * This timer is only active when we have one or more live gloda folders (as + * tracked by _liveGlodaFolders). 
  Although we choose our timer interval to
   *  be power-friendly, it doesn't really matter because unless the user or the
   *  indexing process is actively doing things, all of the folders will 'die'
   *  and so we will stop scheduling the timer.
   */
  _folderCleanupTimer: null,

  /**
   * When true, we have a folder cleanup timer event active.
   */
  _folderCleanupActive: false,

  /**
   * Interval at which we call the folder cleanup code, in milliseconds.
   */
  _folderCleanupTimerInterval: 2000,

  /**
   * Maps the id of 'live' GlodaFolders to the instances.  If a GlodaFolder is
   *  in here, it means that it has a reference to its nsIMsgDBFolder which
   *  should have an open nsIMsgDatabase that we will need to close.  This does
   *  not count folders that are being indexed unless they have also been used
   *  for header retrieval.
   */
  _liveGlodaFolders: {},

  /**
   * Mark a GlodaFolder as having a live reference to its nsIMsgFolder with an
   *  implied opened associated message database.  GlodaFolder calls this when
   *  it first acquires its reference.  It is removed from the list of live
   *  folders only when our timer check calls the GlodaFolder's
   *  forgetFolderIfUnused method and that method returns true.
   */
  markFolderLive: function gloda_ds_markFolderLive(aGlodaFolder) {
    this._liveGlodaFolders[aGlodaFolder.id] = aGlodaFolder;
    // lazily (re-)arm the cleanup heartbeat when the first folder goes live
    if (!this._folderCleanupActive) {
      this._folderCleanupTimer.initWithCallback(this._performFolderCleanup,
        this._folderCleanupTimerInterval, Ci.nsITimer.TYPE_REPEATING_SLACK);
      this._folderCleanupActive = true;
    }
  },

  /**
   * Timer-driven folder cleanup logic.  For every live folder tracked in
   *  _liveGlodaFolders, we call their forgetFolderIfUnused method each time
   *  until they return true indicating they have cleaned themselves up.
   * This method is called without a 'this' context!  (Hence the explicit
   *  GlodaDatastore references below rather than |this|.)
   */
  _performFolderCleanup: function gloda_ds_performFolderCleanup() {
    // we only need to keep going if there is at least one folder in the table
    //  that is still alive after this pass.
    let keepGoing = false;
    for (let id in GlodaDatastore._liveGlodaFolders) {
      let glodaFolder = GlodaDatastore._liveGlodaFolders[id];
      // returns true if it is now 'dead' and doesn't need this heartbeat check
      if (glodaFolder.forgetFolderIfUnused())
        delete GlodaDatastore._liveGlodaFolders[glodaFolder.id];
      else
        keepGoing = true;
    }

    if (!keepGoing) {
      GlodaDatastore._folderCleanupTimer.cancel();
      GlodaDatastore._folderCleanupActive = false;
    }
  },

  /* ********** Conversation ********** */
  /** The next conversation id to allocate.  Initialize at startup. */
  _nextConversationId: 1,

  // Seed _nextConversationId from the highest id already in the table;
  //  called synchronously during _init.
  _populateConversationManagedId: function () {
    let stmt = this._createSyncStatement(
      "SELECT MAX(id) FROM conversations", true);
    if (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call
      this._nextConversationId = stmt.getInt64(0) + 1;
    }
    stmt.finalize();
  },

  /** Self-memoizing INSERT statement for conversation rows. */
  get _insertConversationStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO conversations (id, subject, oldestMessageDate, \
                                  newestMessageDate) \
              VALUES (?1, ?2, ?3, ?4)");
    this.__defineGetter__("_insertConversationStatement", () => statement);
    return this._insertConversationStatement;
  },

  /** Self-memoizing INSERT statement for conversation full-text rows. */
  get _insertConversationTextStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO conversationsText (docid, subject) \
              VALUES (?1, ?2)");
    this.__defineGetter__("_insertConversationTextStatement",
      () => statement);
    return this._insertConversationTextStatement;
  },

  /**
   * Asynchronously create a conversation.
   *
   * @param aSubject The conversation subject (also indexed full-text).
   * @param aOldestMessageDate May be null.
   * @param aNewestMessageDate May be null.
   * @returns The new GlodaConversation instance (already announced to the
   *     collection manager); its row insertion is still in flight.
   */
  createConversation: function gloda_ds_createConversation(aSubject,
      aOldestMessageDate, aNewestMessageDate) {

    // create the data row
    let conversationID = this._nextConversationId++;
    let ics = this._insertConversationStatement;
    ics.bindInt64Parameter(0, conversationID);
    ics.bindStringParameter(1, aSubject);
    if (aOldestMessageDate == null)
      ics.bindNullParameter(2);
    else
      ics.bindInt64Parameter(2, aOldestMessageDate);
    if (aNewestMessageDate == null)
      ics.bindNullParameter(3);
    else
      ics.bindInt64Parameter(3, aNewestMessageDate);
    ics.executeAsync(this.trackAsync());

    // create the fulltext row, using the same rowid/docid
    let icts = this._insertConversationTextStatement;
    icts.bindInt64Parameter(0, conversationID);
    icts.bindStringParameter(1, aSubject);
    icts.executeAsync(this.trackAsync());

    // create it
    let conversation = new GlodaConversation(this, conversationID,
                                             aSubject, aOldestMessageDate,
                                             aNewestMessageDate);
    // it's new! let the collection manager know about it.
    GlodaCollectionManager.itemsAdded(conversation.NOUN_ID, [conversation]);
    // return it
    return conversation;
  },

  /** Self-memoizing DELETE statement for a conversation row. */
  get _deleteConversationByIDStatement() {
    let statement = this._createAsyncStatement(
      "DELETE FROM conversations WHERE id = ?1");
    this.__defineGetter__("_deleteConversationByIDStatement",
                          () => statement);
    return this._deleteConversationByIDStatement;
  },

  /**
   * Asynchronously delete a conversation given its ID.
   */
  deleteConversationByID: function gloda_ds_deleteConversationByID(
      aConversationID) {
    let dcbids = this._deleteConversationByIDStatement;
    dcbids.bindInt64Parameter(0, aConversationID);
    dcbids.executeAsync(this.trackAsync());

    GlodaCollectionManager.itemsDeleted(GlodaConversation.prototype.NOUN_ID,
                                        [aConversationID]);
  },

  // Build a GlodaConversation from a SELECT row shaped like
  //  (id, subject, oldestMessageDate, newestMessageDate); NULL dates map to
  //  null.
  _conversationFromRow: function gloda_ds_conversationFromRow(aStmt) {
      let oldestMessageDate, newestMessageDate;
      if (aStmt.getTypeOfIndex(2) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL)
        oldestMessageDate = null;
      else
        oldestMessageDate = aStmt.getInt64(2);
      if (aStmt.getTypeOfIndex(3) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL)
        newestMessageDate = null;
      else
        newestMessageDate = aStmt.getInt64(3);
      return new GlodaConversation(this, aStmt.getInt64(0),
        aStmt.getString(1), oldestMessageDate, newestMessageDate);
  },

  /* ********** Message ********** */
  /**
   * Next message id, managed because of our use of asynchronous inserts.
   * Initialized by _populateMessageManagedId called by _init.
   *
   * Start from 32 to leave us all kinds of magical sentinel values at the
   *  bottom.
   */
  _nextMessageId: 32,

  // Seed _nextMessageId from the highest id already in the messages table.
  _populateMessageManagedId: function () {
    let stmt = this._createSyncStatement(
      "SELECT MAX(id) FROM messages", true);
    if (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call
      // 0 gets returned even if there are no messages...
      let highestSeen = stmt.getInt64(0);
      if (highestSeen != 0)
        this._nextMessageId = highestSeen + 1;
    }
    stmt.finalize();
  },

  /** Self-memoizing INSERT statement for message rows. */
  get _insertMessageStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO messages (id, folderID, messageKey, conversationID, date, \
                             headerMessageID, jsonAttributes, notability) \
              VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)");
    this.__defineGetter__("_insertMessageStatement", () => statement);
    return this._insertMessageStatement;
  },

  /** Self-memoizing INSERT statement for message full-text rows. */
  get _insertMessageTextStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO messagesText (docid, subject, body, attachmentNames, \
                                 author, recipients) \
              VALUES (?1, ?2, ?3, ?4, ?5, ?6)");
    this.__defineGetter__("_insertMessageTextStatement", () => statement);
    return this._insertMessageTextStatement;
  },

  /**
   * Create a GlodaMessage with the given properties.  Because this is only half
   *  of the process of creating a message (the attributes still need to be
   *  completed), it's on the caller's head to call GlodaCollectionManager's
   *  itemAdded method once the message is fully created.
   *
   * This method uses the async connection, any downstream logic that depends on
   *  this message actually existing in the database must be done using an
   *  async query.
   */
  createMessage: function gloda_ds_createMessage(aFolder, aMessageKey,
                              aConversationID, aDatePRTime, aHeaderMessageID) {
    let folderID;
    if (aFolder != null) {
      folderID = this._mapFolder(aFolder).id;
    }
    else {
      // a null folder means a 'ghost' message (known only by reference)
      folderID = null;
    }

    let messageID = this._nextMessageId++;

    let message = new GlodaMessage(
      this, messageID, folderID,
      aMessageKey,
      aConversationID, /* conversation */ null,
      // PRTime is in microseconds; Date wants milliseconds
      aDatePRTime ? new Date(aDatePRTime / 1000) : null,
      aHeaderMessageID,
      /* deleted */ false, /* jsonText */ undefined, /* notability*/ 0);

    // We would love to notify the collection manager about the message at this
    //  point (at least if it's not a ghost), but we can't yet.  We need to wait
    //  until the attributes have been indexed, which means it's out of our
    //  hands.  (Gloda.processMessage does it.)

    return message;
  },

  // Asynchronously persist the given GlodaMessage (and its full-text row when
  //  it is not a ghost).
  insertMessage: function gloda_ds_insertMessage(aMessage) {
    let ims = this._insertMessageStatement;
    ims.bindInt64Parameter(0, aMessage.id);
    if (aMessage.folderID == null)
      ims.bindNullParameter(1);
    else
      ims.bindInt64Parameter(1, aMessage.folderID);
    if (aMessage.messageKey == null)
      ims.bindNullParameter(2);
    else
      ims.bindInt64Parameter(2, aMessage.messageKey);
    ims.bindInt64Parameter(3, aMessage.conversationID);
    if (aMessage.date == null)
      ims.bindNullParameter(4);
    else
      // stored as PRTime (microseconds); aMessage.date is in milliseconds
      ims.bindInt64Parameter(4, aMessage.date * 1000);
    ims.bindStringParameter(5, aMessage.headerMessageID);
    if (aMessage._jsonText)
      ims.bindStringParameter(6, aMessage._jsonText);
    else
      ims.bindNullParameter(6);
    ims.bindInt64Parameter(7, aMessage.notability);

    try {
      ims.executeAsync(this.trackAsync());
    }
    catch(ex) {
      throw new Error("error executing statement... " +
                      this.asyncConnection.lastError + ": " +
                      this.asyncConnection.lastErrorString + " - " + ex);
    }

    // we create the full-text row for any message that isn't a ghost,
    //  whether we have the body or not
    if (aMessage.folderID !== null)
      this._insertMessageText(aMessage);
  },

  /**
   * Inserts a full-text row.  This should only be called if you're sure you want
   *  to insert a row into the table.
   */
  _insertMessageText: function gloda_ds__insertMessageText(aMessage) {
    // derive the indexed body text: prefer structured content, then raw
    //  body lines, else nothing.
    if (aMessage._content && aMessage._content.hasContent())
      aMessage._indexedBodyText = aMessage._content.getContentString(true);
    else if (aMessage._bodyLines)
      aMessage._indexedBodyText = aMessage._bodyLines.join("\n");
    else
      aMessage._indexedBodyText = null;

    let imts = this._insertMessageTextStatement;
    imts.bindInt64Parameter(0, aMessage.id);
    imts.bindStringParameter(1, aMessage._subject);
    if (aMessage._indexedBodyText == null)
      imts.bindNullParameter(2);
    else
      imts.bindStringParameter(2, aMessage._indexedBodyText);
    if (aMessage._attachmentNames === null)
      imts.bindNullParameter(3);
    else
      imts.bindStringParameter(3, aMessage._attachmentNames.join("\n"));

    imts.bindStringParameter(4, aMessage._indexAuthor);
    imts.bindStringParameter(5, aMessage._indexRecipients);

    try {
      imts.executeAsync(this.trackAsync());
    }
    catch(ex) {
      throw new Error("error executing fulltext statement... " +
                      this.asyncConnection.lastError + ": " +
                      this.asyncConnection.lastErrorString + " - " + ex);
    }
  },

  /** Self-memoizing UPDATE statement for message rows. */
  get _updateMessageStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE messages SET folderID = ?1, \
                           messageKey = ?2, \
                           conversationID = ?3, \
                           date = ?4, \
                           headerMessageID = ?5, \
                           jsonAttributes = ?6, \
                           notability = ?7, \
                           deleted = ?8 \
              WHERE id = ?9");
    this.__defineGetter__("_updateMessageStatement", () => statement);
    return this._updateMessageStatement;
  },

  /** Self-memoizing UPDATE statement for message full-text rows. */
  get _updateMessageTextStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE messagesText SET body = ?1, \
                               attachmentNames = ?2 \
              WHERE docid = ?3");

    this.__defineGetter__("_updateMessageTextStatement", () => statement);
    return this._updateMessageTextStatement;
  },

  /**
   * Update the database row associated with the message.  If the message is
   *  not a ghost and has _isNew defined, messagesText is affected.
   *
   * aMessage._isNew is currently equivalent to the fact that there is no
   *  full-text row associated with this message, and we work with this
   *  assumption here.  Note that if aMessage._isNew is not defined, then
   *  we don't do anything.
   */
  updateMessage: function gloda_ds_updateMessage(aMessage) {
    let ums = this._updateMessageStatement;
    ums.bindInt64Parameter(8, aMessage.id);
    if (aMessage.folderID === null)
      ums.bindNullParameter(0);
    else
      ums.bindInt64Parameter(0, aMessage.folderID);
    if (aMessage.messageKey === null)
      ums.bindNullParameter(1);
    else
      ums.bindInt64Parameter(1, aMessage.messageKey);
    ums.bindInt64Parameter(2, aMessage.conversationID);
    if (aMessage.date === null)
      ums.bindNullParameter(3);
    else
      // stored as PRTime (microseconds); aMessage.date is in milliseconds
      ums.bindInt64Parameter(3, aMessage.date * 1000);
    ums.bindStringParameter(4, aMessage.headerMessageID);
    if (aMessage._jsonText)
      ums.bindStringParameter(5, aMessage._jsonText);
    else
      ums.bindNullParameter(5);
    ums.bindInt64Parameter(6, aMessage.notability);
    ums.bindInt64Parameter(7, aMessage._isDeleted ? 1 : 0);

    ums.executeAsync(this.trackAsync());

    // only non-ghost messages have (or get) full-text rows
    if (aMessage.folderID !== null) {
      if (aMessage._isNew === true)
        this._insertMessageText(aMessage);
      else
        this._updateMessageText(aMessage);
    }
  },

  /**
   * Updates the full-text row associated with this message.  This only performs
   *  the UPDATE query if the indexed body text has changed, which means that if
   *  the body hasn't changed but the attachments have, we don't update.
   */
  _updateMessageText: function gloda_ds__updateMessageText(aMessage) {
    let newIndexedBodyText;
    if (aMessage._content && aMessage._content.hasContent())
      newIndexedBodyText = aMessage._content.getContentString(true);
    else if (aMessage._bodyLines)
      newIndexedBodyText = aMessage._bodyLines.join("\n");
    else
      newIndexedBodyText = null;

    // If the body text matches, don't perform an update
    if (newIndexedBodyText == aMessage._indexedBodyText) {
      this._log.debug("in _updateMessageText, skipping update because body matches");
      return;
    }

    aMessage._indexedBodyText = newIndexedBodyText;
    let umts = this._updateMessageTextStatement;
    umts.bindInt64Parameter(2, aMessage.id);

    if (aMessage._indexedBodyText == null)
      umts.bindNullParameter(0);
    else
      umts.bindStringParameter(0, aMessage._indexedBodyText);

    if (aMessage._attachmentNames == null)
      umts.bindNullParameter(1);
    else
      umts.bindStringParameter(1, aMessage._attachmentNames.join("\n"));

    try {
      umts.executeAsync(this.trackAsync());
    }
    catch(ex) {
      throw new Error("error executing fulltext statement... " +
                      this.asyncConnection.lastError + ": " +
                      this.asyncConnection.lastErrorString + " - " + ex);
    }
  },

  /** Self-memoizing UPDATE statement for a message's folder/key location. */
  get _updateMessageLocationStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE messages SET folderID = ?1, messageKey = ?2 WHERE id = ?3");
    this.__defineGetter__("_updateMessageLocationStatement",
      () => statement);
    return this._updateMessageLocationStatement;
  },

  /**
   * Given a list of gloda message ids, and a list of their new message keys in
   *  the given new folder location, asynchronously update the message's
   *  database locations.  Also, update the in-memory representations.
   */
  updateMessageLocations: function gloda_ds_updateMessageLocations(aMessageIds,
      aNewMessageKeys, aDestFolder, aDoNotNotify) {
    let statement = this._updateMessageLocationStatement;
    let destFolderID = (typeof(aDestFolder) == "number") ?
aDestFolder : + this._mapFolder(aDestFolder).id; + + // map gloda id to the new message key for in-memory rep transform below + let cacheLookupMap = {}; + + for (let iMsg = 0; iMsg < aMessageIds.length; iMsg++) { + let id = aMessageIds[iMsg], msgKey = aNewMessageKeys[iMsg]; + statement.bindInt64Parameter(0, destFolderID); + statement.bindInt64Parameter(1, msgKey); + statement.bindInt64Parameter(2, id); + statement.executeAsync(this.trackAsync()); + + cacheLookupMap[id] = msgKey; + } + + // - perform the cache lookup so we can update in-memory representations + // found in memory items, and converted to list form for notification + let inMemoryItems = {}, modifiedItems = []; + GlodaCollectionManager.cacheLookupMany(GlodaMessage.prototype.NOUN_ID, + cacheLookupMap, + inMemoryItems, + /* do not cache */ false); + for (let glodaId in inMemoryItems) { + let glodaMsg = inMemoryItems[glodaId]; + glodaMsg._folderID = destFolderID; + glodaMsg._messageKey = cacheLookupMap[glodaId]; + modifiedItems.push(glodaMsg); + } + + // tell the collection manager about the modified messages so it can update + // any existing views... + if (!aDoNotNotify && modifiedItems.length) { + GlodaCollectionManager.itemsModified(GlodaMessage.prototype.NOUN_ID, + modifiedItems); + } + }, + + get _updateMessageKeyStatement() { + let statement = this._createAsyncStatement( + "UPDATE messages SET messageKey = ?1 WHERE id = ?2"); + this.__defineGetter__("_updateMessageKeyStatement", + () => statement); + return this._updateMessageKeyStatement; + }, + + /** + * Update the message keys for the gloda messages with the given id's. This + * is to be used in response to msgKeyChanged notifications and is similar to + * `updateMessageLocations` except that we do not update the folder and we + * do not perform itemsModified notifications (because message keys are not + * intended to be relevant to the gloda message abstraction). 
+ */ + updateMessageKeys: function(aMessageIds, aNewMessageKeys) { + let statement = this._updateMessageKeyStatement; + + // map gloda id to the new message key for in-memory rep transform below + let cacheLookupMap = {}; + + for (let iMsg = 0; iMsg < aMessageIds.length; iMsg++) { + let id = aMessageIds[iMsg], msgKey = aNewMessageKeys[iMsg]; + statement.bindInt64Parameter(0, msgKey); + statement.bindInt64Parameter(1, id); + statement.executeAsync(this.trackAsync()); + + cacheLookupMap[id] = msgKey; + } + + // - perform the cache lookup so we can update in-memory representations + let inMemoryItems = {}; + GlodaCollectionManager.cacheLookupMany(GlodaMessage.prototype.NOUN_ID, + cacheLookupMap, + inMemoryItems, + /* do not cache */ false); + for (let glodaId in inMemoryItems) { + let glodaMsg = inMemoryItems[glodaId]; + glodaMsg._messageKey = cacheLookupMap[glodaId]; + } + }, + + /** + * Asynchronously mutate message folder id/message keys for the given + * messages, indicating that we are moving them to the target folder, but + * don't yet know their target message keys. + * + * Updates in-memory representations too. 
+ */ + updateMessageFoldersByKeyPurging: + function gloda_ds_updateMessageFoldersByKeyPurging(aGlodaIds, + aDestFolder) { + let destFolderID = this._mapFolder(aDestFolder).id; + + let sqlStr = "UPDATE messages SET folderID = ?1, \ + messageKey = ?2 \ + WHERE id IN (" + aGlodaIds.join(", ") + ")"; + let statement = this._createAsyncStatement(sqlStr, true); + statement.bindInt64Parameter(0, destFolderID); + statement.bindNullParameter(1); + statement.executeAsync(this.trackAsync()); + statement.finalize(); + + let cached = + GlodaCollectionManager.cacheLookupManyList(GlodaMessage.prototype.NOUN_ID, + aGlodaIds); + for (let id in cached) { + let glodaMsg = cached[id]; + glodaMsg._folderID = destFolderID; + glodaMsg._messageKey = null; + } + }, + + _messageFromRow: function gloda_ds_messageFromRow(aRow) { + let folderId, messageKey, date, jsonText, subject, indexedBodyText, + attachmentNames; + if (aRow.getTypeOfIndex(1) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + folderId = null; + else + folderId = aRow.getInt64(1); + if (aRow.getTypeOfIndex(2) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + messageKey = null; + else + messageKey = aRow.getInt64(2); + if (aRow.getTypeOfIndex(4) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + date = null; + else + date = new Date(aRow.getInt64(4) / 1000); + if (aRow.getTypeOfIndex(7) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + jsonText = undefined; + else + jsonText = aRow.getString(7); + // only queryFromQuery queries will have these columns + if (aRow.numEntries >= 14) { + if (aRow.getTypeOfIndex(10) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + subject = undefined; + else + subject = aRow.getString(10); + if (aRow.getTypeOfIndex(9) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + indexedBodyText = undefined; + else + indexedBodyText = aRow.getString(9); + if (aRow.getTypeOfIndex(11) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + attachmentNames = null; + else { + attachmentNames = aRow.getString(11); + if (attachmentNames) + 
attachmentNames = attachmentNames.split("\n"); + else + attachmentNames = null; + } + // we ignore 12, author + // we ignore 13, recipients + } + return new GlodaMessage(this, aRow.getInt64(0), folderId, messageKey, + aRow.getInt64(3), null, date, aRow.getString(5), + aRow.getInt64(6), jsonText, aRow.getInt64(8), + subject, indexedBodyText, attachmentNames); + }, + + get _updateMessagesMarkDeletedByFolderID() { + // When marking deleted clear the folderID and messageKey so that the + // indexing process can reuse it without any location constraints. + let statement = this._createAsyncStatement( + "UPDATE messages SET folderID = NULL, messageKey = NULL, \ + deleted = 1 WHERE folderID = ?1"); + this.__defineGetter__("_updateMessagesMarkDeletedByFolderID", + () => statement); + return this._updateMessagesMarkDeletedByFolderID; + }, + + /** + * Efficiently mark all the messages in a folder as deleted. Unfortunately, + * we obviously do not know the id's of the messages affected by this which + * complicates in-memory updates. The options are sending out to the SQL + * database for a list of the message id's or some form of in-memory + * traversal. I/O costs being what they are, users having a propensity to + * have folders with tens of thousands of messages, and the unlikeliness + * of all of those messages being gloda-memory-resident, we go with the + * in-memory traversal. + */ + markMessagesDeletedByFolderID: + function gloda_ds_markMessagesDeletedByFolderID(aFolderID) { + let statement = this._updateMessagesMarkDeletedByFolderID; + statement.bindInt64Parameter(0, aFolderID); + statement.executeAsync(this.trackAsync()); + + // Have the collection manager generate itemsRemoved events for any + // in-memory messages in that folder. + GlodaCollectionManager.itemsDeletedByAttribute( + GlodaMessage.prototype.NOUN_ID, + aMsg => aMsg._folderID == aFolderID); + }, + + /** + * Mark all the gloda messages as deleted blind-fire. 
Check if any of the + * messages are known to the collection manager and update them to be deleted + * along with the requisite collection notifications. + */ + markMessagesDeletedByIDs: function gloda_ds_markMessagesDeletedByIDs( + aMessageIDs) { + // When marking deleted clear the folderID and messageKey so that the + // indexing process can reuse it without any location constraints. + let sqlString = "UPDATE messages SET folderID = NULL, messageKey = NULL, " + + "deleted = 1 WHERE id IN (" + + aMessageIDs.join(",") + ")"; + + let statement = this._createAsyncStatement(sqlString, true); + statement.executeAsync(this.trackAsync()); + statement.finalize(); + + GlodaCollectionManager.itemsDeleted(GlodaMessage.prototype.NOUN_ID, + aMessageIDs); + }, + + get _countDeletedMessagesStatement() { + let statement = this._createAsyncStatement( + "SELECT COUNT(*) FROM messages WHERE deleted = 1"); + this.__defineGetter__("_countDeletedMessagesStatement", + () => statement); + return this._countDeletedMessagesStatement; + }, + + /** + * Count how many messages are currently marked as deleted in the database. + */ + countDeletedMessages: function gloda_ds_countDeletedMessages(aCallback) { + let cms = this._countDeletedMessagesStatement; + cms.executeAsync(new SingletonResultValueHandler(aCallback)); + }, + + get _deleteMessageByIDStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM messages WHERE id = ?1"); + this.__defineGetter__("_deleteMessageByIDStatement", + () => statement); + return this._deleteMessageByIDStatement; + }, + + get _deleteMessageTextByIDStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM messagesText WHERE docid = ?1"); + this.__defineGetter__("_deleteMessageTextByIDStatement", + () => statement); + return this._deleteMessageTextByIDStatement; + }, + + /** + * Delete a message and its fulltext from the database. 
It is assumed that + * the message was already marked as deleted and so is not visible to the + * collection manager and so nothing needs to be done about that. + */ + deleteMessageByID: function gloda_ds_deleteMessageByID(aMessageID) { + let dmbids = this._deleteMessageByIDStatement; + dmbids.bindInt64Parameter(0, aMessageID); + dmbids.executeAsync(this.trackAsync()); + + this.deleteMessageTextByID(aMessageID); + }, + + deleteMessageTextByID: function gloda_ds_deleteMessageTextByID(aMessageID) { + let dmt = this._deleteMessageTextByIDStatement; + dmt.bindInt64Parameter(0, aMessageID); + dmt.executeAsync(this.trackAsync()); + }, + + get _folderCompactionStatement() { + let statement = this._createAsyncStatement( + "SELECT id, messageKey, headerMessageID FROM messages \ + WHERE folderID = ?1 AND \ + messageKey >= ?2 AND +deleted = 0 ORDER BY messageKey LIMIT ?3"); + this.__defineGetter__("_folderCompactionStatement", + () => statement); + return this._folderCompactionStatement; + }, + + folderCompactionPassBlockFetch: + function gloda_ds_folderCompactionPassBlockFetch( + aFolderID, aStartingMessageKey, aLimit, aCallback) { + let fcs = this._folderCompactionStatement; + fcs.bindInt64Parameter(0, aFolderID); + fcs.bindInt64Parameter(1, aStartingMessageKey); + fcs.bindInt64Parameter(2, aLimit); + fcs.executeAsync(new CompactionBlockFetcherHandler(aCallback)); + }, + + /* ********** Message Attributes ********** */ + get _insertMessageAttributeStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO messageAttributes (conversationID, messageID, attributeID, \ + value) \ + VALUES (?1, ?2, ?3, ?4)"); + this.__defineGetter__("_insertMessageAttributeStatement", + () => statement); + return this._insertMessageAttributeStatement; + }, + + get _deleteMessageAttributeStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM messageAttributes WHERE attributeID = ?1 AND value = ?2 \ + AND conversationID = ?3 AND messageID = ?4"); + 
this.__defineGetter__("_deleteMessageAttributeStatement", + () => statement); + return this._deleteMessageAttributeStatement; + }, + + /** + * Insert and remove attributes relating to a GlodaMessage. This is performed + * inside a pseudo-transaction (we create one if we aren't in one, using + * our _beginTransaction wrapper, but if we are in one, no additional + * meaningful semantics are added). + * No attempt is made to verify uniqueness of inserted attributes, either + * against the current database or within the provided list of attributes. + * The caller is responsible for ensuring that unwanted duplicates are + * avoided. + * + * @param aMessage The GlodaMessage the attributes belong to. This is used + * to provide the message id and conversation id. + * @param aAddDBAttributes A list of attribute tuples to add, where each tuple + * contains an attribute ID and a value. Lest you forget, an attribute ID + * corresponds to a row in the attribute definition table. The attribute + * definition table stores the 'parameter' for the attribute, if any. + * (Which is to say, our frequent Attribute-Parameter-Value triple has + * the Attribute-Parameter part distilled to a single attribute id.) + * @param aRemoveDBAttributes A list of attribute tuples to remove. + */ + adjustMessageAttributes: function gloda_ds_adjustMessageAttributes(aMessage, + aAddDBAttributes, aRemoveDBAttributes) { + let imas = this._insertMessageAttributeStatement; + let dmas = this._deleteMessageAttributeStatement; + this._beginTransaction(); + try { + for (let iAttrib = 0; iAttrib < aAddDBAttributes.length; iAttrib++) { + let attribValueTuple = aAddDBAttributes[iAttrib]; + + imas.bindInt64Parameter(0, aMessage.conversationID); + imas.bindInt64Parameter(1, aMessage.id); + imas.bindInt64Parameter(2, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) 
+ if (attribValueTuple[1] == null) + imas.bindInt64Parameter(3, 0); + else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) + imas.bindInt64Parameter(3, attribValueTuple[1]); + else + imas.bindDoubleParameter(3, attribValueTuple[1]); + imas.executeAsync(this.trackAsync()); + } + + for (let iAttrib = 0; iAttrib < aRemoveDBAttributes.length; iAttrib++) { + let attribValueTuple = aRemoveDBAttributes[iAttrib]; + + dmas.bindInt64Parameter(0, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) + if (attribValueTuple[1] == null) + dmas.bindInt64Parameter(1, 0); + else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) + dmas.bindInt64Parameter(1, attribValueTuple[1]); + else + dmas.bindDoubleParameter(1, attribValueTuple[1]); + dmas.bindInt64Parameter(2, aMessage.conversationID); + dmas.bindInt64Parameter(3, aMessage.id); + dmas.executeAsync(this.trackAsync()); + } + + this._commitTransaction(); + } + catch (ex) { + this._log.error("adjustMessageAttributes:", ex); + this._rollbackTransaction(); + throw ex; + } + }, + + get _deleteMessageAttributesByMessageIDStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM messageAttributes WHERE messageID = ?1"); + this.__defineGetter__("_deleteMessageAttributesByMessageIDStatement", + () => statement); + return this._deleteMessageAttributesByMessageIDStatement; + }, + + /** + * Clear all the message attributes for a given GlodaMessage. No changes + * are made to the in-memory representation of the message; it is up to the + * caller to ensure that it handles things correctly. + * + * @param aMessage The GlodaMessage whose database attributes should be + * purged. 
+ */ + clearMessageAttributes: function gloda_ds_clearMessageAttributes(aMessage) { + if (aMessage.id != null) { + this._deleteMessageAttributesByMessageIDStatement.bindInt64Parameter(0, + aMessage.id); + this._deleteMessageAttributesByMessageIDStatement.executeAsync( + this.trackAsync()); + } + }, + + _stringSQLQuoter: function(aString) { + return "'" + aString.replace(/\'/g, "''") + "'"; + }, + _numberQuoter: function(aNum) { + return aNum; + }, + + /* ===== Generic Attribute Support ===== */ + adjustAttributes: function gloda_ds_adjustAttributes(aItem, aAddDBAttributes, + aRemoveDBAttributes) { + let nounDef = aItem.NOUN_DEF; + let dbMeta = nounDef._dbMeta; + if (dbMeta.insertAttrStatement === undefined) { + dbMeta.insertAttrStatement = this._createAsyncStatement( + "INSERT INTO " + nounDef.attrTableName + + " (" + nounDef.attrIDColumnName + ", attributeID, value) " + + " VALUES (?1, ?2, ?3)"); + // we always create this at the same time (right here), no need to check + dbMeta.deleteAttrStatement = this._createAsyncStatement( + "DELETE FROM " + nounDef.attrTableName + " WHERE " + + " attributeID = ?1 AND value = ?2 AND " + + nounDef.attrIDColumnName + " = ?3"); + } + + let ias = dbMeta.insertAttrStatement; + let das = dbMeta.deleteAttrStatement; + this._beginTransaction(); + try { + for (let iAttr = 0; iAttr < aAddDBAttributes.length; iAttr++) { + let attribValueTuple = aAddDBAttributes[iAttr]; + + ias.bindInt64Parameter(0, aItem.id); + ias.bindInt64Parameter(1, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) 
+ if (attribValueTuple[1] == null) + ias.bindInt64Parameter(2, 0); + else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) + ias.bindInt64Parameter(2, attribValueTuple[1]); + else + ias.bindDoubleParameter(2, attribValueTuple[1]); + ias.executeAsync(this.trackAsync()); + } + + for (let iAttr = 0; iAttr < aRemoveDBAttributes.length; iAttr++) { + let attribValueTuple = aRemoveDBAttributes[iAttr]; + + das.bindInt64Parameter(0, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) + if (attribValueTuple[1] == null) + das.bindInt64Parameter(1, 0); + else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) + das.bindInt64Parameter(1, attribValueTuple[1]); + else + das.bindDoubleParameter(1, attribValueTuple[1]); + das.bindInt64Parameter(2, aItem.id); + das.executeAsync(this.trackAsync()); + } + + this._commitTransaction(); + } + catch (ex) { + this._log.error("adjustAttributes:", ex); + this._rollbackTransaction(); + throw ex; + } + }, + + clearAttributes: function gloda_ds_clearAttributes(aItem) { + let nounDef = aItem.NOUN_DEF; + let dbMeta = nounMeta._dbMeta; + if (dbMeta.clearAttrStatement === undefined) { + dbMeta.clearAttrStatement = this._createAsyncStatement( + "DELETE FROM " + nounDef.attrTableName + " WHERE " + + nounDef.attrIDColumnName + " = ?1"); + } + + if (aItem.id != null) { + dbMeta.clearAttrStatement.bindInt64Parameter(0, aItem.id); + dbMeta.clearAttrStatement.executeAsync(this.trackAsync()); + } + }, + + /** + * escapeStringForLIKE is only available on statements, and sometimes we want + * to use it before we create our statement, so we create a statement just + * for this reason. 
+ */ + get _escapeLikeStatement() { + let statement = this._createAsyncStatement("SELECT 0"); + this.__defineGetter__("_escapeLikeStatement", () => statement); + return this._escapeLikeStatement; + }, + + _convertToDBValuesAndGroupByAttributeID: + function* gloda_ds__convertToDBValuesAndGroupByAttributeID(aAttrDef, + aValues) { + let objectNounDef = aAttrDef.objectNounDef; + if (!objectNounDef.usesParameter) { + let dbValues = []; + for (let iValue = 0; iValue < aValues.length; iValue++) { + let value = aValues[iValue]; + // If the empty set is significant and it's an empty signifier, emit + // the appropriate dbvalue. + if (value == null && aAttrDef.emptySetIsSignificant) { + yield [this.kEmptySetAttrId, [aAttrDef.id]]; + // Bail if the only value was us; we don't want to add a + // value-posessing wildcard into the mix. + if (aValues.length == 1) + return; + continue; + } + let dbValue = objectNounDef.toParamAndValue(value)[1]; + if (dbValue != null) + dbValues.push(dbValue); + } + yield [aAttrDef.special ? undefined : aAttrDef.id, dbValues]; + return; + } + + let curParam, attrID, dbValues; + let attrDBDef = aAttrDef.dbDef; + for (let iValue = 0; iValue < aValues.length; iValue++) { + let value = aValues[iValue]; + // If the empty set is significant and it's an empty signifier, emit + // the appropriate dbvalue. + if (value == null && aAttrDef.emptySetIsSignificant) { + yield [this.kEmptySetAttrId, [aAttrDef.id]]; + // Bail if the only value was us; we don't want to add a + // value-posessing wildcard into the mix. 
+ if (aValues.length == 1) + return; + continue; + } + let [dbParam, dbValue] = objectNounDef.toParamAndValue(value); + if (curParam === undefined) { + curParam = dbParam; + attrID = attrDBDef.bindParameter(curParam); + if (dbValue != null) + dbValues = [dbValue]; + else + dbValues = []; + } + else if (curParam == dbParam) { + if (dbValue != null) + dbValues.push(dbValue); + } + else { + yield [attrID, dbValues]; + curParam = dbParam; + attrID = attrDBDef.bindParameter(curParam); + if (dbValue != null) + dbValues = [dbValue]; + else + dbValues = []; + } + } + if (dbValues !== undefined) + yield [attrID, dbValues]; + }, + + _convertRangesToDBStringsAndGroupByAttributeID: + function* gloda_ds__convertRangesToDBStringsAndGroupByAttributeID(aAttrDef, + aValues, aValueColumnName) { + let objectNounDef = aAttrDef.objectNounDef; + if (!objectNounDef.usesParameter) { + let dbStrings = []; + for (let iValue = 0; iValue < aValues.length; iValue++) { + let [lowerVal, upperVal] = aValues[iValue]; + // they both can't be null. that is the law. + if (lowerVal == null) + dbStrings.push(aValueColumnName + " <= " + + objectNounDef.toParamAndValue(upperVal)[1]); + else if (upperVal == null) + dbStrings.push(aValueColumnName + " >= " + + objectNounDef.toParamAndValue(lowerVal)[1]); + else // no one is null! + dbStrings.push(aValueColumnName + " BETWEEN " + + objectNounDef.toParamAndValue(lowerVal)[1] + " AND " + + objectNounDef.toParamAndValue(upperVal)[1]); + } + yield [aAttrDef.special ? undefined : aAttrDef.id, dbStrings]; + return; + } + + let curParam, attrID, dbStrings; + let attrDBDef = aAttrDef.dbDef; + for (let iValue = 0; iValue < aValues.length; iValue++) { + let [lowerVal, upperVal] = aValues[iValue]; + + let dbString, dbParam, lowerDBVal, upperDBVal; + // they both can't be null. that is the law. 
+ if (lowerVal == null) { + [dbParam, upperDBVal] = objectNounDef.toParamAndValue(upperVal); + dbString = aValueColumnName + " <= " + upperDBVal; + } + else if (upperVal == null) { + [dbParam, lowerDBVal] = objectNounDef.toParamAndValue(lowerVal); + dbString = aValueColumnName + " >= " + lowerDBVal; + } + else { // no one is null! + [dbParam, lowerDBVal] = objectNounDef.toParamAndValue(lowerVal); + dbString = aValueColumnName + " BETWEEN " + lowerDBVal + " AND " + + objectNounDef.toParamAndValue(upperVal)[1]; + } + + if (curParam === undefined) { + curParam = dbParam; + attrID = attrDBDef.bindParameter(curParam); + dbStrings = [dbString]; + } + else if (curParam === dbParam) { + dbStrings.push(dbString); + } + else { + yield [attrID, dbStrings]; + curParam = dbParam; + attrID = attrDBDef.bindParameter(curParam); + dbStrings = [dbString]; + } + } + if (dbStrings !== undefined) + yield [attrID, dbStrings]; + }, + + /** + * Perform a database query given a GlodaQueryClass instance that specifies + * a set of constraints relating to the noun type associated with the query. + * A GlodaCollection is returned containing the results of the look-up. + * By default the collection is "live", and will mutate (generating events to + * its listener) as the state of the database changes. + * This functionality is made user/extension visible by the Query's + * getCollection (asynchronous). + * + * @param [aArgs] See |GlodaQuery.getCollection| for info. + */ + queryFromQuery: function gloda_ds_queryFromQuery(aQuery, aListener, + aListenerData, aExistingCollection, aMasterCollection, aArgs) { + // when changing this method, be sure that GlodaQuery's testMatch function + // likewise has its changes made. 
+ let nounDef = aQuery._nounDef; + + let whereClauses = []; + let unionQueries = [aQuery].concat(aQuery._unions); + let boundArgs = []; + + // Use the dbQueryValidityConstraintSuffix to provide constraints that + // filter items down to those that are valid for the query mechanism to + // return. For example, in the case of messages, deleted or ghost + // messages should not be returned by this query layer. We require + // hand-rolled SQL to do that for now. + let validityConstraintSuffix; + if (nounDef.dbQueryValidityConstraintSuffix && + !aQuery.options.noDbQueryValidityConstraints) + validityConstraintSuffix = nounDef.dbQueryValidityConstraintSuffix; + else + validityConstraintSuffix = ""; + + for (let iUnion = 0; iUnion < unionQueries.length; iUnion++) { + let curQuery = unionQueries[iUnion]; + let selects = []; + + let lastConstraintWasSpecial = false; + let curConstraintIsSpecial; + + for (let iConstraint = 0; iConstraint < curQuery._constraints.length; + iConstraint++) { + let constraint = curQuery._constraints[iConstraint]; + let [constraintType, attrDef] = constraint; + let constraintValues = constraint.slice(2); + + let tableName, idColumnName, tableColumnName, valueColumnName; + if (constraintType == this.kConstraintIdIn) { + // we don't need any of the next cases' setup code, and we especially + // would prefer that attrDef isn't accessed since it's null for us. + } + else if (attrDef.special) { + tableName = nounDef.tableName; + idColumnName = "id"; // canonical id for a table is "id". + valueColumnName = attrDef.specialColumnName; + curConstraintIsSpecial = true; + } + else { + tableName = nounDef.attrTableName; + idColumnName = nounDef.attrIDColumnName; + valueColumnName = "value"; + curConstraintIsSpecial = false; + } + + let select = null, test = null, bindArgs = null; + if (constraintType === this.kConstraintIdIn) { + // this is somewhat of a trick. this does mean that this can be the + // only constraint. 
Namely, our idiom is: + // SELECT * FROM blah WHERE id IN (a INTERSECT b INTERSECT c) + // but if we only have 'a', then that becomes "...IN (a)", and if + // 'a' is not a select but a list of id's... tricky, no? + select = constraintValues.join(","); + } + // @testpoint gloda.datastore.sqlgen.kConstraintIn + else if (constraintType === this.kConstraintIn) { + let clauses = []; + for (let [attrID, values] of + this._convertToDBValuesAndGroupByAttributeID(attrDef, + constraintValues)) { + let clausePart; + if (attrID !== undefined) + clausePart = "(attributeID = " + attrID + + (values.length ? " AND " : ""); + else + clausePart = "("; + if (values.length) { + // strings need to be escaped, we would use ? binding, except + // that gets mad if we have too many strings... so we use our + // own escaping logic. correctly escaping is easy, but it still + // feels wrong to do it. (just double the quote character...) + if (attrDef.special == this.kSpecialString) + clausePart += valueColumnName + " IN (" + + values.map(v => "'" + v.replace(/\'/g, "''") + "'"). 
+ join(",") + "))"; + else + clausePart += valueColumnName + " IN (" + values.join(",") + + "))"; + } + else + clausePart += ")"; + clauses.push(clausePart); + } + test = clauses.join(" OR "); + } + // @testpoint gloda.datastore.sqlgen.kConstraintRanges + else if (constraintType === this.kConstraintRanges) { + let clauses = []; + for (let [attrID, dbStrings] of + this._convertRangesToDBStringsAndGroupByAttributeID(attrDef, + constraintValues, valueColumnName)) { + if (attrID !== undefined) + clauses.push("(attributeID = " + attrID + + " AND (" + dbStrings.join(" OR ") + "))"); + else + clauses.push("(" + dbStrings.join(" OR ") + ")"); + } + test = clauses.join(" OR "); + } + // @testpoint gloda.datastore.sqlgen.kConstraintEquals + else if (constraintType === this.kConstraintEquals) { + let clauses = []; + for (let [attrID, values] of + this._convertToDBValuesAndGroupByAttributeID(attrDef, + constraintValues)) { + if (attrID !== undefined) + clauses.push("(attributeID = " + attrID + + " AND (" + values.map(_ => valueColumnName + " = ?"). + join(" OR ") + "))"); + else + clauses.push("(" + values.map(_ => valueColumnName + " = ?"). + join(" OR ") + ")"); + boundArgs.push.apply(boundArgs, values); + } + test = clauses.join(" OR "); + } + // @testpoint gloda.datastore.sqlgen.kConstraintStringLike + else if (constraintType === this.kConstraintStringLike) { + let likePayload = ''; + for (let valuePart of constraintValues) { + if (typeof valuePart == "string") + likePayload += this._escapeLikeStatement.escapeStringForLIKE( + valuePart, "/"); + else + likePayload += "%"; + } + test = valueColumnName + " LIKE ? 
ESCAPE '/'"; + boundArgs.push(likePayload); + } + // @testpoint gloda.datastore.sqlgen.kConstraintFulltext + else if (constraintType === this.kConstraintFulltext) { + let matchStr = constraintValues[0]; + select = "SELECT docid FROM " + nounDef.tableName + "Text" + + " WHERE " + attrDef.specialColumnName + " MATCH ?"; + boundArgs.push(matchStr); + } + + if (curConstraintIsSpecial && lastConstraintWasSpecial && test) { + selects[selects.length-1] += " AND " + test; + } + else if (select) + selects.push(select); + else if (test) { + select = "SELECT " + idColumnName + " FROM " + tableName + " WHERE " + + test; + selects.push(select); + } + else + this._log.warn("Unable to translate constraint of type " + + constraintType + " on attribute bound as " + nounDef.name); + + lastConstraintWasSpecial = curConstraintIsSpecial; + } + + if (selects.length) + whereClauses.push("id IN (" + selects.join(" INTERSECT ") + ")" + + validityConstraintSuffix); + } + + let sqlString = "SELECT * FROM " + nounDef.tableName; + if (!aQuery.options.noMagic) { + if (aQuery.options.noDbQueryValidityConstraints && + nounDef.dbQueryJoinMagicWithNoValidityConstraints) + sqlString += nounDef.dbQueryJoinMagicWithNoValidityConstraints; + else if (nounDef.dbQueryJoinMagic) + sqlString += nounDef.dbQueryJoinMagic; + } + + if (whereClauses.length) + sqlString += " WHERE (" + whereClauses.join(") OR (") + ")"; + + if (aQuery.options.explicitSQL) + sqlString = aQuery.options.explicitSQL; + + if (aQuery.options.outerWrapColumns) + sqlString = "SELECT *, " + aQuery.options.outerWrapColumns.join(", ") + + " FROM (" + sqlString + ")"; + + if (aQuery._order.length) { + let orderClauses = []; + for (let [, colName] in Iterator(aQuery._order)) { + if (colName.startsWith("-")) + orderClauses.push(colName.substring(1) + " DESC"); + else + orderClauses.push(colName + " ASC"); + } + sqlString += " ORDER BY " + orderClauses.join(", "); + } + + if (aQuery._limit) { + if (!("limitClauseAlreadyIncluded" in 
aQuery.options)) + sqlString += " LIMIT ?"; + boundArgs.push(aQuery._limit); + } + + this._log.debug("QUERY FROM QUERY: " + sqlString + " ARGS: " + boundArgs); + + // if we want to become explicit, replace the query (which has already + // provided our actual SQL query) with an explicit query. This will be + // what gets attached to the collection in the event we create a new + // collection. If we are reusing one, we assume that the explicitness, + // if desired, already happened. + // (we do not need to pass an argument to the explicitQueryClass constructor + // because it will be passed in to the collection's constructor, which will + // ensure that the collection attribute gets set.) + if (aArgs && ("becomeExplicit" in aArgs) && aArgs.becomeExplicit) + aQuery = new nounDef.explicitQueryClass(); + else if (aArgs && ("becomeNull" in aArgs) && aArgs.becomeNull) + aQuery = new nounDef.nullQueryClass(); + + return this._queryFromSQLString(sqlString, boundArgs, nounDef, aQuery, + aListener, aListenerData, aExistingCollection, aMasterCollection); + }, + + _queryFromSQLString: function gloda_ds__queryFromSQLString(aSqlString, + aBoundArgs, aNounDef, aQuery, aListener, aListenerData, + aExistingCollection, aMasterCollection) { + let statement = this._createAsyncStatement(aSqlString, true); + for (let [iBinding, bindingValue] in Iterator(aBoundArgs)) { + this._bindVariant(statement, iBinding, bindingValue); + } + + let collection; + if (aExistingCollection) + collection = aExistingCollection; + else { + collection = new GlodaCollection(aNounDef, [], aQuery, aListener, + aMasterCollection); + GlodaCollectionManager.registerCollection(collection); + // we don't want to overwrite the existing listener or its data, but this + // does raise the question about what should happen if we get passed in + // a different listener and/or data. 
+ if (aListenerData !== undefined) + collection.data = aListenerData; + } + if (aListenerData) { + if (collection.dataStack) + collection.dataStack.push(aListenerData); + else + collection.dataStack = [aListenerData]; + } + + statement.executeAsync(new QueryFromQueryCallback(statement, aNounDef, + collection)); + statement.finalize(); + return collection; + }, + + /** + * + * + */ + loadNounItem: function gloda_ds_loadNounItem(aItem, aReferencesByNounID, + aInverseReferencesByNounID) { + let attribIDToDBDefAndParam = this._attributeIDToDBDefAndParam; + + let hadDeps = aItem._deps != null; + let deps = aItem._deps || {}; + let hasDeps = false; + + //this._log.debug(" hadDeps: " + hadDeps + " deps: " + + // Log4Moz.enumerateProperties(deps).join(",")); + + for (let attrib of aItem.NOUN_DEF.specialLoadAttribs) { + let objectNounDef = attrib.objectNounDef; + + if (attrib.special === this.kSpecialColumnChildren) { + let invReferences = aInverseReferencesByNounID[objectNounDef.id]; + if (invReferences === undefined) + invReferences = aInverseReferencesByNounID[objectNounDef.id] = {}; + // only contribute if it's not already pending or there + if (!(attrib.id in deps) && aItem[attrib.storageAttributeName] == null){ + //this._log.debug(" Adding inv ref for: " + aItem.id); + if (!(aItem.id in invReferences)) + invReferences[aItem.id] = null; + deps[attrib.id] = null; + hasDeps = true; + } + } + else if (attrib.special === this.kSpecialColumnParent) { + let references = aReferencesByNounID[objectNounDef.id]; + if (references === undefined) + references = aReferencesByNounID[objectNounDef.id] = {}; + // nothing to contribute if it's already there + if (!(attrib.id in deps) && + aItem[attrib.valueStorageAttributeName] == null) { + let parentID = aItem[attrib.idStorageAttributeName]; + if (!(parentID in references)) + references[parentID] = null; + //this._log.debug(" Adding parent ref for: " + + // aItem[attrib.idStorageAttributeName]); + deps[attrib.id] = null; + hasDeps = 
  /**
   * First pass of loading a noun instance from its database row: work out
   * which other objects this item needs before it can be fully realized.
   *
   * Ids of referenced objects are recorded into aReferencesByNounID /
   * aInverseReferencesByNounID (as keys mapping to null placeholders) so the
   * caller can batch-load them; the pending attribute ids are stashed on
   * aItem._deps for loadNounDeferredDeps to resolve once the referenced
   * objects exist.  Attribute values that need no other objects are bound
   * onto aItem immediately.
   *
   * @param aItem The freshly instantiated noun instance being loaded.
   * @param aReferencesByNounID Map: noun id -> (map: object id -> instance or
   *     null placeholder) of objects that need to be loaded.
   * @param aInverseReferencesByNounID Same shape, but keyed by the parent's
   *     id, for parent -> children style attributes.
   * @returns true if aItem now has deferred dependencies (aItem._deps set).
   */
  loadNounItem: function gloda_ds_loadNounItem(aItem, aReferencesByNounID,
      aInverseReferencesByNounID) {
    let attribIDToDBDefAndParam = this._attributeIDToDBDefAndParam;

    // If _deps already exists, a previous call already performed the JSON
    // pass for this item; we must not re-parse (the json text is deleted).
    let hadDeps = aItem._deps != null;
    let deps = aItem._deps || {};
    let hasDeps = false;

    //this._log.debug("  hadDeps: " + hadDeps + " deps: " +
    //    Log4Moz.enumerateProperties(deps).join(","));

    // Special (column-backed) attributes first; these are not in the JSON.
    for (let attrib of aItem.NOUN_DEF.specialLoadAttribs) {
      let objectNounDef = attrib.objectNounDef;

      if (attrib.special === this.kSpecialColumnChildren) {
        let invReferences = aInverseReferencesByNounID[objectNounDef.id];
        if (invReferences === undefined)
          invReferences = aInverseReferencesByNounID[objectNounDef.id] = {};
        // only contribute if it's not already pending or there
        if (!(attrib.id in deps) && aItem[attrib.storageAttributeName] == null){
          //this._log.debug("  Adding inv ref for: " + aItem.id);
          if (!(aItem.id in invReferences))
            invReferences[aItem.id] = null;
          deps[attrib.id] = null;
          hasDeps = true;
        }
      }
      else if (attrib.special === this.kSpecialColumnParent) {
        let references = aReferencesByNounID[objectNounDef.id];
        if (references === undefined)
          references = aReferencesByNounID[objectNounDef.id] = {};
        // nothing to contribute if it's already there
        if (!(attrib.id in deps) &&
            aItem[attrib.valueStorageAttributeName] == null) {
          let parentID = aItem[attrib.idStorageAttributeName];
          if (!(parentID in references))
            references[parentID] = null;
          //this._log.debug("  Adding parent ref for: " +
          //  aItem[attrib.idStorageAttributeName]);
          deps[attrib.id] = null;
          hasDeps = true;
        }
        else {
          this._log.debug(" paranoia value storage: " + aItem[attrib.valueStorageAttributeName]);
        }
      }
    }

    // bail here if arbitrary values are not allowed, there just is no
    // encoded json, or we already had dependencies for this guy, implying
    // the json pass has already been performed
    if (!aItem.NOUN_DEF.allowsArbitraryAttrs || !aItem._jsonText || hadDeps) {
      if (hasDeps)
        aItem._deps = deps;
      return hasDeps;
    }

    //this._log.debug(" load json: " + aItem._jsonText);
    let jsonDict = JSON.parse(aItem._jsonText);
    delete aItem._jsonText;

    // Iterate over the attributes on the item
    for (let attribId in jsonDict) {
      let jsonValue = jsonDict[attribId];
      // It is technically impossible for attribute ids to go away at this
      // point in time.  This would require someone to monkey around with
      // our schema.  But we will introduce this functionality one day, so
      // prepare for it now.
      if (!(attribId in attribIDToDBDefAndParam))
        continue;
      // find the attribute definition that corresponds to this key
      let dbAttrib = attribIDToDBDefAndParam[attribId][0];

      let attrib = dbAttrib.attrDef;
      // The attribute definition will fail to exist if no one defines the
      // attribute anymore.  This can happen for many reasons: an extension
      // was uninstalled, an extension was changed and no longer defines the
      // attribute, or patches are being applied/unapplied.  Ignore this
      // attribute if missing.
      if (attrib == null)
        continue;
      let objectNounDef = attrib.objectNounDef;

      // If it has a tableName member but no fromJSON, then it's a persistent
      // object that needs to be loaded, which also means we need to hold it in
      // a collection owned by our collection.
      // (If it has a fromJSON method, then it's a special case like
      // MimeTypeNoun where it is authoritatively backed by a table but caches
      // everything into memory.  There is no case where fromJSON would be
      // implemented but we should still be doing database lookups.)
      if (objectNounDef.tableName && !objectNounDef.fromJSON) {
        let references = aReferencesByNounID[objectNounDef.id];
        if (references === undefined)
          references = aReferencesByNounID[objectNounDef.id] = {};

        if (attrib.singular) {
          if (!(jsonValue in references))
            references[jsonValue] = null;
        }
        else {
          // non-singular: jsonValue holds a collection of object ids
          for (let key in jsonValue) {
            let anID = jsonValue[key];
            if (!(anID in references))
              references[anID] = null;
          }
        }

        deps[attribId] = jsonValue;
        hasDeps = true;
      }
      /* if it has custom contribution logic, use it */
      else if (objectNounDef.contributeObjDependencies) {
        if (objectNounDef.contributeObjDependencies(jsonValue,
            aReferencesByNounID, aInverseReferencesByNounID)) {
          deps[attribId] = jsonValue;
          hasDeps = true;
        }
        else // just propagate the value, it's some form of simple sentinel
          aItem[attrib.boundName] = jsonValue;
      }
      // otherwise, the value just needs to be de-persisted, or...
      else if (objectNounDef.fromJSON) {
        if (attrib.singular) {
          // For consistency with the non-singular case, we don't assign the
          // attribute if undefined is returned.
          let deserialized = objectNounDef.fromJSON(jsonValue, aItem);
          if (deserialized !== undefined)
            aItem[attrib.boundName] = deserialized;
        }
        else {
          // Convert all the entries in the list filtering out any undefined
          // values. (TagNoun will do this if the tag is now dead.)
          let outList = [];
          for (let key in jsonValue) {
            let val = jsonValue[key];
            let deserialized = objectNounDef.fromJSON(val, aItem);
            if (deserialized !== undefined)
              outList.push(deserialized);
          }
          // Note: It's possible if we filtered things out that this is an empty
          // list.  This is acceptable because this is somewhat of an unusual
          // case and I don't think we want to further complicate our
          // semantics.
          aItem[attrib.boundName] = outList;
        }
      }
      // it's fine as is
      else
        aItem[attrib.boundName] = jsonValue;
    }

    if (hasDeps)
      aItem._deps = deps;
    return hasDeps;
  },
+ aItem[attrib.boundName] = outList; + } + } + // it's fine as is + else + aItem[attrib.boundName] = jsonValue; + } + + if (hasDeps) + aItem._deps = deps; + return hasDeps; + }, + + loadNounDeferredDeps: function gloda_ds_loadNounDeferredDeps(aItem, + aReferencesByNounID, aInverseReferencesByNounID) { + if (aItem._deps === undefined) + return; + + //this._log.debug(" loading deferred, deps: " + + // Log4Moz.enumerateProperties(aItem._deps).join(",")); + + + let attribIDToDBDefAndParam = this._attributeIDToDBDefAndParam; + + for (let [attribId, jsonValue] in Iterator(aItem._deps)) { + let dbAttrib = attribIDToDBDefAndParam[attribId][0]; + let attrib = dbAttrib.attrDef; + + let objectNounDef = attrib.objectNounDef; + let references = aReferencesByNounID[objectNounDef.id]; + if (attrib.special) { + if (attrib.special === this.kSpecialColumnChildren) { + let inverseReferences = aInverseReferencesByNounID[objectNounDef.id]; + //this._log.info("inverse assignment: " + objectNounDef.id + + // " of " + aItem.id) + aItem[attrib.storageAttributeName] = inverseReferences[aItem.id]; + } + else if (attrib.special === this.kSpecialColumnParent) { + //this._log.info("parent column load: " + objectNounDef.id + + // " storage value: " + aItem[attrib.idStorageAttributeName]); + aItem[attrib.valueStorageAttributeName] = + references[aItem[attrib.idStorageAttributeName]]; + } + } + else if (objectNounDef.tableName) { + //this._log.info("trying to load: " + objectNounDef.id + " refs: " + + // jsonValue + ": " + Log4Moz.enumerateProperties(jsonValue).join(",")); + if (attrib.singular) + aItem[attrib.boundName] = references[jsonValue]; + else + aItem[attrib.boundName] = Object.keys(jsonValue). 
+ map(key => references[jsonValue[key]]); + } + else if (objectNounDef.contributeObjDependencies) { + aItem[attrib.boundName] = + objectNounDef.resolveObjDependencies(jsonValue, aReferencesByNounID, + aInverseReferencesByNounID); + } + // there is no other case + } + + delete aItem._deps; + }, + + /* ********** Contact ********** */ + _nextContactId: 1, + + _populateContactManagedId: function () { + let stmt = this._createSyncStatement("SELECT MAX(id) FROM contacts", true); + if (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call + this._nextContactId = stmt.getInt64(0) + 1; + } + stmt.finalize(); + }, + + get _insertContactStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO contacts (id, directoryUUID, contactUUID, name, popularity,\ + frecency, jsonAttributes) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)"); + this.__defineGetter__("_insertContactStatement", () => statement); + return this._insertContactStatement; + }, + + createContact: function gloda_ds_createContact(aDirectoryUUID, aContactUUID, + aName, aPopularity, aFrecency) { + let contactID = this._nextContactId++; + let contact = new GlodaContact(this, contactID, + aDirectoryUUID, aContactUUID, aName, + aPopularity, aFrecency); + return contact; + }, + + insertContact: function gloda_ds_insertContact(aContact) { + let ics = this._insertContactStatement; + ics.bindInt64Parameter(0, aContact.id); + if (aContact.directoryUUID == null) + ics.bindNullParameter(1); + else + ics.bindStringParameter(1, aContact.directoryUUID); + if (aContact.contactUUID == null) + ics.bindNullParameter(2); + else + ics.bindStringParameter(2, aContact.contactUUID); + ics.bindStringParameter(3, aContact.name); + ics.bindInt64Parameter(4, aContact.popularity); + ics.bindInt64Parameter(5, aContact.frecency); + if (aContact._jsonText) + ics.bindStringParameter(6, aContact._jsonText); + else + ics.bindNullParameter(6); + + ics.executeAsync(this.trackAsync()); + + return aContact; + }, + + get 
_updateContactStatement() { + let statement = this._createAsyncStatement( + "UPDATE contacts SET directoryUUID = ?1, \ + contactUUID = ?2, \ + name = ?3, \ + popularity = ?4, \ + frecency = ?5, \ + jsonAttributes = ?6 \ + WHERE id = ?7"); + this.__defineGetter__("_updateContactStatement", () => statement); + return this._updateContactStatement; + }, + + updateContact: function gloda_ds_updateContact(aContact) { + let ucs = this._updateContactStatement; + ucs.bindInt64Parameter(6, aContact.id); + ucs.bindStringParameter(0, aContact.directoryUUID); + ucs.bindStringParameter(1, aContact.contactUUID); + ucs.bindStringParameter(2, aContact.name); + ucs.bindInt64Parameter(3, aContact.popularity); + ucs.bindInt64Parameter(4, aContact.frecency); + if (aContact._jsonText) + ucs.bindStringParameter(5, aContact._jsonText); + else + ucs.bindNullParameter(5); + + ucs.executeAsync(this.trackAsync()); + }, + + _contactFromRow: function gloda_ds_contactFromRow(aRow) { + let directoryUUID, contactUUID, jsonText; + if (aRow.getTypeOfIndex(1) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + directoryUUID = null; + else + directoryUUID = aRow.getString(1); + if (aRow.getTypeOfIndex(2) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + contactUUID = null; + else + contactUUID = aRow.getString(2); + if (aRow.getTypeOfIndex(6) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) + jsonText = undefined; + else + jsonText = aRow.getString(6); + + return new GlodaContact(this, aRow.getInt64(0), directoryUUID, + contactUUID, aRow.getString(5), + aRow.getInt64(3), aRow.getInt64(4), jsonText); + }, + + get _selectContactByIDStatement() { + let statement = this._createSyncStatement( + "SELECT * FROM contacts WHERE id = ?1"); + this.__defineGetter__("_selectContactByIDStatement", + () => statement); + return this._selectContactByIDStatement; + }, + + /** + * Synchronous contact lookup currently only for use by gloda's creation + * of the concept of "me". 
It is okay for it to be doing synchronous work + * because it is part of the startup process before any user code could + * have gotten a reference to Gloda, but no one else should do this. + */ + getContactByID: function gloda_ds_getContactByID(aContactID) { + let contact = GlodaCollectionManager.cacheLookupOne( + GlodaContact.prototype.NOUN_ID, aContactID); + + if (contact === null) { + let scbi = this._selectContactByIDStatement; + scbi.bindInt64Parameter(0, aContactID); + if (this._syncStep(scbi)) { + contact = this._contactFromRow(scbi); + GlodaCollectionManager.itemLoaded(contact); + } + scbi.reset(); + } + + return contact; + }, + + /* ********** Identity ********** */ + /** next identity id, managed for async use reasons. */ + _nextIdentityId: 1, + _populateIdentityManagedId: function () { + let stmt = this._createSyncStatement( + "SELECT MAX(id) FROM identities", true); + if (stmt.executeStep()) { // no chance of this SQLITE_BUSY on this call + this._nextIdentityId = stmt.getInt64(0) + 1; + } + stmt.finalize(); + }, + + get _insertIdentityStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO identities (id, contactID, kind, value, description, relay) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)"); + this.__defineGetter__("_insertIdentityStatement", () => statement); + return this._insertIdentityStatement; + }, + + createIdentity: function gloda_ds_createIdentity(aContactID, aContact, aKind, + aValue, aDescription, + aIsRelay) { + let identityID = this._nextIdentityId++; + let iis = this._insertIdentityStatement; + iis.bindInt64Parameter(0, identityID); + iis.bindInt64Parameter(1, aContactID); + iis.bindStringParameter(2, aKind); + iis.bindStringParameter(3, aValue); + iis.bindStringParameter(4, aDescription); + iis.bindInt64Parameter(5, aIsRelay ? 
1 : 0); + iis.executeAsync(this.trackAsync()); + + let identity = new GlodaIdentity(this, identityID, + aContactID, aContact, aKind, aValue, + aDescription, aIsRelay); + GlodaCollectionManager.itemsAdded(identity.NOUN_ID, [identity]); + return identity; + }, + + _identityFromRow: function gloda_ds_identityFromRow(aRow) { + return new GlodaIdentity(this, aRow.getInt64(0), aRow.getInt64(1), null, + aRow.getString(2), aRow.getString(3), + aRow.getString(4), + aRow.getInt32(5) ? true : false); + }, + + get _selectIdentityByKindValueStatement() { + let statement = this._createSyncStatement( + "SELECT * FROM identities WHERE kind = ?1 AND value = ?2"); + this.__defineGetter__("_selectIdentityByKindValueStatement", + () => statement); + return this._selectIdentityByKindValueStatement; + }, + + /** + * Synchronous lookup of an identity by kind and value, only for use by + * the legacy gloda core code that creates a concept of "me". + * Ex: (email, foo@example.com) + */ + getIdentity: function gloda_ds_getIdentity(aKind, aValue) { + let identity = GlodaCollectionManager.cacheLookupOneByUniqueValue( + GlodaIdentity.prototype.NOUN_ID, aKind + "@" + aValue); + + let ibkv = this._selectIdentityByKindValueStatement; + ibkv.bindStringParameter(0, aKind); + ibkv.bindStringParameter(1, aValue); + if (this._syncStep(ibkv)) { + identity = this._identityFromRow(ibkv); + GlodaCollectionManager.itemLoaded(identity); + } + ibkv.reset(); + + return identity; + }, +}; +GlodaAttributeDBDef.prototype._datastore = GlodaDatastore; +GlodaConversation.prototype._datastore = GlodaDatastore; +GlodaFolder.prototype._datastore = GlodaDatastore; +GlodaMessage.prototype._datastore = GlodaDatastore; +GlodaContact.prototype._datastore = GlodaDatastore; +GlodaIdentity.prototype._datastore = GlodaDatastore; diff --git a/mailnews/db/gloda/modules/dbview.js b/mailnews/db/gloda/modules/dbview.js new file mode 100644 index 000000000..4d34cf3af --- /dev/null +++ b/mailnews/db/gloda/modules/dbview.js @@ -0,0 
+1,178 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file is charged with providing you a way to have a pretty gloda-backed + * nsIMsgDBView. + */ + +this.EXPORTED_SYMBOLS = ["GlodaSyntheticView"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource://gre/modules/Services.jsm"); +Cu.import("resource:///modules/gloda/log4moz.js"); + +Cu.import("resource:///modules/gloda/public.js"); +Cu.import("resource:///modules/gloda/msg_search.js"); + +/** + * Create a synthetic view suitable for passing to |FolderDisplayWidget.show|. + * You must pass a query, collection, or conversation in. + * + * @param {GlodaQuery} [aArgs.query] A gloda query to run. + * @param {GlodaCollection} [aArgs.collection] An already-populated collection + * to display. Do not call getCollection on a query and hand us that. We + * will not register ourselves as a listener and things will not work. + * @param {GlodaConversation} [aArgs.conversation] A conversation whose messages + * you want to display. 
+ */ +function GlodaSyntheticView(aArgs) { + if ("query" in aArgs) { + this.query = aArgs.query; + this.collection = this.query.getCollection(this); + this.completed = false; + this.viewType = "global"; + } + else if ("collection" in aArgs) { + this.query = null; + this.collection = aArgs.collection; + this.completed = true; + this.viewType = "global"; + } + else if ("conversation" in aArgs) { + this.collection = aArgs.conversation.getMessagesCollection(this); + this.query = this.collection.query; + this.completed = false; + this.viewType = "conversation"; + } + else { + throw new Error("You need to pass a query or collection"); + } + + this.customColumns = []; +} +GlodaSyntheticView.prototype = { + defaultSort: [[Ci.nsMsgViewSortType.byDate, Ci.nsMsgViewSortOrder.descending]], + + /** + * Request the search be performed and notification provided to + * aSearchListener. If results are already available, they should + * be provided to aSearchListener without re-performing the search. + */ + search: function(aSearchListener, aCompletionCallback) { + this.searchListener = aSearchListener; + this.completionCallback = aCompletionCallback; + + this.searchListener.onNewSearch(); + if (this.completed) { + this.reportResults(this.collection.items); + // we're not really aborting, but it closes things out nicely + this.abortSearch(); + return; + } + }, + + abortSearch: function() { + if (this.searchListener) + this.searchListener.onSearchDone(Cr.NS_OK); + if (this.completionCallback) + this.completionCallback(); + this.searchListener = null; + this.completionCallback = null; + }, + + reportResults: function(aItems) { + for (let item of aItems) { + let hdr = item.folderMessage; + if (hdr) + this.searchListener.onSearchHit(hdr, hdr.folder); + } + }, + + /** + * Helper function used by |DBViewWrapper.getMsgHdrForMessageID| since there + * are no actual backing folders for it to check. 
+ */ + getMsgHdrForMessageID: function(aMessageId) { + for (let item of this.collection.items) { + if (item.headerMessageID == aMessageId) { + let hdr = item.folderMessage; + if (hdr) + return hdr; + } + } + return null; + }, + + /** + * The default set of columns to show. + */ + DEFAULT_COLUMN_STATES: { + threadCol: { + visible: true, + }, + flaggedCol: { + visible: true, + }, + subjectCol: { + visible: true, + }, + correspondentCol: { + visible: Services.prefs.getBoolPref("mail.threadpane.use_correspondents"), + }, + senderCol: { + visible: !Services.prefs.getBoolPref("mail.threadpane.use_correspondents"), + }, + dateCol: { + visible: true, + }, + locationCol: { + visible: true, + }, + }, + + // --- settings persistence + getPersistedSetting: function(aSetting) { + try { + return JSON.parse(Services.prefs.getCharPref( + "mailnews.database.global.views." + this.viewType + "." + aSetting + )); + } + catch (e) { + return this.getDefaultSetting(aSetting); + } + }, + setPersistedSetting: function(aSetting, aValue) { + Services.prefs.setCharPref( + "mailnews.database.global.views." + this.viewType + "." 
+ aSetting, + JSON.stringify(aValue) + ); + }, + getDefaultSetting: function(aSetting) { + if (aSetting == "columns") + return this.DEFAULT_COLUMN_STATES; + else + return undefined; + }, + + // --- collection listener + onItemsAdded: function(aItems, aCollection) { + if (this.searchListener) + this.reportResults(aItems); + }, + onItemsModified: function(aItems, aCollection) { + }, + onItemsRemoved: function(aItems, aCollection) { + }, + onQueryCompleted: function(aCollection) { + this.completed = true; + this.searchListener.onSearchDone(Cr.NS_OK); + if (this.completionCallback) + this.completionCallback(); + }, +}; diff --git a/mailnews/db/gloda/modules/everybody.js b/mailnews/db/gloda/modules/everybody.js new file mode 100644 index 000000000..b3332f473 --- /dev/null +++ b/mailnews/db/gloda/modules/everybody.js @@ -0,0 +1,50 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = []; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); +var LOG = Log4Moz.repository.getLogger("gloda.everybody"); + +var importNS = {}; + +function loadModule(aModuleURI, aNSContrib) { + try { + LOG.info("... loading " + aModuleURI); + Cu.import(aModuleURI, importNS); + } + catch (ex) { + LOG.error("!!! error loading " + aModuleURI); + LOG.error("(" + ex.fileName + ":" + ex.lineNumber + ") " + ex); + return false; + } + LOG.info("+++ loaded " + aModuleURI); + + if (aNSContrib) { + try { + importNS[aNSContrib].init(); + } + catch (ex) { + LOG.error("!!! 
error initializing " + aModuleURI); + LOG.error("(" + ex.fileName + ":" + ex.lineNumber + ") " + ex); + return false; + } + LOG.info("+++ inited " + aModuleURI); + } + return true; +} + +loadModule("resource:///modules/gloda/fundattr.js", "GlodaFundAttr"); +loadModule("resource:///modules/gloda/explattr.js", "GlodaExplicitAttr"); + +loadModule("resource:///modules/gloda/noun_tag.js"); +loadModule("resource:///modules/gloda/noun_freetag.js"); +loadModule("resource:///modules/gloda/noun_mimetype.js"); +loadModule("resource:///modules/gloda/index_msg.js"); +loadModule("resource:///modules/gloda/index_ab.js", "GlodaABAttrs"); diff --git a/mailnews/db/gloda/modules/explattr.js b/mailnews/db/gloda/modules/explattr.js new file mode 100644 index 000000000..851f88a11 --- /dev/null +++ b/mailnews/db/gloda/modules/explattr.js @@ -0,0 +1,193 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file provides the "explicit attribute" provider for messages. It is + * concerned with attributes that are the result of user actions. For example, + * whether a message is starred (flagged), message tags, whether it is + * read/unread, etc. 
+ */ + +this.EXPORTED_SYMBOLS = ['GlodaExplicitAttr']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); +Cu.import("resource:///modules/StringBundle.js"); + +Cu.import("resource:///modules/gloda/utils.js"); +Cu.import("resource:///modules/gloda/gloda.js"); +Cu.import("resource:///modules/gloda/noun_tag.js"); +Cu.import("resource:///modules/mailServices.js"); + + +var nsMsgMessageFlags_Replied = Ci.nsMsgMessageFlags.Replied; +var nsMsgMessageFlags_Forwarded = Ci.nsMsgMessageFlags.Forwarded; + +var EXT_BUILTIN = "built-in"; + +/** + * @namespace Explicit attribute provider. Indexes/defines attributes that are + * explicitly a result of user action. This dubiously includes marking a + * message as read. + */ +var GlodaExplicitAttr = { + providerName: "gloda.explattr", + strings: new StringBundle("chrome://messenger/locale/gloda.properties"), + _log: null, + _msgTagService: null, + + init: function gloda_explattr_init() { + this._log = Log4Moz.repository.getLogger("gloda.explattr"); + + this._msgTagService = MailServices.tags; + + try { + this.defineAttributes(); + } + catch (ex) { + this._log.error("Error in init: " + ex); + throw ex; + } + }, + + /** Boost for starred messages. */ + NOTABILITY_STARRED: 16, + /** Boost for tagged messages, first tag. */ + NOTABILITY_TAGGED_FIRST: 8, + /** Boost for tagged messages, each additional tag. 
*/ + NOTABILITY_TAGGED_ADDL: 1, + + defineAttributes: function() { + // Tag + this._attrTag = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrExplicit, + attributeName: "tag", + bindName: "tags", + singular: false, + emptySetIsSignificant: true, + facet: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_TAG, + parameterNoun: null, + // Property change notifications that we care about: + propertyChanges: ["keywords"], + }); // not-tested + + // Star + this._attrStar = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrExplicit, + attributeName: "star", + bindName: "starred", + singular: true, + facet: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + // Read/Unread + this._attrRead = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrExplicit, + attributeName: "read", + // Make the message query-able but without using the database. + canQuery: "truthy-but-not-true", + singular: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + + /** + * Has this message been replied to by the user. + */ + this._attrRepliedTo = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrExplicit, + attributeName: "repliedTo", + singular: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + + /** + * Has this user forwarded this message to someone. 
+ */ + this._attrForwarded = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrExplicit, + attributeName: "forwarded", + singular: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + }, + + process: function* Gloda_explattr_process(aGlodaMessage, aRawReps, aIsNew, + aCallbackHandle) { + let aMsgHdr = aRawReps.header; + + aGlodaMessage.starred = aMsgHdr.isFlagged; + if (aGlodaMessage.starred) + aGlodaMessage.notability += this.NOTABILITY_STARRED; + + aGlodaMessage.read = aMsgHdr.isRead; + + let flags = aMsgHdr.flags; + aGlodaMessage.repliedTo = Boolean(flags & nsMsgMessageFlags_Replied); + aGlodaMessage.forwarded = Boolean(flags & nsMsgMessageFlags_Forwarded); + + let tags = aGlodaMessage.tags = []; + + // -- Tag + // build a map of the keywords + let keywords = aMsgHdr.getStringProperty("keywords"); + let keywordList = keywords.split(' '); + let keywordMap = {}; + for (let iKeyword = 0; iKeyword < keywordList.length; iKeyword++) { + let keyword = keywordList[iKeyword]; + keywordMap[keyword] = true; + } + + let tagArray = TagNoun.getAllTags(); + for (let iTag = 0; iTag < tagArray.length; iTag++) { + let tag = tagArray[iTag]; + if (tag.key in keywordMap) + tags.push(tag); + } + + if (tags.length) + aGlodaMessage.notability += this.NOTABILITY_TAGGED_FIRST + + (tags.length - 1) * this.NOTABILITY_TAGGED_ADDL; + + yield Gloda.kWorkDone; + }, + + /** + * Duplicates the notability logic from process(). Arguably process should + * be factored to call us, grokNounItem should be factored to call us, or we + * should get sufficiently fancy that our code wildly diverges. 
+ */ + score: function Gloda_explattr_score(aMessage, aContext) { + let score = 0; + if (aMessage.starred) + score += this.NOTABILITY_STARRED; + if (aMessage.tags.length) + score += this.NOTABILITY_TAGGED_FIRST + + (aMessage.tags.length - 1) * this.NOTABILITY_TAGGED_ADDL; + return score; + }, +}; diff --git a/mailnews/db/gloda/modules/facet.js b/mailnews/db/gloda/modules/facet.js new file mode 100644 index 000000000..0f73d3d5b --- /dev/null +++ b/mailnews/db/gloda/modules/facet.js @@ -0,0 +1,582 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file provides faceting logic. + */ + +var EXPORTED_SYMBOLS = ["FacetDriver", "FacetUtils"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/public.js"); + +/** + * Decides the appropriate faceters for the noun type and drives the faceting + * process. This class and the faceters are intended to be reusable so that + * you only need one instance per faceting session. (Although each faceting + * pass is accordingly destructive to previous results.) + * + * Our strategy for faceting is to process one attribute at a time across all + * the items in the provided set. The alternative would be to iterate over + * the items and then iterate over the attributes on each item. While both + * approaches have caching downsides + */ +function FacetDriver(aNounDef, aWindow) { + this.nounDef = aNounDef; + this._window = aWindow; + + this._makeFaceters(); +} +FacetDriver.prototype = { + /** + * Populate |this.faceters| with a set of faceters appropriate to the noun + * definition associated with this instance. 
+ */ + _makeFaceters: function() { + let faceters = this.faceters = []; + + function makeFaceter(aAttrDef, aFacetDef) { + let facetType = aFacetDef.type; + + if (aAttrDef.singular) { + if (facetType == "date") + faceters.push(new DateFaceter(aAttrDef, aFacetDef)); + else + faceters.push(new DiscreteFaceter(aAttrDef, aFacetDef)); + } + else { + if (facetType == "nonempty?") + faceters.push(new NonEmptySetFaceter(aAttrDef, aFacetDef)); + else + faceters.push(new DiscreteSetFaceter(aAttrDef, aFacetDef)); + } + } + + for (let key in this.nounDef.attribsByBoundName) { + let attrDef = this.nounDef.attribsByBoundName[key]; + // ignore attributes that do not want to be faceted + if (!attrDef.facet) + continue; + + makeFaceter(attrDef, attrDef.facet); + + if ("extraFacets" in attrDef) { + for (let facetDef of attrDef.extraFacets) { + makeFaceter(attrDef, facetDef); + } + } + } + }, + /** + * Asynchronously facet the provided items, calling the provided callback when + * completed. + */ + go: function FacetDriver_go(aItems, aCallback, aCallbackThis) { + this.items = aItems; + this.callback = aCallback; + this.callbackThis = aCallbackThis; + + this._nextFaceter = 0; + this._drive(); + }, + + _MAX_FACETING_TIMESLICE_MS: 100, + _FACETING_YIELD_DURATION_MS: 0, + _driveWrapper: function(aThis) { + aThis._drive(); + }, + _drive: function() { + let start = Date.now(); + + while (this._nextFaceter < this.faceters.length) { + let faceter = this.faceters[this._nextFaceter++]; + // for now we facet in one go, but the long-term plan allows for them to + // be generators. 
+ faceter.facetItems(this.items); + + let delta = Date.now() - start; + if (delta > this._MAX_FACETING_TIMESLICE_MS) { + this._window.setTimeout(this._driveWrapper, + this._FACETING_YIELD_DURATION_MS, + this); + return; + } + } + + // we only get here once we are done with the faceters + this.callback.call(this.callbackThis); + } +}; + +var FacetUtils = { + _groupSizeComparator: function(a, b) { + return b[1].length - a[1].length; + }, + + /** + * Given a list where each entry is a tuple of [group object, list of items + * belonging to that group], produce a new list of the top grouped items. We + * used to also produce an "other" aggregation, but that turned out to be + * conceptually difficult to deal with, so that's gone, leaving this method + * with much less to do. + * + * @param aAttrDef The attribute for the facet we are working with. + * @param aGroups The list of groups built for the facet. + * @param aMaxCount The number of result rows you want back. + */ + makeTopGroups: function FacetUtils_makeTopGroups(aAttrDef, aGroups, + aMaxCount) { + let nounDef = aAttrDef.objectNounDef; + let realGroupsToUse = aMaxCount; + + let orderedBySize = aGroups.concat(); + orderedBySize.sort(this._groupSizeComparator); + + // - get the real groups to use and order them by the attribute comparator + let outGroups = orderedBySize.slice(0, realGroupsToUse); + let comparator = nounDef.comparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + outGroups.sort(comparatorHelper); + + return outGroups; + } +}; + +/** + * Facet discrete things like message authors, boolean values, etc. Only + * appropriate for use on singular values. Use |DiscreteSetFaceter| for + * non-singular values. 
/**
 * Facet discrete things like message authors, boolean values, etc.  Only
 * appropriate for use on singular values.  Use |DiscreteSetFaceter| for
 * non-singular values.
 *
 * @param aAttrDef  The attribute definition (provides boundName and
 *                  objectNounDef).
 * @param aFacetDef The facet definition (provides filter, groupComparator,
 *                  and, for complex values, groupIdAttr).
 */
function DiscreteFaceter(aAttrDef, aFacetDef) {
  this.attrDef = aAttrDef;
  this.facetDef = aFacetDef;
}
DiscreteFaceter.prototype = {
  type: "discrete",
  /**
   * Facet the given set of items, deferring to the appropriate helper method
   * based on whether the attribute's value type is primitive or an object.
   */
  facetItems: function(aItems) {
    if (this.attrDef.objectNounDef.isPrimitive)
      return this.facetPrimitiveItems(aItems);
    else
      return this.facetComplexItems(aItems);
  },
  /**
   * Facet an attribute whose value is primitive, meaning that it is a raw
   * numeric value or string, rather than a complex object.
   *
   * Populates |this.groups| (value -> items), |this.groupCount|, and
   * |this.orderedGroups| (a list of [value, items] tuples sorted by the facet
   * definition's groupComparator).
   */
  facetPrimitiveItems: function(aItems) {
    let attrKey = this.attrDef.boundName;
    let filter = this.facetDef.filter;

    let valStrToVal = {};
    let groups = this.groups = {};
    this.groupCount = 0;

    for (let item of aItems) {
      let val = (attrKey in item) ? item[attrKey] : null;
      if (val === Gloda.IGNORE_FACET)
        continue;

      // skip items the filter tells us to ignore
      if (filter && !filter(val))
        continue;

      // We need to use hasOwnProperty because we cannot guarantee that the
      // contents of val won't collide with the attributes in Object.prototype.
      if (groups.hasOwnProperty(val))
        groups[val].push(item);
      else {
        groups[val] = [item];
        // Remember the original (non-stringified) value for the group tuple.
        valStrToVal[val] = val;
        this.groupCount++;
      }
    }

    let orderedGroups = Object.keys(groups).
      map(key => [valStrToVal[key], groups[key]]);
    let comparator = this.facetDef.groupComparator;
    function comparatorHelper(a, b) {
      return comparator(a[0], b[0]);
    }
    orderedGroups.sort(comparatorHelper);
    this.orderedGroups = orderedGroups;
  },
  /**
   * Facet an attribute whose value is a complex object that can be identified
   * by its 'id' attribute.  This is the case where the value is itself a noun
   * instance.
   */
  facetComplexItems: function(aItems) {
    let attrKey = this.attrDef.boundName;
    let filter = this.facetDef.filter;
    let idAttr = this.facetDef.groupIdAttr;

    let groups = this.groups = {};
    let groupMap = this.groupMap = {};
    this.groupCount = 0;

    for (let item of aItems) {
      let val = (attrKey in item) ? item[attrKey] : null;
      if (val === Gloda.IGNORE_FACET)
        continue;

      // skip items the filter tells us to ignore
      if (filter && !filter(val))
        continue;

      let valId = (val == null) ? null : val[idAttr];
      // We need to use hasOwnProperty because tag nouns are complex objects
      // with id's that are non-numeric and so can collide with the contents
      // of Object.prototype.  (Note: the "tags" attribute is actually handled
      // by the DiscreteSetFaceter.)
      if (groupMap.hasOwnProperty(valId)) {
        groups[valId].push(item);
      }
      else {
        groupMap[valId] = val;
        groups[valId] = [item];
        this.groupCount++;
      }
    }

    let orderedGroups = Object.keys(groups).
      map(key => [groupMap[key], groups[key]]);
    let comparator = this.facetDef.groupComparator;
    function comparatorHelper(a, b) {
      return comparator(a[0], b[0]);
    }
    orderedGroups.sort(comparatorHelper);
    this.orderedGroups = orderedGroups;
  },
};

/**
 * Facet sets of discrete items.  For example, tags applied to messages.
 *
 * The main differences between us and |DiscreteFaceter| are:
 * - The empty set is notable.
 * - Specific set configurations could be interesting, but are not low-hanging
 *   fruit.
 */
+ */ +function DiscreteSetFaceter(aAttrDef, aFacetDef) { + this.attrDef = aAttrDef; + this.facetDef = aFacetDef; +} +DiscreteSetFaceter.prototype = { + type: "discrete", + /** + * Facet the given set of items, deferring to the appropriate helper method + */ + facetItems: function(aItems) { + if (this.attrDef.objectNounDef.isPrimitive) + return this.facetPrimitiveItems(aItems); + else + return this.facetComplexItems(aItems); + }, + /** + * Facet an attribute whose value is primitive, meaning that it is a raw + * numeric value or string, rather than a complex object. + */ + facetPrimitiveItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + let filter = this.facetDef.filter; + + let groups = this.groups = {}; + let valStrToVal = {}; + this.groupCount = 0; + + for (let item of aItems) { + let vals = (attrKey in item) ? item[attrKey] : null; + if (vals === Gloda.IGNORE_FACET) + continue; + + if (vals == null || vals.length == 0) { + vals = [null]; + } + for (let val of vals) { + // skip items the filter tells us to ignore + if (filter && !filter(val)) + continue; + + // We need to use hasOwnProperty because we cannot guarantee that the + // contents of val won't collide with the attributes in + // Object.prototype. + if (groups.hasOwnProperty(val)) + groups[val].push(item); + else { + groups[val] = [item]; + valStrToVal[val] = val; + this.groupCount++; + } + } + } + + let orderedGroups = Object.keys(groups). + map(key => [valStrToVal[key], groups[key]]); + let comparator = this.facetDef.groupComparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + orderedGroups.sort(comparatorHelper); + this.orderedGroups = orderedGroups; + }, + /** + * Facet an attribute whose value is a complex object that can be identified + * by its 'id' attribute. This is the case where the value is itself a noun + * instance. 
+ */ + facetComplexItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + let filter = this.facetDef.filter; + let idAttr = this.facetDef.groupIdAttr; + + let groups = this.groups = {}; + let groupMap = this.groupMap = {}; + this.groupCount = 0; + + for (let item of aItems) { + let vals = (attrKey in item) ? item[attrKey] : null; + if (vals === Gloda.IGNORE_FACET) + continue; + + if (vals == null || vals.length == 0) { + vals = [null]; + } + for (let val of vals) { + // skip items the filter tells us to ignore + if (filter && !filter(val)) + continue; + + let valId = (val == null) ? null : val[idAttr]; + // We need to use hasOwnProperty because tag nouns are complex objects + // with id's that are non-numeric and so can collide with the contents + // of Object.prototype. + if (groupMap.hasOwnProperty(valId)) { + groups[valId].push(item); + } + else { + groupMap[valId] = val; + groups[valId] = [item]; + this.groupCount++; + } + } + } + + let orderedGroups = Object.keys(groups). + map(key => [groupMap[key], groups[key]]); + let comparator = this.facetDef.groupComparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + orderedGroups.sort(comparatorHelper); + this.orderedGroups = orderedGroups; + }, +}; + +/** + * Given a non-singular attribute, facet it as if it were a boolean based on + * whether there is anything in the list (set). + */ +function NonEmptySetFaceter(aAttrDef, aFacetDef) { + this.attrDef = aAttrDef; + this.facetDef = aFacetDef; +} +NonEmptySetFaceter.prototype = { + type: "boolean", + /** + * Facet the given set of items, deferring to the appropriate helper method + */ + facetItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + + let trueValues = []; + let falseValues = []; + + let groups = this.groups = {}; + this.groupCount = 0; + + for (let item of aItems) { + let vals = (attrKey in item) ? 
item[attrKey] : null; + if (vals == null || vals.length == 0) + falseValues.push(item); + else + trueValues.push(item); + } + + this.orderedGroups = []; + if (trueValues.length) + this.orderedGroups.push([true, trueValues]); + if (falseValues.length) + this.orderedGroups.push([false, falseValues]); + this.groupCount = this.orderedGroups.length; + }, + makeQuery: function(aGroupValues, aInclusive) { + let query = this.query = Gloda.newQuery(Gloda.NOUN_MESSAGE); + + let constraintFunc = query[this.attrDef.boundName]; + constraintFunc.call(query); + + // Our query is always for non-empty lists (at this time), so we want to + // invert if they're excluding 'true' or including 'false', which means !=. + let invert = aGroupValues[0] != aInclusive; + + return [query, invert]; + } +}; + + +/** + * Facet dates. We build a hierarchical nested structure of year, month, and + * day nesting levels. This decision was made speculatively in the hopes that + * it would allow us to do clustered analysis and that there might be a benefit + * for that. For example, if you search for "Christmas", we might notice + * clusters of messages around December of each year. We could then present + * these in a list as likely candidates, rather than a graphical timeline. + * Alternately, it could be used to inform a non-linear visualization. As it + * stands (as of this writing), it's just a complicating factor. + */ +function DateFaceter(aAttrDef, aFacetDef) { + this.attrDef = aAttrDef; + this.facetDef = aFacetDef; +} +DateFaceter.prototype = { + type: "date", + /** + * + */ + facetItems: function(aItems) { + let attrKey = this.attrDef.boundName; + let nounDef = this.attrDef.objectNounDef; + + let years = this.years = {_subCount: 0}; + // generally track the time range + let oldest = null, newest = null; + + let validItems = this.validItems = []; + + // just cheat and put us at the front... + this.groupCount = aItems.length ? 
1000 : 0; + this.orderedGroups = null; + + /** The number of items with a null/missing attribute. */ + this.missing = 0; + + /** + * The number of items with a date that is unreasonably far in the past or + * in the future. Old-wise, we are concerned about incorrectly formatted + * messages (spam) that end up placed around the UNIX epoch. New-wise, + * we are concerned about messages that can't be explained by users who + * don't know how to set their clocks (both the current user and people + * sending them mail), mainly meaning spam. + * We want to avoid having our clever time-scale logic being made useless by + * these unreasonable messages. + */ + this.unreasonable = 0; + // feb 1, 1970 + let tooOld = new Date(1970, 1, 1); + // 3 days from now + let tooNew = new Date(Date.now() + 3 * 24 * 60 * 60 * 1000); + + for (let item of aItems) { + let val = (attrKey in item) ? item[attrKey] : null; + // -- missing + if (val == null) { + this.missing++; + continue; + } + + // -- unreasonable + if (val < tooOld || val > tooNew) { + this.unreasonable++; + continue; + } + + this.validItems.push(item); + + // -- time range + if (oldest == null) + oldest = newest = val; + else if (val < oldest) + oldest = val; + else if (val > newest) + newest = val; + + // -- bucket + // - year + let year, valYear = val.getYear(); + if (valYear in years) { + year = years[valYear]; + year._dateCount++; + } + else { + year = years[valYear] = { + _dateCount: 1, + _subCount: 0 + }; + years._subCount++; + } + + // - month + let month, valMonth = val.getMonth(); + if (valMonth in year) { + month = year[valMonth]; + month._dateCount++; + } + else { + month = year[valMonth] = { + _dateCount: 1, + _subCount: 0 + }; + year._subCount++; + } + + // - day + let valDate = val.getDate(); + if (valDate in month) { + month[valDate].push(item); + } + else { + month[valDate] = [item]; + } + } + + this.oldest = oldest; + this.newest = newest; + }, + + _unionMonth: function(aMonthObj) { + let dayItemLists = []; + 
for (let key in aMonthObj) { + let dayItemList = aMonthObj[key]; + if (typeof(key) == "string" && key.startsWith('_')) + continue; + dayItemLists.push(dayItemList); + } + return Array.concat.apply([], dayItemLists); + }, + + _unionYear: function(aYearObj) { + let monthItemLists = []; + for (let key in aYearObj) { + let monthObj = aYearObj[key]; + if (typeof(key) == "string" && key.startsWith('_')) + continue; + monthItemLists.push(this._unionMonth(monthObj)); + } + return Array.concat.apply([], monthItemLists); + } +}; diff --git a/mailnews/db/gloda/modules/fundattr.js b/mailnews/db/gloda/modules/fundattr.js new file mode 100644 index 000000000..75a424adb --- /dev/null +++ b/mailnews/db/gloda/modules/fundattr.js @@ -0,0 +1,907 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ['GlodaFundAttr']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); +Cu.import("resource:///modules/StringBundle.js"); + +Cu.import("resource:///modules/gloda/utils.js"); +Cu.import("resource:///modules/gloda/gloda.js"); +Cu.import("resource:///modules/gloda/datastore.js"); +Cu.import("resource:///modules/gloda/datamodel.js"); // for GlodaAttachment + +Cu.import("resource:///modules/gloda/noun_mimetype.js"); +Cu.import("resource:///modules/gloda/connotent.js"); + +/** + * @namespace The Gloda Fundamental Attribute provider is a special attribute + * provider; it provides attributes that the rest of the providers should be + * able to assume exist. Also, it may end up accessing things at a lower level + * than most extension providers should do. In summary, don't mimic this code + * unless you won't complain when your code breaks. 
+ */ +var GlodaFundAttr = { + providerName: "gloda.fundattr", + strings: new StringBundle("chrome://messenger/locale/gloda.properties"), + _log: null, + + init: function gloda_explattr_init() { + this._log = Log4Moz.repository.getLogger("gloda.fundattr"); + + try { + this.defineAttributes(); + } + catch (ex) { + this._log.error("Error in init: " + ex); + throw ex; + } + }, + + POPULARITY_FROM_ME_TO: 10, + POPULARITY_FROM_ME_CC: 4, + POPULARITY_FROM_ME_BCC: 3, + POPULARITY_TO_ME: 5, + POPULARITY_CC_ME: 1, + POPULARITY_BCC_ME: 1, + + /** Boost for messages 'I' sent */ + NOTABILITY_FROM_ME: 10, + /** Boost for messages involving 'me'. */ + NOTABILITY_INVOLVING_ME: 1, + /** Boost for message from someone in 'my' address book. */ + NOTABILITY_FROM_IN_ADDR_BOOK: 10, + /** Boost for the first person involved in my address book. */ + NOTABILITY_INVOLVING_ADDR_BOOK_FIRST: 8, + /** Boost for each additional person involved in my address book. */ + NOTABILITY_INVOLVING_ADDR_BOOK_ADDL: 2, + + defineAttributes: function gloda_fundattr_defineAttributes() { + /* ***** Conversations ***** */ + // conversation: subjectMatches + this._attrConvSubject = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "subjectMatches", + singular: true, + special: Gloda.kSpecialFulltext, + specialColumnName: "subject", + subjectNouns: [Gloda.NOUN_CONVERSATION], + objectNoun: Gloda.NOUN_FULLTEXT, + }); + + /* ***** Messages ***** */ + // folder + this._attrFolder = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "folder", + singular: true, + facet: true, + special: Gloda.kSpecialColumn, + specialColumnName: "folderID", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_FOLDER, + }); // tested-by: test_attributes_fundamental + this._attrAccount = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + 
attributeType: Gloda.kAttrDerived, + attributeName: "account", + canQuery: "memory", + singular: true, + facet: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_ACCOUNT + }); + this._attrMessageKey = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "messageKey", + singular: true, + special: Gloda.kSpecialColumn, + specialColumnName: "messageKey", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_NUMBER, + canQuery: true, + }); // tested-by: test_attributes_fundamental + + // We need to surface the deleted attribute for querying, but there is no + // reason for user code, so let's call it "_deleted" rather than deleted. + // (In fact, our validity constraints require a special query formulation + // that user code should have no clue exists. That's right user code, + // that's a dare.) + Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "_deleted", + singular: true, + special: Gloda.kSpecialColumn, + specialColumnName: "deleted", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_NUMBER, + }); + + + // -- fulltext search helpers + // fulltextMatches. Match over message subject, body, and attachments + // @testpoint gloda.noun.message.attr.fulltextMatches + this._attrFulltext = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "fulltextMatches", + singular: true, + special: Gloda.kSpecialFulltext, + specialColumnName: "messagesText", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_FULLTEXT, + }); + + // subjectMatches. 
Fulltext match on subject + // @testpoint gloda.noun.message.attr.subjectMatches + this._attrSubjectText = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "subjectMatches", + singular: true, + special: Gloda.kSpecialFulltext, + specialColumnName: "subject", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_FULLTEXT, + }); + + // bodyMatches. super-synthetic full-text matching... + // @testpoint gloda.noun.message.attr.bodyMatches + this._attrBody = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "bodyMatches", + singular: true, + special: Gloda.kSpecialFulltext, + specialColumnName: "body", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_FULLTEXT, + }); + + // attachmentNamesMatch + // @testpoint gloda.noun.message.attr.attachmentNamesMatch + this._attrAttachmentNames = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "attachmentNamesMatch", + singular: true, + special: Gloda.kSpecialFulltext, + specialColumnName: "attachmentNames", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_FULLTEXT, + }); + + // @testpoint gloda.noun.message.attr.authorMatches + this._attrAuthorFulltext = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "authorMatches", + singular: true, + special: Gloda.kSpecialFulltext, + specialColumnName: "author", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_FULLTEXT, + }); + + // @testpoint gloda.noun.message.attr.recipientsMatch + this._attrRecipientsFulltext = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "recipientsMatch", + singular: true, + special: Gloda.kSpecialFulltext, + 
specialColumnName: "recipients", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_FULLTEXT, + }); + + // --- synthetic stuff for some reason + // conversation + // @testpoint gloda.noun.message.attr.conversation + this._attrConversation = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "conversation", + singular: true, + special: Gloda.kSpecialColumnParent, + specialColumnName: "conversationID", + idStorageAttributeName: "_conversationID", + valueStorageAttributeName: "_conversation", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_CONVERSATION, + canQuery: true, + }); + + // --- Fundamental + // From + this._attrFrom = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "from", + singular: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_IDENTITY, + }); // tested-by: test_attributes_fundamental + // To + this._attrTo = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "to", + singular: false, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_IDENTITY, + }); // tested-by: test_attributes_fundamental + // Cc + this._attrCc = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "cc", + singular: false, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_IDENTITY, + }); // not-tested + /** + * Bcc'ed recipients; only makes sense for sent messages. + */ + this._attrBcc = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "bcc", + singular: false, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_IDENTITY, + }); // not-tested + + // Date. now lives on the row. 
+ this._attrDate = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "date", + singular: true, + facet: { + type: "date", + }, + special: Gloda.kSpecialColumn, + specialColumnName: "date", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_DATE, + }); // tested-by: test_attributes_fundamental + + // Header message ID. + this._attrHeaderMessageID = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "headerMessageID", + singular: true, + special: Gloda.kSpecialString, + specialColumnName: "headerMessageID", + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_STRING, + canQuery: true, + }); // tested-by: test_attributes_fundamental + + // Attachment MIME Types + this._attrAttachmentTypes = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "attachmentTypes", + singular: false, + emptySetIsSignificant: true, + facet: { + type: "default", + // This will group the MIME types by their category. 
+ groupIdAttr: "category", + queryHelper: "Category", + }, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_MIME_TYPE, + }); + + // Attachment infos + this._attrIsEncrypted = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "isEncrypted", + singular: true, + emptySetIsSignificant: false, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_NUMBER, + }); + + // Attachment infos + this._attrAttachmentInfos = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "attachmentInfos", + singular: false, + emptySetIsSignificant: false, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_ATTACHMENT, + }); + + // --- Optimization + /** + * Involves means any of from/to/cc/bcc. The queries get ugly enough + * without this that it seems to justify the cost, especially given the + * frequent use case. (In fact, post-filtering for the specific from/to/cc + * is probably justifiable rather than losing this attribute...) + */ + this._attrInvolves = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrOptimization, + attributeName: "involves", + singular: false, + facet: { + type: "default", + /** + * Filter out 'me', as we have other facets that deal with that, and the + * 'me' identities are so likely that they distort things. + * + * @return true if the identity is not one of my identities, false if it + * is. + */ + filter: function gloda_explattr_involves_filter(aItem) { + return (!(aItem.id in Gloda.myIdentities)); + } + }, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_IDENTITY, + }); // not-tested + + /** + * Any of to/cc/bcc. 
+ */ + this._attrRecipients = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrOptimization, + attributeName: "recipients", + singular: false, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_IDENTITY, + }); // not-tested + + // From Me (To/Cc/Bcc) + this._attrFromMe = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrOptimization, + attributeName: "fromMe", + singular: false, + // The interesting thing to a facet is whether the message is from me. + facet: { + type: "nonempty?" + }, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_PARAM_IDENTITY, + }); // not-tested + // To/Cc/Bcc Me + this._attrToMe = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "toMe", + // The interesting thing to a facet is whether the message is to me. + facet: { + type: "nonempty?" + }, + singular: false, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_PARAM_IDENTITY, + }); // not-tested + + + // -- Mailing List + // Non-singular, but a hard call. Namely, it is obvious that a message can + // be addressed to multiple mailing lists. However, I don't see how you + // could receive a message with more than one set of List-* headers, + // since each list-serve would each send you a copy. Based on our current + // decision to treat each physical message as separate, it almost seems + // right to limit the list attribute to the copy that originated at the + // list. That may sound entirely wrong, but keep in mind that until we + // have seen a message from the list with the List headers, we can't + // definitely know it's a mailing list (although heuristics could take us + // pretty far). As such, the quasi-singular thing is appealing. 
+ // Of course, the reality is that we really want to know if a message was + // sent to multiple mailing lists and be able to query on that. + // Additionally, our implicit-to logic needs to work on messages that + // weren't relayed by the list-serve, especially messages sent to the list + // by the user. + this._attrList = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "mailing-list", + bindName: "mailingLists", + singular: false, + emptySetIsSignificant: true, + facet: true, + subjectNouns: [Gloda.NOUN_MESSAGE], + objectNoun: Gloda.NOUN_IDENTITY, + }); // not-tested, not-implemented + }, + + RE_LIST_POST: /<mailto:([^>]+)>/, + + /** + * + * Specializations: + * - Mailing Lists. Replies to a message on a mailing list frequently only + * have the list-serve as the 'to', so we try to generate a synthetic 'to' + * based on the author of the parent message when possible. (The 'possible' + * part is that we may not have a copy of the parent message at the time of + * processing.) + * - Newsgroups. Same deal as mailing lists. + */ + process: function* gloda_fundattr_process(aGlodaMessage, aRawReps, + aIsNew, aCallbackHandle) { + let aMsgHdr = aRawReps.header; + let aMimeMsg = aRawReps.mime; + + // -- From + // Let's use replyTo if available. + // er, since we are just dealing with mailing lists for now, forget the + // reply-to... 
+ // TODO: deal with default charset issues + let author = null; + /* + try { + author = aMsgHdr.getStringProperty("replyTo"); + } + catch (ex) { + } + */ + if (author == null || author == "") + author = aMsgHdr.author; + + let normalizedListPost = ""; + if (aMimeMsg && aMimeMsg.has("list-post")) { + let match = this.RE_LIST_POST.exec(aMimeMsg.get("list-post")); + if (match) + normalizedListPost = "<" + match[1] + ">"; + } + + // Do not use the MIME decoded variants of any of the email addresses + // because if name is encoded and has a comma in it, it will break the + // address parser (which already knows how to do the decoding anyways). + let [authorIdentities, toIdentities, ccIdentities, bccIdentities, + listIdentities] = + yield aCallbackHandle.pushAndGo( + Gloda.getOrCreateMailIdentities(aCallbackHandle, + author, aMsgHdr.recipients, + aMsgHdr.ccList, aMsgHdr.bccList, + normalizedListPost)); + + if (authorIdentities.length != 1) { + throw new Gloda.BadItemContentsError( + "Message with subject '" + aMsgHdr.mime2DecodedSubject + + "' somehow lacks a valid author. Bailing."); + } + let authorIdentity = authorIdentities[0]; + aGlodaMessage.from = authorIdentity; + + // -- To, Cc, Bcc + aGlodaMessage.to = toIdentities; + aGlodaMessage.cc = ccIdentities; + aGlodaMessage.bcc = bccIdentities; + + // -- Mailing List + if (listIdentities.length) + aGlodaMessage.mailingLists = listIdentities; + + let findIsEncrypted = x => + x.isEncrypted || (x.parts ? x.parts.some(findIsEncrypted) : false); + + // -- Encryption + aGlodaMessage.isEncrypted = false; + if (aMimeMsg) { + aGlodaMessage.isEncrypted = findIsEncrypted(aMimeMsg); + } + + // -- Attachments + if (aMimeMsg) { + // nsParseMailbox.cpp puts the attachment flag on msgHdrs as soon as it + // finds a multipart/mixed part. This is a good heuristic, but if it turns + // out the part has no filename, then we don't treat it as an attachment. 
+ // We just streamed the message, and we have all the information to figure + // that out, so now is a good place to clear the flag if needed. + let foundRealAttachment = false; + let attachmentTypes = []; + for (let attachment of aMimeMsg.allAttachments) { + // We don't care about would-be attachments that are not user-intended + // attachments but rather artifacts of the message content. + // We also want to avoid dealing with obviously bogus mime types. + // (If you don't have a "/", you are probably bogus.) + if (attachment.isRealAttachment && + attachment.contentType.includes("/")) { + attachmentTypes.push(MimeTypeNoun.getMimeType(attachment.contentType)); + } + if (attachment.isRealAttachment) + foundRealAttachment = true; + } + if (attachmentTypes.length) { + aGlodaMessage.attachmentTypes = attachmentTypes; + } + + let aMsgHdr = aRawReps.header; + let wasStreamed = aMsgHdr && + !aGlodaMessage.isEncrypted && + ((aMsgHdr.flags & Ci.nsMsgMessageFlags.Offline) || + (aMsgHdr.folder instanceof Ci.nsIMsgLocalMailFolder)); + + // Clear the flag if it turns out there's no attachment after all and we + // streamed completely the message (if we didn't, then we have no + // knowledge of attachments, unless bug 673370 is fixed). + if (!foundRealAttachment && wasStreamed) + aMsgHdr.markHasAttachments(false); + + // This is not the same kind of attachments as above. Now, we want to + // provide convenience attributes to Gloda consumers, so that they can run + // through the list of attachments of a given message, to possibly build a + // visualization on top of it. We still reject bogus mime types, which + // means yencode won't be supported. Oh, I feel really bad. + let attachmentInfos = []; + for (let att of aMimeMsg.allUserAttachments) { + attachmentInfos.push(this.glodaAttFromMimeAtt(aRawReps.trueGlodaRep, + att)); + } + aGlodaMessage.attachmentInfos = attachmentInfos; + } + + // TODO: deal with mailing lists, including implicit-to. 
this will require + // convincing the indexer to pass us in the previous message if it is + // available. (which we'll simply pass to everyone... it can help body + // logic for quoting purposes, etc. too.) + + yield Gloda.kWorkDone; + }, + + glodaAttFromMimeAtt: + function gloda_fundattr_glodaAttFromMimeAtt(aGlodaMessage, aAtt) { + // So we don't want to store the URL because it can change over time if + // the message is moved. What we do is store the full URL if it's a + // detached attachment, otherwise just keep the part information, and + // rebuild the URL according to where the message is sitting. + let part, externalUrl; + if (aAtt.isExternal) { + externalUrl = aAtt.url; + } else { + let matches = aAtt.url.match(GlodaUtils.PART_RE); + if (matches && matches.length) + part = matches[1]; + else + this._log.error("Error processing attachment: " + aAtt.url); + } + return new GlodaAttachment(aGlodaMessage, + aAtt.name, + aAtt.contentType, + aAtt.size, + part, + externalUrl, + aAtt.isExternal); + }, + + optimize: function* gloda_fundattr_optimize(aGlodaMessage, aRawReps, + aIsNew, aCallbackHandle) { + + let aMsgHdr = aRawReps.header; + + // for simplicity this is used for both involves and recipients + let involvesIdentities = {}; + let involves = aGlodaMessage.involves || []; + let recipients = aGlodaMessage.recipients || []; + + // 'me' specialization optimizations + let toMe = aGlodaMessage.toMe || []; + let fromMe = aGlodaMessage.fromMe || []; + + let myIdentities = Gloda.myIdentities; // needless optimization? + let authorIdentity = aGlodaMessage.from; + let isFromMe = authorIdentity.id in myIdentities; + + // The fulltext search column for the author. We want to have in here: + // - The e-mail address and display name as enclosed on the message. + // - The name per the address book card for this e-mail address, if we have + // one. + aGlodaMessage._indexAuthor = aMsgHdr.mime2DecodedAuthor; + // The fulltext search column for the recipients. 
(same deal) + aGlodaMessage._indexRecipients = aMsgHdr.mime2DecodedRecipients; + + if (isFromMe) + aGlodaMessage.notability += this.NOTABILITY_FROM_ME; + else { + let authorCard = authorIdentity.abCard; + if (authorCard) { + aGlodaMessage.notability += this.NOTABILITY_FROM_IN_ADDR_BOOK; + // @testpoint gloda.noun.message.attr.authorMatches + aGlodaMessage._indexAuthor += ' ' + authorCard.displayName; + } + } + + involves.push(authorIdentity); + involvesIdentities[authorIdentity.id] = true; + + let involvedAddrBookCount = 0; + + for (let toIdentity of aGlodaMessage.to) { + if (!(toIdentity.id in involvesIdentities)) { + involves.push(toIdentity); + recipients.push(toIdentity); + involvesIdentities[toIdentity.id] = true; + let toCard = toIdentity.abCard; + if (toCard) { + involvedAddrBookCount++; + // @testpoint gloda.noun.message.attr.recipientsMatch + aGlodaMessage._indexRecipients += ' ' + toCard.displayName; + } + } + + // optimization attribute to-me ('I' am the parameter) + if (toIdentity.id in myIdentities) { + toMe.push([toIdentity, authorIdentity]); + if (aIsNew) + authorIdentity.contact.popularity += this.POPULARITY_TO_ME; + } + // optimization attribute from-me-to ('I' am the parameter) + if (isFromMe) { + fromMe.push([authorIdentity, toIdentity]); + // also, popularity + if (aIsNew) + toIdentity.contact.popularity += this.POPULARITY_FROM_ME_TO; + } + } + for (let ccIdentity of aGlodaMessage.cc) { + if (!(ccIdentity.id in involvesIdentities)) { + involves.push(ccIdentity); + recipients.push(ccIdentity); + involvesIdentities[ccIdentity.id] = true; + let ccCard = ccIdentity.abCard; + if (ccCard) { + involvedAddrBookCount++; + // @testpoint gloda.noun.message.attr.recipientsMatch + aGlodaMessage._indexRecipients += ' ' + ccCard.displayName; + } + } + // optimization attribute cc-me ('I' am the parameter) + if (ccIdentity.id in myIdentities) { + toMe.push([ccIdentity, authorIdentity]); + if (aIsNew) + authorIdentity.contact.popularity += this.POPULARITY_CC_ME; 
+ } + // optimization attribute from-me-to ('I' am the parameter) + if (isFromMe) { + fromMe.push([authorIdentity, ccIdentity]); + // also, popularity + if (aIsNew) + ccIdentity.contact.popularity += this.POPULARITY_FROM_ME_CC; + } + } + // just treat bcc like cc; the intent is the same although the exact + // semantics differ. + for (let bccIdentity of aGlodaMessage.bcc) { + if (!(bccIdentity.id in involvesIdentities)) { + involves.push(bccIdentity); + recipients.push(bccIdentity); + involvesIdentities[bccIdentity.id] = true; + let bccCard = bccIdentity.abCard; + if (bccCard) { + involvedAddrBookCount++; + // @testpoint gloda.noun.message.attr.recipientsMatch + aGlodaMessage._indexRecipients += ' ' + bccCard.displayName; + } + } + // optimization attribute cc-me ('I' am the parameter) + if (bccIdentity.id in myIdentities) { + toMe.push([bccIdentity, authorIdentity]); + if (aIsNew) + authorIdentity.contact.popularity += this.POPULARITY_BCC_ME; + } + // optimization attribute from-me-to ('I' am the parameter) + if (isFromMe) { + fromMe.push([authorIdentity, bccIdentity]); + // also, popularity + if (aIsNew) + bccIdentity.contact.popularity += this.POPULARITY_FROM_ME_BCC; + } + } + + if (involvedAddrBookCount) + aGlodaMessage.notability += this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST + + (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL; + + aGlodaMessage.involves = involves; + aGlodaMessage.recipients = recipients; + if (toMe.length) { + aGlodaMessage.toMe = toMe; + aGlodaMessage.notability += this.NOTABILITY_INVOLVING_ME; + } + if (fromMe.length) + aGlodaMessage.fromMe = fromMe; + + // Content + if (aRawReps.bodyLines) { + aGlodaMessage._content = aRawReps.content = new GlodaContent(); + if (this.contentWhittle({}, aRawReps.bodyLines, aGlodaMessage._content)) { + // we were going to do something here? + } + } + else { + aRawReps.content = null; + } + + yield Gloda.kWorkDone; + }, + + /** + * Duplicates the notability logic from optimize(). 
Arguably optimize should + * be factored to call us, grokNounItem should be factored to call us, or we + * should get sufficiently fancy that our code wildly diverges. + */ + score: function gloda_fundattr_score(aMessage, aContext) { + let score = 0; + + let authorIdentity = aMessage.from; + if (authorIdentity.id in Gloda.myIdentities) + score += this.NOTABILITY_FROM_ME; + else if (authorIdentity.inAddressBook) + score += this.NOTABILITY_FROM_IN_ADDR_BOOK; + if (aMessage.toMe) + score += this.NOTABILITY_INVOLVING_ME; + + let involvedAddrBookCount = 0; + for (let [, identity] in Iterator(aMessage.to)) + if (identity.inAddressBook) + involvedAddrBookCount++; + for (let [, identity] in Iterator(aMessage.cc)) + if (identity.inAddressBook) + involvedAddrBookCount++; + if (involvedAddrBookCount) + score += this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST + + (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL; + return score; + }, + + _countQuoteDepthAndNormalize: + function gloda_fundattr__countQuoteDepthAndNormalize(aLine) { + let count = 0; + let lastStartOffset = 0; + + for (let i = 0; i < aLine.length; i++) { + let c = aLine[i]; + if (c == ">") { + count++; + lastStartOffset = i+1; + } + else if (c == " ") { + } + else { + return [count, + lastStartOffset ? aLine.substring(lastStartOffset) : aLine]; + } + } + + return [count, lastStartOffset ? aLine.substring(lastStartOffset) : aLine]; + }, + + /** + * Attempt to understand simple quoting constructs that use ">" with + * obvious phrases to enter the quoting block. No support for other types + * of quoting at this time. Also no support for piercing the wrapper of + * forwarded messages to actually be the content of the forwarded message. + */ + contentWhittle: function gloda_fundattr_contentWhittle(aMeta, + aBodyLines, aContent) { + if (!aContent.volunteerContent(aContent.kPriorityBase)) + return false; + + // duplicate the list; we mutate somewhat... 
+ let bodyLines = aBodyLines.concat(); + + // lastNonBlankLine originally was just for detecting quoting idioms where + // the "wrote" line was separated from the quoted block by a blank line. + // Now we also use it for whitespace suppression at the boundaries of + // quoted and un-quoted text. (We keep blank lines within the same + // 'block' of quoted or non-quoted text.) + // Because we now have two goals for it, and we still want to suppress blank + // lines when there is a 'wrote' line involved, we introduce... + // prevLastNonBlankLine! This arguably suggests refactoring should be the + // next step, but things work for now. + let rangeStart = 0, lastNonBlankLine = null, prevLastNonBlankLine = null; + let inQuoteDepth = 0; + for (let [iLine, line] of bodyLines.entries()) { + if (!line || (line == "\xa0")) /* unicode non breaking space */ + continue; + + if (line.startsWith(">")) { + if (!inQuoteDepth) { + let rangeEnd = iLine - 1; + let quoteRangeStart = iLine; + // see if the last non-blank-line was a lead-in... + if (lastNonBlankLine != null) { + // TODO: localize quote range start detection + if (aBodyLines[lastNonBlankLine].includes("wrote")) { + quoteRangeStart = lastNonBlankLine; + rangeEnd = lastNonBlankLine - 1; + // we 'used up' lastNonBlankLine, let's promote the prev guy to + // be the new lastNonBlankLine for the next logic block + lastNonBlankLine = prevLastNonBlankLine; + } + // eat the trailing whitespace... 
+ if (lastNonBlankLine != null) + rangeEnd = Math.min(rangeEnd, lastNonBlankLine); + } + if (rangeEnd >= rangeStart) + aContent.content(aBodyLines.slice(rangeStart, rangeEnd+1)); + + [inQuoteDepth, line] = this._countQuoteDepthAndNormalize(line); + bodyLines[iLine] = line; + rangeStart = quoteRangeStart; + } + else { + let curQuoteDepth; + [curQuoteDepth, line] = this._countQuoteDepthAndNormalize(line); + bodyLines[iLine] = line; + + if (curQuoteDepth != inQuoteDepth) { + // we could do some "wrote" compensation here, but it's not really + // as important. let's wait for a more clever algorithm. + aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth); + inQuoteDepth = curQuoteDepth; + rangeStart = iLine; + } + } + } + else { + if (inQuoteDepth) { + aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth); + inQuoteDepth = 0; + rangeStart = iLine; + } + } + + prevLastNonBlankLine = lastNonBlankLine; + lastNonBlankLine = iLine; + } + + if (inQuoteDepth) { + aContent.quoted(aBodyLines.slice(rangeStart), inQuoteDepth); + } + else { + aContent.content(aBodyLines.slice(rangeStart, lastNonBlankLine+1)); + } + + return true; + }, +}; diff --git a/mailnews/db/gloda/modules/gloda.js b/mailnews/db/gloda/modules/gloda.js new file mode 100644 index 000000000..3a32e6041 --- /dev/null +++ b/mailnews/db/gloda/modules/gloda.js @@ -0,0 +1,2283 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +this.EXPORTED_SYMBOLS = ['Gloda']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); + +Cu.import("resource:///modules/gloda/datastore.js"); +Cu.import("resource:///modules/gloda/datamodel.js"); +Cu.import("resource:///modules/gloda/databind.js"); +Cu.import("resource:///modules/gloda/collection.js"); +Cu.import("resource:///modules/gloda/connotent.js"); +Cu.import("resource:///modules/gloda/query.js"); +Cu.import("resource:///modules/gloda/utils.js"); + +Cu.import("resource:///modules/iteratorUtils.jsm"); +Cu.import("resource:///modules/IOUtils.js"); +Cu.import("resource:///modules/mailServices.js"); +Cu.import("resource://gre/modules/Services.jsm"); + +/** + * @see |Gloda.BadItemContentsError| + */ +function BadItemContentsError(aMessage) { + this.message = aMessage; +} +BadItemContentsError.prototype = { + toString: function BadItemContentsError_toString() { + return this.message; + } +}; + + +/** + * Provides the user-visible (and extension visible) global database + * functionality. There is currently a dependency/ordering + * problem in that the concept of 'gloda' also includes some logic that is + * contributed by built-in extensions, if you will. Those built-in extensions + * (fundattr.js, explattr.js) also import this file. To avoid a circular + * dependency, those built-in extensions are loaded by everybody.js. The + * simplest/best solution is probably to move everybody.js to be gloda.js and + * have it re-export only 'Gloda'. gloda.js (this file) can then move to be + * gloda_int.js (or whatever our eventual naming scheme is), which built-in + * extensions can explicitly rely upon. + * + * === Concepts + * + * == Nouns + * + * Inspired by reasonable uses of triple-stores, I have tried to leverage + * existing model and terminology rather than rolling out own for everything. 
+ * The idea with triple-stores is that you have a subject, a predicate, and an + * object. For example, if we are talking about a message, that is the + * subject, the predicate could roughly be sent-by, and the object a person. + * We can generalize this idea to say that the subject and objects are nouns. + * Since we want to be more flexible than only dealing with messages, we + * therefore introduce the concept of nouns as an organizing principle. + * + * == Attributes + * + * Our attributes definitions are basically our predicates. When we define + * an attribute, it's a label with a bunch of meta-data. Our attribute + * instances are basically a 'triple' in a triple-store. The attributes + * are stored in database rows that imply a specific noun-type (ex: the + * messageAttributes table), with an ID identifying the message which is our + * subject, an attribute ID which identifies the attribute definition in use + * (and therefore the predicate), plus an object ID (given context aka the + * noun type by the attribute's meta-data) which identifies the 'object'. + * + * == But... + * + * Things aren't entirely as clear as they could be right now, terminology/ + * concept/implementation-wise. Some work is probably still in order. + * + * === Implementation + * + * == Nouns + * + * So, we go and define the nouns that are roughly the classes in our data + * model. Every 'class' we define in datamodel.js is a noun that gets defined + * here in the Gloda core. We provide sufficient meta-data about the noun to + * serialize/deserialize its representation from our database representation. + * Nouns do not have to be defined in this class, but can also be contributed + * by external code. + * We have a concept of 'first class' nouns versus non-first class nouns. The + * distinction is meant to be whether we can store meta-information about those + * nouns using attributes. 
Right now, only messages are real first-class nouns,
+ * + * @namespace + */ +var Gloda = { + /** + * Initialize logging, the datastore (SQLite database), the core nouns and + * attributes, and the contact and identities that belong to the presumed + * current user (based on accounts). + * + * Additional nouns and the core attribute providers are initialized by the + * everybody.js module which ensures all of those dependencies are loaded + * (and initialized). + */ + _init: function gloda_ns_init() { + this._initLogging(); + GlodaDatastore._init(this._nounIDToDef); + this._initAttributes(); + this._initMyIdentities(); + }, + + _log: null, + /** + * Initialize logging; the error console window gets Warning/Error, and stdout + * (via dump) gets everything. + */ + _initLogging: function gloda_ns_initLogging() { + let formatter = new Log4Moz.BasicFormatter(); + Log4Moz.repository.rootLogger.level = Log4Moz.Level.Debug; + + let enableConsoleLogging = false; + let enableDumpLogging = false; + // should we assume there is someone else consuming our log4moz stream? + let enableUpstreamLogging = false; + let considerNetLogging = false; + + let glodaLog = Log4Moz.repository.getLogger("gloda"); + glodaLog.level = Log4Moz.Level.Warn; + + try { + // figure out if event-driven indexing should be enabled... 
+ let branch = Services.prefs.getBranch("mailnews.database.global.logging."); + enableConsoleLogging = branch.getBoolPref("console"); + enableDumpLogging = branch.getBoolPref("dump"); + enableUpstreamLogging = branch.getBoolPref("upstream"); + considerNetLogging = branch.getBoolPref("net"); + } catch (ex) {} + + if (enableConsoleLogging) { + let capp = new Log4Moz.ConsoleAppender(formatter); + capp.level = Log4Moz.Level.Warn; + glodaLog.addAppender(capp); + } + + if (enableDumpLogging) { + let dapp = new Log4Moz.DumpAppender(formatter); + dapp.level = Log4Moz.Level.All; + glodaLog.level = Log4Moz.Level.All; + glodaLog.addAppender(dapp); + } + + if (enableUpstreamLogging) { + glodaLog.level = Log4Moz.Level.All; + } + + if (considerNetLogging) { + let file = Services.dirsvc.get("TmpD", Ci.nsIFile); + file.append("chainsaw.ptr"); + if (file.exists()) { + let data = IOUtils.loadFileToString(file); + data = data.trim(); + let [host, port] = data.split(":"); + let xf = new Log4Moz.XMLFormatter(); + let sapp = new Log4Moz.SocketAppender(host, Number(port), xf); + sapp.level = Log4Moz.Level.All; + glodaLog.level = Log4Moz.Level.All; + glodaLog.addAppender(sapp); + } + } + + this._log = Log4Moz.repository.getLogger("gloda.NS"); + this._log.info("Logging Initialized"); + }, + + /** + * The indexer is idle. + */ + kIndexerIdle: 0, + /** + * The indexer is doing something. We used to have other specific states, but + * they have been rendered irrelevant and wiped from existence. + */ + kIndexerIndexing: 1, + + /** + * Synchronous activities performed that can be thought of as one processing + * token. Potentially yield the event-loop and re-schedule for later based + * on how long we've actually taken/etc. The goal here is that code that + * is doing stuff synchronously yields with kWorkSync periodically to make + * sure that it doesn't dominate the event-loop. 
Unless the processing + * in question is particularly intensive, it should be reasonable to apply + * some decimation factor (ex: 32 or 64) with the general goal of yielding + * every 3-10 milliseconds. + */ + kWorkSync: 0, + /** + * Asynchronous activity performed, you need to relinquish flow control and + * trust us to call callbackDriver later. + */ + kWorkAsync: 1, + /** + * We are all done with our task, close us and figure out something else to do. + */ + kWorkDone: 2, + /** + * We are not done with our task, but we think it's a good idea to take a + * breather because we believe we have tied up the event loop for a + * non-trivial amount of time. So please re-schedule us in the future. + * + * This is currently only used internally by the indexer's batching logic; + * minor changes may be required if used by actual indexers. + */ + kWorkPause: 3, + /** + * We are done with our task, and have a result that we are returning. This + * should only be used by your callback handler's doneWithResult method. + * Ex: you are passed aCallbackHandle, and you do + * "yield aCallbackHandle.doneWithResult(myResult);". + */ + kWorkDoneWithResult: 4, + + /** + * Callers should access the unique ID for the GlodaDatastore + * with this getter. If the GlodaDatastore has not been + * initialized, this value is null. + * + * @return a UUID as a string, ex: "c4dd0159-9287-480f-a648-a4613e147fdb" + */ + get datastoreID() { + return GlodaDatastore._datastoreID; + }, + + /** + * Lookup a gloda message from an nsIMsgDBHdr, with the result returned as a + * collection. Keep in mind that the message may not be indexed, so you + * may end up with an empty collection. (Also keep in mind that this query + * is asynchronous, so you will want your action-taking logic to be found + * in your listener's onQueryCompleted method; the result will not be in + * the collection when this method returns.) + * + * @param aMsgHdr The header of the message you want the gloda message for. 
+ * @param aListener The listener that should be registered with the collection + * @param aData The (optional) value to set as the data attribute on the + * collection. + * + * @return The collection that will receive the results. + * + * @testpoint gloda.ns.getMessageCollectionForHeader() + */ + getMessageCollectionForHeader: function gloda_ns_getMessageForHeader(aMsgHdr, + aListener, aData) { + let query = Gloda.newQuery(Gloda.NOUN_MESSAGE); + query.folder(aMsgHdr.folder).messageKey(aMsgHdr.messageKey); + return query.getCollection(aListener, aData); + }, + + /** + * Given a list of message headers, return a collection containing the gloda + * messages that correspond to those headers. Keep in mind that gloda may + * not have indexed all the messages, so the returned collection may not have + * a message for each header you provide. (Also keep in mind that this query + * is asynchronous, so you will want your action-taking logic to be found + * in your listener's onQueryCompleted method; no results will be present in + * the collection when this method returns.) + * + * @param aHeaders A javascript Array or and XPCOM list that fixIterator can + * can handle. + * @param aListener The listener that should be registered with the collection + * @param aData The (optional) value to set as the data attribute on the + * collection. + * + * @return The collection that will receive the results. 
+ * + * @testpoint gloda.ns.getMessageCollectionForHeaders() + */ + getMessageCollectionForHeaders: function gloda_ns_getMessagesForHeaders( + aHeaders, aListener, aData) { + // group the headers by the folder they are found in + let headersByFolder = {}; + let iter; + for (let header in fixIterator(aHeaders)) { + let folderURI = header.folder.URI; + let headersForFolder = headersByFolder[folderURI]; + if (headersForFolder === undefined) + headersByFolder[folderURI] = [header]; + else + headersForFolder.push(header); + } + + let query = Gloda.newQuery(Gloda.NOUN_MESSAGE); + let clause; + // build a query, using a separate union clause for each folder. + for (let folderURI in headersByFolder) { + let headersForFolder = headersByFolder[folderURI]; + let folder = this.getFolderForFolder(headersForFolder[0].folder); + // if this is the first or clause, just use the query itself + if (!clause) + clause = query; + else // create a new query clause via the 'or' command + clause = query.or(); + + clause.folder(folder); + let messageKeys = headersForFolder.map(hdr => hdr.messageKey); + clause.messageKey.apply(clause, messageKeys); + } + + return query.getCollection(aListener, aData); + }, + + /** + * @testpoint gloda.ns.getMessageContent + */ + getMessageContent: function gloda_ns_getMessageContent(aGlodaMessage, aMimeMsg) { + return mimeMsgToContentAndMeta(aMimeMsg, aGlodaMessage.folderMessage.folder)[0]; + }, + + getFolderForFolder: function gloda_ns_getFolderForFolder(aMsgFolder) { + return GlodaDatastore._mapFolder(aMsgFolder); + }, + + /** + * Takes one or more strings containing lists of comma-delimited e-mail + * addresses with optional display names, and returns a list of sub-lists of + * identities, where each sub-list corresponds to each of the strings passed + * as arguments. These identities are loaded from the database if they + * already exist, or created if they do not yet exist. 
+ * If the identities need to be created, they will also result in the + * creation of a gloda contact. If a display name was provided with the + * e-mail address, it will become the name of the gloda contact. If a + * display name was not provided, the e-mail address will also serve as the + * contact name. + * This method uses the indexer's callback handle mechanism, and does not + * obey traditional return semantics. + * + * We normalize all e-mail addresses to be lowercase as a normative measure. + * + * @param aCallbackHandle The GlodaIndexer callback handle (or equivalent) + * that you are operating under. + * @param ... One or more strings. Each string can contain zero or more + * e-mail addresses with display name. If more than one address is given, + * they should be comma-delimited. For example + * '"Bob Smith" <bob@example.com>' is an address with display name. Mime + * header decoding is performed, but is ignorant of any folder-level + * character set overrides. + * @returns via the callback handle mechanism, a list containing one sub-list + * for each string argument passed. Each sub-list containts zero or more + * GlodaIdentity instances corresponding to the addresses provided. 
+ */ + getOrCreateMailIdentities: + function* gloda_ns_getOrCreateMailIdentities(aCallbackHandle) { + let addresses = {}; + let resultLists = []; + + // parse the strings + for (let iArg = 1; iArg < arguments.length; iArg++) { + let aMailAddresses = arguments[iArg]; + let parsed = GlodaUtils.parseMailAddresses(aMailAddresses); + + let resultList = []; + resultLists.push(resultList); + + let identities = []; + for (let iAddress = 0; iAddress < parsed.count; iAddress++) { + let address = parsed.addresses[iAddress].toLowerCase(); + if (address in addresses) + addresses[address].push(resultList); + else + addresses[address] = [parsed.names[iAddress], resultList]; + } + } + + let addressList = Object.keys(addresses); + if (addressList.length == 0) { + yield aCallbackHandle.doneWithResult(resultLists); + // we should be stopped before we reach this point, but safety first. + return; + } + + let query = this.newQuery(this.NOUN_IDENTITY); + query.kind("email"); + query.value.apply(query, addressList); + let collection = query.getCollection(aCallbackHandle); + yield this.kWorkAsync; + + // put the identities in the appropriate result lists + for (let identity of collection.items) { + let nameAndResultLists = addresses[identity.value]; + this._log.debug(" found identity for '" + nameAndResultLists[0] + "' (" + + identity.value + ")"); + // index 0 is the name, skip it + for (let iResList = 1; iResList < nameAndResultLists.length; iResList++) { + nameAndResultLists[iResList].push(identity); + } + delete addresses[identity.value]; + } + + // create the identities that did not exist yet + for (let address in addresses) { + let nameAndResultLists = addresses[address]; + let name = nameAndResultLists[0]; + + this._log.debug(" creating contact for '" + name + "' (" + address + ")"); + + // try and find an existing address book contact. 
+ let card = GlodaUtils.getCardForEmail(address); + // XXX when we have the address book GUID stuff, we need to use that to + // find existing contacts... (this will introduce a new query phase + // where we batch all the GUIDs for an async query) + // XXX when the address book supports multiple e-mail addresses, we + // should also just create identities for any that don't yet exist + + // if there is no name, just use the e-mail (the ab indexer actually + // processes the card's displayName for synchronization, so we don't + // need to do that.) + if (!name) + name = address; + + let contact = GlodaDatastore.createContact(null, null, name, 0, 0); + + // we must create the identity. use a blank description because there's + // nothing to differentiate it from other identities, as this contact + // only has one initially (us). + // XXX when we have multiple e-mails and there is a meaning associated + // with each e-mail, try and use that to populate the description. + // XXX we are creating the identity here before we insert the contact. + // conceptually it is good for us to be creating the identity before + // exposing it to the address-book indexer, but we could get our id's + // in a bad way from not deferring the identity insertion until after + // the contact insertion. + let identity = GlodaDatastore.createIdentity(contact.id, contact, + "email", address, /* description */ "", /* relay? */ false); + contact._identities = [identity]; + + // give the address book indexer a chance if we have a card. + // (it will fix-up the name based on the card as appropriate) + if (card) + yield aCallbackHandle.pushAndGo( + Gloda.grokNounItem(contact, {card: card}, true, true, + aCallbackHandle)); + else // grokNounItem will issue the insert for us... 
+ GlodaDatastore.insertContact(contact); + + for (let iResList = 1; iResList < nameAndResultLists.length; iResList++) { + nameAndResultLists[iResList].push(identity); + } + } + + yield aCallbackHandle.doneWithResult(resultLists); + }, + + /** + * Dictionary of the user's known identities; key is the identity id, value + * is the actual identity. This is populated by _initMyIdentities based on + * the accounts defined. + */ + myIdentities: {}, + /** + * The contact corresponding to the current user. We are assuming that only + * a single user/human being uses the current profile. This is known to be + * a flawed assumption, but is the best first approximation available. + * + * @TODO attempt to deal with multile people using the same profile + */ + myContact: null, + /** + * Populate myIdentities with all of our identities. Currently we do this + * by assuming that there is one human/user per profile, and that all of the + * accounts defined in the profile belong to them. The single contact is + * stored on myContact. + * + * @TODO deal with account addition/modification/removal + * @TODO attempt to deal with multiple people using the same profile + */ + _initMyIdentities: function gloda_ns_initMyIdentities() { + let myContact = null; + let myIdentities = {}; + let myEmailAddresses = {}; // process each email at most once; stored here + + let fullName, fallbackName; + let existingIdentities = []; + let identitiesToCreate = []; + + let numIdentities = MailServices.accounts.allIdentities.length; + + // nothing to do if there are no accounts/identities. 
+ if (!numIdentities) + return; + + for (let iIdentity = 0; iIdentity < numIdentities; iIdentity++) { + let msgIdentity = + MailServices.accounts.allIdentities.queryElementAt(iIdentity, + Ci.nsIMsgIdentity); + + if (!fullName) + fullName = msgIdentity.fullName; + if (!fallbackName) + fallbackName = msgIdentity.email; + + let emailAddress = msgIdentity.email; + let replyTo = msgIdentity.replyTo; + + // find the identities if they exist, flag to create them if they don't + if (emailAddress) { + let parsed = GlodaUtils.parseMailAddresses(emailAddress); + if (!(parsed.addresses[0] in myEmailAddresses)) { + let identity = GlodaDatastore.getIdentity("email", + parsed.addresses[0]); + if (identity) + existingIdentities.push(identity); + else + identitiesToCreate.push(parsed.addresses[0]); + myEmailAddresses[parsed.addresses[0]] = true; + } + } + if (replyTo) { + let parsed = GlodaUtils.parseMailAddresses(replyTo); + if (!(parsed.addresses[0] in myEmailAddresses)) { + let identity = GlodaDatastore.getIdentity("email", + parsed.addresses[0]); + if (identity) + existingIdentities.push(identity); + else + identitiesToCreate.push(parsed.addresses[0]); + myEmailAddresses[parsed.addresses[0]] = true; + } + } + } + + // we need to establish the identity.contact portions of the relationship + for (let identity of existingIdentities) { + identity._contact = GlodaDatastore.getContactByID(identity.contactID); + } + + if (existingIdentities.length) { + // just use the first guy's contact + myContact = existingIdentities[0].contact; + } + else { + // create a new contact + myContact = GlodaDatastore.createContact(null, null, + fullName || fallbackName, + 0, 0); + GlodaDatastore.insertContact(myContact); + } + + if (identitiesToCreate.length) { + for (let iIdentity = 0; iIdentity < identitiesToCreate.length; + iIdentity++) { + let emailAddress = identitiesToCreate[iIdentity]; + // XXX this won't always be of type "email" as we add new account types + // XXX the blank string could be 
trying to differentiate; we do have + // enough info to do it. + let identity = GlodaDatastore.createIdentity(myContact.id, myContact, + "email", + emailAddress, + "", false); + existingIdentities.push(identity); + } + } + + for (let iIdentity = 0; iIdentity < existingIdentities.length; + iIdentity++) { + let identity = existingIdentities[iIdentity]; + myIdentities[identity.id] = identity; + } + + this.myContact = myContact; + this.myIdentities = myIdentities; + myContact._identities = Object.keys(myIdentities). + map(id => myIdentities[id]); + + // we need contacts to make these objects reachable via the collection + // manager. + this._myContactCollection = this.explicitCollection(this.NOUN_CONTACT, + [this.myContact]); + this._myIdentitiesCollection = + this.explicitCollection(this.NOUN_IDENTITY, this.myContact._identities); + }, + + /** + * An attribute that is a defining characteristic of the subject. + */ + kAttrFundamental: 0, + /** + * An attribute that is an optimization derived from two or more fundamental + * attributes and exists solely to improve database query performance. + */ + kAttrOptimization: 1, + /** + * An attribute that is derived from the content of the subject. For example, + * a message that references a bugzilla bug could have a "derived" attribute + * that captures the bugzilla reference. This is not + */ + kAttrDerived: 2, + /** + * An attribute that is the result of an explicit and intentional user action + * upon the subject. For example, a tag placed on a message by a user (or + * at the user's request by a filter) is explicit. + */ + kAttrExplicit: 3, + /** + * An attribute that is indirectly the result of a user's behaviour. For + * example, if a user consults a message multiple times, we may conclude that + * the user finds the message interesting. It is "implied", if you will, + * that the message is interesting. 
   */
  kAttrImplicit: 4,

  /**
   * This attribute is not 'special'; it is stored as a (thing id, attribute
   *  id, attribute id) tuple in the database rather than on thing's row or on
   *  thing's fulltext row.  (Where "thing" could be a message or any other
   *  first class noun.)
   */
  kSpecialNotAtAll: GlodaDatastore.kSpecialNotAtAll,
  /**
   * This attribute is stored as a numeric column on the row for the noun.  The
   *  attribute definition should include this value as 'special' and the
   *  column name that stores the attribute as 'specialColumnName'.
   */
  kSpecialColumn: GlodaDatastore.kSpecialColumn,
  kSpecialColumnChildren: GlodaDatastore.kSpecialColumnChildren,
  kSpecialColumnParent: GlodaDatastore.kSpecialColumnParent,
  /**
   * This attribute is stored as a string column on the row for the noun.  It
   *  differs from kSpecialColumn in that it is a string, which once had
   *  query ramifications and one day may have them again.
   */
  kSpecialString: GlodaDatastore.kSpecialString,
  /**
   * This attribute is stored as a fulltext column on the fulltext table for
   *  the noun.  The attribute definition should include this value as
   *  'special' and the column name that stores the table as
   *  'specialColumnName'.
   */
  kSpecialFulltext: GlodaDatastore.kSpecialFulltext,

  /**
   * The extensionName used for the attributes defined by core gloda plugins
   *  such as fundattr.js and explattr.js.
   */
  BUILT_IN: "built-in",

  /**
   * Special sentinel value that will cause facets to skip a noun instance
   *  when an attribute has this value.
   */
  IGNORE_FACET: GlodaDatastore.IGNORE_FACET,

  /*
   * The following are explicit noun IDs.  While most extension-provided nouns
   *  will have dynamically allocated id's that are looked up by name, these
   *  id's can be relied upon to exist and be accessible via these
   *  pseudo-constants.  It's not really clear that we need these, although it
   *  does potentially simplify code to not have to look up all of their nouns
   *  at initialization time.
   */
  /**
   * Boolean values, expressed as 0/1 in the database and non-continuous for
   *  constraint purposes.  Like numbers, such nouns require their attributes
   *  to provide them with context, lacking any of their own.
   * Having this as a noun type may be a bad idea; a change of nomenclature
   *  (so that we are not claiming a boolean value is a noun, but still using
   *  it in the same way) or implementation to require each boolean noun
   *  actually be its own noun may be in order.
   */
  NOUN_BOOLEAN: 1,
  /**
   * A number, which could mean an integer or floating point values.  We treat
   *  these as continuous, meaning that queries on them can have ranged
   *  constraints expressed on them.  Lacking any inherent context, numbers
   *  depend on their attributes to parameterize them as required.
   * Same deal as with NOUN_BOOLEAN, we may need to change this up
   *  conceptually.
   */
  NOUN_NUMBER: 2,
  /**
   * A (non-fulltext) string.
   * Same deal as with NOUN_BOOLEAN, we may need to change this up
   *  conceptually.
   */
  NOUN_STRING: 3,
  /** A date, encoded as a PRTime, represented as a js Date object. */
  NOUN_DATE: 10,
  /**
   * Fulltext search support, somewhat magical.  This is only intended to be
   *  used for kSpecialFulltext attributes, and exclusively as a constraint
   *  mechanism.  The values are always represented as strings.  It is
   *  presumed that the user of this functionality knows how to generate
   *  SQLite FTS3 style MATCH queries, or is okay with us just gluing them
   *  together with " OR " when used in an or-constraint case.  Gloda's query
   *  mechanism currently lacks the ability to compile Gloda-style
   *  and-constraints into a single MATCH query, but it will turn out okay,
   *  just less efficiently than it could.
   */
  NOUN_FULLTEXT: 20,
  /**
   * Represents a MIME Type.  We currently lack any human-intelligible
   *  descriptions of mime types.
   */
  NOUN_MIME_TYPE: 40,
  /**
   * Captures a message tag as well as when the tag's presence was observed,
   *  hoping to approximate when the tag was applied.  It's a somewhat dubious
   *  attempt to not waste our opportunity to store a value along with the tag.
   *  (The tag is actually stored as an attribute parameter on the attribute
   *  definition, rather than a value in the attribute 'instance' for the
   *  message.)
   */
  NOUN_TAG: 50,
  /**
   * Doesn't actually work owing to a lack of an object to represent a folder.
   *  We do expose the folderURI and folderID of a message, but need to map
   *  that to a good abstraction.  Probably something thin around a
   *  SteelFolder or the like; we would contribute the functionality to easily
   *  move from a folder to the list of gloda messages in that folder, as well
   *  as the indexing preferences for that folder.
   * @TODO folder noun and related abstraction
   */
  NOUN_FOLDER: GlodaFolder.prototype.NOUN_ID, // 100
  /**
   * All messages belong to a conversation.  See datamodel.js for the
   *  definition of the GlodaConversation class.
   */
  NOUN_CONVERSATION: GlodaConversation.prototype.NOUN_ID, // 101
  /**
   * A one-to-one correspondence with underlying (indexed) nsIMsgDBHdr
   *  instances.  See datamodel.js for the definition of the GlodaMessage
   *  class.
   */
  NOUN_MESSAGE: GlodaMessage.prototype.NOUN_ID, // 102
  /**
   * Corresponds to a human being, who may have multiple electronic identities
   *  (a la NOUN_IDENTITY).  There is no requirement for association with an
   *  address book contact, although when the address book contact exists,
   *  we want to be associated with it.  See datamodel.js for the definition
   *  of the GlodaContact class.
   */
  NOUN_CONTACT: GlodaContact.prototype.NOUN_ID, // 103
  /**
   * A single identity of a contact, who may have one or more.  E-mail
   *  accounts, instant messaging accounts, social network site accounts, etc.
   *  are each identities.  See datamodel.js for the definition of the
   *  GlodaIdentity class.
   */
  NOUN_IDENTITY: GlodaIdentity.prototype.NOUN_ID, // 104
  /**
   * An attachment to a message.  A message may have many different
   *  attachments.
   */
  NOUN_ATTACHMENT: GlodaAttachment.prototype.NOUN_ID, // 105
  /**
   * An account related to a message.  A message can have only one account.
   */
  NOUN_ACCOUNT: GlodaAccount.prototype.NOUN_ID, // 106

  /**
   * Parameterized identities, for use in the from-me, to-me, cc-me
   *  optimization cases.  Not for reuse without some thought.  These nouns
   *  use the parameter to store the 'me' identity that we are talking about,
   *  and the value to store the identity of the other party.  So in both the
   *  from-me and to-me cases involving 'me' and 'foo@bar', the 'me' identity
   *  is always stored via the attribute parameter, and the 'foo@bar' identity
   *  is always stored as the attribute value.  See fundattr.js for more
   *  information on this, but you probably shouldn't be touching this unless
   *  you are fundattr.
   */
  NOUN_PARAM_IDENTITY: 200,

  /** Next Noun ID to hand out, these don't need to be persisted (for now). */
  _nextNounID: 1000,

  /**
   * Maps noun names to noun IDs.
   */
  _nounNameToNounID: {},
  /**
   * Maps noun IDs to noun definition dictionaries.  (Noun definition
   *  dictionaries provided to us at the time a noun was defined, plus some
   *  additional stuff we put in there.)
   */
  _nounIDToDef: {},

  // serialize a managed (table-backed) noun instance by its row id
  _managedToJSON: function gloda_ns_managedToJSON(aItem) {
    return aItem.id;
  },

  /**
   * Define a noun.  Takes a dictionary with the following keys/values:
   *
   * @param aNounDef.name The name of the noun.  This is not a display name
   *     (anything being displayed needs to be localized, after all), but
   *     simply the canonical name for debugging purposes and for people to
   *     pass to lookupNoun.
   *     The suggested convention is lower-case-dash-delimited,
   *     with names being singular (since it's a single noun we are referring
   *     to.)
   * @param aNounDef.class The 'class' to which an instance of the noun will
   *     belong (aka will pass an instanceof test).  You may also provide this
   *     as 'clazz' if the keyword makes your IDE angry.
   * @param aNounDef.allowsArbitraryAttrs Is this a 'first class noun'/can it
   *     be a subject, AKA can this noun have attributes stored on it that
   *     relate it to other things?  For example, a message is first-class; we
   *     store attributes of messages.  A date is not first-class now, nor is
   *     it likely to be; we will not store attributes about a date, although
   *     dates will be the objects of other subjects.  (For example: we might
   *     associate a date with a calendar event, but the date is an attribute
   *     of the calendar event and not vice versa.)
   * @param aNounDef.usesParameter A boolean indicating whether this noun
   *     requires use of the 'parameter' BLOB storage field on the attribute
   *     bindings in the database to persist itself.  Use of parameters should
   *     be limited to a reasonable number of values (16-32 is okay, more than
   *     that is pushing it and 256 should be considered an absolute upper
   *     bound) because of the database organization.  When false, your
   *     toParamAndValue function is expected to return null for the parameter
   *     and likewise your fromParamAndValue should expect and generally
   *     ignore the argument.
   * @param aNounDef.toParamAndValue A function that takes an instantiated
   *     noun instance and returns a 2-element list of [parameter, value]
   *     where parameter may only be non-null if you passed a usesParameter of
   *     true.  Parameter may be of any type (BLOB), and value must be numeric
   *     (pass 0 if you don't need the value).
   *
   * @param aNounDef.isPrimitive True when the noun instance is a raw numeric
   *     value/string/boolean.  False when the instance is an object.  When
   *     false, it is assumed the attribute that serves as a unique identifier
   *     for the value is "id" unless 'idAttr' is provided.
   * @param [aNounDef.idAttr="id"] For non-primitive nouns, this is the
   *     attribute on the object that uniquely identifies it.
   *
   * @param aNounDef.schema Unsupported mechanism by which you can define a
   *     table that corresponds to this noun.  The table will be created if it
   *     does not exist.
   *     - name The table name; don't conflict with other things!
   *     - columns A list of [column name, sqlite type] tuples.  You should
   *       always include a definition like ["id", "INTEGER PRIMARY KEY"] for
   *       now (and it should be the first column name too.)  If you care
   *       about how the attributes are poked into your object (for example,
   *       you want underscores used for some of them because the attributes
   *       should be immutable), then you can include a third string that is
   *       the name of the attribute to use.
   *     - indices A dictionary of lists of column names, where the key name
   *       becomes the index name.  Ex: {foo: ["bar"]} results in an index on
   *       the column "bar" where the index is named "foo".
   */
  defineNoun: function gloda_ns_defineNoun(aNounDef, aNounID) {
    this._log.info("Defining noun: " + aNounDef.name);
    // dynamically allocate an id unless the caller reserved an explicit one
    if (aNounID === undefined)
      aNounID = this._nextNounID++;
    aNounDef.id = aNounID;

    // Let people whose editors get angry about illegal attribute names use
    //  clazz instead of class.
    if (aNounDef.clazz)
      aNounDef.class = aNounDef.clazz;

    if (!("idAttr" in aNounDef))
      aNounDef.idAttr = "id";
    if (!("comparator" in aNounDef)) {
      // fail loudly rather than silently mis-sorting if no comparator given
      aNounDef.comparator = function() {
        throw new Error("Noun type '" + aNounDef.name +
                        "' lacks a real comparator.");
      };
    }

    // We allow nouns to have data tables associated with them where we do all
    //  the legwork.  The schema attribute is the gateway to this magical world
    //  of functionality.  Said door is officially unsupported.
    if (aNounDef.schema) {
      if (!aNounDef.tableName) {
        if (aNounDef.schema.name)
          aNounDef.tableName = "ext_" + aNounDef.schema.name;
        else
          aNounDef.tableName = "ext_" + aNounDef.name;
      }
      // this creates the data table and binder and hooks everything up
      GlodaDatastore.createNounTable(aNounDef);

      if (!aNounDef.toParamAndValue)
        aNounDef.toParamAndValue = function (aThing) {
          if (aThing instanceof aNounDef.class)
            return [null, aThing.id];
          else // assume they're just passing the id directly
            return [null, aThing];
        };
    }

    // if it has a table, you can query on it.  seems straight-forward.
    if (aNounDef.tableName) {
      [aNounDef.queryClass, aNounDef.nullQueryClass,
       aNounDef.explicitQueryClass, aNounDef.wildcardQueryClass] =
          GlodaQueryClassFactory(aNounDef);
      aNounDef._dbMeta = {};
      aNounDef.class.prototype.NOUN_ID = aNounDef.id;
      aNounDef.class.prototype.NOUN_DEF = aNounDef;
      aNounDef.toJSON = this._managedToJSON;

      aNounDef.specialLoadAttribs = [];

      // - define the 'id' constrainer
      let idConstrainer = function() {
        let constraint = [GlodaDatastore.kConstraintIdIn, null];
        for (let iArg = 0; iArg < arguments.length; iArg++) {
          constraint.push(arguments[iArg]);
        }
        this._constraints.push(constraint);
        return this;
      };
      aNounDef.queryClass.prototype.id = idConstrainer;
    }
    if (aNounDef.cache) {
      // size the LRU cache by dividing the memory budget by the per-instance
      //  cost estimate
      let cacheCost = aNounDef.cacheCost || 1024;
      let cacheBudget = aNounDef.cacheBudget || 128 * 1024;
      let cacheSize = Math.floor(cacheBudget / cacheCost);
      if (cacheSize)
        GlodaCollectionManager.defineCache(aNounDef, cacheSize);
    }
    aNounDef.attribsByBoundName = {};
    aNounDef.domExposeAttribsByBoundName = {};

    aNounDef.objectNounOfAttributes = [];

    this._nounNameToNounID[aNounDef.name] = aNounID;
    this._nounIDToDef[aNounID] = aNounDef;
    aNounDef.actions = [];

    this._attrProviderOrderByNoun[aNounDef.id] = [];
    this._attrOptimizerOrderByNoun[aNounDef.id] = [];
    this._attrProvidersByNoun[aNounDef.id] = {};

return aNounDef; + }, + + /** + * Lookup a noun (ID) suitable for passing to defineAttribute's various + * noun arguments. Throws an exception if the noun with the given name + * cannot be found; the assumption is that you can't live without the noun. + */ + lookupNoun: function gloda_ns_lookupNoun(aNounName) { + if (aNounName in this._nounNameToNounID) + return this._nounNameToNounID[aNounName]; + + throw Error("Unable to locate noun with name '" + aNounName + "', but I " + + "do know about: " + + Object.keys(this._nounNameToNounID).join(", ")); + }, + + /** + * Lookup a noun def given a name. + */ + lookupNounDef: function gloda_ns_lookupNoun(aNounName) { + return this._nounIDToDef[this.lookupNoun(aNounName)]; + }, + + + /** + * Define an action on a noun. During the prototype stage, this was conceived + * of as a way to expose all the constraints possible given a noun. For + * example, if you have an identity or a contact, you could use this to + * see all the messages sent from/to a given contact. It was likewise + * thought potentially usable for future expansion. For example, you could + * also decide to send an e-mail to a contact when you have the contact + * instance available. + * Outside of the 'expmess' checkbox-happy prototype, this functionality is + * not used. As such, this functionality should be considered in flux and + * subject to changes. Also, very open to specific suggestsions motivated + * by use cases. + * One conceptual issue raised by this mechanism is the interaction of actions + * with facts like "this message is read". We currently implement the 'fact' + * by defining an attribute with a 'boolean' noun type. To deal with this, + * in various places we pass-in the attribute as well as the noun value. + * Since the relationships for booleans and integers in these cases is + * standard and well-defined, this works out pretty well, but suggests we + * need to think things through. 
   *
   * @param aNounID The ID of the noun you want to define an action on.
   * @param aActionMeta The dictionary describing the noun.  The dictionary
   *     should have the following fields:
   * - actionType: a string indicating the type of action.  Currently, only
   *   "filter" is a legal value.
   * - actionTarget: the noun ID of the noun type on which this action is
   *   applicable.  For example,
   *
   * The following should be present for actionType=="filter";
   * - shortName: The name that should be used to display this constraint.
   *   For example, a checkbox-heavy UI might display a checkbox for each
   *   constraint using shortName as the label.
   * - makeConstraint: A function that takes the attribute that is the source
   *   of the noun and the noun instance as arguments, and returns APV-style
   *   constraints.  Since the APV-style query mechanism is now deprecated,
   *   this signature is deprecated.  Probably the way to update this would be
   *   to pass in the query instance that constraints should be contributed
   *   to.
   */
  defineNounAction: function gloda_ns_defineNounAction(aNounID, aActionMeta) {
    // NOTE(review): no guard against an unknown aNounID here; an unregistered
    //  noun would throw on the .actions access.  Presumably callers only pass
    //  ids previously returned by defineNoun -- confirm.
    let nounDef = this._nounIDToDef[aNounID];
    nounDef.actions.push(aActionMeta);
  },

  /**
   * Retrieve all of the actions (as defined using defineNounAction) for the
   *  given noun type (via noun ID) with the given action type (ex: filter).
   *  Returns an empty list for unknown nouns; omitting aActionType returns
   *  every action defined on the noun.
   */
  getNounActions: function gloda_ns_getNounActions(aNounID, aActionType) {
    let nounDef = this._nounIDToDef[aNounID];
    if (!nounDef)
      return [];
    return nounDef.actions.
      filter(action => !aActionType || (action.actionType == aActionType));
  },

  /** Attribute providers in the sequence to process them. */
  _attrProviderOrderByNoun: {},
  /** Attribute providers that provide optimizers, in the sequence to proc.
*/ + _attrOptimizerOrderByNoun: {}, + /** Maps attribute providers to the list of attributes they provide */ + _attrProviders: {}, + /** + * Maps nouns to their attribute providers to a list of the attributes they + * provide for the noun. + */ + _attrProvidersByNoun: {}, + + /** + * Define the core nouns (that are not defined elsewhere) and a few noun + * actions. Core nouns could be defined in other files, assuming dependency + * issues are resolved via the everybody.js mechanism or something else. + * Right now, noun_tag defines the tag noun. If we broke more of these out, + * we would probably want to move the 'class' code from datamodel.js, the + * SQL table def and helper code from datastore.js (and this code) to their + * own noun_*.js files. There are some trade-offs to be made, and I think + * we can deal with those once we start to integrate lightning/calendar and + * our noun space gets large and more heterogeneous. + */ + _initAttributes: function gloda_ns_initAttributes() { + this.defineNoun({ + name: "bool", + clazz: Boolean, allowsArbitraryAttrs: false, + isPrimitive: true, + // favor true before false + comparator: function gloda_bool_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + return b - a; + }, + toParamAndValue: function(aBool) { + return [null, aBool ? 
1 : 0]; + }}, this.NOUN_BOOLEAN); + this.defineNoun({ + name: "number", + clazz: Number, allowsArbitraryAttrs: false, continuous: true, + isPrimitive: true, + comparator: function gloda_number_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + return a - b; + }, + toParamAndValue: function(aNum) { + return [null, aNum]; + }}, this.NOUN_NUMBER); + this.defineNoun({ + name: "string", + clazz: String, allowsArbitraryAttrs: false, + isPrimitive: true, + comparator: function gloda_string_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + return a.localeCompare(b); + }, + toParamAndValue: function(aString) { + return [null, aString]; + }}, this.NOUN_STRING); + this.defineNoun({ + name: "date", + clazz: Date, allowsArbitraryAttrs: false, continuous: true, + isPrimitive: true, + comparator: function gloda_data_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + return a - b; + }, + toParamAndValue: function(aDate) { + return [null, aDate.valueOf() * 1000]; + }}, this.NOUN_DATE); + this.defineNoun({ + name: "fulltext", + clazz: String, allowsArbitraryAttrs: false, continuous: false, + isPrimitive: true, + comparator: function gloda_fulltext_comparator(a, b) { + throw new Error("Fulltext nouns are not comparable!"); + }, + // as noted on NOUN_FULLTEXT, we just pass the string around. it never + // hits the database, so it's okay. + toParamAndValue: function(aString) { + return [null, aString]; + }}, this.NOUN_FULLTEXT); + + this.defineNoun({ + name: "folder", + clazz: GlodaFolder, + allowsArbitraryAttrs: false, + isPrimitive: false, + queryHelpers: { + /** + * Query for accounts based on the account associated with folders. 
         * We
         *  walk all of the folders associated with an account and put them in
         *  the list of folders that match if gloda would index them.  This is
         *  unsuitable for producing a persistable constraint since it does
         *  not adapt for added/deleted folders.  However, it is sufficient
         *  for faceting.  Also, we don't persist constraints yet.
         *
         * @TODO The long-term solution is to move towards using arithmetic
         *     encoding on folder-id's like we use for MIME types and friends.
         */
        Account: function(aAttrDef, aArguments) {
          let folderValues = [];
          let seenRootFolders = {};
          for (let iArg = 0; iArg < aArguments.length; iArg++) {
            let givenFolder = aArguments[iArg];
            let givenMsgFolder = givenFolder.getXPCOMFolder(
                                   givenFolder.kActivityFolderOnlyNoData);
            let rootFolder = givenMsgFolder.rootFolder;

            // skip processing this folder if we have already processed its
            //  root folder.
            if (rootFolder.URI in seenRootFolders)
              continue;
            seenRootFolders[rootFolder.URI] = true;

            let allFolders = rootFolder.descendants;
            // NOTE(review): for...in over fixIterator relies on the legacy
            //  SpiderMonkey __iterator__ protocol; modern code would use
            //  for...of -- confirm before changing.
            for (let folder in fixIterator(allFolders, Ci.nsIMsgFolder)) {
              let folderFlags = folder.flags;

              // Ignore virtual folders, non-mail folders.
              // XXX this is derived from GlodaIndexer's shouldIndexFolder.
              //  This should probably just use centralized code or the like.
              if (!(folderFlags & Ci.nsMsgFolderFlags.Mail) ||
                  (folderFlags & Ci.nsMsgFolderFlags.Virtual))
                continue;
              // we only index local or IMAP folders
              if (!(folder instanceof Ci.nsIMsgLocalMailFolder) &&
                  !(folder instanceof Ci.nsIMsgImapMailFolder))
                continue;

              let glodaFolder = Gloda.getFolderForFolder(folder);
              folderValues.push(glodaFolder);
            }
          }
          return this._inConstraintHelper(aAttrDef, folderValues);
        }
      },
      comparator: function gloda_folder_comparator(a, b) {
        // nulls sort last
        if (a == null) {
          if (b == null)
            return 0;
          else
            return 1;
        }
        else if (b == null) {
          return -1;
        }
        return a.name.localeCompare(b.name);
      },
      toParamAndValue: function(aFolderOrGlodaFolder) {
        if (aFolderOrGlodaFolder instanceof GlodaFolder)
          return [null, aFolderOrGlodaFolder.id];
        else
          return [null, GlodaDatastore._mapFolder(aFolderOrGlodaFolder).id];
      }}, this.NOUN_FOLDER);
    this.defineNoun({
      name: "account",
      clazz: GlodaAccount,
      allowsArbitraryAttrs: false,
      isPrimitive: false,
      // accounts are compared by id; a null never equals a non-null
      equals: function(a, b) {
        if (a && !b || !a && b)
          return false;
        if (!a && !b)
          return true;
        return a.id == b.id;
      },
      comparator: function gloda_account_comparator(a, b) {
        if (a == null) {
          if (b == null)
            return 0;
          else
            return 1;
        }
        else if (b == null) {
          return -1;
        }
        return a.name.localeCompare(b.name);
      }}, this.NOUN_ACCOUNT);
    this.defineNoun({
      name: "conversation",
      clazz: GlodaConversation,
      allowsArbitraryAttrs: false,
      isPrimitive: false,
      cache: true, cacheCost: 512,
      tableName: "conversations",
      attrTableName: "messageAttributes", attrIDColumnName: "conversationID",
      datastore: GlodaDatastore,
      objFromRow: GlodaDatastore._conversationFromRow,
      comparator: function gloda_conversation_comparator(a, b) {
        if (a == null) {
          if (b == null)
            return 0;
          else
            return 1;
        }
        else if (b == null) {
          return -1;
        }
        return a.subject.localeCompare(b.subject);
      },
      toParamAndValue: function(aConversation) {
        if (aConversation instanceof GlodaConversation)
          return [null, aConversation.id];
        else // assume they're just passing the id directly
          return [null, aConversation];
      }}, this.NOUN_CONVERSATION);
    this.defineNoun({
      name: "message",
      clazz: GlodaMessage,
      allowsArbitraryAttrs: true,
      isPrimitive: false,
      cache: true, cacheCost: 2048,
      tableName: "messages",
      // we will always have a fulltext row, even for messages where we don't
      //  have the body available.  this is because we want the subject
      //  indexed.
      dbQueryJoinMagic:
        " INNER JOIN messagesText ON messages.id = messagesText.rowid",
      attrTableName: "messageAttributes", attrIDColumnName: "messageID",
      datastore: GlodaDatastore, objFromRow: GlodaDatastore._messageFromRow,
      dbAttribAdjuster: GlodaDatastore.adjustMessageAttributes,
      dbQueryValidityConstraintSuffix:
        " AND +deleted = 0 AND +folderID IS NOT NULL AND " +
        "+messageKey IS NOT NULL",
      // This is what's used when we have no validity constraints, i.e. we
      //  allow for ghost messages, which do not have a row in the
      //  messagesText table.
      dbQueryJoinMagicWithNoValidityConstraints:
        " LEFT JOIN messagesText ON messages.id = messagesText.rowid",
      objInsert: GlodaDatastore.insertMessage,
      objUpdate: GlodaDatastore.updateMessage,
      toParamAndValue: function(aMessage) {
        if (aMessage instanceof GlodaMessage)
          return [null, aMessage.id];
        else // assume they're just passing the id directly
          return [null, aMessage];
      }}, this.NOUN_MESSAGE);
    this.defineNoun({
      name: "contact",
      clazz: GlodaContact,
      allowsArbitraryAttrs: true,
      isPrimitive: false,
      cache: true, cacheCost: 128,
      tableName: "contacts",
      attrTableName: "contactAttributes", attrIDColumnName: "contactID",
      datastore: GlodaDatastore, objFromRow: GlodaDatastore._contactFromRow,
      dbAttribAdjuster: GlodaDatastore.adjustAttributes,
      objInsert: GlodaDatastore.insertContact,
      objUpdate: GlodaDatastore.updateContact,
      comparator: function gloda_contact_comparator(a, b) {
        if (a == null) {
          if (b == null)
            return 0;
          else
            return 1;
        }
        else if (b == null) {
          return -1;
        }
        return a.name.localeCompare(b.name);
      },
      toParamAndValue: function(aContact) {
        if (aContact instanceof GlodaContact)
          return [null, aContact.id];
        else // assume they're just passing the id directly
          return [null, aContact];
      }}, this.NOUN_CONTACT);
    this.defineNoun({
      name: "identity",
      clazz: GlodaIdentity,
      allowsArbitraryAttrs: false,
      isPrimitive: false,
      cache: true, cacheCost: 128,
      usesUniqueValue: true,
      tableName: "identities",
      datastore: GlodaDatastore, objFromRow: GlodaDatastore._identityFromRow,
      /**
       * Short string is the contact name, long string includes the identity
       *  value too, delimited by a colon.  Not tremendously localizable.
+ */ + userVisibleString: function(aIdentity, aLong) { + if (!aLong) + return aIdentity.contact.name; + if (aIdentity.contact.name == aIdentity.value) + return aIdentity.value; + return aIdentity.contact.name + " (" + aIdentity.value + ")"; + }, + comparator: function gloda_identity_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + return a.contact.name.localeCompare(b.contact.name); + }, + toParamAndValue: function(aIdentity) { + if (aIdentity instanceof GlodaIdentity) + return [null, aIdentity.id]; + else // assume they're just passing the id directly + return [null, aIdentity]; + }}, this.NOUN_IDENTITY); + this.defineNoun({ + name: "attachment-infos", + clazz: GlodaAttachment, + allowsArbitraryAttrs: false, + isPrimitive: false, + toJSON: function (x) { + return [ + x._name, + x._contentType, + x._size, + x._part, + x._externalUrl, + x._isExternal + ] + }, + fromJSON: function (x, aGlodaMessage) { + let [name, contentType, size, _part, _externalUrl, isExternal] = x; + return new GlodaAttachment(aGlodaMessage, name, contentType, size, _part, _externalUrl, isExternal); + }, + }, this.NOUN_ATTACHMENT); + + // parameterized identity is just two identities; we store the first one + // (whose value set must be very constrainted, like the 'me' identities) + // as the parameter, the second (which does not need to be constrained) + // as the value. + this.defineNoun({ + name: "parameterized-identity", + clazz: null, + allowsArbitraryAttrs: false, + comparator: function gloda_fulltext_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + // First sort by the first identity in the tuple + // Since our general use-case is for the first guy to be "me", we only + // compare the identity value, not the name. 
+ let fic = a[0].value.localeCompare(b[0].value); + if (fic) + return fic; + // Next compare the second identity in the tuple, but use the contact + // this time to be consistent with our identity comparator. + return a[1].contact.name.localeCompare(b[1].contact.name); + }, + computeDelta: function(aCurValues, aOldValues) { + let oldMap = {}; + for (let tupe of aOldValues) { + let [originIdentity, targetIdentity] = tupe; + let targets = oldMap[originIdentity]; + if (targets === undefined) + targets = oldMap[originIdentity] = {}; + targets[targetIdentity] = true; + } + + let added = [], removed = []; + for (let tupe of aCurValues) { + let [originIdentity, targetIdentity] = tupe; + let targets = oldMap[originIdentity]; + if ((targets === undefined) || !(targetIdentity in targets)) + added.push(tupe); + else + delete targets[targetIdentity]; + } + + for (let originIdentity in oldMap) { + let targets = oldMap[originIdentity]; + for (let targetIdentity in targets) { + removed.push([originIdentity, targetIdentity]); + } + } + + return [added, removed]; + }, + contributeObjDependencies: function(aJsonValues, aReferencesByNounID, + aInverseReferencesByNounID) { + // nothing to do with a zero-length list + if (aJsonValues.length == 0) + return false; + + let nounIdentityDef = Gloda._nounIDToDef[Gloda.NOUN_IDENTITY]; + let references = aReferencesByNounID[nounIdentityDef.id]; + if (references === undefined) + references = aReferencesByNounID[nounIdentityDef.id] = {}; + + for (let tupe of aJsonValues) { + let [originIdentityID, targetIdentityID] = tupe; + if (!(originIdentityID in references)) + references[originIdentityID] = null; + if (!(targetIdentityID in references)) + references[targetIdentityID] = null; + } + + return true; + }, + resolveObjDependencies: function(aJsonValues, aReferencesByNounID, + aInverseReferencesByNounID) { + let references = + aReferencesByNounID[Gloda.NOUN_IDENTITY]; + + let results = []; + for (let tupe of aJsonValues) { + let [originIdentityID, 
targetIdentityID] = tupe; + results.push([references[originIdentityID], + references[targetIdentityID]]); + } + + return results; + }, + toJSON: function (aIdentityTuple) { + return [aIdentityTuple[0].id, aIdentityTuple[1].id]; + }, + toParamAndValue: function(aIdentityTuple) { + return [aIdentityTuple[0].id, aIdentityTuple[1].id]; + }}, this.NOUN_PARAM_IDENTITY); + + GlodaDatastore.getAllAttributes(); + }, + + /** + * Create accessor functions to 'bind' an attribute to underlying normalized + * attribute storage, as well as creating the appropriate query object + * constraint helper functions. This name is somewhat of a misnomer because + * special attributes are not 'bound' (because specific/non-generic per-class + * code provides the properties) but still depend on this method to + * establish their constraint helper methods. + * + * @XXX potentially rename to not suggest binding is required. + */ + _bindAttribute: function gloda_ns_bindAttr(aAttrDef, aSubjectNounDef) { + let objectNounDef = aAttrDef.objectNounDef; + + // -- the query constraint helpers + if (aSubjectNounDef.queryClass !== undefined) { + let constrainer; + let canQuery = true; + if (aAttrDef.special == this.kSpecialFulltext) { + constrainer = function() { + let constraint = [GlodaDatastore.kConstraintFulltext, aAttrDef]; + for (let iArg = 0; iArg < arguments.length; iArg++) { + constraint.push(arguments[iArg]); + } + this._constraints.push(constraint); + return this; + }; + } + else if (aAttrDef.canQuery || aAttrDef.attributeName.startsWith("_")) { + constrainer = function() { + let constraint = [GlodaDatastore.kConstraintIn, aAttrDef]; + for (let iArg = 0; iArg < arguments.length; iArg++) { + constraint.push(arguments[iArg]); + } + this._constraints.push(constraint); + return this; + }; + } else { + constrainer = function() { + throw new Error( + "Cannot query on attribute "+aAttrDef.attributeName + + " because its canQuery parameter hasn't been set to true." 
+ + " Reading the comments about Gloda.defineAttribute may be a" + + " sensible thing to do now."); + } + canQuery = false; + } + + aSubjectNounDef.queryClass.prototype[aAttrDef.boundName] = constrainer; + + // Don't bind extra query-able attributes if we're unable to perform a + // search on the attribute. + if (!canQuery) + return; + + // - ranged value helper: fooRange + if (objectNounDef.continuous) { + // takes one or more tuples of [lower bound, upper bound] + let rangedConstrainer = function() { + let constraint = [GlodaDatastore.kConstraintRanges, aAttrDef]; + for (let iArg = 0; iArg < arguments.length; iArg++ ) { + constraint.push(arguments[iArg]); + } + this._constraints.push(constraint); + return this; + }; + + aSubjectNounDef.queryClass.prototype[aAttrDef.boundName + "Range"] = + rangedConstrainer; + } + + // - string LIKE helper for special on-row attributes: fooLike + // (it is impossible to store a string as an indexed attribute, which is + // why we do this for on-row only.) + if (aAttrDef.special == this.kSpecialString) { + let likeConstrainer = function() { + let constraint = [GlodaDatastore.kConstraintStringLike, aAttrDef]; + for (let iArg = 0; iArg < arguments.length; iArg++) { + constraint.push(arguments[iArg]); + } + this._constraints.push(constraint); + return this; + }; + + aSubjectNounDef.queryClass.prototype[aAttrDef.boundName + "Like"] = + likeConstrainer; + } + + // - Custom helpers provided by the noun type... + if ("queryHelpers" in objectNounDef) { + for (let name in objectNounDef.queryHelpers) { + let helper = objectNounDef.queryHelpers[name]; + // we need a new closure... 
+ let helperFunc = helper; + aSubjectNounDef.queryClass.prototype[aAttrDef.boundName + name] = + function() { + return helperFunc.call(this, aAttrDef, arguments); + }; + } + } + } + }, + + /** + * Names of attribute-specific localized strings and the JS attribute they are + * exposed as in the attribute's "strings" attribute (if the provider has a + * string bundle exposed on its "strings" attribute). They are rooted at + * "gloda.SUBJECT-NOUN-NAME.attr.ATTR-NAME.*". + * + * Please consult the localization notes in gloda.properties to understand + * what these are used for. + */ + _ATTR_LOCALIZED_STRINGS: { + /* - Faceting */ + facetNameLabel: "facetNameLabel", + includeLabel: "includeLabel", + excludeLabel: "excludeLabel", + remainderLabel: "remainderLabel", + mustMatchLabel: "mustMatchLabel", + cantMatchLabel: "cantMatchLabel", + mayMatchLabel: "mayMatchLabel", + mustMatchNoneLabel: "mustMatchNoneLabel", + mustMatchSomeLabel: "mustMatchSomeLabel", + mayMatchAnyLabel: "mayMatchAnyLabel", + }, + /** + * Define an attribute and all its meta-data. Takes a single dictionary as + * its argument, with the following required properties: + * + * @param aAttrDef.provider The object instance providing a 'process' method. + * @param aAttrDef.extensionName The name of the extension providing these + * attributes. + * @param aAttrDef.attributeType The type of attribute, one of the values from + * the kAttr* enumeration. + * @param aAttrDef.attributeName The name of the attribute, which also doubles + * as the bound property name if you pass 'bind' a value of true. You are + * responsible for avoiding collisions, which presumably will mean + * checking/updating a wiki page in the future, or just prefixing your + * attribute name with your extension name or something like that. + * @param aAttrDef.bind Should this attribute be 'bound' as a convenience + * attribute on the subject's object (true/false)? 
For example, with an + * attributeName of "foo" and passing true for 'bind' with a subject noun + * of NOUN_MESSAGE, GlodaMessage instances will expose a "foo" getter that + * returns the value of the attribute. If 'singular' is true, this means + * an instance of the object class corresponding to the noun type or null + * if the attribute does not exist. If 'singular' is false, this means a + * list of instances of the object class corresponding to the noun type, + * where the list may be empty if no instances of the attribute are + * present. + * @param aAttrDef.bindName Optional override of attributeName for purposes of + * the binding property's name. + * @param aAttrDef.singular Is the attribute going to happen at most once + * (true), or potentially multiple times (false). This affects whether + * the binding returns a list or just a single item (which is null when + * the attribute is not present). + * @param [aAttrDef.emptySetIsSignificant=false] Should we + * @param aAttrDef.subjectNouns A list of object types (NOUNs) that this + * attribute can be set on. Each element in the list should be one of the + * NOUN_* constants or a dynamically registered noun type. + * @param aAttrDef.objectNoun The object type (one of the NOUN_* constants or + * a dynamically registered noun types) that is the 'object' in the + * traditional RDF triple. More pragmatically, in the database row used + * to represent an attribute, we store the subject (ex: message ID), + * attribute ID, and an integer which is the integer representation of the + * 'object' whose type you are defining right here. 
+ */ + defineAttribute: function gloda_ns_defineAttribute(aAttrDef) { + // ensure required properties exist on aAttrDef + if (!("provider" in aAttrDef) || + !("extensionName" in aAttrDef) || + !("attributeType" in aAttrDef) || + !("attributeName" in aAttrDef) || + !("singular" in aAttrDef) || + !("subjectNouns" in aAttrDef) || + !("objectNoun" in aAttrDef)) + // perhaps we should have a list of required attributes, perchance with + // an explanation of what it holds, and use that to be friendlier? + throw Error("You omitted a required attribute defining property, please" + + " consult the documentation as penance."); + + // -- Fill in defaults + if (!("emptySetIsSignificant" in aAttrDef)) + aAttrDef.emptySetIsSignificant = false; + + if (!("canQuery" in aAttrDef)) + aAttrDef.canQuery = aAttrDef.facet ? true : false; + + // return if the attribute has already been defined + if (aAttrDef.dbDef) + return aAttrDef; + + // - first time we've seen a provider: init logic + if (!(aAttrDef.provider.providerName in this._attrProviders)) { + this._attrProviders[aAttrDef.provider.providerName] = []; + if (aAttrDef.provider.contentWhittle) + whittlerRegistry.registerWhittler(aAttrDef.provider); + } + + let compoundName = aAttrDef.extensionName + ":" + aAttrDef.attributeName; + // -- Database Definition + let attrDBDef; + if (compoundName in GlodaDatastore._attributeDBDefs) { + // the existence of the GlodaAttributeDBDef means that either it has + // already been fully defined, or has been loaded from the database but + // not yet 'bound' to a provider (and had important meta-info that + // doesn't go in the db copied over) + attrDBDef = GlodaDatastore._attributeDBDefs[compoundName]; + } + // we need to create the attribute definition in the database + else { + let attrID = null; + attrID = GlodaDatastore._createAttributeDef(aAttrDef.attributeType, + aAttrDef.extensionName, + aAttrDef.attributeName, + null); + + attrDBDef = new GlodaAttributeDBDef(GlodaDatastore, attrID, 
compoundName, + aAttrDef.attributeType, aAttrDef.extensionName, aAttrDef.attributeName); + GlodaDatastore._attributeDBDefs[compoundName] = attrDBDef; + GlodaDatastore._attributeIDToDBDefAndParam[attrID] = [attrDBDef, null]; + } + + aAttrDef.dbDef = attrDBDef; + attrDBDef.attrDef = aAttrDef; + + aAttrDef.id = aAttrDef.dbDef.id; + + if ("bindName" in aAttrDef) + aAttrDef.boundName = aAttrDef.bindName; + else + aAttrDef.boundName = aAttrDef.attributeName; + + aAttrDef.objectNounDef = this._nounIDToDef[aAttrDef.objectNoun]; + aAttrDef.objectNounDef.objectNounOfAttributes.push(aAttrDef); + + // -- Facets + function normalizeFacetDef(aFacetDef) { + if (!("groupIdAttr" in aFacetDef)) + aFacetDef.groupIdAttr = aAttrDef.objectNounDef.idAttr; + if (!("groupComparator" in aFacetDef)) + aFacetDef.groupComparator = aAttrDef.objectNounDef.comparator; + if (!("filter" in aFacetDef)) + aFacetDef.filter = null; + } + // No facet attribute means no facet desired; set an explicit null so that + // code can check without doing an "in" check. + if (!("facet" in aAttrDef)) + aAttrDef.facet = null; + // Promote "true" facet values to the defaults. Where attributes have + // specified values, make sure we fill in any missing defaults. + else { + if (aAttrDef.facet == true) { + aAttrDef.facet = { + type: "default", + groupIdAttr: aAttrDef.objectNounDef.idAttr, + groupComparator: aAttrDef.objectNounDef.comparator, + filter: null, + }; + } + else { + normalizeFacetDef(aAttrDef.facet); + } + } + if ("extraFacets" in aAttrDef) { + for (let facetDef of aAttrDef.extraFacets) { + normalizeFacetDef(facetDef); + } + } + + function gatherLocalizedStrings(aBundle, aPropRoot, aStickIn) { + for (let propName in Gloda._ATTR_LOCALIZED_STRINGS) { + let attrName = Gloda._ATTR_LOCALIZED_STRINGS[propName]; + try { + aStickIn[attrName] = aBundle.get(aPropRoot + propName); + } + catch (ex) { + // do nothing. 
nsIStringBundle throws exceptions because it is a + // standard nsresult type of API and our helper buddy does nothing + // to help us. (StringBundle.js, that is.) + } + } + } + + // -- L10n. + // If the provider has a string bundle, populate a "strings" attribute with + // our standard attribute strings that can be UI exposed. + if (("strings" in aAttrDef.provider) && (aAttrDef.facet)) { + let bundle = aAttrDef.provider.strings; + + // -- attribute strings + let attrStrings = aAttrDef.facet.strings = {}; + // we use the first subject the attribute applies to as the basis of + // where to get the string from. Mainly because we currently don't have + // any attributes with multiple subjects nor a use-case where we expose + // multiple noun types via the UI. (Just messages right now.) + let canonicalSubject = this._nounIDToDef[aAttrDef.subjectNouns[0]]; + let propRoot = "gloda." + canonicalSubject.name + ".attr." + + aAttrDef.attributeName + "."; + gatherLocalizedStrings(bundle, propRoot, attrStrings); + + // -- alias strings for synthetic facets + if ("extraFacets" in aAttrDef) { + for (let facetDef of aAttrDef.extraFacets) { + facetDef.strings = {}; + let aliasPropRoot = "gloda." + canonicalSubject.name + ".attr." + + facetDef.alias + "."; + gatherLocalizedStrings(bundle, aliasPropRoot, facetDef.strings); + } + } + } + + // -- Subject Noun Binding + for (let iSubject = 0; iSubject < aAttrDef.subjectNouns.length; + iSubject++) { + let subjectType = aAttrDef.subjectNouns[iSubject]; + let subjectNounDef = this._nounIDToDef[subjectType]; + this._bindAttribute(aAttrDef, subjectNounDef); + + // update the provider maps... 
+ if (this._attrProviderOrderByNoun[subjectType] + .indexOf(aAttrDef.provider) == -1) { + this._attrProviderOrderByNoun[subjectType].push(aAttrDef.provider); + if (aAttrDef.provider.optimize) + this._attrOptimizerOrderByNoun[subjectType].push(aAttrDef.provider); + this._attrProvidersByNoun[subjectType][aAttrDef.provider] = []; + } + this._attrProvidersByNoun[subjectType][aAttrDef.provider].push(aAttrDef); + + subjectNounDef.attribsByBoundName[aAttrDef.boundName] = aAttrDef; + if (aAttrDef.domExpose) + subjectNounDef.domExposeAttribsByBoundName[aAttrDef.boundName] = + aAttrDef; + + if (aAttrDef.special & this.kSpecialColumn) + subjectNounDef.specialLoadAttribs.push(aAttrDef); + + // if this is a parent column attribute, make note of it so that if we + // need to do an inverse references lookup, we know what column we are + // issuing against. + if (aAttrDef.special === this.kSpecialColumnParent) { + subjectNounDef.parentColumnAttr = aAttrDef; + } + + if (aAttrDef.objectNounDef.tableName || + aAttrDef.objectNounDef.contributeObjDependencies) { + subjectNounDef.hasObjDependencies = true; + } + } + + this._attrProviders[aAttrDef.provider.providerName].push(aAttrDef); + return aAttrDef; + }, + + /** + * Retrieve the attribute provided by the given extension with the given + * attribute name. The original idea was that plugins would effectively + * name-space attributes, helping avoid collisions. Since we are leaning + * towards using binding heavily, this doesn't really help, as the collisions + * will just occur on the attribute name instead. Also, this can turn + * extensions into liars as name changes/moves to core/etc. happen. + * @TODO consider removing the extension name argument parameter requirement + */ + getAttrDef: function gloda_ns_getAttrDef(aPluginName, aAttrName) { + let compoundName = aPluginName + ":" + aAttrName; + return GlodaDatastore._attributeDBDefs[compoundName]; + }, + + /** + * Create a new query instance for the given noun-type. 
This provides + * a generic way to provide constraint-based queries of any first-class + * nouns supported by the system. + * + * The idea is that every attribute on an object can be used to express + * a constraint on the query object. Constraints implicitly 'AND' together, + * but providing multiple arguments to a constraint function results in an + * 'OR'ing of those values. Additionally, you can call or() on the returned + * query to create an alternate query that is effectively a giant OR against + * all the constraints you create on the main query object (or any other + * alternate queries returned by or()). (Note: there is no nesting of these + * alternate queries. query.or().or() is equivalent to query.or()) + * For each attribute, there is a constraint with the same name that takes + * one or more arguments. The arguments represent a set of OR values that + * objects matching the query can have. (If you want the constraint + * effectively ANDed together, just invoke the constraint function + * multiple times.) For example, newQuery(NOUN_PERSON).age(25) would + * constrain to all the people aged 25, while age(25, 26) would constrain + * to all the people age 25 or 26. + * For each attribute with a 'continuous' noun, there is a constraint with the + * attribute name with "Range" appended. It takes two arguments which are an + * inclusive lower bound and an inclusive upper bound for values in the + * range. If you would like an open-ended range on either side, pass null + * for that argument. If you would like to specify multiple ranges that + * should be ORed together, simply pass additional (pairs of) arguments. + * For example, newQuery(NOUN_PERSON).age(25,100) would constrain to all + * the people who are >= 25 and <= 100. Likewise age(25, null) would just + * return all the people who are 25 or older. And age(25,30,35,40) would + * return people who are either 25-30 or 35-40. + * There are also full-text constraint columns. 
In a nutshell, their + * arguments are the strings that should be passed to the SQLite FTS3 + * MATCH clause. + * + * @param aNounID The (integer) noun-id of the noun you want to query on. + * @param aOptions an optional dictionary of query options, see the GlodaQuery + * class documentation. + */ + newQuery: function gloda_ns_newQuery(aNounID, aOptions) { + let nounDef = this._nounIDToDef[aNounID]; + return new nounDef.queryClass(aOptions); + }, + + /** + * Create a collection/query for the given noun-type that only matches the + * provided items. This is to be used when you have an explicit set of items + * that you would still like to receive updates for. + */ + explicitCollection: function gloda_ns_explicitCollection(aNounID, aItems) { + let nounDef = this._nounIDToDef[aNounID]; + let collection = new GlodaCollection(nounDef, aItems, null, null); + let query = new nounDef.explicitQueryClass(collection); + collection.query = query; + GlodaCollectionManager.registerCollection(collection); + return collection; + }, + + /** + * Debugging 'wildcard' collection creation support. A wildcard collection + * will 'accept' any new item instances presented to the collection manager + * as new. The result is that it allows you to be notified as new items + * as they are indexed, existing items as they are loaded from the database, + * etc. + * Because the items are added to the collection without limit, this will + * result in a leak if you don't do something to clean up after the + * collection. (Forgetting about the collection will suffice, as it is still + * weakly held.) 
+ */ + _wildcardCollection: function gloda_ns_wildcardCollection(aNounID, aItems) { + let nounDef = this._nounIDToDef[aNounID]; + let collection = new GlodaCollection(nounDef, aItems, null, null); + let query = new nounDef.wildcardQueryClass(collection); + collection.query = query; + GlodaCollectionManager.registerCollection(collection); + return collection; + }, + + /** + * Attribute providers attempting to index something that experience a fatal + * problem should throw one of these. For example: + * "throw new Gloda.BadItemContentsError('Message lacks an author.');". + * + * We're not really taking advantage of this yet, but it's a good idea. + */ + BadItemContentsError: BadItemContentsError, + + /** + * Populate a gloda representation of an item given the thus-far built + * representation, the previous representation, and one or more raw + * representations. The attribute providers/optimizers for the given noun + * type are invoked, allowing them to contribute/alter things. Following + * that, we build and persist our attribute representations. + * + * The result of the processing ends up with attributes in 3 different forms: + * - Database attribute rows (to be added and removed). + * - In-memory representation. + * - JSON-able representation. + * + * @param aItem The noun instance you want processed. + * @param aRawReps A dictionary that we pass to the attribute providers. + * There is a(n implied) contract between the caller of grokNounItem for a + * given noun type and the attribute providers for that noun type, and we + * have nothing to do with it OTHER THAN inserting a 'trueGlodaRep' + * value into it. In the event of reindexing an existing object, the + * gloda representation we pass to the indexers is actually a clone that + * allows the asynchronous indexers to mutate the object without + * causing visible changes in the existing representation of the gloda + * object. 
We patch the changes back onto the original item atomically + * once indexing completes. The 'trueGlodaRep' is then useful for + * objects that hang off of the gloda instance that need a reference + * back to their containing object for API convenience purposes. + * @param aIsConceptuallyNew Is the item "new" in the sense that it would + * never have been visible from within user code? This translates into + * whether this should trigger an itemAdded notification or an + * itemModified notification. + * @param aIsRecordNew Is the item "new" in the sense that we should INSERT + * a record rather than UPDATE-ing a record. For example, when dealing + * with messages where we may have a ghost, the ghost message is not a + * new record, but is conceptually new. + * @param aCallbackHandle The GlodaIndexer-style callback handle that is being + * used to drive this processing in an async fashion. (See + * GlodaIndexer._callbackHandle). + * @param aDoCache Should we allow this item to be contributed to its noun + * cache? + */ + grokNounItem: function* gloda_ns_grokNounItem(aItem, aRawReps, + aIsConceptuallyNew, aIsRecordNew, aCallbackHandle, aDoCache) { + let itemNounDef = aItem.NOUN_DEF; + let attribsByBoundName = itemNounDef.attribsByBoundName; + + this._log.info(" ** grokNounItem: " + itemNounDef.name); + + let addDBAttribs = []; + let removeDBAttribs = []; + + let jsonDict = {}; + + let aOldItem; + aRawReps.trueGlodaRep = aItem; + if (aIsConceptuallyNew) // there is no old item if we are new. 
+ aOldItem = {}; + else { + aOldItem = aItem; + // we want to create a clone of the existing item so that we can know the + // deltas that happened for indexing purposes + aItem = aItem._clone(); + } + + // Have the attribute providers directly set properties on the aItem + let attrProviders = this._attrProviderOrderByNoun[itemNounDef.id]; + for (let iProvider = 0; iProvider < attrProviders.length; iProvider++) { + this._log.info(" * provider: " + attrProviders[iProvider].providerName); + yield aCallbackHandle.pushAndGo( + attrProviders[iProvider].process(aItem, aRawReps, aIsConceptuallyNew, + aCallbackHandle)); + } + + let attrOptimizers = this._attrOptimizerOrderByNoun[itemNounDef.id]; + for (let iProvider = 0; iProvider < attrOptimizers.length; iProvider++) { + this._log.info(" * optimizer: " + attrOptimizers[iProvider].providerName); + yield aCallbackHandle.pushAndGo( + attrOptimizers[iProvider].optimize(aItem, aRawReps, aIsConceptuallyNew, + aCallbackHandle)); + } + this._log.info(" ** done with providers."); + + // Iterate over the attributes on the item + for (let key of Object.keys(aItem)) { + let value = aItem[key]; + // ignore keys that start with underscores, they are private and not + // persisted by our attribute mechanism. (they are directly handled by + // the object implementation.) + if (key.startsWith("_")) + continue; + // find the attribute definition that corresponds to this key + let attrib = attribsByBoundName[key]; + // if there's no attribute, that's not good, but not horrible. 
+ if (attrib === undefined) { + this._log.warn("new proc ignoring attrib: " + key); + continue; + } + + let attribDB = attrib.dbDef; + let objectNounDef = attrib.objectNounDef; + + // - translate for our JSON rep + if (attrib.singular) { + if (objectNounDef.toJSON) + jsonDict[attrib.id] = objectNounDef.toJSON(value); + else + jsonDict[attrib.id] = value; + } + else { + if (objectNounDef.toJSON) { + let toJSON = objectNounDef.toJSON; + jsonDict[attrib.id] = []; + for (let [, subValue] in Iterator(value)) { + jsonDict[attrib.id].push(toJSON(subValue)); + } + } + else + jsonDict[attrib.id] = value; + } + + let oldValue = aOldItem[key]; + + // the 'old' item is still the canonical one; update it + // do the update now, because we may skip operations on addDBAttribs and + // removeDBattribs, if the attribute is not to generate entries in + // messageAttributes + if (oldValue !== undefined || !aIsConceptuallyNew) + aOldItem[key] = value; + + // the new canQuery property has to be set to true to generate entries + // in the messageAttributes table. Any other truthy value (like a non + // empty string), will still make the message query-able but without + // using the database. + if (attrib.canQuery !== true) { + continue; + } + + // - database index attributes + + // perform a delta analysis against the old value, if we have one + if (oldValue !== undefined) { + // in the singular case if they don't match, it's one add and one remove + if (attrib.singular) { + // test for identicality, failing that, see if they have explicit + // equals support. 
+ if ((value !== oldValue) && + (!value.equals || !value.equals(oldValue))) { + addDBAttribs.push(attribDB.convertValuesToDBAttributes([value])[0]); + removeDBAttribs.push( + attribDB.convertValuesToDBAttributes([oldValue])[0]); + } + } + // in the plural case, we have to figure the deltas accounting for + // possible changes in ordering (which is insignificant from an + // indexing perspective) + // some nouns may not meet === equivalence needs, so must provide a + // custom computeDelta method to help us out + else if (objectNounDef.computeDelta) { + let [valuesAdded, valuesRemoved] = + objectNounDef.computeDelta(value, oldValue); + // convert the values to database-style attribute rows + addDBAttribs.push.apply(addDBAttribs, + attribDB.convertValuesToDBAttributes(valuesAdded)); + removeDBAttribs.push.apply(removeDBAttribs, + attribDB.convertValuesToDBAttributes(valuesRemoved)); + } + else { + // build a map of the previous values; we will delete the values as + // we see them so that we will know what old values are no longer + // present in the current set of values. + let oldValueMap = {}; + for (let anOldValue of oldValue) { + // remember, the key is just the toString'ed value, so we need to + // store and use the actual value as the value! + oldValueMap[anOldValue] = anOldValue; + } + // traverse the current values... + let valuesAdded = []; + for (let curValue of value) { + if (curValue in oldValueMap) + delete oldValueMap[curValue]; + else + valuesAdded.push(curValue); + } + // anything still on oldValueMap was removed. + let valuesRemoved = Object.keys(oldValueMap). + map(key => oldValueMap[key]); + // convert the values to database-style attribute rows + addDBAttribs.push.apply(addDBAttribs, + attribDB.convertValuesToDBAttributes(valuesAdded)); + removeDBAttribs.push.apply(removeDBAttribs, + attribDB.convertValuesToDBAttributes(valuesRemoved)); + } + + // Add/remove the empty set indicator as appropriate. 
+ if (attrib.emptySetIsSignificant) { + // if we are now non-zero but previously were zero, remove. + if (value.length && !oldValue.length) + removeDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + // if we are now zero length but previously were not, add + else if (!value.length && oldValue.length) + addDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + } + } + // no old value, all values are new + else { + // add the db reps on the new values + if (attrib.singular) + value = [value]; + addDBAttribs.push.apply(addDBAttribs, + attribDB.convertValuesToDBAttributes(value)); + // Add the empty set indicator for the attribute id if appropriate. + if (!value.length && attrib.emptySetIsSignificant) + addDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + } + } + + // Iterate over any remaining values in old items for purge purposes. + for (let key of Object.keys(aOldItem)) { + let value = aOldItem[key]; + // ignore keys that start with underscores, they are private and not + // persisted by our attribute mechanism. (they are directly handled by + // the object implementation.) + if (key.startsWith("_")) + continue; + // ignore things we saw in the new guy + if (key in aItem) + continue; + + // find the attribute definition that corresponds to this key + let attrib = attribsByBoundName[key]; + // if there's no attribute, that's not good, but not horrible. 
+ if (attrib === undefined) { + continue; + } + + // delete these from the old item, as the old item is canonical, and + // should no longer have these values + delete aOldItem[key]; + + if (attrib.canQuery !== true) { + this._log.debug("Not inserting attribute "+attrib.attributeName + +" into the db, since we don't plan on querying on it"); + continue; + } + + if (attrib.singular) + value = [value]; + let attribDB = attrib.dbDef; + removeDBAttribs.push.apply(removeDBAttribs, + attribDB.convertValuesToDBAttributes(value)); + // remove the empty set marker if there should have been one + if (!value.length && attrib.emptySetIsSignificant) + removeDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + } + + aItem._jsonText = JSON.stringify(jsonDict); + this._log.debug(" json text: " + aItem._jsonText); + + if (aIsRecordNew) { + this._log.debug(" inserting item"); + itemNounDef.objInsert.call(itemNounDef.datastore, aItem); + } + else { + this._log.debug(" updating item"); + itemNounDef.objUpdate.call(itemNounDef.datastore, aItem); + } + + this._log.debug(" adjusting attributes, add: " + addDBAttribs + " rem: " + + removeDBAttribs); + itemNounDef.dbAttribAdjuster.call(itemNounDef.datastore, aItem, + addDBAttribs, removeDBAttribs); + + if (!aIsConceptuallyNew && ("_declone" in aOldItem)) + aOldItem._declone(aItem); + + // Cache ramifications... + if (aDoCache === undefined || aDoCache) { + if (aIsConceptuallyNew) + GlodaCollectionManager.itemsAdded(aItem.NOUN_ID, [aItem]); + else + GlodaCollectionManager.itemsModified(aOldItem.NOUN_ID, [aOldItem]); + } + + this._log.debug(" done grokking."); + + yield this.kWorkDone; + }, + + /** + * Processes a list of noun instances for their score within a given context. + * This is primarily intended for use by search ranking mechanisms, but could + * be used elsewhere too. (It does, however, depend on the complicity of the + * score method implementations to not get confused.) 
+ * + * @param aItems The non-empty list of items to score. + * @param aContext A noun-specific dictionary that we just pass to the funcs. + * @param aExtraScoreFuncs A list of extra scoring functions to apply. + * @returns A list of integer scores equal in length to aItems. + */ + scoreNounItems: function gloda_ns_grokNounItem(aItems, aContext, + aExtraScoreFuncs) { + let scores = []; + // bail if there is nothing to score + if (!aItems.length) + return scores; + + let itemNounDef = aItems[0].NOUN_DEF; + if (aExtraScoreFuncs == null) + aExtraScoreFuncs = []; + + for (let item of aItems) { + let score = 0; + let attrProviders = this._attrProviderOrderByNoun[itemNounDef.id]; + for (let iProvider = 0; iProvider < attrProviders.length; iProvider++) { + let provider = attrProviders[iProvider]; + if (provider.score) + score += provider.score(item); + } + for (let [, extraScoreFunc] in Iterator(aExtraScoreFuncs)) + score += extraScoreFunc(item, aContext); + scores.push(score); + } + + return scores; + } +}; + +/* and initialize the Gloda object/NS before we return... */ +try { + Gloda._init(); +} +catch (ex) { + Gloda._log.debug("Exception during Gloda init (" + ex.fileName + ":" + + ex.lineNumber + "): " + ex); +}; +/* but don't forget that we effectively depend on everybody.js too, and + currently on our importer to be importing that if they need us fully armed + and operational. */ diff --git a/mailnews/db/gloda/modules/index_ab.js b/mailnews/db/gloda/modules/index_ab.js new file mode 100644 index 000000000..299733275 --- /dev/null +++ b/mailnews/db/gloda/modules/index_ab.js @@ -0,0 +1,287 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +this.EXPORTED_SYMBOLS = ['GlodaABIndexer', 'GlodaABAttrs']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/collection.js"); +Cu.import("resource:///modules/gloda/datastore.js"); +Cu.import("resource:///modules/gloda/gloda.js"); +Cu.import("resource:///modules/gloda/indexer.js"); +Cu.import("resource:///modules/gloda/log4moz.js"); +Cu.import("resource:///modules/gloda/noun_freetag.js"); +Cu.import("resource:///modules/gloda/utils.js"); +Cu.import("resource:///modules/mailServices.js"); + + +var GlodaABIndexer = { + _log: null, + + name: "index_ab", + enable: function() { + if (this._log == null) + this._log = Log4Moz.repository.getLogger("gloda.index_ab"); + + MailServices.ab.addAddressBookListener(this, + Ci.nsIAbListener.itemAdded | + Ci.nsIAbListener.itemChanged | + Ci.nsIAbListener.directoryItemRemoved); + }, + + disable: function() { + MailServices.ab.removeAddressBookListener(this); + }, + + // it's a getter so we can reference 'this' + get workers() { + return [ + ["ab-card", { + worker: this._worker_index_card, + }], + ]; + }, + + _worker_index_card: function*(aJob, aCallbackHandle) { + let card = aJob.id; + + if (card.primaryEmail) { + // load the identity + let query = Gloda.newQuery(Gloda.NOUN_IDENTITY); + query.kind("email"); + // we currently normalize all e-mail addresses to be lowercase + query.value(card.primaryEmail.toLowerCase()); + let identityCollection = query.getCollection(aCallbackHandle); + yield Gloda.kWorkAsync; + + if (identityCollection.items.length) { + let identity = identityCollection.items[0]; + // force the identity to know it has an associated ab card. 
+ identity._hasAddressBookCard = true; + + this._log.debug("Found identity, processing card."); + yield aCallbackHandle.pushAndGo( + Gloda.grokNounItem(identity.contact, {card: card}, false, false, + aCallbackHandle)); + this._log.debug("Done processing card."); + } + } + + yield GlodaIndexer.kWorkDone; + }, + + initialSweep: function() { + }, + + /* ------ nsIAbListener ------ */ + /** + * When an address book card is added, update the cached GlodaIdentity + * object's cached idea of whether the identity has an ab card. + */ + onItemAdded: function ab_indexer_onItemAdded(aParentDir, aItem) { + if (!(aItem instanceof Ci.nsIAbCard)) + return; + + this._log.debug("Received Card Add Notification"); + let identity = GlodaCollectionManager.cacheLookupOneByUniqueValue( + Gloda.NOUN_IDENTITY, "email@" + aItem.primaryEmail.toLowerCase()); + if (identity) + identity._hasAddressBookCard = true; + }, + /** + * When an address book card is added, update the cached GlodaIdentity + * object's cached idea of whether the identity has an ab card. + */ + onItemRemoved: function ab_indexer_onItemRemoved(aParentDir, aItem) { + if (!(aItem instanceof Ci.nsIAbCard)) + return; + + this._log.debug("Received Card Removal Notification"); + let identity = GlodaCollectionManager.cacheLookupOneByUniqueValue( + Gloda.NOUN_IDENTITY, "email@" + aItem.primaryEmail.toLowerCase()); + if (identity) + identity._hasAddressBookCard = false; + + }, + onItemPropertyChanged: function ab_indexer_onItemPropertyChanged(aItem, + aProperty, aOldValue, aNewValue) { + if (aProperty == null && aItem instanceof Ci.nsIAbCard) { + this._log.debug("Received Card Change Notification"); + + let card = aItem; // instanceof already QueryInterface'd for us. 
+ let job = new IndexingJob("ab-card", card); + GlodaIndexer.indexJob(job); + } + } +}; +GlodaIndexer.registerIndexer(GlodaABIndexer); + +var GlodaABAttrs = { + providerName: "gloda.ab_attr", + _log: null, + + init: function() { + this._log = Log4Moz.repository.getLogger("gloda.abattrs"); + + try { + this.defineAttributes(); + } + catch (ex) { + this._log.error("Error in init: " + ex); + throw ex; + } + }, + + defineAttributes: function() { + /* ***** Contacts ***** */ + this._attrIdentityContact = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "identities", + singular: false, + special: Gloda.kSpecialColumnChildren, + //specialColumnName: "contactID", + storageAttributeName: "_identities", + subjectNouns: [Gloda.NOUN_CONTACT], + objectNoun: Gloda.NOUN_IDENTITY, + }); // tested-by: test_attributes_fundamental + this._attrContactName = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "name", + singular: true, + special: Gloda.kSpecialString, + specialColumnName: "name", + subjectNouns: [Gloda.NOUN_CONTACT], + objectNoun: Gloda.NOUN_STRING, + canQuery: true, + }); // tested-by: test_attributes_fundamental + this._attrContactPopularity = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "popularity", + singular: true, + special: Gloda.kSpecialColumn, + specialColumnName: "popularity", + subjectNouns: [Gloda.NOUN_CONTACT], + objectNoun: Gloda.NOUN_NUMBER, + canQuery: true, + }); // not-tested + this._attrContactFrecency = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "frecency", + singular: true, + special: Gloda.kSpecialColumn, + specialColumnName: "frecency", + subjectNouns: [Gloda.NOUN_CONTACT], + objectNoun: Gloda.NOUN_NUMBER, + canQuery: 
true, + }); // not-tested + + /* ***** Identities ***** */ + this._attrIdentityContact = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrDerived, + attributeName: "contact", + singular: true, + special: Gloda.kSpecialColumnParent, + specialColumnName: "contactID", // the column in the db + idStorageAttributeName: "_contactID", + valueStorageAttributeName: "_contact", + subjectNouns: [Gloda.NOUN_IDENTITY], + objectNoun: Gloda.NOUN_CONTACT, + canQuery: true, + }); // tested-by: test_attributes_fundamental + this._attrIdentityKind = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "kind", + singular: true, + special: Gloda.kSpecialString, + specialColumnName: "kind", + subjectNouns: [Gloda.NOUN_IDENTITY], + objectNoun: Gloda.NOUN_STRING, + canQuery: true, + }); // tested-by: test_attributes_fundamental + this._attrIdentityValue = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrFundamental, + attributeName: "value", + singular: true, + special: Gloda.kSpecialString, + specialColumnName: "value", + subjectNouns: [Gloda.NOUN_IDENTITY], + objectNoun: Gloda.NOUN_STRING, + canQuery: true, + }); // tested-by: test_attributes_fundamental + + /* ***** Contact Meta ***** */ + // Freeform tags; not explicit like thunderbird's fundamental tags. + // we differentiate for now because of fundamental implementation + // differences. 
+ this._attrFreeTag = Gloda.defineAttribute({ + provider: this, + extensionName: Gloda.BUILT_IN, + attributeType: Gloda.kAttrExplicit, + attributeName: "freetag", + bind: true, + bindName: "freeTags", + singular: false, + subjectNouns: [Gloda.NOUN_CONTACT], + objectNoun: Gloda.lookupNoun("freetag"), + parameterNoun: null, + canQuery: true, + }); // not-tested + // we need to find any existing bound freetag attributes, and use them to + // populate to FreeTagNoun's understanding + if ("parameterBindings" in this._attrFreeTag) { + for (let freeTagName in this._attrFreeTag.parameterBindings) { + this._log.debug("Telling FreeTagNoun about: " + freeTagName); + FreeTagNoun.getFreeTag(freeTagName); + } + } + }, + + process: function*(aContact, aRawReps, aIsNew, aCallbackHandle) { + let card = aRawReps.card; + if (aContact.NOUN_ID != Gloda.NOUN_CONTACT) { + this._log.warn("Somehow got a non-contact: " + aContact); + return; // this will produce an exception; we like. + } + + // update the name + if (card.displayName && card.displayName != aContact.name) + aContact.name = card.displayName; + + aContact.freeTags = []; + + let tags = null; + try { + tags = card.getProperty("Categories", null); + } catch (ex) { + this._log.error("Problem accessing property: " + ex); + } + if (tags) { + for (let tagName of tags.split(",")) { + tagName = tagName.trim(); + if (tagName) { + aContact.freeTags.push(FreeTagNoun.getFreeTag(tagName)); + } + } + } + + yield Gloda.kWorkDone; + } +}; diff --git a/mailnews/db/gloda/modules/index_msg.js b/mailnews/db/gloda/modules/index_msg.js new file mode 100644 index 000000000..d4d7a8ceb --- /dev/null +++ b/mailnews/db/gloda/modules/index_msg.js @@ -0,0 +1,3334 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +"use strict"; + +/* + * This file currently contains a fairly general implementation of asynchronous + * indexing with a very explicit message indexing implementation. As gloda + * will eventually want to index more than just messages, the message-specific + * things should ideally lose their special hold on this file. This will + * benefit readability/size as well. + */ + +this.EXPORTED_SYMBOLS = ['GlodaMsgIndexer']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource://gre/modules/XPCOMUtils.jsm"); +Cu.import("resource:///modules/iteratorUtils.jsm"); +Cu.import("resource:///modules/mailServices.js"); +Cu.import("resource:///modules/MailUtils.js"); + +Cu.import("resource:///modules/gloda/log4moz.js"); + +Cu.import("resource:///modules/gloda/utils.js"); +Cu.import("resource:///modules/gloda/datastore.js"); +Cu.import("resource:///modules/gloda/datamodel.js"); +Cu.import("resource:///modules/gloda/gloda.js"); +Cu.import("resource:///modules/gloda/collection.js"); +Cu.import("resource:///modules/gloda/connotent.js"); + +Cu.import("resource:///modules/gloda/indexer.js"); + +Cu.import("resource:///modules/gloda/mimemsg.js"); + +XPCOMUtils.defineLazyServiceGetter(this, "atomService", + "@mozilla.org/atom-service;1", + "nsIAtomService"); + +// Components.results does not have mailnews error codes! +var NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE = 0x80550005; + +var GLODA_MESSAGE_ID_PROPERTY = "gloda-id"; +/** + * Message header property to track dirty status; one of + * |GlodaIndexer.kMessageClean|, |GlodaIndexer.kMessageDirty|, + * |GlodaIndexer.kMessageFilthy|. + */ +var GLODA_DIRTY_PROPERTY = "gloda-dirty"; + +/** + * The sentinel GLODA_MESSAGE_ID_PROPERTY value indicating that a message fails + * to index and we should not bother trying again, at least not until a new + * release is made. 
+ * + * This should ideally just flip between 1 and 2, with GLODA_OLD_BAD_MESSAGE_ID + * flipping in the other direction. If we start having more trailing badness, + * _indexerGetEnumerator and GLODA_OLD_BAD_MESSAGE_ID will need to be altered. + * + * When flipping this, be sure to update glodaTestHelper.js's copy. + */ +var GLODA_BAD_MESSAGE_ID = 2; +/** + * The gloda id we used to use to mark messages as bad, but now should be + * treated as eligible for indexing. This is only ever used for consideration + * when creating msg header enumerators with `_indexerGetEnumerator` which + * means we only will re-index such messages in an indexing sweep. Accordingly + * event-driven indexing will still treat such messages as unindexed (and + * unindexable) until an indexing sweep picks them up. + */ +var GLODA_OLD_BAD_MESSAGE_ID = 1; +var GLODA_FIRST_VALID_MESSAGE_ID = 32; + +var JUNK_SCORE_PROPERTY = "junkscore"; +var JUNK_SPAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_SPAM_SCORE.toString(); +var JUNK_HAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_HAM_SCORE.toString(); + +var nsIArray = Ci.nsIArray; +var nsIMsgFolder = Ci.nsIMsgFolder; +var nsIMsgLocalMailFolder = Ci.nsIMsgLocalMailFolder; +var nsIMsgImapMailFolder = Ci.nsIMsgImapMailFolder; +var nsIMsgDBHdr = Ci.nsIMsgDBHdr; +var nsMsgFolderFlags = Ci.nsMsgFolderFlags; +var nsMsgMessageFlags = Ci.nsMsgMessageFlags; +var nsMsgProcessingFlags = Ci.nsMsgProcessingFlags; + +/** + * The processing flags that tell us that a message header has not yet been + * reported to us via msgsClassified. If it has one of these flags, it is + * still being processed. 
+ */ +var NOT_YET_REPORTED_PROCESSING_FLAGS = + nsMsgProcessingFlags.NotReportedClassified | + nsMsgProcessingFlags.ClassifyJunk; + +// for list comprehension fun +function* range(begin, end) { + for (let i = begin; i < end; ++i) { + yield i; + } +} + +/** + * We do not set properties on the messages until we perform a DB commit; this + * helper class tracks messages that we have indexed but are not yet marked + * as such on their header. + */ +var PendingCommitTracker = { + /** + * Maps message URIs to their gloda ids. + * + * I am not entirely sure why I chose the URI for the key rather than + * gloda folder ID + message key. Most likely it was to simplify debugging + * since the gloda folder ID is opaque while the URI is very informative. It + * is also possible I was afraid of IMAP folder renaming triggering a UID + * renumbering? + */ + _indexedMessagesPendingCommitByKey: {}, + /** + * Map from the pending commit gloda id to a tuple of [the corresponding + * message header, dirtyState]. + */ + _indexedMessagesPendingCommitByGlodaId: {}, + /** + * Do we have a post-commit handler registered with this transaction yet? + */ + _pendingCommit: false, + + /** + * The function gets called when the commit actually happens to flush our + * message id's. + * + * It is very possible that by the time this call happens we have left the + * folder and nulled out msgDatabase on the folder. Since nulling it out + * is what causes the commit, if we set the headers here without somehow + * forcing a commit, we will lose. Badly. + * Accordingly, we make a list of all the folders that the headers belong to + * as we iterate, make sure to re-attach their msgDatabase before forgetting + * the headers, then make sure to zero the msgDatabase again, triggering a + * commit. If there were a way to directly get the nsIMsgDatabase from the + * header we could do that and call commit directly. 
We don't track + * databases along with the headers since the headers can change because of + * moves and that would increase the number of moving parts. + */ + _commitCallback: function PendingCommitTracker_commitCallback() { + let foldersByURI = {}; + let lastFolder = null; + + for (let glodaId in + PendingCommitTracker._indexedMessagesPendingCommitByGlodaId) { + let [msgHdr, dirtyState] = + PendingCommitTracker._indexedMessagesPendingCommitByGlodaId[glodaId]; + // Mark this message as indexed. + // It's conceivable the database could have gotten blown away, in which + // case the message headers are going to throw exceptions when we try + // and touch them. So we wrap this in a try block that complains about + // this unforeseen circumstance. (noteFolderDatabaseGettingBlownAway + // should have been called and avoided this situation in all known + // situations.) + try { + let curGlodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); + if (curGlodaId != glodaId) + msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, glodaId); + let headerDirty = msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); + if (headerDirty != dirtyState) + msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, dirtyState); + + // Make sure this folder is in our foldersByURI map. + if (lastFolder == msgHdr.folder) + continue; + lastFolder = msgHdr.folder; + let folderURI = lastFolder.URI; + if (!(folderURI in foldersByURI)) + foldersByURI[folderURI] = lastFolder; + } + catch (ex) { + GlodaMsgIndexer._log.error( + "Exception while attempting to mark message with gloda state after" + + "db commit", ex); + } + } + + // it is vitally important to do this before we forget about the headers! + for (let uri in foldersByURI) { + let folder = foldersByURI[uri]; + // This will not cause a parse. The database is in-memory since we have + // a header that belongs to it. This just causes the folder to + // re-acquire a reference from the database manager. 
+ let ignoredDb = folder.msgDatabase; + // And this will cause a commit. (And must be done since we don't want + // to cause a leak.) + folder.msgDatabase = null; + } + + PendingCommitTracker._indexedMessagesPendingCommitByGlodaId = {}; + PendingCommitTracker._indexedMessagesPendingCommitByKey = {}; + + PendingCommitTracker._pendingCommit = false; + }, + + /** + * Track a message header that should be marked with the given gloda id when + * the database commits. + */ + track: function PendingCommitTracker_track(aMsgHdr, aGlodaId) { + let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; + this._indexedMessagesPendingCommitByKey[pendingKey] = aGlodaId; + this._indexedMessagesPendingCommitByGlodaId[aGlodaId] = + [aMsgHdr, GlodaMsgIndexer.kMessageClean]; + + if (!this._pendingCommit) { + GlodaDatastore.runPostCommit(this._commitCallback); + this._pendingCommit = true; + } + }, + + /** + * Get the current state of a message header given that we cannot rely on just + * looking at the header's properties because we defer setting those + * until the SQLite commit happens. + * + * @return Tuple of [gloda id, dirty status]. + */ + getGlodaState: + function PendingCommitTracker_getGlodaState(aMsgHdr) { + // If it's in the pending commit table, then the message is basically + // clean. Return that info. + let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; + if (pendingKey in this._indexedMessagesPendingCommitByKey) { + let glodaId = + PendingCommitTracker._indexedMessagesPendingCommitByKey[pendingKey]; + return [glodaId, this._indexedMessagesPendingCommitByGlodaId[glodaId][1]]; + } + else { + // Otherwise the header's concept of state is correct. + let glodaId = aMsgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); + let glodaDirty = aMsgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); + return [glodaId, glodaDirty]; + } + }, + + /** + * Update our structure to reflect moved headers. 
Moves are currently
+ * treated as weakly interesting and do not require a reindexing
+ * although collections will get notified. So our job is to fix-up
+ * the pending commit information if the message has a pending commit.
+ */
+ noteMove: function PendingCommitTracker_noteMove(aOldHdr, aNewHdr) {
+ let oldKey = aOldHdr.folder.URI + "#" + aOldHdr.messageKey;
+ if (!(oldKey in this._indexedMessagesPendingCommitByKey))
+ return;
+
+ let glodaId = this._indexedMessagesPendingCommitByKey[oldKey];
+ delete this._indexedMessagesPendingCommitByKey[oldKey];
+
+ let newKey = aNewHdr.folder.URI + "#" + aNewHdr.messageKey;
+ this._indexedMessagesPendingCommitByKey[newKey] = glodaId;
+
+ // only clobber the header, not the dirty state
+ this._indexedMessagesPendingCommitByGlodaId[glodaId][0] = aNewHdr;
+ },
+
+ /**
+ * A blind move is one where we have the source header but not the destination
+ * header. This happens for IMAP messages that do not involve offline fake
+ * headers.
+ * XXX Since IMAP moves will propagate the gloda-id/gloda-dirty bits for us,
+ * we could detect the other side of the move when it shows up as a
+ * msgsClassified event and restore the mapping information. Since the
+ * offline fake header case should now cover the bulk of IMAP move
+ * operations, we probably do not need to pursue this.
+ *
+ * We just re-dispatch to noteDirtyHeader because we can't do anything more
+ * clever.
+ */
+ noteBlindMove: function PendingCommitTracker_noteBlindMove(aOldHdr) {
+ this.noteDirtyHeader(aOldHdr);
+ },
+
+ /**
+ * If a message is dirty we should stop tracking it for post-commit
+ * purposes. This is not because we don't want to write to its header
+ * when we commit as much as that we want to avoid |getHeaderGlodaState|
+ * reporting that the message is clean. We could complicate our state
+ * by storing that information, but this is easier and ends up the same
+ * in the end. 
+ */ + noteDirtyHeader: function PendingCommitTracker_noteDirtyHeader(aMsgHdr) { + let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; + if (!(pendingKey in this._indexedMessagesPendingCommitByKey)) + return; + + // (It is important that we get the gloda id from our own structure!) + let glodaId = this._indexedMessagesPendingCommitByKey[pendingKey]; + this._indexedMessagesPendingCommitByGlodaId[glodaId][1] = + GlodaMsgIndexer.kMessageDirty; + }, + + /** + * Sometimes a folder database gets blown away. This happens for one of two + * expected reasons right now: + * - Folder compaction. + * - Explicit reindexing of a folder via the folder properties "rebuild index" + * button. + * + * When this happens, we are basically out of luck and need to discard + * everything about the folder. The good news is that the folder compaction + * pass is clever enough to re-establish the linkages that are being lost + * when we drop these things on the floor. Reindexing of a folder is not + * clever enough to deal with this but is an exceptional case of last resort + * (the user should not normally be performing a reindex as part of daily + * operation), so we accept that messages may be redundantly indexed. + */ + noteFolderDatabaseGettingBlownAway: + function PendingCommitTracker_noteFolderDatabaseGettingBlownAway( + aMsgFolder) { + let uri = aMsgFolder.URI + "#"; + for (let key in Iterator(this._indexedMessagesPendingCommitByKey, true)) { + // this is not as efficient as it could be, but compaction is relatively + // rare and the number of pending headers is generally going to be + // small. + if (key.indexOf(uri) == 0) { + delete this._indexedMessagesPendingCommitByKey[key]; + } + } + }, +}; + +/** + * This callback handles processing the asynchronous query results of + * |GlodaMsgIndexer.getMessagesByMessageID|. 
 + */ +function MessagesByMessageIdCallback(aMsgIDToIndex, aResults,
+ aCallback, aCallbackThis) {
+ this.msgIDToIndex = aMsgIDToIndex;
+ this.results = aResults;
+ this.callback = aCallback;
+ this.callbackThis = aCallbackThis;
+}
+
+MessagesByMessageIdCallback.prototype = {
+ _log: Log4Moz.repository.getLogger("gloda.index_msg.mbm"),
+
+ onItemsAdded: function gloda_ds_mbmi_onItemsAdded(aItems, aCollection) {
+ // just outright bail if we are shutdown
+ if (GlodaDatastore.datastoreIsShutdown)
+ return;
+
+ this._log.debug("getting results...");
+ for (let message of aItems) {
+ this.results[this.msgIDToIndex[message.headerMessageID]].push(message);
+ }
+ },
+ onItemsModified: function () {},
+ onItemsRemoved: function () {},
+ onQueryCompleted: function gloda_ds_mbmi_onQueryCompleted(aCollection) {
+ // just outright bail if we are shutdown
+ if (GlodaDatastore.datastoreIsShutdown)
+ return;
+
+ if (this._log.level <= Log4Moz.Level.Debug)
+ this._log.debug("query completed, notifying... " + this.results);
+
+ this.callback.call(this.callbackThis, this.results);
+ }
+};
+
+
+/**
+ * The message indexer!
+ *
+ * === Message Indexing Strategy
+ * To these ends, we implement things like so:
+ *
+ * Message State Tracking
+ * - We store a property on all indexed headers indicating their gloda message
+ * id. This allows us to tell whether a message is indexed from the header,
+ * without having to consult the SQL database.
+ * - When we receive an event that indicates that a message's meta-data has
+ * changed and gloda needs to re-index the message, we set a property on the
+ * header that indicates the message is dirty. This property can indicate
+ * that the message needs to be re-indexed but the gloda-id is valid (dirty)
+ * or that the message's gloda-id is invalid (filthy) because the gloda
+ * database has been blown away.
+ * - We track whether a folder is up-to-date on our GlodaFolder representation
+ * using a concept of dirtiness, just like messages. 
Like messages, a folder + * can be dirty or filthy. A dirty folder has at least one dirty message in + * it which means we should scan the folder. A filthy folder means that + * every message in the folder should be considered filthy. Folders start + * out filthy when Gloda is first told about them indicating we cannot + * trust any of the gloda-id's in the folders. Filthy folders are downgraded + * to dirty folders after we mark all of the headers with gloda-id's filthy. + * + * Indexing Message Control + * - We index the headers of all IMAP messages. We index the bodies of all IMAP + * messages that are offline. We index all local messages. We plan to avoid + * indexing news messages. + * - We would like a way to express desires about indexing that either don't + * confound offline storage with indexing, or actually allow some choice. + * + * Indexing Messages + * - We have two major modes of indexing: sweep and event-driven. When we + * start up we kick off an indexing sweep. We use event-driven indexing + * as we receive events for eligible messages, but if we get too many + * events we start dropping them on the floor and just flag that an indexing + * sweep is required. + * - The sweep initiates folder indexing jobs based on the priorities assigned + * to folders. Folder indexing uses a filtered message enumerator to find + * messages that need to be indexed, minimizing wasteful exposure of message + * headers to XPConnect that we would not end up indexing. + * - For local folders, we use GetDatabaseWithReparse to ensure that the .msf + * file exists. For IMAP folders, we simply use GetDatabase because we know + * the auto-sync logic will make sure that the folder is up-to-date and we + * want to avoid creating problems through use of updateFolder. + * + * Junk Mail + * - We do not index junk. We do not index messages until the junk/non-junk + * determination has been made. If a message gets marked as junk, we act like + * it was deleted. 
 + * - We know when a message is actively queued for junk processing thanks to
+ * folder processing flags. nsMsgDBFolder::CallFilterPlugins does this
+ * prior to initiating spam processing. Unfortunately, this method does not
+ * get called until after we receive the notification about the existence of
+ * the header. How long after can vary on different factors. The longest
+ * delay is in the IMAP case where there is a filter that requires the
+ * message body to be present; the method does not get called until all the
+ * bodies are downloaded.
+ *
+ */
+var GlodaMsgIndexer = {
+ /**
+ * A partial attempt to generalize to support multiple databases. Each
+ * database would have its own datastore would have its own indexer. But
+ * we rather inter-mingle our use of this field with the singleton global
+ * GlodaDatastore.
+ */
+ _datastore: GlodaDatastore,
+ _log: Log4Moz.repository.getLogger("gloda.index_msg"),
+
+ _junkService: MailServices.junk,
+
+ name: "index_msg",
+ /**
+ * Are we enabled, read: are we processing change events?
+ */
+ _enabled: false,
+ get enabled() { return this._enabled; },
+
+ enable: function msg_indexer_enable() {
+ // initialize our listeners' this pointers
+ this._databaseAnnouncerListener.indexer = this;
+ this._msgFolderListener.indexer = this;
+
+ // register for:
+ // - folder loaded events, so we know when getDatabaseWithReparse has
+ // finished updating the index/what not (if it wasn't immediately
+ // available)
+ // - property changes (so we know when a message's read/starred state have
+ // changed.)
+ this._folderListener._init(this);
+ MailServices.mailSession.AddFolderListener(this._folderListener,
+ Ci.nsIFolderListener.intPropertyChanged |
+ Ci.nsIFolderListener.propertyFlagChanged |
+ Ci.nsIFolderListener.event);
+
+ MailServices.mfn.addListener(this._msgFolderListener,
+ // note: intentionally no msgAdded notification is requested. 
+ Ci.nsIMsgFolderNotificationService.msgsClassified | + Ci.nsIMsgFolderNotificationService.msgsDeleted | + Ci.nsIMsgFolderNotificationService.msgsMoveCopyCompleted | + Ci.nsIMsgFolderNotificationService.msgKeyChanged | + Ci.nsIMsgFolderNotificationService.folderAdded | + Ci.nsIMsgFolderNotificationService.folderDeleted | + Ci.nsIMsgFolderNotificationService.folderMoveCopyCompleted | + Ci.nsIMsgFolderNotificationService.folderRenamed | + Ci.nsIMsgFolderNotificationService.itemEvent); + + this._enabled = true; + + this._considerSchemaMigration(); + + this._log.info("Event-Driven Indexing is now " + this._enabled); + }, + disable: function msg_indexer_disable() { + // remove FolderLoaded notification listener + MailServices.mailSession.RemoveFolderListener(this._folderListener); + + MailServices.mfn.removeListener(this._msgFolderListener); + + this._indexerLeaveFolder(); // nop if we aren't "in" a folder + + this._enabled = false; + + this._log.info("Event-Driven Indexing is now " + this._enabled); + }, + + /** + * Indicates that we have pending deletions to process, meaning that there + * are gloda message rows flagged for deletion. If this value is a boolean, + * it means the value is known reliably. If this value is null, it means + * that we don't know, likely because we have started up and have not checked + * the database. + */ + pendingDeletions: null, + + /** + * The message (or folder state) is believed up-to-date. + */ + kMessageClean: 0, + /** + * The message (or folder) is known to not be up-to-date. In the case of + * folders, this means that some of the messages in the folder may be dirty. + * However, because of the way our indexing works, it is possible there may + * actually be no dirty messages in a folder. (We attempt to process + * messages in an event-driven fashion for a finite number of messages, but + * because we can quit without completing processing of the queue, we need to + * mark the folder dirty, just-in-case.) 
(We could do some extra leg-work
+ * and do a better job of marking the folder clean again.)
+ */
+ kMessageDirty: 1,
+ /**
+ * We have not indexed the folder at all, but messages in the folder think
+ * they are indexed. We downgrade the folder to just kMessageDirty after
+ * marking all the messages in the folder as dirty. We do this so that if we
+ * have to stop indexing the folder we can still build on our progress next
+ * time we enter the folder.
+ * We mark all folders filthy when (re-)creating the database because there
+ * may be previous state left over from an earlier database.
+ */
+ kMessageFilthy: 2,
+
+ /**
+ * A message addition job yet to be (completely) processed. Since message
+ * addition events come to us one-by-one, in order to aggregate them into a
+ * job, we need something like this. It's up to the indexing loop to
+ * decide when to null this out; it can either do it when it first starts
+ * processing it, or when it has processed the last thing. It's really a
+ * question of whether we want retrograde motion in the folder progress bar
+ * or the message progress bar.
+ */
+ _pendingAddJob: null,
+
+ /**
+ * The number of messages that we should queue for processing before letting
+ * them fall on the floor and relying on our folder-walking logic to ensure
+ * that the messages are indexed.
+ * The reason we allow for queueing messages in an event-driven fashion is
+ * that once we have reached a steady-state, it is preferable to be able to
+ * deal with new messages and modified meta-data in a prompt fashion rather
+ * than having to (potentially) walk every folder in the system just to find
+ * the message that the user changed the tag on.
+ */
+ _indexMaxEventQueueMessages: 20,
+
+ /**
+ * Unit testing hook to get us to emit additional logging that verges on
+ * inane for general usage but is helpful in unit test output to get a lay
+ * of the land and for paranoia reasons. 
   */
  _unitTestSuperVerbose: false,

  /** The GlodaFolder corresponding to the folder we are indexing. */
  _indexingGlodaFolder: null,
  /** The nsIMsgFolder we are currently indexing. */
  _indexingFolder: null,
  /** The nsIMsgDatabase we are currently indexing. */
  _indexingDatabase: null,
  /**
   * The iterator we are using to iterate over the headers in
   * this._indexingDatabase.
   */
  _indexingIterator: null,

  /** folder whose entry we are pending on */
  _pendingFolderEntry: null,

  // copy-down the work constants from Gloda so workers can yield them as
  // `this.kWork*` without reaching back into the Gloda module.
  kWorkSync: Gloda.kWorkSync,
  kWorkAsync: Gloda.kWorkAsync,
  kWorkDone: Gloda.kWorkDone,
  kWorkPause: Gloda.kWorkPause,
  kWorkDoneWithResult: Gloda.kWorkDoneWithResult,

  /**
   * Async common logic that we want to deal with the given folder ID.  Besides
   * cutting down on duplicate code, this ensures that we are listening on
   * the folder in case it tries to go away when we are using it.
   *
   * Side effects: populates _indexingGlodaFolder / _indexingFolder /
   * _indexingDatabase and registers _databaseAnnouncerListener on the db.
   *
   * @param aFolderID The gloda folder id to enter (mapped via GlodaDatastore).
   * @return this.kWorkSync when the folder was successfully entered,
   *     this.kWorkAsync when we need to pend on notification of updating of
   *     the folder (due to re-parsing or what have you).  In the event of an
   *     actual problem, an exception will escape (after cleanup).
   */
  _indexerEnterFolder: function gloda_index_indexerEnterFolder(aFolderID) {
    // leave the folder if we haven't explicitly left it.
    if (this._indexingFolder !== null) {
      this._indexerLeaveFolder();
    }

    this._indexingGlodaFolder = GlodaDatastore._mapFolderID(aFolderID);
    this._indexingFolder = this._indexingGlodaFolder.getXPCOMFolder(
                             this._indexingGlodaFolder.kActivityIndexing);

    if (this._indexingFolder)
      this._log.debug("Entering folder: " + this._indexingFolder.URI);

    try {
      // The msf may need to be created or otherwise updated for local folders.
      // This may require yielding until such time as the msf has been created.
      try {
        if (this._indexingFolder instanceof nsIMsgLocalMailFolder) {
          this._indexingDatabase =
            this._indexingFolder.getDatabaseWithReparse(null,
                                                        null);
        }
        // we need do nothing special for IMAP, news, or other
      }
      // getDatabaseWithReparse can return either NS_ERROR_NOT_INITIALIZED or
      // NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE if the net result is that it
      // is going to send us a notification when the reparse has completed.
      // (note that although internally NS_MSG_ERROR_FOLDER_SUMMARY_MISSING
      // might get flung around, it won't make it out to us, and will instead
      // be permuted into an NS_ERROR_NOT_INITIALIZED.)
      catch (e) {
        if ((e.result == Cr.NS_ERROR_NOT_INITIALIZED) ||
            (e.result == NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE)) {
          // this means that we need to pend on the update; the listener for
          // FolderLoaded events will call _indexerCompletePendingFolderEntry.
          this._log.debug("Pending on folder load...");
          this._pendingFolderEntry = this._indexingFolder;
          return this.kWorkAsync;
        } else {
          throw e;
        }
      }
      // we get an nsIMsgDatabase out of this (unsurprisingly) which
      // explicitly inherits from nsIDBChangeAnnouncer, which has the
      // AddListener call we want.
      if (this._indexingDatabase == null)
        this._indexingDatabase = this._indexingFolder.msgDatabase;
      this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
    }
    catch (ex) {
      this._log.error("Problem entering folder: " +
                      (this._indexingFolder ?
                         this._indexingFolder.prettiestName : "unknown") +
                      ", skipping. Error was: " + ex.fileName + ":" +
                      ex.lineNumber + ": " + ex);
      this._indexingGlodaFolder.indexing = false;
      this._indexingFolder = null;
      this._indexingGlodaFolder = null;
      this._indexingDatabase = null;
      this._indexingEnumerator = null;

      // re-throw, we just wanted to make sure this junk is cleaned up and
      // get localized error logging...
      throw ex;
    }

    return this.kWorkSync;
  },

  /**
   * If the folder was still parsing/updating when we tried to enter, then this
   * handler will get called by the listener who got the FolderLoaded message.
   * All we need to do is get the database reference, register a listener on
   * the db, and retrieve an iterator if desired.
   */
  _indexerCompletePendingFolderEntry:
      function gloda_indexer_indexerCompletePendingFolderEntry() {
    this._indexingDatabase = this._indexingFolder.msgDatabase;
    this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
    this._log.debug("...Folder Loaded!");

    // the load is no longer pending; we certainly don't want more notifications
    this._pendingFolderEntry = null;
    // indexerEnterFolder returned kWorkAsync, which means we need to notify
    // the callback driver to get things going again.
    GlodaIndexer.callbackDriver();
  },

  /**
   * Enumerate all messages in the folder.
   */
  kEnumAllMsgs: 0,
  /**
   * Enumerate messages that look like they need to be indexed.
   */
  kEnumMsgsToIndex: 1,
  /**
   * Enumerate messages that are already indexed.
   */
  kEnumIndexedMsgs: 2,

  /**
   * Synchronous helper to get an enumerator for the current folder (as found
   * in |_indexingFolder|).  The result is stored in |_indexingEnumerator|,
   * not returned.
   *
   * @param aEnumKind One of |kEnumAllMsgs|, |kEnumMsgsToIndex|, or
   *     |kEnumIndexedMsgs|.
   * @param [aAllowPreBadIds=false] Only valid for |kEnumIndexedMsgs|, tells us
   *     that we should treat message with any gloda-id as dirty, not just
   *     messages that have non-bad message id's.
   */
  _indexerGetEnumerator: function gloda_indexer_indexerGetEnumerator(
      aEnumKind, aAllowPreBadIds) {
    if (aEnumKind == this.kEnumMsgsToIndex) {
      // We need to create search terms for messages to index. Messages should
      // be indexed if they're indexable (local or offline and not expunged)
      // and either: haven't been indexed, are dirty, or are marked with
      // a former GLODA_BAD_MESSAGE_ID that is no longer our bad marker.  (Our
      // bad marker can change on minor schema revs so that we can try and
      // reindex those messages exactly once and without needing to go through
      // a pass to mark them as needing one more try.)
      // The basic search expression is:
      //  ((GLODA_MESSAGE_ID_PROPERTY Is 0) ||
      //   (GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID) ||
      //   (GLODA_DIRTY_PROPERTY Isnt 0)) &&
      //  (JUNK_SCORE_PROPERTY Isnt 100)
      // If the folder !isLocal we add the terms:
      //  - if the folder is offline -- && (Status Is nsMsgMessageFlags.Offline)
      //  - && (Status Isnt nsMsgMessageFlags.Expunged)

      let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
                            .createInstance(Ci.nsIMsgSearchSession);
      let searchTerms = Cc["@mozilla.org/array;1"]
                          .createInstance(Ci.nsIMutableArray);
      let isLocal = this._indexingFolder instanceof nsIMsgLocalMailFolder;

      searchSession.addScopeTerm(Ci.nsMsgSearchScope.offlineMail,
                                 this._indexingFolder);
      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
      let nsMsgSearchOp = Ci.nsMsgSearchOp;

      // first term: (GLODA_MESSAGE_ID_PROPERTY Is 0
      let searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // actually don't care here
      searchTerm.beginsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Is;
      // (search term values must be round-tripped through the term's own
      //  value object: read, mutate, write back)
      let value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = 0;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // second term: || GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // OR
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Is;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = GLODA_OLD_BAD_MESSAGE_ID;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // third term: || GLODA_DIRTY_PROPERTY Isnt 0 )
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false;
      searchTerm.endsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = 0;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // JUNK_SCORE_PROPERTY Isnt 100
      // For symmetry with our event-driven stuff, we just directly deal with
      // the header property.
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = true;
      searchTerm.attrib = nsMsgSearchAttrib.HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.str = JUNK_SPAM_SCORE_STR;
      searchTerm.value = value;
      searchTerm.hdrProperty = JUNK_SCORE_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      if (!isLocal)
      {
        // If the folder is offline, then the message should be too
        if (this._indexingFolder.flags & Ci.nsMsgFolderFlags.Offline) {
          // third term: && Status Is nsMsgMessageFlags.Offline
          searchTerm = searchSession.createTerm();
          searchTerm.booleanAnd = true;
          searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
          searchTerm.op = nsMsgSearchOp.Is;
          value = searchTerm.value;
          value.attrib = searchTerm.attrib;
          value.status = nsMsgMessageFlags.Offline;
          searchTerm.value = value;
          searchTerms.appendElement(searchTerm, false);
        }

        // fourth term: && Status Isnt nsMsgMessageFlags.Expunged
        searchTerm = searchSession.createTerm();
        searchTerm.booleanAnd = true;
        searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
        searchTerm.op = nsMsgSearchOp.Isnt;
        value = searchTerm.value;
        value.attrib = searchTerm.attrib;
        value.status = nsMsgMessageFlags.Expunged;
        searchTerm.value = value;
        searchTerms.appendElement(searchTerm, false);
      }

      // second argument 'true' = reverse enumeration order.
      this._indexingEnumerator =
        this._indexingDatabase.getFilterEnumerator(searchTerms, true);
    }
    else if (aEnumKind == this.kEnumIndexedMsgs) {
      // Enumerate only messages that are already indexed.  This comes out to:
      //  ((GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1) &&
      //   (GLODA_DIRTY_PROPERTY Isnt kMessageFilthy))
      // In English, a message is indexed if (by clause):
      // 1) The message has a gloda-id and that gloda-id is in the valid range
      //    (and not in the bad message marker range).
      // 2) The message has not been marked filthy (which invalidates the
      //    gloda-id.)  We also assume that the folder would not have been
      //    entered at all if it was marked filthy.
      let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
                            .createInstance(Ci.nsIMsgSearchSession);
      let searchTerms = Cc["@mozilla.org/array;1"]
                          .createInstance(Ci.nsIMutableArray);

      searchSession.addScopeTerm(Ci.nsMsgSearchScope.offlineMail,
                                 this._indexingFolder);
      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
      let nsMsgSearchOp = Ci.nsMsgSearchOp;

      // first term: (GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1
      let searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // actually don't care here
      searchTerm.beginsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      // use != 0 if we're allow pre-bad ids.
      searchTerm.op = aAllowPreBadIds ? nsMsgSearchOp.Isnt
                                      : nsMsgSearchOp.IsGreaterThan;
      let value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = aAllowPreBadIds ? 0 : (GLODA_FIRST_VALID_MESSAGE_ID - 1);
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // second term: && GLODA_DIRTY_PROPERTY Isnt kMessageFilthy)
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = true;
      searchTerm.endsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = this.kMessageFilthy;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // The use-case of already indexed messages does not want them reversed;
      // we care about seeing the message keys in order.
      this._indexingEnumerator =
        this._indexingDatabase.getFilterEnumerator(searchTerms, false);
    }
    else if (aEnumKind == this.kEnumAllMsgs) {
      this._indexingEnumerator =
        this._indexingDatabase.ReverseEnumerateMessages();
    }
    else {
      throw new Error("Unknown enumerator type requested:" + aEnumKind);
    }
  },

  /**
   * Leave the folder entered via |_indexerEnterFolder|: commit the message
   * database, unregister our db listener, clear the GlodaFolder's indexing
   * flag, and null out all folder-related state.  Safe to call when no folder
   * is entered (no-op in that case).
   */
  _indexerLeaveFolder: function gloda_index_indexerLeaveFolder() {
    if (this._indexingFolder !== null) {
      if (this._indexingDatabase) {
        this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);
        // remove our listener!
        this._indexingDatabase.RemoveListener(this._databaseAnnouncerListener);
      }
      // let the gloda folder know we are done indexing
      this._indexingGlodaFolder.indexing = false;
      // null everyone out
      this._indexingFolder = null;
      this._indexingGlodaFolder = null;
      this._indexingDatabase = null;
      this._indexingEnumerator = null;
    }
  },

  /**
   * Event fed to us by our nsIFolderListener when a folder is loaded.  We use
   * this event to know when a folder we were trying to open to index is
   * actually ready to be indexed.  (The summary may have not existed, may have
   * been out of date, or otherwise.)
   *
   * @param aFolder An nsIMsgFolder, already QI'd.
   */
  _onFolderLoaded: function gloda_index_onFolderLoaded(aFolder) {
    if ((this._pendingFolderEntry !== null) &&
        (aFolder.URI == this._pendingFolderEntry.URI))
      this._indexerCompletePendingFolderEntry();
  },

  // it's a getter so we can reference 'this'.  we could memoize.
  get workers() {
    return [
      ["folderSweep", {
         worker: this._worker_indexingSweep,
         jobCanceled: this._cleanup_indexingSweep,
         cleanup: this._cleanup_indexingSweep,
       }],
      ["folder", {
         worker: this._worker_folderIndex,
         recover: this._recover_indexMessage,
         cleanup: this._cleanup_indexing,
       }],
      ["folderCompact", {
         worker: this._worker_folderCompactionPass,
         // compaction enters the folder so needs to know how to leave
         cleanup: this._cleanup_indexing,
       }],
      ["message", {
         worker: this._worker_messageIndex,
         onSchedule: this._schedule_messageIndex,
         jobCanceled: this._canceled_messageIndex,
         recover: this._recover_indexMessage,
         cleanup: this._cleanup_indexing,
       }],
      ["delete", {
         worker: this._worker_processDeletes,
       }],

      ["fixMissingContacts", {
         worker: this._worker_fixMissingContacts,
       }],
    ];
  },

  // guards against scheduling the fixMissingContacts migration job twice
  _schemaMigrationInitiated: false,
  /**
   * Queue a one-time "fixMissingContacts" migration job if the datastore is
   * still at schema version 26.  Idempotent thanks to
   * _schemaMigrationInitiated.
   */
  _considerSchemaMigration: function() {
    if (!this._schemaMigrationInitiated &&
        GlodaDatastore._actualSchemaVersion === 26) {
      let job = new IndexingJob("fixMissingContacts", null);
      GlodaIndexer.indexJob(job);
      this._schemaMigrationInitiated = true;
    }
  },

  /** Kick off the start-up indexing sweep. */
  initialSweep: function() {
    this.indexingSweepNeeded = true;
  },

  _indexingSweepActive: false,
  /**
   * Indicate that an indexing sweep is desired.  We kick-off an indexing
   * sweep at start-up and whenever we receive an event-based notification
   * that we either can't process as an event or that we normally handle
   * during the sweep pass anyways.
   */
  set indexingSweepNeeded(aNeeded) {
    // only schedule a new sweep job if one is not already active.
    if (!this._indexingSweepActive && aNeeded) {
      let job = new IndexingJob("folderSweep", null);
      job.mappedFolders = false;
      GlodaIndexer.indexJob(job);
      this._indexingSweepActive = true;
    }
  },

  /**
   * Performs the folder sweep, locating folders that should be indexed, and
   * creating a folder indexing job for them, and rescheduling itself for
   * execution after that job is completed.  Once it indexes all the folders,
   * if we believe we have deletions to process (or just don't know), it kicks
   * off a deletion processing job.
   *
   * Folder traversal logic is based off the spotlight/vista indexer code; we
   * retrieve the list of servers and folders each time want to find a new
   * folder to index.  This avoids needing to maintain a perfect model of the
   * folder hierarchy at all times.  (We may eventually want to do that, but
   * this is sufficient and safe for now.)  Although our use of dirty flags on
   * the folders allows us to avoid tracking the 'last folder' we processed,
   * we do so to avoid getting 'trapped' in a folder with a high rate of
   * changes.
   *
   * @param aJob The "folderSweep" IndexingJob; we stash the sorted folder
   *     list on it (aJob.foldersToProcess) so that re-scheduled invocations
   *     resume where we left off.
   */
  _worker_indexingSweep: function* gloda_worker_indexingSweep(aJob) {
    if (!aJob.mappedFolders) {
      // Walk the folders and make sure all the folders we would want to index
      // are mapped.  Build up a list of GlodaFolders as we go, so that we can
      // sort them by their indexing priority.
      let foldersToProcess = aJob.foldersToProcess = [];

      let allFolders = MailServices.accounts.allFolders;
      for (let folder in fixIterator(allFolders, Ci.nsIMsgFolder)) {
        if (this.shouldIndexFolder(folder))
          foldersToProcess.push(Gloda.getFolderForFolder(folder));
      }

      // sort the folders by priority (descending)
      foldersToProcess.sort(function (a, b) {
        return b.indexingPriority - a.indexingPriority;
      });

      aJob.mappedFolders = true;
    }

    // -- process the folders (in sorted order)
    while (aJob.foldersToProcess.length) {
      let glodaFolder = aJob.foldersToProcess.shift();
      // ignore folders that:
      // - have been deleted out of existence!
      // - are not dirty/have not been compacted
      // - are actively being compacted
      if (glodaFolder._deleted ||
          (!glodaFolder.dirtyStatus && !glodaFolder.compacted) ||
          glodaFolder.compacting)
        continue;

      // If the folder is marked as compacted, give it a compaction job.
      if (glodaFolder.compacted)
        GlodaIndexer.indexJob(new IndexingJob("folderCompact", glodaFolder.id));

      // add a job for the folder indexing if it was dirty
      if (glodaFolder.dirtyStatus)
        GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id));

      // re-schedule this job (although this worker will die); the re-queued
      // sweep runs after the compaction/indexing jobs we just queued.
      GlodaIndexer.indexJob(aJob);
      yield this.kWorkDone;
    }

    // consider deletion (null means "don't know", so err on the side of
    // processing)
    if (this.pendingDeletions || this.pendingDeletions === null)
      GlodaIndexer.indexJob(new IndexingJob("delete", null));

    // we don't have any more work to do...
    this._indexingSweepActive = false;
    yield this.kWorkDone;
  },

  /**
   * The only state we need to cleanup is that there is no longer an active
   * indexing sweep.
   */
  _cleanup_indexingSweep: function gloda_canceled_indexingSweep(aJob) {
    this._indexingSweepActive = false;
  },

  /**
   * The number of headers to look at before yielding with kWorkSync.  This
   * is for time-slicing purposes so we still yield to the UI periodically.
   */
  HEADER_CHECK_SYNC_BLOCK_SIZE: 25,

  /**
   * The number of headers to look at before calling
   * GlodaUtils.considerHeaderBasedGC to give the GC a chance to run.
   */
  HEADER_CHECK_GC_BLOCK_SIZE: 256,

  /**
   * The number of [gloda id, message key, message-id header] tuples fetched
   * per folderCompactionPassBlockFetch batch during a compaction pass.
   */
  FOLDER_COMPACTION_PASS_BATCH_SIZE: 512,
  /**
   * Special indexing pass for (local) folders than have been compacted.  The
   * compaction can cause message keys to change because message keys in local
   * folders are simply offsets into the mbox file.  Accordingly, we need to
   * update the gloda records/objects to point them at the new message key.
   *
   * Our general algorithm is to perform two traversals in parallel.  The first
   * is a straightforward enumeration of the message headers in the folder that
   * apparently have been already indexed.  These provide us with the message
   * key and the "gloda-id" property.
   * The second is a list of tuples containing a gloda message id, its current
   * message key per the gloda database, and the message-id header.  We re-fill
   * the list with batches on-demand.  This allows us to both avoid dispatching
   * needless UPDATEs as well as deal with messages that were tracked by the
   * PendingCommitTracker but were discarded by the compaction notification.
   *
   * We end up processing two streams of gloda-id's and some extra info.  In
   * the normal case we expect these two streams to line up exactly and all
   * we need to do is update the message key if it has changed.
   *
   * There are a few exceptional cases where things do not line up:
   * 1) The gloda database knows about a message that the enumerator does not
   *    know about...
   *    a) This message exists in the folder (identified using its message-id
   *       header).  This means the message got indexed but PendingCommitTracker
   *       had to forget about the info when the compaction happened.  We
   *       re-establish the link and track the message in PendingCommitTracker
   *       again.
   *    b) The message does not exist in the folder.  This means the message got
   *       indexed, PendingCommitTracker had to forget about the info, and
   *       then the message either got moved or deleted before now.  We mark
   *       the message as deleted; this allows the gloda message to be reused
   *       if the move target has not yet been indexed or purged if it already
   *       has been and the gloda message is a duplicate.  And obviously, if the
   *       event that happened was actually a delete, then the delete is the
   *       right thing to do.
   * 2) The enumerator knows about a message that the gloda database does not
   *    know about.  This is unexpected and should not happen.  We log a
   *    warning.  We are able to differentiate this case from case #1a by
   *    retrieving the message header associated with the next gloda message
   *    (using the message-id header per 1a again).  If the gloda message's
   *    message key is after the enumerator's message key then we know this is
   *    case #2.  (It implies an insertion in the enumerator stream which is how
   *    we define the unexpected case.)
   *
   * Besides updating the database rows, we also need to make sure that
   * in-memory representations are updated.  Immediately after dispatching
   * UPDATE changes to the database we use the same set of data to walk the
   * live collections and update any affected messages.  We are then able to
   * discard the information.  Although this means that we will have to
   * potentially walk the live collections multiple times, unless something
   * has gone horribly wrong, the number of collections should be reasonable
   * and the lookups are cheap.  We bias batch sizes accordingly.
   *
   * Because we operate based on chunks we need to make sure that when we
   * actually deal with multiple chunks that we don't step on our own feet with
   * our database updates.  Since compaction of message key K results in a new
   * message key K' such that K' <= K, we can reliably issue database
   * updates for all values <= K.  Which means our feet are safe no matter
   * when we issue the update command.  For maximum cache benefit, we issue
   * our updates prior to our new query since they should still be maximally
   * hot at that point.
   *
   * @param aJob The "folderCompact" IndexingJob; aJob.id is the gloda
   *     folder id of the compacted folder.
   * @param aCallbackHandle Async driver handle used for the block-fetch
   *     round trips to GlodaDatastore.
   */
  _worker_folderCompactionPass:
      function* gloda_worker_folderCompactionPass(aJob, aCallbackHandle) {
    yield this._indexerEnterFolder(aJob.id);

    // It's conceivable that with a folder sweep we might end up trying to
    // compact a folder twice.  Bail early in this case.
    if (!this._indexingGlodaFolder.compacted)
      yield this.kWorkDone;

    // this is a forward enumeration (sometimes we reverse enumerate; not here)
    this._indexerGetEnumerator(this.kEnumIndexedMsgs);

    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;
    const HEADER_CHECK_GC_BLOCK_SIZE = this.HEADER_CHECK_GC_BLOCK_SIZE;
    const FOLDER_COMPACTION_PASS_BATCH_SIZE =
      this.FOLDER_COMPACTION_PASS_BATCH_SIZE;

    // Tuples of [gloda id, message key, message-id header] from
    // folderCompactionPassBlockFetch
    let glodaIdsMsgKeysHeaderIds = [];
    // Unpack each tuple from glodaIdsMsgKeysHeaderIds into these guys.
    // (Initialize oldMessageKey because we use it to kickstart our query.)
    let oldGlodaId, oldMessageKey = -1, oldHeaderMessageId;
    // parallel lists of gloda ids and message keys to pass to
    // GlodaDatastore.updateMessageLocations
    let updateGlodaIds = [];
    let updateMessageKeys = [];
    // list of gloda id's to mark deleted
    let deleteGlodaIds = [];
    let exceptionalMessages = {};

    // for GC reasons we need to track the number of headers seen
    let numHeadersSeen = 0;

    // We are consuming two lists; our loop structure has to reflect that.
    let headerIter = Iterator(fixIterator(this._indexingEnumerator,
                                          nsIMsgDBHdr));
    let mayHaveMoreGlodaMessages = true;
    // flags to re-examine the current header / gloda tuple on the next pass
    // instead of advancing the corresponding stream
    let keepIterHeader = false;
    let keepGlodaTuple = false;
    let msgHdr = null;
    while (headerIter || mayHaveMoreGlodaMessages) {
      let glodaId;
      if (headerIter) {
        try {
          if (!keepIterHeader)
            msgHdr = headerIter.next();
          else
            keepIterHeader = false;
        }
        catch (ex) {
          // legacy-iterator protocol: StopIteration signals exhaustion
          if (ex instanceof StopIteration) {
            headerIter = null;
            msgHdr = null;
            // do the loop check again
            continue;
          } else {
            throw ex;
          }
        }
      }

      if (msgHdr) {
        numHeadersSeen++;
        if (numHeadersSeen % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
          yield this.kWorkSync;

        if (numHeadersSeen % HEADER_CHECK_GC_BLOCK_SIZE == 0)
          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);

        // There is no need to check with PendingCommitTracker.  If a message
        // somehow got indexed between the time the compaction killed
        // everything and the time we run, that is a bug.
        glodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
        // (there is also no need to check for gloda dirty since the enumerator
        // filtered that for us.)
      }

      // get more [gloda id, message key, message-id header] tuples if we have
      // run out
      if (!glodaIdsMsgKeysHeaderIds.length && mayHaveMoreGlodaMessages) {
        // Since we operate on blocks, getting a new block implies we should
        // flush the last block if applicable.
        if (updateGlodaIds.length) {
          GlodaDatastore.updateMessageLocations(updateGlodaIds,
                                                updateMessageKeys,
                                                aJob.id, true);
          updateGlodaIds = [];
          updateMessageKeys = [];
        }

        if (deleteGlodaIds.length) {
          GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);
          deleteGlodaIds = [];
        }

        GlodaDatastore.folderCompactionPassBlockFetch(
          aJob.id, oldMessageKey + 1, FOLDER_COMPACTION_PASS_BATCH_SIZE,
          aCallbackHandle.wrappedCallback);
        glodaIdsMsgKeysHeaderIds = yield this.kWorkAsync;
        // Reverse so we can use pop instead of shift and I don't need to be
        // paranoid about performance.
        glodaIdsMsgKeysHeaderIds.reverse();

        if (!glodaIdsMsgKeysHeaderIds.length) {
          mayHaveMoreGlodaMessages = false;

          // We shouldn't be in the loop anymore if headerIter is dead now.
          if (!headerIter)
            break;
        }
      }

      if (!keepGlodaTuple) {
        if (mayHaveMoreGlodaMessages)
          [oldGlodaId, oldMessageKey, oldHeaderMessageId] =
            glodaIdsMsgKeysHeaderIds.pop();
        else
          oldGlodaId = oldMessageKey = oldHeaderMessageId = null;
      }
      else {
        keepGlodaTuple = false;
      }

      // -- normal expected case: the two streams line up
      if (glodaId == oldGlodaId) {
        // only need to do something if the key is not right
        if (msgHdr.messageKey != oldMessageKey) {
          updateGlodaIds.push(glodaId);
          updateMessageKeys.push(msgHdr.messageKey);
        }
      }
      // -- exceptional cases
      else {
        // This should always return a value unless something is very wrong.
        // We do not want to catch the exception if one happens.
        let idBasedHeader = oldHeaderMessageId ?
          this._indexingDatabase.getMsgHdrForMessageID(oldHeaderMessageId) :
          false;
        // - Case 1b.
        // We want to mark the message as deleted.
        if (idBasedHeader == null) {
          deleteGlodaIds.push(oldGlodaId);
        }
        // - Case 1a
        // The expected case is that the message referenced by the gloda
        // database precedes the header the enumerator told us about.  This
        // is expected because if PendingCommitTracker did not mark the
        // message as indexed/clean then the enumerator would not tell us
        // about it.
        // Also, if we ran out of headers from the enumerator, this is a dead
        // giveaway that this is the expected case.
        else if (idBasedHeader &&
                 ((msgHdr &&
                   idBasedHeader.messageKey < msgHdr.messageKey) ||
                  !msgHdr)) {
          // tell the pending commit tracker about the gloda database one
          PendingCommitTracker.track(idBasedHeader, oldGlodaId);
          // and we might need to update the message key too
          if (idBasedHeader.messageKey != oldMessageKey) {
            updateGlodaIds.push(oldGlodaId);
            updateMessageKeys.push(idBasedHeader.messageKey);
          }
          // Take another pass through the loop so that we check the
          // enumerator header against the next message in the gloda
          // database.
          keepIterHeader = true;
        }
        // - Case 2
        // Whereas if the message referenced by gloda has a message key
        // greater than the one returned by the enumerator, then we have a
        // header claiming to be indexed by gloda that gloda does not
        // actually know about.  This is exceptional and gets a warning.
        else if (msgHdr) {
          this._log.warn("Observed header that claims to be gloda indexed " +
                         "but that gloda has never heard of during " +
                         "compaction." +
                         " In folder: " + msgHdr.folder.URI +
                         " sketchy key: " + msgHdr.messageKey +
                         " subject: " + msgHdr.mime2DecodedSubject);
          // Keep this tuple around for the next enumerator provided header
          keepGlodaTuple = true;
        }
      }
    }
    // If we don't flush the update, no one will!
    if (updateGlodaIds.length)
      GlodaDatastore.updateMessageLocations(updateGlodaIds,
                                            updateMessageKeys,
                                            aJob.id, true);
    if (deleteGlodaIds.length)
      GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);

    // the folder is now fully processed; clear its compacted flag
    this._indexingGlodaFolder._setCompactedState(false);

    this._indexerLeaveFolder();
    yield this.kWorkDone;
  },

  /**
   * Index the contents of a folder.
   *
   * @param aJob The "folder" IndexingJob; aJob.id is the gloda folder id and
   *     aJob.force (optional) requests re-indexing of all messages.
   * @param aCallbackHandle Async driver handle used to push _indexMessage
   *     sub-generators.
   */
  _worker_folderIndex:
      function* gloda_worker_folderIndex(aJob, aCallbackHandle) {
    let logDebug = this._log.level <= Log4Moz.Level.Debug;
    yield this._indexerEnterFolder(aJob.id);

    if (!this.shouldIndexFolder(this._indexingFolder)) {
      aJob.safelyInvokeCallback(true);
      yield this.kWorkDone;
    }

    // Make sure listeners get notified about this job.
    GlodaIndexer._notifyListeners();

    // there is of course a cost to all this header investigation even if we
    // don't do something.  so we will yield with kWorkSync for every block.
    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;
    const HEADER_CHECK_GC_BLOCK_SIZE = this.HEADER_CHECK_GC_BLOCK_SIZE;

    // we can safely presume if we are here that this folder has been selected
    // for offline processing...

    // -- Filthy Folder
    // A filthy folder may have misleading properties on the message that claim
    // the message is indexed.  They are misleading because the database, for
    // whatever reason, does not have the messages (accurately) indexed.
    // We need to walk all the messages and mark them filthy if they have a
    // dirty property.  Once we have done this, we can downgrade the folder's
    // dirty status to plain dirty.  We do this rather than trying to process
    // everyone in one go in a filthy context because if we have to terminate
    // indexing before we quit, we don't want to have to re-index messages next
    // time.  (This could even lead to never completing indexing in a
    // pathological situation.)
    let glodaFolder = GlodaDatastore._mapFolder(this._indexingFolder);
    if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
      // aAllowPreBadIds=true: any gloda-id at all counts as "indexed" here
      this._indexerGetEnumerator(this.kEnumIndexedMsgs, true);
      let count = 0;
      for (let msgHdr in fixIterator(this._indexingEnumerator, nsIMsgDBHdr)) {
        // we still need to avoid locking up the UI, pause periodically...
        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
          yield this.kWorkSync;

        if (count % HEADER_CHECK_GC_BLOCK_SIZE == 0)
          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);

        let glodaMessageId = msgHdr.getUint32Property(
          GLODA_MESSAGE_ID_PROPERTY);
        // if it has a gloda message id, we need to mark it filthy
        if (glodaMessageId != 0)
          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageFilthy);
        // if it doesn't have a gloda message id, we will definitely index it,
        // so no action is required.
      }
      // Commit the filthy status changes to the message database.
      this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);

      // this will automatically persist to the database
      glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderDirty);
    }

    // Figure out whether we're supposed to index _everything_ or just what
    // has not yet been indexed.
    let force = ("force" in aJob) && aJob.force;
    let enumeratorType = force ? this.kEnumAllMsgs : this.kEnumMsgsToIndex;

    // Pass 1: count the number of messages to index.
    //  We do this in order to be able to report to the user what we're doing.
    // TODO: give up after reaching a certain number of messages in folders
    //  with ridiculous numbers of messages and make the interface just say
    //  something like "over N messages to go."

    this._indexerGetEnumerator(enumeratorType);

    let numMessagesToIndex = 0;
    let numMessagesOut = {};
    // Keep going until we run out of headers.
    while (this._indexingFolder.msgDatabase.nextMatchingHdrs(
             this._indexingEnumerator,
             HEADER_CHECK_SYNC_BLOCK_SIZE * 8, // this way is faster, do more
             0, // moot, we don't return headers
             null, // don't return headers, we just want the count
             numMessagesOut)) {
      numMessagesToIndex += numMessagesOut.value;
      yield this.kWorkSync;
    }
    // pick up the count reported by the final (false-returning) call
    numMessagesToIndex += numMessagesOut.value;

    aJob.goal = numMessagesToIndex;

    if (numMessagesToIndex > 0) {
      // We used up the iterator, get a new one.
      this._indexerGetEnumerator(enumeratorType);

      // Pass 2: index the messages.
      let count = 0;
      for (let msgHdr in fixIterator(this._indexingEnumerator, nsIMsgDBHdr)) {
        // per above, we want to periodically release control while doing all
        // this header traversal/investigation.
        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
          yield this.kWorkSync;

        if (count % HEADER_CHECK_GC_BLOCK_SIZE == 0)
          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);

        // To keep our counts more accurate, increment the offset before
        // potentially skipping any messages.
        ++aJob.offset;

        // Skip messages that have not yet been reported to us as existing via
        // msgsClassified.
        if (this._indexingFolder.getProcessingFlags(msgHdr.messageKey) &
            NOT_YET_REPORTED_PROCESSING_FLAGS)
          continue;

        // Because the gloda id could be in-flight, we need to double-check the
        // enumerator here since it can't know about our in-memory stuff.
        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
        // if the message seems valid and we are not forcing indexing, skip it.
        // (that means good gloda id and not dirty)
        if (!force &&
            glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
            glodaDirty == this.kMessageClean)
          continue;

        if (logDebug)
          this._log.debug(">>>  calling _indexMessage");
        yield aCallbackHandle.pushAndGo(
          this._indexMessage(msgHdr, aCallbackHandle),
          {what: "indexMessage", msgHdr: msgHdr});
        GlodaIndexer._indexedMessageCount++;
        if (logDebug)
          this._log.debug("<<<  back from _indexMessage");
      }
    }

    // This will trigger an (async) db update which cannot hit the disk prior to
    // the actual database records that constitute the clean state.
    // XXX There is the slight possibility that, in the event of a crash, this
    // will hit the disk but the gloda-id properties on the headers will not
    // get set.  This should ideally be resolved by detecting a non-clean
    // shutdown and marking all folders as dirty.
    glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderClean);

    // by definition, it's not likely we'll visit this folder again anytime soon
    this._indexerLeaveFolder();

    aJob.safelyInvokeCallback(true);

    yield this.kWorkDone;
  },

  /**
   * Invoked when a "message" job is scheduled so that we can clear
   * _pendingAddJob if that is the job.  We do this so that work items are not
   * added to _pendingAddJob while it is being processed.
   */
  _schedule_messageIndex: function(aJob, aCallbackHandle) {
    // we do not want new work items to be added as we are processing, so
    // clear _pendingAddJob.  A new job will be created as needed.
    if (aJob === this._pendingAddJob)
      this._pendingAddJob = null;
    // update our goal from the items length
    aJob.goal = aJob.items.length;
  },
  /**
   * If the job gets canceled, we need to make sure that we clear out pending
   * add job or our state will get wonky.
   */
  _canceled_messageIndex: function gloda_index_msg_canceled_messageIndex(aJob) {
    if (aJob === this._pendingAddJob)
      this._pendingAddJob = null;
  },


  /**
   * Index a specific list of messages that we know to index from
   * event-notification hints.
   *
   * @param aJob The "message" IndexingJob; aJob.items holds
   *     [folder id, message key or message-id] work items.
   * @param aCallbackHandle Async driver handle used to push _indexMessage.
   */
  _worker_messageIndex:
      function* gloda_worker_messageIndex(aJob, aCallbackHandle) {
    // if we are already in the correct folder, our "get in the folder" clause
    // will not execute, so we need to make sure this value is accurate in
    // that case.  (and we want to avoid multiple checks...)
+ for (; aJob.offset < aJob.items.length; aJob.offset++) { + let item = aJob.items[aJob.offset]; + // item is either [folder ID, message key] or + // [folder ID, message ID] + + let glodaFolderId = item[0]; + // If the folder has been deleted since we queued, skip this message + if (!GlodaDatastore._folderIdKnown(glodaFolderId)) + continue; + let glodaFolder = GlodaDatastore._mapFolderID(glodaFolderId); + + // Stay out of folders that: + // - are compacting / compacted and not yet processed + // - got deleted (this would be redundant if we had a stance on id nukage) + // (these things could have changed since we queued the event) + if (glodaFolder.compacting || glodaFolder.compacted || + glodaFolder._deleted) + continue; + + // get in the folder + if (this._indexingGlodaFolder != glodaFolder) { + yield this._indexerEnterFolder(glodaFolderId); + + // Now that we have the real nsIMsgFolder, sanity-check that we should + // be indexing it. (There are some checks that require the + // nsIMsgFolder.) + if (!this.shouldIndexFolder(this._indexingFolder)) + continue; + } + + let msgHdr; + // GetMessageHeader can be affected by the use cache, so we need to check + // ContainsKey first to see if the header is really actually there. + if (typeof item[1] == "number") + msgHdr = this._indexingDatabase.ContainsKey(item[1]) && + this._indexingFolder.GetMessageHeader(item[1]); + else + // same deal as in move processing. + // TODO fixme to not assume singular message-id's. + msgHdr = this._indexingDatabase.getMsgHdrForMessageID(item[1]); + + if (msgHdr) + yield aCallbackHandle.pushAndGo( + this._indexMessage(msgHdr, aCallbackHandle), + {what: "indexMessage", msgHdr: msgHdr}); + else + yield this.kWorkSync; + } + + // There is no real reason to stay 'in' the folder. 
If we are going to get + // more events from the folder, its database would have to be open for us + // to get the events, so it's not like we're creating an efficiency + // problem where we unload a folder just to load it again in 2 seconds. + // (Well, at least assuming the views are good about holding onto the + // database references even though they go out of their way to avoid + // holding onto message header references.) + this._indexerLeaveFolder(); + + yield this.kWorkDone; + }, + + /** + * Recover from a "folder" or "message" job failing inside a call to + * |_indexMessage|, marking the message bad. If we were not in an + * |_indexMessage| call, then fail to recover. + * + * @param aJob The job that was being worked. We ignore this for now. + * @param aContextStack The callbackHandle mechanism's context stack. When we + * invoke pushAndGo for _indexMessage we put something in so we can + * detect when it is on the async stack. + * @param aException The exception that is necessitating we attempt to + * recover. + * + * @return 1 if we were able to recover (because we want the call stack + * popped down to our worker), false if we can't. + */ + _recover_indexMessage: + function gloda_index_recover_indexMessage(aJob, aContextStack, + aException) { + // See if indexMessage is on the stack... + if (aContextStack.length >= 2 && + aContextStack[1] && + ("what" in aContextStack[1]) && + aContextStack[1].what == "indexMessage") { + // it is, so this is probably recoverable. + + this._log.debug( + "Exception while indexing message, marking it bad (gloda id of 1)."); + + // -- Mark the message as bad + let msgHdr = aContextStack[1].msgHdr; + // (In the worst case, the header is no longer valid, which will result in + // exceptions. We need to be prepared for that.) 
+ try { + msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, + GLODA_BAD_MESSAGE_ID); + // clear the dirty bit if it has one + if (msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY)) + msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, 0); + } + catch (ex) { + // If we are indexing a folder and the message header is no longer + // valid, then it's quite likely the whole folder is no longer valid. + // But since in the event-driven message indexing case we could have + // other valid things to look at, let's try and recover. The folder + // indexing case will come back to us shortly and we will indicate + // recovery is not possible at that point. + // So do nothing here since by popping the indexing of the specific + // message out of existence we are recovering. + } + return 1; + } + return false; + }, + + /** + * Cleanup after an aborted "folder" or "message" job. + */ + _cleanup_indexing: function gloda_index_cleanup_indexing(aJob) { + this._indexerLeaveFolder(); + aJob.safelyInvokeCallback(false); + }, + + /** + * Maximum number of deleted messages to process at a time. Arbitrary; there + * are no real known performance constraints at this point. + */ + DELETED_MESSAGE_BLOCK_SIZE: 32, + + /** + * Process pending deletes... + */ + _worker_processDeletes: function* gloda_worker_processDeletes(aJob, + aCallbackHandle) { + + // Count the number of messages we will eventually process. People freak + // out when the number is constantly increasing because they think gloda + // has gone rogue. (Note: new deletions can still accumulate during + // our execution, so we may 'expand' our count a little still.) + this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback); + aJob.goal = yield this.kWorkAsync; + this._log.debug("There are currently " + aJob.goal + " messages awaiting" + + " deletion processing."); + + // get a block of messages to delete. 
+ let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, { + noDbQueryValidityConstraints: true, + }); + query._deleted(1); + query.limit(this.DELETED_MESSAGE_BLOCK_SIZE); + let deletedCollection = query.getCollection(aCallbackHandle); + yield this.kWorkAsync; + + while (deletedCollection.items.length) { + for (let message of deletedCollection.items) { + // If it turns out our count is wrong (because some new deletions + // happened since we entered this worker), let's issue a new count + // and use that to accurately update our goal. + if (aJob.offset >= aJob.goal) { + this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback); + aJob.goal += yield this.kWorkAsync; + } + + yield aCallbackHandle.pushAndGo(this._deleteMessage(message, + aCallbackHandle)); + aJob.offset++; + yield this.kWorkSync; + } + + deletedCollection = query.getCollection(aCallbackHandle); + yield this.kWorkAsync; + } + this.pendingDeletions = false; + + yield this.kWorkDone; + }, + + _worker_fixMissingContacts: function*(aJob, aCallbackHandle) { + let identityContactInfos = [], fixedContacts = {}; + + // -- asynchronously get a list of all identities without contacts + // The upper bound on the number of messed up contacts is the number of + // contacts in the user's address book. This should be small enough + // (and the data size small enough) that this won't explode thunderbird. 
    let queryStmt = GlodaDatastore._createAsyncStatement(
      "SELECT identities.id, identities.contactID, identities.value " +
        "FROM identities " +
        "LEFT JOIN contacts ON identities.contactID = contacts.id " +
        "WHERE identities.kind = 'email' AND contacts.id IS NULL",
      true);
    queryStmt.executeAsync({
      handleResult: function(aResultSet) {
        let row;
        while ((row = aResultSet.getNextRow())) {
          identityContactInfos.push({
            identityId: row.getInt64(0),
            contactId: row.getInt64(1),
            email: row.getString(2)
          });
        }
      },
      // NOTE(review): errors are silently swallowed here; handleCompletion
      //  still fires, so the worker just proceeds with whatever rows were
      //  accumulated before the failure.
      handleError: function(aError) {
      },
      handleCompletion: function(aReason) {
        GlodaDatastore._asyncCompleted();
        aCallbackHandle.wrappedCallback();
      },
    });
    queryStmt.finalize();
    // presumably balances the _asyncCompleted() call made in handleCompletion
    //  above — TODO confirm against GlodaDatastore's async statement tracking.
    GlodaDatastore._pendingAsyncStatements++;
    yield this.kWorkAsync;

    // -- perform fixes only if there were missing contacts
    if (identityContactInfos.length) {
      const yieldEvery = 64;
      // - create the missing contacts
      for (let i = 0; i < identityContactInfos.length; i++) {
        // periodically release control back to the scheduler
        if ((i % yieldEvery) === 0)
          yield this.kWorkSync;

        let info = identityContactInfos[i],
            card = GlodaUtils.getCardForEmail(info.email),
            contact = new GlodaContact(
              GlodaDatastore, info.contactId,
              null, null,
              card ? (card.displayName || info.email) : info.email,
              0, 0);
        GlodaDatastore.insertContact(contact);

        // update the in-memory rep of the identity to know about the contact
        //  if there is one.
        let identity = GlodaCollectionManager.cacheLookupOne(
          Gloda.NOUN_IDENTITY, info.identityId, false);
        if (identity) {
          // Unfortunately, although this fixes the (reachable) Identity and
          //  exposes the Contact, it does not make the Contact reachable from
          //  the collection manager.  This will make explicit queries that look
          //  up the contact potentially see the case where
          //  contact.identities[0].contact !== contact.  Alternately, that
          //  may not happen and instead the "contact" object we created above
          //  may become unlinked.  (I'd have to trace some logic I don't feel
          //  like tracing.)  Either way, the potential fallout is minimal
          //  since the object identity invariant will just lapse and popularity
          //  on the contact may become stale, and neither of those meaningfully
          //  affect the operation of anything in Thunderbird.
          // If we really cared, we could find all the dominant collections
          //  that reference the identity and update their corresponding
          //  contact collection to make it reachable.  That use-case does not
          //  exist outside of here, which is why we're punting.
          identity._contact = contact;
          contact._identities = [identity];
        }

        // NOTE: If the addressbook indexer did anything useful other than
        //  adapting to name changes, we could schedule indexing of the cards at
        //  this time.  However, as of this writing, it doesn't, and this task
        //  is a one-off relevant only to the time of this writing.
      }

      // - mark all folders as dirty, initiate indexing sweep
      this.dirtyAllKnownFolders();
      this.indexingSweepNeeded = true;
    }

    // -- mark the schema upgrade, be done
    GlodaDatastore._updateSchemaVersion(GlodaDatastore._schemaVersion);
    yield this.kWorkDone;
  },

  /**
   * Determine whether a folder is suitable for indexing.
   *
   * @param aMsgFolder An nsIMsgFolder you want to see if we should index.
   *
   * @returns true if we want to index messages in this type of folder, false if
   *     we do not.
   */
  shouldIndexFolder: function(aMsgFolder) {
    let folderFlags = aMsgFolder.flags;
    // Completely ignore non-mail and virtual folders.  They should never even
    //  get to be GlodaFolder instances.
    if (!(folderFlags & Ci.nsMsgFolderFlags.Mail) ||
        (folderFlags & Ci.nsMsgFolderFlags.Virtual))
      return false;

    // Some folders do not really exist; we can detect this by getStringProperty
    //  exploding when we call it.  This is primarily a concern because
    //  _mapFolder calls said exploding method, but we also don't want to
    //  even think about indexing folders that don't exist.  (Such folders are
    //  likely the result of a messed up profile.)
    try {
      // flags is used because it should always be in the cache avoiding a miss
      //  which would compel an msf open.
      aMsgFolder.getStringProperty("flags");
    } catch (ex) {
      return false;
    }

    // Now see what our gloda folder information has to say about the folder.
    let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
    return glodaFolder.indexingPriority != glodaFolder.kIndexingNeverPriority;
  },

  /**
   * Sets the indexing priority for this folder and persists it both to Gloda,
   *  and, for backup purposes, to the nsIMsgFolder via string property as well.
   *
   * Setting this priority may cause the indexer to either reindex this folder,
   *  or remove this folder from the existing index.
   *
   * @param {nsIMsgFolder} aFolder
   * @param {Number} aPriority (one of the priority constants from GlodaFolder)
   */
  setFolderIndexingPriority: function glodaSetFolderIndexingPriority(aFolder, aPriority) {

    let glodaFolder = GlodaDatastore._mapFolder(aFolder);

    // if there's been no change, we're done
    if (aPriority == glodaFolder.indexingPriority) {
      return;
    }

    // save off the old priority, and set the new one
    let previousPrio = glodaFolder.indexingPriority;
    glodaFolder._indexingPriority = aPriority;

    // persist the new priority
    GlodaDatastore.updateFolderIndexingPriority(glodaFolder);
    aFolder.setStringProperty("indexingPriority", Number(aPriority).toString());

    // if we've been told never to index this folder...
+ if (aPriority == glodaFolder.kIndexingNeverPriority) { + + // stop doing so + if (this._indexingFolder == aFolder) + GlodaIndexer.killActiveJob(); + + // mark all existing messages as deleted + GlodaDatastore.markMessagesDeletedByFolderID(glodaFolder.id); + + // re-index + GlodaMsgIndexer.indexingSweepNeeded = true; + + } else if (previousPrio == glodaFolder.kIndexingNeverPriority) { + + // there's no existing index, but the user now wants one + glodaFolder._dirtyStatus = glodaFolder.kFolderFilthy; + GlodaDatastore.updateFolderDirtyStatus(glodaFolder) + GlodaMsgIndexer.indexingSweepNeeded = true; + } + }, + + /** + * Resets the indexing priority on the given folder to whatever the default + * is for folders of that type. + * + * @note Calls setFolderIndexingPriority under the hood, so has identical + * potential reindexing side-effects + * + * @param {nsIMsgFolder} aFolder + * @param {boolean} aAllowSpecialFolderIndexing + */ + resetFolderIndexingPriority: function glodaResetFolderIndexingPriority(aFolder, aAllowSpecialFolderIndexing) { + this.setFolderIndexingPriority(aFolder, + GlodaDatastore.getDefaultIndexingPriority(aFolder, + aAllowSpecialFolderIndexing)); + }, + + /** + * Queue all of the folders of all of the accounts of the current profile + * for indexing. We traverse all folders and queue them immediately to try + * and have an accurate estimate of the number of folders that need to be + * indexed. (We previously queued accounts rather than immediately + * walking their list of folders.) + */ + indexEverything: function glodaIndexEverything() { + this._log.info("Queueing all accounts for indexing."); + + GlodaDatastore._beginTransaction(); + for (let account in fixIterator(MailServices.accounts.accounts, + Ci.nsIMsgAccount)) { + this.indexAccount(account); + } + GlodaDatastore._commitTransaction(); + }, + + /** + * Queue all of the folders belonging to an account for indexing. 
+ */ + indexAccount: function glodaIndexAccount(aAccount) { + let rootFolder = aAccount.incomingServer.rootFolder; + if (rootFolder instanceof Ci.nsIMsgFolder) { + this._log.info("Queueing account folders for indexing: " + aAccount.key); + + let allFolders = rootFolder.descendants; + let folderJobs = []; + for (let folder in fixIterator(allFolders, Ci.nsIMsgFolder)) { + if (this.shouldIndexFolder(folder)) + GlodaIndexer.indexJob( + new IndexingJob("folder", GlodaDatastore._mapFolder(folder).id)); + } + } + else { + this._log.info("Skipping Account, root folder not nsIMsgFolder"); + } + }, + + /** + * Queue a single folder for indexing given an nsIMsgFolder. + * + * @param [aOptions.callback] A callback to invoke when the folder finishes + * indexing. First argument is true if the task ran to completion + * successfully, false if we had to abort for some reason. + * @param [aOptions.force=false] Should we force the indexing of all messages + * in the folder (true) or just index what hasn't been indexed (false). + * @return true if we are going to index the folder, false if not. + */ + indexFolder: function glodaIndexFolder(aMsgFolder, aOptions) { + if (!this.shouldIndexFolder(aMsgFolder)) + return false; + let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); + // stay out of compacting/compacted folders + if (glodaFolder.compacting || glodaFolder.compacted) + return false; + + this._log.info("Queue-ing folder for indexing: " + + aMsgFolder.prettiestName); + let job = new IndexingJob("folder", glodaFolder.id); + if (aOptions) { + if ("callback" in aOptions) + job.callback = aOptions.callback; + if ("force" in aOptions) + job.force = true; + } + GlodaIndexer.indexJob(job); + return true; + }, + + /** + * Queue a list of messages for indexing. + * + * @param aFoldersAndMessages List of [nsIMsgFolder, message key] tuples. 
+ */ + indexMessages: function gloda_index_indexMessages(aFoldersAndMessages) { + let job = new IndexingJob("message", null); + job.items = aFoldersAndMessages. + map(fm => [GlodaDatastore._mapFolder(fm[0]).id, fm[1]]); + GlodaIndexer.indexJob(job); + }, + + /** + * Mark all known folders as dirty so that the next indexing sweep goes + * into all folders and checks their contents to see if they need to be + * indexed. + * + * This is being added for the migration case where we want to try and reindex + * all of the messages that had been marked with GLODA_BAD_MESSAGE_ID but + * which is now GLODA_OLD_BAD_MESSAGE_ID and so we should attempt to reindex + * them. + */ + dirtyAllKnownFolders: function gloda_index_msg_dirtyAllKnownFolders() { + // Just iterate over the datastore's folder map and tell each folder to + // be dirty if its priority is not disabled. + for (let folderID in GlodaDatastore._folderByID) { + let glodaFolder = GlodaDatastore._folderByID[folderID]; + if (glodaFolder.indexingPriority !== glodaFolder.kIndexingNeverPriority) + glodaFolder._ensureFolderDirty(); + } + }, + + /** + * Given a message header, return whether this message is likely to have + * been indexed or not. + * + * This means the message must: + * - Be in a folder eligible for gloda indexing. (Not News, etc.) + * - Be in a non-filthy folder. + * - Be gloda-indexed and non-filthy. + * + * @param aMsgHdr A message header. + * @returns true if the message is likely to have been indexed. + */ + isMessageIndexed: function gloda_index_isMessageIndexed(aMsgHdr) { + // If it's in a folder that we flat out do not index, say no. 
    if (!this.shouldIndexFolder(aMsgHdr.folder))
      return false;
    let glodaFolder = GlodaDatastore._mapFolder(aMsgHdr.folder);
    let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(aMsgHdr);
    // Indexed means: valid gloda id, message not filthy, and the folder both
    //  mapped and not itself filthy.
    return glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
           glodaDirty != GlodaMsgIndexer.kMessageFilthy &&
           glodaFolder &&
           glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy;
  },

  /* *********** Event Processing *********** */

  /**
   * Tracks messages we have received msgKeyChanged notifications for in order
   *  to provide batching and to suppress needless reindexing when we receive
   *  the expected follow-up msgsClassified notification.
   *
   * The entries in this dictionary should be extremely short-lived as we
   *  receive the msgKeyChanged notification as the offline fake header is
   *  converted into a real header (which is accompanied by a msgAdded
   *  notification we don't pay attention to).  Once the headers finish
   *  updating, the message classifier will get its at-bat and should likely
   *  find that the messages have already been classified and so fast-path
   *  them.
   *
   * The keys in this dictionary are chosen to be consistent with those of
   *  PendingCommitTracker: the folder.URI + "#" + the (new) message key.
   * The values in the dictionary are either an object with "id" (the gloda
   *  id), "key" (the new message key), and "dirty" (is it dirty and so
   *  should still be queued for indexing) attributes, or null indicating that
   *  no change in message key occurred and so no database changes are
   *  required.
   */
  _keyChangedBatchInfo: {},

  /**
   * Common logic for things that want to feed event-driven indexing.  This
   *  gets called by both |_msgFolderListener.msgsClassified| when we are first
   *  seeing a message as well as by |_folderListener| when things happen to
   *  existing messages.  Although we could slightly specialize for the
   *  new-to-us case, it works out to be cleaner to just treat them the same
   *  and take a very small performance hit.
   *
   * @param aMsgHdrs Something fixIterator will work on to return an iterator
   *     on the set of messages that we should treat as potentially changed.
   * @param aDirtyingEvent Is this event inherently dirtying?  Receiving a
   *     msgsClassified notification is not inherently dirtying because it is
   *     just telling us that a message exists.  We use this knowledge to
   *     ignore the msgsClassified notifications for messages we have received
   *     msgKeyChanged notifications for and fast-pathed.  Since it is possible
   *     for user action to do something that dirties the message between the
   *     time we get the msgKeyChanged notification and when we receive the
   *     msgsClassified notification, we want to make sure we don't get
   *     confused.  (Although since we remove the message from our ignore-set
   *     after the first notification, we would likely just mistakenly treat
   *     the msgsClassified notification as something dirtying, so it would
   *     still work out...)
   */
  _reindexChangedMessages: function gloda_indexer_reindexChangedMessage(
      aMsgHdrs, aDirtyingEvent) {
    let glodaIdsNeedingDeletion = null;
    let messageKeyChangedIds = null, messageKeyChangedNewKeys = null;
    for (let msgHdr in fixIterator(aMsgHdrs, nsIMsgDBHdr)) {
      // -- Index this folder?
      let msgFolder = msgHdr.folder;
      if (!this.shouldIndexFolder(msgFolder)) {
        continue;
      }
      // -- Ignore messages in filthy folders!
      // A filthy folder can only be processed by an indexing sweep, and at
      //  that point the message will get indexed.
      let glodaFolder = GlodaDatastore._mapFolder(msgHdr.folder);
      if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy)
        continue;

      // -- msgKeyChanged event follow-up
      if (!aDirtyingEvent) {
        let keyChangedKey = msgHdr.folder.URI + "#" + msgHdr.messageKey;
        if (keyChangedKey in this._keyChangedBatchInfo) {
          var keyChangedInfo = this._keyChangedBatchInfo[keyChangedKey];
          delete this._keyChangedBatchInfo[keyChangedKey];

          // Null means to ignore this message because the key did not change
          //  (and the message was not dirty so it is safe to ignore.)
          if (keyChangedInfo == null)
            continue;
          // (the key may be null if we only generated the entry because the
          //  message was dirty)
          if (keyChangedInfo.key !== null) {
            // Lazily create the parallel id/key batch arrays on first use.
            if (messageKeyChangedIds == null) {
              messageKeyChangedIds = [];
              messageKeyChangedNewKeys = [];
            }
            messageKeyChangedIds.push(keyChangedInfo.id);
            messageKeyChangedNewKeys.push(keyChangedInfo.key);
          }
          // ignore the message because it was not dirty
          if (!keyChangedInfo.isDirty)
            continue;
        }
      }

      // -- Index this message?
      // We index local messages, IMAP messages that are offline, and IMAP
      //  messages that aren't offline but whose folders aren't offline either
      let isFolderLocal = msgFolder instanceof nsIMsgLocalMailFolder;
      if (!isFolderLocal) {
        if (!(msgHdr.flags & nsMsgMessageFlags.Offline) &&
            (msgFolder.flags & nsMsgFolderFlags.Offline)) {
          continue;
        }
      }
      // Ignore messages whose processing flags indicate it has not yet been
      //  classified.  In the IMAP case if the Offline flag is going to get set
      //  we are going to see it before the msgsClassified event so this is
      //  very important.
      if (msgFolder.getProcessingFlags(msgHdr.messageKey) &
          NOT_YET_REPORTED_PROCESSING_FLAGS)
        continue;

      let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);

      let isSpam = msgHdr.getStringProperty(JUNK_SCORE_PROPERTY) ==
                   JUNK_SPAM_SCORE_STR;

      // -- Is the message currently gloda indexed?
      if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
          glodaDirty != this.kMessageFilthy) {
        // - Is the message spam?
        if (isSpam) {
          // Treat this as a deletion...
          if (!glodaIdsNeedingDeletion)
            glodaIdsNeedingDeletion = [];
          glodaIdsNeedingDeletion.push(glodaId);
          // and skip to the next message
          continue;
        }

        // - Mark the message dirty if it is clean.
        // (This is the only case in which we need to mark dirty so that the
        //  indexing sweep takes care of things if we don't process this in
        //  an event-driven fashion.  If the message has no gloda-id or does
        //  and it's already dirty or filthy, it is already marked for
        //  indexing.)
        if (glodaDirty == this.kMessageClean)
          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageDirty);
        // if the message is pending clean, this change invalidates that.
        PendingCommitTracker.noteDirtyHeader(msgHdr);
      }
      // If it's not indexed but is spam, ignore it.
      else if (isSpam) {
        continue;
      }
      // (we want to index the message if we are here)

      // mark the folder dirty too, so we know to look inside
      glodaFolder._ensureFolderDirty();

      if (this._pendingAddJob == null) {
        this._pendingAddJob = new IndexingJob("message", null);
        GlodaIndexer.indexJob(this._pendingAddJob);
      }
      // only queue the message if we haven't overflowed our event-driven
      //  budget
      if (this._pendingAddJob.items.length <
          this._indexMaxEventQueueMessages) {
        this._pendingAddJob.items.push(
          [GlodaDatastore._mapFolder(msgFolder).id, msgHdr.messageKey]);
      }
      else {
        // Overflow: defer to a full indexing sweep instead of queueing more.
        this.indexingSweepNeeded = true;
      }
    }

    // Process any message key changes (from earlier msgKeyChanged events)
    if (messageKeyChangedIds != null)
      GlodaDatastore.updateMessageKeys(messageKeyChangedIds,
                                       messageKeyChangedNewKeys);

    // If we accumulated any deletions in there, batch them off now.
    if (glodaIdsNeedingDeletion) {
      GlodaDatastore.markMessagesDeletedByIDs(glodaIdsNeedingDeletion);
      this.pendingDeletions = true;
    }
  },


  /* ***** Folder Changes ***** */
  /**
   * All additions and removals are queued for processing.  Indexing messages
   *  is potentially phenomenally expensive, and deletion can still be
   *  relatively expensive due to our need to delete the message, its
   *  attributes, and all attributes that reference it.  Additionally,
   *  attribute deletion costs are higher than attribute look-up because
   *  there is the actual row plus its 3 indices, and our covering indices are
   *  no help there.
   *
   */
  _msgFolderListener: {
    indexer: null,

    /**
     * We no longer use the msgAdded notification, instead opting to wait until
     *  junk/trait classification has run (or decided not to run) and all
     *  filters have run.  The msgsClassified notification provides that for
     *  us.
     */
    msgAdded: function gloda_indexer_msgAdded(aMsgHdr) {
      // we are never called! we do not enable this bit!
    },

    /**
     * Process (apparently newly added) messages that have been looked at by
     *  the message classifier.  This ensures that if the message was going
     *  to get marked as spam, this will have already happened.
     *
     * Besides truly new (to us) messages, we will also receive this event for
     *  messages that are the result of IMAP message move/copy operations,
     *  including both moves that generated offline fake headers and those
     *  that did not.  In the offline fake header case, however, we are able
     *  to ignore their msgsClassified events because we will have received a
     *  msgKeyChanged notification sometime in the recent past.
     */
    msgsClassified: function gloda_indexer_msgsClassified(
        aMsgHdrs, aJunkClassified, aTraitClassified) {
      this.indexer._log.debug("msgsClassified notification");
      try {
        // Not an inherently dirtying event, hence the explicit false.
        GlodaMsgIndexer._reindexChangedMessages(aMsgHdrs.enumerate(), false);
      }
      catch (ex) {
        this.indexer._log.error("Explosion in msgsClassified handling:", ex);
      }
    },

    /**
     * Handle real, actual deletion (move to trash and IMAP deletion model
     *  don't count); we only see the deletion here when it becomes forever,
     *  or rather _just before_ it becomes forever.  Because the header is
     *  going away, we need to either process things immediately or extract
     *  the information required to purge it later without the header.
     * To this end, we mark all messages that were indexed in the gloda message
     *  database as deleted.  We set our pending deletions flag to let our
     *  indexing logic know that after its next wave of folder traversal, it
     *  should perform a deletion pass.  If it turns out the messages are
     *  coming back, the fact that deletion is thus deferred can be handy, as
     *  we can reuse the existing gloda message.
     */
    msgsDeleted: function gloda_indexer_msgsDeleted(aMsgHdrs) {
      this.indexer._log.debug("msgsDeleted notification");
      let glodaMessageIds = [];

      for (let iMsgHdr = 0; iMsgHdr < aMsgHdrs.length; iMsgHdr++) {
        let msgHdr = aMsgHdrs.queryElementAt(iMsgHdr, nsIMsgDBHdr);
        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
        // Only messages with a valid, non-filthy gloda id need db deletion.
        if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
            glodaDirty != GlodaMsgIndexer.kMessageFilthy)
          glodaMessageIds.push(glodaId);
      }

      if (glodaMessageIds.length) {
        GlodaMsgIndexer._datastore.markMessagesDeletedByIDs(glodaMessageIds);
        GlodaMsgIndexer.pendingDeletions = true;
      }
    },

    /**
     * Process a move or copy.
     *
     * Moves to a local folder or an IMAP folder where we are generating
     *  offline fake headers are dealt with efficiently because we get both
     *  the source and destination headers.  The main ingredient to having
     *  offline fake headers is that allowUndo was true when the operation
     *  was performed.  The only non-obvious thing is that we need to make
     *  sure that we deal with the impact of filthy folders and messages on
     *  gloda-id's (they invalidate the gloda-id).
     *
     * Moves to an IMAP folder that do not generate offline fake headers do
     *  not provide us with the target header, but the IMAP
     *  SetPendingAttributes logic will still attempt to propagate the
     *  properties on the message header so when we eventually see it in the
     *  msgsClassified notification, it should have the properties of the
     *  source message copied over.
     * We make sure that gloda-id's do not get propagated when messages are
     *  moved from IMAP folders that are marked filthy or are marked as not
     *  supposed to be indexed by clearing the pending attributes for the
     *  header being tracked by the destination IMAP folder.
     * We could fast-path the IMAP move case in msgsClassified by noticing
     *  that a message is showing up with a gloda-id header already and just
     *  performing an async location update.
     *
     * Moves that occur involving 'compacted' folders are fine and do not
     *  require special handling here.  The one tricky super-edge-case that
     *  can happen (and gets handled by the compaction pass) is the move of a
     *  message that got gloda indexed that did not already have a gloda-id
     *  and PendingCommitTracker did not get to flush the gloda-id before the
     *  compaction happened.  In that case our move logic cannot know to do
     *  anything and the gloda database still thinks the message lives in our
     *  folder.  The compaction pass will deal with this by marking the
     *  message as deleted.  The rationale being that marking it deleted
     *  allows the message to be re-used if it gets indexed in the target
     *  location, or if the target location has already been indexed, we no
     *  longer need the duplicate and it should be deleted.  (Also, it is
     *  unable to distinguish between a case where the message got deleted
     *  versus moved.)
     *
     * Because copied messages are, by their nature, duplicate messages, we
     *  do not particularly care about them.  As such, we defer their
     *  processing to the automatic sync logic that will happen much later
     *  on.  This is potentially desirable in case the user deletes some of
     *  the original messages, allowing us to reuse the gloda message
     *  representations when we finally get around to indexing the messages.
     *  We do need to mark the folder as dirty, though, to clue in the sync
     *  logic.
     */
    msgsMoveCopyCompleted: function gloda_indexer_msgsMoveCopyCompleted(aMove,
        aSrcMsgHdrs, aDestFolder, aDestMsgHdrs) {
      this.indexer._log.debug("MoveCopy notification. Move: " + aMove);
      try {
        // ---- Move
        if (aMove) {
          // -- Effectively a deletion?
          // If the destination folder is not indexed, it's like these messages
          //  are being deleted.
          if (!GlodaMsgIndexer.shouldIndexFolder(aDestFolder)) {
            this.msgsDeleted(aSrcMsgHdrs);
            return;
          }

          // -- Avoid propagation of filthy gloda-id's.
          // If the source folder is filthy or should not be indexed (and so
          //  any gloda-id's found in there are gibberish), our only job is to
          //  strip the gloda-id's off of all the destination headers because
          //  none of the gloda-id's are valid (and so we certainly don't want
          //  to try and use them as a basis for updating message keys.)
          let srcMsgFolder = aSrcMsgHdrs.queryElementAt(0, nsIMsgDBHdr).folder;
          if (!this.indexer.shouldIndexFolder(srcMsgFolder) ||
              (GlodaDatastore._mapFolder(srcMsgFolder).dirtyStatus ==
                 GlodaFolder.prototype.kFolderFilthy)) {
            // Local case, just modify the destination headers directly.
            if (aDestMsgHdrs) {
              for (let destMsgHdr in fixIterator(aDestMsgHdrs, nsIMsgDBHdr)) {
                // zero it out if it exists
                // (no need to deal with pending commit issues here; a filthy
                //  folder by definition has nothing indexed in it.)
                let glodaId = destMsgHdr.getUint32Property(
                  GLODA_MESSAGE_ID_PROPERTY);
                if (glodaId)
                  destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY,
                                               0);
              }

              // Since we are moving messages from a folder where they were
              //  effectively not indexed, it is up to us to make sure the
              //  messages now get indexed.
              // (aDirtyingEvent is omitted here, so it arrives undefined and
              //  is treated as a non-dirtying event by the callee.)
              this.indexer._reindexChangedMessages(aDestMsgHdrs.enumerate());
              return;
            }
            // IMAP move case, we need to operate on the pending headers using
            //  the source header to get the pending header and as the
            //  indication of what has been already set on the pending header.
            else {
              let destDb;
              // so, this can fail, and there's not much we can do about it.
              try {
                destDb = aDestFolder.msgDatabase;
              } catch (ex) {
                this.indexer._log.warn("Destination database for " +
                                       aDestFolder.prettiestName +
                                       " not ready on IMAP move." +
                                       " Gloda corruption possible.");
                return;
              }
              for (let srcMsgHdr in fixIterator(aSrcMsgHdrs, nsIMsgDBHdr)) {
                // zero it out if it exists
                // (no need to deal with pending commit issues here; a filthy
                //  folder by definition has nothing indexed in it.)
                let glodaId = srcMsgHdr.getUint32Property(
                  GLODA_MESSAGE_ID_PROPERTY);
                if (glodaId)
                  destDb.setUint32AttributeOnPendingHdr(
                    srcMsgHdr, GLODA_MESSAGE_ID_PROPERTY, 0);
              }

              // Nothing remains to be done.  The msgClassified event will
              //  take care of making sure the message gets indexed.
              return;
            }
          }


          // --- Have destination headers (local case):
          if (aDestMsgHdrs) {
            // -- Update message keys for valid gloda-id's.
            // (Which means ignore filthy gloda-id's.)
            let glodaIds = [];
            let newMessageKeys = [];
            aSrcMsgHdrs.QueryInterface(nsIArray);
            aDestMsgHdrs.QueryInterface(nsIArray);
            // Track whether we see any messages that are not gloda indexed so
            //  we know if we have to mark the destination folder dirty.
            let sawNonGlodaMessage = false;
            for (let iMsg = 0; iMsg < aSrcMsgHdrs.length; iMsg++) {
              let srcMsgHdr = aSrcMsgHdrs.queryElementAt(iMsg, nsIMsgDBHdr);
              let destMsgHdr = aDestMsgHdrs.queryElementAt(iMsg, nsIMsgDBHdr);

              let [glodaId, dirtyStatus] =
                PendingCommitTracker.getGlodaState(srcMsgHdr);
              if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
                  dirtyStatus != GlodaMsgIndexer.kMessageFilthy) {
                // we may need to update the pending commit map (it checks)
                PendingCommitTracker.noteMove(srcMsgHdr, destMsgHdr);
                // but we always need to update our database
                glodaIds.push(glodaId);
                newMessageKeys.push(destMsgHdr.messageKey);
              }
              else {
                sawNonGlodaMessage = true;
              }
            }

            // this method takes care to update the in-memory representations
            //  too; we don't need to do anything
            if (glodaIds.length)
              GlodaDatastore.updateMessageLocations(glodaIds, newMessageKeys,
                                                    aDestFolder);

            // Mark the destination folder dirty if we saw any messages that
            //  were not already gloda indexed.
            if (sawNonGlodaMessage) {
              let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
              destGlodaFolder._ensureFolderDirty();
              this.indexer.indexingSweepNeeded = true;
            }
          }
          // --- No dest headers (IMAP case):
          // Update any valid gloda indexed messages into their new folder to
          //  make the indexer's life easier when it sees the messages in their
          //  new folder.
+ else { + let glodaIds = []; + + let srcFolderIsLocal = + (srcMsgFolder instanceof nsIMsgLocalMailFolder); + for (let iMsgHdr = 0; iMsgHdr < aSrcMsgHdrs.length; iMsgHdr++) { + let msgHdr = aSrcMsgHdrs.queryElementAt(iMsgHdr, nsIMsgDBHdr); + + let [glodaId, dirtyStatus] = + PendingCommitTracker.getGlodaState(msgHdr); + if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && + dirtyStatus != GlodaMsgIndexer.kMessageFilthy) { + // we may need to update the pending commit map (it checks) + PendingCommitTracker.noteBlindMove(msgHdr); + // but we always need to update our database + glodaIds.push(glodaId); + + // XXX UNDO WORKAROUND + // This constitutes a move from a local folder to an IMAP + // folder. Undo does not currently do the right thing for us, + // but we have a chance of not orphaning the message if we + // mark the source header as dirty so that when the message + // gets re-added we see it. (This does require that we enter + // the folder; we set the folder dirty after the loop to + // increase the probability of this but it's not foolproof + // depending on when the next indexing sweep happens and when + // the user performs an undo.) + msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, + GlodaMsgIndexer.kMessageDirty); + } + } + // XXX ALSO UNDO WORKAROUND + if (srcFolderIsLocal) { + let srcGlodaFolder = GlodaDatastore._mapFolder(srcMsgFolder); + srcGlodaFolder._ensureFolderDirty(); + } + + // quickly move them to the right folder, zeroing their message keys + GlodaDatastore.updateMessageFoldersByKeyPurging(glodaIds, + aDestFolder); + // we _do not_ need to mark the folder as dirty, because the + // message added events will cause that to happen. 
+ } + } + // ---- Copy case + else { + // -- Do not propagate gloda-id's for copies + // (Only applies if we have the destination header, which means local) + if (aDestMsgHdrs) { + for (let destMsgHdr in fixIterator(aDestMsgHdrs, nsIMsgDBHdr)) { + let glodaId = destMsgHdr.getUint32Property( + GLODA_MESSAGE_ID_PROPERTY); + if (glodaId) + destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0); + } + } + + // mark the folder as dirty; we'll get to it later. + let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder); + destGlodaFolder._ensureFolderDirty(); + this.indexer.indexingSweepNeeded = true; + } + } catch (ex) { + this.indexer._log.error("Problem encountered during message move/copy:", + ex.stack); + } + }, + + /** + * Queue up message key changes that are a result of offline fake headers + * being made real for the actual update during the msgsClassified + * notification that is expected after this. We defer the + * actual work (if there is any to be done; the fake header might have + * guessed the right UID correctly) so that we can batch our work. + * + * The expectation is that there will be no meaningful time window between + * this notification and the msgsClassified notification since the message + * classifier should not actually need to classify the messages (they + * should already have been classified) and so can fast-path them. + */ + msgKeyChanged: function gloda_indexer_msgKeyChangeded(aOldMsgKey, + aNewMsgHdr) { + try { + let val = null, newKey = aNewMsgHdr.messageKey; + let [glodaId, glodaDirty] = + PendingCommitTracker.getGlodaState(aNewMsgHdr); + // If we haven't indexed this message yet, take no action, and leave it + // up to msgsClassified to take proper action. + if (glodaId < GLODA_FIRST_VALID_MESSAGE_ID) + return; + // take no action on filthy messages, + // generate an entry if dirty or the keys don't match. 
+ if ((glodaDirty !== GlodaMsgIndexer.kMessageFilthy) && + ((glodaDirty === GlodaMsgIndexer.kMessageDirty) || + (aOldMsgKey !== newKey))) { + val = { + id: glodaId, + key: (aOldMsgKey !== newKey) ? newKey : null, + isDirty: glodaDirty === GlodaMsgIndexer.kMessageDirty, + }; + } + + let key = aNewMsgHdr.folder.URI + "#" + aNewMsgHdr.messageKey; + this.indexer._keyChangedBatchInfo[key] = val; + } + // this is more for the unit test to fail rather than user error reporting + catch (ex) { + this.indexer._log.error("Problem encountered during msgKeyChanged" + + " notification handling: " + ex + "\n\n" + + ex.stack + " \n\n"); + } + }, + + /** + * Detect newly added folders before they get messages so we map them before + * they get any messages added to them. If we only hear about them after + * they get their 1st message, then we will mark them filthy, but if we mark + * them before that, they get marked clean. + */ + folderAdded: function gloda_indexer_folderAdded(aMsgFolder) { + // This is invoked for its side-effect of invoking _mapFolder and doing so + // only after filtering out folders we don't care about. + GlodaMsgIndexer.shouldIndexFolder(aMsgFolder); + }, + + /** + * Handles folder no-longer-exists-ence. We mark all messages as deleted + * and remove the folder from our URI table. Currently, if a folder that + * contains other folders is deleted, we may either receive one + * notification for the folder that is deleted, or a notification for the + * folder and one for each of its descendents. This depends upon the + * underlying account implementation, so we explicitly handle each case. + * Namely, we treat it as if we're only planning on getting one, but we + * handle if the children are already gone for some reason. 
+ */ + folderDeleted: function gloda_indexer_folderDeleted(aFolder) { + this.indexer._log.debug("folderDeleted notification"); + try { + let delFunc = function(aFolder, indexer) { + if (indexer._datastore._folderKnown(aFolder)) { + indexer._log.info("Processing deletion of folder " + + aFolder.prettiestName + "."); + let glodaFolder = GlodaDatastore._mapFolder(aFolder); + indexer._datastore.markMessagesDeletedByFolderID(glodaFolder.id); + indexer._datastore.deleteFolderByID(glodaFolder.id); + GlodaDatastore._killGlodaFolderIntoTombstone(glodaFolder); + } + else { + indexer._log.info("Ignoring deletion of folder " + + aFolder.prettiestName + + " because it is unknown to gloda."); + } + }; + + let descendentFolders = aFolder.descendants; + // (the order of operations does not matter; child, non-child, whatever.) + // delete the parent + delFunc(aFolder, this.indexer); + // delete all its descendents + for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) { + delFunc(folder, this.indexer); + } + + this.indexer.pendingDeletions = true; + } catch (ex) { + this.indexer._log.error("Problem encountered during folder deletion" + + ": " + ex + "\n\n" + ex.stack + "\n\n"); + } + }, + + /** + * Handle a folder being copied or moved. + * Moves are handled by a helper function shared with _folderRenameHelper + * (which takes care of any nesting involved). + * Copies are actually ignored, because our periodic indexing traversal + * should discover these automatically. We could hint ourselves into + * action, but arguably a set of completely duplicate messages is not + * a high priority for indexing. 
+ */ + folderMoveCopyCompleted: function gloda_indexer_folderMoveCopyCompleted( + aMove, aSrcFolder, aDestFolder) { + this.indexer._log.debug("folderMoveCopy notification (Move: " + aMove + + ")"); + if (aMove) { + let srcURI = aSrcFolder.URI; + let targetURI = aDestFolder.URI + + srcURI.substring(srcURI.lastIndexOf("/")); + this._folderRenameHelper(aSrcFolder, targetURI); + } + else { + this.indexer.indexingSweepNeeded = true; + } + }, + + /** + * We just need to update the URI <-> ID maps and the row in the database, + * all of which is actually done by the datastore for us. + * This method needs to deal with the complexity where local folders will + * generate a rename notification for each sub-folder, but IMAP folders + * will generate only a single notification. Our logic primarily handles + * this by not exploding if the original folder no longer exists. + */ + _folderRenameHelper: function gloda_indexer_folderRenameHelper(aOrigFolder, + aNewURI) { + let newFolder = MailUtils.getFolderForURI(aNewURI); + let specialFolderFlags = Ci.nsMsgFolderFlags.Trash | Ci.nsMsgFolderFlags.Junk; + if (newFolder.isSpecialFolder(specialFolderFlags, true)) { + let descendentFolders = newFolder.descendants; + + // First thing to do: make sure we don't index the resulting folder and + // its descendents. + GlodaMsgIndexer.resetFolderIndexingPriority(newFolder); + for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) { + GlodaMsgIndexer.resetFolderIndexingPriority(folder); + } + + // Remove from the index messages from the original folder + this.folderDeleted(aOrigFolder); + } else { + let descendentFolders = aOrigFolder.descendants; + + let origURI = aOrigFolder.URI; + // this rename is straightforward. + GlodaDatastore.renameFolder(aOrigFolder, aNewURI); + + for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) { + let oldSubURI = folder.URI; + // mangle a new URI from the old URI. 
we could also try and do a + // parallel traversal of the new folder hierarchy, but that seems like + // more work. + let newSubURI = aNewURI + oldSubURI.substring(origURI.length); + this.indexer._datastore.renameFolder(oldSubURI, newSubURI); + } + + this.indexer._log.debug("folder renamed: " + origURI + " to " + aNewURI); + } + }, + + /** + * Handle folder renames, dispatching to our rename helper (which also + * takes care of any nested folder issues.) + */ + folderRenamed: function gloda_indexer_folderRenamed(aOrigFolder, + aNewFolder) { + this._folderRenameHelper(aOrigFolder, aNewFolder.URI); + }, + + /** + * This tells us about many exciting things. What they are and what we do: + * + * - FolderCompactStart: Mark the folder as compacting in our in-memory + * representation. This should keep any new indexing out of the folder + * until it is done compacting. Also, kill any active or existing jobs + * to index the folder. + * - FolderCompactFinish: Mark the folder as done compacting in our + * in-memory representation. Assuming the folder was known to us and + * not marked filthy, queue a compaction job. + * + * - FolderReindexTriggered: We do the same thing as FolderCompactStart + * but don't mark the folder as compacting. + * + * - JunkStatusChanged: We mark the messages that have had their junk + * state change to be reindexed. + */ + itemEvent: function gloda_indexer_itemEvent(aItem, aEvent, aData) { + // Compact and Reindex are close enough that we can reuse the same code + // with one minor difference. + if (aEvent == "FolderCompactStart" || + aEvent == "FolderReindexTriggered") { + let aMsgFolder = aItem.QueryInterface(nsIMsgFolder); + // ignore folders we ignore... + if (!GlodaMsgIndexer.shouldIndexFolder(aMsgFolder)) + return; + + let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); + if (aEvent == "FolderCompactStart") + glodaFolder.compacting = true; + + // Purge any explicit indexing of said folder. 
+ GlodaIndexer.purgeJobsUsingFilter(function (aJob) { + return (aJob.jobType == "folder" && + aJob.id == aMsgFolder.id); + }); + + // Abort the active job if it's in the folder (this covers both + // event-driven indexing that happens to be in the folder as well + // explicit folder indexing of the folder). + if (GlodaMsgIndexer._indexingFolder == aMsgFolder) + GlodaIndexer.killActiveJob(); + + // Tell the PendingCommitTracker to throw away anything it is tracking + // about the folder. We will pick up the pieces in the compaction + // pass. + PendingCommitTracker.noteFolderDatabaseGettingBlownAway(aMsgFolder); + + // (We do not need to mark the folder dirty because if we were indexing + // it, it already must have been marked dirty.) + } + else if (aEvent == "FolderCompactFinish") { + let aMsgFolder = aItem.QueryInterface(nsIMsgFolder); + // ignore folders we ignore... + if (!GlodaMsgIndexer.shouldIndexFolder(aMsgFolder)) + return; + + let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); + glodaFolder.compacting = false; + glodaFolder._setCompactedState(true); + + // Queue compaction unless the folder was filthy (in which case there + // are no valid gloda-id's to update.) + if (glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy) + GlodaIndexer.indexJob( + new IndexingJob("folderCompact", glodaFolder.id)); + + // Queue indexing of the folder if it is dirty. We are doing this + // mainly in case we were indexing it before the compaction started. + // It should be reasonably harmless if we weren't. + // (It would probably be better to just make sure that there is an + // indexing sweep queued or active, and if it's already active that + // this folder is in the queue to be processed.) 
+ if (glodaFolder.dirtyStatus == glodaFolder.kFolderDirty) + GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id)); + } + else if (aEvent == "JunkStatusChanged") { + this.indexer._log.debug("JunkStatusChanged notification"); + aItem.QueryInterface(Ci.nsIArray); + GlodaMsgIndexer._reindexChangedMessages(aItem.enumerate(), true); + } + }, + }, + + /** + * A nsIFolderListener (listening on nsIMsgMailSession so we get all of + * these events) PRIMARILY to get folder loaded notifications. Because of + * deficiencies in the nsIMsgFolderListener's events at this time, we also + * get our folder-added and newsgroup notifications from here for now. (This + * will be rectified.) + */ + _folderListener: { + indexer: null, + + _init: function gloda_indexer_fl_init(aIndexer) { + this.indexer = aIndexer; + }, + + // We explicitly know about these things rather than bothering with some + // form of registration scheme because these aren't going to change much. + get _kFolderLoadedAtom() { + delete this._kFolderLoadedAtom; + return this._kFolderLoadedAtom = atomService.getAtom("FolderLoaded"); + }, + get _kKeywordsAtom() { + delete this._kKeywordsAtom; + return this._kKeywordsAtom = atomService.getAtom("Keywords"); + }, + get _kStatusAtom() { + delete this._kStatusAtom; + return this._kStatusAtom = atomService.getAtom("Status"); + }, + get _kFlaggedAtom() { + delete this._kFlaggedAtom; + return this._kFlaggedAtom = atomService.getAtom("Flagged"); + }, + get _kFolderFlagAtom() { + delete this._kFolderFlagAtom; + return this._kFolderFlagAtom = atomService.getAtom("FolderFlag"); + }, + + OnItemAdded: function gloda_indexer_OnItemAdded(aParentItem, aItem) { + }, + OnItemRemoved: function gloda_indexer_OnItemRemoved(aParentItem, aItem) { + }, + OnItemPropertyChanged: function gloda_indexer_OnItemPropertyChanged( + aItem, aProperty, aOldValue, aNewValue) { + }, + /** + * Detect changes to folder flags and reset our indexing priority. This + * is important because (all?) 
folders start out without any flags and + * then get their flags added to them. + */ + OnItemIntPropertyChanged: function gloda_indexer_OnItemIntPropertyChanged( + aFolderItem, aProperty, aOldValue, aNewValue) { + if (aProperty !== this._kFolderFlagAtom) + return; + if (!GlodaMsgIndexer.shouldIndexFolder(aFolderItem)) + return; + // Only reset priority if folder Special Use changes. + if ((aOldValue & Ci.nsMsgFolderFlags.SpecialUse) == + (aNewValue & Ci.nsMsgFolderFlags.SpecialUse)) + return; + GlodaMsgIndexer.resetFolderIndexingPriority(aFolderItem); + }, + OnItemBoolPropertyChanged: function gloda_indexer_OnItemBoolPropertyChanged( + aItem, aProperty, aOldValue, aNewValue) { + }, + OnItemUnicharPropertyChanged: + function gloda_indexer_OnItemUnicharPropertyChanged( + aItem, aProperty, aOldValue, aNewValue) { + + }, + /** + * Notice when user activity adds/removes tags or changes a message's + * status. + */ + OnItemPropertyFlagChanged: function gloda_indexer_OnItemPropertyFlagChanged( + aMsgHdr, aProperty, aOldValue, aNewValue) { + if (aProperty == this._kKeywordsAtom || + // We could care less about the new flag changing. + (aProperty == this._kStatusAtom && + (aOldValue ^ aNewValue) != nsMsgMessageFlags.New && + // We do care about IMAP deletion, but msgsDeleted tells us that, so + // ignore IMAPDeleted too... + (aOldValue ^ aNewValue) != nsMsgMessageFlags.IMAPDeleted) || + aProperty == this._kFlaggedAtom) { + GlodaMsgIndexer._reindexChangedMessages([aMsgHdr], true); + } + }, + + /** + * Get folder loaded notifications for folders that had to do some + * (asynchronous) processing before they could be opened. + */ + OnItemEvent: function gloda_indexer_OnItemEvent(aFolder, aEvent) { + if (aEvent == this._kFolderLoadedAtom) + this.indexer._onFolderLoaded(aFolder); + }, + }, + + /* ***** Rebuilding / Reindexing ***** */ + /** + * Allow us to invalidate an outstanding folder traversal because the + * underlying database is going away. 
We use other means for detecting + * modifications of the message (labeling, marked (un)read, starred, etc.) + * + * This is an nsIDBChangeListener listening to an nsIDBChangeAnnouncer. To + * add ourselves, we get us a nice nsMsgDatabase, query it to the announcer, + * then call AddListener. + */ + _databaseAnnouncerListener: { + indexer: null, + /** + * XXX We really should define the operations under which we expect this to + * occur. While we know this must be happening as the result of a + * ForceClosed call, we don't have a comprehensive list of when this is + * expected to occur. Some reasons: + * - Compaction (although we should already have killed the job thanks to + * our compaction notification) + * - UID validity rolls. + * - Folder Rename + * - Folder Delete + * The fact that we already have the database open when getting this means + * that it had to be valid before we opened it, which hopefully rules out + * modification of the mbox file by an external process (since that is + * forbidden when we are running) and many other exotic things. + * + * So this really ends up just being a correctness / safety protection + * mechanism. At least now that we have better compaction support. + */ + onAnnouncerGoingAway: function gloda_indexer_dbGoingAway( + aDBChangeAnnouncer) { + // The fact that we are getting called means we have an active folder and + // that we therefore are the active job. As such, we must kill the + // active job. + // XXX In the future, when we support interleaved event-driven indexing + // that bumps long-running indexing tasks, the semantics of this will + // have to change a bit since we will want to maintain being active in a + // folder even when bumped. However, we will probably have a more + // complex notion of indexing contexts on a per-job basis. 
+ GlodaIndexer.killActiveJob(); + }, + + onHdrFlagsChanged: function(aHdrChanged, aOldFlags, aNewFlags, aInstigator) {}, + onHdrDeleted: function(aHdrChanged, aParentKey, aFlags, aInstigator) {}, + onHdrAdded: function(aHdrChanged, aParentKey, aFlags, aInstigator) {}, + onParentChanged: function(aKeyChanged, aOldParent, aNewParent, + aInstigator) {}, + onReadChanged: function(aInstigator) {}, + onJunkScoreChanged: function(aInstigator) {}, + onHdrPropertyChanged: function (aHdrToChange, aPreChange, aStatus, + aInstigator) {}, + onEvent: function (aDB, aEvent) {}, + }, + + /** + * Given a list of Message-ID's, return a matching list of lists of messages + * matching those Message-ID's. So if you pass an array with three + * Message-ID's ["a", "b", "c"], you would get back an array containing + * 3 lists, where the first list contains all the messages with a message-id + * of "a", and so forth. The reason a list is returned rather than null/a + * message is that we accept the reality that we have multiple copies of + * messages with the same ID. + * This call is asynchronous because it depends on previously created messages + * to be reflected in our results, which requires us to execute on the async + * thread where all our writes happen. This also turns out to be a + * reasonable thing because we could imagine pathological cases where there + * could be a lot of message-id's and/or a lot of messages with those + * message-id's. + * + * The returned collection will include both 'ghost' messages (messages + * that exist for conversation-threading purposes only) as well as deleted + * messages in addition to the normal 'live' messages that non-privileged + * queries might return. 
+ */ + getMessagesByMessageID: function gloda_ns_getMessagesByMessageID(aMessageIDs, + aCallback, aCallbackThis) { + let msgIDToIndex = {}; + let results = []; + for (let iID = 0; iID < aMessageIDs.length; ++iID) { + let msgID = aMessageIDs[iID]; + results.push([]); + msgIDToIndex[msgID] = iID; + } + + // (Note: although we are performing a lookup with no validity constraints + // and using the same object-relational-mapper-ish layer used by things + // that do have constraints, we are not at risk of exposing deleted + // messages to other code and getting it confused. The only way code + // can find a message is if it shows up in their queries or gets announced + // via GlodaCollectionManager.itemsAdded, neither of which will happen.) + let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, { + noDbQueryValidityConstraints: true, + }); + query.headerMessageID.apply(query, aMessageIDs); + query.frozen = true; + + let listener = new MessagesByMessageIdCallback(msgIDToIndex, results, + aCallback, aCallbackThis); + return query.getCollection(listener, null, {becomeNull: true}); + }, + + /** + * A reference to MsgHdrToMimeMessage that unit testing can clobber when it + * wants to cause us to hang or inject a fault. If you are not + * glodaTestHelper.js then _do not touch this_. + */ + _MsgHdrToMimeMessageFunc: MsgHdrToMimeMessage, + /** + * Primary message indexing logic. This method is mainly concerned with + * getting all the information about the message required for threading / + * conversation building and subsequent processing. It is responsible for + * determining whether to reuse existing gloda messages or whether a new one + * should be created. Most attribute stuff happens in fund_attr.js or + * expl_attr.js. + * + * Prior to calling this method, the caller must have invoked + * |_indexerEnterFolder|, leaving us with the following true invariants + * below. 
+ * + * @pre aMsgHdr.folder == this._indexingFolder + * @pre aMsgHdr.folder.msgDatabase == this._indexingDatabase + */ + _indexMessage: function* gloda_indexMessage(aMsgHdr, aCallbackHandle) { + let logDebug = this._log.level <= Log4Moz.Level.Debug; + if (logDebug) + this._log.debug("*** Indexing message: " + aMsgHdr.messageKey + " : " + + aMsgHdr.subject); + + // If the message is offline, then get the message body as well + let isMsgOffline = false; + let aMimeMsg; + if ((aMsgHdr.flags & nsMsgMessageFlags.Offline) || + (aMsgHdr.folder instanceof nsIMsgLocalMailFolder)) { + isMsgOffline = true; + this._MsgHdrToMimeMessageFunc(aMsgHdr, aCallbackHandle.callbackThis, + aCallbackHandle.callback, false, {saneBodySize: true}); + aMimeMsg = (yield this.kWorkAsync)[1]; + } + else { + if (logDebug) + this._log.debug(" * Message is not offline -- only headers indexed"); + } + + if (logDebug) + this._log.debug(" * Got message, subject " + aMsgHdr.subject); + + if (this._unitTestSuperVerbose) { + if (aMimeMsg) + this._log.debug(" * Got Mime " + aMimeMsg.prettyString()); + else + this._log.debug(" * NO MIME MESSAGE!!!\n"); + } + + // -- Find/create the conversation the message belongs to. + // Our invariant is that all messages that exist in the database belong to + // a conversation. + + // - See if any of the ancestors exist and have a conversationID... + // (references are ordered from old [0] to new [n-1]) + let references = Array.from(range(0, aMsgHdr.numReferences)). + map(i => aMsgHdr.getStringReference(i)); + // also see if we already know about the message... 
+ references.push(aMsgHdr.messageId); + + this.getMessagesByMessageID(references, aCallbackHandle.callback, + aCallbackHandle.callbackThis); + // (ancestorLists has a direct correspondence to the message ids) + let ancestorLists = yield this.kWorkAsync; + + if (logDebug) { + this._log.debug("ancestors raw: " + ancestorLists); + this._log.debug("ref len: " + references.length + + " anc len: " + ancestorLists.length); + this._log.debug("references: " + + Log4Moz.enumerateProperties(references).join(",")); + this._log.debug("ancestors: " + + Log4Moz.enumerateProperties(ancestorLists).join(",")); + } + + // pull our current message lookup results off + references.pop(); + let candidateCurMsgs = ancestorLists.pop(); + + let conversationID = null; + let conversation = null; + // -- figure out the conversation ID + // if we have a clone/already exist, just use his conversation ID + if (candidateCurMsgs.length > 0) { + conversationID = candidateCurMsgs[0].conversationID; + conversation = candidateCurMsgs[0].conversation; + } + // otherwise check out our ancestors + else { + // (walk from closest to furthest ancestor) + for (let iAncestor = ancestorLists.length-1; iAncestor >= 0; + --iAncestor) { + let ancestorList = ancestorLists[iAncestor]; + + if (ancestorList.length > 0) { + // we only care about the first instance of the message because we are + // able to guarantee the invariant that all messages with the same + // message id belong to the same conversation. + let ancestor = ancestorList[0]; + if (conversationID === null) { + conversationID = ancestor.conversationID; + conversation = ancestor.conversation; + } + else if (conversationID != ancestor.conversationID) { + // XXX this inconsistency is known and understood and tracked by + // bug 478162 https://bugzilla.mozilla.org/show_bug.cgi?id=478162 + //this._log.error("Inconsistency in conversations invariant on " + + // ancestor.headerMessageID + ". 
It has conv id " + + // ancestor.conversationID + " but expected " + + // conversationID + ". ID: " + ancestor.id); + } + } + } + } + + // nobody had one? create a new conversation + if (conversationID === null) { + // (the create method could issue the id, making the call return + // without waiting for the database...) + conversation = this._datastore.createConversation( + aMsgHdr.mime2DecodedSubject, null, null); + conversationID = conversation.id; + } + + // Walk from furthest to closest ancestor, creating the ancestors that don't + // exist. (This is possible if previous messages that were consumed in this + // thread only had an in-reply-to or for some reason did not otherwise + // provide the full references chain.) + for (let iAncestor = 0; iAncestor < ancestorLists.length; ++iAncestor) { + let ancestorList = ancestorLists[iAncestor]; + + if (ancestorList.length == 0) { + if (logDebug) + this._log.debug("creating message with: null, " + conversationID + + ", " + references[iAncestor] + + ", null."); + let ancestor = this._datastore.createMessage(null, null, // ghost + conversationID, null, + references[iAncestor], + null, // no subject + null, // no body + null); // no attachments + this._datastore.insertMessage(ancestor); + ancestorLists[iAncestor].push(ancestor); + } + } + // now all our ancestors exist, though they may be ghost-like... + + // find if there's a ghost version of our message or we already have indexed + // this message. + let curMsg = null; + if (logDebug) + this._log.debug(candidateCurMsgs.length + " candidate messages"); + for (let iCurCand = 0; iCurCand < candidateCurMsgs.length; iCurCand++) { + let candMsg = candidateCurMsgs[iCurCand]; + + if (logDebug) + this._log.debug("candidate folderID: " + candMsg.folderID + + " messageKey: " + candMsg.messageKey); + + if (candMsg.folderURI == this._indexingFolder.URI) { + // if we are in the same folder and we have the same message key, we + // are definitely the same, stop looking. 
+ if (candMsg.messageKey == aMsgHdr.messageKey) { + curMsg = candMsg; + break; + } + // if (we are in the same folder and) the candidate message has a null + // message key, we treat it as our best option unless we find an exact + // key match. (this would happen because the 'move' notification case + // has to deal with not knowing the target message key. this case + // will hopefully be somewhat improved in the future to not go through + // this path which mandates re-indexing of the message in its entirety) + if (candMsg.messageKey === null) + curMsg = candMsg; + // if (we are in the same folder and) the candidate message's underlying + // message no longer exists/matches, we'll assume we are the same but + // were betrayed by a re-indexing or something, but we have to make + // sure a perfect match doesn't turn up. + else if ((curMsg === null) && + !this._indexingDatabase.ContainsKey(candMsg.messageKey)) + curMsg = candMsg; + } + // a ghost/deleted message is fine + else if ((curMsg === null) && (candMsg.folderID === null)) { + curMsg = candMsg; + } + } + + let attachmentNames = null; + if (aMimeMsg) { + attachmentNames = aMimeMsg.allAttachments. + filter(att => att.isRealAttachment).map(att => att.name); + } + + let isConceptuallyNew, isRecordNew, insertFulltext; + if (curMsg === null) { + curMsg = this._datastore.createMessage(aMsgHdr.folder, + aMsgHdr.messageKey, + conversationID, + aMsgHdr.date, + aMsgHdr.messageId); + curMsg._conversation = conversation; + isConceptuallyNew = isRecordNew = insertFulltext = true; + } + else { + isRecordNew = false; + // the message is conceptually new if it was a ghost or dead. 
+ isConceptuallyNew = curMsg._isGhost || curMsg._isDeleted; + // insert fulltext if it was a ghost + insertFulltext = curMsg._isGhost; + curMsg._folderID = this._datastore._mapFolder(aMsgHdr.folder).id; + curMsg._messageKey = aMsgHdr.messageKey; + curMsg.date = new Date(aMsgHdr.date / 1000); + // the message may have been deleted; tell it to make sure it's not. + curMsg._ensureNotDeleted(); + // note: we are assuming that our matching logic is flawless in that + // if this message was not a ghost, we are assuming the 'body' + // associated with the id is still exactly the same. It is conceivable + // that there are cases where this is not true. + } + + if (aMimeMsg) { + let bodyPlain = aMimeMsg.coerceBodyToPlaintext(aMsgHdr.folder); + if (bodyPlain) { + curMsg._bodyLines = bodyPlain.split(/\r?\n/); + // curMsg._content gets set by fundattr.js + } + } + + // Mark the message as new (for the purposes of fulltext insertion) + if (insertFulltext) + curMsg._isNew = true; + + curMsg._subject = aMsgHdr.mime2DecodedSubject; + curMsg._attachmentNames = attachmentNames; + + // curMsg._indexAuthor gets set by fundattr.js + // curMsg._indexRecipients gets set by fundattr.js + + // zero the notability so everything in grokNounItem can just increment + curMsg.notability = 0; + + yield aCallbackHandle.pushAndGo( + Gloda.grokNounItem(curMsg, + {header: aMsgHdr, mime: aMimeMsg, bodyLines: curMsg._bodyLines}, + isConceptuallyNew, isRecordNew, + aCallbackHandle)); + + delete curMsg._bodyLines; + delete curMsg._content; + delete curMsg._isNew; + delete curMsg._indexAuthor; + delete curMsg._indexRecipients; + + // we want to update the header for messages only after the transaction + // irrevocably hits the disk. otherwise we could get confused if the + // transaction rolls back or what not. + PendingCommitTracker.track(aMsgHdr, curMsg.id); + + yield this.kWorkDone; + }, + + /** + * Wipe a message out of existence from our index. 
This is slightly more
   * tricky than one would first expect because there are potentially
   * attributes not immediately associated with this message that reference
   * the message.  Not only that, but deletion of messages may leave a
   * conversation possessing only ghost messages, which we don't want, so we
   * need to nuke the moot conversation and its moot ghost messages.
   * For now, we are actually punting on that trickiness, and the exact
   * nuances aren't defined yet because we have not decided whether to store
   * such attributes redundantly.  For example, if we have subject-pred-object,
   * we could actually store this as attributes (subject, id, object) and
   * (object, id, subject).  In such a case, we could query on (subject, *)
   * and use the results to delete the (object, id, subject) case.  If we
   * don't redundantly store attributes, we can deal with the problem by
   * collecting up all the attributes that accept a message as their object
   * type and issuing a delete against that.  For example, delete (*, [1,2,3],
   * message id).
   * (We are punting because we haven't implemented support for generating
   * attributes like that yet.)
   *
   * @param aMessage The gloda message to remove from the index.
   * @param aCallbackHandle Async driver handle; used to receive the results
   *   of the conversation query (paired with the kWorkAsync yield below).
   *
   * @TODO: implement deletion of attributes that reference (deleted) messages
   */
  _deleteMessage: function* gloda_index_deleteMessage(aMessage,
                                                      aCallbackHandle) {
    let logDebug = this._log.level <= Log4Moz.Level.Debug;
    if (logDebug)
      this._log.debug("*** Deleting message: " + aMessage);

    // -- delete our attributes
    // delete the message's attributes (if we implement the cascade delete, that
    // could do the honors for us... right now we define the trigger in our
    // schema but the back-end ignores it)
    GlodaDatastore.clearMessageAttributes(aMessage);

    // -- delete our message or ghost us, and maybe nuke the whole conversation
    // Look at the other messages in the conversation.
    // (Note: although we are performing a lookup with no validity constraints
    // and using the same object-relational-mapper-ish layer used by things
    // that do have constraints, we are not at risk of exposing deleted
    // messages to other code and getting it confused.  The only way code
    // can find a message is if it shows up in their queries or gets announced
    // via GlodaCollectionManager.itemsAdded, neither of which will happen.)
    let convPrivQuery = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
      noDbQueryValidityConstraints: true,
    });
    convPrivQuery.conversation(aMessage.conversation);
    let conversationCollection = convPrivQuery.getCollection(aCallbackHandle);
    // the query completes asynchronously; yield until the callback handle
    // resumes us with the collection populated.
    yield this.kWorkAsync;

    let conversationMsgs = conversationCollection.items;

    // Count the number of ghost messages we see to determine if we are
    // the last message alive.
    let ghostCount = 0;
    let twinMessageExists = false;
    for (let convMsg of conversationMsgs) {
      // ignore our own message
      if (convMsg.id == aMessage.id)
        continue;

      if (convMsg._isGhost)
        ghostCount++;
      // This message is our (living) twin if it is not a ghost, not deleted,
      // and has the same message-id header.
      else if (!convMsg._isDeleted &&
               convMsg.headerMessageID == aMessage.headerMessageID)
        twinMessageExists = true;
    }

    // -- If everyone else is a ghost, blow away the conversation.
    // If there are messages still alive, or deleted messages that
    // _deleteMessage has not yet gotten to, then do not do this.  (We will
    // eventually hit this case once they are all deleted.)
    if ((conversationMsgs.length - 1) == ghostCount) {
      // - Obliterate each message
      for (let msg of conversationMsgs) {
        GlodaDatastore.deleteMessageByID(msg.id);
      }
      // - Obliterate the conversation
      GlodaDatastore.deleteConversationByID(aMessage.conversationID);
      // *no one* should hold a reference or use aMessage after this point,
      // trash it so such ne'er-do-wells are made plain.
      aMessage._objectPurgedMakeYourselfUnpleasant();
    }
    // -- Ghost or purge us as appropriate
    else {
      // Purge us if we have a (living) twin; no ghost required.
      if (twinMessageExists) {
        GlodaDatastore.deleteMessageByID(aMessage.id);
        // *no one* should hold a reference or use aMessage after this point,
        // trash it so such ne'er-do-wells are made plain.
        aMessage._objectPurgedMakeYourselfUnpleasant();
      }
      // No twin, a ghost is required, we become the ghost.
      else {
        aMessage._ghost();
        GlodaDatastore.updateMessage(aMessage);
        // ghosts don't have fulltext.  purge it.
        GlodaDatastore.deleteMessageTextByID(aMessage.id);
      }
    }

    yield this.kWorkDone;
  },
};
GlodaIndexer.registerIndexer(GlodaMsgIndexer);
diff --git a/mailnews/db/gloda/modules/indexer.js b/mailnews/db/gloda/modules/indexer.js
new file mode 100644
index 000000000..f6c939530
--- /dev/null
+++ b/mailnews/db/gloda/modules/indexer.js
@@ -0,0 +1,1409 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * This file currently contains a fairly general implementation of asynchronous
 * indexing with a very explicit message indexing implementation.  As gloda
 * will eventually want to index more than just messages, the message-specific
 * things should ideally lose their special hold on this file.  This will
 * benefit readability/size as well.
+ */ + +this.EXPORTED_SYMBOLS = ['GlodaIndexer', 'IndexingJob']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource://gre/modules/XPCOMUtils.jsm"); +Cu.import("resource://gre/modules/Services.jsm"); +Cu.import("resource:///modules/iteratorUtils.jsm"); + +Cu.import("resource:///modules/gloda/log4moz.js"); + +Cu.import("resource:///modules/gloda/utils.js"); +Cu.import("resource:///modules/gloda/datastore.js"); +Cu.import("resource:///modules/gloda/gloda.js"); +Cu.import("resource:///modules/gloda/collection.js"); +Cu.import("resource:///modules/gloda/connotent.js"); + +/** + * @class Capture the indexing batch concept explicitly. + * + * @param aJobType The type of thing we are indexing. Current choices are: + * "folder" and "message". Previous choices included "account". The indexer + * currently knows too much about these; they should be de-coupled. + * @param aID Specific to the job type, but for now only used to hold folder + * IDs. + * + * @ivar items The list of items to process during this job/batch. (For + * example, if this is a "messages" job, this would be the list of messages + * to process, although the specific representation is determined by the + * job.) The list will only be mutated through the addition of extra items. + * @ivar offset The current offset into the 'items' list (if used), updated as + * processing occurs. If 'items' is not used, the processing code can also + * update this in a similar fashion. This is used by the status + * notification code in conjunction with goal. + * @ivar goal The total number of items to index/actions to perform in this job. + * This number may increase during the life of the job, but should not + * decrease. This is used by the status notification code in conjunction + * with the goal. + */ +function IndexingJob(aJobType, aID, aItems) { + this.jobType = aJobType; + this.id = aID; + this.items = (aItems != null) ? 
aItems : []; + this.offset = 0; + this.goal = null; + this.callback = null; + this.callbackThis = null; +} +IndexingJob.prototype = { + /** + * Invoke the callback associated with this job, passing through all arguments + * received by this function to the callback function. + */ + safelyInvokeCallback: function() { + if (!this.callback) + return; + try { + this.callback.apply(this.callbackThis, arguments); + } + catch(ex) { + GlodaIndexer._log.warn("job callback invocation problem:", ex); + } + }, + toString: function IndexingJob_toString() { + return "[job:" + this.jobType + + " id:" + this.id + " items:" + (this.items ? this.items.length : "no") + + " offset:" + this.offset + " goal:" + this.goal + "]"; + } +}; + +/** + * @namespace Core indexing logic, plus message-specific indexing logic. + * + * === Indexing Goals + * We have the following goals: + * + * Responsiveness + * - When the user wants to quit, we should be able to stop and quit in a timely + * fasion. + * - We should not interfere with the user's thunderbird usage. + * + * Correctness + * - Quitting should not result in any information loss; we should (eventually) + * end up at the same indexed state regardless of whether a user lets + * indexing run to completion or restarts thunderbird in the middle of the + * process. (It is okay to take slightly longer in the latter case.) + * + * Worst Case Scenario Avoidance + * - We should try to be O(1) memory-wise regardless of what notifications + * are thrown at us. + * + * === Indexing Throttling + * + * Adaptive Indexing + * - The indexer tries to stay out of the way of other running code in + * Thunderbird (autosync) and other code on the system. We try and target + * some number of milliseconds of activity between intentional inactive + * periods. The number of milliseconds of activity varies based on whether we + * believe the user to be actively using the computer or idle. 
We use our
 * inactive periods as a way to measure system load; if we receive our
 * notification promptly at the end of our inactive period, we believe the
 * system is not heavily loaded.  If we do not get notified promptly, we
 * assume there is other stuff going on and back off.
 *
 */
var GlodaIndexer = {
  /**
   * A partial attempt to generalize to support multiple databases.  Each
   * database would have its own datastore, and each datastore would have its
   * own indexer.  But we rather inter-mingle our use of this field with the
   * singleton global GlodaDatastore.
   */
  _datastore: GlodaDatastore,
  _log: Log4Moz.repository.getLogger("gloda.indexer"),
  /**
   * Our nsITimer that we use to schedule ourselves on the main thread
   * intermittently.  The timer always exists but may not always be active.
   */
  _timer: null,
  /**
   * Our nsITimer that we use to schedule events in the "far" future.  For now,
   * this means not compelling an initial indexing sweep until some number of
   * seconds after startup.
   */
  _longTimer: null,

  /**
   * Periodic performance adjustment parameters:  The overall goal is to adjust
   * our rate of work so that we don't interfere with the user's activities
   * when they are around (non-idle), and the system in general (when idle).
   * Being nice when idle isn't quite as important, but is a good idea so that
   * when the user un-idles we are able to back off nicely.  Also, we give
   * other processes on the system a chance to do something.
   *
   * We do this by organizing our work into discrete "tokens" of activity,
   * then processing the number of tokens that we have determined will
   * not impact the UI.  Then we pause to give other activities a chance to get
   * some work done, and we measure whether anything happened during our pause.
   * If something else is going on in our application during that pause, we
   * give it priority (up to a point) by delaying further indexing.
   *
   * Keep in mind that many of our operations are actually asynchronous, so we
   * aren't entirely starving the event queue.  However, a lot of the async
   * stuff can end up not having any actual delay between events.  For
   * example, we only index offline message bodies, so there's no network
   * latency involved, just disk IO; the only meaningful latency will be the
   * initial disk seek (if there is one... pre-fetching may seriously be our
   * friend).
   *
   * In order to maintain responsiveness, I assert that we want to minimize the
   * length of the time we are dominating the event queue.  This suggests
   * that we want break up our blocks of work frequently.  But not so
   * frequently that there is a lot of waste.  Accordingly our algorithm is
   * basically:
   *
   * - Estimate the time that it takes to process a token, and schedule the
   *   number of tokens that should fit into that time.
   * - Detect user activity, and back off immediately if found.
   * - Try to delay commits and garbage collection until the user is inactive,
   *   as these tend to cause a brief pause in the UI.
   */

  /**
   * The number of milliseconds before we declare the user idle and step up our
   * indexing.
   */
  _INDEX_IDLE_ADJUSTMENT_TIME: 5000,

  /**
   * The time delay in milliseconds before we should schedule our initial sweep.
   */
  _INITIAL_SWEEP_DELAY: 10000,

  /**
   * How many milliseconds in the future should we schedule indexing to start
   * when turning on indexing (and it was not previously active).
   */
  _INDEX_KICKOFF_DELAY: 200,

  /**
   * The time interval, in milliseconds, of pause between indexing batches.  The
   * maximum processor consumption is determined by this constant and the
   * active |_cpuTargetIndexTime|.
   *
   * For current constants, that puts us at 50% while the user is active and 83%
   * when idle.
   */
  _INDEX_INTERVAL: 32,

  /**
   * Number of indexing 'tokens' we are allowed to consume before yielding for
   * each incremental pass.  Consider a single token equal to indexing a single
   * medium-sized message.  This may be altered by user session (in)activity.
   * Because we fetch message bodies, which is potentially asynchronous, this
   * is not a precise knob to twiddle.
   */
  _indexTokens: 2,

  /**
   * Stopwatches used to measure performance during indexing, and during
   * pauses between indexing.  These help us adapt our indexing constants so
   * as to not explode your computer.  Kind of us, no?
   */
  _perfIndexStopwatch: null,
  _perfPauseStopwatch: null,
  /**
   * Do we have an uncommitted indexer transaction that idle callback should
   * commit?
   */
  _idleToCommit: false,
  /**
   * Target CPU time per batch of tokens, current value (milliseconds).
   */
  _cpuTargetIndexTime: 32,
  /**
   * Target CPU time per batch of tokens, during non-idle (milliseconds).
   */
  _CPU_TARGET_INDEX_TIME_ACTIVE: 32,
  /**
   * Target CPU time per batch of tokens, during idle (milliseconds).
   */
  _CPU_TARGET_INDEX_TIME_IDLE: 160,
  /**
   * Average CPU time per processed token (milliseconds).
   */
  _cpuAverageTimePerToken: 16,
  /**
   * Damping factor for _cpuAverageTimePerToken, as an approximate
   * number of tokens to include in the average time.
   */
  _CPU_AVERAGE_TIME_DAMPING: 200,
  /**
   * Maximum tokens per batch.  This is normally just a sanity check.
   */
  _CPU_MAX_TOKENS_PER_BATCH: 100,
  /**
   * CPU usage during a pause to declare that system was busy (milliseconds).
   * This is typically set as 1.5 times the minimum resolution of the cpu
   * usage clock, which is 16 milliseconds on Windows systems, and (I think)
   * smaller on other systems, so we take the worst case.
   */
  _CPU_IS_BUSY_TIME: 24,
  /**
   * Time that return from pause may be late before the system is declared
   * busy, in milliseconds.  (Same issues as _CPU_IS_BUSY_TIME).
   */
  _PAUSE_LATE_IS_BUSY_TIME: 24,
  /**
   * Number of times that we will repeat a pause while waiting for a
   * free CPU.
   */
  _PAUSE_REPEAT_LIMIT: 10,
  /**
   * Minimum time delay between commits, in milliseconds.
   */
  _MINIMUM_COMMIT_TIME: 5000,
  /**
   * Maximum time delay between commits, in milliseconds.
   */
  _MAXIMUM_COMMIT_TIME: 20000,

  /**
   * Unit testing hook to get us to emit additional logging that verges on
   * inane for general usage but is helpful in unit test output to get a lay
   * of the land and for paranoia reasons.
   */
  _unitTestSuperVerbose: false,
  /**
   * Unit test vector to get notified when a worker has a problem and it has
   * a recover helper associated.  This gets called with an argument
   * indicating whether the recovery helper indicates recovery was possible.
   */
  _unitTestHookRecover: null,
  /**
   * Unit test vector to get notified when a worker runs into an exceptional
   * situation (an exception propagates or gets explicitly killed) and needs
   * to be cleaned up.  This gets called with an argument indicating if there
   * was a helper that was used or if we just did the default cleanup thing.
   */
  _unitTestHookCleanup: null,

  /**
   * Last commit time.  Tracked to try and only commit at reasonable intervals.
   */
  _lastCommitTime: Date.now(),

  _inited: false,
  /**
   * Initialize the indexer.
Safe to call repeatedly; the _inited flag makes
   * subsequent calls no-ops.
   */
  _init: function gloda_index_init() {
    if (this._inited)
      return;

    this._inited = true;

    this._callbackHandle.init();

    // if we start out offline, do not index until we go back online
    if (Services.io.offline)
      this._suppressIndexing = true;

    // create the timer that drives our intermittent indexing
    this._timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
    // create the timer for larger offsets independent of indexing
    this._longTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);

    this._idleService = Cc["@mozilla.org/widget/idleservice;1"]
                          .getService(Ci.nsIIdleService);

    // create our performance stopwatches
    try {
      this._perfIndexStopwatch = Cc["@mozilla.org/stopwatch;1"]
                                   .createInstance(Ci.nsIStopwatch);
      this._perfPauseStopwatch = Cc["@mozilla.org/stopwatch;1"]
                                   .createInstance(Ci.nsIStopwatch);

    } catch (ex) {
      this._log.error("problem creating stopwatch!: " + ex);
    }

    // register for shutdown notifications
    Services.obs.addObserver(this, "quit-application", false);

    // figure out if event-driven indexing should be enabled...
    let branch = Services.prefs.getBranch("mailnews.database.global.indexer.");
    let eventDrivenEnabled = false; // default
    let performInitialSweep = true; // default
    try {
      eventDrivenEnabled = branch.getBoolPref("enabled");
    } catch (ex) {
      dump("%%% annoying exception on pref access: " + ex);
    }
    // this is a secret preference mainly intended for testing purposes.
    try {
      performInitialSweep = branch.getBoolPref("perform_initial_sweep");
    } catch (ex) {}
    // pretend we have already performed an initial sweep...
    if (!performInitialSweep)
      this._initialSweepPerformed = true;

    // assigning 'enabled' runs the observer/indexer registration logic in the
    // enabled setter below.
    this.enabled = eventDrivenEnabled;
  },

  /**
   * When shutdown, indexing immediately ceases and no further progress should
   * be made.  This flag goes true once, and never returns to false.  Being
   * in this state is a destructive thing from whence we cannot recover.
   */
  _indexerIsShutdown: false,

  /**
   * Shutdown the indexing process and datastore as quickly as possible in
   * a synchronous fashion.
   */
  _shutdown: function gloda_index_shutdown() {
    // no more timer events, please.  (cancel() may throw if the timer was
    // never armed; that failure is deliberately ignored.)
    try {
      this._timer.cancel();
    } catch (ex) {}
    this._timer = null;
    try {
      this._longTimer.cancel();
    } catch (ex) {}
    this._longTimer = null;

    this._perfIndexStopwatch = null;
    this._perfPauseStopwatch = null;

    // Remove listeners to avoid reference cycles on the off chance one of them
    // holds a reference to the indexer object.
    this._indexListeners = [];

    this._indexerIsShutdown = true;

    if (this.enabled)
      this._log.info("Shutting Down");

    // don't let anything try and convince us to start indexing again
    this.suppressIndexing = true;

    // If there is an active job and it has a cleanup handler, run it.
    if (this._curIndexingJob) {
      let workerDef = this._curIndexingJob._workerDef;
      try {
        if (workerDef.cleanup)
          workerDef.cleanup.call(workerDef.indexer, this._curIndexingJob);
      }
      catch (ex) {
        this._log.error("problem during worker cleanup during shutdown.");
      }
    }
    // Definitely clean out the async call stack and any associated data
    this._callbackHandle.cleanup();
    this._workBatchData = undefined;

    // disable ourselves and all of the specific indexers
    this.enabled = false;

    GlodaDatastore.shutdown();
  },

  /**
   * The list of indexers registered with us.  If you are a core gloda indexer
   * (you ship with gloda), then you can import this file directly and should
   * make sure your indexer is imported in 'everybody.js' in the right order.
   * If you are not core gloda, then you should import 'public.js' and only
   * then should you import 'indexer.js' to get at GlodaIndexer.
   */
  _indexers: [],
  /**
   * Register an indexer with the Gloda indexing mechanism.
   *
   * @param aIndexer.name The name of your indexer.
   * @param aIndexer.enable Your enable function.
This will be called during
   *     the call to registerIndexer if Gloda indexing is already enabled.  If
   *     indexing is not yet enabled, you will be called when indexing becomes
   *     enabled.
   * @param aIndexer.disable Your disable function.  This will be called when
   *     indexing is disabled or we are shutting down.  This will only be
   *     called if enable has already been called.
   * @param aIndexer.workers A list of tuples of the form [worker type code,
   *     worker generator function, optional scheduling trigger function].  The
   *     type code is the string used to uniquely identify the job type.  If
   *     you are not core gloda, your job type must start with your extension's
   *     name and a colon; you can follow that with anything you want.  The
   *     worker generator is not easily explained in here.  The trigger
   *     function is invoked immediately prior to calling the generator to
   *     create it.  The trigger function takes the job as an argument and
   *     should perform any finalization required on the job.  Most workers
   *     should not need to use the trigger function.
   * @param aIndexer.initialSweep We call this to tell each indexer when it is
   *     its turn to run its indexing sweep.  The idea of the indexing sweep is
   *     that this is when you traverse things eligible for indexing to make
   *     sure they are indexed.  Right now we just call everyone at the same
   *     time and hope that their jobs don't fight too much.
   */
  registerIndexer: function gloda_index_registerIndexer(aIndexer) {
    this._log.info("Registering indexer: " + aIndexer.name);
    this._indexers.push(aIndexer);

    try {
      // Normalize every worker definition so optional hooks are present
      // (as null) and record it in the worker-def registry.
      // NOTE(review): _indexerWorkerDefs is populated here but declared
      // outside this chunk -- confirm it is initialized before first use.
      for (let workerInfo of aIndexer.workers) {
        let workerCode = workerInfo[0];
        let workerDef = workerInfo[1];
        workerDef.name = workerCode;
        workerDef.indexer = aIndexer;
        this._indexerWorkerDefs[workerCode] = workerDef;
        if (!("recover" in workerDef))
          workerDef.recover = null;
        if (!("cleanup" in workerDef))
          workerDef.cleanup = null;
        if (!("onSchedule" in workerDef))
          workerDef.onSchedule = null;
        if (!("jobCanceled" in workerDef))
          workerDef.jobCanceled = null;
      }
    }
    catch (ex) {
      this._log.warn("Helper indexer threw exception on worker enum.");
    }

    if (this._enabled) {
      try {
        aIndexer.enable();
      } catch (ex) {
        this._log.warn("Helper indexer threw exception on enable: " + ex);
      }
    }
  },

  /**
   * Are we enabled, read: are we processing change events?
   */
  _enabled: false,
  get enabled() { return this._enabled; },
  set enabled(aEnable) {
    if (!this._enabled && aEnable) {
      // register for offline notifications
      Services.obs.addObserver(this, "network:offline-status-changed", false);

      // register for idle notification
      this._idleService.addIdleObserver(this, this._indexIdleThresholdSecs);

      this._enabled = true;

      for (let indexer of this._indexers) {
        try {
          indexer.enable();
        } catch (ex) {
          this._log.warn("Helper indexer threw exception on enable: " + ex);
        }
      }

      // if we have an accumulated desire to index things, kick it off again.
      if (this._indexingDesired) {
        this._indexingDesired = false; // it's edge-triggered for now
        this.indexing = true;
      }

      // if we have not done an initial sweep, schedule scheduling one.
      if (!this._initialSweepPerformed) {
        this._longTimer.initWithCallback(this._scheduleInitialSweep,
          this._INITIAL_SWEEP_DELAY, Ci.nsITimer.TYPE_ONE_SHOT);
      }
    }
    else if (this._enabled && !aEnable) {
      for (let indexer of this._indexers) {
        try {
          indexer.disable();
        } catch (ex) {
          this._log.warn("Helper indexer threw exception on disable: " + ex);
        }
      }

      // remove offline observer
      Services.obs.removeObserver(this, "network:offline-status-changed");

      // remove idle
      this._idleService.removeIdleObserver(this, this._indexIdleThresholdSecs);

      this._enabled = false;
    }

  },

  /** Track whether indexing is desired (we have jobs to prosecute). */
  _indexingDesired: false,
  /**
   * Track whether we have an actively pending callback or timer event.  We do
   * this so we don't experience a transient suppression and accidentally
   * get multiple event-chains driving indexing at the same time (which the
   * code will not handle correctly).
   */
  _indexingActive: false,
  /**
   * Indicates whether indexing is currently ongoing.  This may return false
   * while indexing activities are still active, but they will quiesce shortly.
   */
  get indexing() {
    return this._indexingDesired && !this._suppressIndexing;
  },
  /** Indicates whether indexing is desired. */
  get indexingDesired() {
    return this._indexingDesired;
  },
  /**
   * Set this to true to indicate there is indexing work to perform.  This does
   * not mean indexing will begin immediately (if it wasn't active), however.
   * If suppressIndexing has been set, we won't do anything until indexing is
   * no longer suppressed.
(Edge-triggered: only a false -> true transition arms the
   * kickoff timer; the setter cannot be used to turn indexing desire off.)
   */
  set indexing(aShouldIndex) {
    if (!this._indexingDesired && aShouldIndex) {
      this._indexingDesired = true;
      if (this.enabled && !this._indexingActive && !this._suppressIndexing) {
        this._log.info("+++ Indexing Queue Processing Commencing");
        this._indexingActive = true;
        this._timer.initWithCallback(this._timerCallbackDriver,
                                     this._INDEX_KICKOFF_DELAY,
                                     Ci.nsITimer.TYPE_ONE_SHOT);
      }
    }
  },

  _suppressIndexing: false,
  /**
   * Set whether or not indexing should be suppressed.  This is to allow us to
   * avoid running down a laptop's battery when it is not on AC.  Only code
   * in charge of regulating that tracking should be setting this variable; if
   * other factors want to contribute to such a decision, this logic needs to
   * be changed to track that, since last-write currently wins.
   */
  set suppressIndexing(aShouldSuppress) {
    this._suppressIndexing = aShouldSuppress;

    // re-start processing if we are no longer suppressing, there is work yet
    // to do, and the indexing process had actually stopped.
    if (!this._suppressIndexing && this._indexingDesired &&
        !this._indexingActive) {
      this._log.info("+++ Indexing Queue Processing Resuming");
      this._indexingActive = true;
      this._timer.initWithCallback(this._timerCallbackDriver,
                                   this._INDEX_KICKOFF_DELAY,
                                   Ci.nsITimer.TYPE_ONE_SHOT);
    }
  },

  /**
   * Track whether an initial sweep has been performed.  This mainly exists so
   * that unit testing can stop us from performing an initial sweep.
   */
  _initialSweepPerformed: false,
  /**
   * Our timer-driven callback to schedule our first initial indexing sweep.
   * Because it is invoked by an nsITimer it operates without the benefit of
   * a 'this' context and must use GlodaIndexer instead of this.
   * Since an initial sweep could have been performed before we get invoked,
   * we need to check whether an initial sweep is still desired before trying
   * to schedule one.  We don't need to worry about whether one is active
   * because the indexingSweepNeeded takes care of that.
   */
  _scheduleInitialSweep: function gloda_index_scheduleInitialSweep() {
    if (GlodaIndexer._initialSweepPerformed)
      return;
    GlodaIndexer._initialSweepPerformed = true;
    for (let indexer of GlodaIndexer._indexers) {
      indexer.initialSweep();
    }
  },

  // Work-state codes mirrored from Gloda so workers can yield them directly.
  kWorkSync: Gloda.kWorkSync,
  kWorkAsync: Gloda.kWorkAsync,
  kWorkDone: Gloda.kWorkDone,
  kWorkPause: Gloda.kWorkPause,
  kWorkDoneWithResult: Gloda.kWorkDoneWithResult,

  /**
   * Our current job number.  Meaningless value that increments with every job
   * we process that resets to 0 when we run out of jobs.  Currently used by
   * the activity manager's gloda listener to tell when we have changed jobs.
   * We really need a better listener mechanism.
   */
  _indexingJobCount: 0,

  /**
   * A list of IndexingJob instances to process.
   */
  _indexQueue: [],

  /**
   * The current indexing job.
   */
  _curIndexingJob: null,

  /**
   * The number of seconds before we declare the user idle and commit if
   * needed.
   */
  _indexIdleThresholdSecs: 3,

  _indexListeners: [],
  /**
   * Add an indexing progress listener.  The listener will be notified of at
   * least all major status changes (idle -> indexing, indexing -> idle), plus
   * arbitrary progress updates during the indexing process.
   * If indexing is not active when the listener is added, a synthetic idle
   * notification will be generated.
   *
   * @param aListener A listener function, taking arguments: status (Gloda.
   *     kIndexer*), the folder name if a folder is involved (string or null),
   *     current zero-based job number (int),
   *     current item number being indexed in this job (int), total number
   *     of items in this job to be indexed (int).
   *
   * @TODO should probably allow for a 'this' value to be provided
   * @TODO generalize to not be folder/message specific.  use nouns!
@returns the listener itself, for caller convenience.
   */
  addListener: function gloda_index_addListener(aListener) {
    // should we weakify?
    if (this._indexListeners.indexOf(aListener) == -1)
      this._indexListeners.push(aListener);
    // if we aren't indexing, give them an idle indicator, otherwise they can
    // just be happy when we hit the next actual status point.
    if (!this.indexing)
      aListener(Gloda.kIndexerIdle, null, 0, 0, 1);
    return aListener;
  },
  /**
   * Remove the given listener so that it no longer receives indexing progress
   * updates.
   */
  removeListener: function gloda_index_removeListener(aListener) {
    let index = this._indexListeners.indexOf(aListener);
    if (index != -1)
      this._indexListeners.splice(index, 1);
  },
  /**
   * Helper method to tell listeners what we're up to.  For code simplicity,
   * the caller is just deciding when to send this update (preferably at
   * reasonable intervals), and doesn't need to provide any indication of
   * state... we figure that out ourselves.
   *
   * This was not pretty but got ugly once we moved the message indexing out
   * to its own indexer.  Some generalization is required but will likely
   * require string hooks.
   */
  _notifyListeners: function gloda_index_notifyListeners() {
    let status, prettyName, jobIndex, jobItemIndex, jobItemGoal, jobType;

    if (this.indexing && this._curIndexingJob) {
      let job = this._curIndexingJob;
      status = Gloda.kIndexerIndexing;

      let indexer = this._indexerWorkerDefs[job.jobType].indexer;
      if ("_indexingFolder" in indexer)
        prettyName = (indexer._indexingFolder != null) ?
                       indexer._indexingFolder.prettiestName : null;
      else
        prettyName = null;

      jobIndex = this._indexingJobCount-1;
      jobItemIndex = job.offset;
      jobItemGoal  = job.goal;
      jobType = job.jobType;
    }
    else {
      status = Gloda.kIndexerIdle;
      prettyName = null;
      jobIndex = 0;
      jobItemIndex = 0;
      jobItemGoal = 1;
      jobType = null;
    }

    // Some people ascribe to the belief that the most you can give is 100%.
    // We know better, but let's humor them.
    if (jobItemIndex > jobItemGoal)
      jobItemGoal = jobItemIndex;

    // iterate in reverse so a listener removing itself mid-notification does
    // not perturb the iteration.
    for (let iListener = this._indexListeners.length-1; iListener >= 0;
         iListener--) {
      let listener = this._indexListeners[iListener];
      try {
        listener(status, prettyName, jobIndex, jobItemIndex, jobItemGoal,
                 jobType);
      }
      catch(ex) {
        this._log.error(ex);
      }
    }
  },

  /**
   * A wrapped callback driver intended to be used by timers that provide
   * arguments we really do not care about.
   */
  _timerCallbackDriver: function gloda_index_timerCallbackDriver() {
    GlodaIndexer.callbackDriver();
  },

  /**
   * A simple callback driver wrapper to provide 'this'.
   */
  _wrapCallbackDriver: function gloda_index_wrapCallbackDriver() {
    GlodaIndexer.callbackDriver.apply(GlodaIndexer, arguments);
  },

  /**
   * The current processing 'batch' generator, produced by a call to workBatch()
   * and used by callbackDriver to drive execution.
   */
  _batch: null,
  // re-entrancy guard for callbackDriver; see below.
  _inCallback: false,
  // arguments captured by a re-entrant callbackDriver call, to be replayed by
  // the deferred timer invocation.
  _savedCallbackArgs: null,
  /**
   * The root work-driver.  callbackDriver creates workBatch generator instances
   * (stored in _batch) which run until they are done (kWorkDone) or they
   * (really the embedded activeIterator) encounter something asynchronous.
   * The convention is that all the callback handlers end up calling us,
   * ensuring that control-flow properly resumes.  If the batch completes,
   * we re-schedule ourselves after a time delay (controlled by _INDEX_INTERVAL)
   * and return.  (We use one-shot timers because repeating-slack does not
   * know enough to deal with our (current) asynchronous nature.)
   */
  callbackDriver: function gloda_index_callbackDriver() {
    // just bail if we are shutdown
    if (this._indexerIsShutdown)
      return;

    // it is conceivable that someone we call will call something that in some
    // cases might be asynchronous, and in other cases immediately generate
    // events without returning.  In the interest of (stack-depth) sanity,
    // let's handle this by performing a minimal time-delay callback.
    // this is also now a good thing sequencing-wise.  if we get our callback
    // with data before the underlying function has yielded, we obviously can't
    // cram the data in yet.  Our options in this case are to either mark the
    // fact that the callback has already happened and immediately return to
    // the iterator when it does bubble up the kWorkAsync, or we can do as we
    // have been doing, but save the arguments and replay them from the
    // deferred timer callback (via _savedCallbackArgs below).
    if (this._inCallback) {
      this._savedCallbackArgs = arguments;
      this._timer.initWithCallback(this._timerCallbackDriver,
                                   0,
                                   Ci.nsITimer.TYPE_ONE_SHOT);
      return;
    }
    this._inCallback = true;

    try {
      if (this._batch === null)
        this._batch = this.workBatch();

      // kWorkAsync, kWorkDone, kWorkPause are allowed out; kWorkSync is not.
      // On kWorkDone, we want to schedule another timer to fire on us if we are
      // not done indexing.  (On kWorkAsync, we don't care what happens, because
      // someone else will be receiving the callback, and they will call us when
      // they are done doing their thing.)
      let args;
      if (this._savedCallbackArgs != null) {
        args = this._savedCallbackArgs;
        this._savedCallbackArgs = null;
      }
      else
        args = arguments; //Array.slice.call(arguments);

      let result;
      if (args.length == 0)
        result = this._batch.next().value;
      else if (args.length == 1)
        result = this._batch.next(args[0]).value;
      else // arguments works with destructuring assignment
        result = this._batch.next(args).value;
      switch (result) {
        // job's done, close the batch and re-schedule ourselves if there's more
        // to do.
        case this.kWorkDone:
          this._batch.return();
          this._batch = null;
          // (intentional fall-through to re-scheduling logic)
        // the batch wants to get re-scheduled, do so.
+ case this.kWorkPause: + if (this.indexing) + this._timer.initWithCallback(this._timerCallbackDriver, + this._INDEX_INTERVAL, + Ci.nsITimer.TYPE_ONE_SHOT); + else { // it's important to indicate no more callbacks are in flight + this._indexingActive = false; + } + break; + case this.kWorkAsync: + // there is nothing to do. some other code is now responsible for + // calling us. + break; + } + } + finally { + this._inCallback = false; + } + }, + + _callbackHandle: { + init: function gloda_index_callbackhandle_init() { + this.wrappedCallback = GlodaIndexer._wrapCallbackDriver; + this.callbackThis = GlodaIndexer; + this.callback = GlodaIndexer.callbackDriver; + }, + /** + * The stack of generators we are processing. The (numerically) last one is + * also the |activeIterator|. + */ + activeStack: [], + /** + * The generator at the top of the |activeStack| and that we will call next + * or send on next if nothing changes. + */ + activeIterator: null, + /** + * Meta-information about the generators at each level of the stack. + */ + contextStack: [], + /** + * Push a new generator onto the stack. It becomes the active generator. + */ + push: function gloda_index_callbackhandle_push(aIterator, aContext) { + this.activeStack.push(aIterator); + this.contextStack.push(aContext); + this.activeIterator = aIterator; + }, + /** + * For use by generators that want to call another asynchronous process + * implemented as a generator. They should do + * "yield aCallbackHandle.pushAndGo(someGenerator(arg1, arg2));". + * + * @public + */ + pushAndGo: function gloda_index_callbackhandle_pushAndGo(aIterator, + aContext) { + this.push(aIterator, aContext); + return GlodaIndexer.kWorkSync; + }, + /** + * Pop the active generator off the stack. 
+ */ + pop: function gloda_index_callbackhandle_pop() { + this.activeIterator.return(); + this.activeStack.pop(); + this.contextStack.pop(); + if (this.activeStack.length) + this.activeIterator = this.activeStack[this.activeStack.length - 1]; + else + this.activeIterator = null; + }, + /** + * Someone propagated an exception and we need to clean-up all the active + * logic as best we can. Which is not really all that well. + * + * @param [aOptionalStopAtDepth=0] The length the stack should be when this + * method completes. Pass 0 or omit for us to clear everything out. + * Pass 1 to leave just the top-level generator intact. + */ + cleanup: function gloda_index_callbackhandle_cleanup(aOptionalStopAtDepth) { + if (aOptionalStopAtDepth === undefined) + aOptionalStopAtDepth = 0; + while (this.activeStack.length > aOptionalStopAtDepth) { + this.pop(); + } + }, + /** + * For use when a generator finishes up by calling |doneWithResult| on us; + * the async driver calls this to pop that generator off the stack + * and get the result it passed in to its call to |doneWithResult|. + * + * @protected + */ + popWithResult: function gloda_index_callbackhandle_popWithResult() { + this.pop(); + let result = this._result; + this._result = null; + return result; + }, + _result: null, + /** + * For use by generators that want to return a result to the calling + * asynchronous generator. Specifically, they should do + * "yield aCallbackHandle.doneWithResult(RESULT);". 
+ * + * @public + */ + doneWithResult: function gloda_index_callbackhandle_doneWithResult(aResult){ + this._result = aResult; + return Gloda.kWorkDoneWithResult; + }, + + /* be able to serve as a collection listener, resuming the active iterator's + last yield kWorkAsync */ + onItemsAdded: function() {}, + onItemsModified: function() {}, + onItemsRemoved: function() {}, + onQueryCompleted: function(aCollection) { + GlodaIndexer.callbackDriver(); + } + }, + _workBatchData: undefined, + /** + * The workBatch generator handles a single 'batch' of processing, managing + * the database transaction and keeping track of "tokens". It drives the + * activeIterator generator which is doing the work. + * workBatch will only produce kWorkAsync, kWorkPause, and kWorkDone + * notifications. If activeIterator returns kWorkSync and there are still + * tokens available, workBatch will keep driving the activeIterator until it + * encounters a kWorkAsync (which workBatch will yield to callbackDriver), or + * it runs out of tokens and yields a kWorkPause or kWorkDone. + */ + workBatch: function* gloda_index_workBatch() { + + // Do we still have an open transaction? If not, start a new one. + if (!this._idleToCommit) + GlodaDatastore._beginTransaction(); + else + // We'll manage commit ourself while this routine is active. + this._idleToCommit = false; + + this._perfIndexStopwatch.start(); + let batchCount; + let haveMoreWork = true; + let transactionToCommit = true; + let inIdle; + + let notifyDecimator = 0; + + while (haveMoreWork) { + // Both explicit work activity points (sync + async) and transfer of + // control return (via kWorkDone*) results in a token being eaten. The + // idea now is to make tokens less precious so that the adaptive logic + // can adjust them with less impact. (Before this change, doing 1 + // token's work per cycle ended up being an entire non-idle time-slice's + // work.) 
+ // During this loop we track the clock real-time used even though we + // frequently yield to asynchronous operations. These asynchronous + // operations are either database queries or message streaming requests. + // Both may involve disk I/O but no network I/O (since we only stream + // messages that are already available offline), but in an ideal + // situation will come from cache and so the work this function kicks off + // will dominate. + // We do not use the CPU time to this end because... + // 1) Our timer granularity on linux is worse for CPU than for wall time. + // 2) That can fail to account for our I/O cost. + // 3) If something with a high priority / low latency need (like playing + // a video) is fighting us, although using CPU time will accurately + // express how much time we are actually spending to index, our goal + // is to control the duration of our time slices, not be "right" about + // the actual CPU cost. In that case, if we attempted to take on more + // work, we would likely interfere with the higher priority process or + // make ourselves less responsive by drawing out the period of time we + // are dominating the main thread. + this._perfIndexStopwatch.start(); + // For telemetry purposes, we want to know how many messages we've been + // processing during that batch, and how long it took, pauses included. + let t0 = Date.now(); + this._indexedMessageCount = 0; + batchCount = 0; + while (batchCount < this._indexTokens) { + if ((this._callbackHandle.activeIterator === null) && + !this._hireJobWorker()) { + haveMoreWork = false; + break; + } + batchCount++; + + // XXX for performance, we may want to move the try outside the for loop + // with a quasi-redundant outer loop that shunts control back inside + // if we left the loop due to an exception (without consuming all the + // tokens.) 
+ try { + switch (this._callbackHandle + .activeIterator.next(this._workBatchData).value) { + case this.kWorkSync: + this._workBatchData = undefined; + break; + case this.kWorkAsync: + this._workBatchData = yield this.kWorkAsync; + break; + case this.kWorkDone: + this._callbackHandle.pop(); + this._workBatchData = undefined; + break; + case this.kWorkDoneWithResult: + this._workBatchData = this._callbackHandle.popWithResult(); + break; + default: + break; + } + } + catch (ex) { + this._log.debug("Exception in batch processing:", ex); + let workerDef = this._curIndexingJob._workerDef; + if (workerDef.recover) { + let recoverToDepth; + try { + recoverToDepth = + workerDef.recover.call(workerDef.indexer, + this._curIndexingJob, + this._callbackHandle.contextStack, + ex); + } + catch (ex2) { + this._log.error("Worker '" + workerDef.name + + "' recovery function itself failed:", ex2); + } + if (this._unitTestHookRecover) + this._unitTestHookRecover(recoverToDepth, ex, + this._curIndexingJob, + this._callbackHandle); + + if (recoverToDepth) { + this._callbackHandle.cleanup(recoverToDepth); + continue; + } + } + // (we either did not have a recover handler or it couldn't recover) + // call the cleanup helper if there is one + if (workerDef.cleanup) { + try { + workerDef.cleanup.call(workerDef.indexer, this._curIndexingJob); + } + catch (ex2) { + this._log.error("Worker '" + workerDef.name + + "' cleanup function itself failed:", ex2); + } + if (this._unitTestHookCleanup) + this._unitTestHookCleanup(true, ex, this._curIndexingJob, + this._callbackHandle); + } + else { + if (this._unitTestHookCleanup) + this._unitTestHookCleanup(false, ex, this._curIndexingJob, + this._callbackHandle); + } + + // Clean out everything on the async stack, warn about the job, kill. + // We do not log this warning lightly; it will break unit tests and + // be visible to users. 
Anything expected should likely have a + // recovery function or the cleanup logic should be extended to + // indicate that the failure is acceptable. + this._callbackHandle.cleanup(); + this._log.warn("Problem during " + this._curIndexingJob + + ", bailing:", ex); + this._curIndexingJob = null; + // the data must now be invalid + this._workBatchData = undefined; + } + } + this._perfIndexStopwatch.stop(); + + // idleTime can throw if there is no idle-provider available, such as an + // X session without the relevant extensions available. In this case + // we assume that the user is never idle. + try { + // We want to stop ASAP when leaving idle, so we can't rely on the + // standard polled callback. We do the polling ourselves. + if (this._idleService.idleTime < this._INDEX_IDLE_ADJUSTMENT_TIME) { + inIdle = false; + this._cpuTargetIndexTime = this._CPU_TARGET_INDEX_TIME_ACTIVE; + } + else { + inIdle = true; + this._cpuTargetIndexTime = this._CPU_TARGET_INDEX_TIME_IDLE; + } + } + catch (ex) { + inIdle = false; + } + + // take a breather by having the caller re-schedule us sometime in the + // future, but only if we're going to perform another loop iteration. + if (haveMoreWork) { + notifyDecimator = (notifyDecimator + 1) % 32; + if (!notifyDecimator) + this._notifyListeners(); + + for (let pauseCount = 0; + pauseCount < this._PAUSE_REPEAT_LIMIT; + pauseCount++) { + this._perfPauseStopwatch.start(); + + yield this.kWorkPause; + + this._perfPauseStopwatch.stop(); + // We repeat the pause if the pause was longer than + // we expected, or if it used a significant amount + // of cpu, either of which indicate significant other + // activity. + if ((this._perfPauseStopwatch.cpuTimeSeconds * 1000 < + this._CPU_IS_BUSY_TIME) && + (this._perfPauseStopwatch.realTimeSeconds * 1000 - + this._INDEX_INTERVAL < this._PAUSE_LATE_IS_BUSY_TIME)) + break; + } + } + + // All pauses have been taken, how effective were we? Report! 
+ // XXX: there's possibly a lot of fluctuation since we go through here + // every 5 messages or even less + if (this._indexedMessageCount > 0) { + let delta = (Date.now() - t0)/1000; // in seconds + let v = Math.round(this._indexedMessageCount/delta); + try { + let h = Services.telemetry + .getHistogramById("THUNDERBIRD_INDEXING_RATE_MSG_PER_S"); + h.add(v); + } catch (e) { + this._log.warn("Couldn't report telemetry", e, v); + } + } + + if (batchCount > 0) { + let totalTime = this._perfIndexStopwatch.realTimeSeconds * 1000; + let timePerToken = totalTime / batchCount; + // Damp the average time since it is a rough estimate only. + this._cpuAverageTimePerToken = + (totalTime + + this._CPU_AVERAGE_TIME_DAMPING * this._cpuAverageTimePerToken) / + (batchCount + this._CPU_AVERAGE_TIME_DAMPING); + // We use the larger of the recent or the average time per token, so + // that we can respond quickly to slow down indexing if there + // is a sudden increase in time per token. + let bestTimePerToken = + Math.max(timePerToken, this._cpuAverageTimePerToken); + // Always index at least one token! + this._indexTokens = + Math.max(1, this._cpuTargetIndexTime / bestTimePerToken); + // But no more than the a maximum limit, just for sanity's sake. + this._indexTokens = Math.min(this._CPU_MAX_TOKENS_PER_BATCH, + this._indexTokens); + this._indexTokens = Math.ceil(this._indexTokens); + } + + // Should we try to commit now? + let elapsed = Date.now() - this._lastCommitTime; + // Commit tends to cause a brief UI pause, so we try to delay it (but not + // forever) if the user is active. If we're done and idling, we'll also + // commit, otherwise we'll let the idle callback do it. 
+ let doCommit = transactionToCommit && + ((elapsed > this._MAXIMUM_COMMIT_TIME) || + (inIdle && + (elapsed > this._MINIMUM_COMMIT_TIME || !haveMoreWork))); + if (doCommit) { + GlodaCollectionManager.cacheCommitDirty(); + // Set up an async notification to happen after the commit completes so + // that we can avoid the indexer doing something with the database that + // causes the main thread to block against the completion of the commit + // (which can be a while) on 1.9.1. + GlodaDatastore.runPostCommit(this._callbackHandle.wrappedCallback); + // kick off the commit + GlodaDatastore._commitTransaction(); + yield this.kWorkAsync; + // Let's do the GC after the commit completes just so we can avoid + // having any ugly interactions. + GlodaUtils.forceGarbageCollection(false); + this._lastCommitTime = Date.now(); + // Restart the transaction if we still have work. + if (haveMoreWork) + GlodaDatastore._beginTransaction(); + else + transactionToCommit = false; + } + } + + this._notifyListeners(); + + // If we still have a transaction to commit, tell idle to do the commit + // when it gets around to it. + if (transactionToCommit) + this._idleToCommit = true; + + yield this.kWorkDone; + }, + + /** + * Maps indexing job type names to a worker definition. + * The worker definition is an object with the following attributes where + * only worker is required: + * - worker: + * - onSchedule: A function to be invoked when the worker is scheduled. The + * job is passed as an argument. + * - recover: + * - cleanup: + */ + _indexerWorkerDefs: {}, + /** + * Perform the initialization step and return a generator if there is any + * steady-state processing to be had. + */ + _hireJobWorker: function gloda_index_hireJobWorker() { + // In no circumstances should there be data bouncing around from previous + // calls if we are here. |killActiveJob| depends on this. 
    // See |killActiveJob|: stale yield data from a previous worker must not
    // leak into the next worker's first next() call, so always clear it here.
    this._workBatchData = undefined;

    if (this._indexQueue.length == 0) {
      this._log.info("--- Done indexing, disabling timer renewal.");

      // No more work: reset all job bookkeeping so listeners see idle state.
      this._curIndexingJob = null;
      this._indexingDesired = false;
      this._indexingJobCount = 0;
      return false;
    }

    // Dequeue the next job (FIFO) and make it the active one.
    let job = this._curIndexingJob = this._indexQueue.shift();
    this._indexingJobCount++;

    let generator = null;

    if (job.jobType in this._indexerWorkerDefs) {
      let workerDef = this._indexerWorkerDefs[job.jobType];
      job._workerDef = workerDef;

      // Prior to creating the worker, call the scheduling trigger function
      // if there is one.  This is so that jobs can be finalized.  The
      // initial use case is event-driven message indexing that accumulates
      // a list of messages to index but wants it locked down once we start
      // processing the list.
      if (workerDef.onSchedule)
        workerDef.onSchedule.call(workerDef.indexer, job);

      generator = workerDef.worker.call(workerDef.indexer, job,
                                        this._callbackHandle);
    }
    else {
      // Nothing we can do about this.  Be loud about it and try to schedule
      // something else.
      this._log.error("Unknown job type: " + job.jobType);
      // Recurse to try the next queued job; recursion depth is bounded by
      // the queue length since each call shifts one job off.
      return this._hireJobWorker();
    }

    if (this._unitTestSuperVerbose)
      this._log.debug("Hired job of type: " + job.jobType);

    this._notifyListeners();

    if (generator) {
      // The generator becomes the active iterator driven by workBatch().
      this._callbackHandle.push(generator);
      return true;
    }
    else
      return false;
  },

  /**
   * Schedule a job for indexing.  Setting |indexing| to true kicks the
   * driver machinery into motion if it was idle.
   */
  indexJob: function glodaIndexJob(aJob) {
    this._log.info("Queue-ing job for indexing: " + aJob.jobType);

    this._indexQueue.push(aJob);
    this.indexing = true;
  },

  /**
   * Kill the active job.  This means a few things:
   * - Kill all the generators in the callbackHandle stack.
   * - If we are currently waiting on an async return, we need to make sure it
   *   does not screw us up.
   * - Make sure the job's cleanup function gets called if appropriate.
   *
   * The async return case is actually not too troublesome.
Since there is an + * active indexing job and we are not (by fiat) in that call stack, we know + * that the callback driver is guaranteed to get triggered again somehow. + * The only issue is to make sure that _workBatchData does not end up with + * the data. We compel |_hireJobWorker| to erase it to this end. + * + * @note You MUST NOT call this function from inside a job or an async funtion + * on the callbackHandle's stack of generators. If you are in that + * situation, you should just throw an exception. At the very least, + * use a timeout to trigger us. + */ + killActiveJob: function() { + // There is nothing to do if we have no job + if (!this._curIndexingJob) + return; + + // -- Blow away the stack with cleanup. + let workerDef = this._curIndexingJob._workerDef; + if (this._unitTestSuperVerbose) + this._log.debug("Killing job of type: " + this._curIndexingJob.jobType); + if (this._unitTestHookCleanup) + this._unitTestHookCleanup(workerDef.cleanup ? true : false, + "no exception, this was killActiveJob", + this._curIndexingJob, + this._callbackHandle); + this._callbackHandle.cleanup(); + if (workerDef.cleanup) + workerDef.cleanup.call(workerDef.indexer, this._curIndexingJob); + + // Eliminate the job. + this._curIndexingJob = null; + }, + + /** + * Purge all jobs that the filter function returns true for. This does not + * kill the active job, use |killActiveJob| to do that. + * + * Make sure to call this function before killActiveJob + * + * @param aFilterElimFunc A filter function that takes an |IndexingJob| and + * returns true if the job should be purged, false if it should not be. + * The filter sees the jobs in the order they are scheduled. + */ + purgeJobsUsingFilter: function(aFilterElimFunc) { + for (let iJob = 0; iJob < this._indexQueue.length; iJob++) { + let job = this._indexQueue[iJob]; + + // If the filter says to, splice the job out of existence (and make sure + // to fixup iJob to compensate.) 
+ if (aFilterElimFunc(job)) { + if (this._unitTestSuperVerbose) + this._log.debug("Purging job of type: " + job.jobType); + this._indexQueue.splice(iJob--, 1); + let workerDef = this._indexerWorkerDefs[job.jobType]; + if (workerDef.jobCanceled) + workerDef.jobCanceled.call(workerDef.indexer, job); + } + } + }, + + /* *********** Event Processing *********** */ + observe: function gloda_indexer_observe(aSubject, aTopic, aData) { + // idle + if (aTopic == "idle") { + // Do we need to commit an indexer transaction? + if (this._idleToCommit) { + this._idleToCommit = false; + GlodaCollectionManager.cacheCommitDirty(); + GlodaDatastore._commitTransaction(); + this._lastCommitTime = Date.now(); + this._notifyListeners(); + } + } + // offline status + else if (aTopic == "network:offline-status-changed") { + if (aData == "offline") { + this.suppressIndexing = true; + } + else { // online + this.suppressIndexing = false; + } + } + // shutdown fallback + else if (aTopic == "quit-application") { + this._shutdown(); + } + }, + + +}; +// we used to initialize here; now we have public.js do it for us after the +// indexers register themselves so we know about all our built-in indexers +// at init-time. diff --git a/mailnews/db/gloda/modules/log4moz.js b/mailnews/db/gloda/modules/log4moz.js new file mode 100644 index 000000000..379c669e1 --- /dev/null +++ b/mailnews/db/gloda/modules/log4moz.js @@ -0,0 +1,932 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +this.EXPORTED_SYMBOLS = ['Log4Moz']; + +Components.utils.import("resource://gre/modules/Services.jsm"); + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +var MODE_RDONLY = 0x01; +var MODE_WRONLY = 0x02; +var MODE_CREATE = 0x08; +var MODE_APPEND = 0x10; +var MODE_TRUNCATE = 0x20; + +var PERMS_FILE = parseInt("0644", 8); +var PERMS_DIRECTORY = parseInt("0755", 8); + +var ONE_BYTE = 1; +var ONE_KILOBYTE = 1024 * ONE_BYTE; +var ONE_MEGABYTE = 1024 * ONE_KILOBYTE; + +var DEFAULT_NETWORK_TIMEOUT_DELAY = 5; + +var CDATA_START = "<![CDATA["; +var CDATA_END = "]]>"; +var CDATA_ESCAPED_END = CDATA_END + "]]>" + CDATA_START; + +var Log4Moz = { + Level: { + Fatal: 70, + Error: 60, + Warn: 50, + Info: 40, + Config: 30, + Debug: 20, + Trace: 10, + All: 0, + Desc: { + 70: "FATAL", + 60: "ERROR", + 50: "WARN", + 40: "INFO", + 30: "CONFIG", + 20: "DEBUG", + 10: "TRACE", + 0: "ALL" + } + }, + + /** + * Create a logger and configure it with dump and console appenders as + * specified by prefs based on the logger name. 
+ * + * E.g., if the loggername is foo, then look for prefs + * foo.logging.console + * foo.logging.dump + * + * whose values can be empty: no logging of that type; or any of + * 'Fatal', 'Error', 'Warn', 'Info', 'Config', 'Debug', 'Trace', 'All', + * in which case the logging level for each appender will be set accordingly + * + * Parameters: + * + * @param loggername The name of the logger + * @param level (optional) the level of the logger itself + * @param consoleLevel (optional) the level of the console appender + * @param dumpLevel (optional) the level of the dump appender + * + * As described above, well-named prefs override the last two parameters + **/ + + getConfiguredLogger: function(loggername, level, consoleLevel, dumpLevel) { + let log = Log4Moz.repository.getLogger(loggername); + if (log._configured) + return log + + let formatter = new Log4Moz.BasicFormatter(); + + level = level || Log4Moz.Level.Error; + + consoleLevel = consoleLevel || -1; + dumpLevel = dumpLevel || -1; + let branch = Services.prefs.getBranch(loggername + ".logging."); + if (branch) + { + try { + // figure out if event-driven indexing should be enabled... + let consoleLevelString = branch.getCharPref("console"); + if (consoleLevelString) { + // capitalize to fit with Log4Moz.Level expectations + consoleLevelString = consoleLevelString.charAt(0).toUpperCase() + + consoleLevelString.substr(1).toLowerCase(); + consoleLevel = (consoleLevelString == 'None') ? + 100 : Log4Moz.Level[consoleLevelString]; + } + } catch (ex) { + // Ignore if preference is not found + } + try { + let dumpLevelString = branch.getCharPref("dump"); + if (dumpLevelString) { + // capitalize to fit with Log4Moz.Level expectations + dumpLevelString = dumpLevelString.charAt(0).toUpperCase() + + dumpLevelString.substr(1).toLowerCase(); + dumpLevel = (dumpLevelString == 'None') ? 
+ 100 : Log4Moz.Level[dumpLevelString]; + } + } catch (ex) { + // Ignore if preference is not found + } + } + + if (consoleLevel != 100) { + if (consoleLevel == -1) + consoleLevel = Log4Moz.Level.Error; + let capp = new Log4Moz.ConsoleAppender(formatter); + capp.level = consoleLevel; + log.addAppender(capp); + } + + if (dumpLevel != 100) { + if (dumpLevel == -1) + dumpLevel = Log4Moz.Level.Error; + let dapp = new Log4Moz.DumpAppender(formatter); + dapp.level = dumpLevel; + log.addAppender(dapp); + } + + log.level = Math.min(level, Math.min(consoleLevel, dumpLevel)); + + log._configured = true; + + return log; + }, + + get repository() { + delete Log4Moz.repository; + Log4Moz.repository = new LoggerRepository(); + return Log4Moz.repository; + }, + set repository(value) { + delete Log4Moz.repository; + Log4Moz.repository = value; + }, + + get LogMessage() { return LogMessage; }, + get Logger() { return Logger; }, + get LoggerRepository() { return LoggerRepository; }, + + get Formatter() { return Formatter; }, + get BasicFormatter() { return BasicFormatter; }, + get XMLFormatter() { return XMLFormatter; }, + get JSONFormatter() { return JSONFormatter; }, + get Appender() { return Appender; }, + get DumpAppender() { return DumpAppender; }, + get ConsoleAppender() { return ConsoleAppender; }, + get TimeAwareMemoryBucketAppender() { return TimeAwareMemoryBucketAppender; }, + get FileAppender() { return FileAppender; }, + get SocketAppender() { return SocketAppender; }, + get RotatingFileAppender() { return RotatingFileAppender; }, + get ThrowingAppender() { return ThrowingAppender; }, + + // Logging helper: + // let logger = Log4Moz.repository.getLogger("foo"); + // logger.info(Log4Moz.enumerateInterfaces(someObject).join(",")); + enumerateInterfaces: function Log4Moz_enumerateInterfaces(aObject) { + let interfaces = []; + + for (i in Ci) { + try { + aObject.QueryInterface(Ci[i]); + interfaces.push(i); + } + catch(ex) {} + } + + return interfaces; + }, + + // Logging 
helper: + // let logger = Log4Moz.repository.getLogger("foo"); + // logger.info(Log4Moz.enumerateProperties(someObject).join(",")); + enumerateProperties: function Log4Moz_enumerateProps(aObject, + aExcludeComplexTypes) { + let properties = []; + + for (var p in aObject) { + try { + if (aExcludeComplexTypes && + (typeof aObject[p] == "object" || typeof aObject[p] == "function")) + continue; + properties.push(p + " = " + aObject[p]); + } + catch(ex) { + properties.push(p + " = " + ex); + } + } + + return properties; + } +}; + +function LoggerContext() { + this._started = this._lastStateChange = Date.now(); + this._state = "started"; +} +LoggerContext.prototype = { + _jsonMe: true, + _id: "unknown", + setState: function LoggerContext_state(aState) { + this._state = aState; + this._lastStateChange = Date.now(); + return this; + }, + finish: function LoggerContext_finish() { + this._finished = Date.now(); + this._state = "finished"; + return this; + }, + toString: function LoggerContext_toString() { + return "[Context: " + this._id + " state: " + this._state + "]"; + } +}; + + +/* + * LogMessage + * Encapsulates a single log event's data + */ +function LogMessage(loggerName, level, messageObjects){ + this.loggerName = loggerName; + this.messageObjects = messageObjects; + this.level = level; + this.time = Date.now(); +} +LogMessage.prototype = { + get levelDesc() { + if (this.level in Log4Moz.Level.Desc) + return Log4Moz.Level.Desc[this.level]; + return "UNKNOWN"; + }, + + toString: function LogMsg_toString(){ + return "LogMessage [" + this.time + " " + this.level + " " + + this.messageObjects + "]"; + } +}; + +/* + * Logger + * Hierarchical version. 
Logs to all appenders, assigned or inherited + */ + +function Logger(name, repository) { + this._init(name, repository); +} +Logger.prototype = { + _init: function Logger__init(name, repository) { + if (!repository) + repository = Log4Moz.repository; + this._name = name; + this.children = []; + this.ownAppenders = []; + this.appenders = []; + this._repository = repository; + }, + + get name() { + return this._name; + }, + + _level: null, + get level() { + if (this._level != null) + return this._level; + if (this.parent) + return this.parent.level; + dump("log4moz warning: root logger configuration error: no level defined\n"); + return Log4Moz.Level.All; + }, + set level(level) { + this._level = level; + }, + + _parent: null, + get parent() { return this._parent; }, + set parent(parent) { + if (this._parent == parent) { + return; + } + // Remove ourselves from parent's children + if (this._parent) { + let index = this._parent.children.indexOf(this); + if (index != -1) { + this._parent.children.splice(index, 1); + } + } + this._parent = parent; + parent.children.push(this); + this.updateAppenders(); + }, + + updateAppenders: function updateAppenders() { + if (this._parent) { + let notOwnAppenders = this._parent.appenders.filter(function(appender) { + return this.ownAppenders.indexOf(appender) == -1; + }, this); + this.appenders = notOwnAppenders.concat(this.ownAppenders); + } else { + this.appenders = this.ownAppenders.slice(); + } + + // Update children's appenders. 
+ for (let i = 0; i < this.children.length; i++) { + this.children[i].updateAppenders(); + } + }, + + addAppender: function Logger_addAppender(appender) { + if (this.ownAppenders.indexOf(appender) != -1) { + return; + } + this.ownAppenders.push(appender); + this.updateAppenders(); + }, + + _nextContextId: 0, + newContext: function Logger_newContext(objWithProps) { + if (!("_id" in objWithProps)) + objWithProps._id = this._name + ":" + (++this._nextContextId); + + let c = new LoggerContext(); + c._isContext = true; + for (let key in objWithProps) { + c[key] = objWithProps[key]; + } + return c; + }, + + log: function Logger_log(message) { + if (this.level > message.level) + return; + let appenders = this.appenders; + for (let i = 0; i < appenders.length; i++){ + appenders[i].append(message); + } + }, + + removeAppender: function Logger_removeAppender(appender) { + let index = this.ownAppenders.indexOf(appender); + if (index == -1) { + return; + } + this.ownAppenders.splice(index, 1); + this.updateAppenders(); + }, + + log: function Logger_log(level, args) { + if (this.level > level) + return; + + // Hold off on creating the message object until we actually have + // an appender that's responsible. 
+ let message; + let appenders = this.appenders; + for (let i = 0; i < appenders.length; i++){ + let appender = appenders[i]; + if (appender.level > level) + continue; + + if (!message) + message = new LogMessage(this._name, level, + Array.prototype.slice.call(args)); + + appender.append(message); + } + }, + + fatal: function Logger_fatal() { + this.log(Log4Moz.Level.Fatal, arguments); + }, + error: function Logger_error() { + this.log(Log4Moz.Level.Error, arguments); + }, + warn: function Logger_warn() { + this.log(Log4Moz.Level.Warn, arguments); + }, + info: function Logger_info(string) { + this.log(Log4Moz.Level.Info, arguments); + }, + config: function Logger_config(string) { + this.log(Log4Moz.Level.Config, arguments); + }, + debug: function Logger_debug(string) { + this.log(Log4Moz.Level.Debug, arguments); + }, + trace: function Logger_trace(string) { + this.log(Log4Moz.Level.Trace, arguments); + } +}; + +/* + * LoggerRepository + * Implements a hierarchy of Loggers + */ + +function LoggerRepository() {} +LoggerRepository.prototype = { + _loggers: {}, + + _rootLogger: null, + get rootLogger() { + if (!this._rootLogger) { + this._rootLogger = new Logger("root", this); + this._rootLogger.level = Log4Moz.Level.All; + } + return this._rootLogger; + }, + set rootLogger(logger) { + throw "Cannot change the root logger"; + }, + + _updateParents: function LogRep__updateParents(name) { + let pieces = name.split('.'); + let cur, parent; + + // find the closest parent + // don't test for the logger name itself, as there's a chance it's already + // there in this._loggers + for (let i = 0; i < pieces.length - 1; i++) { + if (cur) + cur += '.' 
+ pieces[i]; + else + cur = pieces[i]; + if (cur in this._loggers) + parent = cur; + } + + // if we didn't assign a parent above, there is no parent + if (!parent) + this._loggers[name].parent = this.rootLogger; + else + this._loggers[name].parent = this._loggers[parent]; + + // trigger updates for any possible descendants of this logger + for (let logger in this._loggers) { + if (logger != name && logger.indexOf(name) == 0) + this._updateParents(logger); + } + }, + + getLogger: function LogRep_getLogger(name) { + if (name in this._loggers) + return this._loggers[name]; + this._loggers[name] = new Logger(name, this); + this._updateParents(name); + return this._loggers[name]; + } +}; + +/* + * Formatters + * These massage a LogMessage into whatever output is desired + * Only the BasicFormatter is currently implemented + */ + +// Abstract formatter +function Formatter() {} +Formatter.prototype = { + format: function Formatter_format(message) {} +}; + +// services' log4moz lost the date formatting default... +function BasicFormatter(dateFormat) { + if (dateFormat) + this.dateFormat = dateFormat; +} +BasicFormatter.prototype = { + __proto__: Formatter.prototype, + + _dateFormat: null, + + get dateFormat() { + if (!this._dateFormat) + this._dateFormat = "%Y-%m-%d %H:%M:%S"; + return this._dateFormat; + }, + + set dateFormat(format) { + this._dateFormat = format; + }, + + format: function BF_format(message) { + let date = new Date(message.time); + // The trick below prevents errors further down because mo is null or + // undefined. + let messageString = message.messageObjects.map(mo => "" + mo).join(" "); + return date.toLocaleFormat(this.dateFormat) + "\t" + + message.loggerName + "\t" + message.levelDesc + "\t" + + messageString + "\n"; + } +}; + +/* + * XMLFormatter + * Format like log4j's XMLLayout. The intent is that you can hook this up to + * a SocketAppender and point them at a Chainsaw GUI running with an + * XMLSocketReceiver running. 
Then your output comes out in Chainsaw. + * (Chainsaw is log4j's GUI that displays log output with niceties such as + * filtering and conditional coloring.) + */ + +function XMLFormatter() {} +XMLFormatter.prototype = { + __proto__: Formatter.prototype, + + format: function XF_format(message) { + let cdataEscapedMessage = + message.messageObjects + .map(mo => (typeof(mo) == "object") ? mo.toString() : mo) + .join(" ") + .split(CDATA_END).join(CDATA_ESCAPED_END); + return "<log4j:event logger='" + message.loggerName + "' " + + "level='" + message.levelDesc + "' thread='unknown' " + + "timestamp='" + message.time + "'>" + + "<log4j:message><![CDATA[" + cdataEscapedMessage + "]]></log4j:message>" + + "</log4j:event>"; + } +}; + +function JSONFormatter() { +} +JSONFormatter.prototype = { + __proto__: Formatter.prototype, + + format: function JF_format(message) { + // XXX I did all kinds of questionable things in here; they should be + // resolved... + // 1) JSON does not walk the __proto__ chain; there is no need to clobber + // it. + // 2) Our net mutation is sorta redundant messageObjects alongside + // msgObjects, although we only serialize one. + let origMessageObjects = message.messageObjects; + message.messageObjects = []; + let reProto = []; + for (let messageObject of origMessageObjects) { + if (messageObject) + if (messageObject._jsonMe) { + message.messageObjects.push(messageObject); +// FIXME: the commented out code should be fixed in a better way. +// See bug 984539: find a good way to avoid JSONing the impl in log4moz +// // temporarily strip the prototype to avoid JSONing the impl. 
+// reProto.push([messageObject, messageObject.__proto__]); +// messageObject.__proto__ = undefined; + } + else + message.messageObjects.push(messageObject.toString()); + else + message.messageObjects.push(messageObject); + } + let encoded = JSON.stringify(message) + "\r\n"; + message.msgObjects = origMessageObjects; +// for (let objectAndProtoPair of reProto) { +// objectAndProtoPair[0].__proto__ = objectAndProtoPair[1]; +// } + return encoded; + } +}; + + +/* + * Appenders + * These can be attached to Loggers to log to different places + * Simply subclass and override doAppend to implement a new one + */ + +function Appender(formatter) { + this._name = "Appender"; + this._formatter = formatter? formatter : new BasicFormatter(); +} +Appender.prototype = { + _level: Log4Moz.Level.All, + + append: function App_append(message) { + this.doAppend(this._formatter.format(message)); + }, + toString: function App_toString() { + return this._name + " [level=" + this._level + + ", formatter=" + this._formatter + "]"; + }, + doAppend: function App_doAppend(message) {} +}; + +/* + * DumpAppender + * Logs to standard out + */ + +function DumpAppender(formatter) { + this._name = "DumpAppender"; + this._formatter = formatter? formatter : new BasicFormatter(); +} +DumpAppender.prototype = { + __proto__: Appender.prototype, + + doAppend: function DApp_doAppend(message) { + dump(message); + } +}; + +/** + * An in-memory appender that always logs to its in-memory bucket and associates + * each message with a timestamp. Whoever creates us is responsible for causing + * us to switch to a new bucket using whatever criteria is appropriate. + * + * This is intended to be used roughly like an in-memory circular buffer. The + * expectation is that we are being used for unit tests and that each unit test + * function will get its own bucket. 
In the event that a test fails we would + * be asked for the contents of the current bucket and some portion of the + * previous bucket using up to some duration. + */ +function TimeAwareMemoryBucketAppender() { + this._name = "TimeAwareMemoryBucketAppender"; + this._level = Log4Moz.Level.All; + + this._lastBucket = null; + // to minimize object construction, even indices are timestamps, odd indices + // are the message objects. + this._curBucket = []; + this._curBucketStartedAt = Date.now(); +} +TimeAwareMemoryBucketAppender.prototype = { + get level() { return this._level; }, + set level(level) { this._level = level; }, + + append: function TAMBA_append(message) { + if (this._level <= message.level) + this._curBucket.push(message); + }, + + newBucket: function() { + this._lastBucket = this._curBucket; + this._curBucketStartedAt = Date.now(); + this._curBucket = []; + }, + + getPreviousBucketEvents: function(aNumMS) { + let lastBucket = this._lastBucket; + if (lastBucket == null || !lastBucket.length) + return []; + let timeBound = this._curBucketStartedAt - aNumMS; + // seek backwards through the list... 
+ let i; + for (i = lastBucket.length - 1; i >= 0; i --) { + if (lastBucket[i].time < timeBound) + break; + } + return lastBucket.slice(i+1); + }, + + getBucketEvents: function() { + return this._curBucket.concat(); + }, + + toString: function() { + return "[TimeAwareMemoryBucketAppender]"; + }, +}; + +/* + * ConsoleAppender + * Logs to the javascript console + */ + +function ConsoleAppender(formatter) { + this._name = "ConsoleAppender"; + this._formatter = formatter; +} +ConsoleAppender.prototype = { + __proto__: Appender.prototype, + + // override to send Error and higher level messages to Components.utils.reportError() + append: function CApp_append(message) { + let stringMessage = this._formatter.format(message); + if (message.level > Log4Moz.Level.Warn) { + Cu.reportError(stringMessage); + } + this.doAppend(stringMessage); + }, + + doAppend: function CApp_doAppend(message) { + Services.console.logStringMessage(message); + } +}; + +/* + * FileAppender + * Logs to a file + */ + +function FileAppender(file, formatter) { + this._name = "FileAppender"; + this._file = file; // nsIFile + this._formatter = formatter? formatter : new BasicFormatter(); +} +FileAppender.prototype = { + __proto__: Appender.prototype, + + __fos: null, + get _fos() { + if (!this.__fos) + this.openStream(); + return this.__fos; + }, + + openStream: function FApp_openStream() { + this.__fos = Cc["@mozilla.org/network/file-output-stream;1"]. 
+ createInstance(Ci.nsIFileOutputStream); + let flags = MODE_WRONLY | MODE_CREATE | MODE_APPEND; + this.__fos.init(this._file, flags, PERMS_FILE, 0); + }, + + closeStream: function FApp_closeStream() { + if (!this.__fos) + return; + try { + this.__fos.close(); + this.__fos = null; + } catch(e) { + dump("Failed to close file output stream\n" + e); + } + }, + + doAppend: function FApp_doAppend(message) { + if (message === null || message.length <= 0) + return; + try { + this._fos.write(message, message.length); + } catch(e) { + dump("Error writing file:\n" + e); + } + }, + + clear: function FApp_clear() { + this.closeStream(); + this._file.remove(false); + } +}; + +/* + * RotatingFileAppender + * Similar to FileAppender, but rotates logs when they become too large + */ + +function RotatingFileAppender(file, formatter, maxSize, maxBackups) { + if (maxSize === undefined) + maxSize = ONE_MEGABYTE * 2; + + if (maxBackups === undefined) + maxBackups = 0; + + this._name = "RotatingFileAppender"; + this._file = file; // nsIFile + this._formatter = formatter? formatter : new BasicFormatter(); + this._maxSize = maxSize; + this._maxBackups = maxBackups; +} +RotatingFileAppender.prototype = { + __proto__: FileAppender.prototype, + + doAppend: function RFApp_doAppend(message) { + if (message === null || message.length <= 0) + return; + try { + this.rotateLogs(); + this._fos.write(message, message.length); + } catch(e) { + dump("Error writing file:\n" + e); + } + }, + rotateLogs: function RFApp_rotateLogs() { + if(this._file.exists() && + this._file.fileSize < this._maxSize) + return; + + this.closeStream(); + + for (let i = this.maxBackups - 1; i > 0; i--){ + let backup = this._file.parent.clone(); + backup.append(this._file.leafName + "." + i); + if (backup.exists()) + backup.moveTo(this._file.parent, this._file.leafName + "." 
+ (i + 1)); + } + + let cur = this._file.clone(); + if (cur.exists()) + cur.moveTo(cur.parent, cur.leafName + ".1"); + + // Note: this._file still points to the same file + } +}; + +/* + * SocketAppender + * Logs via TCP to a given host and port. Attempts to automatically reconnect + * when the connection drops or cannot be initially re-established. Connection + * attempts will happen at most every timeoutDelay seconds (has a sane default + * if left blank). Messages are dropped when there is no connection. + */ + +function SocketAppender(host, port, formatter, timeoutDelay) { + this._name = "SocketAppender"; + this._host = host; + this._port = port; + this._formatter = formatter? formatter : new BasicFormatter(); + this._timeout_delay = timeoutDelay || DEFAULT_NETWORK_TIMEOUT_DELAY; + + this._socketService = Cc["@mozilla.org/network/socket-transport-service;1"] + .getService(Ci.nsISocketTransportService); + this._mainThread = Services.tm.mainThread; +} +SocketAppender.prototype = { + __proto__: Appender.prototype, + + __nos: null, + get _nos() { + if (!this.__nos) + this.openStream(); + return this.__nos; + }, + _nextCheck: 0, + openStream: function SApp_openStream() { + let now = Date.now(); + if (now <= this._nextCheck) { + return; + } + this._nextCheck = now + this._timeout_delay * 1000; + try { + this._transport = this._socketService.createTransport( + null, 0, // default socket type + this._host, this._port, + null); // no proxy + this._transport.setTimeout(Ci.nsISocketTransport.TIMEOUT_CONNECT, + this._timeout_delay); + // do not set a timeout for TIMEOUT_READ_WRITE. The timeout is not + // entirely intuitive; your socket will time out if no one reads or + // writes to the socket within the timeout. That, as you can imagine, + // is not what we want. 
      this._transport.setEventSink(this, this._mainThread);

      let outputStream = this._transport.openOutputStream(
        0, // neither blocking nor unbuffered operation is desired
        0, // default buffer size is fine
        0 // default buffer count is fine
        );

      let uniOutputStream = Cc["@mozilla.org/intl/converter-output-stream;1"]
                              .createInstance(Ci.nsIConverterOutputStream);
      uniOutputStream.init(outputStream, "utf-8", 0, 0x0000);

      this.__nos = uniOutputStream;
    } catch (ex) {
      dump("Unexpected SocketAppender connection problem: " +
           ex.fileName + ":" + ex.lineNumber + ": " + ex + "\n");
    }
  },

  // Tear down the transport and the converter stream; safe to call when no
  // transport exists.
  closeStream: function SApp_closeStream() {
    if (!this._transport)
      return;
    try {
      this._connected = false;
      this._transport = null;
      let nos = this.__nos;
      this.__nos = null;
      nos.close();
    } catch(e) {
      // this shouldn't happen, but no one cares
    }
  },

  // Best-effort write: messages are dropped when there is no connection, and
  // a dead transport triggers a stream teardown so openStream can retry.
  doAppend: function SApp_doAppend(message) {
    if (message === null || message.length <= 0)
      return;
    try {
      let nos = this._nos;
      if (nos)
        nos.writeString(message);
    } catch(e) {
      if (this._transport && !this._transport.isAlive()) {
        this.closeStream();
      }
    }
  },

  clear: function SApp_clear() {
    this.closeStream();
  },

  /* nsITransportEventSink */
  onTransportStatus: function SApp_onTransportStatus(aTransport, aStatus,
                                                     aProgress, aProgressMax) {
    if (aStatus == 0x804b0004) // STATUS_CONNECTED_TO is not a constant.
      this._connected = true;
  },
};

/**
 * Throws an exception whenever it gets a message. Intended to be used in
 * automated testing situations where the code would normally log an error but
 * not die in a fatal manner.
 */
function ThrowingAppender(thrower, formatter) {
  this._name = "ThrowingAppender";
  this._formatter = formatter? formatter : new BasicFormatter();
  this._thrower = thrower;
}
ThrowingAppender.prototype = {
  __proto__: Appender.prototype,

  // If a thrower callback was provided it is invoked with the formatted
  // message; otherwise the message itself is thrown.
  doAppend: function TApp_doAppend(message) {
    if (this._thrower)
      this._thrower(message);
    else
      throw message;
  }
};
diff --git a/mailnews/db/gloda/modules/mimeTypeCategories.js b/mailnews/db/gloda/modules/mimeTypeCategories.js
new file mode 100644
index 000000000..463443044
--- /dev/null
+++ b/mailnews/db/gloda/modules/mimeTypeCategories.js
@@ -0,0 +1,204 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * This file wants to be a data file of some sort. It might do better as a real
 * raw JSON file. It is trying to be one right now, but it obviously is not.
 */

var EXPORTED_SYMBOLS = ['MimeCategoryMapping'];

/**
 * Input data structure to allow us to build a fast mapping from mime type to
 * category name. The keys in MimeCategoryMapping are the top-level
 * categories. Each value can either be a list of MIME types or a nested
 * object which recursively defines sub-categories. We currently do not use
 * the sub-categories. They are just there to try and organize the MIME types
 * a little and open the door to future enhancements.
 *
 * Do _not_ add additional top-level categories unless you have added
 * corresponding entries to gloda.properties under the
 * "gloda.mimetype.category" branch and are making sure localizers are aware
 * of the change and have time to localize it.
 *
 * Entries with wildcards in them are part of a fallback strategy by the
 * |mimeTypeNoun| and do not actually use regular expressions or anything like
 * that. Everything is a straight string lookup. Given "foo/bar" we look for
 * "foo/bar", then "foo/*", and finally "*".
 */
var MimeCategoryMapping = {
  archives: [
    "application/java-archive",
    "application/x-java-archive",
    "application/x-jar",
    "application/x-java-jnlp-file",

    "application/mac-binhex40",
    "application/vnd.ms-cab-compressed",

    "application/x-arc",
    "application/x-arj",
    "application/x-compress",
    "application/x-compressed-tar",
    "application/x-cpio",
    "application/x-cpio-compressed",
    "application/x-deb",

    "application/x-bittorrent",

    "application/x-rar",
    "application/x-rar-compressed",
    "application/x-7z-compressed",
    "application/zip",
    "application/x-zip-compressed",
    "application/x-zip",

    "application/x-bzip",
    "application/x-bzip-compressed-tar",
    "application/x-bzip2",
    "application/x-gzip",
    "application/x-tar",
    "application/x-tar-gz",
    "application/x-tarz",
  ],
  documents: {
    database: [
      "application/vnd.ms-access",
      "application/x-msaccess",
      "application/msaccess",
      "application/vnd.msaccess",
      // NOTE(review): "application/x-msaccess" is listed twice in this array;
      // harmless for a lookup table, but one entry could be dropped.
      "application/x-msaccess",
      "application/mdb",
      "application/x-mdb",

      "application/vnd.oasis.opendocument.database",

    ],
    graphics: [
      "application/postscript",
      "application/x-bzpostscript",
      "application/x-dvi",
      "application/x-gzdvi",

      "application/illustrator",

      "application/vnd.corel-draw",
      "application/cdr",
      "application/coreldraw",
      "application/x-cdr",
      "application/x-coreldraw",
      "image/cdr",
      "image/x-cdr",
      "zz-application/zz-winassoc-cdr",

      "application/vnd.oasis.opendocument.graphics",
      "application/vnd.oasis.opendocument.graphics-template",
      "application/vnd.oasis.opendocument.image",

      "application/x-dia-diagram",
    ],
    presentation: [
      "application/vnd.ms-powerpoint.presentation.macroenabled.12",
      "application/vnd.ms-powerpoint.template.macroenabled.12",
      "application/vnd.ms-powerpoint",
      "application/powerpoint",
      "application/mspowerpoint",
      "application/x-mspowerpoint",
      "application/vnd.openxmlformats-officedocument.presentationml.presentation",
      "application/vnd.openxmlformats-officedocument.presentationml.template",

      "application/vnd.oasis.opendocument.presentation",
      "application/vnd.oasis.opendocument.presentation-template"
    ],
    spreadsheet: [
      "application/vnd.lotus-1-2-3",
      "application/x-lotus123",
      "application/x-123",
      "application/lotus123",
      "application/wk1",

      "application/x-quattropro",

      "application/vnd.ms-excel.sheet.binary.macroenabled.12",
      "application/vnd.ms-excel.sheet.macroenabled.12",
      "application/vnd.ms-excel.template.macroenabled.12",
      "application/vnd.ms-excel",
      "application/msexcel",
      "application/x-msexcel",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.template",

      "application/vnd.oasis.opendocument.formula",
      "application/vnd.oasis.opendocument.formula-template",
      "application/vnd.oasis.opendocument.chart",
      "application/vnd.oasis.opendocument.chart-template",
      "application/vnd.oasis.opendocument.spreadsheet",
      "application/vnd.oasis.opendocument.spreadsheet-template",

      "application/x-gnumeric",
    ],
    wordProcessor: [
      "application/msword",
      "application/vnd.ms-word",
      "application/x-msword",
      "application/msword-template",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
      "application/vnd.ms-word.document.macroenabled.12",
      "application/vnd.ms-word.template.macroenabled.12",
      "application/x-mswrite",
      "application/x-pocket-word",

      "application/rtf",
      "text/rtf",


      "application/vnd.oasis.opendocument.text",
      "application/vnd.oasis.opendocument.text-master",
      "application/vnd.oasis.opendocument.text-template",
      "application/vnd.oasis.opendocument.text-web",

      "application/vnd.wordperfect",

      "application/x-abiword",
      "application/x-amipro",
    ],
    suite: [
      "application/vnd.ms-works"
    ],
  },
  images: [
    "image/*"
  ],
  media: {
    audio: [
      "audio/*",
    ],
    video: [
      "video/*",
    ],
    container: [
      "application/ogg",

      "application/smil",
      "application/vnd.ms-asf",
      "application/vnd.rn-realmedia",
      "application/x-matroska",
      "application/x-quicktime-media-link",
      "application/x-quicktimeplayer",
    ]
  },
  other: [
    "*"
  ],
  pdf: [
    "application/pdf",
    "application/x-pdf",
    "image/pdf",
    "file/pdf",

    "application/x-bzpdf",
    "application/x-gzpdf",
  ],
}
diff --git a/mailnews/db/gloda/modules/mimemsg.js b/mailnews/db/gloda/modules/mimemsg.js
new file mode 100644
index 000000000..ff984c178
--- /dev/null
+++ b/mailnews/db/gloda/modules/mimemsg.js
@@ -0,0 +1,719 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

this.EXPORTED_SYMBOLS = ['MsgHdrToMimeMessage',
                         'MimeMessage', 'MimeContainer',
                         'MimeBody', 'MimeUnknown',
                         'MimeMessageAttachment'];

var Cc = Components.classes;
var Ci = Components.interfaces;
var Cr = Components.results;
var Cu = Components.utils;

Components.utils.import("resource://gre/modules/Services.jsm");
Components.utils.import("resource://gre/modules/XPCOMUtils.jsm");

var EMITTER_MIME_CODE = "application/x-js-mime-message";

/**
 * The URL listener is surplus because the CallbackStreamListener ends up
 * getting the same set of events, effectively.
 */
var dumbUrlListener = {
  OnStartRunningUrl: function (aUrl) {
  },
  OnStopRunningUrl: function (aUrl, aExitCode) {
  },
};

/**
 * Maintain a list of all active stream listeners so that we can cancel them all
 * during shutdown. If we don't cancel them, we risk calls into javascript
 * from C++ after the various XPConnect contexts have already begun their
 * teardown process.
 */
var activeStreamListeners = {};

// Quit-application observer that cancels every in-flight streaming request
// registered in activeStreamListeners (see the comment above).
var shutdownCleanupObserver = {
  _initialized: false,
  ensureInitialized: function mimemsg_shutdownCleanupObserver_init() {
    if (this._initialized)
      return;

    Services.obs.addObserver(this, "quit-application", false);

    this._initialized = true;
  },

  observe: function mimemsg_shutdownCleanupObserver_observe(
      aSubject, aTopic, aData) {
    if (aTopic == "quit-application") {
      Services.obs.removeObserver(this, "quit-application");

      for (let uri in activeStreamListeners) {
        let streamListener = activeStreamListeners[uri];
        if (streamListener._request)
          streamListener._request.cancel(Cr.NS_BINDING_ABORTED);
      }
    }
  }
};

// Stream listener that registers itself in activeStreamListeners (keyed by
// the header's message URI) and fans the parsed MimeMessage out to one or
// more (callbackThis, callback) pairs when streaming completes. When called
// with only two arguments, the second argument is the callback and 'this' is
// null (legacy calling convention).
function CallbackStreamListener(aMsgHdr, aCallbackThis, aCallback) {
  this._msgHdr = aMsgHdr;
  let hdrURI = aMsgHdr.folder.getUriForMsg(aMsgHdr);
  this._request = null;
  this._stream = null;
  if (aCallback === undefined) {
    this._callbacksThis = [null];
    this._callbacks = [aCallbackThis];
  }
  else {
    this._callbacksThis = [aCallbackThis];
    this._callbacks =[aCallback];
  }
  activeStreamListeners[hdrURI] = this;
}

CallbackStreamListener.prototype = {
  QueryInterface: XPCOMUtils.generateQI([Ci.nsIStreamListener]),

  // nsIRequestObserver part
  onStartRequest: function (aRequest, aContext) {
    this._request = aRequest;
  },
  onStopRequest: function (aRequest, aContext, aStatusCode) {
    let msgURI = this._msgHdr.folder.getUriForMsg(this._msgHdr);
    delete activeStreamListeners[msgURI];

    // The emitter left the resulting MimeMessage (if any) in the rendezvous
    // table keyed by the request's URI spec; claim it and clean up the slot.
    aContext.QueryInterface(Ci.nsIURI);
    let message = MsgHdrToMimeMessage.RESULT_RENDEVOUZ[aContext.spec];
    if (message === undefined)
      message = null;

    delete MsgHdrToMimeMessage.RESULT_RENDEVOUZ[aContext.spec];

    for (let i = 0; i < this._callbacksThis.length; i++) {
      try {
        this._callbacks[i].call(this._callbacksThis[i], this._msgHdr, message);
      } catch (e) {
        // Most of the time, exceptions will silently disappear into the endless
        // deeps of XPConnect, and never reach the surface ever again. At least
        // warn the user if he has dump enabled.
        dump("The MsgHdrToMimeMessage callback threw an exception: "+e+"\n");
        // That one will probably never make it to the original caller.
        throw(e);
      }
    }

    this._msgHdr = null;
    this._request = null;
    this._stream = null;
    this._callbacksThis = null;
    this._callbacks = null;
  },

  /* okay, our onDataAvailable should actually never be called.  the stream
     converter is actually eating everything except the start and stop
     notification. */
  // nsIStreamListener part
  onDataAvailable: function (aRequest,aContext,aInputStream,aOffset,aCount) {
    dump("this should not be happening! arrgggggh!\n");
    if (this._stream === null) {
      this._stream = Cc["@mozilla.org/scriptableinputstream;1"].
        createInstance(Ci.nsIScriptableInputStream);
      this._stream.init(aInputStream);
    }
    this._stream.read(aCount);

  },
};

var gMessenger = Cc["@mozilla.org/messenger;1"].
                   createInstance(Ci.nsIMessenger);

// Recursively empty the child list of any encrypted container part so that
// encrypted content is not exposed to consumers (mutates aPart in place).
function stripEncryptedParts(aPart) {
  if (aPart.parts && aPart.isEncrypted) {
    aPart.parts = []; // Show an empty container.
  } else if (aPart.parts) {
    aPart.parts = aPart.parts.map(stripEncryptedParts);
  }
  return aPart;
}

/**
 * Starts retrieval of a MimeMessage instance for the given message header.
 * Your callback will be called with the message header you provide and the
 *
 * @param aMsgHdr The message header to retrieve the body for and build a MIME
 *     representation of the message.
 * @param aCallbackThis The (optional) 'this' to use for your callback function.
 * @param aCallback The callback function to invoke on completion of message
 *     parsing or failure. The first argument passed will be the nsIMsgDBHdr
 *     you passed to this function. The second argument will be the MimeMessage
 *     instance resulting from the processing on success, and null on failure.
+ * @param [aAllowDownload=false] Should we allow the message to be downloaded + * for this streaming request? The default is false, which means that we + * require that the message be available offline. If false is passed and + * the message is not available offline, we will propagate an exception + * thrown by the underlying code. + * @param [aOptions] Optional options. + * @param [aOptions.saneBodySize] Limit body sizes to a 'reasonable' size in + * order to combat corrupt offline/message stores creating pathological + * situtations where we have erroneously multi-megabyte messages. This + * also likely reduces the impact of legitimately ridiculously large + * messages. + * @param [aOptions.partsOnDemand] If this is a message stored on an IMAP + * server, and for whatever reason, it isn't available locally, then setting + * this option to true will make sure that attachments aren't downloaded. + * This makes sure the message is available quickly. + * @param [aOptions.examineEncryptedParts] By default, we won't reveal the + * contents of multipart/encrypted parts to the consumers, unless explicitly + * requested. In the case of MIME/PGP messages, for instance, the message + * will appear as an empty multipart/encrypted container, unless this option + * is used. + */ +function MsgHdrToMimeMessage(aMsgHdr, aCallbackThis, aCallback, + aAllowDownload, aOptions) { + shutdownCleanupObserver.ensureInitialized(); + + let requireOffline = !aAllowDownload; + + let msgURI = aMsgHdr.folder.getUriForMsg(aMsgHdr); + let msgService = gMessenger.messageServiceFromURI(msgURI); + + MsgHdrToMimeMessage.OPTION_TUNNEL = aOptions; + let partsOnDemandStr = (aOptions && aOptions.partsOnDemand) + ? "&fetchCompleteMessage=false" + : ""; + // By default, Enigmail only decrypts a message streamed via libmime if it's + // the one currently on display in the message reader. 
With this option, we're + // letting Enigmail know that it should decrypt the message since the client + // explicitly asked for it. + let encryptedStr = (aOptions && aOptions.examineEncryptedParts) + ? "&examineEncryptedParts=true" + : ""; + + // S/MIME, our other encryption backend, is not that smart, and always + // decrypts data. In order to protect sensitive data (e.g. not index it in + // Gloda), unless the client asked for encrypted data, we pass to the client + // callback a stripped-down version of the MIME structure where encrypted + // parts have been removed. + let wrapCallback = function (aCallback, aCallbackThis) { + if (aOptions && aOptions.examineEncryptedParts) + return aCallback; + else + return ((aMsgHdr, aMimeMsg) => + aCallback.call(aCallbackThis, aMsgHdr, stripEncryptedParts(aMimeMsg)) + ); + }; + + // Apparently there used to be an old syntax where the callback was the second + // argument... + let callback = aCallback ? aCallback : aCallbackThis; + let callbackThis = aCallback ? aCallbackThis : null; + + // if we're already streaming this msg, just add the callback + // to the listener. 
+ let listenerForURI = activeStreamListeners[msgURI]; + if (listenerForURI != undefined) { + listenerForURI._callbacks.push(wrapCallback(callback, callbackThis)); + listenerForURI._callbacksThis.push(callbackThis); + return; + } + let streamListener = new CallbackStreamListener( + aMsgHdr, + callbackThis, + wrapCallback(callback, callbackThis) + ); + + try { + let streamURI = msgService.streamMessage( + msgURI, + streamListener, // consumer + null, // nsIMsgWindow + dumbUrlListener, // nsIUrlListener + true, // have them create the converter + // additional uri payload, note that "header=" is prepended automatically + "filter&emitter=js"+partsOnDemandStr+encryptedStr, + requireOffline); + } catch (ex) { + // If streamMessage throws an exception, we should make sure to clear the + // activeStreamListener, or any subsequent attempt at sreaming this URI + // will silently fail + if (activeStreamListeners[msgURI]) { + delete activeStreamListeners[msgURI]; + } + MsgHdrToMimeMessage.OPTION_TUNNEL = null; + throw(ex); + } + + MsgHdrToMimeMessage.OPTION_TUNNEL = null; +} + +/** + * Let the jsmimeemitter provide us with results. The poor emitter (if I am + * understanding things correctly) is evaluated outside of the C.u.import + * world, so if we were to import him, we would not see him, but rather a new + * copy of him. This goes for his globals, etc. (and is why we live in this + * file right here). Also, it appears that the XPCOM JS wrappers aren't + * magically unified so that we can try and pass data as expando properties + * on things like the nsIUri instances either. So we have the jsmimeemitter + * import us and poke things into RESULT_RENDEVOUZ. We put it here on this + * function to try and be stealthy and avoid polluting the namespaces (or + * encouraging bad behaviour) of our importers. + * + * If you can come up with a prettier way to shuttle this data, please do. 
 */
MsgHdrToMimeMessage.RESULT_RENDEVOUZ = {};
/**
 * Cram rich options here for the MimeMessageEmitter to grab from. We
 * leverage the known control-flow to avoid needing a whole dictionary here.
 * We set this immediately before constructing the emitter and clear it
 * afterwards. Control flow is never yielded during the process and reentrancy
 * cannot happen via any other means.
 */
MsgHdrToMimeMessage.OPTION_TUNNEL = null;

// Mixin shared by all MIME part classes (via __proto__) providing header
// lookup helpers over the lower-cased |headers| map.
var HeaderHandlerBase = {
  /**
   * Look-up a header that should be present at most once.
   *
   * @param aHeaderName The header name to retrieve, case does not matter.
   * @param aDefaultValue The value to return if the header was not found, null
   *     if left unspecified.
   * @return the value of the header if present, and the default value if not
   *     (defaults to null). If the header was present multiple times, the first
   *     instance of the header is returned. Use getAll if you want all of the
   *     values for the multiply-defined header.
   */
  get: function MimeMessage_get(aHeaderName, aDefaultValue) {
    if (aDefaultValue === undefined) {
      aDefaultValue = null;
    }
    let lowerHeader = aHeaderName.toLowerCase();
    if (lowerHeader in this.headers)
      // we require that the list cannot be empty if present
      return this.headers[lowerHeader][0];
    else
      return aDefaultValue;
  },
  /**
   * Look-up a header that can be present multiple times. Use get for headers
   * that you only expect to be present at most once.
   *
   * @param aHeaderName The header name to retrieve, case does not matter.
   * @return An array containing the values observed, which may mean a zero
   *     length array.
   */
  getAll: function MimeMessage_getAll(aHeaderName) {
    let lowerHeader = aHeaderName.toLowerCase();
    if (lowerHeader in this.headers)
      return this.headers[lowerHeader];
    else
      return [];
  },
  /**
   * @param aHeaderName Header name to test for its presence.
   * @return true if the message has (at least one value for) the given header
   *     name.
   */
  has: function MimeMessage_has(aHeaderName) {
    let lowerHeader = aHeaderName.toLowerCase();
    return lowerHeader in this.headers;
  },
  // Debug helper: render all headers as an indented multi-line string.
  _prettyHeaderString: function MimeMessage__prettyHeaderString(aIndent) {
    if (aIndent === undefined)
      aIndent = "";
    let s = "";
    for (let header in this.headers) {
      let values = this.headers[header];
      s += "\n  " + aIndent + header + ": " + values;
    }
    return s;
  }
};

/**
 * @ivar partName The MIME part, ex "1.2.2.1". The partName of a (top-level)
 *     message is "1", its first child is "1.1", its second child is "1.2",
 *     its first child's first child is "1.1.1", etc.
 * @ivar headers Maps lower-cased header field names to a list of the values
 *     seen for the given header. Use get or getAll as convenience helpers.
 * @ivar parts The list of the MIME part children of this message. Children
 *     will be either MimeMessage instances, MimeMessageAttachment instances,
 *     MimeContainer instances, or MimeUnknown instances. The latter two are
 *     the result of limitations in the Javascript representation generation
 *     at this time, combined with the need to most accurately represent the
 *     MIME structure.
 */
function MimeMessage() {
  this.partName = null;
  this.headers = {};
  this.parts = [];
  this.isEncrypted = false;
}

MimeMessage.prototype = {
  __proto__: HeaderHandlerBase,
  contentType: "message/rfc822",

  /**
   * @return a list of all attachments contained in this message and all its
   *     sub-messages. Only MimeMessageAttachment instances will be present in
   *     the list (no sub-messages).
   */
  get allAttachments() {
    let results = []; // messages are not attachments, don't include self
    for (let iChild = 0; iChild < this.parts.length; iChild++) {
      let child = this.parts[iChild];
      results = results.concat(child.allAttachments);
    }
    return results;
  },

  /**
   * @return a list of all attachments contained in this message, with
   *    included/forwarded messages treated as real attachments. Attachments
   *    contained in inner messages won't be shown.
   */
  get allUserAttachments() {
    if (this.url)
      // The jsmimeemitter camouflaged us as a MimeAttachment
      return [this];
    else
      // Why is there no flatten method for arrays?
      return this.parts.map(child => child.allUserAttachments)
                       .reduce((a, b) => a.concat(b), []);
  },

  /**
   * @return the total size of this message, that is, the size of all subparts
   */
  get size () {
    return this.parts.map(child => child.size)
                     .reduce((a, b) => a + Math.max(b, 0), 0);
  },

  /**
   * In the case of attached messages, libmime considers them as attachments,
   * and if the body is, say, quoted-printable encoded, then libmime will start
   * counting bytes and notify the js mime emitter about it. The JS mime emitter
   * being a nice guy, it will try to set a size on us. While this is the
   * expected behavior for MimeMsgAttachments, we must make sure we can handle
   * that (failing to write a setter results in exceptions being thrown).
   */
  set size (whatever) {
    // nop
  },

  /**
   * @param aMsgFolder A message folder, any message folder. Because this is
   *     a hack.
   * @return The concatenation of all of the body parts where parts
   *     available as text/plain are pulled as-is, and parts only available
   *     as text/html are converted to plaintext form first. In other words,
   *     if we see a multipart/alternative with a text/plain, we take the
   *     text/plain. If we see a text/html without an alternative, we convert
   *     that to text.
+ */ + coerceBodyToPlaintext: + function MimeMessage_coerceBodyToPlaintext(aMsgFolder) { + let bodies = []; + for (let part of this.parts) { + // an undefined value for something not having the method is fine + let body = part.coerceBodyToPlaintext && + part.coerceBodyToPlaintext(aMsgFolder); + if (body) + bodies.push(body); + } + if (bodies) + return bodies.join(""); + else + return ""; + }, + + /** + * Convert the message and its hierarchy into a "pretty string". The message + * and each MIME part get their own line. The string never ends with a + * newline. For a non-multi-part message, only a single line will be + * returned. + * Messages have their subject displayed, attachments have their filename and + * content-type (ex: image/jpeg) displayed. "Filler" classes simply have + * their class displayed. + */ + prettyString: function MimeMessage_prettyString(aVerbose, aIndent, + aDumpBody) { + if (aIndent === undefined) + aIndent = ""; + let nextIndent = aIndent + " "; + + let s = "Message "+(this.isEncrypted ? "[encrypted] " : "") + + "(" + this.size + " bytes): " + + "subject" in this.headers ? this.headers.subject : ""; + if (aVerbose) + s += this._prettyHeaderString(nextIndent); + + for (let iPart = 0; iPart < this.parts.length; iPart++) { + let part = this.parts[iPart]; + s += "\n" + nextIndent + (iPart+1) + " " + + part.prettyString(aVerbose, nextIndent, aDumpBody); + } + + return s; + }, +}; + + +/** + * @ivar contentType The content-type of this container. + * @ivar parts The parts held by this container. These can be instances of any + * of the classes found in this file. 
+ */ +function MimeContainer(aContentType) { + this.partName = null; + this.contentType = aContentType; + this.headers = {}; + this.parts = []; + this.isEncrypted = false; +} + +MimeContainer.prototype = { + __proto__: HeaderHandlerBase, + get allAttachments() { + let results = []; + for (let iChild = 0; iChild < this.parts.length; iChild++) { + let child = this.parts[iChild]; + results = results.concat(child.allAttachments); + } + return results; + }, + get allUserAttachments () { + return this.parts.map(child => child.allUserAttachments) + .reduce((a, b) => a.concat(b), []); + }, + get size () { + return this.parts.map(child => child.size) + .reduce((a, b) => a + Math.max(b, 0), 0); + }, + set size (whatever) { + // nop + }, + coerceBodyToPlaintext: + function MimeContainer_coerceBodyToPlaintext(aMsgFolder) { + if (this.contentType == "multipart/alternative") { + let htmlPart; + // pick the text/plain if we can find one, otherwise remember the HTML one + for (let part of this.parts) { + if (part.contentType == "text/plain") + return part.body; + if (part.contentType == "text/html") + htmlPart = part; + // text/enriched gets transformed into HTML, use it if we don't already + // have an HTML part. + else if (!htmlPart && part.contentType == "text/enriched") + htmlPart = part; + } + // convert the HTML part if we have one + if (htmlPart) + return aMsgFolder.convertMsgSnippetToPlainText(htmlPart.body); + } + // if it's not alternative, recurse/aggregate using MimeMessage logic + return MimeMessage.prototype.coerceBodyToPlaintext.call(this, aMsgFolder); + }, + prettyString: function MimeContainer_prettyString(aVerbose, aIndent, + aDumpBody) { + let nextIndent = aIndent + " "; + + let s = "Container "+(this.isEncrypted ? 
"[encrypted] " : "")+ + "(" + this.size + " bytes): " + this.contentType; + if (aVerbose) + s += this._prettyHeaderString(nextIndent); + + for (let iPart = 0; iPart < this.parts.length; iPart++) { + let part = this.parts[iPart]; + s += "\n" + nextIndent + (iPart+1) + " " + + part.prettyString(aVerbose, nextIndent, aDumpBody); + } + + return s; + }, + toString: function MimeContainer_toString() { + return "Container: " + this.contentType; + } +}; + +/** + * @class Represents a body portion that we understand and do not believe to be + * a proper attachment. This means text/plain or text/html and it has no + * filename. (A filename suggests an attachment.) + * + * @ivar contentType The content type of this body materal; text/plain or + * text/html. + * @ivar body The actual body content. + */ +function MimeBody(aContentType) { + this.partName = null; + this.contentType = aContentType; + this.headers = {}; + this.body = ""; + this.isEncrypted = false; +} + +MimeBody.prototype = { + __proto__: HeaderHandlerBase, + get allAttachments() { + return []; // we are a leaf + }, + get allUserAttachments() { + return []; // we are a leaf + }, + get size() { + return this.body.length; + }, + set size (whatever) { + // nop + }, + appendBody: function MimeBody_append(aBuf) { + this.body += aBuf; + }, + coerceBodyToPlaintext: + function MimeBody_coerceBodyToPlaintext(aMsgFolder) { + if (this.contentType == "text/plain") + return this.body; + // text/enriched gets transformed into HTML by libmime + if (this.contentType == "text/html" || + this.contentType == "text/enriched") + return aMsgFolder.convertMsgSnippetToPlainText(this.body); + return ""; + }, + prettyString: function MimeBody_prettyString(aVerbose, aIndent, aDumpBody) { + let s = "Body: "+(this.isEncrypted ? "[encrypted] " : "")+ + "" + this.contentType + " (" + this.body.length + " bytes" + + (aDumpBody ? 
(": '" + this.body + "'") : "") + ")"; + if (aVerbose) + s += this._prettyHeaderString(aIndent + " "); + return s; + }, + toString: function MimeBody_toString() { + return "Body: " + this.contentType + " (" + this.body.length + " bytes)"; + } +}; + +/** + * @class A MIME Leaf node that doesn't have a filename so we assume it's not + * intended to be an attachment proper. This is probably meant for inline + * display or is the result of someone amusing themselves by composing messages + * by hand or a bad client. This class should probably be renamed or we should + * introduce a better named class that we try and use in preference to this + * class. + * + * @ivar contentType The content type of this part. + */ +function MimeUnknown(aContentType) { + this.partName = null; + this.contentType = aContentType; + this.headers = {}; + // Looks like libmime does not always intepret us as an attachment, which + // means we'll have to have a default size. Returning undefined would cause + // the recursive size computations to fail. + this._size = 0; + this.isEncrypted = false; + // We want to make sure MimeUnknown has a part property: S/MIME encrypted + // messages have a topmost MimeUnknown part, with the encrypted bit set to 1, + // and we need to ensure all other encrypted parts are children of this + // topmost part. + this.parts = []; +} + +MimeUnknown.prototype = { + __proto__: HeaderHandlerBase, + get allAttachments() { + return this.parts.map(child => child.allAttachments) + .reduce((a, b) => a.concat(b), []); + }, + get allUserAttachments() { + return this.parts.map(child => child.allUserAttachments) + .reduce((a, b) => a.concat(b), []); + }, + get size() { + return this._size + this.parts.map(child => child.size) + .reduce((a, b) => a + Math.max(b, 0), 0); + }, + set size(aSize) { + this._size = aSize; + }, + prettyString: function MimeUnknown_prettyString(aVerbose, aIndent, + aDumpBody) { + let nextIndent = aIndent + " "; + + let s = "Unknown: "+(this.isEncrypted ? 
"[encrypted] " : "")+ + "" + this.contentType + " (" + this.size + " bytes)"; + if (aVerbose) + s += this._prettyHeaderString(aIndent + " "); + + for (let iPart = 0; iPart < this.parts.length; iPart++) { + let part = this.parts[iPart]; + s += "\n" + nextIndent + (iPart+1) + " " + + (part ? part.prettyString(aVerbose, nextIndent, aDumpBody) : "NULL"); + } + return s; + }, + toString: function MimeUnknown_toString() { + return "Unknown: " + this.contentType; + } +}; + +/** + * @class An attachment proper. We think it's an attachment because it has a + * filename that libmime was able to figure out. + * + * @ivar partName @see{MimeMessage.partName} + * @ivar name The filename of this attachment. + * @ivar contentType The MIME content type of this part. + * @ivar url The URL to stream if you want the contents of this part. + * @ivar isExternal Is the attachment stored someplace else than in the message? + * @ivar size The size of the attachment if available, -1 otherwise (size is set + * after initialization by jsmimeemitter.js) + */ +function MimeMessageAttachment(aPartName, aName, aContentType, aUrl, + aIsExternal) { + this.partName = aPartName; + this.name = aName; + this.contentType = aContentType; + this.url = aUrl; + this.isExternal = aIsExternal; + this.headers = {}; + this.isEncrypted = false; + // parts is copied over from the part instance that preceded us + // headers is copied over from the part instance that preceded us + // isEncrypted is copied over from the part instance that preceded us +} + +MimeMessageAttachment.prototype = { + __proto__: HeaderHandlerBase, + // This is a legacy property. + get isRealAttachment() { + return true; + }, + get allAttachments() { + return [this]; // we are a leaf, so just us. + }, + get allUserAttachments() { + return [this]; + }, + prettyString: function MimeMessageAttachment_prettyString(aVerbose, aIndent, + aDumpBody) { + let s = "Attachment "+(this.isEncrypted ? 
"[encrypted] " : "")+ + "(" + this.size+" bytes): " + + this.name + ", " + this.contentType; + if (aVerbose) + s += this._prettyHeaderString(aIndent + " "); + return s; + }, + toString: function MimeMessageAttachment_toString() { + return this.prettyString(false, ""); + }, +}; diff --git a/mailnews/db/gloda/modules/moz.build b/mailnews/db/gloda/modules/moz.build new file mode 100644 index 000000000..7ad34e2a6 --- /dev/null +++ b/mailnews/db/gloda/modules/moz.build @@ -0,0 +1,32 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXTRA_JS_MODULES.gloda += [ + 'collection.js', + 'connotent.js', + 'databind.js', + 'datamodel.js', + 'datastore.js', + 'dbview.js', + 'everybody.js', + 'explattr.js', + 'facet.js', + 'fundattr.js', + 'gloda.js', + 'index_ab.js', + 'index_msg.js', + 'indexer.js', + 'log4moz.js', + 'mimemsg.js', + 'mimeTypeCategories.js', + 'msg_search.js', + 'noun_freetag.js', + 'noun_mimetype.js', + 'noun_tag.js', + 'public.js', + 'query.js', + 'suffixtree.js', + 'utils.js', +] diff --git a/mailnews/db/gloda/modules/msg_search.js b/mailnews/db/gloda/modules/msg_search.js new file mode 100644 index 000000000..8ba854406 --- /dev/null +++ b/mailnews/db/gloda/modules/msg_search.js @@ -0,0 +1,346 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ["GlodaMsgSearcher"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource://gre/modules/Services.jsm"); +Cu.import("resource:///modules/gloda/public.js"); + +/** + * How much time boost should a 'score point' amount to? 
/**
 * How much time boost should a 'score point' amount to?  The authoritative,
 * incontrovertible answer, across all time and space, is a week.
 * (Note that gloda stores timestamps as PRTimes for no exceedingly good
 * reason.)
 */
var FUZZSCORE_TIMESTAMP_FACTOR = 1000 * 1000 * 60 * 60 * 24 * 7;

/** SQL expression computing our fulltext rank from matchinfo(). */
var RANK_USAGE =
  "glodaRank(matchinfo(messagesText), 1.0, 2.0, 2.0, 1.5, 1.5)";

/**
 * SQL expression for the overall score: (rank + static notability) scaled
 * into time units via FUZZSCORE_TIMESTAMP_FACTOR, plus the message date so
 * that newer messages win ties.
 */
var DASCORE =
  "(((" + RANK_USAGE + " + messages.notability) * " +
    FUZZSCORE_TIMESTAMP_FACTOR +
   ") + messages.date)";

/**
 * Fulltext search SQL, built so that we neither carry around ephemeral data
 * that is not used to whittle the result set nor perform row lookups whose
 * results the LIMIT would simply discard:
 *
 * 1) The inner MATCH orders candidates cheaply: matchinfo() needs no content
 *    row retrieval from messagesText, and only the date/notability columns
 *    are pulled from 'messages' to compute DASCORE for the LIMIT.
 * 2) The MATCH is re-issued in the outer query (expected to be served from
 *    the SQLite/OS cache) so offsets() is available, and it is intersected
 *    with the phase-1 results via "docid IN (...)"; a plain join would
 *    degenerate into O(mn) work.  Only at this point are messagesText
 *    content rows touched -- the major disk-seek reduction here -- and
 *    offsets() needs those rows anyway, which is a nice synergy.
 */
var NUEVO_FULLTEXT_SQL =
  "SELECT messages.*, messagesText.*, offsets(messagesText) AS osets " +
  "FROM messagesText, messages " +
  "WHERE" +
  " messagesText MATCH ?1 " +
  " AND messagesText.docid IN (" +
     "SELECT docid " +
     "FROM messagesText JOIN messages ON messagesText.docid = messages.id " +
     "WHERE messagesText MATCH ?1 " +
     "ORDER BY " + DASCORE + " DESC " +
     "LIMIT ?2" +
  " )" +
  " AND messages.id = messagesText.docid " +
  " AND +messages.deleted = 0" +
  " AND +messages.folderID IS NOT NULL" +
  " AND +messages.messageKey IS NOT NULL";

/** Identity; used as the truthiness predicate for every/some/filter. */
function identityFunc(x) {
  return x;
}

/** @return max(x - 1, 0); the number of matches beyond the first one. */
function oneLessMaxZero(x) {
  if (x <= 1)
    return 0;
  return x - 1;
}

/** Reducer that sums its inputs. */
function reduceSum(accum, curValue) {
  return accum + curValue;
}

/*
 * Columns are: body, subject, attachment names, author, recipients
 */

/**
 * Scores if all search terms match in a column.  We bias against author
 * slightly and recipient a bit more in this case because a search that
 * entirely matches just on a person should give a mention of that person
 * in the subject or attachment a fighting chance.
 * Keep in mind that because of our indexing in the face of address book
 * contacts (namely, we index the name used in the e-mail as well as the
 * display name on the address book card associated with the e-mail address)
 * a contact is going to bias towards matching multiple times.
 */
var COLUMN_ALL_MATCH_SCORES = [4, 20, 20, 16, 12];
/**
 * Score for each distinct term that matches in the column.  This is capped
 * by COLUMN_ALL_MATCH_SCORES.
 */
var COLUMN_PARTIAL_PER_MATCH_SCORES = [1, 4, 4, 4, 3];
/**
 * If a term matches multiple times, what is the marginal score for each
 * additional match.  We count the total number of matches beyond the first
 * match for each term: with 3 terms matching 5, 3, and 0 times, the total
 * from our perspective is (5 - 1) + (3 - 1) + 0 = 4 + 2 + 0 = 6.  We take
 * the minimum of that value and the value in COLUMN_MULTIPLE_MATCH_LIMIT
 * and multiply by the value in COLUMN_MULTIPLE_MATCH_SCORES.
 */
var COLUMN_MULTIPLE_MATCH_SCORES = [1, 0, 0, 0, 0];
var COLUMN_MULTIPLE_MATCH_LIMIT = [10, 0, 0, 0, 0];

/**
 * Score the message on its offsets (from stashedColumns).
 *
 * @param aMessage A message-like object; only |id| is used, as the key into
 *     aContext.stashedColumns.
 * @param aContext Object with |terms| (the list of fulltext search terms)
 *     and |stashedColumns| (map from message id to stashed column values;
 *     column 0 holds the space-delimited offsets() output, i.e. quadruples
 *     of column index, term index, byte offset, size).
 * @return the integer score contribution for this message.
 */
function scoreOffsets(aMessage, aContext) {
  let score = 0;

  let termTemplate = aContext.terms.map(_ => 0);
  // for each column, a list of the incidence of each term
  let columnTermIncidence = [termTemplate.concat(),
                             termTemplate.concat(),
                             termTemplate.concat(),
                             termTemplate.concat(),
                             termTemplate.concat()];

  // FIX: pass an explicit radix of 10 to parseInt.  (The arrow wrapper is
  // also needed so map does not pass the element index as the radix.)
  let offsetNums =
    aContext.stashedColumns[aMessage.id][0].split(" ")
            .map(x => parseInt(x, 10));
  for (let i = 0; i < offsetNums.length; i += 4) {
    let columnIndex = offsetNums[i];
    let termIndex = offsetNums[i + 1];
    columnTermIncidence[columnIndex][termIndex]++;
  }

  for (let iColumn = 0; iColumn < COLUMN_ALL_MATCH_SCORES.length; iColumn++) {
    let termIncidence = columnTermIncidence[iColumn];
    // bestow all match credit
    if (termIncidence.every(identityFunc))
      score += COLUMN_ALL_MATCH_SCORES[iColumn];
    // bestow partial match credit
    else if (termIncidence.some(identityFunc))
      score += Math.min(COLUMN_ALL_MATCH_SCORES[iColumn],
                        COLUMN_PARTIAL_PER_MATCH_SCORES[iColumn] *
                          termIncidence.filter(identityFunc).length);
    // bestow multiple match credit
    score += Math.min(termIncidence.map(oneLessMaxZero).reduce(reduceSum, 0),
                      COLUMN_MULTIPLE_MATCH_LIMIT[iColumn]) *
             COLUMN_MULTIPLE_MATCH_SCORES[iColumn];
  }

  return score;
}
/**
 * The searcher basically looks like a query, but is specialized for fulltext
 * search against messages.  Most of the explicit specialization involves
 * crafting a SQL query that attempts to order the matches by likelihood that
 * the user was looking for it.  This is based on full-text matches combined
 * with an explicit (generic) interest score value placed on the message at
 * indexing time, followed by the more generic gloda scoring mechanism that
 * scores the messages given the search context plus generic adjusting rules.
 */
function GlodaMsgSearcher(aListener, aSearchString, aAndTerms) {
  this.listener = aListener;

  this.searchString = aSearchString;
  this.fulltextTerms = this.parseSearchString(aSearchString);
  this.andTerms = (aAndTerms != null) ? aAndTerms : true;

  this.query = null;
  this.collection = null;

  this.scores = null;
}
GlodaMsgSearcher.prototype = {
  /**
   * Number of messages to retrieve initially.
   */
  get retrievalLimit() {
    return Services.prefs.getIntPref(
      "mailnews.database.global.search.msg.limit"
    );
  },

  /**
   * Parse the string into terms/phrases by finding matching double-quotes;
   * a quote without a closing partner is simply discarded.
   */
  parseSearchString(aSearchString) {
    let remaining = aSearchString.trim();
    let terms = [];

    // add the term as long as trimming didn't obliterate it
    let addTerm = (aTerm) => {
      if (aTerm)
        terms.push(aTerm);
    };

    while (remaining) {
      if (remaining.startsWith('"')) {
        let closeIndex = remaining.indexOf('"', 1);
        // eat the quote if it has no friend
        if (closeIndex == -1) {
          remaining = remaining.substring(1);
          continue;
        }

        addTerm(remaining.substring(1, closeIndex).trim());
        remaining = remaining.substring(closeIndex + 1);
        continue;
      }

      let spaceIndex = remaining.indexOf(" ");
      if (spaceIndex == -1) {
        addTerm(remaining);
        break;
      }

      addTerm(remaining.substring(0, spaceIndex));
      remaining = remaining.substring(spaceIndex + 1);
    }

    return terms;
  },

  /**
   * Build the gloda query that runs NUEVO_FULLTEXT_SQL with our terms as the
   * MATCH expression and our retrieval limit as the LIMIT.
   */
  buildFulltextQuery() {
    let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
      noMagic: true,
      explicitSQL: NUEVO_FULLTEXT_SQL,
      limitClauseAlreadyIncluded: true,
      // osets is 0-based column number 14 (volatile to column changes);
      // save the offset column for extra analysis
      stashColumns: [14]
    });

    let fulltextQueryString = "";

    this.fulltextTerms.forEach((term, iTerm) => {
      if (iTerm)
        fulltextQueryString += this.andTerms ? " " : " OR ";

      // our tokenizer treats anything at/above 0x2000 as CJK for now
      let isCJK = (index) => term.charCodeAt(index) >= 0x2000;

      if (/^NEAR(\/\d+)?$/.test(term)) {
        // people clever enough to use NEAR get it passed through untouched
        fulltextQueryString += term;
      } else if (term.length == 1 && isCJK(0)) {
        // single-character CJK search queries get a wildcard
        fulltextQueryString += term + "*";
      } else if ((term.length == 2 && isCJK(0) && isCJK(1)) ||
                 term.length >= 3) {
        // quote the term so the tokenizer can do useful things with it
        fulltextQueryString += '"' + term + '"';
      }
    });

    query.fulltextMatches(fulltextQueryString);
    query.limit(this.retrievalLimit);

    return query;
  },

  /**
   * Kick off the query, optionally overriding the listener supplied at
   * construction time.
   */
  getCollection(aListenerOverride, aData) {
    if (aListenerOverride)
      this.listener = aListenerOverride;

    this.query = this.buildFulltextQuery();
    this.collection = this.query.getCollection(this, aData);
    this.completed = false;

    return this.collection;
  },

  sortBy: '-dascore',

  onItemsAdded(aItems, aCollection) {
    let newScores = Gloda.scoreNounItems(
      aItems,
      {
        terms: this.fulltextTerms,
        stashedColumns: aCollection.stashedColumns
      },
      [scoreOffsets]);
    this.scores = this.scores ? this.scores.concat(newScores) : newScores;

    if (this.listener)
      this.listener.onItemsAdded(aItems, aCollection);
  },
  onItemsModified(aItems, aCollection) {
    if (this.listener)
      this.listener.onItemsModified(aItems, aCollection);
  },
  onItemsRemoved(aItems, aCollection) {
    if (this.listener)
      this.listener.onItemsRemoved(aItems, aCollection);
  },
  onQueryCompleted(aCollection) {
    this.completed = true;
    if (this.listener)
      this.listener.onQueryCompleted(aCollection);
  },
};
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ['FreeTag', 'FreeTagNoun']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); + +Cu.import("resource:///modules/gloda/gloda.js"); + +function FreeTag(aTagName) { + this.name = aTagName; +} + +FreeTag.prototype = { + toString: function () { + return this.name; + } +}; + +/** + * @namespace Tag noun provider. Since the tag unique value is stored as a + * parameter, we are an odd case and semantically confused. + */ +var FreeTagNoun = { + _log: Log4Moz.repository.getLogger("gloda.noun.freetag"), + + name: "freetag", + clazz: FreeTag, + allowsArbitraryAttrs: false, + usesParameter: true, + + _listeners: [], + addListener: function(aListener) { + this._listeners.push(aListener); + }, + removeListener: function(aListener) { + let index = this._listeners.indexOf(aListener); + if (index >=0) + this._listeners.splice(index, 1); + }, + + populateKnownFreeTags: function() { + for (let attr of this.objectNounOfAttributes) { + let attrDB = attr.dbDef; + for (let param in attrDB.parameterBindings) { + this.getFreeTag(param); + } + } + }, + + knownFreeTags: {}, + getFreeTag: function(aTagName) { + let tag = this.knownFreeTags[aTagName]; + if (!tag) { + tag = this.knownFreeTags[aTagName] = new FreeTag(aTagName); + for (let listener of this._listeners) + listener.onFreeTagAdded(tag); + } + return tag; + }, + + comparator: function gloda_noun_freetag_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + return a.name.localeCompare(b.name); + }, + + toParamAndValue: function gloda_noun_freetag_toParamAndValue(aTag) { + return [aTag.name, null]; + }, + + toJSON: function gloda_noun_freetag_toJSON(aTag) { + return aTag.name; + }, + fromJSON: 
function gloda_noun_freetag_fromJSON(aTagName) { + return this.getFreeTag(aTagName); + }, +}; + +Gloda.defineNoun(FreeTagNoun); diff --git a/mailnews/db/gloda/modules/noun_mimetype.js b/mailnews/db/gloda/modules/noun_mimetype.js new file mode 100644 index 000000000..066031c4a --- /dev/null +++ b/mailnews/db/gloda/modules/noun_mimetype.js @@ -0,0 +1,365 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ['MimeType', 'MimeTypeNoun']; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/log4moz.js"); +Cu.import("resource:///modules/StringBundle.js"); + +var LOG = Log4Moz.repository.getLogger("gloda.noun.mimetype"); + +Cu.import("resource:///modules/gloda/gloda.js"); + +var CategoryStringMap = {}; + +/** + * Mime type abstraction that exists primarily so we can map mime types to + * integer id's. + * + * Instances of this class should only be retrieved via |MimeTypeNoun|; no one + * should ever create an instance directly. + */ +function MimeType(aID, aType, aSubType, aFullType, aCategory) { + this._id = aID; + this._type = aType; + this._subType = aSubType; + this._fullType = aFullType; + this._category = aCategory; +} + +MimeType.prototype = { + /** + * The integer id we have associated with the mime type. This is stable for + * the lifetime of the database, which means that anything in the Gloda + * database can use this without fear. Things not persisted in the database + * should use the actual string mime type, retrieval via |fullType|. + */ + get id() { return this._id; }, + /** + * The first part of the MIME type; "text/plain" gets you "text". 
+ */ + get type() { return this._type; }, + set fullType(aFullType) { + if (!this._fullType) { + this._fullType = aFullType; + [this._type, this._subType] = this._fullType.split("/"); + this._category = + MimeTypeNoun._getCategoryForMimeType(aFullType, this._type); + } + }, + /** + * If the |fullType| is "text/plain", subType is "plain". + */ + get subType() { return this._subType; }, + /** + * The full MIME type; "text/plain" returns "text/plain". + */ + get fullType() { return this._fullType; }, + toString: function () { + return this.fullType; + }, + + /** + * @return the category we believe this mime type belongs to. This category + * name should never be shown directly to the user. Instead, use + * |categoryLabel| to get the localized name for the category. The + * category mapping comes from mimeTypesCategories.js. + */ + get category() { + return this._category; + }, + /** + * @return The localized label for the category from gloda.properties in the + * "gloda.mimetype.category.CATEGORY.label" definition using the value + * from |category|. + */ + get categoryLabel() { + return CategoryStringMap[this._category]; + } +}; + +/** + * Mime type noun provider. + * + * The set of MIME Types is sufficiently limited that we can keep them all in + * memory. In theory it is also sufficiently limited that we could use the + * parameter mechanism in the database. However, it is more efficient, for + * both space and performance reasons, to store the specific mime type as a + * value. For future-proofing reasons, we opt to use a database table to + * persist the mapping rather than a hard-coded list. A preferences file or + * other text file would arguably suffice, but for consistency reasons, the + * database is not a bad thing. + */ +var MimeTypeNoun = { + name: "mime-type", + clazz: MimeType, // gloda supports clazz as well as class + allowsArbitraryAttrs: false, + + _strings: new StringBundle("chrome://messenger/locale/gloda.properties"), + + // note! 
update test_noun_mimetype if you change our internals! + _mimeTypes: {}, + _mimeTypesByID: {}, + TYPE_BLOCK_SIZE: 16384, + _mimeTypeHighID: {}, + _mimeTypeRangeDummyObjects: {}, + _highID: 0, + + // we now use the exciting 'schema' mechanism of defineNoun to get our table + // created for us, plus some helper methods that we simply don't use. + schema: { + name: 'mimeTypes', + columns: [['id', 'INTEGER PRIMARY KEY', '_id'], + ['mimeType', 'TEXT', 'fullType']], + }, + + _init: function() { + LOG.debug("loading MIME types"); + this._loadCategoryMapping(); + this._loadMimeTypes(); + }, + + /** + * A map from MIME type to category name. + */ + _mimeTypeToCategory: {}, + /** + * Load the contents of mimeTypeCategories.js and populate + */ + _loadCategoryMapping: function MimeTypeNoun__loadCategoryMapping() { + let mimecatNS = {}; + Cu.import("resource:///modules/gloda/mimeTypeCategories.js", + mimecatNS); + let mcm = mimecatNS.MimeCategoryMapping; + + let mimeTypeToCategory = this._mimeTypeToCategory; + + function procMapObj(aSubTree, aCategories) { + for (let key in aSubTree) { + let value = aSubTree[key]; + // Add this category to our nested categories list. Use concat since + // the list will be long-lived and each list needs to be distinct. + let categories = aCategories.concat(); + categories.push(key); + + if (categories.length == 1) { + CategoryStringMap[key] = + MimeTypeNoun._strings.get( + "gloda.mimetype.category." + key + ".label"); + } + + // Is it an array? If so, just process this depth + if (Array.isArray(value)) { + for (let mimeTypeStr of value) { + mimeTypeToCategory[mimeTypeStr] = categories; + } + } + // it's yet another sub-tree branch + else { + procMapObj(value, categories); + } + } + } + + procMapObj(mimecatNS.MimeCategoryMapping, []); + }, + + /** + * Lookup the category associated with a MIME type given its full type and + * type. (So, "foo/bar" and "foo" for "foo/bar".) 
 */
  _getCategoryForMimeType:
    function MimeTypeNoun__getCategoryForMimeType(aFullType, aType) {
    // exact match ("text/plain") wins first...
    if (aFullType in this._mimeTypeToCategory)
      return this._mimeTypeToCategory[aFullType][0];
    // ...then a type wildcard ("text/*")...
    let wildType = aType + "/*";
    if (wildType in this._mimeTypeToCategory)
      return this._mimeTypeToCategory[wildType][0];
    // ...falling back to the global "*" catch-all, which the mapping file is
    //  expected to always define.
    return this._mimeTypeToCategory["*"][0];
  },

  /**
   * In order to allow the gloda query mechanism to avoid hitting the database,
   * we need to either define the noun type as cachable and have a super-large
   * cache or simply have a collection with every MIME type in it that stays
   * alive forever.
   * This is that collection.  It is initialized by |_loadMimeTypes|.  As new
   * MIME types are created, we add them to the collection.
   */
  _universalCollection: null,

  /**
   * Kick off a query of all the mime types in our database, leaving
   * |_processMimeTypes| to actually do the legwork.
   * NOTE(review): |this.id| is presumably assigned by Gloda.defineNoun before
   *  this runs -- confirm the init ordering.
   */
  _loadMimeTypes: function MimeTypeNoun__loadMimeTypes() {
    // get all the existing mime types!
    let query = Gloda.newQuery(this.id);
    let nullFunc = function() {};
    this._universalCollection = query.getCollection({
      // we only care about the initial query completion; later add/modify/
      //  remove notifications are deliberately ignored.
      onItemsAdded: nullFunc, onItemsModified: nullFunc,
      onItemsRemoved: nullFunc,
      onQueryCompleted: function (aCollection) {
        MimeTypeNoun._processMimeTypes(aCollection.items);
      }
    }, null);
  },

  /**
   * For the benefit of our Category queryHelper, we need dummy ranged objects
   * that cover the numerical address space allocated to the category.  We
   * can't use a real object for the upper-bound because the upper-bound is
   * constantly growing and there is the chance the query might get persisted,
   * which means these values need to be long-lived.  Unfortunately, our
   * solution to this problem (dummy objects) complicates the second case,
   * should it ever occur.  (Because the dummy objects cannot be persisted
   * on their own... but there are other issues that will come up that we will
   * just have to deal with then.)
+ */ + _createCategoryDummies: function (aId, aCategory) { + let blockBottom = aId - (aId % this.TYPE_BLOCK_SIZE); + let blockTop = blockBottom + this.TYPE_BLOCK_SIZE - 1; + this._mimeTypeRangeDummyObjects[aCategory] = [ + new MimeType(blockBottom, "!category-dummy!", aCategory, + "!category-dummy!/" + aCategory, aCategory), + new MimeType(blockTop, "!category-dummy!", aCategory, + "!category-dummy!/" + aCategory, aCategory) + ]; + }, + + _processMimeTypes: function MimeTypeNoun__processMimeTypes(aMimeTypes) { + for (let mimeType of aMimeTypes) { + if (mimeType.id > this._highID) + this._highID = mimeType.id; + this._mimeTypes[mimeType] = mimeType; + this._mimeTypesByID[mimeType.id] = mimeType; + + let typeBlock = mimeType.id - (mimeType.id % this.TYPE_BLOCK_SIZE); + let blockHighID = (mimeType.category in this._mimeTypeHighID) ? + this._mimeTypeHighID[mimeType.category] : undefined; + // create the dummy range objects + if (blockHighID === undefined) + this._createCategoryDummies(mimeType.id, mimeType.category); + if ((blockHighID === undefined) || mimeType.id > blockHighID) + this._mimeTypeHighID[mimeType.category] = mimeType.id; + } + }, + + _addNewMimeType: function MimeTypeNoun__addNewMimeType(aMimeTypeName) { + let [typeName, subTypeName] = aMimeTypeName.split("/"); + let category = this._getCategoryForMimeType(aMimeTypeName, typeName); + + if (!(category in this._mimeTypeHighID)) { + let nextID = this._highID - (this._highID % this.TYPE_BLOCK_SIZE) + + this.TYPE_BLOCK_SIZE; + this._mimeTypeHighID[category] = nextID; + this._createCategoryDummies(nextID, category); + } + + let nextID = ++this._mimeTypeHighID[category]; + + let mimeType = new MimeType(nextID, typeName, subTypeName, aMimeTypeName, + category); + if (mimeType.id > this._highID) + this._highID = mimeType.id; + + this._mimeTypes[aMimeTypeName] = mimeType; + this._mimeTypesByID[nextID] = mimeType; + + // As great as the gloda extension mechanisms are, we don't think it makes + // a lot of sense to 
use them in this case. So we directly trigger object + // insertion without any of the grokNounItem stuff. + this.objInsert.call(this.datastore, mimeType); + // Since we bypass grokNounItem and its fun, we need to explicitly add the + // new MIME-type to _universalCollection ourselves. Don't try this at + // home, kids. + this._universalCollection._onItemsAdded([mimeType]); + + return mimeType; + }, + + /** + * Map a mime type to a |MimeType| instance, creating it if necessary. + * + * @param aMimeTypeName The mime type. It may optionally include parameters + * (which will be ignored). A mime type is of the form "type/subtype". + * A type with parameters would look like 'type/subtype; param="value"'. + */ + getMimeType: function MimeTypeNoun_getMimeType(aMimeTypeName) { + // first, lose any parameters + let semiIndex = aMimeTypeName.indexOf(";"); + if (semiIndex >= 0) + aMimeTypeName = aMimeTypeName.substring(0, semiIndex); + aMimeTypeName = aMimeTypeName.trim().toLowerCase(); + + if (aMimeTypeName in this._mimeTypes) + return this._mimeTypes[aMimeTypeName]; + else + return this._addNewMimeType(aMimeTypeName); + }, + + /** + * Query helpers contribute additional functions to the query object for the + * attributes that use the noun type. For example, we define Category, so + * for the "attachmentTypes" attribute, "attachmentTypesCategory" would be + * exposed. + */ + queryHelpers: { + /** + * Query for MIME type categories based on one or more MIME type objects + * passed in. We want the range to span the entire block allocated to the + * category. + * + * @param aAttrDef The attribute that is using us. + * @param aArguments The actual arguments object that + */ + Category: function(aAttrDef, aArguments) { + let rangePairs = []; + // If there are no arguments then we want to fall back to the 'in' + // constraint which matches on any attachment. 
+ if (aArguments.length == 0) + return this._inConstraintHelper(aAttrDef, []); + + for (let iArg = 0; iArg < aArguments.length; iArg++) { + let arg = aArguments[iArg]; + rangePairs.push(MimeTypeNoun._mimeTypeRangeDummyObjects[arg.category]); + } + return this._rangedConstraintHelper(aAttrDef, rangePairs); + } + }, + + comparator: function gloda_noun_mimeType_comparator(a, b) { + if (a == null) { + if (b == null) + return 0; + else + return 1; + } + else if (b == null) { + return -1; + } + return a.fullType.localeCompare(b.fullType); + }, + + toParamAndValue: function gloda_noun_mimeType_toParamAndValue(aMimeType) { + return [null, aMimeType.id]; + }, + toJSON: function gloda_noun_mimeType_toJSON(aMimeType) { + return aMimeType.id; + }, + fromJSON: function gloda_noun_mimeType_fromJSON(aMimeTypeID) { + return this._mimeTypesByID[aMimeTypeID]; + }, +}; +Gloda.defineNoun(MimeTypeNoun, Gloda.NOUN_MIME_TYPE); +try { +MimeTypeNoun._init(); +} catch (ex) { + LOG.error("problem init-ing: " + ex.fileName + ":" + ex.lineNumber + ": " + ex); +} diff --git a/mailnews/db/gloda/modules/noun_tag.js b/mailnews/db/gloda/modules/noun_tag.js new file mode 100644 index 000000000..292bdad9b --- /dev/null +++ b/mailnews/db/gloda/modules/noun_tag.js @@ -0,0 +1,95 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ['TagNoun']; + +Components.utils.import("resource:///modules/mailServices.js"); + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/gloda.js"); + +/** + * @namespace Tag noun provider. 
 */
var TagNoun = {
  name: "tag",
  clazz: Ci.nsIMsgTag,
  usesParameter: true,
  allowsArbitraryAttrs: false,
  idAttr: "key",
  // lazily-initialized handles onto the message tag service and our caches
  _msgTagService: null,
  _tagMap: null,   // map: tag key -> nsIMsgTag
  _tagList: null,  // array of all known nsIMsgTag instances

  _init: function () {
    this._msgTagService = MailServices.tags;
    this._updateTagMap();
  },

  /**
   * Return the (cached) list of all tags known to the tag service,
   *  rebuilding the cache if it has not been populated yet.
   */
  getAllTags: function gloda_noun_tag_getAllTags() {
    if (this._tagList == null)
      this._updateTagMap();
    return this._tagList;
  },

  // Rebuild _tagList and the key-indexed _tagMap from the tag service.
  _updateTagMap: function gloda_noun_tag_updateTagMap() {
    this._tagMap = {};
    let tagArray = this._tagList = this._msgTagService.getAllTags({});
    for (let iTag = 0; iTag < tagArray.length; iTag++) {
      let tag = tagArray[iTag];
      this._tagMap[tag.key] = tag;
    }
  },

  // Locale-aware sort by the tag's display string; null sorts last.
  comparator: function gloda_noun_tag_comparator(a, b) {
    if (a == null) {
      if (b == null)
        return 0;
      else
        return 1;
    }
    else if (b == null) {
      return -1;
    }
    return a.tag.localeCompare(b.tag);
  },
  userVisibleString: function gloda_noun_tag_userVisibleString(aTag) {
    return aTag.tag;
  },

  // we cannot be an attribute value

  // The tag key is stored as the database parameter; there is no value.
  toParamAndValue: function gloda_noun_tag_toParamAndValue(aTag) {
    return [aTag.key, null];
  },
  toJSON: function gloda_noun_tag_toJSON(aTag) {
    return aTag.key;
  },
  /**
   * Map a persisted tag key back to its nsIMsgTag, refreshing our cache if
   *  the key is unknown but the tag service says it is valid.
   */
  fromJSON: function gloda_noun_tag_fromJSON(aTagKey, aIgnored) {
    let tag = this._tagMap.hasOwnProperty(aTagKey) ? this._tagMap[aTagKey]
                                                   : undefined;
    // you will note that if a tag is removed, we are unable to aggressively
    // deal with this. we are okay with this, but it would be nice to be able
    // to listen to the message tag service to know when we should rebuild.
    if ((tag === undefined) && this._msgTagService.isValidKey(aTagKey)) {
      this._updateTagMap();
      tag = this._tagMap[aTagKey];
    }
    // we intentionally are returning undefined if the tag doesn't exist
    return tag;
  },
  /**
   * Convenience helper to turn a tag key into a tag name.
+ */ + getTag: function gloda_noun_tag_getTag(aTagKey) { + return this.fromJSON(aTagKey); + } +}; + +TagNoun._init(); +Gloda.defineNoun(TagNoun, Gloda.NOUN_TAG); diff --git a/mailnews/db/gloda/modules/public.js b/mailnews/db/gloda/modules/public.js new file mode 100644 index 000000000..63c7d87a9 --- /dev/null +++ b/mailnews/db/gloda/modules/public.js @@ -0,0 +1,36 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ["Gloda"]; + +var Cc = Components.classes; +var Ci = Components.interfaces; +var Cr = Components.results; +var Cu = Components.utils; + +Cu.import("resource:///modules/gloda/gloda.js"); +Cu.import("resource:///modules/gloda/everybody.js"); +Cu.import("resource:///modules/gloda/indexer.js"); +// initialize the indexer! (who was actually imported as a nested dep by the +// things everybody.js imported.) We waited until now so it could know about +// its indexers. 
GlodaIndexer._init();
Cu.import("resource:///modules/gloda/index_msg.js");

/**
 * Forward a method from a source object onto a destination object, preserving
 *  the source as the |this| for the call.  Used below to re-export selected
 *  indexer methods on the public Gloda object under friendlier names.
 *
 * @param aSourceObj The object that actually implements the method.
 * @param aSourceAttr The method name on the source object.
 * @param aDestObj The object to hang the forwarding function off of.
 * @param aDestAttr The name to expose the forwarder under.
 */
function proxy(aSourceObj, aSourceAttr, aDestObj, aDestAttr) {
  aDestObj[aDestAttr] = function() {
    return aSourceObj[aSourceAttr].apply(aSourceObj, arguments);
  };
}

proxy(GlodaIndexer, "addListener", Gloda, "addIndexerListener");
proxy(GlodaIndexer, "removeListener", Gloda, "removeIndexerListener");
proxy(GlodaMsgIndexer, "isMessageIndexed", Gloda, "isMessageIndexed");
proxy(GlodaMsgIndexer, "setFolderIndexingPriority", Gloda,
      "setFolderIndexingPriority");
proxy(GlodaMsgIndexer, "resetFolderIndexingPriority", Gloda,
      "resetFolderIndexingPriority");
diff --git a/mailnews/db/gloda/modules/query.js b/mailnews/db/gloda/modules/query.js
new file mode 100644
index 000000000..2c2139498
--- /dev/null
+++ b/mailnews/db/gloda/modules/query.js
@@ -0,0 +1,618 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

this.EXPORTED_SYMBOLS = ["GlodaQueryClassFactory"];

var Cc = Components.classes;
var Ci = Components.interfaces;
var Cr = Components.results;
var Cu = Components.utils;

Cu.import("resource:///modules/gloda/log4moz.js");

// GlodaDatastore has some constants we need, and oddly enough, there was no
// load dependency preventing us from doing this.
Cu.import("resource:///modules/gloda/datastore.js");

/**
 * @class Query class core; each noun gets its own sub-class where attributes
 *  have helper methods bound.
 *
 * @param aOptions A dictionary of options.  Current legal options are:
 *   - noMagic: Indicates that the noun's dbQueryJoinMagic should be ignored.
 *       Currently, this means that messages will not have their
 *       full-text indexed values re-attached.  This is planned to be
This is planned to be + * offset by having queries/cache lookups that do not request + * noMagic to ensure that their data does get loaded. + * - explicitSQL: A hand-rolled alternate representation for the core + * SELECT portion of the SQL query. The queryFromQuery logic still + * generates its normal query, we just ignore its result in favor of + * your provided value. This means that the positional parameter + * list is still built and you should/must rely on those bound + * parameters (using '?'). The replacement occurs prior to the + * outerWrapColumns, ORDER BY, and LIMIT contributions to the query. + * - outerWrapColumns: If provided, wraps the query in a "SELECT *,blah + * FROM (actual query)" where blah is your list of outerWrapColumns + * made comma-delimited. The idea is that this allows you to + * reference the result of expressions inside the query using their + * names rather than having to duplicate the logic. In practice, + * this makes things more readable but is unlikely to improve + * performance. (Namely, my use of 'offsets' for full-text stuff + * ends up in the EXPLAIN plan twice despite this.) + * - noDbQueryValidityConstraints: Indicates that any validity constraints + * should be ignored. This should be used when you need to get every + * match regardless of whether it's valid. + * + * @property _owner The query instance that holds the list of unions... + * @property _constraints A list of (lists of OR constraints) that are ANDed + * together. For example [[FROM bob, FROM jim], [DATE last week]] would + * be requesting us to find all the messages from either bob or jim, and + * sent in the last week. + * @property _unions A list of other queries whose results are unioned with our + * own. There is no concept of nesting or sub-queries apart from this + * mechanism. + */ +function GlodaQueryClass(aOptions) { + this.options = (aOptions != null) ? 
aOptions : {}; + + // if we are an 'or' clause, who is our parent whom other 'or' clauses should + // spawn from... + this._owner = null; + // our personal chain of and-ing. + this._constraints = []; + // the other instances we union with + this._unions = []; + + this._order = []; + this._limit = 0; +} + +GlodaQueryClass.prototype = { + WILDCARD: {}, + + get constraintCount() { + return this._constraints.length; + }, + + or: function gloda_query_or() { + let owner = this._owner || this; + let orQuery = new this._queryClass(); + orQuery._owner = owner; + owner._unions.push(orQuery); + return orQuery; + }, + + orderBy: function gloda_query_orderBy() { + for (let iArg = 0; iArg < arguments.length; iArg++) { + let arg = arguments[iArg]; + this._order.push(arg); + } + return this; + }, + + limit: function gloda_query_limit(aLimit) { + this._limit = aLimit; + return this; + }, + + /** + * Return a collection asynchronously populated by this collection. You must + * provide a listener to receive notifications from the collection as it + * receives updates. The listener object should implement onItemsAdded, + * onItemsModified, and onItemsRemoved methods, all of which take a single + * argument which is the list of items which have been added, modified, or + * removed respectively. + * + * @param aListener The collection listener. + * @param [aData] The data attribute to set on the collection. + * @param [aArgs.becomeExplicit] Make the collection explicit so that the + * collection will only ever contain results found from the database + * query and the query will not be updated as new items are indexed that + * also match the query. + * @param [aArgs.becomeNull] Change the collection's query to a null query so + * that it will never receive any additional added/modified/removed events + * apart from the underlying database query. This is really only intended + * for gloda internal use but may be acceptable for non-gloda use. 
Please + * ask on mozilla.dev.apps.thunderbird first to make sure there isn't a + * better solution for your use-case. (Note: removals will still happen + * when things get fully deleted.) + */ + getCollection: function gloda_query_getCollection(aListener, aData, aArgs) { + this.completed = false; + return this._nounDef.datastore.queryFromQuery(this, aListener, aData, + /* aExistingCollection */ null, /* aMasterCollection */ null, + aArgs); + }, + + /** + * Test whether the given first-class noun instance satisfies this query. + * + * @testpoint gloda.query.test + */ + test: function gloda_query_test(aObj) { + // when changing this method, be sure that GlodaDatastore's queryFromQuery + // method likewise has any required changes made. + let unionQueries = [this].concat(this._unions); + + for (let iUnion = 0; iUnion < unionQueries.length; iUnion++) { + let curQuery = unionQueries[iUnion]; + + // assume success until a specific (or) constraint proves us wrong + let querySatisfied = true; + for (let iConstraint = 0; iConstraint < curQuery._constraints.length; + iConstraint++) { + let constraint = curQuery._constraints[iConstraint]; + let [constraintType, attrDef] = constraint; + let boundName = attrDef ? attrDef.boundName : "id"; + if ((boundName in aObj) && + aObj[boundName] === GlodaDatastore.IGNORE_FACET) { + querySatisfied = false; + break; + } + + let constraintValues = constraint.slice(2); + + if (constraintType === GlodaDatastore.kConstraintIdIn) { + if (constraintValues.indexOf(aObj.id) == -1) { + querySatisfied = false; + break; + } + } + // @testpoint gloda.query.test.kConstraintIn + else if ((constraintType === GlodaDatastore.kConstraintIn) || + (constraintType === GlodaDatastore.kConstraintEquals)) { + let objectNounDef = attrDef.objectNounDef; + + // if they provide an equals comparator, use that. 
+ // (note: the next case has better optimization possibilities than + // this mechanism, but of course has higher initialization costs or + // code complexity costs...) + if (objectNounDef.equals) { + let testValues; + if (!(boundName in aObj)) + testValues = []; + else if (attrDef.singular) + testValues = [aObj[boundName]]; + else + testValues = aObj[boundName]; + + // If there are no constraints, then we are just testing for there + // being a value. Succeed (continue) in that case. + if (constraintValues.length == 0 && testValues.length && + testValues[0] != null) + continue; + + // If there are no test values and the empty set is significant, + // then check if any of the constraint values are null (our + // empty indicator.) + if (testValues.length == 0 && attrDef.emptySetIsSignificant) { + let foundEmptySetSignifier = false; + for (let constraintValue of constraintValues) { + if (constraintValue == null) { + foundEmptySetSignifier = true; + break; + } + } + if (foundEmptySetSignifier) + continue; + } + + let foundMatch = false; + for (let testValue of testValues) { + for (let value of constraintValues) { + if (objectNounDef.equals(testValue, value)) { + foundMatch = true; + break; + } + } + if (foundMatch) + break; + } + if (!foundMatch) { + querySatisfied = false; + break; + } + } + // otherwise, we need to convert everyone to their param/value form + // in order to test for equality + else { + // let's just do the simple, obvious thing for now. which is + // what we did in the prior case but exploding values using + // toParamAndValue, and then comparing. + let testValues; + if (!(boundName in aObj)) + testValues = []; + else if (attrDef.singular) + testValues = [aObj[boundName]]; + else + testValues = aObj[boundName]; + + // If there are no constraints, then we are just testing for there + // being a value. Succeed (continue) in that case. 
+ if (constraintValues.length == 0 && testValues.length && + testValues[0] != null) + continue; + // If there are no test values and the empty set is significant, + // then check if any of the constraint values are null (our + // empty indicator.) + if (testValues.length == 0 && attrDef.emptySetIsSignificant) { + let foundEmptySetSignifier = false; + for (let constraintValue of constraintValues) { + if (constraintValue == null) { + foundEmptySetSignifier = true; + break; + } + } + if (foundEmptySetSignifier) + continue; + } + + let foundMatch = false; + for (let testValue of testValues) { + let [aParam, aValue] = objectNounDef.toParamAndValue(testValue); + for (let value of constraintValues) { + // skip empty set check sentinel values + if (value == null && attrDef.emptySetIsSignificant) + continue; + let [bParam, bValue] = objectNounDef.toParamAndValue(value); + if (aParam == bParam && aValue == bValue) { + foundMatch = true; + break; + } + } + if (foundMatch) + break; + } + if (!foundMatch) { + querySatisfied = false; + break; + } + } + } + // @testpoint gloda.query.test.kConstraintRanges + else if (constraintType === GlodaDatastore.kConstraintRanges) { + let objectNounDef = attrDef.objectNounDef; + + let testValues; + if (!(boundName in aObj)) + testValues = []; + else if (attrDef.singular) + testValues = [aObj[boundName]]; + else + testValues = aObj[boundName]; + + let foundMatch = false; + for (let testValue of testValues) { + let [tParam, tValue] = objectNounDef.toParamAndValue(testValue); + for (let rangeTuple of constraintValues) { + let [lowerRValue, upperRValue] = rangeTuple; + if (lowerRValue == null) { + let [upperParam, upperValue] = + objectNounDef.toParamAndValue(upperRValue); + if (tParam == upperParam && tValue <= upperValue) { + foundMatch = true; + break; + } + } + else if (upperRValue == null) { + let [lowerParam, lowerValue] = + objectNounDef.toParamAndValue(lowerRValue); + if (tParam == lowerParam && tValue >= lowerValue) { + foundMatch = 
true; + break; + } + } + else { // no one is null + let [upperParam, upperValue] = + objectNounDef.toParamAndValue(upperRValue); + let [lowerParam, lowerValue] = + objectNounDef.toParamAndValue(lowerRValue); + if ((tParam == lowerParam) && (tValue >= lowerValue) && + (tParam == upperParam) && (tValue <= upperValue)) { + foundMatch = true; + break; + } + } + } + if (foundMatch) + break; + } + if (!foundMatch) { + querySatisfied = false; + break; + } + } + // @testpoint gloda.query.test.kConstraintStringLike + else if (constraintType === GlodaDatastore.kConstraintStringLike) { + let curIndex = 0; + let value = (boundName in aObj) ? aObj[boundName] : ""; + // the attribute must be singular, we don't support arrays of strings. + for (let valuePart of constraintValues) { + if (typeof valuePart == "string") { + let index = value.indexOf(valuePart); + // if curIndex is null, we just need any match + // if it's not null, it must match the offset of our found match + if (curIndex === null) { + if (index == -1) + querySatisfied = false; + else + curIndex = index + valuePart.length; + } + else { + if (index != curIndex) + querySatisfied = false; + else + curIndex = index + valuePart.length; + } + if (!querySatisfied) + break; + } + else // wild! + curIndex = null; + } + // curIndex must be null or equal to the length of the string + if (querySatisfied && curIndex !== null && curIndex != value.length) + querySatisfied = false; + } + // @testpoint gloda.query.test.kConstraintFulltext + else if (constraintType === GlodaDatastore.kConstraintFulltext) { + // this is beyond our powers. Even if we have the fulltext content in + // memory, which we may not, the tokenization and such to perform + // the testing gets very complicated in the face of i18n, etc. + // so, let's fail if the item is not already in the collection, and + // let the testing continue if it is. (some other constraint may no + // longer apply...) 
+ if (!(aObj.id in this.collection._idMap)) + querySatisfied = false; + } + + if (!querySatisfied) + break; + } + + if (querySatisfied) + return true; + } + return false; + }, + + /** + * Helper code for noun definitions of queryHelpers that want to build a + * traditional in/equals constraint. The goal is to let them build a range + * without having to know how we structure |_constraints|. + * + * @protected + */ + _inConstraintHelper: + function gloda_query__discreteConstraintHelper(aAttrDef, aValues) { + let constraint = + [GlodaDatastore.kConstraintIn, aAttrDef].concat(aValues); + this._constraints.push(constraint); + return this; + }, + + /** + * Helper code for noun definitions of queryHelpers that want to build a + * range. The goal is to let them build a range without having to know how + * we structure |_constraints| or requiring them to mark themselves as + * continuous to get a "Range". + * + * @protected + */ + _rangedConstraintHelper: + function gloda_query__rangedConstraintHelper(aAttrDef, aRanges) { + let constraint = + [GlodaDatastore.kConstraintRanges, aAttrDef].concat(aRanges); + this._constraints.push(constraint); + return this; + } +}; + +/** + * @class A query that never matches anything. + * + * Collections corresponding to this query are intentionally frozen in time and + * do not want to be notified of any updates. We need the collection to be + * registered with the collection manager so that the noun instances in the + * collection are always 'reachable' via the collection for as long as we might + * be handing out references to the instances. (The other way to avoid updates + * would be to not register the collection, but then items might not be + * reachable.) + * This is intended to be used in implementation details behind the gloda + * abstraction barrier. For example, the message indexer likes to be able + * to represent 'ghost' and deleted messages, but these should never be exposed + * to the user. 
For code simplicity, it wants to be able to use the query + * mechanism. But it doesn't want updates that are effectively + * nonsensical. For example, a ghost message that is reused by message + * indexing may already be present in a collection; when the collection manager + * receives an itemsAdded event, a GlodaExplicitQueryClass would result in + * an item added notification in that case, which would wildly not be desired. + */ +function GlodaNullQueryClass() { +} + +GlodaNullQueryClass.prototype = { + /** + * No options; they are currently only needed for SQL query generation, which + * does not happen for null queries. + */ + options: {}, + + /** + * Provide a duck-typing way of indicating to GlodaCollectionManager that our + * associated collection just doesn't want anything to change. Our test + * function is able to convey most of it, but special-casing has to happen + * somewhere, so it happens here. + */ + frozen: true, + + /** + * Since our query never matches anything, it doesn't make sense to let + * someone attempt to construct a boolean OR involving us. + * + * @returns null + */ + or: function() { + return null; + }, + + /** + * Return nothing (null) because it does not make sense to create a collection + * based on a null query. This method is normally used (on a normal query) + * to return a collection populated by the constraints of the query. We + * match nothing, so we should return nothing. More importantly, you are + * currently doing something wrong if you try and do this, so null is + * appropriate. It may turn out that it makes sense for us to return an + * empty collection in the future for sentinel value purposes, but we'll + * cross that bridge when we come to it. + * + * @returns null + */ + getCollection: function() { + return null; + }, + + /** + * Never matches anything. + * + * @param aObj The object someone wants us to test for relevance to our + * associated collection. But we don't care! Not a fig! 
+ * @returns false + */ + test: function gloda_query_null_test(aObj) { + return false; + } +}; + +/** + * @class A query that only 'tests' for already belonging to the collection. + * + * This type of collection is useful for when you (or rather your listener) + * are interested in hearing about modifications to your collection or removals + * from your collection because of deletion, but do not want to be notified + * about newly indexed items matching your normal query constraints. + * + * @param aCollection The collection this query belongs to. This needs to be + * passed-in here or the collection should set the attribute directly when + * the query is passed in to a collection's constructor. + */ +function GlodaExplicitQueryClass(aCollection) { + this.collection = aCollection; +} + +GlodaExplicitQueryClass.prototype = { + /** + * No options; they are currently only needed for SQL query generation, which + * does not happen for explicit queries. + */ + options: {}, + + /** + * Since our query is intended to only match the contents of our collection, + * it doesn't make sense to let someone attempt to construct a boolean OR + * involving us. + * + * @returns null + */ + or: function() { + return null; + }, + + /** + * Return nothing (null) because it does not make sense to create a collection + * based on an explicit query. This method is normally used (on a normal + * query) to return a collection populated by the constraints of the query. + * In the case of an explicit query, we expect it will be associated with + * either a hand-created collection or the results of a normal query that is + * immediately converted into an explicit query. In all likelihood, calling + * this method on an instance of this type is an error, so it is helpful to + * return null because people will error hard. 
+ * + * @returns null + */ + getCollection: function() { + return null; + }, + + /** + * Matches only items that are already in the collection associated with this + * query (by id). + * + * @param aObj The object/item to test for already being in the associated + * collection. + * @returns true when the object is in the associated collection, otherwise + * false. + */ + test: function gloda_query_explicit_test(aObj) { + return (aObj.id in this.collection._idMap); + } +}; + +/** + * @class A query that 'tests' true for everything. Intended for debugging purposes + * only. + */ +function GlodaWildcardQueryClass() { +} + +GlodaWildcardQueryClass.prototype = { + /** + * No options; they are currently only needed for SQL query generation. + */ + options: {}, + + // don't let people try and mess with us + or: function() { return null; }, + // don't let people try and query on us (until we have a real use case for + // that...) + getCollection: function() { return null; }, + /** + * Everybody wins! + */ + test: function gloda_query_explicit_test(aObj) { + return true; + } +}; + +/** + * Factory method to effectively create per-noun subclasses of GlodaQueryClass, + * GlodaNullQueryClass, GlodaExplicitQueryClass, and GlodaWildcardQueryClass. + * For GlodaQueryClass this allows us to add per-noun helpers. For the others, + * this is merely a means of allowing us to attach the (per-noun) nounDef to + * the 'class'. 
+ */ +function GlodaQueryClassFactory(aNounDef) { + let newQueryClass = function(aOptions) { + GlodaQueryClass.call(this, aOptions); + }; + newQueryClass.prototype = new GlodaQueryClass(); + newQueryClass.prototype._queryClass = newQueryClass; + newQueryClass.prototype._nounDef = aNounDef; + + let newNullClass = function(aCollection) { + GlodaNullQueryClass.call(this); + this.collection = aCollection; + }; + newNullClass.prototype = new GlodaNullQueryClass(); + newNullClass.prototype._queryClass = newNullClass; + newNullClass.prototype._nounDef = aNounDef; + + let newExplicitClass = function(aCollection) { + GlodaExplicitQueryClass.call(this); + this.collection = aCollection; + }; + newExplicitClass.prototype = new GlodaExplicitQueryClass(); + newExplicitClass.prototype._queryClass = newExplicitClass; + newExplicitClass.prototype._nounDef = aNounDef; + + let newWildcardClass = function(aCollection) { + GlodaWildcardQueryClass.call(this); + this.collection = aCollection; + }; + newWildcardClass.prototype = new GlodaWildcardQueryClass(); + newWildcardClass.prototype._queryClass = newWildcardClass; + newWildcardClass.prototype._nounDef = aNounDef; + + return [newQueryClass, newNullClass, newExplicitClass, newWildcardClass]; +} diff --git a/mailnews/db/gloda/modules/suffixtree.js b/mailnews/db/gloda/modules/suffixtree.js new file mode 100644 index 000000000..009ad5f9d --- /dev/null +++ b/mailnews/db/gloda/modules/suffixtree.js @@ -0,0 +1,340 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +this.EXPORTED_SYMBOLS = ["SuffixTree", "MultiSuffixTree"]; + +/** + * Given a list of strings and a corresponding map of items that those strings + * correspond to, build a suffix tree. 
 */
function MultiSuffixTree(aStrings, aItems) {
  if (aStrings.length != aItems.length)
    throw new Error("Array lengths need to be the same.");

  // Concatenate all of the strings into one big string; remember, for each
  // source string, its [start, end) offsets into the big string plus the item
  // it maps to. (Triples are stored flattened in offsetsToItems.)
  let s = '';
  let offsetsToItems = [];
  let lastLength = 0;
  for (let i = 0; i < aStrings.length; i++) {
    s += aStrings[i];
    offsetsToItems.push(lastLength, s.length, aItems[i]);
    lastLength = s.length;
  }

  this._construct(s);
  this._offsetsToItems = offsetsToItems;
  this._numItems = aItems.length;
}

/**
 * A suffix-tree node/state. |start|/|end| delimit (exclusive end) the edge
 * label leading INTO this state within the tree's backing string; |suffix|
 * is the Ukkonen suffix link.
 * @constructor
 */
function State(aStartIndex, aEndIndex, aSuffix) {
  this.start = aStartIndex;
  this.end = aEndIndex;
  this.suffix = aSuffix;
}

// Shim: provide a dump() fallback (presumably for a plain JS shell that has
// print() but no dump()) -- NOTE(review): relies on |var| not clobbering an
// existing global dump; confirm in the actual load context.
var dump;
if (dump === undefined) {
  dump = function(a) {
    print(a.slice(0, -1));
  };
}

/**
 * Since objects are basically hash-tables anyways, we simply create an
 * attribute whose name is the first letter of the edge string. (So, the
 * edge string can conceptually be a multi-letter string, but since we would
 * split it were there any ambiguity, it's okay to just use the single letter.)
 * This avoids having to update the attribute name or worry about tripping our
 * implementation up.
 */
State.prototype = {
  get isExplicit() {
    // our end is not inclusive...
    return (this.end <= this.start);
  },
  get isImplicit() {
    // our end is not inclusive...
    return (this.end > this.start);
  },

  // length of the edge label leading into this state
  get length() {
    return this.end - this.start;
  },

  toString: function State_toString() {
    return "[Start: " + this.start + " End: " + this.end +
           (this.suffix ? " non-null suffix]" : " null suffix]");
  }
};

/**
 * Suffix tree implemented using Ukkonen's algorithm.
 * @constructor
 */
function SuffixTree(aStr) {
  this._construct(aStr);
}

/**
 * States are nodes whose single-character attributes serve as the edge
 * transitions (see the State.prototype comment above).
 */
SuffixTree.prototype = {
  /**
   * Find all items matching the provided substring.
   *
   * Walks edges from the root matching aSubstring character-run by
   * character-run; every item whose source string contains the substring is
   * reachable beneath the state we end on.
   */
  findMatches: function findMatches(aSubstring) {
    let results = [];
    let state = this._root;
    let index = 0;
    let end = aSubstring.length;
    while (index < end) {
      state = state[aSubstring[index]];
      // bail if there was no edge
      if (state === undefined)
        return results;
      // bail if the portion of the edge we traversed is not equal to that
      //  portion of our pattern
      let actualTraverseLength = Math.min(state.length,
                                          end - index);
      if (this._str.substring(state.start,
                              state.start + actualTraverseLength) !=
          aSubstring.substring(index, index + actualTraverseLength))
        return results;
      index += state.length;
    }

    // state should now be the node which itself and all its children match...
    // The delta is to adjust us to the offset of the last letter of our match;
    //  the edge we traversed to get here may have found us traversing more
    //  than we wanted.
    // index - end captures the over-shoot of the edge traversal,
    // index - end + 1 captures the fact that we want to find the last letter
    //  that matched, not just the first letter beyond it
    // However, if this state is a leaf node (end == 'infinity'), then 'end'
    //  isn't describing an edge at all and we want to avoid accounting for it.
    let delta;
    /*
    if (state.end != this._infinity)
      //delta = index - end + 1;
      delta = end - (index - state.length);
    else */
    delta = index - state.length - end + 1;

    this._resultGather(state, results, {}, end, delta, true);
    return results;
  },

  /**
   * Recursively collect the items reachable under aState into aResults,
   * de-duplicating via aPresence (keyed by source-string start offset).
   */
  _resultGather: function resultGather(aState, aResults, aPresence,
                                       aPatLength, aDelta, alreadyAdjusted) {
    // find the item that this state originated from based on the state's
    // start character. offsetToItem holds [string start index, string end
    // index (exclusive), item reference]. So we want to binary search to
    // find the string whose start/end index contains the state's start index.
    let low = 0;
    let high = this._numItems - 1;
    let mid, stringStart, stringEnd;

    let patternLast = aState.start - aDelta;
    while (low <= high) {
      mid = low + Math.floor((high - low) / 2); // excessive, especially with js nums
      stringStart = this._offsetsToItems[mid * 3];
      let startDelta = stringStart - patternLast;
      stringEnd = this._offsetsToItems[mid * 3 + 1];
      let endDelta = stringEnd - patternLast;
      if (startDelta > 0)
        high = mid - 1;
      else if (endDelta <= 0)
        low = mid + 1;
      else {
        break;
      }
    }

    // - The match occurred completely inside a source string. Success.
    // - The match spans more than one source strings, and is therefore not
    //   a match.

    // at this point, we have located the origin string that corresponds to the
    // start index of this state.
    // - The match terminated with the end of the preceding string, and does
    //   not match us at all. We, and potentially our children, are merely
    //   serving as a unique terminal.
    // - The

    let patternFirst = patternLast - (aPatLength - 1);

    // only count the item if the whole pattern lies inside its source string
    if (patternFirst >= stringStart) {
      if (!(stringStart in aPresence)) {
        aPresence[stringStart] = true;
        aResults.push(this._offsetsToItems[mid * 3 + 2]);
      }
    }

    // bail if we had it coming OR
    // if the result terminates at/part-way through this state, meaning any
    //  of its children are not going to be actual results, just hangers
    //  on.
/*
    if (bail || (end <= aState.end)) {
dump("  bailing! (bail was: " + bail + ")\n");
      return;
    }
*/
    // process our children...
    for (let key in aState) {
      // edges have attributes of length 1...
      if (key.length == 1) {
        let statePrime = aState[key];
        this._resultGather(statePrime, aResults, aPresence, aPatLength,
                           aDelta + aState.length, //(alreadyAdjusted ? 0 : aState.length),
                           false);
      }
    }
  },

  /**
   * Given a reference 'pair' of a state and a string (may be 'empty'=explicit,
   * which means no work to do and we return immediately) follow that state
   * (and then the successive states)'s transitions until we run out of
   * transitions. This happens either when we find an explicit state, or
   * find ourselves partially along an edge (conceptually speaking). In
   * the partial case, we return the state prior to the edge traversal.
   * (The information about the 'edge' is contained on its target State;
   * we can do this because a state is only referenced by one other state.)
   */
  _canonize: function canonize(aState, aStart, aEnd) {
    if (aEnd <= aStart) {
      return [aState, aStart];
    }

    let statePrime;
    // we treat an aState of null as 'bottom', which has transitions for every
    //  letter in the alphabet to 'root'. rather than create all those
    //  transitions, we special-case here.
    if (aState === null)
      statePrime = this._root;
    else
      statePrime = aState[this._str[aStart]];
    while (statePrime.length <= aEnd - aStart) { // (no 1 adjustment required)
      aStart += statePrime.length;
      aState = statePrime;
      if (aStart < aEnd) {
        statePrime = aState[this._str[aStart]];
      }
    }
    return [aState, aStart];
  },

  /**
   * Given a reference 'pair' whose state may or may not be explicit (and for
   * which we will perform the required splitting to make it explicit), test
   * whether it already possesses a transition corresponding to the provided
   * character.
   * @return A list of: whether we had to make it explicit, the (potentially)
   *     new explicit state.
   */
  _testAndSplit: function testAndSplit(aState, aStart, aEnd, aChar) {
    if (aStart < aEnd) { // it's not explicit
      let statePrime = aState[this._str[aStart]];
      let length = aEnd - aStart;
      if (aChar == this._str[statePrime.start + length]) {
        return [true, aState];
      }
      else {
        // do splitting... aState -> rState -> statePrime
        let rState = new State(statePrime.start, statePrime.start + length);
        aState[this._str[statePrime.start]] = rState;
        statePrime.start += length;
        rState[this._str[statePrime.start]] = statePrime;
        return [false, rState];
      }
    }
    else { // it's already explicit
      if (aState === null) { // bottom case... shouldn't happen, but hey.
        return [true, aState];
      }
      return [(aChar in aState), aState];
    }

  },

  /**
   * Ukkonen's per-character 'update' step: extend the tree with the character
   * at aIndex, creating new leaf states and wiring suffix links along the
   * boundary path.
   */
  _update: function update(aState, aStart, aIndex) {
    let oldR = this._root;
    let textAtIndex = this._str[aIndex]; // T sub i (0-based corrected...)
    // because of the way we store the 'end' value as a one-past form, we do
    //  not need to subtract 1 off of aIndex.
    let [endPoint, rState] = this._testAndSplit(aState, aStart, aIndex, //no -1
                                                textAtIndex);
    while (!endPoint) {
      // new leaf; 'infinity' means the edge extends to the (growing) end
      let rPrime = new State(aIndex, this._infinity);
      rState[textAtIndex] = rPrime;
      if (oldR !== this._root)
        oldR.suffix = rState;
      oldR = rState;
      [aState, aStart] = this._canonize(aState.suffix, aStart, aIndex); // no -1
      [endPoint, rState] = this._testAndSplit(aState, aStart, aIndex, // no -1
                                              textAtIndex);
    }
    if (oldR !== this._root)
      oldR.suffix = aState;

    return [aState, aStart];
  },

  /**
   * Build the tree over aStr by feeding Ukkonen's update/canonize steps one
   * character at a time.
   */
  _construct: function construct(aStr) {
    this._str = aStr;
    // just needs to be longer than the string.
    this._infinity = aStr.length + 1;

    //this._bottom = new State(0, -1, null);
    this._root = new State(-1, 0, null); // null === bottom
    let state = this._root;
    let start = 0;

    for (let i = 0; i < aStr.length; i++) {
      [state, start] = this._update(state, start, i); // treat as flowing -1...
      [state, start] = this._canonize(state, start, i + 1); // 1-length string
    }
  },

  /**
   * Debug helper: recursively print the tree via dump(). Edge labels longer
   * than 10 characters are snipped with '...'.
   */
  dump: function SuffixTree_show(aState, aIndent, aKey) {
    if (aState === undefined)
      aState = this._root;
    if (aIndent === undefined) {
      aIndent = "";
      aKey = ".";
    }

    if (aState.isImplicit) {
      let snip;
      if (aState.length > 10)
        snip = this._str.slice(aState.start,
                               Math.min(aState.start + 10, this._str.length)) + "...";
      else
        snip = this._str.slice(aState.start,
                               Math.min(aState.end, this._str.length));
      dump(aIndent + aKey + ":" + snip + "(" +
           aState.start + ":" + aState.end + ")\n");
    }
    else
      dump(aIndent + aKey + ": (explicit:" + aState.start + ":" + aState.end + ")\n");
    let nextIndent = aIndent + "  ";
    // only single-character attributes are edge transitions
    let keys = Object.keys(aState).filter(c => c.length == 1);
    for (let key of keys) {
      this.dump(aState[key], nextIndent, key);
    }
  }
};
// MultiSuffixTree shares the whole SuffixTree implementation; it only differs
// in its constructor (which concatenates the source strings).
MultiSuffixTree.prototype = SuffixTree.prototype;

// Manual smoke-test / usage example (not exported, not called in production).
function examplar() {
  let names = ["AndrewSmith", "AndrewJones", "MarkSmith", "BryanClark",
               "MarthaJones", "DavidAscher", "DanMosedale", "DavidBienvenu",
               "JanetDavis", "JosephBryant"];
  let b = new MultiSuffixTree(names, names);
  b.dump();
  dump(b.findMatches("rya") + "\n");
}
diff --git a/mailnews/db/gloda/modules/utils.js b/mailnews/db/gloda/modules/utils.js
new file mode 100644
index 000000000..2a095c427
--- /dev/null
+++ b/mailnews/db/gloda/modules/utils.js
@@ -0,0 +1,155 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

this.EXPORTED_SYMBOLS = ['GlodaUtils'];

var Cc = Components.classes;
var Ci = Components.interfaces;
var Cr = Components.results;
var Cu = Components.utils;

Cu.import("resource:///modules/mailServices.js");

/**
 * @namespace A holding place for logic that is not gloda-specific and should
 *  reside elsewhere.
+ */ +var GlodaUtils = { + + /** + * This Regexp is super-complicated and used at least in two different parts of + * the code, so let's expose it from one single location. + */ + PART_RE: new RegExp("^[^?]+\\?(?:/;section=\\d+\\?)?(?:[^&]+&)*part=([^&]+)(?:&[^&]+)*$"), + + deMime: function gloda_utils_deMime(aString) { + return MailServices.mimeConverter.decodeMimeHeader(aString, null, false, true); + }, + + _headerParser: MailServices.headerParser, + + /** + * Parses an RFC 2822 list of e-mail addresses and returns an object with + * 4 attributes, as described below. We will use the example of the user + * passing an argument of '"Bob Smith" <bob@example.com>'. + * + * This method (by way of nsIMsgHeaderParser) takes care of decoding mime + * headers, but is not aware of folder-level character set overrides. + * + * count: the number of addresses parsed. (ex: 1) + * addresses: a list of e-mail addresses (ex: ["bob@example.com"]) + * names: a list of names (ex: ["Bob Smith"]) + * fullAddresses: aka the list of name and e-mail together (ex: ['"Bob Smith" + * <bob@example.com>']). + * + * This method is a convenience wrapper around nsIMsgHeaderParser. + */ + parseMailAddresses: function gloda_utils_parseMailAddresses(aMailAddresses) { + let addresses = {}, names = {}, fullAddresses = {}; + this._headerParser.parseHeadersWithArray(aMailAddresses, addresses, + names, fullAddresses); + return {names: names.value, addresses: addresses.value, + fullAddresses: fullAddresses.value, + count: names.value.length}; + }, + + /** + * MD5 hash a string and return the hex-string result. Impl from nsICryptoHash + * docs. + */ + md5HashString: function gloda_utils_md5hash(aString) { + let converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]. + createInstance(Ci.nsIScriptableUnicodeConverter); + let trash = {}; + converter.charset = "UTF-8"; + let data = converter.convertToByteArray(aString, trash); + + let hasher = Cc['@mozilla.org/security/hash;1']. 
+ createInstance(Ci.nsICryptoHash); + hasher.init(Ci.nsICryptoHash.MD5); + hasher.update(data, data.length); + let hash = hasher.finish(false); + + // return the two-digit hexadecimal code for a byte + function toHexString(charCode) { + return ("0" + charCode.toString(16)).slice(-2); + } + + // convert the binary hash data to a hex string. + let hex = Object.keys(hash).map(i => toHexString(hash.charCodeAt(i))); + return hex.join(""); + }, + + getCardForEmail: function gloda_utils_getCardForEmail(aAddress) { + // search through all of our local address books looking for a match. + let enumerator = MailServices.ab.directories; + let cardForEmailAddress; + let addrbook; + while (!cardForEmailAddress && enumerator.hasMoreElements()) + { + addrbook = enumerator.getNext().QueryInterface(Ci.nsIAbDirectory); + try + { + cardForEmailAddress = addrbook.cardForEmailAddress(aAddress); + if (cardForEmailAddress) + return cardForEmailAddress; + } catch (ex) {} + } + + return null; + }, + + _FORCE_GC_AFTER_NUM_HEADERS: 4096, + _headersSeen: 0, + /** + * As |forceGarbageCollection| says, once XPConnect sees a header, it likes + * to hold onto that reference. This method is used to track the number of + * headers we have seen and force a GC when we have to. + * + * Ideally the indexer's idle-biased GC mechanism would take care of all the + * GC; we are just a failsafe to make sure that our memory usage is bounded + * based on the number of headers we have seen rather than just time. + * Since holding onto headers can keep databases around too, this also + * helps avoid keeping file handles open, etc. + * + * |forceGarbageCollection| will zero our tracking variable when a GC happens + * so we are informed by the indexer's GC triggering. + * + * And of course, we don't want to trigger collections willy nilly because + * they have a cost even if there is no garbage. + * + * @param aNumHeadersSeen The number of headers code has seen. 
A granularity + * of hundreds of messages should be fine. + */ + considerHeaderBasedGC: function(aNumHeadersSeen) { + this._headersSeen += aNumHeadersSeen; + if (this._headersSeen >= this._FORCE_GC_AFTER_NUM_HEADERS) + this.forceGarbageCollection(); + }, + + /** + * Force a garbage-collection sweep. Gloda has to force garbage collection + * periodically because XPConnect's XPCJSRuntime::DeferredRelease mechanism + * can end up holding onto a ridiculously high number of XPConnect objects in + * between normal garbage collections. This has mainly posed a problem + * because nsAutolock is a jerk in DEBUG builds in 1.9.1, but in theory this + * also helps us even out our memory usage. + * We also are starting to do this more to try and keep the garbage collection + * durations acceptable. We intentionally avoid triggering the cycle + * collector in those cases, as we do presume a non-trivial fixed cost for + * cycle collection. (And really all we want is XPConnect to not be a jerk.) + * This method exists mainly to centralize our GC activities and because if + * we do start involving the cycle collector, that is a non-trivial block of + * code to copy-and-paste all over the place (at least in a module). + * + * @param aCycleCollecting Do we need the cycle collector to run? Currently + * unused / unimplemented, but we would use + * nsIDOMWindowUtils.garbageCollect() to do so. + */ + forceGarbageCollection: + function gloda_utils_garbageCollection(aCycleCollecting) { + Cu.forceGC(); + this._headersSeen = 0; + } +}; |