Diffstat (limited to 'mailnews/db/gloda/modules/index_msg.js')
-rw-r--r-- | mailnews/db/gloda/modules/index_msg.js | 3334 |
1 file changed, 3334 insertions, 0 deletions
diff --git a/mailnews/db/gloda/modules/index_msg.js b/mailnews/db/gloda/modules/index_msg.js
new file mode 100644
index 000000000..d4d7a8ceb
--- /dev/null
+++ b/mailnews/db/gloda/modules/index_msg.js
@@ -0,0 +1,3334 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+/*
+ * This file currently contains a fairly general implementation of
+ * asynchronous indexing with a very explicit message indexing
+ * implementation.  As gloda will eventually want to index more than just
+ * messages, the message-specific things should ideally lose their special
+ * hold on this file.  This will benefit readability/size as well.
+ */
+
+this.EXPORTED_SYMBOLS = ['GlodaMsgIndexer'];
+
+var Cc = Components.classes;
+var Ci = Components.interfaces;
+var Cr = Components.results;
+var Cu = Components.utils;
+
+Cu.import("resource://gre/modules/XPCOMUtils.jsm");
+Cu.import("resource:///modules/iteratorUtils.jsm");
+Cu.import("resource:///modules/mailServices.js");
+Cu.import("resource:///modules/MailUtils.js");
+
+Cu.import("resource:///modules/gloda/log4moz.js");
+
+Cu.import("resource:///modules/gloda/utils.js");
+Cu.import("resource:///modules/gloda/datastore.js");
+Cu.import("resource:///modules/gloda/datamodel.js");
+Cu.import("resource:///modules/gloda/gloda.js");
+Cu.import("resource:///modules/gloda/collection.js");
+Cu.import("resource:///modules/gloda/connotent.js");
+
+Cu.import("resource:///modules/gloda/indexer.js");
+
+Cu.import("resource:///modules/gloda/mimemsg.js");
+
+XPCOMUtils.defineLazyServiceGetter(this, "atomService",
+                                   "@mozilla.org/atom-service;1",
+                                   "nsIAtomService");
+
+// Components.results does not have mailnews error codes!
+var NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE = 0x80550005;
+
+var GLODA_MESSAGE_ID_PROPERTY = "gloda-id";
+/**
+ * Message header property to track dirty status; one of
+ * |GlodaIndexer.kMessageClean|, |GlodaIndexer.kMessageDirty|,
+ * |GlodaIndexer.kMessageFilthy|.
+ */
+var GLODA_DIRTY_PROPERTY = "gloda-dirty";
+
+/**
+ * The sentinel GLODA_MESSAGE_ID_PROPERTY value indicating that a message
+ * fails to index and we should not bother trying again, at least not until a
+ * new release is made.
+ *
+ * This should ideally just flip between 1 and 2, with GLODA_OLD_BAD_MESSAGE_ID
+ * flipping in the other direction.  If we start having more trailing badness,
+ * _indexerGetEnumerator and GLODA_OLD_BAD_MESSAGE_ID will need to be altered.
+ *
+ * When flipping this, be sure to update glodaTestHelper.js's copy.
+ */
+var GLODA_BAD_MESSAGE_ID = 2;
+/**
+ * The gloda id we used to use to mark messages as bad, but which should now
+ * be treated as eligible for indexing.  This is only ever used for
+ * consideration when creating msg header enumerators with
+ * `_indexerGetEnumerator`, which means we will only re-index such messages in
+ * an indexing sweep.  Accordingly, event-driven indexing will still treat
+ * such messages as unindexed (and unindexable) until an indexing sweep picks
+ * them up.
+ */
+var GLODA_OLD_BAD_MESSAGE_ID = 1;
+var GLODA_FIRST_VALID_MESSAGE_ID = 32;
+
+var JUNK_SCORE_PROPERTY = "junkscore";
+var JUNK_SPAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_SPAM_SCORE.toString();
+var JUNK_HAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_HAM_SCORE.toString();
+
+var nsIArray = Ci.nsIArray;
+var nsIMsgFolder = Ci.nsIMsgFolder;
+var nsIMsgLocalMailFolder = Ci.nsIMsgLocalMailFolder;
+var nsIMsgImapMailFolder = Ci.nsIMsgImapMailFolder;
+var nsIMsgDBHdr = Ci.nsIMsgDBHdr;
+var nsMsgFolderFlags = Ci.nsMsgFolderFlags;
+var nsMsgMessageFlags = Ci.nsMsgMessageFlags;
+var nsMsgProcessingFlags = Ci.nsMsgProcessingFlags;
+
+/**
+ * The processing flags that tell us that a message header has not yet been
+ * reported to us via msgsClassified.  If it has one of these flags, it is
+ * still being processed.
+ */
+var NOT_YET_REPORTED_PROCESSING_FLAGS =
+  nsMsgProcessingFlags.NotReportedClassified |
+  nsMsgProcessingFlags.ClassifyJunk;
+
+// for list comprehension fun
+function* range(begin, end) {
+  for (let i = begin; i < end; ++i) {
+    yield i;
+  }
+}
+
+/**
+ * We do not set properties on the messages until we perform a DB commit; this
+ * helper class tracks messages that we have indexed but that are not yet
+ * marked as such on their header.
+ */
+var PendingCommitTracker = {
+  /**
+   * Maps message URIs to their gloda ids.
+   *
+   * I am not entirely sure why I chose the URI for the key rather than
+   * gloda folder ID + message key.  Most likely it was to simplify debugging
+   * since the gloda folder ID is opaque while the URI is very informative.
+   * It is also possible I was afraid of IMAP folder renaming triggering a
+   * UID renumbering?
+   */
+  _indexedMessagesPendingCommitByKey: {},
+  /**
+   * Map from the pending commit gloda id to a tuple of [the corresponding
+   * message header, dirtyState].
+   */
+  _indexedMessagesPendingCommitByGlodaId: {},
+  /**
+   * Do we have a post-commit handler registered with this transaction yet?
+   */
+  _pendingCommit: false,
+
+  /**
+   * This function gets called when the commit actually happens, so that we
+   * can flush our message ids.
+   *
+   * It is very possible that by the time this call happens we have left the
+   * folder and nulled out msgDatabase on the folder.  Since nulling it out
+   * is what causes the commit, if we set the headers here without somehow
+   * forcing a commit, we will lose.  Badly.
+   * Accordingly, we make a list of all the folders that the headers belong
+   * to as we iterate, make sure to re-attach their msgDatabase before
+   * forgetting the headers, then make sure to zero the msgDatabase again,
+   * triggering a commit.  If there were a way to directly get the
+   * nsIMsgDatabase from the header we could do that and call commit
+   * directly.  We don't track databases along with the headers since the
+   * headers can change because of moves and that would increase the number
+   * of moving parts.
+   */
+  _commitCallback: function PendingCommitTracker_commitCallback() {
+    let foldersByURI = {};
+    let lastFolder = null;
+
+    for (let glodaId in
+         PendingCommitTracker._indexedMessagesPendingCommitByGlodaId) {
+      let [msgHdr, dirtyState] =
+        PendingCommitTracker._indexedMessagesPendingCommitByGlodaId[glodaId];
+      // Mark this message as indexed.
+      // It's conceivable the database could have gotten blown away, in which
+      // case the message headers are going to throw exceptions when we try
+      // and touch them.  So we wrap this in a try block that complains about
+      // this unforeseen circumstance.
+      // (noteFolderDatabaseGettingBlownAway should have been called and
+      // avoided this situation in all known situations.)
+      try {
+        let curGlodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
+        if (curGlodaId != glodaId)
+          msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, glodaId);
+        let headerDirty = msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY);
+        if (headerDirty != dirtyState)
+          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, dirtyState);
+
+        // Make sure this folder is in our foldersByURI map.
+        if (lastFolder == msgHdr.folder)
+          continue;
+        lastFolder = msgHdr.folder;
+        let folderURI = lastFolder.URI;
+        if (!(folderURI in foldersByURI))
+          foldersByURI[folderURI] = lastFolder;
+      }
+      catch (ex) {
+        GlodaMsgIndexer._log.error(
+          "Exception while attempting to mark message with gloda state " +
+          "after db commit", ex);
+      }
+    }
+
+    // it is vitally important to do this before we forget about the headers!
+    for (let uri in foldersByURI) {
+      let folder = foldersByURI[uri];
+      // This will not cause a parse.  The database is in-memory since we
+      // have a header that belongs to it.  This just causes the folder to
+      // re-acquire a reference from the database manager.
+      let ignoredDb = folder.msgDatabase;
+      // And this will cause a commit.  (And must be done since we don't want
+      // to cause a leak.)
+      folder.msgDatabase = null;
+    }
+
+    PendingCommitTracker._indexedMessagesPendingCommitByGlodaId = {};
+    PendingCommitTracker._indexedMessagesPendingCommitByKey = {};
+
+    PendingCommitTracker._pendingCommit = false;
+  },
+
+  /**
+   * Track a message header that should be marked with the given gloda id
+   * when the database commits.
+   */
+  track: function PendingCommitTracker_track(aMsgHdr, aGlodaId) {
+    let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey;
+    this._indexedMessagesPendingCommitByKey[pendingKey] = aGlodaId;
+    this._indexedMessagesPendingCommitByGlodaId[aGlodaId] =
+      [aMsgHdr, GlodaMsgIndexer.kMessageClean];
+
+    if (!this._pendingCommit) {
+      GlodaDatastore.runPostCommit(this._commitCallback);
+      this._pendingCommit = true;
+    }
+  },
+
+  /**
+   * Get the current state of a message header, given that we cannot rely on
+   * just looking at the header's properties because we defer setting those
+   * until the SQLite commit happens.
+   *
+   * @return Tuple of [gloda id, dirty status].
+   */
+  getGlodaState:
+    function PendingCommitTracker_getGlodaState(aMsgHdr) {
+    // If it's in the pending commit table, then the message is basically
+    // clean.  Return that info.
+    let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey;
+    if (pendingKey in this._indexedMessagesPendingCommitByKey) {
+      let glodaId =
+        PendingCommitTracker._indexedMessagesPendingCommitByKey[pendingKey];
+      return [glodaId,
+              this._indexedMessagesPendingCommitByGlodaId[glodaId][1]];
+    }
+    else {
+      // Otherwise the header's concept of state is correct.
+      let glodaId = aMsgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
+      let glodaDirty = aMsgHdr.getUint32Property(GLODA_DIRTY_PROPERTY);
+      return [glodaId, glodaDirty];
+    }
+  },
+
+  /**
+   * Update our structure to reflect moved headers.  Moves are currently
+   * treated as weakly interesting and do not require a reindexing, although
+   * collections will get notified.  So our job is to fix up the pending
+   * commit information if the message has a pending commit.
+   */
+  noteMove: function PendingCommitTracker_noteMove(aOldHdr, aNewHdr) {
+    let oldKey = aOldHdr.folder.URI + "#" + aOldHdr.messageKey;
+    if (!(oldKey in this._indexedMessagesPendingCommitByKey))
+      return;
+
+    let glodaId = this._indexedMessagesPendingCommitByKey[oldKey];
+    delete this._indexedMessagesPendingCommitByKey[oldKey];
+
+    let newKey = aNewHdr.folder.URI + "#" + aNewHdr.messageKey;
+    this._indexedMessagesPendingCommitByKey[newKey] = glodaId;
+
+    // only clobber the header, not the dirty state
+    this._indexedMessagesPendingCommitByGlodaId[glodaId][0] = aNewHdr;
+  },
+
+  /**
+   * A blind move is one where we have the source header but not the
+   * destination header.  This happens for IMAP messages that do not involve
+   * offline fake headers.
+   * XXX Since IMAP moves will propagate the gloda-id/gloda-dirty bits for
+   *  us, we could detect the other side of the move when it shows up as a
+   *  msgsClassified event and restore the mapping information.  Since the
+   *  offline fake header case should now cover the bulk of IMAP move
+   *  operations, we probably do not need to pursue this.
+   *
+   * We just re-dispatch to noteDirtyHeader because we can't do anything more
+   * clever.
+   */
+  noteBlindMove: function PendingCommitTracker_noteBlindMove(aOldHdr) {
+    this.noteDirtyHeader(aOldHdr);
+  },
+
+  /**
+   * If a message is dirty we should stop tracking it for post-commit
+   * purposes.  This is not so much because we don't want to write to its
+   * header when we commit as that we want to avoid |getGlodaState| reporting
+   * that the message is clean.  We could complicate our state by storing
+   * that information, but this is easier and ends up the same in the end.
+   */
+  noteDirtyHeader: function PendingCommitTracker_noteDirtyHeader(aMsgHdr) {
+    let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey;
+    if (!(pendingKey in this._indexedMessagesPendingCommitByKey))
+      return;
+
+    // (It is important that we get the gloda id from our own structure!)
+    let glodaId = this._indexedMessagesPendingCommitByKey[pendingKey];
+    this._indexedMessagesPendingCommitByGlodaId[glodaId][1] =
+      GlodaMsgIndexer.kMessageDirty;
+  },
+
+  /**
+   * Sometimes a folder database gets blown away.  This happens for one of
+   * two expected reasons right now:
+   * - Folder compaction.
+   * - Explicit reindexing of a folder via the folder properties "rebuild
+   *   index" button.
+   *
+   * When this happens, we are basically out of luck and need to discard
+   * everything about the folder.  The good news is that the folder
+   * compaction pass is clever enough to re-establish the linkages that are
+   * being lost when we drop these things on the floor.  Reindexing of a
+   * folder is not clever enough to deal with this but is an exceptional case
+   * of last resort (the user should not normally be performing a reindex as
+   * part of daily operation), so we accept that messages may be redundantly
+   * indexed.
+   */
+  noteFolderDatabaseGettingBlownAway:
+    function PendingCommitTracker_noteFolderDatabaseGettingBlownAway(
+      aMsgFolder) {
+    let uri = aMsgFolder.URI + "#";
+    for (let key in Iterator(this._indexedMessagesPendingCommitByKey, true)) {
+      // this is not as efficient as it could be, but compaction is
+      // relatively rare and the number of pending headers is generally going
+      // to be small.
+      if (key.indexOf(uri) == 0) {
+        delete this._indexedMessagesPendingCommitByKey[key];
+      }
+    }
+  },
+};
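
As a reading aid, here is the intended lifecycle of PendingCommitTracker in a minimal sketch. This is hypothetical driver code, not part of the patch; `msgHdr` and `glodaId` stand in for a header just written to the gloda database and its new row id:

  // 1) After writing the gloda row for a header, start tracking it.
  PendingCommitTracker.track(msgHdr, glodaId);
  // 2) Until the SQLite transaction commits, reads must go through the
  //    tracker rather than the header, whose properties are still stale:
  let [id, dirtyState] = PendingCommitTracker.getGlodaState(msgHdr);
  // 3) When GlodaDatastore commits, _commitCallback fires, copies the
  //    gloda-id/gloda-dirty values onto the headers, and forces a folder
  //    database commit by re-acquiring and then nulling folder.msgDatabase.
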
+
+/**
+ * This callback handles processing the asynchronous query results of
+ * |GlodaMsgIndexer.getMessagesByMessageID|.
+ */
+function MessagesByMessageIdCallback(aMsgIDToIndex, aResults,
+                                     aCallback, aCallbackThis) {
+  this.msgIDToIndex = aMsgIDToIndex;
+  this.results = aResults;
+  this.callback = aCallback;
+  this.callbackThis = aCallbackThis;
+}
+
+MessagesByMessageIdCallback.prototype = {
+  _log: Log4Moz.repository.getLogger("gloda.index_msg.mbm"),
+
+  onItemsAdded: function gloda_ds_mbmi_onItemsAdded(aItems, aCollection) {
+    // just outright bail if we are shutdown
+    if (GlodaDatastore.datastoreIsShutdown)
+      return;
+
+    this._log.debug("getting results...");
+    for (let message of aItems) {
+      this.results[this.msgIDToIndex[message.headerMessageID]].push(message);
+    }
+  },
+  onItemsModified: function () {},
+  onItemsRemoved: function () {},
+  onQueryCompleted: function gloda_ds_mbmi_onQueryCompleted(aCollection) {
+    // just outright bail if we are shutdown
+    if (GlodaDatastore.datastoreIsShutdown)
+      return;
+
+    if (this._log.level <= Log4Moz.Level.Debug)
+      this._log.debug("query completed, notifying... " + this.results);
+
+    this.callback.call(this.callbackThis, this.results);
+  }
+};
+
+
+/**
+ * The message indexer!
+ *
+ * === Message Indexing Strategy
+ * We implement things like so:
+ *
+ * Message State Tracking
+ * - We store a property on all indexed headers indicating their gloda
+ *   message id.  This allows us to tell whether a message is indexed from
+ *   the header, without having to consult the SQL database.
+ * - When we receive an event that indicates that a message's meta-data has
+ *   changed and gloda needs to re-index the message, we set a property on
+ *   the header that indicates the message is dirty.  This property can
+ *   indicate that the message needs to be re-indexed but the gloda-id is
+ *   valid (dirty), or that the message's gloda-id is invalid (filthy)
+ *   because the gloda database has been blown away.
+ * - We track whether a folder is up-to-date on our GlodaFolder
+ *   representation using a concept of dirtiness, just like messages.  Like
+ *   messages, a folder can be dirty or filthy.  A dirty folder has at least
+ *   one dirty message in it, which means we should scan the folder.  A
+ *   filthy folder means that every message in the folder should be
+ *   considered filthy.  Folders start out filthy when Gloda is first told
+ *   about them, indicating we cannot trust any of the gloda-ids in the
+ *   folders.  Filthy folders are downgraded to dirty folders after we mark
+ *   all of the headers with gloda-ids filthy.
+ *
+ * Indexing Message Control
+ * - We index the headers of all IMAP messages.  We index the bodies of all
+ *   IMAP messages that are offline.  We index all local messages.  We plan
+ *   to avoid indexing news messages.
+ * - We would like a way to express desires about indexing that either don't
+ *   confound offline storage with indexing, or actually allow some choice.
+ *
+ * Indexing Messages
+ * - We have two major modes of indexing: sweep and event-driven.  When we
+ *   start up we kick off an indexing sweep.  We use event-driven indexing
+ *   as we receive events for eligible messages, but if we get too many
+ *   events we start dropping them on the floor and just flag that an
+ *   indexing sweep is required.
+ * - The sweep initiates folder indexing jobs based on the priorities
+ *   assigned to folders.  Folder indexing uses a filtered message enumerator
+ *   to find messages that need to be indexed, minimizing wasteful exposure
+ *   of message headers to XPConnect that we would not end up indexing.
+ * - For local folders, we use GetDatabaseWithReparse to ensure that the .msf
+ *   file exists.  For IMAP folders, we simply use GetDatabase because we
+ *   know the auto-sync logic will make sure that the folder is up-to-date
+ *   and we want to avoid creating problems through use of updateFolder.
+ *
+ * Junk Mail
+ * - We do not index junk.  We do not index messages until the junk/non-junk
+ *   determination has been made.  If a message gets marked as junk, we act
+ *   like it was deleted.
+ * - We know when a message is actively queued for junk processing thanks to
+ *   folder processing flags.  nsMsgDBFolder::CallFilterPlugins does this
+ *   prior to initiating spam processing.  Unfortunately, this method does
+ *   not get called until after we receive the notification about the
+ *   existence of the header.  How long afterwards can vary based on
+ *   different factors.  The longest delay is in the IMAP case where there is
+ *   a filter that requires the message body to be present; the method does
+ *   not get called until all the bodies are downloaded.
+ */
+var GlodaMsgIndexer = {
+  /**
+   * A partial attempt to generalize to support multiple databases.  Each
+   * database would have its own datastore, and each datastore would have its
+   * own indexer.  But we rather inter-mingle our use of this field with the
+   * singleton global GlodaDatastore.
+   */
+  _datastore: GlodaDatastore,
+  _log: Log4Moz.repository.getLogger("gloda.index_msg"),
+
+  _junkService: MailServices.junk,
+
+  name: "index_msg",
+  /**
+   * Are we enabled, read: are we processing change events?
+   */
+  _enabled: false,
+  get enabled() { return this._enabled; },
+
+  enable: function msg_indexer_enable() {
+    // initialize our listeners' this pointers
+    this._databaseAnnouncerListener.indexer = this;
+    this._msgFolderListener.indexer = this;
+
+    // register for:
+    // - folder loaded events, so we know when getDatabaseWithReparse has
+    //   finished updating the index/what not (if it wasn't immediately
+    //   available)
+    // - property changes (so we know when a message's read/starred state
+    //   has changed.)
+    this._folderListener._init(this);
+    MailServices.mailSession.AddFolderListener(this._folderListener,
+      Ci.nsIFolderListener.intPropertyChanged |
+      Ci.nsIFolderListener.propertyFlagChanged |
+      Ci.nsIFolderListener.event);
+
+    MailServices.mfn.addListener(this._msgFolderListener,
+      // note: intentionally no msgAdded notification is requested.
+      Ci.nsIMsgFolderNotificationService.msgsClassified |
+      Ci.nsIMsgFolderNotificationService.msgsDeleted |
+      Ci.nsIMsgFolderNotificationService.msgsMoveCopyCompleted |
+      Ci.nsIMsgFolderNotificationService.msgKeyChanged |
+      Ci.nsIMsgFolderNotificationService.folderAdded |
+      Ci.nsIMsgFolderNotificationService.folderDeleted |
+      Ci.nsIMsgFolderNotificationService.folderMoveCopyCompleted |
+      Ci.nsIMsgFolderNotificationService.folderRenamed |
+      Ci.nsIMsgFolderNotificationService.itemEvent);
+
+    this._enabled = true;
+
+    this._considerSchemaMigration();
+
+    this._log.info("Event-Driven Indexing is now " + this._enabled);
+  },
+  disable: function msg_indexer_disable() {
+    // remove FolderLoaded notification listener
+    MailServices.mailSession.RemoveFolderListener(this._folderListener);
+
+    MailServices.mfn.removeListener(this._msgFolderListener);
+
+    this._indexerLeaveFolder(); // nop if we aren't "in" a folder
+
+    this._enabled = false;
+
+    this._log.info("Event-Driven Indexing is now " + this._enabled);
+  },
+
+  /**
+   * Indicates that we have pending deletions to process, meaning that there
+   * are gloda message rows flagged for deletion.
+   * If this value is a boolean, it means the value is known reliably.  If
+   * this value is null, it means that we don't know, likely because we have
+   * started up and have not checked the database.
+   */
+  pendingDeletions: null,
+
+  /**
+   * The message (or folder state) is believed up-to-date.
+   */
+  kMessageClean: 0,
+  /**
+   * The message (or folder) is known to not be up-to-date.  In the case of
+   * folders, this means that some of the messages in the folder may be
+   * dirty.  However, because of the way our indexing works, it is possible
+   * there may actually be no dirty messages in a folder.  (We attempt to
+   * process messages in an event-driven fashion for a finite number of
+   * messages, but because we can quit without completing processing of the
+   * queue, we need to mark the folder dirty, just-in-case.)  (We could do
+   * some extra leg-work and do a better job of marking the folder clean
+   * again.)
+   */
+  kMessageDirty: 1,
+  /**
+   * We have not indexed the folder at all, but messages in the folder think
+   * they are indexed.  We downgrade the folder to just kMessageDirty after
+   * marking all the messages in the folder as dirty.  We do this so that if
+   * we have to stop indexing the folder we can still build on our progress
+   * next time we enter the folder.
+   * We mark all folders filthy when (re-)creating the database because there
+   * may be previous state left over from an earlier database.
+   */
+  kMessageFilthy: 2,
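
The three constants above form a small state machine; sketched here for reference (not part of the patch):

  // kMessageFilthy --(headers marked, folder downgraded)--> kMessageDirty
  // kMessageDirty  --(folder indexing job completes)------> kMessageClean
  // kMessageClean  --(metadata change event arrives)------> kMessageDirty
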
+
+  /**
+   * A message addition job yet to be (completely) processed.  Since message
+   * addition events come to us one-by-one, in order to aggregate them into a
+   * job, we need something like this.  It's up to the indexing loop to
+   * decide when to null this out; it can either do it when it first starts
+   * processing it, or when it has processed the last thing.  It's really a
+   * question of whether we want retrograde motion in the folder progress
+   * bar or the message progress bar.
+   */
+  _pendingAddJob: null,
+
+  /**
+   * The number of messages that we should queue for processing before
+   * letting them fall on the floor and relying on our folder-walking logic
+   * to ensure that the messages are indexed.
+   * The reason we allow for queueing messages in an event-driven fashion is
+   * that once we have reached a steady-state, it is preferable to be able to
+   * deal with new messages and modified meta-data in a prompt fashion rather
+   * than having to (potentially) walk every folder in the system just to
+   * find the message that the user changed the tag on.
+   */
+  _indexMaxEventQueueMessages: 20,
+
+  /**
+   * Unit testing hook to get us to emit additional logging that verges on
+   * inane for general usage but is helpful in unit test output to get a lay
+   * of the land and for paranoia reasons.
+   */
+  _unitTestSuperVerbose: false,
+
+  /** The GlodaFolder corresponding to the folder we are indexing. */
+  _indexingGlodaFolder: null,
+  /** The nsIMsgFolder we are currently indexing. */
+  _indexingFolder: null,
+  /** The nsIMsgDatabase we are currently indexing. */
+  _indexingDatabase: null,
+  /**
+   * The iterator we are using to iterate over the headers in
+   * this._indexingDatabase.
+   */
+  _indexingIterator: null,
+
+  /** folder whose entry we are pending on */
+  _pendingFolderEntry: null,
+
+  // copy-down the work constants from Gloda
+  kWorkSync: Gloda.kWorkSync,
+  kWorkAsync: Gloda.kWorkAsync,
+  kWorkDone: Gloda.kWorkDone,
+  kWorkPause: Gloda.kWorkPause,
+  kWorkDoneWithResult: Gloda.kWorkDoneWithResult,
+
+  /**
+   * Common logic for asynchronously entering the folder with the given
+   * folder ID.  Besides cutting down on duplicate code, this ensures that we
+   * are listening on the folder in case it tries to go away when we are
+   * using it.
+   *
+   * @return kWorkSync when the folder was successfully entered, kWorkAsync
+   *     when we need to pend on notification of updating of the folder (due
+   *     to re-parsing or what have you).  In the event of an actual problem,
+   *     an exception will escape.
+   */
+  _indexerEnterFolder: function gloda_index_indexerEnterFolder(aFolderID) {
+    // leave the folder if we haven't explicitly left it.
+    if (this._indexingFolder !== null) {
+      this._indexerLeaveFolder();
+    }
+
+    this._indexingGlodaFolder = GlodaDatastore._mapFolderID(aFolderID);
+    this._indexingFolder = this._indexingGlodaFolder.getXPCOMFolder(
+                             this._indexingGlodaFolder.kActivityIndexing);
+
+    if (this._indexingFolder)
+      this._log.debug("Entering folder: " + this._indexingFolder.URI);
+
+    try {
+      // The msf may need to be created or otherwise updated for local
+      // folders.  This may require yielding until such time as the msf has
+      // been created.
+      try {
+        if (this._indexingFolder instanceof nsIMsgLocalMailFolder) {
+          this._indexingDatabase =
+            this._indexingFolder.getDatabaseWithReparse(null, null);
+        }
+        // we need do nothing special for IMAP, news, or other
+      }
+      // getDatabaseWithReparse can throw either NS_ERROR_NOT_INITIALIZED or
+      // NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE if the net result is that it
+      // is going to send us a notification when the reparse has completed.
+      // (note that although internally NS_MSG_ERROR_FOLDER_SUMMARY_MISSING
+      // might get flung around, it won't make it out to us, and will instead
+      // be permuted into an NS_ERROR_NOT_INITIALIZED.)
+      catch (e) {
+        if ((e.result == Cr.NS_ERROR_NOT_INITIALIZED) ||
+            (e.result == NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE)) {
+          // this means that we need to pend on the update; the listener for
+          // FolderLoaded events will call
+          // _indexerCompletePendingFolderEntry.
+          this._log.debug("Pending on folder load...");
+          this._pendingFolderEntry = this._indexingFolder;
+          return this.kWorkAsync;
+        } else {
+          throw e;
+        }
+      }
+      // we get an nsIMsgDatabase out of this (unsurprisingly) which
+      // explicitly inherits from nsIDBChangeAnnouncer, which has the
+      // AddListener call we want.
+      if (this._indexingDatabase == null)
+        this._indexingDatabase = this._indexingFolder.msgDatabase;
+      this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
+    }
+    catch (ex) {
+      this._log.error("Problem entering folder: " +
+                      (this._indexingFolder ?
+                         this._indexingFolder.prettiestName : "unknown") +
+                      ", skipping.  Error was: " + ex.fileName + ":" +
+                      ex.lineNumber + ": " + ex);
+      this._indexingGlodaFolder.indexing = false;
+      this._indexingFolder = null;
+      this._indexingGlodaFolder = null;
+      this._indexingDatabase = null;
+      this._indexingEnumerator = null;
+
+      // re-throw, we just wanted to make sure this junk is cleaned up and
+      // get localized error logging...
+      throw ex;
+    }
+
+    return this.kWorkSync;
+  },
+
+  /**
+   * If the folder was still parsing/updating when we tried to enter, then
+   * this handler will get called by the listener who got the FolderLoaded
+   * message.  All we need to do is get the database reference, register a
+   * listener on the db, and retrieve an iterator if desired.
+   */
+  _indexerCompletePendingFolderEntry:
+    function gloda_indexer_indexerCompletePendingFolderEntry() {
+    this._indexingDatabase = this._indexingFolder.msgDatabase;
+    this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
+    this._log.debug("...Folder Loaded!");
+
+    // the load is no longer pending; we certainly don't want more
+    // notifications
+    this._pendingFolderEntry = null;
+    // indexerEnterFolder returned kWorkAsync, which means we need to notify
+    // the callback driver to get things going again.
+    GlodaIndexer.callbackDriver();
+  },
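
The pend-and-resume contract implemented by `_indexerEnterFolder` and `_indexerCompletePendingFolderEntry`, restated as a hypothetical worker sketch (the worker name is illustrative, not from the patch):

  function* exampleFolderWorker(aJob) {
    // Yields kWorkSync when the folder was entered immediately, or
    // kWorkAsync when a reparse is pending; on kWorkAsync the generator
    // stays suspended until _indexerCompletePendingFolderEntry calls
    // GlodaIndexer.callbackDriver().
    yield GlodaMsgIndexer._indexerEnterFolder(aJob.id);
    // ...examine headers here, yielding kWorkSync periodically...
    GlodaMsgIndexer._indexerLeaveFolder();
    yield GlodaMsgIndexer.kWorkDone;
  }
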
+
+  /**
+   * Enumerate all messages in the folder.
+   */
+  kEnumAllMsgs: 0,
+  /**
+   * Enumerate messages that look like they need to be indexed.
+   */
+  kEnumMsgsToIndex: 1,
+  /**
+   * Enumerate messages that are already indexed.
+   */
+  kEnumIndexedMsgs: 2,
+
+  /**
+   * Synchronous helper to get an enumerator for the current folder (as found
+   * in |_indexingFolder|).
+   *
+   * @param aEnumKind One of |kEnumAllMsgs|, |kEnumMsgsToIndex|, or
+   *     |kEnumIndexedMsgs|.
+   * @param [aAllowPreBadIds=false] Only valid for |kEnumIndexedMsgs|, tells
+   *     us that we should treat messages with any gloda-id as dirty, not
+   *     just messages that have non-bad message ids.
+   */
+  _indexerGetEnumerator: function gloda_indexer_indexerGetEnumerator(
+      aEnumKind, aAllowPreBadIds) {
+    if (aEnumKind == this.kEnumMsgsToIndex) {
+      // We need to create search terms for messages to index.  Messages
+      // should be indexed if they're indexable (local or offline and not
+      // expunged) and either: haven't been indexed, are dirty, or are marked
+      // with a former GLODA_BAD_MESSAGE_ID that is no longer our bad marker.
+      // (Our bad marker can change on minor schema revs so that we can try
+      // and reindex those messages exactly once and without needing to go
+      // through a pass to mark them as needing one more try.)
+      // The basic search expression is:
+      //  ((GLODA_MESSAGE_ID_PROPERTY Is 0) ||
+      //   (GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID) ||
+      //   (GLODA_DIRTY_PROPERTY Isnt 0)) &&
+      //  (JUNK_SCORE_PROPERTY Isnt 100)
+      // If the folder !isLocal we add the terms:
+      //  - if the folder is offline --
+      //    && (Status Is nsMsgMessageFlags.Offline)
+      //  - && (Status Isnt nsMsgMessageFlags.Expunged)
+
+      let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
+                            .createInstance(Ci.nsIMsgSearchSession);
+      let searchTerms = Cc["@mozilla.org/array;1"]
+                          .createInstance(Ci.nsIMutableArray);
+      let isLocal = this._indexingFolder instanceof nsIMsgLocalMailFolder;
+
+      searchSession.addScopeTerm(Ci.nsMsgSearchScope.offlineMail,
+                                 this._indexingFolder);
+      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
+      let nsMsgSearchOp = Ci.nsMsgSearchOp;
+
+      // first term: (GLODA_MESSAGE_ID_PROPERTY Is 0
+      let searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = false; // actually don't care here
+      searchTerm.beginsGrouping = true;
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Is;
+      let value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = 0;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      // second term: || GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID
+      searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = false; // OR
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Is;
+      value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = GLODA_OLD_BAD_MESSAGE_ID;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      // third term: || GLODA_DIRTY_PROPERTY Isnt 0 )
+      searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = false;
+      searchTerm.endsGrouping = true;
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Isnt;
+      value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = 0;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      // JUNK_SCORE_PROPERTY Isnt 100
+      // For symmetry with our event-driven stuff, we just directly deal
+      // with the header property.
+      searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = true;
+      searchTerm.attrib = nsMsgSearchAttrib.HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Isnt;
+      value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.str = JUNK_SPAM_SCORE_STR;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = JUNK_SCORE_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      if (!isLocal)
+      {
+        // If the folder is offline, then the message should be too
+        if (this._indexingFolder.flags & Ci.nsMsgFolderFlags.Offline) {
+          // third term: && Status Is nsMsgMessageFlags.Offline
+          searchTerm = searchSession.createTerm();
+          searchTerm.booleanAnd = true;
+          searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
+          searchTerm.op = nsMsgSearchOp.Is;
+          value = searchTerm.value;
+          value.attrib = searchTerm.attrib;
+          value.status = nsMsgMessageFlags.Offline;
+          searchTerm.value = value;
+          searchTerms.appendElement(searchTerm, false);
+        }
+
+        // fourth term: && Status Isnt nsMsgMessageFlags.Expunged
+        searchTerm = searchSession.createTerm();
+        searchTerm.booleanAnd = true;
+        searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
+        searchTerm.op = nsMsgSearchOp.Isnt;
+        value = searchTerm.value;
+        value.attrib = searchTerm.attrib;
+        value.status = nsMsgMessageFlags.Expunged;
+        searchTerm.value = value;
+        searchTerms.appendElement(searchTerm, false);
+      }
+
+      this._indexingEnumerator =
+        this._indexingDatabase.getFilterEnumerator(searchTerms, true);
+    }
+    else if (aEnumKind == this.kEnumIndexedMsgs) {
+      // Enumerate only messages that are already indexed.  This comes out
+      // to:
+      //  ((GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1) &&
+      //   (GLODA_DIRTY_PROPERTY Isnt kMessageFilthy))
+      // In English, a message is indexed if (by clause):
+      // 1) The message has a gloda-id and that gloda-id is in the valid
+      //    range (and not in the bad message marker range).
+      // 2) The message has not been marked filthy (which invalidates the
+      //    gloda-id.)  We also assume that the folder would not have been
+      //    entered at all if it was marked filthy.
+      let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
+                            .createInstance(Ci.nsIMsgSearchSession);
+      let searchTerms = Cc["@mozilla.org/array;1"]
+                          .createInstance(Ci.nsIMutableArray);
+
+      searchSession.addScopeTerm(Ci.nsMsgSearchScope.offlineMail,
+                                 this._indexingFolder);
+      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
+      let nsMsgSearchOp = Ci.nsMsgSearchOp;
+
+      // first term: (GLODA_MESSAGE_ID_PROPERTY >
+      //              GLODA_FIRST_VALID_MESSAGE_ID-1
+      let searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = false; // actually don't care here
+      searchTerm.beginsGrouping = true;
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      // use != 0 if we're allowing pre-bad ids.
+      searchTerm.op = aAllowPreBadIds ? nsMsgSearchOp.Isnt
+                                      : nsMsgSearchOp.IsGreaterThan;
+      let value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = aAllowPreBadIds ? 0 : (GLODA_FIRST_VALID_MESSAGE_ID - 1);
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      // second term: && GLODA_DIRTY_PROPERTY Isnt kMessageFilthy)
+      searchTerm = searchSession.createTerm();
+      searchTerm.booleanAnd = true;
+      searchTerm.endsGrouping = true;
+      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
+      searchTerm.op = nsMsgSearchOp.Isnt;
+      value = searchTerm.value;
+      value.attrib = searchTerm.attrib;
+      value.status = this.kMessageFilthy;
+      searchTerm.value = value;
+      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
+      searchTerms.appendElement(searchTerm, false);
+
+      // The use-case of already indexed messages does not want them
+      // reversed; we care about seeing the message keys in order.
+      this._indexingEnumerator =
+        this._indexingDatabase.getFilterEnumerator(searchTerms, false);
+    }
+    else if (aEnumKind == this.kEnumAllMsgs) {
+      this._indexingEnumerator =
+        this._indexingDatabase.ReverseEnumerateMessages();
+    }
+    else {
+      throw new Error("Unknown enumerator type requested: " + aEnumKind);
+    }
+  },
+
+  _indexerLeaveFolder: function gloda_index_indexerLeaveFolder() {
+    if (this._indexingFolder !== null) {
+      if (this._indexingDatabase) {
+        this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);
+        // remove our listener!
+        this._indexingDatabase.RemoveListener(
+          this._databaseAnnouncerListener);
+      }
+      // let the gloda folder know we are done indexing
+      this._indexingGlodaFolder.indexing = false;
+      // null everyone out
+      this._indexingFolder = null;
+      this._indexingGlodaFolder = null;
+      this._indexingDatabase = null;
+      this._indexingEnumerator = null;
+    }
+  },
+
+  /**
+   * Event fed to us by our nsIFolderListener when a folder is loaded.  We
+   * use this event to know when a folder we were trying to open to index is
+   * actually ready to be indexed.  (The summary may have not existed, may
+   * have been out of date, or otherwise.)
+   *
+   * @param aFolder An nsIMsgFolder, already QI'd.
+   */
+  _onFolderLoaded: function gloda_index_onFolderLoaded(aFolder) {
+    if ((this._pendingFolderEntry !== null) &&
+        (aFolder.URI == this._pendingFolderEntry.URI))
+      this._indexerCompletePendingFolderEntry();
+  },
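
Each search term in `_indexerGetEnumerator` repeats the same create/configure/append dance. As a design note, a small helper could collapse the repetition; a hypothetical sketch only (the patch itself keeps the terms inline):

  // Hypothetical helper mirroring the inline pattern used above.
  function appendUint32HdrTerm(session, terms, hdrProperty, op, status, opts) {
    let term = session.createTerm();
    term.booleanAnd = !!(opts && opts.and);
    if (opts && opts.beginsGrouping)
      term.beginsGrouping = true;
    if (opts && opts.endsGrouping)
      term.endsGrouping = true;
    term.attrib = Ci.nsMsgSearchAttrib.Uint32HdrProperty;
    term.op = op;
    let value = term.value; // copy, modify, and re-assign the value object
    value.attrib = term.attrib;
    value.status = status;
    term.value = value;
    term.hdrProperty = hdrProperty;
    terms.appendElement(term, false);
  }
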
+
+  // it's a getter so we can reference 'this'.  we could memoize.
+  get workers() {
+    return [
+      ["folderSweep", {
+         worker: this._worker_indexingSweep,
+         jobCanceled: this._cleanup_indexingSweep,
+         cleanup: this._cleanup_indexingSweep,
+       }],
+      ["folder", {
+         worker: this._worker_folderIndex,
+         recover: this._recover_indexMessage,
+         cleanup: this._cleanup_indexing,
+       }],
+      ["folderCompact", {
+         worker: this._worker_folderCompactionPass,
+         // compaction enters the folder so needs to know how to leave
+         cleanup: this._cleanup_indexing,
+       }],
+      ["message", {
+         worker: this._worker_messageIndex,
+         onSchedule: this._schedule_messageIndex,
+         jobCanceled: this._canceled_messageIndex,
+         recover: this._recover_indexMessage,
+         cleanup: this._cleanup_indexing,
+       }],
+      ["delete", {
+         worker: this._worker_processDeletes,
+       }],
+
+      ["fixMissingContacts", {
+         worker: this._worker_fixMissingContacts,
+       }],
+    ];
+  },
+
+  _schemaMigrationInitiated: false,
+  _considerSchemaMigration: function() {
+    if (!this._schemaMigrationInitiated &&
+        GlodaDatastore._actualSchemaVersion === 26) {
+      let job = new IndexingJob("fixMissingContacts", null);
+      GlodaIndexer.indexJob(job);
+      this._schemaMigrationInitiated = true;
+    }
+  },
+
+  initialSweep: function() {
+    this.indexingSweepNeeded = true;
+  },
+
+  _indexingSweepActive: false,
+  /**
+   * Indicate that an indexing sweep is desired.  We kick off an indexing
+   * sweep at start-up and whenever we receive an event-based notification
+   * that we either can't process as an event or that we normally handle
+   * during the sweep pass anyways.
+   */
+  set indexingSweepNeeded(aNeeded) {
+    if (!this._indexingSweepActive && aNeeded) {
+      let job = new IndexingJob("folderSweep", null);
+      job.mappedFolders = false;
+      GlodaIndexer.indexJob(job);
+      this._indexingSweepActive = true;
+    }
+  },
+
+  /**
+   * Performs the folder sweep: locating folders that should be indexed,
+   * creating a folder indexing job for them, and rescheduling itself for
+   * execution after that job is completed.  Once it has indexed all the
+   * folders, if we believe we have deletions to process (or just don't
+   * know), it kicks off a deletion processing job.
+   *
+   * Folder traversal logic is based off the spotlight/vista indexer code; we
+   * retrieve the list of servers and folders each time we want to find a new
+   * folder to index.  This avoids needing to maintain a perfect model of the
+   * folder hierarchy at all times.  (We may eventually want to do that, but
+   * this is sufficient and safe for now.)  Although our use of dirty flags
+   * on the folders allows us to avoid tracking the 'last folder' we
+   * processed, we do so to avoid getting 'trapped' in a folder with a high
+   * rate of changes.
+   */
+  _worker_indexingSweep: function* gloda_worker_indexingSweep(aJob) {
+    if (!aJob.mappedFolders) {
+      // Walk the folders and make sure all the folders we would want to
+      // index are mapped.  Build up a list of GlodaFolders as we go, so that
+      // we can sort them by their indexing priority.
+      let foldersToProcess = aJob.foldersToProcess = [];
+
+      let allFolders = MailServices.accounts.allFolders;
+      for (let folder in fixIterator(allFolders, Ci.nsIMsgFolder)) {
+        if (this.shouldIndexFolder(folder))
+          foldersToProcess.push(Gloda.getFolderForFolder(folder));
+      }
+
+      // sort the folders by priority (descending)
+      foldersToProcess.sort(function (a, b) {
+        return b.indexingPriority - a.indexingPriority;
+      });
+
+      aJob.mappedFolders = true;
+    }
+
+    // -- process the folders (in sorted order)
+    while (aJob.foldersToProcess.length) {
+      let glodaFolder = aJob.foldersToProcess.shift();
+      // ignore folders that:
+      // - have been deleted out of existence!
+      // - are not dirty/have not been compacted
+      // - are actively being compacted
+      if (glodaFolder._deleted ||
+          (!glodaFolder.dirtyStatus && !glodaFolder.compacted) ||
+          glodaFolder.compacting)
+        continue;
+
+      // If the folder is marked as compacted, give it a compaction job.
+      if (glodaFolder.compacted)
+        GlodaIndexer.indexJob(new IndexingJob("folderCompact",
+                                              glodaFolder.id));
+
+      // add a job for the folder indexing if it was dirty
+      if (glodaFolder.dirtyStatus)
+        GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id));
+
+      // re-schedule this job (although this worker will die)
+      GlodaIndexer.indexJob(aJob);
+      yield this.kWorkDone;
+    }
+
+    // consider deletion
+    if (this.pendingDeletions || this.pendingDeletions === null)
+      GlodaIndexer.indexJob(new IndexingJob("delete", null));
+
+    // we don't have any more work to do...
+    this._indexingSweepActive = false;
+    yield this.kWorkDone;
+  },
+
+  /**
+   * The only state we need to clean up is that there is no longer an active
+   * indexing sweep.
+   */
+  _cleanup_indexingSweep: function gloda_canceled_indexingSweep(aJob) {
+    this._indexingSweepActive = false;
+  },
+
+  /**
+   * The number of headers to look at before yielding with kWorkSync.  This
+   * is for time-slicing purposes so we still yield to the UI periodically.
+   */
+  HEADER_CHECK_SYNC_BLOCK_SIZE: 25,
+
+  /**
+   * The number of headers to look at before calling
+   * |GlodaUtils.considerHeaderBasedGC|.
+   */
+  HEADER_CHECK_GC_BLOCK_SIZE: 256,
+
+  FOLDER_COMPACTION_PASS_BATCH_SIZE: 512,
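
The two header-check constants drive the time-slicing pattern every worker below uses; condensed into a hypothetical skeleton (the function name is illustrative):

  function* exampleScanWorker(enumerator) {
    let count = 0;
    for (let msgHdr in fixIterator(enumerator, nsIMsgDBHdr)) {
      // Yield control back to the UI every sync block...
      if (++count % GlodaMsgIndexer.HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
        yield GlodaMsgIndexer.kWorkSync;
      // ...and give the garbage collector a chance every GC block.
      if (count % GlodaMsgIndexer.HEADER_CHECK_GC_BLOCK_SIZE == 0)
        GlodaUtils.considerHeaderBasedGC(
          GlodaMsgIndexer.HEADER_CHECK_GC_BLOCK_SIZE);
      // ...per-header work goes here...
    }
    yield GlodaMsgIndexer.kWorkDone;
  }
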
+  /**
+   * Special indexing pass for (local) folders that have been compacted.
+   * The compaction can cause message keys to change because message keys in
+   * local folders are simply offsets into the mbox file.  Accordingly, we
+   * need to update the gloda records/objects to point them at the new
+   * message key.
+   *
+   * Our general algorithm is to perform two traversals in parallel.  The
+   * first is a straightforward enumeration of the message headers in the
+   * folder that apparently have been already indexed.  These provide us
+   * with the message key and the "gloda-id" property.
+   * The second is a list of tuples containing a gloda message id, its
+   * current message key per the gloda database, and the message-id header.
+   * We re-fill the list with batches on-demand.  This allows us to both
+   * avoid dispatching needless UPDATEs as well as deal with messages that
+   * were tracked by the PendingCommitTracker but were discarded by the
+   * compaction notification.
+   *
+   * We end up processing two streams of gloda-ids and some extra info.  In
+   * the normal case we expect these two streams to line up exactly and all
+   * we need to do is update the message key if it has changed.
+   *
+   * There are a few exceptional cases where things do not line up:
+   * 1) The gloda database knows about a message that the enumerator does
+   *    not know about...
+   *    a) This message exists in the folder (identified using its
+   *       message-id header).  This means the message got indexed but
+   *       PendingCommitTracker had to forget about the info when the
+   *       compaction happened.  We re-establish the link and track the
+   *       message in PendingCommitTracker again.
+   *    b) The message does not exist in the folder.  This means the message
+   *       got indexed, PendingCommitTracker had to forget about the info,
+   *       and then the message either got moved or deleted before now.  We
+   *       mark the message as deleted; this allows the gloda message to be
+   *       reused if the move target has not yet been indexed, or purged if
+   *       it already has been and the gloda message is a duplicate.  And
+   *       obviously, if the event that happened was actually a delete, then
+   *       the delete is the right thing to do.
+   * 2) The enumerator knows about a message that the gloda database does
+   *    not know about.  This is unexpected and should not happen.  We log a
+   *    warning.  We are able to differentiate this case from case #1a by
+   *    retrieving the message header associated with the next gloda message
+   *    (using the message-id header per 1a again).  If the gloda message's
+   *    message key is after the enumerator's message key then we know this
+   *    is case #2.  (It implies an insertion in the enumerator stream,
+   *    which is how we define the unexpected case.)
+   *
+   * Besides updating the database rows, we also need to make sure that
+   * in-memory representations are updated.  Immediately after dispatching
+   * UPDATE changes to the database we use the same set of data to walk the
+   * live collections and update any affected messages.  We are then able to
+   * discard the information.  Although this means that we will have to
+   * potentially walk the live collections multiple times, unless something
+   * has gone horribly wrong, the number of collections should be reasonable
+   * and the lookups are cheap.  We bias batch sizes accordingly.
+   *
+   * Because we operate based on chunks we need to make sure that when we
+   * actually deal with multiple chunks that we don't step on our own feet
+   * with our database updates.  Since compaction of message key K results
+   * in a new message key K' such that K' <= K, we can reliably issue
+   * database updates for all values <= K.  Which means our feet are safe no
+   * matter when we issue the update command.  For maximum cache benefit, we
+   * issue our updates prior to our new query since they should still be
+   * maximally hot at that point.
+   */
+  _worker_folderCompactionPass:
+    function* gloda_worker_folderCompactionPass(aJob, aCallbackHandle) {
+    yield this._indexerEnterFolder(aJob.id);
+
+    // It's conceivable that with a folder sweep we might end up trying to
+    // compact a folder twice.  Bail early in this case.
+    if (!this._indexingGlodaFolder.compacted)
+      yield this.kWorkDone;
+
+    // this is a forward enumeration (sometimes we reverse enumerate; not
+    // here)
+    this._indexerGetEnumerator(this.kEnumIndexedMsgs);
+
+    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;
+    const HEADER_CHECK_GC_BLOCK_SIZE = this.HEADER_CHECK_GC_BLOCK_SIZE;
+    const FOLDER_COMPACTION_PASS_BATCH_SIZE =
+      this.FOLDER_COMPACTION_PASS_BATCH_SIZE;
+
+    // Tuples of [gloda id, message key, message-id header] from
+    // folderCompactionPassBlockFetch
+    let glodaIdsMsgKeysHeaderIds = [];
+    // Unpack each tuple from glodaIdsMsgKeysHeaderIds into these guys.
+    // (Initialize oldMessageKey because we use it to kickstart our query.)
+    let oldGlodaId, oldMessageKey = -1, oldHeaderMessageId;
+    // parallel lists of gloda ids and message keys to pass to
+    // GlodaDatastore.updateMessageLocations
+    let updateGlodaIds = [];
+    let updateMessageKeys = [];
+    // list of gloda ids to mark deleted
+    let deleteGlodaIds = [];
+    let exceptionalMessages = {};
+
+    // for GC reasons we need to track the number of headers seen
+    let numHeadersSeen = 0;
+
+    // We are consuming two lists; our loop structure has to reflect that.
+    let headerIter = Iterator(fixIterator(this._indexingEnumerator,
+                                          nsIMsgDBHdr));
+    let mayHaveMoreGlodaMessages = true;
+    let keepIterHeader = false;
+    let keepGlodaTuple = false;
+    let msgHdr = null;
+    while (headerIter || mayHaveMoreGlodaMessages) {
+      let glodaId;
+      if (headerIter) {
+        try {
+          if (!keepIterHeader)
+            msgHdr = headerIter.next();
+          else
+            keepIterHeader = false;
+        }
+        catch (ex) {
+          if (ex instanceof StopIteration) {
+            headerIter = null;
+            msgHdr = null;
+            // do the loop check again
+            continue;
+          } else {
+            throw ex;
+          }
+        }
+      }
+
+      if (msgHdr) {
+        numHeadersSeen++;
+        if (numHeadersSeen % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
+          yield this.kWorkSync;
+
+        if (numHeadersSeen % HEADER_CHECK_GC_BLOCK_SIZE == 0)
+          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);
+
+        // There is no need to check with PendingCommitTracker.  If a
+        // message somehow got indexed between the time the compaction
+        // killed everything and the time we run, that is a bug.
+        glodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
+        // (there is also no need to check for gloda dirty since the
+        // enumerator filtered that for us.)
+      }
+
+      // get more [gloda id, message key, message-id header] tuples if we
+      // have run out
+      if (!glodaIdsMsgKeysHeaderIds.length && mayHaveMoreGlodaMessages) {
+        // Since we operate on blocks, getting a new block implies we should
+        // flush the last block if applicable.
+        if (updateGlodaIds.length) {
+          GlodaDatastore.updateMessageLocations(updateGlodaIds,
+                                                updateMessageKeys,
+                                                aJob.id, true);
+          updateGlodaIds = [];
+          updateMessageKeys = [];
+        }
+
+        if (deleteGlodaIds.length) {
+          GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);
+          deleteGlodaIds = [];
+        }
+
+        GlodaDatastore.folderCompactionPassBlockFetch(
+          aJob.id, oldMessageKey + 1, FOLDER_COMPACTION_PASS_BATCH_SIZE,
+          aCallbackHandle.wrappedCallback);
+        glodaIdsMsgKeysHeaderIds = yield this.kWorkAsync;
+        // Reverse so we can use pop instead of shift and I don't need to be
+        // paranoid about performance.
+        glodaIdsMsgKeysHeaderIds.reverse();
+
+        if (!glodaIdsMsgKeysHeaderIds.length) {
+          mayHaveMoreGlodaMessages = false;
+
+          // We shouldn't be in the loop anymore if headerIter is dead now.
+          if (!headerIter)
+            break;
+        }
+      }
+
+      if (!keepGlodaTuple) {
+        if (mayHaveMoreGlodaMessages)
+          [oldGlodaId, oldMessageKey, oldHeaderMessageId] =
+            glodaIdsMsgKeysHeaderIds.pop();
+        else
+          oldGlodaId = oldMessageKey = oldHeaderMessageId = null;
+      }
+      else {
+        keepGlodaTuple = false;
+      }
+
+      // -- normal expected case
+      if (glodaId == oldGlodaId) {
+        // only need to do something if the key is not right
+        if (msgHdr.messageKey != oldMessageKey) {
+          updateGlodaIds.push(glodaId);
+          updateMessageKeys.push(msgHdr.messageKey);
+        }
+      }
+      // -- exceptional cases
+      else {
+        // This should always return a value unless something is very wrong.
+        // We do not want to catch the exception if one happens.
+        let idBasedHeader = oldHeaderMessageId ?
+          this._indexingDatabase.getMsgHdrForMessageID(oldHeaderMessageId) :
+          false;
+        // - Case 1b.
+        // We want to mark the message as deleted.
+        if (idBasedHeader == null) {
+          deleteGlodaIds.push(oldGlodaId);
+        }
+        // - Case 1a
+        // The expected case is that the message referenced by the gloda
+        // database precedes the header the enumerator told us about.  This
+        // is expected because if PendingCommitTracker did not mark the
+        // message as indexed/clean then the enumerator would not tell us
+        // about it.
+        // Also, if we ran out of headers from the enumerator, this is a
+        // dead giveaway that this is the expected case.
+        else if (idBasedHeader &&
+                 ((msgHdr &&
+                   idBasedHeader.messageKey < msgHdr.messageKey) ||
+                  !msgHdr)) {
+          // tell the pending commit tracker about the gloda database one
+          PendingCommitTracker.track(idBasedHeader, oldGlodaId);
+          // and we might need to update the message key too
+          if (idBasedHeader.messageKey != oldMessageKey) {
+            updateGlodaIds.push(oldGlodaId);
+            updateMessageKeys.push(idBasedHeader.messageKey);
+          }
+          // Take another pass through the loop so that we check the
+          // enumerator header against the next message in the gloda
+          // database.
+          keepIterHeader = true;
+        }
+        // - Case 2
+        // Whereas if the message referenced by gloda has a message key
+        // greater than the one returned by the enumerator, then we have a
+        // header claiming to be indexed by gloda that gloda does not
+        // actually know about.  This is exceptional and gets a warning.
+        else if (msgHdr) {
+          this._log.warn("Observed header that claims to be gloda indexed " +
+                         "but that gloda has never heard of during " +
+                         "compaction." +
+                         " In folder: " + msgHdr.folder.URI +
+                         " sketchy key: " + msgHdr.messageKey +
+                         " subject: " + msgHdr.mime2DecodedSubject);
+          // Keep this tuple around for the next enumerator provided header
+          keepGlodaTuple = true;
+        }
+      }
+    }
+    // If we don't flush the update, no one will!
+    if (updateGlodaIds.length)
+      GlodaDatastore.updateMessageLocations(updateGlodaIds,
+                                            updateMessageKeys,
+                                            aJob.id, true);
+    if (deleteGlodaIds.length)
+      GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);
+
+    this._indexingGlodaFolder._setCompactedState(false);
+
+    this._indexerLeaveFolder();
+    yield this.kWorkDone;
+  },
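
A worked micro-example of the two-stream merge performed above, with purely illustrative keys and ids:

  // Enumerator stream after compaction: (messageKey, gloda-id) pairs
  //   (10, 501)  (35, 503)
  // Gloda tuple stream: (gloda-id, old messageKey, message-id header)
  //   (501, 40, "a@x")  (502, 50, "b@x")  (503, 60, "c@x")
  // Merge outcome:
  //   501 matches -> messageKey 40 becomes 10 (UPDATE queued).
  //   502 has no enumerator twin -> look up "b@x" in the folder:
  //     header found with key < 35 -> case 1a: re-track it in
  //       PendingCommitTracker (and fix its key if needed);
  //     header absent -> case 1b: queue gloda id 502 for deletion.
  //   503 matches -> messageKey 60 becomes 35.
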
+
+  /**
+   * Index the contents of a folder.
+   */
+  _worker_folderIndex:
+    function* gloda_worker_folderIndex(aJob, aCallbackHandle) {
+    let logDebug = this._log.level <= Log4Moz.Level.Debug;
+    yield this._indexerEnterFolder(aJob.id);
+
+    if (!this.shouldIndexFolder(this._indexingFolder)) {
+      aJob.safelyInvokeCallback(true);
+      yield this.kWorkDone;
+    }
+
+    // Make sure listeners get notified about this job.
+    GlodaIndexer._notifyListeners();
+
+    // there is of course a cost to all this header investigation even if we
+    // don't do something.  so we will yield with kWorkSync for every block.
+    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;
+    const HEADER_CHECK_GC_BLOCK_SIZE = this.HEADER_CHECK_GC_BLOCK_SIZE;
+
+    // we can safely presume if we are here that this folder has been
+    // selected for offline processing...
+
+    // -- Filthy Folder
+    // A filthy folder may have misleading properties on the message that
+    // claim the message is indexed.  They are misleading because the
+    // database, for whatever reason, does not have the messages
+    // (accurately) indexed.
+    // We need to walk all the messages and mark them filthy if they have a
+    // dirty property.  Once we have done this, we can downgrade the
+    // folder's dirty status to plain dirty.  We do this rather than trying
+    // to process everyone in one go in a filthy context because if we have
+    // to terminate indexing before we quit, we don't want to have to
+    // re-index messages next time.  (This could even lead to never
+    // completing indexing in a pathological situation.)
+    let glodaFolder = GlodaDatastore._mapFolder(this._indexingFolder);
+    if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
+      this._indexerGetEnumerator(this.kEnumIndexedMsgs, true);
+      let count = 0;
+      for (let msgHdr in fixIterator(this._indexingEnumerator,
+                                     nsIMsgDBHdr)) {
+        // we still need to avoid locking up the UI, pause periodically...
+        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
+          yield this.kWorkSync;
+
+        if (count % HEADER_CHECK_GC_BLOCK_SIZE == 0)
+          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);
+
+        let glodaMessageId = msgHdr.getUint32Property(
+                               GLODA_MESSAGE_ID_PROPERTY);
+        // if it has a gloda message id, we need to mark it filthy
+        if (glodaMessageId != 0)
+          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY,
+                                   this.kMessageFilthy);
+        // if it doesn't have a gloda message id, we will definitely index
+        // it, so no action is required.
+      }
+      // Commit the filthy status changes to the message database.
+      this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);
+
+      // this will automatically persist to the database
+      glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderDirty);
+    }
+
+    // Figure out whether we're supposed to index _everything_ or just what
+    // has not yet been indexed.
+    let force = ("force" in aJob) && aJob.force;
+    let enumeratorType = force ? this.kEnumAllMsgs : this.kEnumMsgsToIndex;
+
+    // Pass 1: count the number of messages to index.
+    //  We do this in order to be able to report to the user what we're
+    //  doing.
+    // TODO: give up after reaching a certain number of messages in folders
+    //  with ridiculous numbers of messages and make the interface just say
+    //  something like "over N messages to go."
+
+    this._indexerGetEnumerator(enumeratorType);
+
+    let numMessagesToIndex = 0;
+    let numMessagesOut = {};
+    // Keep going until we run out of headers.
+    while (this._indexingFolder.msgDatabase.nextMatchingHdrs(
+             this._indexingEnumerator,
+             HEADER_CHECK_SYNC_BLOCK_SIZE * 8, // this way is faster, do more
+             0, // moot, we don't return headers
+             null, // don't return headers, we just want the count
+             numMessagesOut)) {
+      numMessagesToIndex += numMessagesOut.value;
+      yield this.kWorkSync;
+    }
+    numMessagesToIndex += numMessagesOut.value;
+
+    aJob.goal = numMessagesToIndex;
+
+    if (numMessagesToIndex > 0) {
+      // We used up the iterator, get a new one.
+      this._indexerGetEnumerator(enumeratorType);
+
+      // Pass 2: index the messages.
+      let count = 0;
+      for (let msgHdr in fixIterator(this._indexingEnumerator,
+                                     nsIMsgDBHdr)) {
+        // per above, we want to periodically release control while doing
+        // all this header traversal/investigation.
+        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
+          yield this.kWorkSync;
+
+        if (count % HEADER_CHECK_GC_BLOCK_SIZE == 0)
+          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);
+
+        // To keep our counts more accurate, increment the offset before
+        // potentially skipping any messages.
+        ++aJob.offset;
+
+        // Skip messages that have not yet been reported to us as existing
+        // via msgsClassified.
+ if (this._indexingFolder.getProcessingFlags(msgHdr.messageKey) & + NOT_YET_REPORTED_PROCESSING_FLAGS) + continue; + + // Because the gloda id could be in-flight, we need to double-check the + // enumerator here since it can't know about our in-memory stuff. + let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr); + // if the message seems valid and we are not forcing indexing, skip it. + // (that means good gloda id and not dirty) + if (!force && + glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && + glodaDirty == this.kMessageClean) + continue; + + if (logDebug) + this._log.debug(">>> calling _indexMessage"); + yield aCallbackHandle.pushAndGo( + this._indexMessage(msgHdr, aCallbackHandle), + {what: "indexMessage", msgHdr: msgHdr}); + GlodaIndexer._indexedMessageCount++; + if (logDebug) + this._log.debug("<<< back from _indexMessage"); + } + } + + // This will trigger an (async) db update which cannot hit the disk prior to + // the actual database records that constitute the clean state. + // XXX There is the slight possibility that, in the event of a crash, this + // will hit the disk but the gloda-id properties on the headers will not + // get set. This should ideally be resolved by detecting a non-clean + // shutdown and marking all folders as dirty. + glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderClean); + + // by definition, it's not likely we'll visit this folder again anytime soon + this._indexerLeaveFolder(); + + aJob.safelyInvokeCallback(true); + + yield this.kWorkDone; + }, + + /** + * Invoked when a "message" job is scheduled so that we can clear + * _pendingAddJob if that is the job. We do this so that work items are not + * added to _pendingAddJob while it is being processed. + */ + _schedule_messageIndex: function(aJob, aCallbackHandle) { + // we do not want new work items to be added as we are processing, so + // clear _pendingAddJob. A new job will be created as needed. + if (aJob === this._pendingAddJob) + this._pendingAddJob = null; + // update our goal from the items length + aJob.goal = aJob.items.length; + }, + /** + * If the job gets canceled, we need to make sure that we clear out pending + * add job or our state will get wonky. + */ + _canceled_messageIndex: function gloda_index_msg_canceled_messageIndex(aJob) { + if (aJob === this._pendingAddJob) + this._pendingAddJob = null; + }, + + + /** + * Index a specific list of messages that we know to index from + * event-notification hints. + */ + _worker_messageIndex: + function* gloda_worker_messageIndex(aJob, aCallbackHandle) { + // if we are already in the correct folder, our "get in the folder" clause + // will not execute, so we need to make sure this value is accurate in + // that case. (and we want to avoid multiple checks...) 
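+    // For illustration, a "message" job's items queue might look like this
+    //  (hypothetical values, matching the two tuple shapes handled below):
+    //    aJob.items = [
+    //      [42, 1017],                  // [gloda folder ID, message key]
+    //      [42, "<mid-1@example.com>"], // [gloda folder ID, message ID]
+    //    ];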
+    for (; aJob.offset < aJob.items.length; aJob.offset++) {
+      let item = aJob.items[aJob.offset];
+      // item is either [folder ID, message key] or
+      //  [folder ID, message ID]
+
+      let glodaFolderId = item[0];
+      // If the folder has been deleted since we queued, skip this message
+      if (!GlodaDatastore._folderIdKnown(glodaFolderId))
+        continue;
+      let glodaFolder = GlodaDatastore._mapFolderID(glodaFolderId);
+
+      // Stay out of folders that:
+      // - are compacting / compacted and not yet processed
+      // - got deleted (this would be redundant if we had a stance on id nukage)
+      // (these things could have changed since we queued the event)
+      if (glodaFolder.compacting || glodaFolder.compacted ||
+          glodaFolder._deleted)
+        continue;
+
+      // get in the folder
+      if (this._indexingGlodaFolder != glodaFolder) {
+        yield this._indexerEnterFolder(glodaFolderId);
+
+        // Now that we have the real nsIMsgFolder, sanity-check that we should
+        //  be indexing it.  (There are some checks that require the
+        //  nsIMsgFolder.)
+        if (!this.shouldIndexFolder(this._indexingFolder))
+          continue;
+      }
+
+      let msgHdr;
+      // GetMessageHeader can be affected by the use cache, so we need to check
+      //  ContainsKey first to see if the header is actually there.
+      if (typeof item[1] == "number")
+        msgHdr = this._indexingDatabase.ContainsKey(item[1]) &&
+                 this._indexingFolder.GetMessageHeader(item[1]);
+      else
+        // same deal as in move processing.
+        // TODO fixme to not assume singular message-id's.
+        msgHdr = this._indexingDatabase.getMsgHdrForMessageID(item[1]);
+
+      if (msgHdr)
+        yield aCallbackHandle.pushAndGo(
+          this._indexMessage(msgHdr, aCallbackHandle),
+          {what: "indexMessage", msgHdr: msgHdr});
+      else
+        yield this.kWorkSync;
+    }
+
+    // There is no real reason to stay 'in' the folder.  If we are going to get
+    //  more events from the folder, its database would have to be open for us
+    //  to get the events, so it's not like we're creating an efficiency
+    //  problem where we unload a folder just to load it again in 2 seconds.
+    // (Well, at least assuming the views are good about holding onto the
+    //  database references even though they go out of their way to avoid
+    //  holding onto message header references.)
+    this._indexerLeaveFolder();
+
+    yield this.kWorkDone;
+  },
+
+  /**
+   * Recover from a "folder" or "message" job failing inside a call to
+   *  |_indexMessage|, marking the message bad.  If we were not in an
+   *  |_indexMessage| call, then fail to recover.
+   *
+   * @param aJob The job that was being worked.  We ignore this for now.
+   * @param aContextStack The callbackHandle mechanism's context stack.  When we
+   *     invoke pushAndGo for _indexMessage we put something in so we can
+   *     detect when it is on the async stack.
+   * @param aException The exception that is necessitating we attempt to
+   *     recover.
+   *
+   * @return 1 if we were able to recover (because we want the call stack
+   *     popped down to our worker), false if we can't.
+   */
+  _recover_indexMessage:
+      function gloda_index_recover_indexMessage(aJob, aContextStack,
+                                                aException) {
+    // See if indexMessage is on the stack...
+    if (aContextStack.length >= 2 &&
+        aContextStack[1] &&
+        ("what" in aContextStack[1]) &&
+        aContextStack[1].what == "indexMessage") {
+      // it is, so this is probably recoverable.
+
+      this._log.debug(
+        "Exception while indexing message, marking it bad (gloda id of " +
+        GLODA_BAD_MESSAGE_ID + ").");
+
+      // -- Mark the message as bad
+      let msgHdr = aContextStack[1].msgHdr;
+      // (In the worst case, the header is no longer valid, which will result
+      //  in exceptions.  We need to be prepared for that.)
+      try {
+        msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY,
+                                 GLODA_BAD_MESSAGE_ID);
+        // clear the dirty bit if it has one
+        if (msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY))
+          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, 0);
+      }
+      catch (ex) {
+        // If we are indexing a folder and the message header is no longer
+        //  valid, then it's quite likely the whole folder is no longer valid.
+        //  But since in the event-driven message indexing case we could have
+        //  other valid things to look at, let's try and recover.  The folder
+        //  indexing case will come back to us shortly and we will indicate
+        //  recovery is not possible at that point.
+        // So do nothing here since by popping the indexing of the specific
+        //  message out of existence we are recovering.
+      }
+      return 1;
+    }
+    return false;
+  },
+
+  /**
+   * Cleanup after an aborted "folder" or "message" job.
+   */
+  _cleanup_indexing: function gloda_index_cleanup_indexing(aJob) {
+    this._indexerLeaveFolder();
+    aJob.safelyInvokeCallback(false);
+  },
+
+  /**
+   * Maximum number of deleted messages to process at a time.  Arbitrary; there
+   *  are no real known performance constraints at this point.
+   */
+  DELETED_MESSAGE_BLOCK_SIZE: 32,
+
+  /**
+   * Process pending deletes...
+   */
+  _worker_processDeletes: function* gloda_worker_processDeletes(aJob,
+      aCallbackHandle) {
+
+    // Count the number of messages we will eventually process.  People freak
+    //  out when the number is constantly increasing because they think gloda
+    //  has gone rogue.  (Note: new deletions can still accumulate during
+    //  our execution, so we may still 'expand' our count a little.)
+    this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback);
+    aJob.goal = yield this.kWorkAsync;
+    this._log.debug("There are currently " + aJob.goal + " messages awaiting" +
+                    " deletion processing.");
+
+    // get a block of messages to delete.
+    let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
+      noDbQueryValidityConstraints: true,
+    });
+    query._deleted(1);
+    query.limit(this.DELETED_MESSAGE_BLOCK_SIZE);
+    let deletedCollection = query.getCollection(aCallbackHandle);
+    yield this.kWorkAsync;
+
+    while (deletedCollection.items.length) {
+      for (let message of deletedCollection.items) {
+        // If it turns out our count is wrong (because some new deletions
+        //  happened since we entered this worker), let's issue a new count
+        //  and use that to accurately update our goal.
+        if (aJob.offset >= aJob.goal) {
+          this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback);
+          aJob.goal += yield this.kWorkAsync;
+        }
+
+        yield aCallbackHandle.pushAndGo(this._deleteMessage(message,
+                                                            aCallbackHandle));
+        aJob.offset++;
+        yield this.kWorkSync;
+      }
+
+      deletedCollection = query.getCollection(aCallbackHandle);
+      yield this.kWorkAsync;
+    }
+    this.pendingDeletions = false;
+
+    yield this.kWorkDone;
+  },
+
+  _worker_fixMissingContacts: function*(aJob, aCallbackHandle) {
+    let identityContactInfos = [];
+
+    // -- asynchronously get a list of all identities without contacts
+    // The upper bound on the number of messed up contacts is the number of
+    //  contacts in the user's address book.  This should be small enough
+    //  (and the data size small enough) that this won't explode Thunderbird.
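+    // The LEFT JOIN below pairs each 'email' identity with its contact row;
+    //  a NULL contacts.id means the contact row is missing.  Each result row
+    //  gets collected into identityContactInfos as, for illustration
+    //  (hypothetical values):
+    //    { identityId: 7, contactId: 3, email: "alice@example.com" }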
+    let queryStmt = GlodaDatastore._createAsyncStatement(
+      "SELECT identities.id, identities.contactID, identities.value " +
+        "FROM identities " +
+        "LEFT JOIN contacts ON identities.contactID = contacts.id " +
+        "WHERE identities.kind = 'email' AND contacts.id IS NULL",
+      true);
+    queryStmt.executeAsync({
+      handleResult: function(aResultSet) {
+        let row;
+        while ((row = aResultSet.getNextRow())) {
+          identityContactInfos.push({
+            identityId: row.getInt64(0),
+            contactId: row.getInt64(1),
+            email: row.getString(2)
+          });
+        }
+      },
+      handleError: function(aError) {
+      },
+      handleCompletion: function(aReason) {
+        GlodaDatastore._asyncCompleted();
+        aCallbackHandle.wrappedCallback();
+      },
+    });
+    queryStmt.finalize();
+    GlodaDatastore._pendingAsyncStatements++;
+    yield this.kWorkAsync;
+
+    // -- perform fixes only if there were missing contacts
+    if (identityContactInfos.length) {
+      const yieldEvery = 64;
+      // - create the missing contacts
+      for (let i = 0; i < identityContactInfos.length; i++) {
+        if ((i % yieldEvery) === 0)
+          yield this.kWorkSync;
+
+        let info = identityContactInfos[i],
+            card = GlodaUtils.getCardForEmail(info.email),
+            contact = new GlodaContact(
+              GlodaDatastore, info.contactId,
+              null, null,
+              card ? (card.displayName || info.email) : info.email,
+              0, 0);
+        GlodaDatastore.insertContact(contact);
+
+        // update the in-memory rep of the identity to know about the contact
+        //  if there is one.
+        let identity = GlodaCollectionManager.cacheLookupOne(
+          Gloda.NOUN_IDENTITY, info.identityId, false);
+        if (identity) {
+          // Unfortunately, although this fixes the (reachable) Identity and
+          //  exposes the Contact, it does not make the Contact reachable from
+          //  the collection manager.  This will make explicit queries that
+          //  look up the contact potentially see the case where
+          //  contact.identities[0].contact !== contact.  Alternatively, that
+          //  may not happen and instead the "contact" object we created above
+          //  may become unlinked.  (I'd have to trace some logic I don't feel
+          //  like tracing.)  Either way, the potential fallout is minimal
+          //  since the object identity invariant will just lapse and
+          //  popularity on the contact may become stale, and neither of those
+          //  meaningfully affect the operation of anything in Thunderbird.
+          // If we really cared, we could find all the dominant collections
+          //  that reference the identity and update their corresponding
+          //  contact collection to make it reachable.  That use-case does not
+          //  exist outside of here, which is why we're punting.
+          identity._contact = contact;
+          contact._identities = [identity];
+        }
+
+        // NOTE: If the addressbook indexer did anything useful other than
+        //  adapting to name changes, we could schedule indexing of the cards
+        //  at this time.  However, as of this writing, it doesn't, and this
+        //  task is a one-off relevant only to the time of this writing.
+      }
+
+      // - mark all folders as dirty, initiate indexing sweep
+      this.dirtyAllKnownFolders();
+      this.indexingSweepNeeded = true;
+    }
+
+    // -- mark the schema upgrade, be done
+    GlodaDatastore._updateSchemaVersion(GlodaDatastore._schemaVersion);
+    yield this.kWorkDone;
+  },
+
+  /**
+   * Determine whether a folder is suitable for indexing.
+   *
+   * @param aMsgFolder An nsIMsgFolder you want to see if we should index.
+   *
+   * @returns true if we want to index messages in this type of folder, false
+   *     if we do not.
+   */
+  shouldIndexFolder: function(aMsgFolder) {
+    let folderFlags = aMsgFolder.flags;
+    // Completely ignore non-mail and virtual folders.  They should never
+    //  even get to be GlodaFolder instances.
+    if (!(folderFlags & Ci.nsMsgFolderFlags.Mail) ||
+        (folderFlags & Ci.nsMsgFolderFlags.Virtual))
+      return false;
+
+    // Some folders do not really exist; we can detect this by
+    //  getStringProperty exploding when we call it.  This is primarily a
+    //  concern because _mapFolder calls said exploding method, but we also
+    //  don't want to even think about indexing folders that don't exist.
+    //  (Such folders are likely the result of a messed up profile.)
+    try {
+      // flags is used because it should always be in the cache avoiding a
+      //  miss which would compel an msf open.
+      aMsgFolder.getStringProperty("flags");
+    } catch (ex) {
+      return false;
+    }
+
+    // Now see what our gloda folder information has to say about the folder.
+    let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
+    return glodaFolder.indexingPriority != glodaFolder.kIndexingNeverPriority;
+  },
+
+  /**
+   * Sets the indexing priority for this folder and persists it both to Gloda
+   *  and, for backup purposes, to the nsIMsgFolder via string property as
+   *  well.
+   *
+   * Setting this priority may cause the indexer to either reindex this
+   *  folder, or remove this folder from the existing index.
+   *
+   * @param {nsIMsgFolder} aFolder
+   * @param {Number} aPriority (one of the priority constants from GlodaFolder)
+   */
+  setFolderIndexingPriority: function glodaSetFolderIndexingPriority(aFolder,
+      aPriority) {
+
+    let glodaFolder = GlodaDatastore._mapFolder(aFolder);
+
+    // if there's been no change, we're done
+    if (aPriority == glodaFolder.indexingPriority) {
+      return;
+    }
+
+    // save off the old priority, and set the new one
+    let previousPrio = glodaFolder.indexingPriority;
+    glodaFolder._indexingPriority = aPriority;
+
+    // persist the new priority
+    GlodaDatastore.updateFolderIndexingPriority(glodaFolder);
+    aFolder.setStringProperty("indexingPriority", Number(aPriority).toString());
+
+    // if we've been told never to index this folder...
+    if (aPriority == glodaFolder.kIndexingNeverPriority) {
+
+      // stop doing so
+      if (this._indexingFolder == aFolder)
+        GlodaIndexer.killActiveJob();
+
+      // mark all existing messages as deleted
+      GlodaDatastore.markMessagesDeletedByFolderID(glodaFolder.id);
+
+      // re-index
+      GlodaMsgIndexer.indexingSweepNeeded = true;
+
+    } else if (previousPrio == glodaFolder.kIndexingNeverPriority) {
+
+      // there's no existing index, but the user now wants one
+      glodaFolder._dirtyStatus = glodaFolder.kFolderFilthy;
+      GlodaDatastore.updateFolderDirtyStatus(glodaFolder);
+      GlodaMsgIndexer.indexingSweepNeeded = true;
+    }
+  },
+
+  /**
+   * Resets the indexing priority on the given folder to whatever the default
+   *  is for folders of that type.
+   *
+   * @note Calls setFolderIndexingPriority under the hood, so has identical
+   *  potential reindexing side-effects.
+   *
+   * @param {nsIMsgFolder} aFolder
+   * @param {boolean} aAllowSpecialFolderIndexing
+   */
+  resetFolderIndexingPriority: function glodaResetFolderIndexingPriority(
+      aFolder, aAllowSpecialFolderIndexing) {
+    this.setFolderIndexingPriority(aFolder,
+      GlodaDatastore.getDefaultIndexingPriority(aFolder,
+                                                aAllowSpecialFolderIndexing));
+  },
+
+  /**
+   * Queue all of the folders of all of the accounts of the current profile
+   *  for indexing.  We traverse all folders and queue them immediately to try
+   *  and have an accurate estimate of the number of folders that need to be
+   *  indexed.  (We previously queued accounts rather than immediately
+   *  walking their list of folders.)
+ */ + indexEverything: function glodaIndexEverything() { + this._log.info("Queueing all accounts for indexing."); + + GlodaDatastore._beginTransaction(); + for (let account in fixIterator(MailServices.accounts.accounts, + Ci.nsIMsgAccount)) { + this.indexAccount(account); + } + GlodaDatastore._commitTransaction(); + }, + + /** + * Queue all of the folders belonging to an account for indexing. + */ + indexAccount: function glodaIndexAccount(aAccount) { + let rootFolder = aAccount.incomingServer.rootFolder; + if (rootFolder instanceof Ci.nsIMsgFolder) { + this._log.info("Queueing account folders for indexing: " + aAccount.key); + + let allFolders = rootFolder.descendants; + let folderJobs = []; + for (let folder in fixIterator(allFolders, Ci.nsIMsgFolder)) { + if (this.shouldIndexFolder(folder)) + GlodaIndexer.indexJob( + new IndexingJob("folder", GlodaDatastore._mapFolder(folder).id)); + } + } + else { + this._log.info("Skipping Account, root folder not nsIMsgFolder"); + } + }, + + /** + * Queue a single folder for indexing given an nsIMsgFolder. + * + * @param [aOptions.callback] A callback to invoke when the folder finishes + * indexing. First argument is true if the task ran to completion + * successfully, false if we had to abort for some reason. + * @param [aOptions.force=false] Should we force the indexing of all messages + * in the folder (true) or just index what hasn't been indexed (false). + * @return true if we are going to index the folder, false if not. + */ + indexFolder: function glodaIndexFolder(aMsgFolder, aOptions) { + if (!this.shouldIndexFolder(aMsgFolder)) + return false; + let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); + // stay out of compacting/compacted folders + if (glodaFolder.compacting || glodaFolder.compacted) + return false; + + this._log.info("Queue-ing folder for indexing: " + + aMsgFolder.prettiestName); + let job = new IndexingJob("folder", glodaFolder.id); + if (aOptions) { + if ("callback" in aOptions) + job.callback = aOptions.callback; + if ("force" in aOptions) + job.force = true; + } + GlodaIndexer.indexJob(job); + return true; + }, + + /** + * Queue a list of messages for indexing. + * + * @param aFoldersAndMessages List of [nsIMsgFolder, message key] tuples. + */ + indexMessages: function gloda_index_indexMessages(aFoldersAndMessages) { + let job = new IndexingJob("message", null); + job.items = aFoldersAndMessages. + map(fm => [GlodaDatastore._mapFolder(fm[0]).id, fm[1]]); + GlodaIndexer.indexJob(job); + }, + + /** + * Mark all known folders as dirty so that the next indexing sweep goes + * into all folders and checks their contents to see if they need to be + * indexed. + * + * This is being added for the migration case where we want to try and reindex + * all of the messages that had been marked with GLODA_BAD_MESSAGE_ID but + * which is now GLODA_OLD_BAD_MESSAGE_ID and so we should attempt to reindex + * them. + */ + dirtyAllKnownFolders: function gloda_index_msg_dirtyAllKnownFolders() { + // Just iterate over the datastore's folder map and tell each folder to + // be dirty if its priority is not disabled. + for (let folderID in GlodaDatastore._folderByID) { + let glodaFolder = GlodaDatastore._folderByID[folderID]; + if (glodaFolder.indexingPriority !== glodaFolder.kIndexingNeverPriority) + glodaFolder._ensureFolderDirty(); + } + }, + + /** + * Given a message header, return whether this message is likely to have + * been indexed or not. + * + * This means the message must: + * - Be in a folder eligible for gloda indexing. 
+   *    (Not News, etc.)
+   * - Be in a non-filthy folder.
+   * - Be gloda-indexed and non-filthy.
+   *
+   * @param aMsgHdr A message header.
+   * @returns true if the message is likely to have been indexed.
+   */
+  isMessageIndexed: function gloda_index_isMessageIndexed(aMsgHdr) {
+    // If it's in a folder that we flat out do not index, say no.
+    if (!this.shouldIndexFolder(aMsgHdr.folder))
+      return false;
+    let glodaFolder = GlodaDatastore._mapFolder(aMsgHdr.folder);
+    let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(aMsgHdr);
+    return glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
+           glodaDirty != GlodaMsgIndexer.kMessageFilthy &&
+           glodaFolder &&
+           glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy;
+  },
+
+  /* *********** Event Processing *********** */
+
+  /**
+   * Tracks messages we have received msgKeyChanged notifications for in order
+   *  to provide batching and to suppress needless reindexing when we receive
+   *  the expected follow-up msgsClassified notification.
+   *
+   * The entries in this dictionary should be extremely short-lived as we
+   *  receive the msgKeyChanged notification as the offline fake header is
+   *  converted into a real header (which is accompanied by a msgAdded
+   *  notification we don't pay attention to).  Once the headers finish
+   *  updating, the message classifier will get its at-bat and should likely
+   *  find that the messages have already been classified and so fast-path
+   *  them.
+   *
+   * The keys in this dictionary are chosen to be consistent with those of
+   *  PendingCommitTracker: the folder.URI + "#" + the (new) message key.
+   * The values in the dictionary are either an object with "id" (the gloda
+   *  id), "key" (the new message key), and "isDirty" (is it dirty and so
+   *  should still be queued for indexing) attributes, or null indicating that
+   *  no change in message key occurred and so no database changes are
+   *  required.
+   */
+  _keyChangedBatchInfo: {},
+
+  /**
+   * Common logic for things that want to feed event-driven indexing.  This
+   *  gets called by both |_msgFolderListener.msgsClassified| when we are
+   *  first seeing a message as well as by |_folderListener| when things
+   *  happen to existing messages.  Although we could slightly specialize for
+   *  the new-to-us case, it works out to be cleaner to just treat them the
+   *  same and take a very small performance hit.
+   *
+   * @param aMsgHdrs Something fixIterator will work on to return an iterator
+   *     on the set of messages that we should treat as potentially changed.
+   * @param aDirtyingEvent Is this event inherently dirtying?  Receiving a
+   *     msgsClassified notification is not inherently dirtying because it is
+   *     just telling us that a message exists.  We use this knowledge to
+   *     ignore the msgsClassified notifications for messages we have received
+   *     msgKeyChanged notifications for and fast-pathed.  Since it is possible
+   *     for user action to do something that dirties the message between the
+   *     time we get the msgKeyChanged notification and when we receive the
+   *     msgsClassified notification, we want to make sure we don't get
+   *     confused.  (Although since we remove the message from our ignore-set
+   *     after the first notification, we would likely just mistakenly treat
+   *     the msgsClassified notification as something dirtying, so it would
+   *     still work out...)
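+   *
+   * For example, |msgsClassified| invokes us with aDirtyingEvent=false (the
+   *  messages merely exist), while the JunkStatusChanged and message
+   *  property-flag-change paths invoke us with aDirtyingEvent=true.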
+ */ + _reindexChangedMessages: function gloda_indexer_reindexChangedMessage( + aMsgHdrs, aDirtyingEvent) { + let glodaIdsNeedingDeletion = null; + let messageKeyChangedIds = null, messageKeyChangedNewKeys = null; + for (let msgHdr in fixIterator(aMsgHdrs, nsIMsgDBHdr)) { + // -- Index this folder? + let msgFolder = msgHdr.folder; + if (!this.shouldIndexFolder(msgFolder)) { + continue; + } + // -- Ignore messages in filthy folders! + // A filthy folder can only be processed by an indexing sweep, and at + // that point the message will get indexed. + let glodaFolder = GlodaDatastore._mapFolder(msgHdr.folder); + if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) + continue; + + // -- msgKeyChanged event follow-up + if (!aDirtyingEvent) { + let keyChangedKey = msgHdr.folder.URI + "#" + msgHdr.messageKey; + if (keyChangedKey in this._keyChangedBatchInfo) { + var keyChangedInfo = this._keyChangedBatchInfo[keyChangedKey]; + delete this._keyChangedBatchInfo[keyChangedKey]; + + // Null means to ignore this message because the key did not change + // (and the message was not dirty so it is safe to ignore.) + if (keyChangedInfo == null) + continue; + // (the key may be null if we only generated the entry because the + // message was dirty) + if (keyChangedInfo.key !== null) { + if (messageKeyChangedIds == null) { + messageKeyChangedIds = []; + messageKeyChangedNewKeys = []; + } + messageKeyChangedIds.push(keyChangedInfo.id); + messageKeyChangedNewKeys.push(keyChangedInfo.key); + } + // ignore the message because it was not dirty + if (!keyChangedInfo.isDirty) + continue; + } + } + + // -- Index this message? + // We index local messages, IMAP messages that are offline, and IMAP + // messages that aren't offline but whose folders aren't offline either + let isFolderLocal = msgFolder instanceof nsIMsgLocalMailFolder; + if (!isFolderLocal) { + if (!(msgHdr.flags & nsMsgMessageFlags.Offline) && + (msgFolder.flags & nsMsgFolderFlags.Offline)) { + continue; + } + } + // Ignore messages whose processing flags indicate it has not yet been + // classified. In the IMAP case if the Offline flag is going to get set + // we are going to see it before the msgsClassified event so this is + // very important. + if (msgFolder.getProcessingFlags(msgHdr.messageKey) & + NOT_YET_REPORTED_PROCESSING_FLAGS) + continue; + + let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr); + + let isSpam = msgHdr.getStringProperty(JUNK_SCORE_PROPERTY) == + JUNK_SPAM_SCORE_STR; + + // -- Is the message currently gloda indexed? + if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && + glodaDirty != this.kMessageFilthy) { + // - Is the message spam? + if (isSpam) { + // Treat this as a deletion... + if (!glodaIdsNeedingDeletion) + glodaIdsNeedingDeletion = []; + glodaIdsNeedingDeletion.push(glodaId); + // and skip to the next message + continue; + } + + // - Mark the message dirty if it is clean. + // (This is the only case in which we need to mark dirty so that the + // indexing sweep takes care of things if we don't process this in + // an event-driven fashion. If the message has no gloda-id or does + // and it's already dirty or filthy, it is already marked for + // indexing.) + if (glodaDirty == this.kMessageClean) + msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageDirty); + // if the message is pending clean, this change invalidates that. + PendingCommitTracker.noteDirtyHeader(msgHdr); + } + // If it's not indexed but is spam, ignore it. 
+      else if (isSpam) {
+        continue;
+      }
+      // (we want to index the message if we are here)
+
+      // mark the folder dirty too, so we know to look inside
+      glodaFolder._ensureFolderDirty();
+
+      if (this._pendingAddJob == null) {
+        this._pendingAddJob = new IndexingJob("message", null);
+        GlodaIndexer.indexJob(this._pendingAddJob);
+      }
+      // only queue the message if we haven't overflowed our event-driven
+      //  budget
+      if (this._pendingAddJob.items.length <
+          this._indexMaxEventQueueMessages) {
+        this._pendingAddJob.items.push(
+          [GlodaDatastore._mapFolder(msgFolder).id, msgHdr.messageKey]);
+      }
+      else {
+        this.indexingSweepNeeded = true;
+      }
+    }
+
+    // Process any message key changes (from earlier msgKeyChanged events)
+    if (messageKeyChangedIds != null)
+      GlodaDatastore.updateMessageKeys(messageKeyChangedIds,
+                                       messageKeyChangedNewKeys);
+
+    // If we accumulated any deletions in there, batch them off now.
+    if (glodaIdsNeedingDeletion) {
+      GlodaDatastore.markMessagesDeletedByIDs(glodaIdsNeedingDeletion);
+      this.pendingDeletions = true;
+    }
+  },
+
+
+  /* ***** Folder Changes ***** */
+  /**
+   * All additions and removals are queued for processing.  Indexing messages
+   *  is potentially phenomenally expensive, and deletion can still be
+   *  relatively expensive due to our need to delete the message, its
+   *  attributes, and all attributes that reference it.  Additionally,
+   *  attribute deletion costs are higher than attribute look-up because
+   *  there is the actual row plus its 3 indices, and our covering indices are
+   *  no help there.
+   *
+   */
+  _msgFolderListener: {
+    indexer: null,
+
+    /**
+     * We no longer use the msgAdded notification, instead opting to wait
+     *  until junk/trait classification has run (or decided not to run) and
+     *  all filters have run.  The msgsClassified notification provides that
+     *  for us.
+     */
+    msgAdded: function gloda_indexer_msgAdded(aMsgHdr) {
+      // we are never called!  we do not enable this bit!
+    },
+
+    /**
+     * Process (apparently newly added) messages that have been looked at by
+     *  the message classifier.  This ensures that if the message was going
+     *  to get marked as spam, this will have already happened.
+     *
+     * Besides truly new (to us) messages, we will also receive this event for
+     *  messages that are the result of IMAP message move/copy operations,
+     *  including both moves that generated offline fake headers and those
+     *  that did not.  In the offline fake header case, however, we are able
+     *  to ignore their msgsClassified events because we will have received a
+     *  msgKeyChanged notification sometime in the recent past.
+     */
+    msgsClassified: function gloda_indexer_msgsClassified(
+        aMsgHdrs, aJunkClassified, aTraitClassified) {
+      this.indexer._log.debug("msgsClassified notification");
+      try {
+        GlodaMsgIndexer._reindexChangedMessages(aMsgHdrs.enumerate(), false);
+      }
+      catch (ex) {
+        this.indexer._log.error("Explosion in msgsClassified handling:", ex);
+      }
+    },
+
+    /**
+     * Handle real, actual deletion (move to trash and IMAP deletion model
+     *  don't count); we only see the deletion here when it becomes forever,
+     *  or rather _just before_ it becomes forever.  Because the header is
+     *  going away, we need to either process things immediately or extract
+     *  the information required to purge it later without the header.
+     * To this end, we mark all messages that were indexed in the gloda
+     *  message database as deleted.  We set our pending deletions flag to
+     *  let our indexing logic know that after its next wave of folder
+     *  traversal, it should perform a deletion pass.
+     *  If it turns out the messages are coming back, the fact that deletion
+     *  is thus deferred can be handy, as we can reuse the existing gloda
+     *  message.
+     */
+    msgsDeleted: function gloda_indexer_msgsDeleted(aMsgHdrs) {
+      this.indexer._log.debug("msgsDeleted notification");
+      let glodaMessageIds = [];
+
+      for (let iMsgHdr = 0; iMsgHdr < aMsgHdrs.length; iMsgHdr++) {
+        let msgHdr = aMsgHdrs.queryElementAt(iMsgHdr, nsIMsgDBHdr);
+        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
+        if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
+            glodaDirty != GlodaMsgIndexer.kMessageFilthy)
+          glodaMessageIds.push(glodaId);
+      }
+
+      if (glodaMessageIds.length) {
+        GlodaMsgIndexer._datastore.markMessagesDeletedByIDs(glodaMessageIds);
+        GlodaMsgIndexer.pendingDeletions = true;
+      }
+    },
+
+    /**
+     * Process a move or copy.
+     *
+     * Moves to a local folder or an IMAP folder where we are generating
+     *  offline fake headers are dealt with efficiently because we get both
+     *  the source and destination headers.  The main ingredient to having
+     *  offline fake headers is that allowUndo was true when the operation
+     *  was performed.  The only non-obvious thing is that we need to make
+     *  sure that we deal with the impact of filthy folders and messages on
+     *  gloda-id's (they invalidate the gloda-id).
+     *
+     * Moves to an IMAP folder that do not generate offline fake headers do
+     *  not provide us with the target header, but the IMAP
+     *  SetPendingAttributes logic will still attempt to propagate the
+     *  properties on the message header, so when we eventually see it in the
+     *  msgsClassified notification, it should have the properties of the
+     *  source message copied over.
+     * We make sure that gloda-id's do not get propagated when messages are
+     *  moved from IMAP folders that are marked filthy or are marked as not
+     *  supposed to be indexed by clearing the pending attributes for the
+     *  header being tracked by the destination IMAP folder.
+     * We could fast-path the IMAP move case in msgsClassified by noticing
+     *  that a message is showing up with a gloda-id header already and just
+     *  performing an async location update.
+     *
+     * Moves that occur involving 'compacted' folders are fine and do not
+     *  require special handling here.  The one tricky super-edge-case that
+     *  can happen (and gets handled by the compaction pass) is the move of a
+     *  message that got gloda indexed that did not already have a gloda-id
+     *  and PendingCommitTracker did not get to flush the gloda-id before the
+     *  compaction happened.  In that case our move logic cannot know to do
+     *  anything and the gloda database still thinks the message lives in our
+     *  folder.  The compaction pass will deal with this by marking the
+     *  message as deleted.  The rationale is that marking it deleted allows
+     *  the message to be re-used if it gets indexed in the target location,
+     *  or, if the target location has already been indexed, we no longer
+     *  need the duplicate and it should be deleted.  (Also, the compaction
+     *  pass cannot distinguish between the message having been deleted and
+     *  having been moved.)
+     *
+     * Because copied messages are, by their nature, duplicate messages, we
+     *  do not particularly care about them.  As such, we defer their
+     *  processing to the automatic sync logic that will happen much later
+     *  on.  This is potentially desirable in case the user deletes some of
+     *  the original messages, allowing us to reuse the gloda message
+     *  representations when we finally get around to indexing the messages.
+     *  We do need to mark the folder as dirty, though, to clue in the sync
+     *  logic.
+     */
+    msgsMoveCopyCompleted: function gloda_indexer_msgsMoveCopyCompleted(aMove,
+        aSrcMsgHdrs, aDestFolder, aDestMsgHdrs) {
+      this.indexer._log.debug("MoveCopy notification.  Move: " + aMove);
+      try {
+        // ---- Move
+        if (aMove) {
+          // -- Effectively a deletion?
+          // If the destination folder is not indexed, it's like these
+          //  messages are being deleted.
+          if (!GlodaMsgIndexer.shouldIndexFolder(aDestFolder)) {
+            this.msgsDeleted(aSrcMsgHdrs);
+            return;
+          }
+
+          // -- Avoid propagation of filthy gloda-id's.
+          // If the source folder is filthy or should not be indexed (and so
+          //  any gloda-id's found in there are gibberish), our only job is to
+          //  strip the gloda-id's off of all the destination headers because
+          //  none of the gloda-id's are valid (and so we certainly don't want
+          //  to try and use them as a basis for updating message keys.)
+          let srcMsgFolder = aSrcMsgHdrs.queryElementAt(0, nsIMsgDBHdr).folder;
+          if (!this.indexer.shouldIndexFolder(srcMsgFolder) ||
+              (GlodaDatastore._mapFolder(srcMsgFolder).dirtyStatus ==
+                 GlodaFolder.prototype.kFolderFilthy)) {
+            // Local case, just modify the destination headers directly.
+            if (aDestMsgHdrs) {
+              for (let destMsgHdr in fixIterator(aDestMsgHdrs, nsIMsgDBHdr)) {
+                // zero it out if it exists
+                // (no need to deal with pending commit issues here; a filthy
+                //  folder by definition has nothing indexed in it.)
+                let glodaId = destMsgHdr.getUint32Property(
+                                GLODA_MESSAGE_ID_PROPERTY);
+                if (glodaId)
+                  destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY,
+                                               0);
+              }
+
+              // Since we are moving messages from a folder where they were
+              //  effectively not indexed, it is up to us to make sure the
+              //  messages now get indexed.
+              this.indexer._reindexChangedMessages(aDestMsgHdrs.enumerate());
+              return;
+            }
+            // IMAP move case; we need to operate on the pending headers,
+            //  using the source header both to locate the pending header and
+            //  as the indication of what has already been set on it.
+            else {
+              let destDb;
+              // so, this can fail, and there's not much we can do about it.
+              try {
+                destDb = aDestFolder.msgDatabase;
+              } catch (ex) {
+                this.indexer._log.warn("Destination database for " +
+                                       aDestFolder.prettiestName +
+                                       " not ready on IMAP move." +
+                                       " Gloda corruption possible.");
+                return;
+              }
+              for (let srcMsgHdr in fixIterator(aSrcMsgHdrs, nsIMsgDBHdr)) {
+                // zero it out if it exists
+                // (no need to deal with pending commit issues here; a filthy
+                //  folder by definition has nothing indexed in it.)
+                let glodaId = srcMsgHdr.getUint32Property(
+                                GLODA_MESSAGE_ID_PROPERTY);
+                if (glodaId)
+                  destDb.setUint32AttributeOnPendingHdr(
+                    srcMsgHdr, GLODA_MESSAGE_ID_PROPERTY, 0);
+              }
+
+              // Nothing remains to be done.  The msgsClassified event will
+              //  take care of making sure the message gets indexed.
+              return;
+            }
+          }
+
+
+          // --- Have destination headers (local case):
+          if (aDestMsgHdrs) {
+            // -- Update message keys for valid gloda-id's.
+            // (Which means ignore filthy gloda-id's.)
+            let glodaIds = [];
+            let newMessageKeys = [];
+            aSrcMsgHdrs.QueryInterface(nsIArray);
+            aDestMsgHdrs.QueryInterface(nsIArray);
+            // Track whether we see any messages that are not gloda indexed so
+            //  we know if we have to mark the destination folder dirty.
+ let sawNonGlodaMessage = false; + for (let iMsg = 0; iMsg < aSrcMsgHdrs.length; iMsg++) { + let srcMsgHdr = aSrcMsgHdrs.queryElementAt(iMsg, nsIMsgDBHdr); + let destMsgHdr = aDestMsgHdrs.queryElementAt(iMsg, nsIMsgDBHdr); + + let [glodaId, dirtyStatus] = + PendingCommitTracker.getGlodaState(srcMsgHdr); + if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && + dirtyStatus != GlodaMsgIndexer.kMessageFilthy) { + // we may need to update the pending commit map (it checks) + PendingCommitTracker.noteMove(srcMsgHdr, destMsgHdr); + // but we always need to update our database + glodaIds.push(glodaId); + newMessageKeys.push(destMsgHdr.messageKey); + } + else { + sawNonGlodaMessage = true; + } + } + + // this method takes care to update the in-memory representations + // too; we don't need to do anything + if (glodaIds.length) + GlodaDatastore.updateMessageLocations(glodaIds, newMessageKeys, + aDestFolder); + + // Mark the destination folder dirty if we saw any messages that + // were not already gloda indexed. + if (sawNonGlodaMessage) { + let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder); + destGlodaFolder._ensureFolderDirty(); + this.indexer.indexingSweepNeeded = true; + } + } + // --- No dest headers (IMAP case): + // Update any valid gloda indexed messages into their new folder to + // make the indexer's life easier when it sees the messages in their + // new folder. + else { + let glodaIds = []; + + let srcFolderIsLocal = + (srcMsgFolder instanceof nsIMsgLocalMailFolder); + for (let iMsgHdr = 0; iMsgHdr < aSrcMsgHdrs.length; iMsgHdr++) { + let msgHdr = aSrcMsgHdrs.queryElementAt(iMsgHdr, nsIMsgDBHdr); + + let [glodaId, dirtyStatus] = + PendingCommitTracker.getGlodaState(msgHdr); + if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && + dirtyStatus != GlodaMsgIndexer.kMessageFilthy) { + // we may need to update the pending commit map (it checks) + PendingCommitTracker.noteBlindMove(msgHdr); + // but we always need to update our database + glodaIds.push(glodaId); + + // XXX UNDO WORKAROUND + // This constitutes a move from a local folder to an IMAP + // folder. Undo does not currently do the right thing for us, + // but we have a chance of not orphaning the message if we + // mark the source header as dirty so that when the message + // gets re-added we see it. (This does require that we enter + // the folder; we set the folder dirty after the loop to + // increase the probability of this but it's not foolproof + // depending on when the next indexing sweep happens and when + // the user performs an undo.) + msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, + GlodaMsgIndexer.kMessageDirty); + } + } + // XXX ALSO UNDO WORKAROUND + if (srcFolderIsLocal) { + let srcGlodaFolder = GlodaDatastore._mapFolder(srcMsgFolder); + srcGlodaFolder._ensureFolderDirty(); + } + + // quickly move them to the right folder, zeroing their message keys + GlodaDatastore.updateMessageFoldersByKeyPurging(glodaIds, + aDestFolder); + // we _do not_ need to mark the folder as dirty, because the + // message added events will cause that to happen. + } + } + // ---- Copy case + else { + // -- Do not propagate gloda-id's for copies + // (Only applies if we have the destination header, which means local) + if (aDestMsgHdrs) { + for (let destMsgHdr in fixIterator(aDestMsgHdrs, nsIMsgDBHdr)) { + let glodaId = destMsgHdr.getUint32Property( + GLODA_MESSAGE_ID_PROPERTY); + if (glodaId) + destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0); + } + } + + // mark the folder as dirty; we'll get to it later. 
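+          // (No per-message work is needed for copies; the next indexing
+          //  sweep will discover them via the dirty flag set below.)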
+          let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
+          destGlodaFolder._ensureFolderDirty();
+          this.indexer.indexingSweepNeeded = true;
+        }
+      } catch (ex) {
+        this.indexer._log.error("Problem encountered during message move/copy:",
+                                ex.stack);
+      }
+    },
+
+    /**
+     * Queue up message key changes that result from offline fake headers
+     *  being made real, so that the actual update can happen during the
+     *  msgsClassified notification that is expected after this.  We defer
+     *  the actual work (if there is any to be done; the fake header might
+     *  have guessed the right UID correctly) so that we can batch our work.
+     *
+     * The expectation is that there will be no meaningful time window between
+     *  this notification and the msgsClassified notification since the
+     *  message classifier should not actually need to classify the messages
+     *  (they should already have been classified) and so can fast-path them.
+     */
+    msgKeyChanged: function gloda_indexer_msgKeyChanged(aOldMsgKey,
+        aNewMsgHdr) {
+      try {
+        let val = null, newKey = aNewMsgHdr.messageKey;
+        let [glodaId, glodaDirty] =
+          PendingCommitTracker.getGlodaState(aNewMsgHdr);
+        // If we haven't indexed this message yet, take no action, and leave
+        //  it up to msgsClassified to take proper action.
+        if (glodaId < GLODA_FIRST_VALID_MESSAGE_ID)
+          return;
+        // take no action on filthy messages,
+        //  generate an entry if dirty or the keys don't match.
+        if ((glodaDirty !== GlodaMsgIndexer.kMessageFilthy) &&
+            ((glodaDirty === GlodaMsgIndexer.kMessageDirty) ||
+             (aOldMsgKey !== newKey))) {
+          val = {
+            id: glodaId,
+            key: (aOldMsgKey !== newKey) ? newKey : null,
+            isDirty: glodaDirty === GlodaMsgIndexer.kMessageDirty,
+          };
+        }
+
+        let key = aNewMsgHdr.folder.URI + "#" + aNewMsgHdr.messageKey;
+        this.indexer._keyChangedBatchInfo[key] = val;
+      }
+      // this exists more to make unit tests fail than for user error
+      //  reporting
+      catch (ex) {
+        this.indexer._log.error("Problem encountered during msgKeyChanged" +
+                                " notification handling: " + ex + "\n\n" +
+                                ex.stack + " \n\n");
+      }
+    },
+
+    /**
+     * Detect newly added folders before they get messages, so that we map
+     *  them before any messages get added to them.  If we only hear about a
+     *  folder after it gets its first message, we will mark it filthy; if we
+     *  map it before that, it gets marked clean.
+     */
+    folderAdded: function gloda_indexer_folderAdded(aMsgFolder) {
+      // This is invoked for its side-effect of invoking _mapFolder and doing
+      //  so only after filtering out folders we don't care about.
+      GlodaMsgIndexer.shouldIndexFolder(aMsgFolder);
+    },
+
+    /**
+     * Handles folder no-longer-exists-ence.  We mark all messages as deleted
+     *  and remove the folder from our URI table.  Currently, if a folder
+     *  that contains other folders is deleted, we may either receive one
+     *  notification for the folder that is deleted, or a notification for
+     *  the folder and one for each of its descendants.  This depends upon
+     *  the underlying account implementation, so we explicitly handle each
+     *  case.  Namely, we treat it as if we're only planning on getting one
+     *  notification, but we handle the case where the children are already
+     *  gone for some reason.
+ */ + folderDeleted: function gloda_indexer_folderDeleted(aFolder) { + this.indexer._log.debug("folderDeleted notification"); + try { + let delFunc = function(aFolder, indexer) { + if (indexer._datastore._folderKnown(aFolder)) { + indexer._log.info("Processing deletion of folder " + + aFolder.prettiestName + "."); + let glodaFolder = GlodaDatastore._mapFolder(aFolder); + indexer._datastore.markMessagesDeletedByFolderID(glodaFolder.id); + indexer._datastore.deleteFolderByID(glodaFolder.id); + GlodaDatastore._killGlodaFolderIntoTombstone(glodaFolder); + } + else { + indexer._log.info("Ignoring deletion of folder " + + aFolder.prettiestName + + " because it is unknown to gloda."); + } + }; + + let descendentFolders = aFolder.descendants; + // (the order of operations does not matter; child, non-child, whatever.) + // delete the parent + delFunc(aFolder, this.indexer); + // delete all its descendents + for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) { + delFunc(folder, this.indexer); + } + + this.indexer.pendingDeletions = true; + } catch (ex) { + this.indexer._log.error("Problem encountered during folder deletion" + + ": " + ex + "\n\n" + ex.stack + "\n\n"); + } + }, + + /** + * Handle a folder being copied or moved. + * Moves are handled by a helper function shared with _folderRenameHelper + * (which takes care of any nesting involved). + * Copies are actually ignored, because our periodic indexing traversal + * should discover these automatically. We could hint ourselves into + * action, but arguably a set of completely duplicate messages is not + * a high priority for indexing. + */ + folderMoveCopyCompleted: function gloda_indexer_folderMoveCopyCompleted( + aMove, aSrcFolder, aDestFolder) { + this.indexer._log.debug("folderMoveCopy notification (Move: " + aMove + + ")"); + if (aMove) { + let srcURI = aSrcFolder.URI; + let targetURI = aDestFolder.URI + + srcURI.substring(srcURI.lastIndexOf("/")); + this._folderRenameHelper(aSrcFolder, targetURI); + } + else { + this.indexer.indexingSweepNeeded = true; + } + }, + + /** + * We just need to update the URI <-> ID maps and the row in the database, + * all of which is actually done by the datastore for us. + * This method needs to deal with the complexity where local folders will + * generate a rename notification for each sub-folder, but IMAP folders + * will generate only a single notification. Our logic primarily handles + * this by not exploding if the original folder no longer exists. + */ + _folderRenameHelper: function gloda_indexer_folderRenameHelper(aOrigFolder, + aNewURI) { + let newFolder = MailUtils.getFolderForURI(aNewURI); + let specialFolderFlags = Ci.nsMsgFolderFlags.Trash | Ci.nsMsgFolderFlags.Junk; + if (newFolder.isSpecialFolder(specialFolderFlags, true)) { + let descendentFolders = newFolder.descendants; + + // First thing to do: make sure we don't index the resulting folder and + // its descendents. + GlodaMsgIndexer.resetFolderIndexingPriority(newFolder); + for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) { + GlodaMsgIndexer.resetFolderIndexingPriority(folder); + } + + // Remove from the index messages from the original folder + this.folderDeleted(aOrigFolder); + } else { + let descendentFolders = aOrigFolder.descendants; + + let origURI = aOrigFolder.URI; + // this rename is straightforward. 
+        GlodaDatastore.renameFolder(aOrigFolder, aNewURI);
+
+        for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) {
+          let oldSubURI = folder.URI;
+          // mangle a new URI from the old URI.  We could also try and do a
+          //  parallel traversal of the new folder hierarchy, but that seems
+          //  like more work.
+          let newSubURI = aNewURI + oldSubURI.substring(origURI.length);
+          this.indexer._datastore.renameFolder(oldSubURI, newSubURI);
+        }
+
+        this.indexer._log.debug("folder renamed: " + origURI + " to " +
+                                aNewURI);
+      }
+    },
+
+    /**
+     * Handle folder renames, dispatching to our rename helper (which also
+     *  takes care of any nested folder issues.)
+     */
+    folderRenamed: function gloda_indexer_folderRenamed(aOrigFolder,
+        aNewFolder) {
+      this._folderRenameHelper(aOrigFolder, aNewFolder.URI);
+    },
+
+    /**
+     * This tells us about many exciting things.  What they are and what we do:
+     *
+     * - FolderCompactStart: Mark the folder as compacting in our in-memory
+     *    representation.  This should keep any new indexing out of the folder
+     *    until it is done compacting.  Also, kill any active or existing jobs
+     *    to index the folder.
+     * - FolderCompactFinish: Mark the folder as done compacting in our
+     *    in-memory representation.  Assuming the folder was known to us and
+     *    not marked filthy, queue a compaction job.
+     *
+     * - FolderReindexTriggered: We do the same thing as FolderCompactStart
+     *    but don't mark the folder as compacting.
+     *
+     * - JunkStatusChanged: We mark the messages that have had their junk
+     *    state change to be reindexed.
+     */
+    itemEvent: function gloda_indexer_itemEvent(aItem, aEvent, aData) {
+      // Compact and Reindex are close enough that we can reuse the same code
+      //  with one minor difference.
+      if (aEvent == "FolderCompactStart" ||
+          aEvent == "FolderReindexTriggered") {
+        let aMsgFolder = aItem.QueryInterface(nsIMsgFolder);
+        // ignore folders we ignore...
+        if (!GlodaMsgIndexer.shouldIndexFolder(aMsgFolder))
+          return;
+
+        let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
+        if (aEvent == "FolderCompactStart")
+          glodaFolder.compacting = true;
+
+        // Purge any explicit indexing of said folder.  (A "folder" job's id
+        //  is the gloda folder id, not anything on the nsIMsgFolder.)
+        GlodaIndexer.purgeJobsUsingFilter(function (aJob) {
+          return (aJob.jobType == "folder" &&
+                  aJob.id == glodaFolder.id);
+        });
+
+        // Abort the active job if it's in the folder (this covers both
+        //  event-driven indexing that happens to be in the folder as well as
+        //  explicit folder indexing of the folder).
+        if (GlodaMsgIndexer._indexingFolder == aMsgFolder)
+          GlodaIndexer.killActiveJob();
+
+        // Tell the PendingCommitTracker to throw away anything it is tracking
+        //  about the folder.  We will pick up the pieces in the compaction
+        //  pass.
+        PendingCommitTracker.noteFolderDatabaseGettingBlownAway(aMsgFolder);
+
+        // (We do not need to mark the folder dirty because if we were
+        //  indexing it, it already must have been marked dirty.)
+      }
+      else if (aEvent == "FolderCompactFinish") {
+        let aMsgFolder = aItem.QueryInterface(nsIMsgFolder);
+        // ignore folders we ignore...
+        if (!GlodaMsgIndexer.shouldIndexFolder(aMsgFolder))
+          return;
+
+        let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
+        glodaFolder.compacting = false;
+        glodaFolder._setCompactedState(true);
+
+        // Queue compaction unless the folder was filthy (in which case there
+        //  are no valid gloda-id's to update.)
+        if (glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy)
+          GlodaIndexer.indexJob(
+            new IndexingJob("folderCompact", glodaFolder.id));
+
+        // Queue indexing of the folder if it is dirty.
+        //  We are doing this mainly in case we were indexing it before the
+        //  compaction started.  It should be reasonably harmless if we
+        //  weren't.
+        // (It would probably be better to just make sure that there is an
+        //  indexing sweep queued or active, and if it's already active that
+        //  this folder is in the queue to be processed.)
+        if (glodaFolder.dirtyStatus == glodaFolder.kFolderDirty)
+          GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id));
+      }
+      else if (aEvent == "JunkStatusChanged") {
+        this.indexer._log.debug("JunkStatusChanged notification");
+        aItem.QueryInterface(Ci.nsIArray);
+        GlodaMsgIndexer._reindexChangedMessages(aItem.enumerate(), true);
+      }
+    },
+  },
+
+  /**
+   * A nsIFolderListener (listening on nsIMsgMailSession so we get all of
+   *  these events) PRIMARILY to get folder loaded notifications.  Because of
+   *  deficiencies in the nsIMsgFolderListener's events at this time, we also
+   *  get our folder-added and newsgroup notifications from here for now.
+   *  (This will be rectified.)
+   */
+  _folderListener: {
+    indexer: null,
+
+    _init: function gloda_indexer_fl_init(aIndexer) {
+      this.indexer = aIndexer;
+    },
+
+    // We explicitly know about these things rather than bothering with some
+    //  form of registration scheme because these aren't going to change much.
+    get _kFolderLoadedAtom() {
+      delete this._kFolderLoadedAtom;
+      return this._kFolderLoadedAtom = atomService.getAtom("FolderLoaded");
+    },
+    get _kKeywordsAtom() {
+      delete this._kKeywordsAtom;
+      return this._kKeywordsAtom = atomService.getAtom("Keywords");
+    },
+    get _kStatusAtom() {
+      delete this._kStatusAtom;
+      return this._kStatusAtom = atomService.getAtom("Status");
+    },
+    get _kFlaggedAtom() {
+      delete this._kFlaggedAtom;
+      return this._kFlaggedAtom = atomService.getAtom("Flagged");
+    },
+    get _kFolderFlagAtom() {
+      delete this._kFolderFlagAtom;
+      return this._kFolderFlagAtom = atomService.getAtom("FolderFlag");
+    },
+
+    OnItemAdded: function gloda_indexer_OnItemAdded(aParentItem, aItem) {
+    },
+    OnItemRemoved: function gloda_indexer_OnItemRemoved(aParentItem, aItem) {
+    },
+    OnItemPropertyChanged: function gloda_indexer_OnItemPropertyChanged(
+        aItem, aProperty, aOldValue, aNewValue) {
+    },
+    /**
+     * Detect changes to folder flags and reset our indexing priority.  This
+     *  is important because (all?) folders start out without any flags and
+     *  then get their flags added to them.
+     */
+    OnItemIntPropertyChanged: function gloda_indexer_OnItemIntPropertyChanged(
+        aFolderItem, aProperty, aOldValue, aNewValue) {
+      if (aProperty !== this._kFolderFlagAtom)
+        return;
+      if (!GlodaMsgIndexer.shouldIndexFolder(aFolderItem))
+        return;
+      // Only reset priority if the folder's Special Use flags change.
+      if ((aOldValue & Ci.nsMsgFolderFlags.SpecialUse) ==
+          (aNewValue & Ci.nsMsgFolderFlags.SpecialUse))
+        return;
+      GlodaMsgIndexer.resetFolderIndexingPriority(aFolderItem);
+    },
+    OnItemBoolPropertyChanged: function gloda_indexer_OnItemBoolPropertyChanged(
+        aItem, aProperty, aOldValue, aNewValue) {
+    },
+    OnItemUnicharPropertyChanged:
+        function gloda_indexer_OnItemUnicharPropertyChanged(
+          aItem, aProperty, aOldValue, aNewValue) {
+    },
+    /**
+     * Notice when user activity adds/removes tags or changes a message's
+     *  status.
+     */
+    OnItemPropertyFlagChanged: function gloda_indexer_OnItemPropertyFlagChanged(
+        aMsgHdr, aProperty, aOldValue, aNewValue) {
+      if (aProperty == this._kKeywordsAtom ||
+          // We couldn't care less about the New flag changing.
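+          // (The XOR expressions below isolate exactly which flag bits
+          //  changed between the old and new values.)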
+          (aProperty == this._kStatusAtom &&
+           (aOldValue ^ aNewValue) != nsMsgMessageFlags.New &&
+           // We do care about IMAP deletion, but msgsDeleted tells us that,
+           //  so ignore IMAPDeleted too...
+           (aOldValue ^ aNewValue) != nsMsgMessageFlags.IMAPDeleted) ||
+          aProperty == this._kFlaggedAtom) {
+        GlodaMsgIndexer._reindexChangedMessages([aMsgHdr], true);
+      }
+    },
+
+    /**
+     * Get folder loaded notifications for folders that had to do some
+     *  (asynchronous) processing before they could be opened.
+     */
+    OnItemEvent: function gloda_indexer_OnItemEvent(aFolder, aEvent) {
+      if (aEvent == this._kFolderLoadedAtom)
+        this.indexer._onFolderLoaded(aFolder);
+    },
+  },
+
+  /* ***** Rebuilding / Reindexing ***** */
+  /**
+   * Allow us to invalidate an outstanding folder traversal because the
+   *  underlying database is going away.  We use other means for detecting
+   *  modifications of the message (labeling, marked (un)read, starred, etc.)
+   *
+   * This is an nsIDBChangeListener listening to an nsIDBChangeAnnouncer.  To
+   *  add ourselves, we get us a nice nsMsgDatabase, query it to the
+   *  announcer, then call AddListener.
+   */
+  _databaseAnnouncerListener: {
+    indexer: null,
+    /**
+     * XXX We really should define the operations under which we expect this
+     *  to occur.  While we know this must be happening as the result of a
+     *  ForceClosed call, we don't have a comprehensive list of when this is
+     *  expected to occur.  Some reasons:
+     * - Compaction (although we should already have killed the job thanks to
+     *    our compaction notification)
+     * - UID validity rolls.
+     * - Folder Rename
+     * - Folder Delete
+     * The fact that we already have the database open when getting this means
+     *  that it had to be valid before we opened it, which hopefully rules out
+     *  modification of the mbox file by an external process (since that is
+     *  forbidden when we are running) and many other exotic things.
+     *
+     * So this really ends up just being a correctness / safety protection
+     *  mechanism, at least now that we have better compaction support.
+     */
+    onAnnouncerGoingAway: function gloda_indexer_dbGoingAway(
+        aDBChangeAnnouncer) {
+      // The fact that we are getting called means we have an active folder
+      //  and that we therefore are the active job.  As such, we must kill the
+      //  active job.
+      // XXX In the future, when we support interleaved event-driven indexing
+      //  that bumps long-running indexing tasks, the semantics of this will
+      //  have to change a bit since we will want to maintain being active in
+      //  a folder even when bumped.  However, we will probably have a more
+      //  complex notion of indexing contexts on a per-job basis.
+      GlodaIndexer.killActiveJob();
+    },
+
+    onHdrFlagsChanged: function(aHdrChanged, aOldFlags, aNewFlags,
+                                aInstigator) {},
+    onHdrDeleted: function(aHdrChanged, aParentKey, aFlags, aInstigator) {},
+    onHdrAdded: function(aHdrChanged, aParentKey, aFlags, aInstigator) {},
+    onParentChanged: function(aKeyChanged, aOldParent, aNewParent,
+                              aInstigator) {},
+    onReadChanged: function(aInstigator) {},
+    onJunkScoreChanged: function(aInstigator) {},
+    onHdrPropertyChanged: function (aHdrToChange, aPreChange, aStatus,
+                                    aInstigator) {},
+    onEvent: function (aDB, aEvent) {},
+  },
+
+  /**
+   * Given a list of Message-ID's, return a corresponding list of lists of
+   *  messages with those Message-ID's.  So if you pass an array with three
+   *  Message-ID's ["a", "b", "c"], you would get back an array containing
+   *  3 lists, where the first list contains all the messages with a
+   *  message-id of "a", and so forth.
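+   *
+   * For example (hypothetical message-id's and counts):
+   *   getMessagesByMessageID(["a", "b", "c"], callback, callbackThis);
+   *   // callback eventually receives [[mA1, mA2], [mB1], []] if two copies
+   *   //  of "a" exist, one copy of "b", and none of "c".
+   *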
+
+  /**
+   * Given a list of Message-IDs, return a list of lists of the messages
+   * bearing those Message-IDs.  So if you pass an array of three
+   * Message-IDs ["a", "b", "c"], you get back an array containing three
+   * lists, where the first list contains all the messages with a Message-ID
+   * of "a", and so forth.  A list is returned rather than null or a single
+   * message because we accept the reality that we can have multiple copies
+   * of messages with the same ID.
+   * This call is asynchronous because it needs previously created messages
+   * to be reflected in our results, which requires us to execute on the
+   * async thread where all our writes happen.  This also turns out to be a
+   * reasonable thing because we can imagine pathological cases where there
+   * could be a lot of Message-IDs and/or a lot of messages with those
+   * Message-IDs.
+   *
+   * The returned collection will include 'ghost' messages (messages that
+   * exist for conversation-threading purposes only) as well as deleted
+   * messages, in addition to the normal 'live' messages that non-privileged
+   * queries might return.
+   */
+  getMessagesByMessageID: function gloda_ns_getMessagesByMessageID(
+      aMessageIDs, aCallback, aCallbackThis) {
+    let msgIDToIndex = {};
+    let results = [];
+    for (let iID = 0; iID < aMessageIDs.length; ++iID) {
+      let msgID = aMessageIDs[iID];
+      results.push([]);
+      msgIDToIndex[msgID] = iID;
+    }
+
+    // (Note: although we are performing a lookup with no validity
+    //  constraints and using the same object-relational-mapper-ish layer
+    //  used by things that do have constraints, we are not at risk of
+    //  exposing deleted messages to other code and getting it confused.
+    //  The only way code can find a message is if it shows up in its
+    //  queries or gets announced via GlodaCollectionManager.itemsAdded,
+    //  neither of which will happen.)
+    let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
+      noDbQueryValidityConstraints: true,
+    });
+    query.headerMessageID.apply(query, aMessageIDs);
+    query.frozen = true;
+
+    let listener = new MessagesByMessageIdCallback(msgIDToIndex, results,
+                                                   aCallback, aCallbackThis);
+    return query.getCollection(listener, null, {becomeNull: true});
+  },
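+
+  // Hedged usage sketch (editor's addition; the callback body is invented
+  //  for illustration).  Message-IDs are given without angle brackets, as
+  //  nsIMsgDBHdr.messageId reports them:
+  //    GlodaMsgIndexer.getMessagesByMessageID(
+  //      ["a@example.com", "b@example.com"],
+  //      function(aLists) {
+  //        // aLists[0]: all messages whose Message-ID is "a@example.com";
+  //        // aLists[1]: likewise for "b@example.com".  Lists may be empty.
+  //      },
+  //      null);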
+
+  /**
+   * A reference to MsgHdrToMimeMessage that unit testing can clobber when it
+   * wants to cause us to hang or inject a fault.  If you are not
+   * glodaTestHelper.js then _do not touch this_.
+   */
+  _MsgHdrToMimeMessageFunc: MsgHdrToMimeMessage,
+  /**
+   * Primary message indexing logic.  This method is mainly concerned with
+   * getting all the information about the message required for threading /
+   * conversation building and subsequent processing.  It is responsible for
+   * determining whether to reuse existing gloda messages or whether a new
+   * one should be created.  Most attribute work happens in fundattr.js or
+   * explattr.js.
+   *
+   * Prior to calling this method, the caller must have invoked
+   * |_indexerEnterFolder|, leaving the following invariants true:
+   *
+   * @pre aMsgHdr.folder == this._indexingFolder
+   * @pre aMsgHdr.folder.msgDatabase == this._indexingDatabase
+   */
+  _indexMessage: function* gloda_indexMessage(aMsgHdr, aCallbackHandle) {
+    let logDebug = this._log.level <= Log4Moz.Level.Debug;
+    if (logDebug)
+      this._log.debug("*** Indexing message: " + aMsgHdr.messageKey + " : " +
+                      aMsgHdr.subject);
+
+    // If the message is offline, then get the message body as well.
+    let isMsgOffline = false;
+    let aMimeMsg;
+    if ((aMsgHdr.flags & nsMsgMessageFlags.Offline) ||
+        (aMsgHdr.folder instanceof nsIMsgLocalMailFolder)) {
+      isMsgOffline = true;
+      this._MsgHdrToMimeMessageFunc(aMsgHdr, aCallbackHandle.callbackThis,
+                                    aCallbackHandle.callback, false,
+                                    {saneBodySize: true});
+      aMimeMsg = (yield this.kWorkAsync)[1];
+    }
+    else {
+      if (logDebug)
+        this._log.debug("  * Message is not offline -- only headers indexed");
+    }
+
+    if (logDebug)
+      this._log.debug("  * Got message, subject " + aMsgHdr.subject);
+
+    if (this._unitTestSuperVerbose) {
+      if (aMimeMsg)
+        this._log.debug("  * Got Mime " + aMimeMsg.prettyString());
+      else
+        this._log.debug("  * NO MIME MESSAGE!!!\n");
+    }
+
+    // -- Find/create the conversation the message belongs to.
+    // Our invariant is that all messages that exist in the database belong
+    //  to a conversation.
+
+    // - See if any of the ancestors exist and have a conversationID...
+    // (references are ordered from old [0] to new [n-1])
+    let references = Array.from(range(0, aMsgHdr.numReferences)).
+      map(i => aMsgHdr.getStringReference(i));
+    // Also see if we already know about the message...
+    references.push(aMsgHdr.messageId);
+
+    this.getMessagesByMessageID(references, aCallbackHandle.callback,
+                                aCallbackHandle.callbackThis);
+    // (ancestorLists has a direct correspondence to the message ids)
+    let ancestorLists = yield this.kWorkAsync;
+
+    if (logDebug) {
+      this._log.debug("ancestors raw: " + ancestorLists);
+      this._log.debug("ref len: " + references.length +
+                      " anc len: " + ancestorLists.length);
+      this._log.debug("references: " +
+                      Log4Moz.enumerateProperties(references).join(","));
+      this._log.debug("ancestors: " +
+                      Log4Moz.enumerateProperties(ancestorLists).join(","));
+    }
+
+    // Pull our current message lookup results off.
+    references.pop();
+    let candidateCurMsgs = ancestorLists.pop();
+
+    let conversationID = null;
+    let conversation = null;
+    // -- figure out the conversation ID
+    // If we have a clone/already exist, just use its conversation ID.
+    if (candidateCurMsgs.length > 0) {
+      conversationID = candidateCurMsgs[0].conversationID;
+      conversation = candidateCurMsgs[0].conversation;
+    }
+    // Otherwise check out our ancestors.
+    else {
+      // (walk from closest to furthest ancestor)
+      for (let iAncestor = ancestorLists.length - 1; iAncestor >= 0;
+           --iAncestor) {
+        let ancestorList = ancestorLists[iAncestor];
+
+        if (ancestorList.length > 0) {
+          // We only care about the first instance of the message because we
+          //  are able to guarantee the invariant that all messages with the
+          //  same message id belong to the same conversation.
+          let ancestor = ancestorList[0];
+          if (conversationID === null) {
+            conversationID = ancestor.conversationID;
+            conversation = ancestor.conversation;
+          }
+          else if (conversationID != ancestor.conversationID) {
+            // XXX This inconsistency is known and understood and tracked by
+            //  bug 478162 https://bugzilla.mozilla.org/show_bug.cgi?id=478162
+            //this._log.error("Inconsistency in conversations invariant on " +
+            //                ancestor.headerMessageID + ".  It has conv id " +
+            //                ancestor.conversationID + " but expected " +
+            //                conversationID + ". ID: " + ancestor.id);
+          }
+        }
+      }
+    }
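+
+    // Editor's walk-through (assumed data, not in the original source):
+    //  suppose references was ["root@x", "reply@x"] and, after the pops
+    //  above, ancestorLists is [[], [glodaMsgB]].  The loop starts at
+    //  glodaMsgB (the closest ancestor) and adopts its conversationID; the
+    //  empty list for "root@x" is back-filled with a ghost message by the
+    //  creation loop below.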
+
+    // Nobody had one?  Create a new conversation.
+    if (conversationID === null) {
+      // (The create method could issue the id, making the call return
+      //  without waiting for the database...)
+      conversation = this._datastore.createConversation(
+        aMsgHdr.mime2DecodedSubject, null, null);
+      conversationID = conversation.id;
+    }
+
+    // Walk from furthest to closest ancestor, creating the ancestors that
+    //  don't exist.  (This is possible if previous messages that were
+    //  consumed in this thread only had an in-reply-to or for some reason
+    //  did not otherwise provide the full references chain.)
+    for (let iAncestor = 0; iAncestor < ancestorLists.length; ++iAncestor) {
+      let ancestorList = ancestorLists[iAncestor];
+
+      if (ancestorList.length == 0) {
+        if (logDebug)
+          this._log.debug("creating message with: null, " + conversationID +
+                          ", " + references[iAncestor] + ", null.");
+        let ancestor = this._datastore.createMessage(null, null, // ghost
+                                                     conversationID, null,
+                                                     references[iAncestor],
+                                                     null, // no subject
+                                                     null, // no body
+                                                     null); // no attachments
+        this._datastore.insertMessage(ancestor);
+        ancestorLists[iAncestor].push(ancestor);
+      }
+    }
+    // Now all our ancestors exist, though they may be ghost-like...
+
+    // Find out whether there is a ghost version of our message or whether
+    //  we have already indexed it.
+    let curMsg = null;
+    if (logDebug)
+      this._log.debug(candidateCurMsgs.length + " candidate messages");
+    for (let iCurCand = 0; iCurCand < candidateCurMsgs.length; iCurCand++) {
+      let candMsg = candidateCurMsgs[iCurCand];
+
+      if (logDebug)
+        this._log.debug("candidate folderID: " + candMsg.folderID +
+                        " messageKey: " + candMsg.messageKey);
+
+      if (candMsg.folderURI == this._indexingFolder.URI) {
+        // If we are in the same folder and we have the same message key, we
+        //  are definitely the same; stop looking.
+        if (candMsg.messageKey == aMsgHdr.messageKey) {
+          curMsg = candMsg;
+          break;
+        }
+        // If (we are in the same folder and) the candidate message has a
+        //  null message key, we treat it as our best option unless we find
+        //  an exact key match.  (This would happen because the 'move'
+        //  notification case has to deal with not knowing the target message
+        //  key.  This case will hopefully be somewhat improved in the future
+        //  so as not to go through this path, which mandates re-indexing of
+        //  the message in its entirety.)
+        if (candMsg.messageKey === null)
+          curMsg = candMsg;
+        // If (we are in the same folder and) the candidate message's
+        //  underlying message no longer exists/matches, we'll assume we are
+        //  the same message, betrayed by a re-indexing or the like, but we
+        //  have to make sure a perfect match doesn't turn up.
+        else if ((curMsg === null) &&
+                 !this._indexingDatabase.ContainsKey(candMsg.messageKey))
+          curMsg = candMsg;
+      }
+      // A ghost/deleted message is fine.
+      else if ((curMsg === null) && (candMsg.folderID === null)) {
+        curMsg = candMsg;
+      }
+    }
+
+    let attachmentNames = null;
+    if (aMimeMsg) {
+      attachmentNames = aMimeMsg.allAttachments.
+        filter(att => att.isRealAttachment).map(att => att.name);
+    }
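+
+    // Editor's sketch (assumed message shape): for a message whose MIME
+    //  parts are an inline text/plain body plus an attachment named
+    //  "photo.jpg", the filter/map above leaves attachmentNames ==
+    //  ["photo.jpg"].  When no MIME message is available, attachmentNames
+    //  stays null, which is distinct from [] ("known to have none").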
+
+    let isConceptuallyNew, isRecordNew, insertFulltext;
+    if (curMsg === null) {
+      curMsg = this._datastore.createMessage(aMsgHdr.folder,
+                                             aMsgHdr.messageKey,
+                                             conversationID,
+                                             aMsgHdr.date,
+                                             aMsgHdr.messageId);
+      curMsg._conversation = conversation;
+      isConceptuallyNew = isRecordNew = insertFulltext = true;
+    }
+    else {
+      isRecordNew = false;
+      // The message is conceptually new if it was a ghost or was deleted.
+      isConceptuallyNew = curMsg._isGhost || curMsg._isDeleted;
+      // Insert fulltext if it was a ghost.
+      insertFulltext = curMsg._isGhost;
+      curMsg._folderID = this._datastore._mapFolder(aMsgHdr.folder).id;
+      curMsg._messageKey = aMsgHdr.messageKey;
+      // (aMsgHdr.date is in PRTime microseconds; Date wants milliseconds.)
+      curMsg.date = new Date(aMsgHdr.date / 1000);
+      // The message may have been deleted; tell it to make sure it's not.
+      curMsg._ensureNotDeleted();
+      // Note: we are assuming that our matching logic is flawless in that,
+      //  if this message was not a ghost, the 'body' associated with the id
+      //  is still exactly the same.  It is conceivable that there are cases
+      //  where this is not true.
+    }
+
+    if (aMimeMsg) {
+      let bodyPlain = aMimeMsg.coerceBodyToPlaintext(aMsgHdr.folder);
+      if (bodyPlain) {
+        curMsg._bodyLines = bodyPlain.split(/\r?\n/);
+        // curMsg._content gets set by fundattr.js
+      }
+    }
+
+    // Mark the message as new (for the purposes of fulltext insertion).
+    if (insertFulltext)
+      curMsg._isNew = true;
+
+    curMsg._subject = aMsgHdr.mime2DecodedSubject;
+    curMsg._attachmentNames = attachmentNames;
+
+    // curMsg._indexAuthor gets set by fundattr.js
+    // curMsg._indexRecipients gets set by fundattr.js
+
+    // Zero the notability so everything in grokNounItem can just increment.
+    curMsg.notability = 0;
+
+    yield aCallbackHandle.pushAndGo(
+      Gloda.grokNounItem(curMsg,
+                         {header: aMsgHdr, mime: aMimeMsg,
+                          bodyLines: curMsg._bodyLines},
+                         isConceptuallyNew, isRecordNew,
+                         aCallbackHandle));
+
+    delete curMsg._bodyLines;
+    delete curMsg._content;
+    delete curMsg._isNew;
+    delete curMsg._indexAuthor;
+    delete curMsg._indexRecipients;
+
+    // We want to update the header for messages only after the transaction
+    //  irrevocably hits the disk; otherwise we could get confused if the
+    //  transaction rolls back or the like.
+    PendingCommitTracker.track(aMsgHdr, curMsg.id);
+
+    yield this.kWorkDone;
+  },
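+
+  // Commit-ordering sketch (editor's illustration; the header write uses the
+  //  standard nsIMsgDBHdr property API, and "gloda-id" is this file's
+  //  GLODA_MESSAGE_ID_PROPERTY):
+  //    PendingCommitTracker.track(aMsgHdr, curMsg.id);
+  //    // ...later, only once the SQLite transaction durably commits...
+  //    aMsgHdr.setUint32Property("gloda-id", curMsg.id);
+  //  Until that second step runs, a crash merely leaves the header looking
+  //  unindexed, so the message gets re-indexed rather than lost.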
+
+  /**
+   * Wipe a message out of existence from our index.  This is slightly more
+   * tricky than one would first expect because there are potentially
+   * attributes not immediately associated with this message that reference
+   * the message.  Not only that, but deletion of messages may leave a
+   * conversation possessing only ghost messages, which we don't want, so we
+   * need to nuke the moot conversation and its moot ghost messages.
+   * For now, we are actually punting on that trickiness, and the exact
+   * nuances aren't defined yet because we have not decided whether to store
+   * such attributes redundantly.  For example, if we have
+   * subject-pred-object, we could actually store this as attributes
+   * (subject, id, object) and (object, id, subject).  In such a case, we
+   * could query on (subject, *) and use the results to delete the
+   * (object, id, subject) case.  If we don't redundantly store attributes,
+   * we can deal with the problem by collecting up all the attributes that
+   * accept a message as their object type and issuing a delete against
+   * that.  For example, delete (*, [1,2,3], message id).
+   * (We are punting because we haven't implemented support for generating
+   * attributes like that yet.)
+   *
+   * @TODO: implement deletion of attributes that reference (deleted) messages
+   */
+  _deleteMessage: function* gloda_index_deleteMessage(aMessage,
+                                                      aCallbackHandle) {
+    let logDebug = this._log.level <= Log4Moz.Level.Debug;
+    if (logDebug)
+      this._log.debug("*** Deleting message: " + aMessage);
+
+    // -- delete our attributes
+    // Delete the message's attributes.  (If we implemented the cascade
+    //  delete, that could do the honors for us... right now we define the
+    //  trigger in our schema but the back-end ignores it.)
+    GlodaDatastore.clearMessageAttributes(aMessage);
+
+    // -- delete our message or ghost us, and maybe nuke the whole
+    //  conversation
+    // Look at the other messages in the conversation.
+    // (Note: although we are performing a lookup with no validity
+    //  constraints and using the same object-relational-mapper-ish layer
+    //  used by things that do have constraints, we are not at risk of
+    //  exposing deleted messages to other code and getting it confused.
+    //  The only way code can find a message is if it shows up in its
+    //  queries or gets announced via GlodaCollectionManager.itemsAdded,
+    //  neither of which will happen.)
+    let convPrivQuery = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
+      noDbQueryValidityConstraints: true,
+    });
+    convPrivQuery.conversation(aMessage.conversation);
+    let conversationCollection = convPrivQuery.getCollection(aCallbackHandle);
+    yield this.kWorkAsync;
+
+    let conversationMsgs = conversationCollection.items;
+
+    // Count the number of ghost messages we see to determine if we are
+    //  the last message alive.
+    let ghostCount = 0;
+    let twinMessageExists = false;
+    for (let convMsg of conversationMsgs) {
+      // Ignore our own message.
+      if (convMsg.id == aMessage.id)
+        continue;
+
+      if (convMsg._isGhost)
+        ghostCount++;
+      // This message is our (living) twin if it is not a ghost, not deleted,
+      //  and has the same message-id header.
+      else if (!convMsg._isDeleted &&
+               convMsg.headerMessageID == aMessage.headerMessageID)
+        twinMessageExists = true;
+    }
+
+    // -- If everyone else is a ghost, blow away the conversation.
+    // If there are messages still alive, or deleted messages that
+    //  _deleteMessage has not yet gotten to, then do not do this.  (We will
+    //  eventually hit this case once they have all been deleted.)
+    if ((conversationMsgs.length - 1) == ghostCount) {
+      // - Obliterate each message.
+      for (let msg of conversationMsgs) {
+        GlodaDatastore.deleteMessageByID(msg.id);
+      }
+      // - Obliterate the conversation.
+      GlodaDatastore.deleteConversationByID(aMessage.conversationID);
+      // *No one* should hold a reference to or use aMessage after this
+      //  point; trash it so such ne'er-do-wells are made plain.
+      aMessage._objectPurgedMakeYourselfUnpleasant();
+    }
+    // -- Ghost or purge us as appropriate.
+    else {
+      // Purge us if we have a (living) twin; no ghost required.
+      if (twinMessageExists) {
+        GlodaDatastore.deleteMessageByID(aMessage.id);
+        // *No one* should hold a reference to or use aMessage after this
+        //  point; trash it so such ne'er-do-wells are made plain.
+        aMessage._objectPurgedMakeYourselfUnpleasant();
+      }
+      // No twin; a ghost is required, so we become the ghost.
+      else {
+        aMessage._ghost();
+        GlodaDatastore.updateMessage(aMessage);
+        // Ghosts don't have fulltext; purge it.
+        GlodaDatastore.deleteMessageTextByID(aMessage.id);
+      }
+    }
+
+    yield this.kWorkDone;
+  },
+};
+GlodaIndexer.registerIndexer(GlodaMsgIndexer);