/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

/*
 * This file currently contains a fairly general implementation of asynchronous
 *  indexing with a very explicit message indexing implementation.  As gloda
 *  will eventually want to index more than just messages, the message-specific
 *  things should ideally lose their special hold on this file.  This will
 *  benefit readability/size as well.
 */

this.EXPORTED_SYMBOLS = ['GlodaMsgIndexer'];

var Cc = Components.classes;
var Ci = Components.interfaces;
var Cr = Components.results;
var Cu = Components.utils;

Cu.import("resource://gre/modules/XPCOMUtils.jsm");
Cu.import("resource:///modules/iteratorUtils.jsm");
Cu.import("resource:///modules/mailServices.js");
Cu.import("resource:///modules/MailUtils.js");

Cu.import("resource:///modules/gloda/log4moz.js");
Cu.import("resource:///modules/gloda/utils.js");
Cu.import("resource:///modules/gloda/datastore.js");
Cu.import("resource:///modules/gloda/datamodel.js");
Cu.import("resource:///modules/gloda/gloda.js");
Cu.import("resource:///modules/gloda/collection.js");
Cu.import("resource:///modules/gloda/connotent.js");
Cu.import("resource:///modules/gloda/indexer.js");
Cu.import("resource:///modules/gloda/mimemsg.js");

XPCOMUtils.defineLazyServiceGetter(this, "atomService",
                                   "@mozilla.org/atom-service;1",
                                   "nsIAtomService");

// Components.results does not have mailnews error codes!
var NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE = 0x80550005;

// Message header property holding the gloda id of an indexed message (or one
//  of the bad-message sentinel values below).
var GLODA_MESSAGE_ID_PROPERTY = "gloda-id";
/**
 * Message header property to track dirty status; one of
 *  |GlodaIndexer.kMessageClean|, |GlodaIndexer.kMessageDirty|,
 *  |GlodaIndexer.kMessageFilthy|.
 */
var GLODA_DIRTY_PROPERTY = "gloda-dirty";

/**
 * The sentinel GLODA_MESSAGE_ID_PROPERTY value indicating that a message fails
 *  to index and we should not bother trying again, at least not until a new
 *  release is made.
 *
 * This should ideally just flip between 1 and 2, with GLODA_OLD_BAD_MESSAGE_ID
 *  flipping in the other direction.  If we start having more trailing badness,
 *  _indexerGetEnumerator and GLODA_OLD_BAD_MESSAGE_ID will need to be altered.
 *
 * When flipping this, be sure to update glodaTestHelper.js's copy.
 */
var GLODA_BAD_MESSAGE_ID = 2;
/**
 * The gloda id we used to use to mark messages as bad, but now should be
 *  treated as eligible for indexing.  This is only ever used for consideration
 *  when creating msg header enumerators with `_indexerGetEnumerator` which
 *  means we only will re-index such messages in an indexing sweep.  Accordingly
 *  event-driven indexing will still treat such messages as unindexed (and
 *  unindexable) until an indexing sweep picks them up.
 */
var GLODA_OLD_BAD_MESSAGE_ID = 1;
// The smallest gloda id a real (indexed) message row can carry; values below
//  this are reserved for sentinels like the bad-message markers above.
//  NOTE(review): presumably chosen to leave headroom for future sentinels —
//  confirm against datastore.js.
var GLODA_FIRST_VALID_MESSAGE_ID = 32;

// Message header property in which the junk classifier records its verdict.
var JUNK_SCORE_PROPERTY = "junkscore";
// String forms of the junk plugin's spam/ham scores; the junkscore header
//  property is string-valued, so comparisons use these.
var JUNK_SPAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_SPAM_SCORE.toString();
var JUNK_HAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_HAM_SCORE.toString();

// Shorthand for frequently-used XPCOM interfaces and flag enumerations.
var nsIArray = Ci.nsIArray;
var nsIMsgFolder = Ci.nsIMsgFolder;
var nsIMsgLocalMailFolder = Ci.nsIMsgLocalMailFolder;
var nsIMsgImapMailFolder = Ci.nsIMsgImapMailFolder;
var nsIMsgDBHdr = Ci.nsIMsgDBHdr;
var nsMsgFolderFlags = Ci.nsMsgFolderFlags;
var nsMsgMessageFlags = Ci.nsMsgMessageFlags;
var nsMsgProcessingFlags = Ci.nsMsgProcessingFlags;

/**
 * The processing flags that tell us that a message header has not yet been
 *  reported to us via msgsClassified.  If it has one of these flags, it is
 *  still being processed.
*/ var NOT_YET_REPORTED_PROCESSING_FLAGS = nsMsgProcessingFlags.NotReportedClassified | nsMsgProcessingFlags.ClassifyJunk; // for list comprehension fun function* range(begin, end) { for (let i = begin; i < end; ++i) { yield i; } } /** * We do not set properties on the messages until we perform a DB commit; this * helper class tracks messages that we have indexed but are not yet marked * as such on their header. */ var PendingCommitTracker = { /** * Maps message URIs to their gloda ids. * * I am not entirely sure why I chose the URI for the key rather than * gloda folder ID + message key. Most likely it was to simplify debugging * since the gloda folder ID is opaque while the URI is very informative. It * is also possible I was afraid of IMAP folder renaming triggering a UID * renumbering? */ _indexedMessagesPendingCommitByKey: {}, /** * Map from the pending commit gloda id to a tuple of [the corresponding * message header, dirtyState]. */ _indexedMessagesPendingCommitByGlodaId: {}, /** * Do we have a post-commit handler registered with this transaction yet? */ _pendingCommit: false, /** * The function gets called when the commit actually happens to flush our * message id's. * * It is very possible that by the time this call happens we have left the * folder and nulled out msgDatabase on the folder. Since nulling it out * is what causes the commit, if we set the headers here without somehow * forcing a commit, we will lose. Badly. * Accordingly, we make a list of all the folders that the headers belong to * as we iterate, make sure to re-attach their msgDatabase before forgetting * the headers, then make sure to zero the msgDatabase again, triggering a * commit. If there were a way to directly get the nsIMsgDatabase from the * header we could do that and call commit directly. We don't track * databases along with the headers since the headers can change because of * moves and that would increase the number of moving parts. 
*/ _commitCallback: function PendingCommitTracker_commitCallback() { let foldersByURI = {}; let lastFolder = null; for (let glodaId in PendingCommitTracker._indexedMessagesPendingCommitByGlodaId) { let [msgHdr, dirtyState] = PendingCommitTracker._indexedMessagesPendingCommitByGlodaId[glodaId]; // Mark this message as indexed. // It's conceivable the database could have gotten blown away, in which // case the message headers are going to throw exceptions when we try // and touch them. So we wrap this in a try block that complains about // this unforeseen circumstance. (noteFolderDatabaseGettingBlownAway // should have been called and avoided this situation in all known // situations.) try { let curGlodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); if (curGlodaId != glodaId) msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, glodaId); let headerDirty = msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); if (headerDirty != dirtyState) msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, dirtyState); // Make sure this folder is in our foldersByURI map. if (lastFolder == msgHdr.folder) continue; lastFolder = msgHdr.folder; let folderURI = lastFolder.URI; if (!(folderURI in foldersByURI)) foldersByURI[folderURI] = lastFolder; } catch (ex) { GlodaMsgIndexer._log.error( "Exception while attempting to mark message with gloda state after" + "db commit", ex); } } // it is vitally important to do this before we forget about the headers! for (let uri in foldersByURI) { let folder = foldersByURI[uri]; // This will not cause a parse. The database is in-memory since we have // a header that belongs to it. This just causes the folder to // re-acquire a reference from the database manager. let ignoredDb = folder.msgDatabase; // And this will cause a commit. (And must be done since we don't want // to cause a leak.) 
folder.msgDatabase = null; } PendingCommitTracker._indexedMessagesPendingCommitByGlodaId = {}; PendingCommitTracker._indexedMessagesPendingCommitByKey = {}; PendingCommitTracker._pendingCommit = false; }, /** * Track a message header that should be marked with the given gloda id when * the database commits. */ track: function PendingCommitTracker_track(aMsgHdr, aGlodaId) { let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; this._indexedMessagesPendingCommitByKey[pendingKey] = aGlodaId; this._indexedMessagesPendingCommitByGlodaId[aGlodaId] = [aMsgHdr, GlodaMsgIndexer.kMessageClean]; if (!this._pendingCommit) { GlodaDatastore.runPostCommit(this._commitCallback); this._pendingCommit = true; } }, /** * Get the current state of a message header given that we cannot rely on just * looking at the header's properties because we defer setting those * until the SQLite commit happens. * * @return Tuple of [gloda id, dirty status]. */ getGlodaState: function PendingCommitTracker_getGlodaState(aMsgHdr) { // If it's in the pending commit table, then the message is basically // clean. Return that info. let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; if (pendingKey in this._indexedMessagesPendingCommitByKey) { let glodaId = PendingCommitTracker._indexedMessagesPendingCommitByKey[pendingKey]; return [glodaId, this._indexedMessagesPendingCommitByGlodaId[glodaId][1]]; } else { // Otherwise the header's concept of state is correct. let glodaId = aMsgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); let glodaDirty = aMsgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); return [glodaId, glodaDirty]; } }, /** * Update our structure to reflect moved headers. Moves are currently * treated as weakly interesting and do not require a reindexing * although collections will get notified. So our job is to to fix-up * the pending commit information if the message has a pending commit. 
*/ noteMove: function PendingCommitTracker_noteMove(aOldHdr, aNewHdr) { let oldKey = aOldHdr.folder.URI + "#" + aOldHdr.messageKey; if (!(oldKey in this._indexedMessagesPendingCommitByKey)) return; let glodaId = this._indexedMessagesPendingCommitByKey[oldKey]; delete this._indexedMessagesPendingCommitByKey[oldKey]; let newKey = aNewHdr.folder.URI + "#" + aNewHdr.messageKey; this._indexedMessagesPendingCommitByKey[newKey] = glodaId; // only clobber the header, not the dirty state this._indexedMessagesPendingCommitByGlodaId[glodaId][0] = aNewHdr; }, /** * A blind move is one where we have the source header but not the destination * header. This happens for IMAP messages that do not involve offline fake * headers. * XXX Since IMAP moves will propagate the gloda-id/gloda-dirty bits for us, * we could detect the other side of the move when it shows up as a * msgsClassified event and restore the mapping information. Since the * offline fake header case should now cover the bulk of IMAP move * operations, we probably do not need to pursue this. * * We just re-dispatch to noteDirtyHeader because we can't do anything more * clever. */ noteBlindMove: function PendingCommitTracker_noteBlindMove(aOldHdr) { this.noteDirtyHeader(aOldHdr); }, /** * If a message is dirty we should stop tracking it for post-commit * purposes. This is not because we don't want to write to its header * when we commit as much as that we want to avoid |getHeaderGlodaState| * reporting that the message is clean. We could complicate our state * by storing that information, but this is easier and ends up the same * in the end. */ noteDirtyHeader: function PendingCommitTracker_noteDirtyHeader(aMsgHdr) { let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; if (!(pendingKey in this._indexedMessagesPendingCommitByKey)) return; // (It is important that we get the gloda id from our own structure!) 
let glodaId = this._indexedMessagesPendingCommitByKey[pendingKey]; this._indexedMessagesPendingCommitByGlodaId[glodaId][1] = GlodaMsgIndexer.kMessageDirty; }, /** * Sometimes a folder database gets blown away. This happens for one of two * expected reasons right now: * - Folder compaction. * - Explicit reindexing of a folder via the folder properties "rebuild index" * button. * * When this happens, we are basically out of luck and need to discard * everything about the folder. The good news is that the folder compaction * pass is clever enough to re-establish the linkages that are being lost * when we drop these things on the floor. Reindexing of a folder is not * clever enough to deal with this but is an exceptional case of last resort * (the user should not normally be performing a reindex as part of daily * operation), so we accept that messages may be redundantly indexed. */ noteFolderDatabaseGettingBlownAway: function PendingCommitTracker_noteFolderDatabaseGettingBlownAway( aMsgFolder) { let uri = aMsgFolder.URI + "#"; for (let key in Iterator(this._indexedMessagesPendingCommitByKey, true)) { // this is not as efficient as it could be, but compaction is relatively // rare and the number of pending headers is generally going to be // small. if (key.indexOf(uri) == 0) { delete this._indexedMessagesPendingCommitByKey[key]; } } }, }; /** * This callback handles processing the asynchronous query results of * |GlodaMsgIndexer.getMessagesByMessageID|. 
*/ function MessagesByMessageIdCallback(aMsgIDToIndex, aResults, aCallback, aCallbackThis) { this.msgIDToIndex = aMsgIDToIndex; this.results = aResults; this.callback = aCallback; this.callbackThis = aCallbackThis; } MessagesByMessageIdCallback.prototype = { _log: Log4Moz.repository.getLogger("gloda.index_msg.mbm"), onItemsAdded: function gloda_ds_mbmi_onItemsAdded(aItems, aCollection) { // just outright bail if we are shutdown if (GlodaDatastore.datastoreIsShutdown) return; this._log.debug("getting results..."); for (let message of aItems) { this.results[this.msgIDToIndex[message.headerMessageID]].push(message); } }, onItemsModified: function () {}, onItemsRemoved: function () {}, onQueryCompleted: function gloda_ds_mbmi_onQueryCompleted(aCollection) { // just outright bail if we are shutdown if (GlodaDatastore.datastoreIsShutdown) return; if (this._log.level <= Log4Moz.Level.Debug) this._log.debug("query completed, notifying... " + this.results); this.callback.call(this.callbackThis, this.results); } }; /** * The message indexer! * * === Message Indexing Strategy * To these ends, we implement things like so: * * Mesage State Tracking * - We store a property on all indexed headers indicating their gloda message * id. This allows us to tell whether a message is indexed from the header, * without having to consult the SQL database. * - When we receive an event that indicates that a message's meta-data has * changed and gloda needs to re-index the message, we set a property on the * header that indicates the message is dirty. This property can indicate * that the message needs to be re-indexed but the gloda-id is valid (dirty) * or that the message's gloda-id is invalid (filthy) because the gloda * database has been blown away. * - We track whether a folder is up-to-date on our GlodaFolder representation * using a concept of dirtiness, just like messages. Like messages, a folder * can be dirty or filthy. 
A dirty folder has at least one dirty message in * it which means we should scan the folder. A filthy folder means that * every message in the folder should be considered filthy. Folders start * out filthy when Gloda is first told about them indicating we cannot * trust any of the gloda-id's in the folders. Filthy folders are downgraded * to dirty folders after we mark all of the headers with gloda-id's filthy. * * Indexing Message Control * - We index the headers of all IMAP messages. We index the bodies of all IMAP * messages that are offline. We index all local messages. We plan to avoid * indexing news messages. * - We would like a way to express desires about indexing that either don't * confound offline storage with indexing, or actually allow some choice. * * Indexing Messages * - We have two major modes of indexing: sweep and event-driven. When we * start up we kick off an indexing sweep. We use event-driven indexing * as we receive events for eligible messages, but if we get too many * events we start dropping them on the floor and just flag that an indexing * sweep is required. * - The sweep initiates folder indexing jobs based on the priorities assigned * to folders. Folder indexing uses a filtered message enumerator to find * messages that need to be indexed, minimizing wasteful exposure of message * headers to XPConnect that we would not end up indexing. * - For local folders, we use GetDatabaseWithReparse to ensure that the .msf * file exists. For IMAP folders, we simply use GetDatabase because we know * the auto-sync logic will make sure that the folder is up-to-date and we * want to avoid creating problems through use of updateFolder. * * Junk Mail * - We do not index junk. We do not index messages until the junk/non-junk * determination has been made. If a message gets marked as junk, we act like * it was deleted. * - We know when a message is actively queued for junk processing thanks to * folder processing flags. 
nsMsgDBFolder::CallFilterPlugins does this * prior to initiating spam processing. Unfortunately, this method does not * get called until after we receive the notification about the existence of * the header. How long after can vary on different factors. The longest * delay is in the IMAP case where there is a filter that requires the * message body to be present; the method does not get called until all the * bodies are downloaded. * */ var GlodaMsgIndexer = { /** * A partial attempt to generalize to support multiple databases. Each * database would have its own datastore would have its own indexer. But * we rather inter-mingle our use of this field with the singleton global * GlodaDatastore. */ _datastore: GlodaDatastore, _log: Log4Moz.repository.getLogger("gloda.index_msg"), _junkService: MailServices.junk, name: "index_msg", /** * Are we enabled, read: are we processing change events? */ _enabled: false, get enabled() { return this._enabled; }, enable: function msg_indexer_enable() { // initialize our listeners' this pointers this._databaseAnnouncerListener.indexer = this; this._msgFolderListener.indexer = this; // register for: // - folder loaded events, so we know when getDatabaseWithReparse has // finished updating the index/what not (if it was't immediately // available) // - property changes (so we know when a message's read/starred state have // changed.) this._folderListener._init(this); MailServices.mailSession.AddFolderListener(this._folderListener, Ci.nsIFolderListener.intPropertyChanged | Ci.nsIFolderListener.propertyFlagChanged | Ci.nsIFolderListener.event); MailServices.mfn.addListener(this._msgFolderListener, // note: intentionally no msgAdded notification is requested. 
Ci.nsIMsgFolderNotificationService.msgsClassified | Ci.nsIMsgFolderNotificationService.msgsDeleted | Ci.nsIMsgFolderNotificationService.msgsMoveCopyCompleted | Ci.nsIMsgFolderNotificationService.msgKeyChanged | Ci.nsIMsgFolderNotificationService.folderAdded | Ci.nsIMsgFolderNotificationService.folderDeleted | Ci.nsIMsgFolderNotificationService.folderMoveCopyCompleted | Ci.nsIMsgFolderNotificationService.folderRenamed | Ci.nsIMsgFolderNotificationService.itemEvent); this._enabled = true; this._considerSchemaMigration(); this._log.info("Event-Driven Indexing is now " + this._enabled); }, disable: function msg_indexer_disable() { // remove FolderLoaded notification listener MailServices.mailSession.RemoveFolderListener(this._folderListener); MailServices.mfn.removeListener(this._msgFolderListener); this._indexerLeaveFolder(); // nop if we aren't "in" a folder this._enabled = false; this._log.info("Event-Driven Indexing is now " + this._enabled); }, /** * Indicates that we have pending deletions to process, meaning that there * are gloda message rows flagged for deletion. If this value is a boolean, * it means the value is known reliably. If this value is null, it means * that we don't know, likely because we have started up and have not checked * the database. */ pendingDeletions: null, /** * The message (or folder state) is believed up-to-date. */ kMessageClean: 0, /** * The message (or folder) is known to not be up-to-date. In the case of * folders, this means that some of the messages in the folder may be dirty. * However, because of the way our indexing works, it is possible there may * actually be no dirty messages in a folder. (We attempt to process * messages in an event-driven fashion for a finite number of messages, but * because we can quit without completing processing of the queue, we need to * mark the folder dirty, just-in-case.) (We could do some extra leg-work * and do a better job of marking the folder clean again.) 
*/ kMessageDirty: 1, /** * We have not indexed the folder at all, but messages in the folder think * they are indexed. We downgrade the folder to just kMessageDirty after * marking all the messages in the folder as dirty. We do this so that if we * have to stop indexing the folder we can still build on our progress next * time we enter the folder. * We mark all folders filthy when (re-)creating the database because there * may be previous state left over from an earlier database. */ kMessageFilthy: 2, /** * A message addition job yet to be (completely) processed. Since message * addition events come to us one-by-one, in order to aggregate them into a * job, we need something like this. It's up to the indexing loop to * decide when to null this out; it can either do it when it first starts * processing it, or when it has processed the last thing. It's really a * question of whether we want retrograde motion in the folder progress bar * or the message progress bar. */ _pendingAddJob: null, /** * The number of messages that we should queue for processing before letting * them fall on the floor and relying on our folder-walking logic to ensure * that the messages are indexed. * The reason we allow for queueing messages in an event-driven fashion is * that once we have reached a steady-state, it is preferable to be able to * deal with new messages and modified meta-data in a prompt fasion rather * than having to (potentially) walk every folder in the system just to find * the message that the user changed the tag on. */ _indexMaxEventQueueMessages: 20, /** * Unit testing hook to get us to emit additional logging that verges on * inane for general usage but is helpful in unit test output to get a lay * of the land and for paranoia reasons. */ _unitTestSuperVerbose: false, /** The GlodaFolder corresponding to the folder we are indexing. */ _indexingGlodaFolder: null, /** The nsIMsgFolder we are currently indexing. 
*/ _indexingFolder: null, /** The nsIMsgDatabase we are currently indexing. */ _indexingDatabase: null, /** * The iterator we are using to iterate over the headers in * this._indexingDatabase. */ _indexingIterator: null, /** folder whose entry we are pending on */ _pendingFolderEntry: null, // copy-down the work constants from Gloda kWorkSync: Gloda.kWorkSync, kWorkAsync: Gloda.kWorkAsync, kWorkDone: Gloda.kWorkDone, kWorkPause: Gloda.kWorkPause, kWorkDoneWithResult: Gloda.kWorkDoneWithResult, /** * Async common logic that we want to deal with the given folder ID. Besides * cutting down on duplicate code, this ensures that we are listening on * the folder in case it tries to go away when we are using it. * * @return true when the folder was successfully entered, false when we need * to pend on notification of updating of the folder (due to re-parsing * or what have you). In the event of an actual problem, an exception * will escape. */ _indexerEnterFolder: function gloda_index_indexerEnterFolder(aFolderID) { // leave the folder if we haven't explicitly left it. if (this._indexingFolder !== null) { this._indexerLeaveFolder(); } this._indexingGlodaFolder = GlodaDatastore._mapFolderID(aFolderID); this._indexingFolder = this._indexingGlodaFolder.getXPCOMFolder( this._indexingGlodaFolder.kActivityIndexing); if (this._indexingFolder) this._log.debug("Entering folder: " + this._indexingFolder.URI); try { // The msf may need to be created or otherwise updated for local folders. // This may require yielding until such time as the msf has been created. try { if (this._indexingFolder instanceof nsIMsgLocalMailFolder) { this._indexingDatabase = this._indexingFolder.getDatabaseWithReparse(null, null); } // we need do nothing special for IMAP, news, or other } // getDatabaseWithReparse can return either NS_ERROR_NOT_INITIALIZED or // NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE if the net result is that it // is going to send us a notification when the reparse has completed. 
// (note that although internally NS_MSG_ERROR_FOLDER_SUMMARY_MISSING // might get flung around, it won't make it out to us, and will instead // be permuted into an NS_ERROR_NOT_INITIALIZED.) catch (e) { if ((e.result == Cr.NS_ERROR_NOT_INITIALIZED) || (e.result == NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE)) { // this means that we need to pend on the update; the listener for // FolderLoaded events will call _indexerCompletePendingFolderEntry. this._log.debug("Pending on folder load..."); this._pendingFolderEntry = this._indexingFolder; return this.kWorkAsync; } else { throw e; } } // we get an nsIMsgDatabase out of this (unsurprisingly) which // explicitly inherits from nsIDBChangeAnnouncer, which has the // AddListener call we want. if (this._indexingDatabase == null) this._indexingDatabase = this._indexingFolder.msgDatabase; this._indexingDatabase.AddListener(this._databaseAnnouncerListener); } catch (ex) { this._log.error("Problem entering folder: " + (this._indexingFolder ? this._indexingFolder.prettiestName : "unknown") + ", skipping. Error was: " + ex.fileName + ":" + ex.lineNumber + ": " + ex); this._indexingGlodaFolder.indexing = false; this._indexingFolder = null; this._indexingGlodaFolder = null; this._indexingDatabase = null; this._indexingEnumerator = null; // re-throw, we just wanted to make sure this junk is cleaned up and // get localized error logging... throw ex; } return this.kWorkSync; }, /** * If the folder was still parsing/updating when we tried to enter, then this * handler will get called by the listener who got the FolderLoaded message. * All we need to do is get the database reference, register a listener on * the db, and retrieve an iterator if desired. 
*/
  _indexerCompletePendingFolderEntry:
      function gloda_indexer_indexerCompletePendingFolderEntry() {
    this._indexingDatabase = this._indexingFolder.msgDatabase;
    // listen for changes on the now-available database
    this._indexingDatabase.AddListener(this._databaseAnnouncerListener);
    this._log.debug("...Folder Loaded!");

    // the load is no longer pending; we certainly don't want more
    //  notifications
    this._pendingFolderEntry = null;

    // indexerEnterFolder returned kWorkAsync, which means we need to notify
    //  the callback driver to get things going again.
    GlodaIndexer.callbackDriver();
  },

  /**
   * Enumerate all messages in the folder.
   */
  kEnumAllMsgs: 0,
  /**
   * Enumerate messages that look like they need to be indexed.
   */
  kEnumMsgsToIndex: 1,
  /**
   * Enumerate messages that are already indexed.
   */
  kEnumIndexedMsgs: 2,

  /**
   * Synchronous helper to get an enumerator for the current folder (as found
   *  in |_indexingFolder|) and store it in |_indexingEnumerator|.
   *
   * @param aEnumKind One of |kEnumAllMsgs|, |kEnumMsgsToIndex|, or
   *     |kEnumIndexedMsgs|.
   * @param [aAllowPreBadIds=false] Only valid for |kEnumIndexedMsgs|, tells us
   *     that we should treat message with any gloda-id as dirty, not just
   *     messages that have non-bad message id's.
   */
  _indexerGetEnumerator: function gloda_indexer_indexerGetEnumerator(
      aEnumKind, aAllowPreBadIds) {
    if (aEnumKind == this.kEnumMsgsToIndex) {
      // We need to create search terms for messages to index. Messages should
      //  be indexed if they're indexable (local or offline and not expunged)
      //  and either: haven't been indexed, are dirty, or are marked with
      //  a former GLODA_BAD_MESSAGE_ID that is no longer our bad marker. (Our
      //  bad marker can change on minor schema revs so that we can try and
      //  reindex those messages exactly once and without needing to go
      //  through a pass to mark them as needing one more try.)
      // The basic search expression is:
      //  ((GLODA_MESSAGE_ID_PROPERTY Is 0) ||
      //   (GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID) ||
      //   (GLODA_DIRTY_PROPERTY Isnt 0)) &&
      //  (JUNK_SCORE_PROPERTY Isnt 100)
      // If the folder !isLocal we add the terms:
      //  - if the folder is offline -- && (Status Is nsMsgMessageFlags.Offline)
      //  - && (Status Isnt nsMsgMessageFlags.Expunged)

      let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
                            .createInstance(Ci.nsIMsgSearchSession);
      let searchTerms = Cc["@mozilla.org/array;1"]
                          .createInstance(Ci.nsIMutableArray);
      let isLocal = this._indexingFolder instanceof nsIMsgLocalMailFolder;

      searchSession.addScopeTerm(Ci.nsMsgSearchScope.offlineMail,
                                 this._indexingFolder);
      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
      let nsMsgSearchOp = Ci.nsMsgSearchOp;

      // first term: (GLODA_MESSAGE_ID_PROPERTY Is 0
      let searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // actually don't care here
      searchTerm.beginsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Is;
      // (the value object must be fetched, mutated, then re-assigned for the
      //  change to take; this pattern repeats for every term below)
      let value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = 0;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // second term: || GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // OR
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Is;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = GLODA_OLD_BAD_MESSAGE_ID;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // third term: || GLODA_DIRTY_PROPERTY Isnt 0 )
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false;
      searchTerm.endsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = 0;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // JUNK_SCORE_PROPERTY Isnt 100
      // For symmetry with our event-driven stuff, we just directly deal with
      //  the header property.
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = true;
      searchTerm.attrib = nsMsgSearchAttrib.HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.str = JUNK_SPAM_SCORE_STR;
      searchTerm.value = value;
      searchTerm.hdrProperty = JUNK_SCORE_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      if (!isLocal) {
        // If the folder is offline, then the message should be too
        if (this._indexingFolder.flags & Ci.nsMsgFolderFlags.Offline) {
          // third term: && Status Is nsMsgMessageFlags.Offline
          searchTerm = searchSession.createTerm();
          searchTerm.booleanAnd = true;
          searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
          searchTerm.op = nsMsgSearchOp.Is;
          value = searchTerm.value;
          value.attrib = searchTerm.attrib;
          value.status = nsMsgMessageFlags.Offline;
          searchTerm.value = value;
          searchTerms.appendElement(searchTerm, false);
        }

        // fourth term: && Status Isnt nsMsgMessageFlags.Expunged
        searchTerm = searchSession.createTerm();
        searchTerm.booleanAnd = true;
        searchTerm.attrib = nsMsgSearchAttrib.MsgStatus;
        searchTerm.op = nsMsgSearchOp.Isnt;
        value = searchTerm.value;
        value.attrib = searchTerm.attrib;
        value.status = nsMsgMessageFlags.Expunged;
        searchTerm.value = value;
        searchTerms.appendElement(searchTerm, false);
      }

      // second argument 'true' — NOTE(review): presumably requests reverse
      //  enumeration (contrast with the 'false' in the indexed-msgs case
      //  below, whose comment says it wants message keys in order); confirm
      //  against nsIMsgDatabase.getFilterEnumerator.
      this._indexingEnumerator =
        this._indexingDatabase.getFilterEnumerator(searchTerms, true);
    }
    else if (aEnumKind == this.kEnumIndexedMsgs) {
      // Enumerate only messages that are already indexed.  This comes out to:
      //  ((GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1) &&
      //   (GLODA_DIRTY_PROPERTY Isnt kMessageFilthy))
      // In English, a message is indexed if (by clause):
      // 1) The message has a gloda-id and that gloda-id is in the valid range
      //    (and not in the bad message marker range).
      // 2) The message has not been marked filthy (which invalidates the
      //    gloda-id.)  We also assume that the folder would not have been
      //    entered at all if it was marked filthy.
      let searchSession = Cc["@mozilla.org/messenger/searchSession;1"]
                            .createInstance(Ci.nsIMsgSearchSession);
      let searchTerms = Cc["@mozilla.org/array;1"]
                          .createInstance(Ci.nsIMutableArray);

      searchSession.addScopeTerm(Ci.nsMsgSearchScope.offlineMail,
                                 this._indexingFolder);
      let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib;
      let nsMsgSearchOp = Ci.nsMsgSearchOp;

      // first term: (GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1
      let searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = false; // actually don't care here
      searchTerm.beginsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      // use != 0 if we're allowing pre-bad ids.
      searchTerm.op = aAllowPreBadIds ? nsMsgSearchOp.Isnt
                                      : nsMsgSearchOp.IsGreaterThan;
      let value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = aAllowPreBadIds ? 0 : (GLODA_FIRST_VALID_MESSAGE_ID - 1);
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // second term: && GLODA_DIRTY_PROPERTY Isnt kMessageFilthy)
      searchTerm = searchSession.createTerm();
      searchTerm.booleanAnd = true;
      searchTerm.endsGrouping = true;
      searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty;
      searchTerm.op = nsMsgSearchOp.Isnt;
      value = searchTerm.value;
      value.attrib = searchTerm.attrib;
      value.status = this.kMessageFilthy;
      searchTerm.value = value;
      searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY;
      searchTerms.appendElement(searchTerm, false);

      // The use-case of already indexed messages does not want them reversed;
      //  we care about seeing the message keys in order.
      this._indexingEnumerator =
        this._indexingDatabase.getFilterEnumerator(searchTerms, false);
    }
    else if (aEnumKind == this.kEnumAllMsgs) {
      this._indexingEnumerator =
        this._indexingDatabase.ReverseEnumerateMessages();
    }
    else {
      throw new Error("Unknown enumerator type requested:" + aEnumKind);
    }
  },

  /**
   * Leave the folder we are currently indexing: commit the message database,
   *  drop our database listener, clear the folder's in-progress indexing
   *  flag, and null out all of the per-folder indexing state.  Safe to call
   *  when no folder is entered (it no-ops).
   */
  _indexerLeaveFolder: function gloda_index_indexerLeaveFolder() {
    if (this._indexingFolder !== null) {
      if (this._indexingDatabase) {
        this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);
        // remove our listener!
        this._indexingDatabase.RemoveListener(this._databaseAnnouncerListener);
      }
      // let the gloda folder know we are done indexing
      this._indexingGlodaFolder.indexing = false;
      // null everyone out
      this._indexingFolder = null;
      this._indexingGlodaFolder = null;
      this._indexingDatabase = null;
      this._indexingEnumerator = null;
    }
  },

  /**
   * Event fed to us by our nsIFolderListener when a folder is loaded.  We use
   *  this event to know when a folder we were trying to open to index is
   *  actually ready to be indexed.  (The summary may have not existed, may
   *  have been out of date, or otherwise.)
   *
   * @param aFolder An nsIMsgFolder, already QI'd.
*/
  _onFolderLoaded: function gloda_index_onFolderLoaded(aFolder) {
    // only act if we were actually waiting on this folder to load
    if ((this._pendingFolderEntry !== null) &&
        (aFolder.URI == this._pendingFolderEntry.URI))
      this._indexerCompletePendingFolderEntry();
  },

  // it's a getter so we can reference 'this'.  we could memoize.
  get workers() {
    return [
      ["folderSweep", {
         worker: this._worker_indexingSweep,
         jobCanceled: this._cleanup_indexingSweep,
         cleanup: this._cleanup_indexingSweep,
       }],
      ["folder", {
         worker: this._worker_folderIndex,
         recover: this._recover_indexMessage,
         cleanup: this._cleanup_indexing,
       }],
      ["folderCompact", {
         worker: this._worker_folderCompactionPass,
         // compaction enters the folder so needs to know how to leave
         cleanup: this._cleanup_indexing,
       }],
      ["message", {
         worker: this._worker_messageIndex,
         onSchedule: this._schedule_messageIndex,
         jobCanceled: this._canceled_messageIndex,
         recover: this._recover_indexMessage,
         cleanup: this._cleanup_indexing,
       }],
      ["delete", {
         worker: this._worker_processDeletes,
       }],
      ["fixMissingContacts", {
         worker: this._worker_fixMissingContacts,
       }],
    ];
  },

  _schemaMigrationInitiated: false,
  /**
   * Kick off the one-shot "fixMissingContacts" migration job exactly once,
   *  and only when the datastore is at schema version 26.
   */
  _considerSchemaMigration: function() {
    if (!this._schemaMigrationInitiated &&
        GlodaDatastore._actualSchemaVersion === 26) {
      let job = new IndexingJob("fixMissingContacts", null);
      GlodaIndexer.indexJob(job);
      this._schemaMigrationInitiated = true;
    }
  },

  initialSweep: function() {
    this.indexingSweepNeeded = true;
  },

  _indexingSweepActive: false,
  /**
   * Indicate that an indexing sweep is desired.  We kick-off an indexing
   *  sweep at start-up and whenever we receive an event-based notification
   *  that we either can't process as an event or that we normally handle
   *  during the sweep pass anyways.
   */
  set indexingSweepNeeded(aNeeded) {
    // only schedule a new sweep job if one is not already active
    if (!this._indexingSweepActive && aNeeded) {
      let job = new IndexingJob("folderSweep", null);
      job.mappedFolders = false;
      GlodaIndexer.indexJob(job);
      this._indexingSweepActive = true;
    }
  },

  /**
   * Performs the folder sweep, locating folders that should be indexed, and
   *  creating a folder indexing job for them, and rescheduling itself for
   *  execution after that job is completed.  Once it indexes all the folders,
   *  if we believe we have deletions to process (or just don't know), it
   *  kicks off a deletion processing job.
   *
   * Folder traversal logic is based off the spotlight/vista indexer code; we
   *  retrieve the list of servers and folders each time we want to find a new
   *  folder to index.  This avoids needing to maintain a perfect model of the
   *  folder hierarchy at all times.  (We may eventually want to do that, but
   *  this is sufficient and safe for now.)  Although our use of dirty flags
   *  on the folders allows us to avoid tracking the 'last folder' we
   *  processed, we do so to avoid getting 'trapped' in a folder with a high
   *  rate of changes.
   */
  _worker_indexingSweep: function* gloda_worker_indexingSweep(aJob) {
    if (!aJob.mappedFolders) {
      // Walk the folders and make sure all the folders we would want to index
      //  are mapped.  Build up a list of GlodaFolders as we go, so that we
      //  can sort them by their indexing priority.
      let foldersToProcess = aJob.foldersToProcess = [];

      let allFolders = MailServices.accounts.allFolders;
      for (let folder in fixIterator(allFolders, Ci.nsIMsgFolder)) {
        if (this.shouldIndexFolder(folder))
          foldersToProcess.push(Gloda.getFolderForFolder(folder));
      }

      // sort the folders by priority (descending)
      foldersToProcess.sort(function (a, b) {
        return b.indexingPriority - a.indexingPriority;
      });

      aJob.mappedFolders = true;
    }

    // -- process the folders (in sorted order)
    while (aJob.foldersToProcess.length) {
      let glodaFolder = aJob.foldersToProcess.shift();
      // ignore folders that:
      // - have been deleted out of existence!
      // - are not dirty/have not been compacted
      // - are actively being compacted
      if (glodaFolder._deleted ||
          (!glodaFolder.dirtyStatus && !glodaFolder.compacted) ||
          glodaFolder.compacting)
        continue;

      // If the folder is marked as compacted, give it a compaction job.
      if (glodaFolder.compacted)
        GlodaIndexer.indexJob(new IndexingJob("folderCompact",
                                              glodaFolder.id));

      // add a job for the folder indexing if it was dirty
      if (glodaFolder.dirtyStatus)
        GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id));

      // re-schedule this job (although this worker will die)
      GlodaIndexer.indexJob(aJob);
      yield this.kWorkDone;
    }

    // consider deletion
    if (this.pendingDeletions || this.pendingDeletions === null)
      GlodaIndexer.indexJob(new IndexingJob("delete", null));

    // we don't have any more work to do...
    this._indexingSweepActive = false;
    yield this.kWorkDone;
  },

  /**
   * The only state we need to cleanup is that there is no longer an active
   *  indexing sweep.
   */
  _cleanup_indexingSweep: function gloda_canceled_indexingSweep(aJob) {
    this._indexingSweepActive = false;
  },

  /**
   * The number of headers to look at before yielding with kWorkSync.  This
   *  is for time-slicing purposes so we still yield to the UI periodically.
   */
  HEADER_CHECK_SYNC_BLOCK_SIZE: 25,

  /**
   * The number of headers to look at before calling
   *  GlodaUtils.considerHeaderBasedGC to give garbage collection a chance
   *  to run.
   */
  HEADER_CHECK_GC_BLOCK_SIZE: 256,

  // Number of [gloda id, message key, message-id] tuples fetched per block
  //  by _worker_folderCompactionPass.
  FOLDER_COMPACTION_PASS_BATCH_SIZE: 512,

  /**
   * Special indexing pass for (local) folders that have been compacted.  The
   *  compaction can cause message keys to change because message keys in
   *  local folders are simply offsets into the mbox file.  Accordingly, we
   *  need to update the gloda records/objects to point them at the new
   *  message key.
   *
   * Our general algorithm is to perform two traversals in parallel.  The
   *  first is a straightforward enumeration of the message headers in the
   *  folder that apparently have been already indexed.  These provide us
   *  with the message key and the "gloda-id" property.
* The second is a list of tuples containing a gloda message id, its current
   *  message key per the gloda database, and the message-id header.  We
   *  re-fill the list with batches on-demand.  This allows us to both avoid
   *  dispatching needless UPDATEs as well as deal with messages that were
   *  tracked by the PendingCommitTracker but were discarded by the
   *  compaction notification.
   *
   * We end up processing two streams of gloda-id's and some extra info.  In
   *  the normal case we expect these two streams to line up exactly and all
   *  we need to do is update the message key if it has changed.
   *
   * There are a few exceptional cases where things do not line up:
   * 1) The gloda database knows about a message that the enumerator does not
   *     know about...
   *    a) This message exists in the folder (identified using its message-id
   *        header).  This means the message got indexed but
   *        PendingCommitTracker had to forget about the info when the
   *        compaction happened.  We re-establish the link and track the
   *        message in PendingCommitTracker again.
   *    b) The message does not exist in the folder.  This means the message
   *        got indexed, PendingCommitTracker had to forget about the info,
   *        and then the message either got moved or deleted before now.  We
   *        mark the message as deleted; this allows the gloda message to be
   *        reused if the move target has not yet been indexed or purged if
   *        it already has been and the gloda message is a duplicate.  And
   *        obviously, if the event that happened was actually a delete, then
   *        the delete is the right thing to do.
   * 2) The enumerator knows about a message that the gloda database does not
   *     know about.  This is unexpected and should not happen.  We log a
   *     warning.  We are able to differentiate this case from case #1a by
   *     retrieving the message header associated with the next gloda message
   *     (using the message-id header per 1a again).  If the gloda message's
   *     message key is after the enumerator's message key then we know this
   *     is case #2.  (It implies an insertion in the enumerator stream which
   *     is how we define the unexpected case.)
   *
   * Besides updating the database rows, we also need to make sure that
   *  in-memory representations are updated.  Immediately after dispatching
   *  UPDATE changes to the database we use the same set of data to walk the
   *  live collections and update any affected messages.  We are then able to
   *  discard the information.  Although this means that we will have to
   *  potentially walk the live collections multiple times, unless something
   *  has gone horribly wrong, the number of collections should be reasonable
   *  and the lookups are cheap.  We bias batch sizes accordingly.
   *
   * Because we operate based on chunks we need to make sure that when we
   *  actually deal with multiple chunks that we don't step on our own feet
   *  with our database updates.  Since compaction of message key K results
   *  in a new message key K' such that K' <= K, we can reliably issue
   *  database updates for all values <= K.  Which means our feet are safe no
   *  matter when we issue the update command.  For maximum cache benefit, we
   *  issue our updates prior to our new query since they should still be
   *  maximally hot at that point.
   */
  _worker_folderCompactionPass:
      function* gloda_worker_folderCompactionPass(aJob, aCallbackHandle) {
    yield this._indexerEnterFolder(aJob.id);

    // It's conceivable that with a folder sweep we might end up trying to
    //  compact a folder twice.  Bail early in this case.
    if (!this._indexingGlodaFolder.compacted)
      yield this.kWorkDone;

    // this is a forward enumeration (sometimes we reverse enumerate; not here)
    this._indexerGetEnumerator(this.kEnumIndexedMsgs);

    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;
    const HEADER_CHECK_GC_BLOCK_SIZE = this.HEADER_CHECK_GC_BLOCK_SIZE;
    const FOLDER_COMPACTION_PASS_BATCH_SIZE =
      this.FOLDER_COMPACTION_PASS_BATCH_SIZE;

    // Tuples of [gloda id, message key, message-id header] from
    //  folderCompactionPassBlockFetch
    let glodaIdsMsgKeysHeaderIds = [];
    // Unpack each tuple from glodaIdsMsgKeysHeaderIds into these guys.
    // (Initialize oldMessageKey because we use it to kickstart our query.)
    let oldGlodaId, oldMessageKey = -1, oldHeaderMessageId;
    // parallel lists of gloda ids and message keys to pass to
    //  GlodaDatastore.updateMessageLocations
    let updateGlodaIds = [];
    let updateMessageKeys = [];
    // list of gloda id's to mark deleted
    let deleteGlodaIds = [];
    let exceptionalMessages = {};

    // for GC reasons we need to track the number of headers seen
    let numHeadersSeen = 0;

    // We are consuming two lists; our loop structure has to reflect that.
    // (legacy SpiderMonkey Iterator/StopIteration protocol.)
    let headerIter = Iterator(fixIterator(this._indexingEnumerator,
                                          nsIMsgDBHdr));
    let mayHaveMoreGlodaMessages = true;
    // when true, re-process the current enumerator header next iteration
    let keepIterHeader = false;
    // when true, re-process the current gloda tuple next iteration
    let keepGlodaTuple = false;
    let msgHdr = null;
    while (headerIter || mayHaveMoreGlodaMessages) {
      let glodaId;
      if (headerIter) {
        try {
          if (!keepIterHeader)
            msgHdr = headerIter.next();
          else
            keepIterHeader = false;
        }
        catch (ex) {
          if (ex instanceof StopIteration) {
            headerIter = null;
            msgHdr = null;
            // do the loop check again
            continue;
          }
          else {
            throw ex;
          }
        }
      }

      if (msgHdr) {
        numHeadersSeen++;
        if (numHeadersSeen % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
          yield this.kWorkSync;
        if (numHeadersSeen % HEADER_CHECK_GC_BLOCK_SIZE == 0)
          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);

        // There is no need to check with PendingCommitTracker.  If a message
        //  somehow got indexed between the time the compaction killed
        //  everything and the time we run, that is a bug.
        glodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY);
        // (there is also no need to check for gloda dirty since the
        //  enumerator filtered that for us.)
      }

      // get more [gloda id, message key, message-id header] tuples if out
      if (!glodaIdsMsgKeysHeaderIds.length && mayHaveMoreGlodaMessages) {
        // Since we operate on blocks, getting a new block implies we should
        //  flush the last block if applicable.
        if (updateGlodaIds.length) {
          GlodaDatastore.updateMessageLocations(updateGlodaIds,
                                                updateMessageKeys,
                                                aJob.id, true);
          updateGlodaIds = [];
          updateMessageKeys = [];
        }

        if (deleteGlodaIds.length) {
          GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);
          deleteGlodaIds = [];
        }

        GlodaDatastore.folderCompactionPassBlockFetch(
          aJob.id, oldMessageKey + 1, FOLDER_COMPACTION_PASS_BATCH_SIZE,
          aCallbackHandle.wrappedCallback);
        glodaIdsMsgKeysHeaderIds = yield this.kWorkAsync;
        // Reverse so we can use pop instead of shift and I don't need to be
        //  paranoid about performance.
        glodaIdsMsgKeysHeaderIds.reverse();

        if (!glodaIdsMsgKeysHeaderIds.length) {
          mayHaveMoreGlodaMessages = false;

          // We shouldn't be in the loop anymore if headerIter is dead now.
          if (!headerIter)
            break;
        }
      }

      if (!keepGlodaTuple) {
        if (mayHaveMoreGlodaMessages)
          [oldGlodaId, oldMessageKey, oldHeaderMessageId] =
            glodaIdsMsgKeysHeaderIds.pop();
        else
          oldGlodaId = oldMessageKey = oldHeaderMessageId = null;
      }
      else {
        keepGlodaTuple = false;
      }

      // -- normal expected case
      if (glodaId == oldGlodaId) {
        // only need to do something if the key is not right
        if (msgHdr.messageKey != oldMessageKey) {
          updateGlodaIds.push(glodaId);
          updateMessageKeys.push(msgHdr.messageKey);
        }
      }
      // -- exceptional cases
      else {
        // This should always return a value unless something is very wrong.
        //  We do not want to catch the exception if one happens.
        let idBasedHeader = oldHeaderMessageId ?
          this._indexingDatabase.getMsgHdrForMessageID(oldHeaderMessageId) :
          false;
        // - Case 1b.
        // We want to mark the message as deleted.
        if (idBasedHeader == null) {
          deleteGlodaIds.push(oldGlodaId);
        }
        // - Case 1a
        // The expected case is that the message referenced by the gloda
        //  database precedes the header the enumerator told us about.  This
        //  is expected because if PendingCommitTracker did not mark the
        //  message as indexed/clean then the enumerator would not tell us
        //  about it.
        // Also, if we ran out of headers from the enumerator, this is a dead
        //  giveaway that this is the expected case.
        else if (idBasedHeader &&
                 ((msgHdr &&
                   idBasedHeader.messageKey < msgHdr.messageKey) ||
                  !msgHdr)) {
          // tell the pending commit tracker about the gloda database one
          PendingCommitTracker.track(idBasedHeader, oldGlodaId);
          // and we might need to update the message key too
          if (idBasedHeader.messageKey != oldMessageKey) {
            updateGlodaIds.push(oldGlodaId);
            updateMessageKeys.push(idBasedHeader.messageKey);
          }
          // Take another pass through the loop so that we check the
          //  enumerator header against the next message in the gloda
          //  database.
          keepIterHeader = true;
        }
        // - Case 2
        // Whereas if the message referenced by gloda has a message key
        //  greater than the one returned by the enumerator, then we have a
        //  header claiming to be indexed by gloda that gloda does not
        //  actually know about.  This is exceptional and gets a warning.
        else if (msgHdr) {
          this._log.warn("Observed header that claims to be gloda indexed " +
                         "but that gloda has never heard of during " +
                         "compaction." +
                         " In folder: " + msgHdr.folder.URI +
                         " sketchy key: " + msgHdr.messageKey +
                         " subject: " + msgHdr.mime2DecodedSubject);
          // Keep this tuple around for the next enumerator provided header
          keepGlodaTuple = true;
        }
      }
    }
    // If we don't flush the update, no one will!
    if (updateGlodaIds.length)
      GlodaDatastore.updateMessageLocations(updateGlodaIds,
                                            updateMessageKeys,
                                            aJob.id, true);

    if (deleteGlodaIds.length)
      GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);

    // the folder is now fully processed; clear its compacted state
    this._indexingGlodaFolder._setCompactedState(false);

    this._indexerLeaveFolder();
    yield this.kWorkDone;
  },

  /**
   * Index the contents of a folder.
   */
  _worker_folderIndex:
      function* gloda_worker_folderIndex(aJob, aCallbackHandle) {
    let logDebug = this._log.level <= Log4Moz.Level.Debug;

    yield this._indexerEnterFolder(aJob.id);

    if (!this.shouldIndexFolder(this._indexingFolder)) {
      aJob.safelyInvokeCallback(true);
      yield this.kWorkDone;
    }

    // Make sure listeners get notified about this job.
    GlodaIndexer._notifyListeners();

    // there is of course a cost to all this header investigation even if we
    //  don't do something.  so we will yield with kWorkSync for every block.
    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;
    const HEADER_CHECK_GC_BLOCK_SIZE = this.HEADER_CHECK_GC_BLOCK_SIZE;

    // we can safely presume if we are here that this folder has been selected
    //  for offline processing...

    // -- Filthy Folder
    // A filthy folder may have misleading properties on the message that
    //  claim the message is indexed.  They are misleading because the
    //  database, for whatever reason, does not have the messages (accurately)
    //  indexed.
    // We need to walk all the messages and mark them filthy if they have a
    //  dirty property.  Once we have done this, we can downgrade the folder's
    //  dirty status to plain dirty.  We do this rather than trying to process
    //  everyone in one go in a filthy context because if we have to terminate
    //  indexing before we quit, we don't want to have to re-index messages
    //  next time.  (This could even lead to never completing indexing in a
    //  pathological situation.)
let glodaFolder = GlodaDatastore._mapFolder(this._indexingFolder);
    if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
      this._indexerGetEnumerator(this.kEnumIndexedMsgs, true);
      let count = 0;
      for (let msgHdr in fixIterator(this._indexingEnumerator, nsIMsgDBHdr)) {
        // we still need to avoid locking up the UI, pause periodically...
        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
          yield this.kWorkSync;
        if (count % HEADER_CHECK_GC_BLOCK_SIZE == 0)
          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);

        let glodaMessageId = msgHdr.getUint32Property(
          GLODA_MESSAGE_ID_PROPERTY);
        // if it has a gloda message id, we need to mark it filthy
        if (glodaMessageId != 0)
          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageFilthy);
        // if it doesn't have a gloda message id, we will definitely index it,
        //  so no action is required.
      }
      // Commit the filthy status changes to the message database.
      this._indexingDatabase.Commit(Ci.nsMsgDBCommitType.kLargeCommit);

      // this will automatically persist to the database
      glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderDirty);
    }

    // Figure out whether we're supposed to index _everything_ or just what
    //  has not yet been indexed.
    let force = ("force" in aJob) && aJob.force;
    let enumeratorType = force ? this.kEnumAllMsgs : this.kEnumMsgsToIndex;

    // Pass 1: count the number of messages to index.
    //  We do this in order to be able to report to the user what we're doing.
    // TODO: give up after reaching a certain number of messages in folders
    //  with ridiculous numbers of messages and make the interface just say
    //  something like "over N messages to go."
    this._indexerGetEnumerator(enumeratorType);

    let numMessagesToIndex = 0;
    let numMessagesOut = {};
    // Keep going until we run out of headers.
    while (this._indexingFolder.msgDatabase.nextMatchingHdrs(
             this._indexingEnumerator,
             HEADER_CHECK_SYNC_BLOCK_SIZE * 8, // this way is faster, do more
             0, // moot, we don't return headers
             null, // don't return headers, we just want the count
             numMessagesOut)) {
      numMessagesToIndex += numMessagesOut.value;
      yield this.kWorkSync;
    }
    // pick up the count from the final (false-returning) call too
    numMessagesToIndex += numMessagesOut.value;

    aJob.goal = numMessagesToIndex;

    if (numMessagesToIndex > 0) {
      // We used up the iterator, get a new one.
      this._indexerGetEnumerator(enumeratorType);

      // Pass 2: index the messages.
      let count = 0;
      for (let msgHdr in fixIterator(this._indexingEnumerator, nsIMsgDBHdr)) {
        // per above, we want to periodically release control while doing all
        //  this header traversal/investigation.
        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0)
          yield this.kWorkSync;
        if (count % HEADER_CHECK_GC_BLOCK_SIZE == 0)
          GlodaUtils.considerHeaderBasedGC(HEADER_CHECK_GC_BLOCK_SIZE);

        // To keep our counts more accurate, increment the offset before
        //  potentially skipping any messages.
        ++aJob.offset;

        // Skip messages that have not yet been reported to us as existing
        //  via msgsClassified.
        if (this._indexingFolder.getProcessingFlags(msgHdr.messageKey) &
            NOT_YET_REPORTED_PROCESSING_FLAGS)
          continue;

        // Because the gloda id could be in-flight, we need to double-check
        //  the enumerator here since it can't know about our in-memory
        //  stuff.
        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
        // if the message seems valid and we are not forcing indexing, skip
        //  it.  (that means good gloda id and not dirty)
        if (!force &&
            glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
            glodaDirty == this.kMessageClean)
          continue;

        if (logDebug)
          this._log.debug(">>> calling _indexMessage");
        yield aCallbackHandle.pushAndGo(
          this._indexMessage(msgHdr, aCallbackHandle),
          {what: "indexMessage", msgHdr: msgHdr});
        GlodaIndexer._indexedMessageCount++;
        if (logDebug)
          this._log.debug("<<< back from _indexMessage");
      }
    }

    // This will trigger an (async) db update which cannot hit the disk prior
    //  to the actual database records that constitute the clean state.
    // XXX There is the slight possibility that, in the event of a crash,
    //  this will hit the disk but the gloda-id properties on the headers
    //  will not get set.  This should ideally be resolved by detecting a
    //  non-clean shutdown and marking all folders as dirty.
    glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderClean);

    // by definition, it's not likely we'll visit this folder again anytime
    //  soon
    this._indexerLeaveFolder();

    aJob.safelyInvokeCallback(true);

    yield this.kWorkDone;
  },

  /**
   * Invoked when a "message" job is scheduled so that we can clear
   *  _pendingAddJob if that is the job.  We do this so that work items are
   *  not added to _pendingAddJob while it is being processed.
   */
  _schedule_messageIndex: function(aJob, aCallbackHandle) {
    // we do not want new work items to be added as we are processing, so
    //  clear _pendingAddJob.  A new job will be created as needed.
    if (aJob === this._pendingAddJob)
      this._pendingAddJob = null;
    // update our goal from the items length
    aJob.goal = aJob.items.length;
  },
  /**
   * If the job gets canceled, we need to make sure that we clear out pending
   *  add job or our state will get wonky.
   */
  _canceled_messageIndex:
      function gloda_index_msg_canceled_messageIndex(aJob) {
    if (aJob === this._pendingAddJob)
      this._pendingAddJob = null;
  },

  /**
   * Index a specific list of messages that we know to index from
   *  event-notification hints.
*/ _worker_messageIndex: function* gloda_worker_messageIndex(aJob, aCallbackHandle) { // if we are already in the correct folder, our "get in the folder" clause // will not execute, so we need to make sure this value is accurate in // that case. (and we want to avoid multiple checks...) for (; aJob.offset < aJob.items.length; aJob.offset++) { let item = aJob.items[aJob.offset]; // item is either [folder ID, message key] or // [folder ID, message ID] let glodaFolderId = item[0]; // If the folder has been deleted since we queued, skip this message if (!GlodaDatastore._folderIdKnown(glodaFolderId)) continue; let glodaFolder = GlodaDatastore._mapFolderID(glodaFolderId); // Stay out of folders that: // - are compacting / compacted and not yet processed // - got deleted (this would be redundant if we had a stance on id nukage) // (these things could have changed since we queued the event) if (glodaFolder.compacting || glodaFolder.compacted || glodaFolder._deleted) continue; // get in the folder if (this._indexingGlodaFolder != glodaFolder) { yield this._indexerEnterFolder(glodaFolderId); // Now that we have the real nsIMsgFolder, sanity-check that we should // be indexing it. (There are some checks that require the // nsIMsgFolder.) if (!this.shouldIndexFolder(this._indexingFolder)) continue; } let msgHdr; // GetMessageHeader can be affected by the use cache, so we need to check // ContainsKey first to see if the header is really actually there. if (typeof item[1] == "number") msgHdr = this._indexingDatabase.ContainsKey(item[1]) && this._indexingFolder.GetMessageHeader(item[1]); else // same deal as in move processing. // TODO fixme to not assume singular message-id's. msgHdr = this._indexingDatabase.getMsgHdrForMessageID(item[1]); if (msgHdr) yield aCallbackHandle.pushAndGo( this._indexMessage(msgHdr, aCallbackHandle), {what: "indexMessage", msgHdr: msgHdr}); else yield this.kWorkSync; } // There is no real reason to stay 'in' the folder. 
If we are going to get // more events from the folder, its database would have to be open for us // to get the events, so it's not like we're creating an efficiency // problem where we unload a folder just to load it again in 2 seconds. // (Well, at least assuming the views are good about holding onto the // database references even though they go out of their way to avoid // holding onto message header references.) this._indexerLeaveFolder(); yield this.kWorkDone; }, /** * Recover from a "folder" or "message" job failing inside a call to * |_indexMessage|, marking the message bad. If we were not in an * |_indexMessage| call, then fail to recover. * * @param aJob The job that was being worked. We ignore this for now. * @param aContextStack The callbackHandle mechanism's context stack. When we * invoke pushAndGo for _indexMessage we put something in so we can * detect when it is on the async stack. * @param aException The exception that is necessitating we attempt to * recover. * * @return 1 if we were able to recover (because we want the call stack * popped down to our worker), false if we can't. */ _recover_indexMessage: function gloda_index_recover_indexMessage(aJob, aContextStack, aException) { // See if indexMessage is on the stack... if (aContextStack.length >= 2 && aContextStack[1] && ("what" in aContextStack[1]) && aContextStack[1].what == "indexMessage") { // it is, so this is probably recoverable. this._log.debug( "Exception while indexing message, marking it bad (gloda id of 1)."); // -- Mark the message as bad let msgHdr = aContextStack[1].msgHdr; // (In the worst case, the header is no longer valid, which will result in // exceptions. We need to be prepared for that.) 
try { msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, GLODA_BAD_MESSAGE_ID); // clear the dirty bit if it has one if (msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY)) msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, 0); } catch (ex) { // If we are indexing a folder and the message header is no longer // valid, then it's quite likely the whole folder is no longer valid. // But since in the event-driven message indexing case we could have // other valid things to look at, let's try and recover. The folder // indexing case will come back to us shortly and we will indicate // recovery is not possible at that point. // So do nothing here since by popping the indexing of the specific // message out of existence we are recovering. } return 1; } return false; }, /** * Cleanup after an aborted "folder" or "message" job. */ _cleanup_indexing: function gloda_index_cleanup_indexing(aJob) { this._indexerLeaveFolder(); aJob.safelyInvokeCallback(false); }, /** * Maximum number of deleted messages to process at a time. Arbitrary; there * are no real known performance constraints at this point. */ DELETED_MESSAGE_BLOCK_SIZE: 32, /** * Process pending deletes... */ _worker_processDeletes: function* gloda_worker_processDeletes(aJob, aCallbackHandle) { // Count the number of messages we will eventually process. People freak // out when the number is constantly increasing because they think gloda // has gone rogue. (Note: new deletions can still accumulate during // our execution, so we may 'expand' our count a little still.) this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback); aJob.goal = yield this.kWorkAsync; this._log.debug("There are currently " + aJob.goal + " messages awaiting" + " deletion processing."); // get a block of messages to delete. 
let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, { noDbQueryValidityConstraints: true, }); query._deleted(1); query.limit(this.DELETED_MESSAGE_BLOCK_SIZE); let deletedCollection = query.getCollection(aCallbackHandle); yield this.kWorkAsync; while (deletedCollection.items.length) { for (let message of deletedCollection.items) { // If it turns out our count is wrong (because some new deletions // happened since we entered this worker), let's issue a new count // and use that to accurately update our goal. if (aJob.offset >= aJob.goal) { this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback); aJob.goal += yield this.kWorkAsync; } yield aCallbackHandle.pushAndGo(this._deleteMessage(message, aCallbackHandle)); aJob.offset++; yield this.kWorkSync; } deletedCollection = query.getCollection(aCallbackHandle); yield this.kWorkAsync; } this.pendingDeletions = false; yield this.kWorkDone; }, _worker_fixMissingContacts: function*(aJob, aCallbackHandle) { let identityContactInfos = [], fixedContacts = {}; // -- asynchronously get a list of all identities without contacts // The upper bound on the number of messed up contacts is the number of // contacts in the user's address book. This should be small enough // (and the data size small enough) that this won't explode thunderbird. 
let queryStmt = GlodaDatastore._createAsyncStatement( "SELECT identities.id, identities.contactID, identities.value " + "FROM identities " + "LEFT JOIN contacts ON identities.contactID = contacts.id " + "WHERE identities.kind = 'email' AND contacts.id IS NULL", true); queryStmt.executeAsync({ handleResult: function(aResultSet) { let row; while ((row = aResultSet.getNextRow())) { identityContactInfos.push({ identityId: row.getInt64(0), contactId: row.getInt64(1), email: row.getString(2) }); } }, handleError: function(aError) { }, handleCompletion: function(aReason) { GlodaDatastore._asyncCompleted(); aCallbackHandle.wrappedCallback(); }, }); queryStmt.finalize(); GlodaDatastore._pendingAsyncStatements++; yield this.kWorkAsync; // -- perform fixes only if there were missing contacts if (identityContactInfos.length) { const yieldEvery = 64; // - create the missing contacts for (let i = 0; i < identityContactInfos.length; i++) { if ((i % yieldEvery) === 0) yield this.kWorkSync; let info = identityContactInfos[i], card = GlodaUtils.getCardForEmail(info.email), contact = new GlodaContact( GlodaDatastore, info.contactId, null, null, card ? (card.displayName || info.email) : info.email, 0, 0); GlodaDatastore.insertContact(contact); // update the in-memory rep of the identity to know about the contact // if there is one. let identity = GlodaCollectionManager.cacheLookupOne( Gloda.NOUN_IDENTITY, info.identityId, false); if (identity) { // Unfortunately, although this fixes the (reachable) Identity and // exposes the Contact, it does not make the Contact reachable from // the collection manager. This will make explicit queries that look // up the contact potentially see the case where // contact.identities[0].contact !== contact. Alternately, that // may not happen and instead the "contact" object we created above // may become unlinked. (I'd have to trace some logic I don't feel // like tracing.) 
Either way, The potential fallout is minimal // since the object identity invariant will just lapse and popularity // on the contact may become stale, and neither of those meaningfully // affect the operation of anything in Thunderbird. // If we really cared, we could find all the dominant collections // that reference the identity and update their corresponding // contact collection to make it reachable. That use-case does not // exist outside of here, which is why we're punting. identity._contact = contact; contact._identities = [identity]; } // NOTE: If the addressbook indexer did anything useful other than // adapting to name changes, we could schedule indexing of the cards at // this time. However, as of this writing, it doesn't, and this task // is a one-off relevant only to the time of this writing. } // - mark all folders as dirty, initiate indexing sweep this.dirtyAllKnownFolders(); this.indexingSweepNeeded = true; } // -- mark the schema upgrade, be done GlodaDatastore._updateSchemaVersion(GlodaDatastore._schemaVersion); yield this.kWorkDone; }, /** * Determine whether a folder is suitable for indexing. * * @param aMsgFolder An nsIMsgFolder you want to see if we should index. * * @returns true if we want to index messages in this type of folder, false if * we do not. */ shouldIndexFolder: function(aMsgFolder) { let folderFlags = aMsgFolder.flags; // Completely ignore non-mail and virtual folders. They should never even // get to be GlodaFolder instances. if (!(folderFlags & Ci.nsMsgFolderFlags.Mail) || (folderFlags & Ci.nsMsgFolderFlags.Virtual)) return false; // Some folders do not really exist; we can detect this by getStringProperty // exploding when we call it. This is primarily a concern because // _mapFolder calls said exploding method, but we also don't want to // even think about indexing folders that don't exist. (Such folders are // likely the result of a messed up profile.) 
try { // flags is used because it should always be in the cache avoiding a miss // which would compel an msf open. aMsgFolder.getStringProperty("flags"); } catch (ex) { return false; } // Now see what our gloda folder information has to say about the folder. let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); return glodaFolder.indexingPriority != glodaFolder.kIndexingNeverPriority; }, /** * Sets the indexing priority for this folder and persists it both to Gloda, * and, for backup purposes, to the nsIMsgFolder via string property as well. * * Setting this priority may cause the indexer to either reindex this folder, * or remove this folder from the existing index. * * @param {nsIMsgFolder} aFolder * @param {Number} aPriority (one of the priority constants from GlodaFolder) */ setFolderIndexingPriority: function glodaSetFolderIndexingPriority(aFolder, aPriority) { let glodaFolder = GlodaDatastore._mapFolder(aFolder); // if there's been no change, we're done if (aPriority == glodaFolder.indexingPriority) { return; } // save off the old priority, and set the new one let previousPrio = glodaFolder.indexingPriority; glodaFolder._indexingPriority = aPriority; // persist the new priority GlodaDatastore.updateFolderIndexingPriority(glodaFolder); aFolder.setStringProperty("indexingPriority", Number(aPriority).toString()); // if we've been told never to index this folder... 
if (aPriority == glodaFolder.kIndexingNeverPriority) { // stop doing so if (this._indexingFolder == aFolder) GlodaIndexer.killActiveJob(); // mark all existing messages as deleted GlodaDatastore.markMessagesDeletedByFolderID(glodaFolder.id); // re-index GlodaMsgIndexer.indexingSweepNeeded = true; } else if (previousPrio == glodaFolder.kIndexingNeverPriority) { // there's no existing index, but the user now wants one glodaFolder._dirtyStatus = glodaFolder.kFolderFilthy; GlodaDatastore.updateFolderDirtyStatus(glodaFolder) GlodaMsgIndexer.indexingSweepNeeded = true; } }, /** * Resets the indexing priority on the given folder to whatever the default * is for folders of that type. * * @note Calls setFolderIndexingPriority under the hood, so has identical * potential reindexing side-effects * * @param {nsIMsgFolder} aFolder * @param {boolean} aAllowSpecialFolderIndexing */ resetFolderIndexingPriority: function glodaResetFolderIndexingPriority(aFolder, aAllowSpecialFolderIndexing) { this.setFolderIndexingPriority(aFolder, GlodaDatastore.getDefaultIndexingPriority(aFolder, aAllowSpecialFolderIndexing)); }, /** * Queue all of the folders of all of the accounts of the current profile * for indexing. We traverse all folders and queue them immediately to try * and have an accurate estimate of the number of folders that need to be * indexed. (We previously queued accounts rather than immediately * walking their list of folders.) */ indexEverything: function glodaIndexEverything() { this._log.info("Queueing all accounts for indexing."); GlodaDatastore._beginTransaction(); for (let account in fixIterator(MailServices.accounts.accounts, Ci.nsIMsgAccount)) { this.indexAccount(account); } GlodaDatastore._commitTransaction(); }, /** * Queue all of the folders belonging to an account for indexing. 
*/ indexAccount: function glodaIndexAccount(aAccount) { let rootFolder = aAccount.incomingServer.rootFolder; if (rootFolder instanceof Ci.nsIMsgFolder) { this._log.info("Queueing account folders for indexing: " + aAccount.key); let allFolders = rootFolder.descendants; let folderJobs = []; for (let folder in fixIterator(allFolders, Ci.nsIMsgFolder)) { if (this.shouldIndexFolder(folder)) GlodaIndexer.indexJob( new IndexingJob("folder", GlodaDatastore._mapFolder(folder).id)); } } else { this._log.info("Skipping Account, root folder not nsIMsgFolder"); } }, /** * Queue a single folder for indexing given an nsIMsgFolder. * * @param [aOptions.callback] A callback to invoke when the folder finishes * indexing. First argument is true if the task ran to completion * successfully, false if we had to abort for some reason. * @param [aOptions.force=false] Should we force the indexing of all messages * in the folder (true) or just index what hasn't been indexed (false). * @return true if we are going to index the folder, false if not. */ indexFolder: function glodaIndexFolder(aMsgFolder, aOptions) { if (!this.shouldIndexFolder(aMsgFolder)) return false; let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); // stay out of compacting/compacted folders if (glodaFolder.compacting || glodaFolder.compacted) return false; this._log.info("Queue-ing folder for indexing: " + aMsgFolder.prettiestName); let job = new IndexingJob("folder", glodaFolder.id); if (aOptions) { if ("callback" in aOptions) job.callback = aOptions.callback; if ("force" in aOptions) job.force = true; } GlodaIndexer.indexJob(job); return true; }, /** * Queue a list of messages for indexing. * * @param aFoldersAndMessages List of [nsIMsgFolder, message key] tuples. */ indexMessages: function gloda_index_indexMessages(aFoldersAndMessages) { let job = new IndexingJob("message", null); job.items = aFoldersAndMessages. 
map(fm => [GlodaDatastore._mapFolder(fm[0]).id, fm[1]]); GlodaIndexer.indexJob(job); }, /** * Mark all known folders as dirty so that the next indexing sweep goes * into all folders and checks their contents to see if they need to be * indexed. * * This is being added for the migration case where we want to try and reindex * all of the messages that had been marked with GLODA_BAD_MESSAGE_ID but * which is now GLODA_OLD_BAD_MESSAGE_ID and so we should attempt to reindex * them. */ dirtyAllKnownFolders: function gloda_index_msg_dirtyAllKnownFolders() { // Just iterate over the datastore's folder map and tell each folder to // be dirty if its priority is not disabled. for (let folderID in GlodaDatastore._folderByID) { let glodaFolder = GlodaDatastore._folderByID[folderID]; if (glodaFolder.indexingPriority !== glodaFolder.kIndexingNeverPriority) glodaFolder._ensureFolderDirty(); } }, /** * Given a message header, return whether this message is likely to have * been indexed or not. * * This means the message must: * - Be in a folder eligible for gloda indexing. (Not News, etc.) * - Be in a non-filthy folder. * - Be gloda-indexed and non-filthy. * * @param aMsgHdr A message header. * @returns true if the message is likely to have been indexed. */ isMessageIndexed: function gloda_index_isMessageIndexed(aMsgHdr) { // If it's in a folder that we flat out do not index, say no. 
    if (!this.shouldIndexFolder(aMsgHdr.folder))
      return false;

    let glodaFolder = GlodaDatastore._mapFolder(aMsgHdr.folder);
    let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(aMsgHdr);
    // Indexed means: a real gloda id (not the bad/old-bad sentinels, which
    //  are below GLODA_FIRST_VALID_MESSAGE_ID), a non-filthy message, and a
    //  known, non-filthy folder.
    return glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
           glodaDirty != GlodaMsgIndexer.kMessageFilthy &&
           glodaFolder &&
           glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy;
  },

  /* *********** Event Processing *********** */

  /**
   * Tracks messages we have received msgKeyChanged notifications for in order
   *  to provide batching and to suppress needless reindexing when we receive
   *  the expected follow-up msgsClassified notification.
   *
   * The entries in this dictionary should be extremely short-lived as we
   *  receive the msgKeyChanged notification as the offline fake header is
   *  converted into a real header (which is accompanied by a msgAdded
   *  notification we don't pay attention to).  Once the headers finish
   *  updating, the message classifier will get its at-bat and should likely
   *  find that the messages have already been classified and so fast-path
   *  them.
   *
   * The keys in this dictionary are chosen to be consistent with those of
   *  PendingCommitTracker: the folder.URI + "#" + the (new) message key.
   * The values in the dictionary are either an object with "id" (the gloda
   *  id), "key" (the new message key), and "dirty" (is it dirty and so
   *  should still be queued for indexing) attributes, or null indicating that
   *  no change in message key occurred and so no database changes are
   *  required.
   */
  _keyChangedBatchInfo: {},

  /**
   * Common logic for things that want to feed event-driven indexing.  This
   *  gets called by both |_msgFolderListener.msgsClassified| when we are first
   *  seeing a message as well as by |_folderListener| when things happen to
   *  existing messages.  Although we could slightly specialize for the
   *  new-to-us case, it works out to be cleaner to just treat them the same
   *  and take a very small performance hit.
* * @param aMsgHdrs Something fixIterator will work on to return an iterator * on the set of messages that we should treat as potentially changed. * @param aDirtyingEvent Is this event inherently dirtying? Receiving a * msgsClassified notification is not inherently dirtying because it is * just telling us that a message exists. We use this knowledge to * ignore the msgsClassified notifications for messages we have received * msgKeyChanged notifications for and fast-pathed. Since it is possible * for user action to do something that dirties the message between the * time we get the msgKeyChanged notification and when we receive the * msgsClassified notification, we want to make sure we don't get * confused. (Although since we remove the message from our ignore-set * after the first notification, we would likely just mistakenly treat * the msgsClassified notification as something dirtying, so it would * still work out...) */ _reindexChangedMessages: function gloda_indexer_reindexChangedMessage( aMsgHdrs, aDirtyingEvent) { let glodaIdsNeedingDeletion = null; let messageKeyChangedIds = null, messageKeyChangedNewKeys = null; for (let msgHdr in fixIterator(aMsgHdrs, nsIMsgDBHdr)) { // -- Index this folder? let msgFolder = msgHdr.folder; if (!this.shouldIndexFolder(msgFolder)) { continue; } // -- Ignore messages in filthy folders! // A filthy folder can only be processed by an indexing sweep, and at // that point the message will get indexed. 
let glodaFolder = GlodaDatastore._mapFolder(msgHdr.folder); if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) continue; // -- msgKeyChanged event follow-up if (!aDirtyingEvent) { let keyChangedKey = msgHdr.folder.URI + "#" + msgHdr.messageKey; if (keyChangedKey in this._keyChangedBatchInfo) { var keyChangedInfo = this._keyChangedBatchInfo[keyChangedKey]; delete this._keyChangedBatchInfo[keyChangedKey]; // Null means to ignore this message because the key did not change // (and the message was not dirty so it is safe to ignore.) if (keyChangedInfo == null) continue; // (the key may be null if we only generated the entry because the // message was dirty) if (keyChangedInfo.key !== null) { if (messageKeyChangedIds == null) { messageKeyChangedIds = []; messageKeyChangedNewKeys = []; } messageKeyChangedIds.push(keyChangedInfo.id); messageKeyChangedNewKeys.push(keyChangedInfo.key); } // ignore the message because it was not dirty if (!keyChangedInfo.isDirty) continue; } } // -- Index this message? // We index local messages, IMAP messages that are offline, and IMAP // messages that aren't offline but whose folders aren't offline either let isFolderLocal = msgFolder instanceof nsIMsgLocalMailFolder; if (!isFolderLocal) { if (!(msgHdr.flags & nsMsgMessageFlags.Offline) && (msgFolder.flags & nsMsgFolderFlags.Offline)) { continue; } } // Ignore messages whose processing flags indicate it has not yet been // classified. In the IMAP case if the Offline flag is going to get set // we are going to see it before the msgsClassified event so this is // very important. if (msgFolder.getProcessingFlags(msgHdr.messageKey) & NOT_YET_REPORTED_PROCESSING_FLAGS) continue; let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr); let isSpam = msgHdr.getStringProperty(JUNK_SCORE_PROPERTY) == JUNK_SPAM_SCORE_STR; // -- Is the message currently gloda indexed? if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && glodaDirty != this.kMessageFilthy) { // - Is the message spam? 
if (isSpam) { // Treat this as a deletion... if (!glodaIdsNeedingDeletion) glodaIdsNeedingDeletion = []; glodaIdsNeedingDeletion.push(glodaId); // and skip to the next message continue; } // - Mark the message dirty if it is clean. // (This is the only case in which we need to mark dirty so that the // indexing sweep takes care of things if we don't process this in // an event-driven fashion. If the message has no gloda-id or does // and it's already dirty or filthy, it is already marked for // indexing.) if (glodaDirty == this.kMessageClean) msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageDirty); // if the message is pending clean, this change invalidates that. PendingCommitTracker.noteDirtyHeader(msgHdr); } // If it's not indexed but is spam, ignore it. else if (isSpam) { continue; } // (we want to index the message if we are here) // mark the folder dirty too, so we know to look inside glodaFolder._ensureFolderDirty(); if (this._pendingAddJob == null) { this._pendingAddJob = new IndexingJob("message", null); GlodaIndexer.indexJob(this._pendingAddJob); } // only queue the message if we haven't overflowed our event-driven budget if (this._pendingAddJob.items.length < this._indexMaxEventQueueMessages) { this._pendingAddJob.items.push( [GlodaDatastore._mapFolder(msgFolder).id, msgHdr.messageKey]); } else { this.indexingSweepNeeded = true; } } // Process any message key changes (from earlier msgKeyChanged events) if (messageKeyChangedIds != null) GlodaDatastore.updateMessageKeys(messageKeyChangedIds, messageKeyChangedNewKeys); // If we accumulated any deletions in there, batch them off now. if (glodaIdsNeedingDeletion) { GlodaDatastore.markMessagesDeletedByIDs(glodaIdsNeedingDeletion); this.pendingDeletions = true; } }, /* ***** Folder Changes ***** */ /** * All additions and removals are queued for processing. 
Indexing messages * is potentially phenomenally expensive, and deletion can still be * relatively expensive due to our need to delete the message, its * attributes, and all attributes that reference it. Additionally, * attribute deletion costs are higher than attribute look-up because * there is the actual row plus its 3 indices, and our covering indices are * no help there. * */ _msgFolderListener: { indexer: null, /** * We no longer use the msgAdded notification, instead opting to wait until * junk/trait classification has run (or decided not to run) and all * filters have run. The msgsClassified notification provides that for us. */ msgAdded: function gloda_indexer_msgAdded(aMsgHdr) { // we are never called! we do not enable this bit! }, /** * Process (apparently newly added) messages that have been looked at by * the message classifier. This ensures that if the message was going * to get marked as spam, this will have already happened. * * Besides truly new (to us) messages, We will also receive this event for * messages that are the result of IMAP message move/copy operations, * including both moves that generated offline fake headers and those that * did not. In the offline fake header case, however, we are able to * ignore their msgsClassified events because we will have received a * msgKeyChanged notification sometime in the recent past. */ msgsClassified: function gloda_indexer_msgsClassified( aMsgHdrs, aJunkClassified, aTraitClassified) { this.indexer._log.debug("msgsClassified notification"); try { GlodaMsgIndexer._reindexChangedMessages(aMsgHdrs.enumerate(), false); } catch (ex) { this.indexer._log.error("Explosion in msgsClassified handling:", ex); } }, /** * Handle real, actual deletion (move to trash and IMAP deletion model * don't count); we only see the deletion here when it becomes forever, * or rather _just before_ it becomes forever. 
Because the header is * going away, we need to either process things immediately or extract the * information required to purge it later without the header. * To this end, we mark all messages that were indexed in the gloda message * database as deleted. We set our pending deletions flag to let our * indexing logic know that after its next wave of folder traversal, it * should perform a deletion pass. If it turns out the messages are coming * back, the fact that deletion is thus deferred can be handy, as we can * reuse the existing gloda message. */ msgsDeleted: function gloda_indexer_msgsDeleted(aMsgHdrs) { this.indexer._log.debug("msgsDeleted notification"); let glodaMessageIds = []; for (let iMsgHdr = 0; iMsgHdr < aMsgHdrs.length; iMsgHdr++) { let msgHdr = aMsgHdrs.queryElementAt(iMsgHdr, nsIMsgDBHdr); let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr); if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && glodaDirty != GlodaMsgIndexer.kMessageFilthy) glodaMessageIds.push(glodaId); } if (glodaMessageIds.length) { GlodaMsgIndexer._datastore.markMessagesDeletedByIDs(glodaMessageIds); GlodaMsgIndexer.pendingDeletions = true; } }, /** * Process a move or copy. * * Moves to a local folder or an IMAP folder where we are generating offline * fake headers are dealt with efficiently because we get both the source * and destination headers. The main ingredient to having offline fake * headers is that allowUndo was true when the operation was performance. * The only non-obvious thing is that we need to make sure that we deal * with the impact of filthy folders and messages on gloda-id's (they * invalidate the gloda-id). 
* * Moves to an IMAP folder that do not generate offline fake headers do not * provide us with the target header, but the IMAP SetPendingAttributes * logic will still attempt to propagate the properties on the message * header so when we eventually see it in the msgsClassified notification, * it should have the properties of the source message copied over. * We make sure that gloda-id's do not get propagated when messages are * moved from IMAP folders that are marked filthy or are marked as not * supposed to be indexed by clearing the pending attributes for the header * being tracked by the destination IMAP folder. * We could fast-path the IMAP move case in msgsClassified by noticing that * a message is showing up with a gloda-id header already and just * performing an async location update. * * Moves that occur involving 'compacted' folders are fine and do not * require special handling here. The one tricky super-edge-case that * can happen (and gets handled by the compaction pass) is the move of a * message that got gloda indexed that did not already have a gloda-id and * PendingCommitTracker did not get to flush the gloda-id before the * compaction happened. In that case our move logic cannot know to do * anything and the gloda database still thinks the message lives in our * folder. The compaction pass will deal with this by marking the message * as deleted. The rationale being that marking it deleted allows the * message to be re-used if it gets indexed in the target location, or if * the target location has already been indexed, we no longer need the * duplicate and it should be deleted. (Also, it is unable to distinguish * between a case where the message got deleted versus moved.) * * Because copied messages are, by their nature, duplicate messages, we * do not particularly care about them. As such, we defer their processing * to the automatic sync logic that will happen much later on. 
This is * potentially desirable in case the user deletes some of the original * messages, allowing us to reuse the gloda message representations when * we finally get around to indexing the messages. We do need to mark the * folder as dirty, though, to clue in the sync logic. */ msgsMoveCopyCompleted: function gloda_indexer_msgsMoveCopyCompleted(aMove, aSrcMsgHdrs, aDestFolder, aDestMsgHdrs) { this.indexer._log.debug("MoveCopy notification. Move: " + aMove); try { // ---- Move if (aMove) { // -- Effectively a deletion? // If the destination folder is not indexed, it's like these messages // are being deleted. if (!GlodaMsgIndexer.shouldIndexFolder(aDestFolder)) { this.msgsDeleted(aSrcMsgHdrs); return; } // -- Avoid propagation of filthy gloda-id's. // If the source folder is filthy or should not be indexed (and so // any gloda-id's found in there are gibberish), our only job is to // strip the gloda-id's off of all the destination headers because // none of the gloda-id's are valid (and so we certainly don't want // to try and use them as a basis for updating message keys.) let srcMsgFolder = aSrcMsgHdrs.queryElementAt(0, nsIMsgDBHdr).folder; if (!this.indexer.shouldIndexFolder(srcMsgFolder) || (GlodaDatastore._mapFolder(srcMsgFolder).dirtyStatus == GlodaFolder.prototype.kFolderFilthy)) { // Local case, just modify the destination headers directly. if (aDestMsgHdrs) { for (let destMsgHdr in fixIterator(aDestMsgHdrs, nsIMsgDBHdr)) { // zero it out if it exists // (no need to deal with pending commit issues here; a filthy // folder by definition has nothing indexed in it.) let glodaId = destMsgHdr.getUint32Property( GLODA_MESSAGE_ID_PROPERTY); if (glodaId) destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0); } // Since we are moving messages from a folder where they were // effectively not indexed, it is up to us to make sure the // messages now get indexed. 
this.indexer._reindexChangedMessages(aDestMsgHdrs.enumerate()); return; } // IMAP move case, we need to operate on the pending headers using // the source header to get the pending header and as the // indication of what has been already set on the pending header. else { let destDb; // so, this can fail, and there's not much we can do about it. try { destDb = aDestFolder.msgDatabase; } catch (ex) { this.indexer._log.warn("Destination database for " + aDestFolder.prettiestName + " not ready on IMAP move." + " Gloda corruption possible."); return; } for (let srcMsgHdr in fixIterator(aSrcMsgHdrs, nsIMsgDBHdr)) { // zero it out if it exists // (no need to deal with pending commit issues here; a filthy // folder by definition has nothing indexed in it.) let glodaId = srcMsgHdr.getUint32Property( GLODA_MESSAGE_ID_PROPERTY); if (glodaId) destDb.setUint32AttributeOnPendingHdr( srcMsgHdr, GLODA_MESSAGE_ID_PROPERTY, 0); } // Nothing remains to be done. The msgClassified event will take // care of making sure the message gets indexed. return; } } // --- Have destination headers (local case): if (aDestMsgHdrs) { // -- Update message keys for valid gloda-id's. // (Which means ignore filthy gloda-id's.) let glodaIds = []; let newMessageKeys = []; aSrcMsgHdrs.QueryInterface(nsIArray); aDestMsgHdrs.QueryInterface(nsIArray); // Track whether we see any messages that are not gloda indexed so // we know if we have to mark the destination folder dirty. 
let sawNonGlodaMessage = false; for (let iMsg = 0; iMsg < aSrcMsgHdrs.length; iMsg++) { let srcMsgHdr = aSrcMsgHdrs.queryElementAt(iMsg, nsIMsgDBHdr); let destMsgHdr = aDestMsgHdrs.queryElementAt(iMsg, nsIMsgDBHdr); let [glodaId, dirtyStatus] = PendingCommitTracker.getGlodaState(srcMsgHdr); if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && dirtyStatus != GlodaMsgIndexer.kMessageFilthy) { // we may need to update the pending commit map (it checks) PendingCommitTracker.noteMove(srcMsgHdr, destMsgHdr); // but we always need to update our database glodaIds.push(glodaId); newMessageKeys.push(destMsgHdr.messageKey); } else { sawNonGlodaMessage = true; } } // this method takes care to update the in-memory representations // too; we don't need to do anything if (glodaIds.length) GlodaDatastore.updateMessageLocations(glodaIds, newMessageKeys, aDestFolder); // Mark the destination folder dirty if we saw any messages that // were not already gloda indexed. if (sawNonGlodaMessage) { let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder); destGlodaFolder._ensureFolderDirty(); this.indexer.indexingSweepNeeded = true; } } // --- No dest headers (IMAP case): // Update any valid gloda indexed messages into their new folder to // make the indexer's life easier when it sees the messages in their // new folder. else { let glodaIds = []; let srcFolderIsLocal = (srcMsgFolder instanceof nsIMsgLocalMailFolder); for (let iMsgHdr = 0; iMsgHdr < aSrcMsgHdrs.length; iMsgHdr++) { let msgHdr = aSrcMsgHdrs.queryElementAt(iMsgHdr, nsIMsgDBHdr); let [glodaId, dirtyStatus] = PendingCommitTracker.getGlodaState(msgHdr); if (glodaId >= GLODA_FIRST_VALID_MESSAGE_ID && dirtyStatus != GlodaMsgIndexer.kMessageFilthy) { // we may need to update the pending commit map (it checks) PendingCommitTracker.noteBlindMove(msgHdr); // but we always need to update our database glodaIds.push(glodaId); // XXX UNDO WORKAROUND // This constitutes a move from a local folder to an IMAP // folder. 
Undo does not currently do the right thing for us, // but we have a chance of not orphaning the message if we // mark the source header as dirty so that when the message // gets re-added we see it. (This does require that we enter // the folder; we set the folder dirty after the loop to // increase the probability of this but it's not foolproof // depending on when the next indexing sweep happens and when // the user performs an undo.) msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, GlodaMsgIndexer.kMessageDirty); } } // XXX ALSO UNDO WORKAROUND if (srcFolderIsLocal) { let srcGlodaFolder = GlodaDatastore._mapFolder(srcMsgFolder); srcGlodaFolder._ensureFolderDirty(); } // quickly move them to the right folder, zeroing their message keys GlodaDatastore.updateMessageFoldersByKeyPurging(glodaIds, aDestFolder); // we _do not_ need to mark the folder as dirty, because the // message added events will cause that to happen. } } // ---- Copy case else { // -- Do not propagate gloda-id's for copies // (Only applies if we have the destination header, which means local) if (aDestMsgHdrs) { for (let destMsgHdr in fixIterator(aDestMsgHdrs, nsIMsgDBHdr)) { let glodaId = destMsgHdr.getUint32Property( GLODA_MESSAGE_ID_PROPERTY); if (glodaId) destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0); } } // mark the folder as dirty; we'll get to it later. let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder); destGlodaFolder._ensureFolderDirty(); this.indexer.indexingSweepNeeded = true; } } catch (ex) { this.indexer._log.error("Problem encountered during message move/copy:", ex.stack); } }, /** * Queue up message key changes that are a result of offline fake headers * being made real for the actual update during the msgsClassified * notification that is expected after this. We defer the * actual work (if there is any to be done; the fake header might have * guessed the right UID correctly) so that we can batch our work. 
* * The expectation is that there will be no meaningful time window between * this notification and the msgsClassified notification since the message * classifier should not actually need to classify the messages (they * should already have been classified) and so can fast-path them. */ msgKeyChanged: function gloda_indexer_msgKeyChangeded(aOldMsgKey, aNewMsgHdr) { try { let val = null, newKey = aNewMsgHdr.messageKey; let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(aNewMsgHdr); // If we haven't indexed this message yet, take no action, and leave it // up to msgsClassified to take proper action. if (glodaId < GLODA_FIRST_VALID_MESSAGE_ID) return; // take no action on filthy messages, // generate an entry if dirty or the keys don't match. if ((glodaDirty !== GlodaMsgIndexer.kMessageFilthy) && ((glodaDirty === GlodaMsgIndexer.kMessageDirty) || (aOldMsgKey !== newKey))) { val = { id: glodaId, key: (aOldMsgKey !== newKey) ? newKey : null, isDirty: glodaDirty === GlodaMsgIndexer.kMessageDirty, }; } let key = aNewMsgHdr.folder.URI + "#" + aNewMsgHdr.messageKey; this.indexer._keyChangedBatchInfo[key] = val; } // this is more for the unit test to fail rather than user error reporting catch (ex) { this.indexer._log.error("Problem encountered during msgKeyChanged" + " notification handling: " + ex + "\n\n" + ex.stack + " \n\n"); } }, /** * Detect newly added folders before they get messages so we map them before * they get any messages added to them. If we only hear about them after * they get their 1st message, then we will mark them filthy, but if we mark * them before that, they get marked clean. */ folderAdded: function gloda_indexer_folderAdded(aMsgFolder) { // This is invoked for its side-effect of invoking _mapFolder and doing so // only after filtering out folders we don't care about. GlodaMsgIndexer.shouldIndexFolder(aMsgFolder); }, /** * Handles folder no-longer-exists-ence. We mark all messages as deleted * and remove the folder from our URI table. 
Currently, if a folder that * contains other folders is deleted, we may either receive one * notification for the folder that is deleted, or a notification for the * folder and one for each of its descendents. This depends upon the * underlying account implementation, so we explicitly handle each case. * Namely, we treat it as if we're only planning on getting one, but we * handle if the children are already gone for some reason. */ folderDeleted: function gloda_indexer_folderDeleted(aFolder) { this.indexer._log.debug("folderDeleted notification"); try { let delFunc = function(aFolder, indexer) { if (indexer._datastore._folderKnown(aFolder)) { indexer._log.info("Processing deletion of folder " + aFolder.prettiestName + "."); let glodaFolder = GlodaDatastore._mapFolder(aFolder); indexer._datastore.markMessagesDeletedByFolderID(glodaFolder.id); indexer._datastore.deleteFolderByID(glodaFolder.id); GlodaDatastore._killGlodaFolderIntoTombstone(glodaFolder); } else { indexer._log.info("Ignoring deletion of folder " + aFolder.prettiestName + " because it is unknown to gloda."); } }; let descendentFolders = aFolder.descendants; // (the order of operations does not matter; child, non-child, whatever.) // delete the parent delFunc(aFolder, this.indexer); // delete all its descendents for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) { delFunc(folder, this.indexer); } this.indexer.pendingDeletions = true; } catch (ex) { this.indexer._log.error("Problem encountered during folder deletion" + ": " + ex + "\n\n" + ex.stack + "\n\n"); } }, /** * Handle a folder being copied or moved. * Moves are handled by a helper function shared with _folderRenameHelper * (which takes care of any nesting involved). * Copies are actually ignored, because our periodic indexing traversal * should discover these automatically. We could hint ourselves into * action, but arguably a set of completely duplicate messages is not * a high priority for indexing. 
   */
  folderMoveCopyCompleted: function gloda_indexer_folderMoveCopyCompleted(
      aMove, aSrcFolder, aDestFolder) {
    this.indexer._log.debug("folderMoveCopy notification (Move: " + aMove
                            + ")");
    if (aMove) {
      // Fabricate the post-move URI: the destination folder's URI plus the
      //  trailing leaf segment of the source URI.
      let srcURI = aSrcFolder.URI;
      let targetURI = aDestFolder.URI +
                      srcURI.substring(srcURI.lastIndexOf("/"));
      this._folderRenameHelper(aSrcFolder, targetURI);
    }
    else {
      // Copies: just let the next indexing sweep discover the duplicates.
      this.indexer.indexingSweepNeeded = true;
    }
  },

  /**
   * We just need to update the URI <-> ID maps and the row in the database,
   *  all of which is actually done by the datastore for us.
   * This method needs to deal with the complexity where local folders will
   *  generate a rename notification for each sub-folder, but IMAP folders
   *  will generate only a single notification.  Our logic primarily handles
   *  this by not exploding if the original folder no longer exists.
   */
  _folderRenameHelper: function gloda_indexer_folderRenameHelper(aOrigFolder,
                                                                 aNewURI) {
    let newFolder = MailUtils.getFolderForURI(aNewURI);
    let specialFolderFlags = Ci.nsMsgFolderFlags.Trash |
                             Ci.nsMsgFolderFlags.Junk;
    // Renamed *into* Trash/Junk (or a descendent thereof): stop indexing it
    //  and purge what we had, rather than performing a rename.
    if (newFolder.isSpecialFolder(specialFolderFlags, true)) {
      let descendentFolders = newFolder.descendants;

      // First thing to do: make sure we don't index the resulting folder and
      //  its descendents.
      GlodaMsgIndexer.resetFolderIndexingPriority(newFolder);
      for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) {
        GlodaMsgIndexer.resetFolderIndexingPriority(folder);
      }

      // Remove from the index messages from the original folder
      this.folderDeleted(aOrigFolder);
    } else {
      let descendentFolders = aOrigFolder.descendants;

      let origURI = aOrigFolder.URI;
      // this rename is straightforward.
      GlodaDatastore.renameFolder(aOrigFolder, aNewURI);

      // Local folders notify once per sub-folder, IMAP once for the whole
      //  subtree; rewriting every descendent URI here covers both.
      for (let folder in fixIterator(descendentFolders, Ci.nsIMsgFolder)) {
        let oldSubURI = folder.URI;
        // mangle a new URI from the old URI.  we could also try and do a
        //  parallel traversal of the new folder hierarchy, but that seems
        //  like more work.
        let newSubURI = aNewURI + oldSubURI.substring(origURI.length);
        this.indexer._datastore.renameFolder(oldSubURI, newSubURI);
      }

      this.indexer._log.debug("folder renamed: " + origURI + " to " + aNewURI);
    }
  },

  /**
   * Handle folder renames, dispatching to our rename helper (which also
   *  takes care of any nested folder issues.)
   */
  folderRenamed: function gloda_indexer_folderRenamed(aOrigFolder,
                                                      aNewFolder) {
    this._folderRenameHelper(aOrigFolder, aNewFolder.URI);
  },

  /**
   * This tells us about many exciting things.  What they are and what we do:
   *
   * - FolderCompactStart: Mark the folder as compacting in our in-memory
   *    representation.  This should keep any new indexing out of the folder
   *    until it is done compacting.  Also, kill any active or existing jobs
   *    to index the folder.
   * - FolderCompactFinish: Mark the folder as done compacting in our
   *    in-memory representation.  Assuming the folder was known to us and
   *    not marked filthy, queue a compaction job.
   *
   * - FolderReindexTriggered: We do the same thing as FolderCompactStart
   *    but don't mark the folder as compacting.
   *
   * - JunkStatusChanged: We mark the messages that have had their junk
   *    state change to be reindexed.
   */
  itemEvent: function gloda_indexer_itemEvent(aItem, aEvent, aData) {
    // Compact and Reindex are close enough that we can reuse the same code
    //  with one minor difference.
    if (aEvent == "FolderCompactStart" ||
        aEvent == "FolderReindexTriggered") {
      let aMsgFolder = aItem.QueryInterface(nsIMsgFolder);
      // ignore folders we ignore...
      if (!GlodaMsgIndexer.shouldIndexFolder(aMsgFolder))
        return;

      let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
      if (aEvent == "FolderCompactStart")
        glodaFolder.compacting = true;

      // Purge any explicit indexing of said folder.
GlodaIndexer.purgeJobsUsingFilter(function (aJob) { return (aJob.jobType == "folder" && aJob.id == aMsgFolder.id); }); // Abort the active job if it's in the folder (this covers both // event-driven indexing that happens to be in the folder as well // explicit folder indexing of the folder). if (GlodaMsgIndexer._indexingFolder == aMsgFolder) GlodaIndexer.killActiveJob(); // Tell the PendingCommitTracker to throw away anything it is tracking // about the folder. We will pick up the pieces in the compaction // pass. PendingCommitTracker.noteFolderDatabaseGettingBlownAway(aMsgFolder); // (We do not need to mark the folder dirty because if we were indexing // it, it already must have been marked dirty.) } else if (aEvent == "FolderCompactFinish") { let aMsgFolder = aItem.QueryInterface(nsIMsgFolder); // ignore folders we ignore... if (!GlodaMsgIndexer.shouldIndexFolder(aMsgFolder)) return; let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder); glodaFolder.compacting = false; glodaFolder._setCompactedState(true); // Queue compaction unless the folder was filthy (in which case there // are no valid gloda-id's to update.) if (glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy) GlodaIndexer.indexJob( new IndexingJob("folderCompact", glodaFolder.id)); // Queue indexing of the folder if it is dirty. We are doing this // mainly in case we were indexing it before the compaction started. // It should be reasonably harmless if we weren't. // (It would probably be better to just make sure that there is an // indexing sweep queued or active, and if it's already active that // this folder is in the queue to be processed.) 
if (glodaFolder.dirtyStatus == glodaFolder.kFolderDirty) GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id)); } else if (aEvent == "JunkStatusChanged") { this.indexer._log.debug("JunkStatusChanged notification"); aItem.QueryInterface(Ci.nsIArray); GlodaMsgIndexer._reindexChangedMessages(aItem.enumerate(), true); } }, }, /** * A nsIFolderListener (listening on nsIMsgMailSession so we get all of * these events) PRIMARILY to get folder loaded notifications. Because of * deficiencies in the nsIMsgFolderListener's events at this time, we also * get our folder-added and newsgroup notifications from here for now. (This * will be rectified.) */ _folderListener: { indexer: null, _init: function gloda_indexer_fl_init(aIndexer) { this.indexer = aIndexer; }, // We explicitly know about these things rather than bothering with some // form of registration scheme because these aren't going to change much. get _kFolderLoadedAtom() { delete this._kFolderLoadedAtom; return this._kFolderLoadedAtom = atomService.getAtom("FolderLoaded"); }, get _kKeywordsAtom() { delete this._kKeywordsAtom; return this._kKeywordsAtom = atomService.getAtom("Keywords"); }, get _kStatusAtom() { delete this._kStatusAtom; return this._kStatusAtom = atomService.getAtom("Status"); }, get _kFlaggedAtom() { delete this._kFlaggedAtom; return this._kFlaggedAtom = atomService.getAtom("Flagged"); }, get _kFolderFlagAtom() { delete this._kFolderFlagAtom; return this._kFolderFlagAtom = atomService.getAtom("FolderFlag"); }, OnItemAdded: function gloda_indexer_OnItemAdded(aParentItem, aItem) { }, OnItemRemoved: function gloda_indexer_OnItemRemoved(aParentItem, aItem) { }, OnItemPropertyChanged: function gloda_indexer_OnItemPropertyChanged( aItem, aProperty, aOldValue, aNewValue) { }, /** * Detect changes to folder flags and reset our indexing priority. This * is important because (all?) folders start out without any flags and * then get their flags added to them. 
   */
  OnItemIntPropertyChanged: function gloda_indexer_OnItemIntPropertyChanged(
      aFolderItem, aProperty, aOldValue, aNewValue) {
    if (aProperty !== this._kFolderFlagAtom)
      return;
    if (!GlodaMsgIndexer.shouldIndexFolder(aFolderItem))
      return;
    // Only reset priority if folder Special Use changes.
    if ((aOldValue & Ci.nsMsgFolderFlags.SpecialUse) ==
        (aNewValue & Ci.nsMsgFolderFlags.SpecialUse))
      return;
    GlodaMsgIndexer.resetFolderIndexingPriority(aFolderItem);
  },

  OnItemBoolPropertyChanged: function gloda_indexer_OnItemBoolPropertyChanged(
      aItem, aProperty, aOldValue, aNewValue) {
  },

  OnItemUnicharPropertyChanged:
      function gloda_indexer_OnItemUnicharPropertyChanged(
        aItem, aProperty, aOldValue, aNewValue) {
  },

  /**
   * Notice when user activity adds/removes tags or changes a message's
   *  status.
   */
  OnItemPropertyFlagChanged: function gloda_indexer_OnItemPropertyFlagChanged(
      aMsgHdr, aProperty, aOldValue, aNewValue) {
    if (aProperty == this._kKeywordsAtom ||
        // We could care less about the new flag changing.
        (aProperty == this._kStatusAtom &&
         (aOldValue ^ aNewValue) != nsMsgMessageFlags.New &&
         // We do care about IMAP deletion, but msgsDeleted tells us that, so
         //  ignore IMAPDeleted too...
         (aOldValue ^ aNewValue) != nsMsgMessageFlags.IMAPDeleted) ||
        aProperty == this._kFlaggedAtom) {
      GlodaMsgIndexer._reindexChangedMessages([aMsgHdr], true);
    }
  },

  /**
   * Get folder loaded notifications for folders that had to do some
   *  (asynchronous) processing before they could be opened.
   */
  OnItemEvent: function gloda_indexer_OnItemEvent(aFolder, aEvent) {
    if (aEvent == this._kFolderLoadedAtom)
      this.indexer._onFolderLoaded(aFolder);
  },
},

/* ***** Rebuilding / Reindexing ***** */

/**
 * Allow us to invalidate an outstanding folder traversal because the
 *  underlying database is going away.  We use other means for detecting
 *  modifications of the message (labeling, marked (un)read, starred, etc.)
 *
 * This is an nsIDBChangeListener listening to an nsIDBChangeAnnouncer.  To
 *  add ourselves, we get us a nice nsMsgDatabase, query it to the announcer,
 *  then call AddListener.
 */
_databaseAnnouncerListener: {
  indexer: null,
  /**
   * XXX We really should define the operations under which we expect this to
   *  occur.  While we know this must be happening as the result of a
   *  ForceClosed call, we don't have a comprehensive list of when this is
   *  expected to occur.  Some reasons:
   * - Compaction (although we should already have killed the job thanks to
   *    our compaction notification)
   * - UID validity rolls.
   * - Folder Rename
   * - Folder Delete
   * The fact that we already have the database open when getting this means
   *  that it had to be valid before we opened it, which hopefully rules out
   *  modification of the mbox file by an external process (since that is
   *  forbidden when we are running) and many other exotic things.
   *
   * So this really ends up just being a correctness / safety protection
   *  mechanism.  At least now that we have better compaction support.
   */
  onAnnouncerGoingAway: function gloda_indexer_dbGoingAway(
      aDBChangeAnnouncer) {
    // The fact that we are getting called means we have an active folder and
    //  that we therefore are the active job.  As such, we must kill the
    //  active job.
    // XXX In the future, when we support interleaved event-driven indexing
    //  that bumps long-running indexing tasks, the semantics of this will
    //  have to change a bit since we will want to maintain being active in a
    //  folder even when bumped.  However, we will probably have a more
    //  complex notion of indexing contexts on a per-job basis.
    GlodaIndexer.killActiveJob();
  },

  // The remaining nsIDBChangeListener methods are intentionally no-ops; we
  //  only care about the announcer going away.
  onHdrFlagsChanged: function(aHdrChanged, aOldFlags, aNewFlags,
                              aInstigator) {},
  onHdrDeleted: function(aHdrChanged, aParentKey, aFlags, aInstigator) {},
  onHdrAdded: function(aHdrChanged, aParentKey, aFlags, aInstigator) {},
  onParentChanged: function(aKeyChanged, aOldParent, aNewParent,
                            aInstigator) {},
  onReadChanged: function(aInstigator) {},
  onJunkScoreChanged: function(aInstigator) {},
  onHdrPropertyChanged: function (aHdrToChange, aPreChange, aStatus,
                                  aInstigator) {},
  onEvent: function (aDB, aEvent) {},
},

/**
 * Given a list of Message-ID's, return a matching list of lists of messages
 *  matching those Message-ID's.  So if you pass an array with three
 *  Message-ID's ["a", "b", "c"], you would get back an array containing
 *  3 lists, where the first list contains all the messages with a message-id
 *  of "a", and so forth.  The reason a list is returned rather than null/a
 *  message is that we accept the reality that we have multiple copies of
 *  messages with the same ID.
 * This call is asynchronous because it depends on previously created messages
 *  to be reflected in our results, which requires us to execute on the async
 *  thread where all our writes happen.  This also turns out to be a
 *  reasonable thing because we could imagine pathological cases where there
 *  could be a lot of message-id's and/or a lot of messages with those
 *  message-id's.
 *
 * The returned collection will include both 'ghost' messages (messages
 *  that exist for conversation-threading purposes only) as well as deleted
 *  messages in addition to the normal 'live' messages that non-privileged
 *  queries might return.
*/ getMessagesByMessageID: function gloda_ns_getMessagesByMessageID(aMessageIDs, aCallback, aCallbackThis) { let msgIDToIndex = {}; let results = []; for (let iID = 0; iID < aMessageIDs.length; ++iID) { let msgID = aMessageIDs[iID]; results.push([]); msgIDToIndex[msgID] = iID; } // (Note: although we are performing a lookup with no validity constraints // and using the same object-relational-mapper-ish layer used by things // that do have constraints, we are not at risk of exposing deleted // messages to other code and getting it confused. The only way code // can find a message is if it shows up in their queries or gets announced // via GlodaCollectionManager.itemsAdded, neither of which will happen.) let query = Gloda.newQuery(Gloda.NOUN_MESSAGE, { noDbQueryValidityConstraints: true, }); query.headerMessageID.apply(query, aMessageIDs); query.frozen = true; let listener = new MessagesByMessageIdCallback(msgIDToIndex, results, aCallback, aCallbackThis); return query.getCollection(listener, null, {becomeNull: true}); }, /** * A reference to MsgHdrToMimeMessage that unit testing can clobber when it * wants to cause us to hang or inject a fault. If you are not * glodaTestHelper.js then _do not touch this_. */ _MsgHdrToMimeMessageFunc: MsgHdrToMimeMessage, /** * Primary message indexing logic. This method is mainly concerned with * getting all the information about the message required for threading / * conversation building and subsequent processing. It is responsible for * determining whether to reuse existing gloda messages or whether a new one * should be created. Most attribute stuff happens in fund_attr.js or * expl_attr.js. * * Prior to calling this method, the caller must have invoked * |_indexerEnterFolder|, leaving us with the following true invariants * below. 
 *
 * @pre aMsgHdr.folder == this._indexingFolder
 * @pre aMsgHdr.folder.msgDatabase == this._indexingDatabase
 */
_indexMessage: function* gloda_indexMessage(aMsgHdr, aCallbackHandle) {
  let logDebug = this._log.level <= Log4Moz.Level.Debug;
  if (logDebug)
    this._log.debug("*** Indexing message: " + aMsgHdr.messageKey + " : " +
                    aMsgHdr.subject);

  // If the message is offline, then get the message body as well
  // NOTE(review): isMsgOffline is assigned but never read again in this
  //  method; it appears to be vestigial.
  let isMsgOffline = false;
  let aMimeMsg;
  if ((aMsgHdr.flags & nsMsgMessageFlags.Offline) ||
      (aMsgHdr.folder instanceof nsIMsgLocalMailFolder)) {
    isMsgOffline = true;
    this._MsgHdrToMimeMessageFunc(aMsgHdr, aCallbackHandle.callbackThis,
                                  aCallbackHandle.callback, false,
                                  {saneBodySize: true});
    // the yielded value's second element is the MimeMessage (or null)
    aMimeMsg = (yield this.kWorkAsync)[1];
  }
  else {
    if (logDebug)
      this._log.debug("  * Message is not offline -- only headers indexed");
  }

  if (logDebug)
    this._log.debug("  * Got message, subject " + aMsgHdr.subject);

  if (this._unitTestSuperVerbose) {
    if (aMimeMsg)
      this._log.debug("  * Got Mime " + aMimeMsg.prettyString());
    else
      this._log.debug("  * NO MIME MESSAGE!!!\n");
  }

  // -- Find/create the conversation the message belongs to.
  // Our invariant is that all messages that exist in the database belong to
  //  a conversation.

  // - See if any of the ancestors exist and have a conversationID...
  // (references are ordered from old [0] to new [n-1])
  let references = Array.from(range(0, aMsgHdr.numReferences)).
    map(i => aMsgHdr.getStringReference(i));
  // also see if we already know about the message...
  references.push(aMsgHdr.messageId);

  this.getMessagesByMessageID(references, aCallbackHandle.callback,
                              aCallbackHandle.callbackThis);
  // (ancestorLists has a direct correspondence to the message ids)
  let ancestorLists = yield this.kWorkAsync;

  if (logDebug) {
    this._log.debug("ancestors raw: " + ancestorLists);
    this._log.debug("ref len: " + references.length +
                    " anc len: " + ancestorLists.length);
    this._log.debug("references: " +
                    Log4Moz.enumerateProperties(references).join(","));
    this._log.debug("ancestors: " +
                    Log4Moz.enumerateProperties(ancestorLists).join(","));
  }

  // pull our current message lookup results off
  references.pop();
  let candidateCurMsgs = ancestorLists.pop();

  let conversationID = null;
  let conversation = null;
  // -- figure out the conversation ID
  // if we have a clone/already exist, just use his conversation ID
  if (candidateCurMsgs.length > 0) {
    conversationID = candidateCurMsgs[0].conversationID;
    conversation = candidateCurMsgs[0].conversation;
  }
  // otherwise check out our ancestors
  else {
    // (walk from closest to furthest ancestor)
    for (let iAncestor = ancestorLists.length-1; iAncestor >= 0;
         --iAncestor) {
      let ancestorList = ancestorLists[iAncestor];

      if (ancestorList.length > 0) {
        // we only care about the first instance of the message because we are
        //  able to guarantee the invariant that all messages with the same
        //  message id belong to the same conversation.
        let ancestor = ancestorList[0];
        if (conversationID === null) {
          conversationID = ancestor.conversationID;
          conversation = ancestor.conversation;
        }
        else if (conversationID != ancestor.conversationID) {
          // XXX this inconsistency is known and understood and tracked by
          //  bug 478162 https://bugzilla.mozilla.org/show_bug.cgi?id=478162
          //this._log.error("Inconsistency in conversations invariant on " +
          //                ancestor.headerMessageID + ".  It has conv id " +
          //                ancestor.conversationID + " but expected " +
          //                conversationID + ". ID: " + ancestor.id);
        }
      }
    }
  }

  // nobody had one?  create a new conversation
  if (conversationID === null) {
    // (the create method could issue the id, making the call return
    //  without waiting for the database...)
    conversation = this._datastore.createConversation(
      aMsgHdr.mime2DecodedSubject, null, null);
    conversationID = conversation.id;
  }

  // Walk from furthest to closest ancestor, creating the ancestors that don't
  //  exist.  (This is possible if previous messages that were consumed in this
  //  thread only had an in-reply-to or for some reason did not otherwise
  //  provide the full references chain.)
  for (let iAncestor = 0; iAncestor < ancestorLists.length; ++iAncestor) {
    let ancestorList = ancestorLists[iAncestor];

    if (ancestorList.length == 0) {
      if (logDebug)
        this._log.debug("creating message with: null, " + conversationID +
                        ", " + references[iAncestor] + ", null.");
      let ancestor = this._datastore.createMessage(null, null, // ghost
                                                   conversationID, null,
                                                   references[iAncestor],
                                                   null, // no subject
                                                   null, // no body
                                                   null); // no attachments
      this._datastore.insertMessage(ancestor);
      ancestorLists[iAncestor].push(ancestor);
    }
  }
  // now all our ancestors exist, though they may be ghost-like...

  // find if there's a ghost version of our message or we already have indexed
  //  this message.
  let curMsg = null;
  if (logDebug)
    this._log.debug(candidateCurMsgs.length + " candidate messages");
  for (let iCurCand = 0; iCurCand < candidateCurMsgs.length; iCurCand++) {
    let candMsg = candidateCurMsgs[iCurCand];

    if (logDebug)
      this._log.debug("candidate folderID: " + candMsg.folderID +
                      " messageKey: " + candMsg.messageKey);

    if (candMsg.folderURI == this._indexingFolder.URI) {
      // if we are in the same folder and we have the same message key, we
      //  are definitely the same, stop looking.
      if (candMsg.messageKey == aMsgHdr.messageKey) {
        curMsg = candMsg;
        break;
      }

      // if (we are in the same folder and) the candidate message has a null
      //  message key, we treat it as our best option unless we find an exact
      //  key match.  (this would happen because the 'move' notification case
      //  has to deal with not knowing the target message key.  this case
      //  will hopefully be somewhat improved in the future to not go through
      //  this path which mandates re-indexing of the message in its entirety)
      if (candMsg.messageKey === null)
        curMsg = candMsg;
      // if (we are in the same folder and) the candidate message's underlying
      //  message no longer exists/matches, we'll assume we are the same but
      //  were betrayed by a re-indexing or something, but we have to make
      //  sure a perfect match doesn't turn up.
      else if ((curMsg === null) &&
               !this._indexingDatabase.ContainsKey(candMsg.messageKey))
        curMsg = candMsg;
    }
    // a ghost/deleted message is fine
    else if ((curMsg === null) && (candMsg.folderID === null)) {
      curMsg = candMsg;
    }
  }

  let attachmentNames = null;
  if (aMimeMsg) {
    attachmentNames = aMimeMsg.allAttachments.
      filter(att => att.isRealAttachment).map(att => att.name);
  }

  let isConceptuallyNew, isRecordNew, insertFulltext;
  if (curMsg === null) {
    // No reusable gloda message: create a brand new record.
    curMsg = this._datastore.createMessage(aMsgHdr.folder,
                                           aMsgHdr.messageKey,
                                           conversationID,
                                           aMsgHdr.date,
                                           aMsgHdr.messageId);
    curMsg._conversation = conversation;
    isConceptuallyNew = isRecordNew = insertFulltext = true;
  }
  else {
    isRecordNew = false;
    // the message is conceptually new if it was a ghost or dead.
    isConceptuallyNew = curMsg._isGhost || curMsg._isDeleted;
    // insert fulltext if it was a ghost
    insertFulltext = curMsg._isGhost;
    curMsg._folderID = this._datastore._mapFolder(aMsgHdr.folder).id;
    curMsg._messageKey = aMsgHdr.messageKey;
    // header dates are in microseconds; Date wants milliseconds.
    curMsg.date = new Date(aMsgHdr.date / 1000);
    // the message may have been deleted; tell it to make sure it's not.
    curMsg._ensureNotDeleted();
    // note: we are assuming that our matching logic is flawless in that
    //  if this message was not a ghost, we are assuming the 'body'
    //  associated with the id is still exactly the same.  It is conceivable
    //  that there are cases where this is not true.
  }

  if (aMimeMsg) {
    let bodyPlain = aMimeMsg.coerceBodyToPlaintext(aMsgHdr.folder);
    if (bodyPlain) {
      curMsg._bodyLines = bodyPlain.split(/\r?\n/);
      // curMsg._content gets set by fundattr.js
    }
  }

  // Mark the message as new (for the purposes of fulltext insertion)
  if (insertFulltext)
    curMsg._isNew = true;

  curMsg._subject = aMsgHdr.mime2DecodedSubject;
  curMsg._attachmentNames = attachmentNames;

  // curMsg._indexAuthor gets set by fundattr.js
  // curMsg._indexRecipients gets set by fundattr.js

  // zero the notability so everything in grokNounItem can just increment
  curMsg.notability = 0;

  yield aCallbackHandle.pushAndGo(
    Gloda.grokNounItem(curMsg,
                       {header: aMsgHdr, mime: aMimeMsg,
                        bodyLines: curMsg._bodyLines},
                       isConceptuallyNew, isRecordNew,
                       aCallbackHandle));

  // Drop the transient fields used only during attribute processing.
  delete curMsg._bodyLines;
  delete curMsg._content;
  delete curMsg._isNew;
  delete curMsg._indexAuthor;
  delete curMsg._indexRecipients;

  // we want to update the header for messages only after the transaction
  //  irrevocably hits the disk.  otherwise we could get confused if the
  //  transaction rolls back or what not.
  PendingCommitTracker.track(aMsgHdr, curMsg.id);

  yield this.kWorkDone;
},

/**
 * Wipe a message out of existence from our index.  This is slightly more
 *  tricky than one would first expect because there are potentially
 *  attributes not immediately associated with this message that reference
 *  the message.  Not only that, but deletion of messages may leave a
 *  conversation posessing only ghost messages, which we don't want, so we
 *  need to nuke the moot conversation and its moot ghost messages.
 * For now, we are actually punting on that trickiness, and the exact
 *  nuances aren't defined yet because we have not decided whether to store
 *  such attributes redundantly.  For example, if we have subject-pred-object,
 *  we could actually store this as attributes (subject, id, object) and
 *  (object, id, subject).  In such a case, we could query on (subject, *)
 *  and use the results to delete the (object, id, subject) case.
 * If we
 *  don't redundantly store attributes, we can deal with the problem by
 *  collecting up all the attributes that accept a message as their object
 *  type and issuing a delete against that.  For example, delete (*, [1,2,3],
 *  message id).
 * (We are punting because we haven't implemented support for generating
 *  attributes like that yet.)
 *
 * @TODO: implement deletion of attributes that reference (deleted) messages
 */
_deleteMessage: function* gloda_index_deleteMessage(aMessage,
                                                    aCallbackHandle) {
  let logDebug = this._log.level <= Log4Moz.Level.Debug;
  if (logDebug)
    this._log.debug("*** Deleting message: " + aMessage);

  // -- delete our attributes
  // delete the message's attributes (if we implement the cascade delete, that
  //  could do the honors for us... right now we define the trigger in our
  //  schema but the back-end ignores it)
  GlodaDatastore.clearMessageAttributes(aMessage);

  // -- delete our message or ghost us, and maybe nuke the whole conversation
  // Look at the other messages in the conversation.
  // (Note: although we are performing a lookup with no validity constraints
  //  and using the same object-relational-mapper-ish layer used by things
  //  that do have constraints, we are not at risk of exposing deleted
  //  messages to other code and getting it confused.  The only way code
  //  can find a message is if it shows up in their queries or gets announced
  //  via GlodaCollectionManager.itemsAdded, neither of which will happen.)
  let convPrivQuery = Gloda.newQuery(Gloda.NOUN_MESSAGE, {
    noDbQueryValidityConstraints: true,
  });
  convPrivQuery.conversation(aMessage.conversation);
  let conversationCollection = convPrivQuery.getCollection(aCallbackHandle);
  yield this.kWorkAsync;

  let conversationMsgs = conversationCollection.items;

  // Count the number of ghosts messages we see to determine if we are
  //  the last message alive.
  let ghostCount = 0;
  let twinMessageExists = false;
  for (let convMsg of conversationMsgs) {
    // ignore our own message
    if (convMsg.id == aMessage.id)
      continue;
    if (convMsg._isGhost)
      ghostCount++;
    // This message is our (living) twin if it is not a ghost, not deleted,
    //  and has the same message-id header.
    else if (!convMsg._isDeleted &&
             convMsg.headerMessageID == aMessage.headerMessageID)
      twinMessageExists = true;
  }

  // -- If everyone else is a ghost, blow away the conversation.
  // If there are messages still alive or deleted but we have not yet gotten
  //  to them yet _deleteMessage, then do not do this.  (We will eventually
  //  hit this case if they are all deleted.)
  if ((conversationMsgs.length - 1) == ghostCount) {
    // - Obliterate each message
    for (let msg of conversationMsgs) {
      GlodaDatastore.deleteMessageByID(msg.id);
    }
    // - Obliterate the conversation
    GlodaDatastore.deleteConversationByID(aMessage.conversationID);
    // *no one* should hold a reference or use aMessage after this point,
    //  trash it so such ne'er do'wells are made plain.
    aMessage._objectPurgedMakeYourselfUnpleasant();
  }
  // -- Ghost or purge us as appropriate
  else {
    // Purge us if we have a (living) twin; no ghost required.
    if (twinMessageExists) {
      GlodaDatastore.deleteMessageByID(aMessage.id);
      // *no one* should hold a reference or use aMessage after this point,
      //  trash it so such ne'er do'wells are made plain.
      aMessage._objectPurgedMakeYourselfUnpleasant();
    }
    // No twin, a ghost is required, we become the ghost.
    else {
      aMessage._ghost();
      GlodaDatastore.updateMessage(aMessage);
      // ghosts don't have fulltext.  purge it.
      GlodaDatastore.deleteMessageTextByID(aMessage.id);
    }
  }

  yield this.kWorkDone;
},
};

GlodaIndexer.registerIndexer(GlodaMsgIndexer);