diff options
Diffstat (limited to 'mailnews/db/gloda/modules/fundattr.js')
-rw-r--r-- | mailnews/db/gloda/modules/fundattr.js | 907 |
1 files changed, 907 insertions, 0 deletions
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

this.EXPORTED_SYMBOLS = ['GlodaFundAttr'];

var Cc = Components.classes;
var Ci = Components.interfaces;
var Cr = Components.results;
var Cu = Components.utils;

Cu.import("resource:///modules/gloda/log4moz.js");
Cu.import("resource:///modules/StringBundle.js");

Cu.import("resource:///modules/gloda/utils.js");
Cu.import("resource:///modules/gloda/gloda.js");
Cu.import("resource:///modules/gloda/datastore.js");
Cu.import("resource:///modules/gloda/datamodel.js"); // for GlodaAttachment

Cu.import("resource:///modules/gloda/noun_mimetype.js");
Cu.import("resource:///modules/gloda/connotent.js");

/**
 * @namespace The Gloda Fundamental Attribute provider is a special attribute
 *  provider; it provides attributes that the rest of the providers should be
 *  able to assume exist.  Also, it may end up accessing things at a lower level
 *  than most extension providers should do.  In summary, don't mimic this code
 *  unless you won't complain when your code breaks.
 */
var GlodaFundAttr = {
  providerName: "gloda.fundattr",
  strings: new StringBundle("chrome://messenger/locale/gloda.properties"),
  _log: null,

  /**
   * Set up the logger and register every attribute this provider defines.
   * Any failure while defining attributes is logged and then re-thrown.
   */
  init: function gloda_fundattr_init() {
    this._log = Log4Moz.repository.getLogger("gloda.fundattr");

    try {
      this.defineAttributes();
    }
    catch (ex) {
      this._log.error("Error in init: " + ex);
      throw ex;
    }
  },

  // Popularity boosts applied to a contact by optimize() for new messages.
  POPULARITY_FROM_ME_TO: 10,
  POPULARITY_FROM_ME_CC: 4,
  POPULARITY_FROM_ME_BCC: 3,
  POPULARITY_TO_ME: 5,
  POPULARITY_CC_ME: 1,
  POPULARITY_BCC_ME: 1,

  /** Boost for messages 'I' sent */
  NOTABILITY_FROM_ME: 10,
  /** Boost for messages involving 'me'. */
  NOTABILITY_INVOLVING_ME: 1,
  /** Boost for message from someone in 'my' address book. */
  NOTABILITY_FROM_IN_ADDR_BOOK: 10,
  /** Boost for the first person involved in my address book. */
  NOTABILITY_INVOLVING_ADDR_BOOK_FIRST: 8,
  /** Boost for each additional person involved in my address book. */
  NOTABILITY_INVOLVING_ADDR_BOOK_ADDL: 2,

  /**
   * Register all of the provider's attributes with Gloda, keeping the returned
   * attribute definitions on `this` (as `_attr*`) for the ones that are named.
   */
  defineAttributes: function gloda_fundattr_defineAttributes() {
    /* ***** Conversations ***** */
    // conversation: subjectMatches
    this._attrConvSubject = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "subjectMatches",
      singular: true,
      special: Gloda.kSpecialFulltext,
      specialColumnName: "subject",
      subjectNouns: [Gloda.NOUN_CONVERSATION],
      objectNoun: Gloda.NOUN_FULLTEXT,
    });

    /* ***** Messages ***** */
    // folder
    this._attrFolder = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "folder",
      singular: true,
      facet: true,
      special: Gloda.kSpecialColumn,
      specialColumnName: "folderID",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_FOLDER,
    }); // tested-by: test_attributes_fundamental
    this._attrAccount = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "account",
      canQuery: "memory",
      singular: true,
      facet: true,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_ACCOUNT
    });
    this._attrMessageKey = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "messageKey",
      singular: true,
      special: Gloda.kSpecialColumn,
      specialColumnName: "messageKey",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_NUMBER,
      canQuery: true,
    }); // tested-by: test_attributes_fundamental

    // We need to surface the deleted attribute for querying, but there is no
    //  reason for user code, so let's call it "_deleted" rather than deleted.
    // (In fact, our validity constraints require a special query formulation
    //  that user code should have no clue exists.  That's right user code,
    //  that's a dare.)
    Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "_deleted",
      singular: true,
      special: Gloda.kSpecialColumn,
      specialColumnName: "deleted",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_NUMBER,
    });


    // -- fulltext search helpers
    // fulltextMatches.  Match over message subject, body, and attachments
    // @testpoint gloda.noun.message.attr.fulltextMatches
    this._attrFulltext = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "fulltextMatches",
      singular: true,
      special: Gloda.kSpecialFulltext,
      specialColumnName: "messagesText",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_FULLTEXT,
    });

    // subjectMatches.  Fulltext match on subject
    // @testpoint gloda.noun.message.attr.subjectMatches
    this._attrSubjectText = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "subjectMatches",
      singular: true,
      special: Gloda.kSpecialFulltext,
      specialColumnName: "subject",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_FULLTEXT,
    });

    // bodyMatches. super-synthetic full-text matching...
    // @testpoint gloda.noun.message.attr.bodyMatches
    this._attrBody = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "bodyMatches",
      singular: true,
      special: Gloda.kSpecialFulltext,
      specialColumnName: "body",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_FULLTEXT,
    });

    // attachmentNamesMatch
    // @testpoint gloda.noun.message.attr.attachmentNamesMatch
    this._attrAttachmentNames = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "attachmentNamesMatch",
      singular: true,
      special: Gloda.kSpecialFulltext,
      specialColumnName: "attachmentNames",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_FULLTEXT,
    });

    // @testpoint gloda.noun.message.attr.authorMatches
    this._attrAuthorFulltext = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "authorMatches",
      singular: true,
      special: Gloda.kSpecialFulltext,
      specialColumnName: "author",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_FULLTEXT,
    });

    // @testpoint gloda.noun.message.attr.recipientsMatch
    this._attrRecipientsFulltext = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrDerived,
      attributeName: "recipientsMatch",
      singular: true,
      special: Gloda.kSpecialFulltext,
      specialColumnName: "recipients",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_FULLTEXT,
    });

    // --- synthetic stuff for some reason
    // conversation
    // @testpoint gloda.noun.message.attr.conversation
    this._attrConversation = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "conversation",
      singular: true,
      special: Gloda.kSpecialColumnParent,
      specialColumnName: "conversationID",
      idStorageAttributeName: "_conversationID",
      valueStorageAttributeName: "_conversation",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_CONVERSATION,
      canQuery: true,
    });

    // --- Fundamental
    // From
    this._attrFrom = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "from",
      singular: true,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_IDENTITY,
    }); // tested-by: test_attributes_fundamental
    // To
    this._attrTo = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "to",
      singular: false,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_IDENTITY,
    }); // tested-by: test_attributes_fundamental
    // Cc
    this._attrCc = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "cc",
      singular: false,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_IDENTITY,
    }); // not-tested
    /**
     * Bcc'ed recipients; only makes sense for sent messages.
     */
    this._attrBcc = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "bcc",
      singular: false,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_IDENTITY,
    }); // not-tested

    // Date.  now lives on the row.
    this._attrDate = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "date",
      singular: true,
      facet: {
        type: "date",
      },
      special: Gloda.kSpecialColumn,
      specialColumnName: "date",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_DATE,
    }); // tested-by: test_attributes_fundamental

    // Header message ID.
    this._attrHeaderMessageID = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "headerMessageID",
      singular: true,
      special: Gloda.kSpecialString,
      specialColumnName: "headerMessageID",
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_STRING,
      canQuery: true,
    }); // tested-by: test_attributes_fundamental

    // Attachment MIME Types
    this._attrAttachmentTypes = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "attachmentTypes",
      singular: false,
      emptySetIsSignificant: true,
      facet: {
        type: "default",
        // This will group the MIME types by their category.
        groupIdAttr: "category",
        queryHelper: "Category",
      },
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_MIME_TYPE,
    });

    // Encryption: whether the message, or any of its MIME parts, is encrypted
    this._attrIsEncrypted = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "isEncrypted",
      singular: true,
      emptySetIsSignificant: false,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_NUMBER,
    });

    // Attachment infos
    this._attrAttachmentInfos = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "attachmentInfos",
      singular: false,
      emptySetIsSignificant: false,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_ATTACHMENT,
    });

    // --- Optimization
    /**
     * Involves means any of from/to/cc/bcc.  The queries get ugly enough
     *  without this that it seems to justify the cost, especially given the
     *  frequent use case.  (In fact, post-filtering for the specific from/to/cc
     *  is probably justifiable rather than losing this attribute...)
     */
    this._attrInvolves = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrOptimization,
      attributeName: "involves",
      singular: false,
      facet: {
        type: "default",
        /**
         * Filter out 'me', as we have other facets that deal with that, and the
         *  'me' identities are so likely that they distort things.
         *
         * @return true if the identity is not one of my identities, false if it
         *   is.
         */
        filter: function gloda_fundattr_involves_filter(aItem) {
          return (!(aItem.id in Gloda.myIdentities));
        }
      },
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_IDENTITY,
    }); // not-tested

    /**
     * Any of to/cc/bcc.
     */
    this._attrRecipients = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrOptimization,
      attributeName: "recipients",
      singular: false,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_IDENTITY,
    }); // not-tested

    // From Me (To/Cc/Bcc)
    this._attrFromMe = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrOptimization,
      attributeName: "fromMe",
      singular: false,
      // The interesting thing to a facet is whether the message is from me.
      facet: {
        type: "nonempty?"
      },
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_PARAM_IDENTITY,
    }); // not-tested
    // To/Cc/Bcc Me
    this._attrToMe = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "toMe",
      // The interesting thing to a facet is whether the message is to me.
      facet: {
        type: "nonempty?"
      },
      singular: false,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_PARAM_IDENTITY,
    }); // not-tested


    // -- Mailing List
    // Non-singular, but a hard call.  Namely, it is obvious that a message can
    //  be addressed to multiple mailing lists.  However, I don't see how you
    //  could receive a message with more than one set of List-* headers,
    //  since each list-serve would each send you a copy.  Based on our current
    //  decision to treat each physical message as separate, it almost seems
    //  right to limit the list attribute to the copy that originated at the
    //  list.  That may sound entirely wrong, but keep in mind that until we
    //  have seen a message from the list with the List headers, we can't
    //  definitely know it's a mailing list (although heuristics could take us
    //  pretty far).  As such, the quasi-singular thing is appealing.
    // Of course, the reality is that we really want to know if a message was
    //  sent to multiple mailing lists and be able to query on that.
    //  Additionally, our implicit-to logic needs to work on messages that
    //  weren't relayed by the list-serve, especially messages sent to the list
    //  by the user.
    this._attrList = Gloda.defineAttribute({
      provider: this,
      extensionName: Gloda.BUILT_IN,
      attributeType: Gloda.kAttrFundamental,
      attributeName: "mailing-list",
      bindName: "mailingLists",
      singular: false,
      emptySetIsSignificant: true,
      facet: true,
      subjectNouns: [Gloda.NOUN_MESSAGE],
      objectNoun: Gloda.NOUN_IDENTITY,
    }); // not-tested, not-implemented
  },

  /** Extracts the address out of a "<mailto:...>" List-Post header value. */
  RE_LIST_POST: /<mailto:([^>]+)>/,

  /**
   * Populate the fundamental attributes (from/to/cc/bcc, mailing lists,
   *  encryption state, attachment types and attachment infos) of a gloda
   *  message from its raw representations.
   *
   * Specializations:
   * - Mailing Lists.  Replies to a message on a mailing list frequently only
   *   have the list-serve as the 'to', so we try to generate a synthetic 'to'
   *   based on the author of the parent message when possible.  (The 'possible'
   *   part is that we may not have a copy of the parent message at the time of
   *   processing.)
   * - Newsgroups.  Same deal as mailing lists.
   *
   * This is a generator: it yields the result of aCallbackHandle.pushAndGo()
   *  while identities are being resolved and finally yields Gloda.kWorkDone.
   *
   * @param aGlodaMessage The gloda message whose attributes are filled in.
   * @param aRawReps Raw representations; `header`, `mime` and `trueGlodaRep`
   *     are read here.  `mime` may be absent, in which case encryption is
   *     reported as false and no attachment attributes are set.
   * @param aIsNew Unused by this method.
   * @param aCallbackHandle Handle used to run the identity lookup.
   * @throws Gloda.BadItemContentsError if the message does not resolve to
   *     exactly one author identity.
   */
  process: function* gloda_fundattr_process(aGlodaMessage, aRawReps,
                                            aIsNew, aCallbackHandle) {
    let aMsgHdr = aRawReps.header;
    let aMimeMsg = aRawReps.mime;

    // -- From
    // Let's use replyTo if available.
    // er, since we are just dealing with mailing lists for now, forget the
    //  reply-to...
    // TODO: deal with default charset issues
    let author = null;
    /*
    try {
      author = aMsgHdr.getStringProperty("replyTo");
    }
    catch (ex) {
    }
    */
    if (author == null || author == "")
      author = aMsgHdr.author;

    let normalizedListPost = "";
    if (aMimeMsg && aMimeMsg.has("list-post")) {
      let match = this.RE_LIST_POST.exec(aMimeMsg.get("list-post"));
      if (match)
        normalizedListPost = "<" + match[1] + ">";
    }

    // Do not use the MIME decoded variants of any of the email addresses
    //  because if name is encoded and has a comma in it, it will break the
    //  address parser (which already knows how to do the decoding anyways).
    let [authorIdentities, toIdentities, ccIdentities, bccIdentities,
         listIdentities] =
      yield aCallbackHandle.pushAndGo(
        Gloda.getOrCreateMailIdentities(aCallbackHandle,
                                        author, aMsgHdr.recipients,
                                        aMsgHdr.ccList, aMsgHdr.bccList,
                                        normalizedListPost));

    if (authorIdentities.length != 1) {
      throw new Gloda.BadItemContentsError(
        "Message with subject '" + aMsgHdr.mime2DecodedSubject +
        "' somehow lacks a valid author.  Bailing.");
    }
    let authorIdentity = authorIdentities[0];
    aGlodaMessage.from = authorIdentity;

    // -- To, Cc, Bcc
    aGlodaMessage.to = toIdentities;
    aGlodaMessage.cc = ccIdentities;
    aGlodaMessage.bcc = bccIdentities;

    // -- Mailing List
    if (listIdentities.length)
      aGlodaMessage.mailingLists = listIdentities;

    // -- Encryption
    // A message counts as encrypted if it, or any part nested below it, is
    //  flagged as encrypted.
    let findIsEncrypted = x =>
      x.isEncrypted || (x.parts ? x.parts.some(findIsEncrypted) : false);

    aGlodaMessage.isEncrypted = false;
    if (aMimeMsg) {
      aGlodaMessage.isEncrypted = findIsEncrypted(aMimeMsg);
    }

    // -- Attachments
    if (aMimeMsg) {
      // nsParseMailbox.cpp puts the attachment flag on msgHdrs as soon as it
      // finds a multipart/mixed part. This is a good heuristic, but if it turns
      // out the part has no filename, then we don't treat it as an attachment.
      // We just streamed the message, and we have all the information to figure
      // that out, so now is a good place to clear the flag if needed.
      let foundRealAttachment = false;
      let attachmentTypes = [];
      for (let attachment of aMimeMsg.allAttachments) {
        // We don't care about would-be attachments that are not user-intended
        //  attachments but rather artifacts of the message content.
        // We also want to avoid dealing with obviously bogus mime types.
        //  (If you don't have a "/", you are probably bogus.)
        if (attachment.isRealAttachment &&
            attachment.contentType.includes("/")) {
          attachmentTypes.push(MimeTypeNoun.getMimeType(attachment.contentType));
        }
        if (attachment.isRealAttachment)
          foundRealAttachment = true;
      }
      if (attachmentTypes.length) {
        aGlodaMessage.attachmentTypes = attachmentTypes;
      }

      // (aMsgHdr is the header fetched at the top of this method.)
      let wasStreamed = aMsgHdr &&
        !aGlodaMessage.isEncrypted &&
        ((aMsgHdr.flags & Ci.nsMsgMessageFlags.Offline) ||
         (aMsgHdr.folder instanceof Ci.nsIMsgLocalMailFolder));

      // Clear the flag if it turns out there's no attachment after all and we
      // streamed completely the message (if we didn't, then we have no
      // knowledge of attachments, unless bug 673370 is fixed).
      if (!foundRealAttachment && wasStreamed)
        aMsgHdr.markHasAttachments(false);

      // This is not the same kind of attachments as above. Now, we want to
      // provide convenience attributes to Gloda consumers, so that they can run
      // through the list of attachments of a given message, to possibly build a
      // visualization on top of it. We still reject bogus mime types, which
      // means yencode won't be supported. Oh, I feel really bad.
      let attachmentInfos = [];
      for (let att of aMimeMsg.allUserAttachments) {
        attachmentInfos.push(this.glodaAttFromMimeAtt(aRawReps.trueGlodaRep,
                                                      att));
      }
      aGlodaMessage.attachmentInfos = attachmentInfos;
    }

    // TODO: deal with mailing lists, including implicit-to.  this will require
    //  convincing the indexer to pass us in the previous message if it is
    //  available.  (which we'll simply pass to everyone... it can help body
    //  logic for quoting purposes, etc. too.)

    yield Gloda.kWorkDone;
  },

  /**
   * Build a GlodaAttachment describing a MIME attachment.
   *
   * So we don't want to store the URL because it can change over time if
   * the message is moved. What we do is store the full URL if it's a
   * detached attachment, otherwise just keep the part information, and
   * rebuild the URL according to where the message is sitting.
   *
   * @param aGlodaMessage The gloda message the attachment belongs to.
   * @param aAtt The MIME attachment; `isExternal`, `url`, `name`,
   *     `contentType` and `size` are read.
   * @return A new GlodaAttachment.  For non-external attachments whose URL
   *     does not match GlodaUtils.PART_RE an error is logged and the part is
   *     left undefined.
   */
  glodaAttFromMimeAtt:
      function gloda_fundattr_glodaAttFromMimeAtt(aGlodaMessage, aAtt) {
    let part, externalUrl;
    if (aAtt.isExternal) {
      externalUrl = aAtt.url;
    } else {
      let matches = aAtt.url.match(GlodaUtils.PART_RE);
      if (matches && matches.length)
        part = matches[1];
      else
        this._log.error("Error processing attachment: " + aAtt.url);
    }
    return new GlodaAttachment(aGlodaMessage,
                               aAtt.name,
                               aAtt.contentType,
                               aAtt.size,
                               part,
                               externalUrl,
                               aAtt.isExternal);
  },

  /**
   * Derive the optimization attributes (involves, recipients, toMe, fromMe),
   *  the fulltext author/recipient columns, the notability score and, for new
   *  messages, contact popularity; then whittle the body into content.
   *
   * This is a generator that yields Gloda.kWorkDone once finished.
   *
   * @param aGlodaMessage The gloda message; `from`, `to`, `cc` and `bcc` must
   *     already be populated.
   * @param aRawReps Raw representations; `header` and `bodyLines` are read and
   *     `content` is written.
   * @param aIsNew When true, contact popularity is boosted.
   * @param aCallbackHandle Unused by this method.
   */
  optimize: function* gloda_fundattr_optimize(aGlodaMessage, aRawReps,
                                              aIsNew, aCallbackHandle) {

    let aMsgHdr = aRawReps.header;

    // for simplicity this is used for both involves and recipients
    let involvesIdentities = {};
    let involves = aGlodaMessage.involves || [];
    let recipients = aGlodaMessage.recipients || [];

    // 'me' specialization optimizations
    let toMe = aGlodaMessage.toMe || [];
    let fromMe = aGlodaMessage.fromMe || [];

    let myIdentities = Gloda.myIdentities; // needless optimization?
    let authorIdentity = aGlodaMessage.from;
    let isFromMe = authorIdentity.id in myIdentities;

    // The fulltext search column for the author.  We want to have in here:
    // - The e-mail address and display name as enclosed on the message.
    // - The name per the address book card for this e-mail address, if we have
    //   one.
    aGlodaMessage._indexAuthor = aMsgHdr.mime2DecodedAuthor;
    // The fulltext search column for the recipients. (same deal)
    aGlodaMessage._indexRecipients = aMsgHdr.mime2DecodedRecipients;

    if (isFromMe)
      aGlodaMessage.notability += this.NOTABILITY_FROM_ME;
    else {
      let authorCard = authorIdentity.abCard;
      if (authorCard) {
        aGlodaMessage.notability += this.NOTABILITY_FROM_IN_ADDR_BOOK;
        // @testpoint gloda.noun.message.attr.authorMatches
        aGlodaMessage._indexAuthor += ' ' + authorCard.displayName;
      }
    }

    involves.push(authorIdentity);
    involvesIdentities[authorIdentity.id] = true;

    let involvedAddrBookCount = 0;

    // to, cc and bcc are handled identically; only the popularity boosts
    //  differ.  (bcc is just treated like cc; the intent is the same although
    //  the exact semantics differ.)  The attribute *names* are tabled rather
    //  than the lists themselves so each list is still read right before it is
    //  walked, in to/cc/bcc order.
    let recipientKinds = [
      ["to", this.POPULARITY_TO_ME, this.POPULARITY_FROM_ME_TO],
      ["cc", this.POPULARITY_CC_ME, this.POPULARITY_FROM_ME_CC],
      ["bcc", this.POPULARITY_BCC_ME, this.POPULARITY_FROM_ME_BCC],
    ];
    for (let [attrName, popularityToMe, popularityFromMe] of recipientKinds) {
      for (let identity of aGlodaMessage[attrName]) {
        // Only the first occurrence of an identity counts towards
        //  involves/recipients and the address-book tally.
        if (!(identity.id in involvesIdentities)) {
          involves.push(identity);
          recipients.push(identity);
          involvesIdentities[identity.id] = true;
          let card = identity.abCard;
          if (card) {
            involvedAddrBookCount++;
            // @testpoint gloda.noun.message.attr.recipientsMatch
            aGlodaMessage._indexRecipients += ' ' + card.displayName;
          }
        }

        // optimization attribute to-me ('I' am the parameter)
        if (identity.id in myIdentities) {
          toMe.push([identity, authorIdentity]);
          if (aIsNew)
            authorIdentity.contact.popularity += popularityToMe;
        }
        // optimization attribute from-me-to ('I' am the parameter)
        if (isFromMe) {
          fromMe.push([authorIdentity, identity]);
          // also, popularity
          if (aIsNew)
            identity.contact.popularity += popularityFromMe;
        }
      }
    }

    if (involvedAddrBookCount)
      aGlodaMessage.notability += this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST +
        (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL;

    aGlodaMessage.involves = involves;
    aGlodaMessage.recipients = recipients;
    if (toMe.length) {
      aGlodaMessage.toMe = toMe;
      aGlodaMessage.notability += this.NOTABILITY_INVOLVING_ME;
    }
    if (fromMe.length)
      aGlodaMessage.fromMe = fromMe;

    // Content
    if (aRawReps.bodyLines) {
      aGlodaMessage._content = aRawReps.content = new GlodaContent();
      if (this.contentWhittle({}, aRawReps.bodyLines, aGlodaMessage._content)) {
        // we were going to do something here?
      }
    }
    else {
      aRawReps.content = null;
    }

    yield Gloda.kWorkDone;
  },

  /**
   * Duplicates the notability logic from optimize().  Arguably optimize should
   *  be factored to call us, grokNounItem should be factored to call us, or we
   *  should get sufficiently fancy that our code wildly diverges.
   *
   * NOTE(review): unlike optimize(), this only tallies address-book hits over
   *  to and cc (not bcc) and does not de-duplicate identities -- confirm
   *  whether that divergence is intended.
   *
   * @param aMessage The gloda message to score; `from`, `toMe`, `to` and `cc`
   *     are read.
   * @param aContext Unused by this method.
   * @return The numeric notability score.
   */
  score: function gloda_fundattr_score(aMessage, aContext) {
    let score = 0;

    let authorIdentity = aMessage.from;
    if (authorIdentity.id in Gloda.myIdentities)
      score += this.NOTABILITY_FROM_ME;
    else if (authorIdentity.inAddressBook)
      score += this.NOTABILITY_FROM_IN_ADDR_BOOK;
    if (aMessage.toMe)
      score += this.NOTABILITY_INVOLVING_ME;

    // Standard for...of, as used elsewhere in this file, instead of the
    //  non-standard |for ([, v] in Iterator(array))| form.
    let involvedAddrBookCount = 0;
    for (let identity of aMessage.to)
      if (identity.inAddressBook)
        involvedAddrBookCount++;
    for (let identity of aMessage.cc)
      if (identity.inAddressBook)
        involvedAddrBookCount++;
    if (involvedAddrBookCount)
      score += this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST +
        (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL;
    return score;
  },

  /**
   * Count the leading ">" quote markers of a line and strip them.
   *
   * Scans the leading run of ">" and " " characters.  Every ">" increases the
   *  depth; spaces are skipped; the first other character ends the scan.
   *
   * @param aLine The line to inspect.
   * @return A [depth, text] pair where text is aLine starting just after the
   *     last counted ">" (spaces following it are retained), or aLine itself
   *     when no ">" was found.
   */
  _countQuoteDepthAndNormalize:
    function gloda_fundattr__countQuoteDepthAndNormalize(aLine) {
    let count = 0;
    let lastStartOffset = 0;

    for (let i = 0; i < aLine.length; i++) {
      let c = aLine[i];
      if (c == ">") {
        count++;
        lastStartOffset = i+1;
      }
      else if (c != " ") {
        // first character that is neither a quote marker nor padding
        break;
      }
    }

    return [count, lastStartOffset ? aLine.substring(lastStartOffset) : aLine];
  },

  /**
   * Attempt to understand simple quoting constructs that use ">" with
   * obvious phrases to enter the quoting block.  No support for other types
   * of quoting at this time.  Also no support for piercing the wrapper of
   * forwarded messages to actually be the content of the forwarded message.
   *
   * @param aMeta Unused by this method.
   * @param aBodyLines Array of body lines; not modified.
   * @param aContent Content sink; receives content() calls for un-quoted runs
   *     and quoted() calls (with the quote depth) for quoted runs.
   * @return false if aContent declined via volunteerContent(), true otherwise.
   */
  contentWhittle: function gloda_fundattr_contentWhittle(aMeta,
      aBodyLines, aContent) {
    if (!aContent.volunteerContent(aContent.kPriorityBase))
      return false;

    // duplicate the list; we mutate somewhat...
    // NOTE(review): normalized lines are stored in this copy, but every slice
    //  handed to aContent below is taken from the original aBodyLines, so the
    //  normalization never reaches the emitted content -- confirm intent.
    let bodyLines = aBodyLines.concat();

    // lastNonBlankLine originally was just for detecting quoting idioms where
    //  the "wrote" line was separated from the quoted block by a blank line.
    //  Now we also use it for whitespace suppression at the boundaries of
    //  quoted and un-quoted text.  (We keep blank lines within the same
    //  'block' of quoted or non-quoted text.)
    // Because we now have two goals for it, and we still want to suppress blank
    //  lines when there is a 'wrote' line involved, we introduce...
    //  prevLastNonBlankLine!  This arguably suggests refactoring should be the
    //  next step, but things work for now.
    let rangeStart = 0, lastNonBlankLine = null, prevLastNonBlankLine = null;
    let inQuoteDepth = 0;
    for (let [iLine, line] of bodyLines.entries()) {
      if (!line || (line == "\xa0")) /* unicode non breaking space */
        continue;

      if (line.startsWith(">")) {
        if (!inQuoteDepth) {
          let rangeEnd = iLine - 1;
          let quoteRangeStart = iLine;
          // see if the last non-blank-line was a lead-in...
          if (lastNonBlankLine != null) {
            // TODO: localize quote range start detection
            if (aBodyLines[lastNonBlankLine].includes("wrote")) {
              quoteRangeStart = lastNonBlankLine;
              rangeEnd = lastNonBlankLine - 1;
              // we 'used up' lastNonBlankLine, let's promote the prev guy to
              //  be the new lastNonBlankLine for the next logic block
              lastNonBlankLine = prevLastNonBlankLine;
            }
            // eat the trailing whitespace...
            if (lastNonBlankLine != null)
              rangeEnd = Math.min(rangeEnd, lastNonBlankLine);
          }
          if (rangeEnd >= rangeStart)
            aContent.content(aBodyLines.slice(rangeStart, rangeEnd+1));

          [inQuoteDepth, line] = this._countQuoteDepthAndNormalize(line);
          bodyLines[iLine] = line;
          rangeStart = quoteRangeStart;
        }
        else {
          let curQuoteDepth;
          [curQuoteDepth, line] = this._countQuoteDepthAndNormalize(line);
          bodyLines[iLine] = line;

          if (curQuoteDepth != inQuoteDepth) {
            // we could do some "wrote" compensation here, but it's not really
            //  as important.  let's wait for a more clever algorithm.
            aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth);
            inQuoteDepth = curQuoteDepth;
            rangeStart = iLine;
          }
        }
      }
      else {
        if (inQuoteDepth) {
          aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth);
          inQuoteDepth = 0;
          rangeStart = iLine;
        }
      }

      prevLastNonBlankLine = lastNonBlankLine;
      lastNonBlankLine = iLine;
    }

    if (inQuoteDepth) {
      aContent.quoted(aBodyLines.slice(rangeStart), inQuoteDepth);
    }
    else {
      // NOTE(review): lastNonBlankLine is still null when every line was
      //  blank; null+1 evaluates to 1, so at most the first line is emitted.
      aContent.content(aBodyLines.slice(rangeStart, lastNonBlankLine+1));
    }

    return true;
  },
};