summaryrefslogtreecommitdiffstats
path: root/mailnews/db/gloda/modules/connotent.js
diff options
context:
space:
mode:
Diffstat (limited to 'mailnews/db/gloda/modules/connotent.js')
-rw-r--r--mailnews/db/gloda/modules/connotent.js273
1 files changed, 273 insertions, 0 deletions
diff --git a/mailnews/db/gloda/modules/connotent.js b/mailnews/db/gloda/modules/connotent.js
new file mode 100644
index 000000000..4ef424d43
--- /dev/null
+++ b/mailnews/db/gloda/modules/connotent.js
@@ -0,0 +1,273 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+this.EXPORTED_SYMBOLS = ['GlodaContent', 'whittlerRegistry',
+ 'mimeMsgToContentAndMeta', 'mimeMsgToContentSnippetAndMeta'];
+
+var Cc = Components.classes;
+var Ci = Components.interfaces;
+var Cr = Components.results;
+var Cu = Components.utils;
+
+Cu.import("resource:///modules/gloda/log4moz.js");
+
+var LOG = Log4Moz.repository.getLogger("gloda.connotent");
+
+
+
/**
 * Build the GlodaContent for a message by handing its plaintext body, split
 * into lines, to every registered content whittler in turn.
 *
 * @param aMimeMsg the MimeMessage instance; its "subject" header seeds the
 *     meta dictionary and its body is coerced to plaintext.
 * @param folder the nsIMsgDBFolder, passed through to
 *     coerceBodyToPlaintext().
 * @return a two-element array: the GlodaContent instance, and the meta
 *     dictionary that the whittlers may have filled with additional data.
 */
function mimeMsgToContentAndMeta(aMimeMsg, folder) {
  const glodaContent = new GlodaContent();
  const metaInfo = { subject: aMimeMsg.get("subject") };

  // Accept both CRLF and bare LF line endings.
  const plaintextBody = aMimeMsg.coerceBodyToPlaintext(folder);
  const lines = plaintextBody.split(/\r?\n/);

  // Each whittler sees the same meta dictionary, line list and content
  // object, in the order the registry hands the whittlers out.
  for (const provider of whittlerRegistry.getWhittlers()) {
    provider.contentWhittle(metaInfo, lines, glodaContent);
  }

  return [glodaContent, metaInfo];
}
+
+
/**
 * Given a MimeMsg, return the whittled content string, suitable for
 * summarizing a message.
 *
 * @param aMimeMsg the MimeMessage instance
 * @param folder the nsIMsgDBFolder
 * @param length optional maximum number of characters for the snippet. If the
 *     whittled content is longer than |length|, the return value is its first
 *     (length - 1) characters with an ellipsis appended. When |length| is
 *     omitted or not a positive number, the content is returned untrimmed.
 * @return a two-element array: the text of the snippet, and the meta
 *     dictionary that the Gloda content providers may have filled with useful
 *     data.
 */
function mimeMsgToContentSnippetAndMeta(aMimeMsg, folder, length) {
  const [content, meta] = mimeMsgToContentAndMeta(aMimeMsg, folder);

  // No usable limit: ask for the whole content explicitly rather than relying
  // on `undefined + 1` evaluating to a falsy NaN further down the call chain.
  if (!(length > 0)) {
    return [content.getContentSnippet(), meta];
  }

  // Request one character more than the limit so we can tell "exactly
  // |length| long" apart from "longer than |length|".
  let text = content.getContentSnippet(length + 1);
  if (text.length > length) {
    text = text.substring(0, length - 1) + "\u2026"; // ellipsis
  }

  return [text, meta];
}
+
+
/**
 * A registry of gloda providers that have contentWhittle() functions.
 * Consulted by mimeMsgToContentAndMeta (and therefore by
 * mimeMsgToContentSnippetAndMeta); per the original note it is populated by
 * the Gloda object while it processes providers.
 */
function WhittlerRegistry() {
  // Providers in registration order.
  this._whittlers = [];
}

WhittlerRegistry.prototype = {
  /**
   * Add a provider as a content whittler.
   *
   * @param provider the provider to append to the registration list.
   */
  registerWhittler: function whittler_registry_registerWhittler(provider) {
    const registered = this._whittlers;
    registered.push(provider);
  },

  /**
   * Get the list of content whittlers in reverse registration order, which
   * the original documentation describes as sorted from the most specific to
   * the most generic.
   *
   * @return a fresh array; callers may mutate it without affecting the
   *     registry.
   */
  getWhittlers: function whittler_registry_getWhittlers() {
    // reverse() works in place, so reverse a copy to keep the internal list
    // intact and avoid leaking the internal representation.
    const snapshot = this._whittlers.slice();
    snapshot.reverse();
    return snapshot;
  },
};
+
+this.whittlerRegistry = new WhittlerRegistry(); // shared instance named in EXPORTED_SYMBOLS; read by mimeMsgToContentAndMeta
+
/**
 * Accumulates one interpretation of a message's content as an ordered list of
 * typed hunks (meta, quoted, content) plus key/value annotations.
 *
 * Producers first call volunteerContent() with a priority; only calls made
 * after a successful volunteer are recorded. Consumers read the result back
 * through hasContent(), getContentSnippet() and getContentString().
 */
function GlodaContent() {
  this._contentPriority = null;
  this._producing = false;
  // Initialise every accumulator up front so the consumer API is safe to call
  // even before any producer has volunteered.
  this._keysAndValues = [];
  this._keysAndDeltaValues = [];
  this._hunks = [];
  this._curHunk = null;
}

GlodaContent.prototype = {
  // Priority values producers can pass to volunteerContent().
  kPriorityBase: 0,
  kPriorityPerfect: 100,

  // Hunk type tags stored in each hunk's |hunkType| field.
  kHunkMeta: 1,
  kHunkQuoted: 2,
  kHunkContent: 3,

  /**
   * Discard everything recorded so far (hunks and key/value annotations).
   */
  _resetContent() {
    this._keysAndValues = [];
    this._keysAndDeltaValues = [];
    this._hunks = [];
    this._curHunk = null;
  },

  /**
   * Normalize a producer argument to a single string.
   *
   * @param aLineOrLines Either a string, or a list of lines which is joined
   *     with newlines.
   * @returns the text as one string.
   */
  _joinLines(aLineOrLines) {
    if (typeof aLineOrLines === "string") {
      return aLineOrLines;
    }
    return aLineOrLines.join("\n");
  },

  /* ===== Consumer API ===== */
  /**
   * @returns true once some producer has successfully volunteered content.
   */
  hasContent() {
    return this._contentPriority != null;
  },

  /**
   * Return content suitable for snippet display.  This means that no quoting
   * or meta-data should be returned.
   *
   * @param aMaxLength The maximum snippet length desired.  When omitted (or
   *     otherwise falsy) the full content string is returned.
   */
  getContentSnippet(aMaxLength) {
    const content = this.getContentString();
    return aMaxLength ? content.substring(0, aMaxLength) : content;
  },

  /**
   * Concatenate the data of all content hunks, separated by newlines.
   *
   * @param aIndexingPurposes When truthy, the values recorded via keyValue()
   *     and the old/new values recorded via keyValueDelta() are appended as
   *     well, one per line.
   * @returns the assembled string.
   */
  getContentString(aIndexingPurposes) {
    let data = "";
    for (const hunk of this._hunks) {
      if (hunk.hunkType !== this.kHunkContent) {
        continue;
      }
      if (data) {
        data += "\n" + hunk.data;
      } else {
        data = hunk.data;
      }
    }

    if (aIndexingPurposes) {
      // Append only the values for indexing; the keys are assumed to be
      // cruft.
      for (const [, value] of this._keysAndValues) {
        data += "\n" + value;
      }
      // Delta entries are [key, oldValue, newValue]; index both values.
      for (const [, oldValue, newValue] of this._keysAndDeltaValues) {
        data += "\n" + oldValue + "\n" + newValue;
      }
    }

    return data;
  },

  /* ===== Producer API ===== */
  /**
   * Called by a producer with the priority they believe their interpretation
   * of the content comes in at.
   *
   * @returns true if we believe the producer's interpretation will be
   *     interesting and they should go ahead and generate events.  We return
   *     false if we don't think they are interesting, in which case they
   *     should probably not issue calls to us, although we don't care.  (We
   *     will ignore their calls if we return false, this allows the
   *     simplification of code that needs to run anyways.)
   */
  volunteerContent(aPriority) {
    // Only a strictly higher priority displaces what was recorded before.
    if (this._contentPriority === null || this._contentPriority < aPriority) {
      this._contentPriority = aPriority;
      this._resetContent();
      this._producing = true;
      return true;
    }
    this._producing = false;
    return false;
  },

  /**
   * Record a key/value annotation.
   *
   * @param aKey The key.
   * @param aValue The value associated with the key.
   */
  keyValue(aKey, aValue) {
    if (!this._producing) {
      return;
    }

    this._keysAndValues.push([aKey, aValue]);
  },

  /**
   * Record a key whose value changed.
   *
   * @param aKey The key.
   * @param aOldValue The value before the change.
   * @param aNewValue The value after the change.
   */
  keyValueDelta(aKey, aOldValue, aNewValue) {
    if (!this._producing) {
      return;
    }

    this._keysAndDeltaValues.push([aKey, aOldValue, aNewValue]);
  },

  /**
   * Meta lines are lines that have to do with the content but are not the
   * content and can generally be related to an attribute that has been
   * derived and stored on the item.
   * For example, a bugzilla bug may note that an attachment was created; this
   * is not content and wouldn't be desired in a snippet, but is still
   * potentially interesting meta-data.
   *
   * Every call starts a new hunk; meta hunks are never merged.
   *
   * @param aLineOrLines The line or list of lines that are meta-data.
   * @param aAttr The attribute this meta-data is associated with.
   * @param aIndex If the attribute is non-singular, indicate the specific
   *     index of the item in the attribute's bound list that the meta-data
   *     is associated with.
   */
  meta(aLineOrLines, aAttr, aIndex) {
    if (!this._producing) {
      return;
    }

    this._curHunk = {
      hunkType: this.kHunkMeta,
      attr: aAttr,
      index: aIndex,
      data: this._joinLines(aLineOrLines),
    };
    this._hunks.push(this._curHunk);
  },

  /**
   * Quoted lines reference previous messages or what not.  Consecutive calls
   * with the same depth, origin and target are merged into a single hunk.
   *
   * @param aLineOrLines The line or list of lines that are quoted.
   * @param aDepth The depth of the quoting.
   * @param aOrigin The item that originated the original content, if known.
   *     For example, perhaps a GlodaMessage?
   * @param aTarget A reference to the location in the original content, if
   *     known.  For example, the index of a line in a message or something?
   */
  quoted(aLineOrLines, aDepth, aOrigin, aTarget) {
    if (!this._producing) {
      return;
    }

    const data = this._joinLines(aLineOrLines);
    const cur = this._curHunk;

    // Loose comparisons are deliberate: null and undefined depth, origin and
    // target values are treated alike when deciding whether to merge.
    const continuesCurrent =
      cur &&
      cur.hunkType === this.kHunkQuoted &&
      cur.depth == aDepth &&
      cur.origin == aOrigin &&
      cur.target == aTarget;

    if (continuesCurrent) {
      cur.data += "\n" + data;
      return;
    }

    this._curHunk = {
      hunkType: this.kHunkQuoted,
      data,
      depth: aDepth,
      origin: aOrigin,
      target: aTarget,
    };
    this._hunks.push(this._curHunk);
  },

  /**
   * Record actual message content.  Consecutive content calls are merged
   * into a single hunk.
   *
   * @param aLineOrLines The line or list of lines that are content.
   */
  content(aLineOrLines) {
    if (!this._producing) {
      return;
    }

    const data = this._joinLines(aLineOrLines);
    const cur = this._curHunk;

    if (cur && cur.hunkType === this.kHunkContent) {
      cur.data += "\n" + data;
      return;
    }

    this._curHunk = { hunkType: this.kHunkContent, data };
    this._hunks.push(this._curHunk);
  },
};