summaryrefslogtreecommitdiffstats
path: root/mailnews/db/gloda/modules/connotent.js
blob: 4ef424d43faac3fe05b563854862bd3d8be191c1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

this.EXPORTED_SYMBOLS = ['GlodaContent', 'whittlerRegistry',
                          'mimeMsgToContentAndMeta', 'mimeMsgToContentSnippetAndMeta'];

// Short aliases for the commonly used members of the XPCOM Components object.
var Cc = Components.classes;
var Ci = Components.interfaces;
var Cr = Components.results;
var Cu = Components.utils;

// Load the log4moz logging module; it is expected to provide the Log4Moz
// name used just below.
Cu.import("resource:///modules/gloda/log4moz.js");

// Logger for this module, registered under the name "gloda.connotent".
// NOTE(review): LOG is not referenced in the visible remainder of this file.
var LOG = Log4Moz.repository.getLogger("gloda.connotent");



/**
 * Build a GlodaContent for a MIME message by running every registered content
 * whittler over the message's plaintext body.
 *
 * The body is obtained via aMimeMsg.coerceBodyToPlaintext(folder) and split
 * into lines on LF or CRLF.  Each whittler returned by
 * whittlerRegistry.getWhittlers() is then handed the same meta dictionary,
 * the same array of body lines and the same GlodaContent instance, in the
 * order the registry returns them.
 *
 * @param aMimeMsg: the MimeMessage instance
 * @param folder: the nsIMsgDBFolder, passed through to coerceBodyToPlaintext
 * @return a two-element array: the GlodaContent instance, and the meta
 * dictionary.  The dictionary starts out holding only the message's "subject"
 * header; whittlers may add further entries to it.
 */

function mimeMsgToContentAndMeta(aMimeMsg, folder) {
  let glodaContent = new GlodaContent();
  let metaInfo = {subject: aMimeMsg.get("subject")};

  let plaintextBody = aMimeMsg.coerceBodyToPlaintext(folder);
  let lines = plaintextBody.split(/\r?\n/);

  // Every whittler sees (and may update) the same meta/lines/content objects.
  let whittlers = whittlerRegistry.getWhittlers();
  for (let i = 0; i < whittlers.length; i++) {
    whittlers[i].contentWhittle(metaInfo, lines, glodaContent);
  }

  return [glodaContent, metaInfo];
}


/**
 * Produce a short text snippet for a MIME message, suitable for summarizing
 * it, together with the meta dictionary gathered while whittling.
 *
 * @param aMimeMsg: the MimeMessage instance
 * @param folder: the nsIMsgDBFolder
 * @param length: optional maximum number of characters for the snippet.
 * When the whittled content is longer than |length|, the result is its first
 * (length-1) characters followed by a single ellipsis character.  When
 * |length| is omitted the whole whittled content is returned.
 * @return a two-element array: the snippet text, and the meta dictionary
 * returned by mimeMsgToContentAndMeta.
 */

function mimeMsgToContentSnippetAndMeta(aMimeMsg, folder, length) {
  let [glodaContent, metaInfo] = mimeMsgToContentAndMeta(aMimeMsg, folder);

  // Request one character more than wanted so we can tell whether the
  // content really exceeds |length| and therefore needs an ellipsis.
  let snippet = glodaContent.getContentSnippet(length + 1);

  let fitsAsIs = !length || !(snippet.length > length);
  if (fitsAsIs)
    return [snippet, metaInfo];

  let truncated = snippet.substring(0, length - 1);
  return [truncated + "\u2026", metaInfo]; // ellipsis
}


/**
 * A registry of gloda providers that expose a contentWhittle() function.
 * It is consulted by mimeMsgToContentAndMeta (and therefore by
 * mimeMsgToContentSnippetAndMeta); providers are added to it through
 * registerWhittler().
 */
function WhittlerRegistry() {
  this._whittlers = [];
}

WhittlerRegistry.prototype = {
  /**
   * Append a provider to the list of content whittlers.
   *
   * @param provider The provider to register.
   */
  registerWhittler: function whittler_registry_registerWhittler(provider) {
    let registered = this._whittlers;
    registered.push(provider);
  },
  /**
   * Return the registered whittlers in reverse registration order (the most
   * recently registered first), which is intended to run from the most
   * specific whittler to the most generic one.
   *
   * @return a fresh array; callers may modify it without affecting the
   *     registry's own list.
   */
  getWhittlers: function whittler_registry_getWhittlers() {
    // Work on a shallow copy: reverse() operates in place and we must not
    // reorder (or hand out) the internal array.
    let snapshot = this._whittlers.slice();
    snapshot.reverse();
    return snapshot;
  },
};

// The single shared registry instance for this module.  It is listed in
// EXPORTED_SYMBOLS and is the registry mimeMsgToContentAndMeta reads from.
this.whittlerRegistry = new WhittlerRegistry();

/**
 * Accumulates one interpretation of a message body as an ordered list of
 * typed "hunks" (meta, quoted, content) plus key/value annotations.
 *
 * Producers first call volunteerContent() with a priority; only the calls of
 * the most recently accepted producer are recorded.  Consumers read the
 * result through hasContent(), getContentSnippet() and getContentString().
 */
function GlodaContent() {
  this._contentPriority = null;
  this._producing = false;
  this._hunks = [];
  // Start with the same empty state _resetContent() establishes, so that the
  // consumer API (notably getContentString(true)) is safe to call even if no
  // producer ever volunteered.
  this._keysAndValues = [];
  this._keysAndDeltaValues = [];
  this._curHunk = null;
}

GlodaContent.prototype = {
  kPriorityBase: 0,
  kPriorityPerfect: 100,

  // Hunk type tags stored in each hunk's hunkType field.
  kHunkMeta: 1,
  kHunkQuoted: 2,
  kHunkContent: 3,

  /**
   * Discard everything recorded so far (hunks and key/value annotations).
   * Called when a higher-priority producer takes over.
   */
  _resetContent: function gloda_content__resetContent() {
    this._keysAndValues = [];
    this._keysAndDeltaValues = [];
    this._hunks = [];
    this._curHunk = null;
  },

  /**
   * Normalize a producer argument that is either a single string or a list
   * of lines into one newline-joined string.
   *
   * @param aLineOrLines A string, or an array of line strings.
   * @returns The string itself, or the lines joined with "\n".
   */
  _linesToString: function gloda_content__linesToString(aLineOrLines) {
    if (typeof(aLineOrLines) == "string")
      return aLineOrLines;
    return aLineOrLines.join("\n");
  },

  /* ===== Consumer API ===== */
  /**
   * @returns true once some producer's volunteerContent() call has been
   *     accepted, false otherwise.
   */
  hasContent: function gloda_content_hasContent() {
    return (this._contentPriority != null);
  },

  /**
   * Return content suitable for snippet display.  This means that no quoting
   *  or meta-data should be returned.
   *
   * @param aMaxLength The maximum snippet length desired.  If falsy, the
   *     whole content string is returned.
   */
  getContentSnippet: function gloda_content_getContentSnippet(aMaxLength) {
    let content = this.getContentString();
    if (aMaxLength)
      content = content.substring(0, aMaxLength);
    return content;
  },

  /**
   * Concatenate the data of all content hunks, separated by newlines.  Meta
   *  and quoted hunks are skipped.
   *
   * @param aIndexingPurposes If truthy, also append (each on its own line)
   *     the values recorded via keyValue() and both the old and new values
   *     recorded via keyValueDelta().  The keys themselves are not included.
   * @returns The assembled string; "" if nothing was recorded.
   */
  getContentString: function gloda_content_getContent(aIndexingPurposes) {
    let data = "";
    for (let hunk of this._hunks) {
      if (hunk.hunkType == this.kHunkContent) {
        if (data)
          data += "\n" + hunk.data;
        else
          data = hunk.data;
      }
    }

    if (aIndexingPurposes) {
      // Append the values for indexing.  The keys are assumed to be cruft
      // and are deliberately left out.
      for (let kv of this._keysAndValues) {
        data += "\n" + kv[1];
      }
      // Delta entries are [key, oldValue, newValue]; index both values.
      for (let kon of this._keysAndDeltaValues) {
        data += "\n" + kon[1] + "\n" + kon[2];
      }
    }

    return data;
  },

  /* ===== Producer API ===== */
  /**
   * Called by a producer with the priority they believe their interpretation
   *  of the content comes in at.
   *
   * A producer is accepted when nothing has been accepted yet, or when its
   *  priority is strictly greater than the currently accepted one; accepting
   *  a producer discards everything recorded by earlier producers.
   *
   * @param aPriority The priority of the producer's interpretation.
   * @returns true if we believe the producer's interpretation will be
   *     interesting and they should go ahead and generate events.  We return
   *     false if we don't think they are interesting, in which case they should
   *     probably not issue calls to us, although we don't care.  (We will
   *     ignore their calls if we return false, this allows the simplification
   *     of code that needs to run anyways.)
   */
  volunteerContent: function gloda_content_volunteerContent(aPriority) {
    if (this._contentPriority === null || this._contentPriority < aPriority) {
      this._contentPriority = aPriority;
      this._resetContent();
      this._producing = true;
      return true;
    }
    this._producing = false;
    return false;
  },

  /**
   * Record a key/value annotation.  Ignored unless the current producer was
   *  accepted by volunteerContent().
   *
   * @param aKey The annotation's key.
   * @param aValue The annotation's value.
   */
  keyValue: function gloda_content_keyValue(aKey, aValue) {
    if (!this._producing)
      return;

    this._keysAndValues.push([aKey, aValue]);
  },
  /**
   * Record a key whose value changed.  Ignored unless the current producer
   *  was accepted by volunteerContent().
   *
   * @param aKey The annotation's key.
   * @param aOldValue The value before the change.
   * @param aNewValue The value after the change.
   */
  keyValueDelta: function gloda_content_keyValueDelta (aKey, aOldValue,
      aNewValue) {
    if (!this._producing)
      return;

    this._keysAndDeltaValues.push([aKey, aOldValue, aNewValue]);
  },

  /**
   * Meta lines are lines that have to do with the content but are not the
   *  content and can generally be related to an attribute that has been derived
   *  and stored on the item.
   * For example, a bugzilla bug may note that an attachment was created; this
   *  is not content and wouldn't be desired in a snippet, but is still
   *  potentially interesting meta-data.
   * Every call starts a new meta hunk; meta hunks are never merged.
   *
   * @param aLineOrLines The line or list of lines that are meta-data.
   * @param aAttr The attribute this meta-data is associated with.
   * @param aIndex If the attribute is non-singular, indicate the specific
   *     index of the item in the attribute's bound list that the meta-data
   *     is associated with.
   */
  meta: function gloda_content_meta(aLineOrLines, aAttr, aIndex) {
    if (!this._producing)
      return;

    let data = this._linesToString(aLineOrLines);

    this._curHunk = {hunkType: this.kHunkMeta, attr: aAttr, index: aIndex,
                     data: data};
    this._hunks.push(this._curHunk);
  },
  /**
   * Quoted lines reference previous messages or what not.
   * Consecutive calls with the same depth, origin and target are merged into
   *  a single hunk; any difference starts a new hunk.
   *
   * @param aLineOrLines The line or list of lines that are quoted.
   * @param aDepth The depth of the quoting.
   * @param aOrigin The item that originated the original content, if known.
   *     For example, perhaps a GlodaMessage?
   * @param aTarget A reference to the location in the original content, if
   *     known.  For example, the index of a line in a message or something?
   */
  quoted: function gloda_content_quoted(aLineOrLines, aDepth, aOrigin,
      aTarget) {
    if (!this._producing)
      return;

    let data = this._linesToString(aLineOrLines);

    if (!this._curHunk ||
        this._curHunk.hunkType != this.kHunkQuoted ||
        this._curHunk.depth != aDepth ||
        this._curHunk.origin != aOrigin || this._curHunk.target != aTarget) {
      this._curHunk = {hunkType: this.kHunkQuoted, data: data,
                       depth: aDepth, origin: aOrigin, target: aTarget};
      this._hunks.push(this._curHunk);
    }
    else
      this._curHunk.data += "\n" + data;
  },

  /**
   * Record actual message content.  Consecutive content calls are merged
   *  into a single hunk (joined with newlines); a content call following a
   *  meta or quoted hunk starts a new content hunk.
   *
   * @param aLineOrLines The line or list of lines that are content.
   */
  content: function gloda_content_content(aLineOrLines) {
    if (!this._producing)
      return;

    let data = this._linesToString(aLineOrLines);

    if (!this._curHunk || this._curHunk.hunkType != this.kHunkContent) {
      this._curHunk = {hunkType: this.kHunkContent, data: data};
      this._hunks.push(this._curHunk);
    }
    else
      this._curHunk.data += "\n" + data;
  },
};