1 files changed, 487 insertions, 0 deletions
diff --git a/mailnews/base/search/src/nsMsgBodyHandler.cpp b/mailnews/base/search/src/nsMsgBodyHandler.cpp
new file mode 100644
index 000000000..873713bbb
--- /dev/null
+++ b/mailnews/base/search/src/nsMsgBodyHandler.cpp
@@ -0,0 +1,487 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "msgCore.h"
+#include "nsMsgSearchCore.h"
+#include "nsMsgUtils.h"
+#include "nsMsgBodyHandler.h"
+#include "nsMsgSearchTerm.h"
+#include "nsIMsgHdr.h"
+#include "nsMsgMessageFlags.h"
+#include "nsISeekableStream.h"
+#include "nsIInputStream.h"
+#include "nsIFile.h"
+#include "plbase64.h"
+#include "prmem.h"
+#include "nsMimeTypes.h"
+
+// Build a body handler that reads the message through the search scope's
+// input stream. No filter header list is involved on this path.
+//
+//   scope    - search scope used by OpenLocalFolder() to obtain the stream
+//   numLines - line budget for the message (see GetNextLocalLine())
+//   msg      - header of the message whose body is read
+//   db       - database; GetNextLine() only reads local lines when non-null
+nsMsgBodyHandler::nsMsgBodyHandler (nsIMsgSearchScopeTerm * scope,
+                                    uint32_t numLines,
+                                    nsIMsgDBHdr* msg, nsIMsgDatabase * db)
+{
+  m_scope = scope;
+  m_msgHdr = msg;
+  m_db = db;
+
+  // Filter-only state: unused by this constructor, so give it inert values.
+  // m_Filtering has to be settled before Initialize() is called.
+  m_Filtering = false;
+  m_headers = NULL;
+  m_headersSize = 0;
+
+  // The supplied count is a count of body lines unless the flags could be
+  // read and say the message is stored offline.
+  uint32_t msgFlags;
+  if (NS_SUCCEEDED(msg->GetFlags(&msgFlags)) &&
+      (msgFlags & nsMsgMessageFlags::Offline))
+    m_lineCountInBodyLines = false;
+  else
+    m_lineCountInBodyLines = true;
+
+  m_numLocalLines = numLines;
+  // account for added x-mozilla-status lines, and envelope line.
+  if (!m_lineCountInBodyLines)
+    m_numLocalLines += 3;
+
+  Initialize();       // state shared by both constructors
+  OpenLocalFolder();  // obtain the line stream for the message
+}
+
+// Build a body handler that can also be driven by a filter header list.
+//
+//   scope       - search scope used by OpenLocalFolder() to obtain the stream
+//   numLines    - line budget for the message (see GetNextLocalLine())
+//   msg         - header of the message whose body is read
+//   db          - database; GetNextLine() only reads local lines when non-null
+//   headers     - NUL-delimited list of header strings (used when Filtering)
+//   headersSize - number of bytes available at |headers|
+//   Filtering   - true: lines come from |headers| via GetNextFilterLine();
+//                 false: lines come from the message's input stream
+nsMsgBodyHandler::nsMsgBodyHandler(nsIMsgSearchScopeTerm * scope,
+                                   uint32_t numLines,
+                                   nsIMsgDBHdr* msg, nsIMsgDatabase* db,
+                                   const char * headers, uint32_t headersSize,
+                                   bool Filtering)
+{
+  m_scope = scope;
+  m_numLocalLines = numLines;
+  uint32_t flags;
+  // The supplied count is a count of body lines unless the flags could be
+  // read and say the message is stored offline.
+  m_lineCountInBodyLines = NS_SUCCEEDED(msg->GetFlags(&flags)) ?
+    !(flags & nsMsgMessageFlags::Offline) : true;
+  // account for added x-mozilla-status lines, and envelope line.
+  if (!m_lineCountInBodyLines)
+    m_numLocalLines += 3;
+  m_msgHdr = msg;
+  m_db = db;
+  m_Filtering = Filtering;
+
+  // Always leave m_headers with a defined value: previously it was only
+  // assigned when filtering and stayed uninitialized otherwise.
+  m_headers = m_Filtering ? headers : nullptr;
+  // A missing header list has no bytes to offer, whatever the caller claimed;
+  // this keeps GetNextFilterLine() from dereferencing a null pointer.
+  m_headersSize = headers ? headersSize : 0;
+
+  Initialize();
+
+  if (!m_Filtering)
+    OpenLocalFolder();  // if nothing else applies, then we must be a POP folder file
+}
+
+void nsMsgBodyHandler::Initialize()
+{
+  // Start-of-message state shared by both constructors, independent of
+  // where the lines come from.
+
+  // Default transformations for local message search and MAPI access
+  m_stripHtml = true;
+  m_stripHeaders = true;
+
+  // Nothing has been read yet, so we are still in the message headers.
+  m_headerBytesRead = 0;
+  m_pastPartHeaders = false;
+  m_pastMsgHeaders = false;
+
+  // Assumptions about the current part until its headers are sniffed.
+  m_inMessageAttachment = false;
+  m_isMultipart = false;
+  m_base64part = false;
+  m_partIsHtml = false;
+  m_partIsText = true; // Default is text/plain, maybe proven otherwise later.
+}
+
+// Nothing is released by hand here; the destructor body is empty.
+nsMsgBodyHandler::~nsMsgBodyHandler() = default;
+
+// Produce the next searchable line of the message.
+//
+//   buf     - receives the transformed line
+//   charset - receives the charset sniffed for the current part
+//
+// Returns the length of the outgoing line, or -1 when no line was produced.
+int32_t nsMsgBodyHandler::GetNextLine (nsCString &buf, nsCString &charset)
+{
+  nsAutoCString rawLine;
+  int32_t inLength = -1;   // length of the line just read, -1 at eof
+  int32_t outLength = -1;  // length of the line handed back, -1 at eof
+  bool skipLine = true;
+
+  // Keep pulling lines until the transformations let one through.
+  do {
+    if (m_Filtering)
+      inLength = GetNextFilterLine(rawLine);  // lines come from the header list
+    else if (m_db)
+      inLength = GetNextLocalLine(rawLine);   // local / offline message stream
+
+    if (inLength < 0)
+      break;  // input exhausted
+
+    outLength = ApplyTransformations(rawLine, inLength, skipLine, buf);
+  } while (skipLine);
+
+  if (outLength < 0)
+    return -1;  // nothing was ever transformed
+
+  // In a non-multipart message the whole body is one encoded blob, which
+  // ApplyTransformations() only accumulates: decode it here, then run the
+  // decoded text through the transformations once more.
+  if (m_base64part && !m_isMultipart)
+  {
+    Base64Decode(buf);
+    m_base64part = false;
+    outLength = ApplyTransformations(buf, buf.Length(), skipLine, buf);
+  }
+
+  charset = m_partCharset;
+  return outLength;
+}
+
+// Ask the search scope for the message's input stream and keep its
+// line-reading interface in m_fileLineStream.
+void nsMsgBodyHandler::OpenLocalFolder()
+{
+  nsCOMPtr <nsIInputStream> msgStream;
+  nsresult rv = m_scope->GetInputStream(m_msgHdr, getter_AddRefs(msgStream));
+  // On failure, warn and bail out; m_fileLineStream is left untouched.
+  NS_ENSURE_SUCCESS_VOID(rv);
+
+  m_fileLineStream = do_QueryInterface(msgStream);
+}
+
+// Hand out the next header string from the filter header list.
+//
+// m_headers points at the unread remainder of a NUL-delimited list of header
+// strings and m_headersSize is the number of bytes left in it.
+//
+//   buf - receives the header string (emptied when the list is exhausted)
+//
+// Returns the header length plus one for its terminator, or -1 when there
+// are no more headers.
+int32_t nsMsgBodyHandler::GetNextFilterLine(nsCString &buf)
+{
+  // #mscott. Ugly hack! filter headers list have CRs & LFs inside the NULL
+  // delimited list of header strings. It is possible to have:
+  // To NULL CR LF From. Skip these (and spaces / stray NULs) when they sit
+  // where the next header is expected to start.
+  while (m_headersSize > 0 &&
+         (m_headers[0] == '\r' || m_headers[0] == '\n' ||
+          m_headers[0] == ' ' || m_headers[0] == '\0'))
+  {
+    m_headers++;
+    m_headersSize--;
+  }
+
+  if (m_headersSize == 0)
+  {
+    // Exhausted: report eof with an empty buffer on every path.
+    buf.Truncate();
+    return -1;
+  }
+
+  // Look for the terminator only inside the bytes we own, so a final header
+  // that is not NUL-terminated cannot make us read past the list.
+  const char *terminator =
+    static_cast<const char *>(memchr(m_headers, '\0', m_headersSize));
+  const uint32_t headerLength =
+    terminator ? static_cast<uint32_t>(terminator - m_headers) : m_headersSize;
+  buf.Assign(m_headers, headerLength);
+
+  // The reported count includes the terminator, as it always has.
+  const uint32_t numBytesCopied = headerLength + 1;
+
+  // be careful...m_headersSize is unsigned. Never consume more than is left,
+  // so neither the size nor the pointer runs past the end of the list.
+  const uint32_t consumed =
+    numBytesCopied < m_headersSize ? numBytesCopied : m_headersSize;
+  m_headers += consumed;
+  m_headersSize -= consumed;
+
+  return (int32_t) numBytesCopied;
+}
+
+// Read the next line of the message from m_fileLineStream into buf.
+// Returns the length of that line, or -1 when the line budget is used up,
+// there is no stream, or the read fails.
+
+int32_t nsMsgBodyHandler::GetNextLocalLine(nsCString &buf)
+{
+  if (!m_numLocalLines)
+    return -1;  // line budget exhausted
+
+  // If the line count is in body lines, only start counting down once all
+  // the message headers have been processed. Otherwise the count covers
+  // every line, so count down each time.
+  if (!m_lineCountInBodyLines || m_pastMsgHeaders)
+    m_numLocalLines--;
+
+  if (!m_fileLineStream)
+    return -1;  // no stream was opened
+
+  // do we need to check the return value here?
+  bool hasMore = false;
+  nsresult rv = m_fileLineStream->ReadLine(buf, &hasMore);
+  if (!NS_SUCCEEDED(rv))
+    return -1;
+
+  return buf.Length();
+}
+
+/**
+ * This method applies a sequence of transformations to the line.
+ *
+ * It is a small state machine driven by the m_* part flags. In order:
+ * * While still in a header block (!m_pastPartHeaders): the line is marked
+ *   to be eaten if m_stripHeaders is set, it is passed to
+ *   SniffPossibleMIMEHeader(), and an empty line (or one starting with CR/LF)
+ *   ends the header block (sets m_past*Headers).
+ * * A line starting with one of the collected boundary strings ends the
+ *   current part: an accumulated base64 text part is decoded and returned,
+ *   anything else yields no content. The per-part flags (m_base64part,
+ *   m_pastPartHeaders, m_partIsHtml, m_partIsText) are then reset.
+ * * Lines of non-text parts are dropped.
+ * * Lines of a base64 part are appended to buf and eaten, to be decoded
+ *   later as a whole.
+ * * Any other line is copied to buf, with HTML tags stripped if the part is
+ *   HTML and m_stripHtml is set.
+ *
+ * @param line        (in)    the current line
+ * @param length      (in)    the length of said line
+ * @param eatThisLine (out)   whether or not to ignore this line
+ * @param buf         (inout) if m_base64part, the current part as needed for
+ *                            decoding; else, it is treated as an out param (a
+ *                            redundant version of line).
+ * @return            for a header line, the incoming length unchanged; for a
+ *                    dropped non-text line, 0; otherwise the length of buf
+ *                    after the transformations
+ */
+int32_t nsMsgBodyHandler::ApplyTransformations (const nsCString &line, int32_t length,
+                                                bool &eatThisLine, nsCString &buf)
+{
+  eatThisLine = false;
+
+  if (!m_pastPartHeaders)  // line is a line from the part headers
+  {
+    if (m_stripHeaders)
+      eatThisLine = true;
+
+    // Header lines are not accumulated: buf is simply overwritten with the
+    // current line, and everything of interest is recorded in member flags
+    // by the sniffer below.
+    buf.Assign(line);
+
+    SniffPossibleMIMEHeader(buf);
+
+    // An empty line (or a bare line ending) terminates a header block.
+    if (buf.IsEmpty() || buf.First() == '\r' || buf.First() == '\n') {
+      if (!m_inMessageAttachment) {
+        m_pastPartHeaders = true;
+      } else {
+        // We're in a message attachment and have just read past the
+        // part header for the attached message. We now need to read
+        // the message headers and any part headers.
+        // We can now forget about the special handling of attached messages.
+        m_inMessageAttachment = false;
+      }
+    }
+
+    // Once a header block has ended, the message headers are over. This
+    // function never clears m_pastMsgHeaders again.
+    if (m_pastPartHeaders)
+      m_pastMsgHeaders = true;
+
+    return length;
+  }
+
+  // Check to see if this is one of our boundary strings. The list is scanned
+  // from the most recently collected boundary back to the first.
+  bool matchedBoundary = false;
+  if (m_isMultipart && m_boundaries.Length() > 0) {
+    for (int32_t i = (int32_t)m_boundaries.Length() - 1; i >= 0; i--) {
+      if (StringBeginsWith(line, m_boundaries[i])) {
+        matchedBoundary = true;
+        // If we matched a boundary, we won't need the nested/later ones any more.
+        m_boundaries.SetLength(i+1);
+        break;
+      }
+    }
+  }
+  if (matchedBoundary)
+  {
+    if (m_base64part && m_partIsText)
+    {
+      // buf holds the base64 lines accumulated for the part that just ended.
+      Base64Decode(buf);
+      // Work on the parsed string
+      if (!buf.Length())
+      {
+        NS_WARNING("Trying to transform an empty buffer");
+        eatThisLine = true;
+      }
+      else
+      {
+        // It is wrong to call ApplyTransformations() here since this will
+        // lead to the buffer being doubled-up at |buf.Append(line.get());| below.
+        // ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
+        // Hand the decoded part back to the caller as a line.
+        eatThisLine = false;
+      }
+    }
+    else
+    {
+      // The boundary line itself carries nothing to search.
+      buf.Truncate();
+      eatThisLine = true; // We have no content...
+    }
+
+    // Reset all assumed headers
+    m_base64part = false;
+    // Get ready to sniff new part headers, but do not reset m_pastMsgHeaders
+    // since it will screw the body line count.
+    m_pastPartHeaders = false;
+    m_partIsHtml = false;
+    // If we ever see a multipart message, each part needs to set 'm_partIsText',
+    // so no more defaulting to 'true' when the part is done.
+    m_partIsText = false;
+
+    return buf.Length();
+  }
+
+  if (!m_partIsText)
+  {
+    // Ignore non-text parts
+    buf.Truncate();
+    eatThisLine = true;
+    return 0;
+  }
+
+  if (m_base64part)
+  {
+    // We need to keep track of all lines to parse base64encoded...
+    // The line is eaten now; the accumulated text is decoded when the part
+    // ends (see the boundary handling above) or by the caller.
+    buf.Append(line.get());
+    eatThisLine = true;
+    return buf.Length();
+  }
+
+  // ... but there's no point if we're not parsing base64.
+  buf.Assign(line);
+  if (m_stripHtml && m_partIsHtml)
+  {
+    StripHtml (buf);
+  }
+
+  return buf.Length();
+}
+
+// Remove everything between '<' and '>' (brackets included) from the string,
+// in place. Tag state starts afresh on every call. If the scratch buffer
+// cannot be allocated the string is left untouched.
+void nsMsgBodyHandler::StripHtml (nsCString &pBufInOut)
+{
+  char *stripped = (char*) PR_Malloc (pBufInOut.Length() + 1);
+  if (!stripped)
+    return;
+
+  char *out = stripped;
+  bool insideTag = false;
+  for (const char *in = pBufInOut.get(); *in; ++in)
+  {
+    const char c = *in;
+    if (insideTag)
+    {
+      // Still inside < >: drop the character, leave the tag on '>'.
+      if (c == '>')
+        insideTag = false;
+    }
+    else if (c == '<')
+      insideTag = true;
+    else
+      *out++ = c;
+  }
+  *out = 0; // null terminator
+
+  // The string takes ownership of the scratch buffer.
+  pBufInOut.Adopt(stripped);
+}
+
+/**
+ * Determines the MIME type, if present, from the current line.
+ *
+ * m_partIsHtml, m_isMultipart, m_partIsText, m_base64part, m_partCharset and
+ * the m_boundaries list are all set by this method at various points in time.
+ *
+ * Recognised on the line:
+ * * "Content-Type:" with text/html, multipart/, message/ or any other type
+ * * a "boundary=" parameter (only once a multipart type has been seen)
+ * * a "charset=" parameter (only once a multipart type has been seen)
+ * * "Content-Transfer-Encoding:" naming base64
+ *
+ * @param line        (in)    a header line that may contain a MIME header
+ */
+void nsMsgBodyHandler::SniffPossibleMIMEHeader(const nsCString &line)
+{
+  // Some parts of MIME are case-sensitive and other parts are case-insensitive;
+  // specifically, the headers are all case-insensitive and the values we care
+  // about are also case-insensitive, with the sole exception of the boundary
+  // string. So matching is done on a lower-cased copy, while parameter values
+  // are always cut out of the original line at the same offsets.
+  nsCString lowerCaseLine(line);
+  ToLowerCase(lowerCaseLine);
+
+  if (StringBeginsWith(lowerCaseLine, NS_LITERAL_CSTRING("content-type:")))
+  {
+    if (lowerCaseLine.Find("text/html", CaseInsensitiveCompare) != -1)
+    {
+      m_partIsText = true;
+      m_partIsHtml = true;
+    }
+    else if (lowerCaseLine.Find("multipart/", CaseInsensitiveCompare) != -1)
+    {
+      if (m_isMultipart)
+      {
+        // Nested multipart, get ready for new headers.
+        m_base64part = false;
+        m_pastPartHeaders = false;
+        m_partIsHtml = false;
+        m_partIsText = false;
+      }
+      m_isMultipart = true;
+      m_partCharset.Truncate();
+    }
+    else if (lowerCaseLine.Find("message/", CaseInsensitiveCompare) != -1)
+    {
+      // Initialise again.
+      m_base64part = false;
+      m_pastPartHeaders = false;
+      m_partIsHtml = false;
+      m_partIsText = true;  // Default is text/plain, maybe proven otherwise later.
+      m_inMessageAttachment = true;
+    }
+    else
+    {
+      // Any other text/* type is searchable; everything else disproves the
+      // text/plain assumption.
+      m_partIsText =
+        lowerCaseLine.Find("text/", CaseInsensitiveCompare) != -1;
+    }
+  }
+
+  int32_t start;
+  if (m_isMultipart &&
+      (start = lowerCaseLine.Find("boundary=", CaseInsensitiveCompare)) != -1)
+  {
+    start += 9;  // strlen("boundary=")
+    // The parameter may be the very last thing on the line, so check the
+    // index before looking at the character.
+    bool foundQuote = false;
+    if (start < (int32_t) line.Length() && line[start] == '\"') {
+      start++;
+      foundQuote = true;
+    }
+    // A quoted value ends at the next quote after its opening one; an
+    // unquoted value ends at the next parameter separator. Searching forward
+    // from |start| keeps quotes belonging to other parameters on the same
+    // line (before or after this one) from being mistaken for the end.
+    int32_t end = line.FindChar(foundQuote ? '\"' : ';', start);
+    if (end == -1)
+      end = line.Length();
+
+    // Collect all boundaries. Since we only react to crossing a boundary,
+    // we can simply collect the boundaries instead of forming a tree
+    // structure from the message. Keep it simple ;-)
+    // An empty value is skipped: a bare "--" would match every line that
+    // merely starts with two dashes.
+    if (end > start)
+    {
+      nsCString boundary;
+      boundary.Assign("--");
+      boundary.Append(Substring(line, start, end-start));
+      if (!m_boundaries.Contains(boundary))
+        m_boundaries.AppendElement(boundary);
+    }
+  }
+
+  if (m_isMultipart &&
+      (start = lowerCaseLine.Find("charset=", CaseInsensitiveCompare)) != -1)
+  {
+    start += 8;  // strlen("charset=")
+    bool foundQuote = false;
+    if (start < (int32_t) line.Length() && line[start] == '\"') {
+      start++;
+      foundQuote = true;
+    }
+    int32_t end = line.FindChar(foundQuote ? '\"' : ';', start);
+    if (end == -1)
+      end = line.Length();
+
+    m_partCharset.Assign(Substring(line, start, end-start));
+  }
+
+  if (StringBeginsWith(lowerCaseLine,
+                       NS_LITERAL_CSTRING("content-transfer-encoding:")) &&
+      lowerCaseLine.Find(ENCODING_BASE64, CaseInsensitiveCompare) != kNotFound)
+    m_base64part = true;
+}
+
+/**
+ * Decodes the given base64 string in place.
+ *
+ * If decoding yields a buffer, the string adopts it; otherwise the string
+ * keeps its current contents. Either way, every line feed and carriage
+ * return in the result is then replaced by a space.
+ *
+ * @param pBufInOut   (inout) the encoded text on entry, the result on exit
+ */
+void nsMsgBodyHandler::Base64Decode (nsCString &pBufInOut)
+{
+  char *decoded = PL_Base64Decode(pBufInOut.get(), pBufInOut.Length(), nullptr);
+  if (decoded)
+    pBufInOut.Adopt(decoded);
+
+  // Flatten line breaks: all LFs first, then all CRs.
+  static const char kLineBreaks[] = { '\n', '\r' };
+  for (char lineBreak : kLineBreaks) {
+    for (int32_t pos = pBufInOut.FindChar(lineBreak);
+         pos != -1;
+         pos = pBufInOut.FindChar(lineBreak, pos)) {
+      pBufInOut.Replace(pos, 1, ' ');
+    }
+  }
+}
+