summary refs log tree commit diff stats
path: root/mailnews/base/search/src/nsMsgBodyHandler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mailnews/base/search/src/nsMsgBodyHandler.cpp')
-rw-r--r-- mailnews/base/search/src/nsMsgBodyHandler.cpp 487
1 files changed, 487 insertions, 0 deletions
diff --git a/mailnews/base/search/src/nsMsgBodyHandler.cpp b/mailnews/base/search/src/nsMsgBodyHandler.cpp
new file mode 100644
index 000000000..873713bbb
--- /dev/null
+++ b/mailnews/base/search/src/nsMsgBodyHandler.cpp
@@ -0,0 +1,487 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "msgCore.h"
+#include "nsMsgSearchCore.h"
+#include "nsMsgUtils.h"
+#include "nsMsgBodyHandler.h"
+#include "nsMsgSearchTerm.h"
+#include "nsIMsgHdr.h"
+#include "nsMsgMessageFlags.h"
+#include "nsISeekableStream.h"
+#include "nsIInputStream.h"
+#include "nsIFile.h"
+#include "plbase64.h"
+#include "prmem.h"
+#include "nsMimeTypes.h"
+
+/**
+ * Builds a body handler for plain message search (no filtering).
+ *
+ * @param scope     search scope; later asked for the message input stream
+ * @param numLines  line count supplied by the caller for this message
+ * @param msg       header of the message to read; dereferenced here
+ * @param db        message database the message belongs to
+ */
+nsMsgBodyHandler::nsMsgBodyHandler (nsIMsgSearchScopeTerm * scope,
+                                    uint32_t numLines,
+                                    nsIMsgDBHdr* msg, nsIMsgDatabase * db)
+{
+  // The filter-only members are unused on this path; clear them, and set
+  // m_Filtering before Initialize() is called.
+  m_Filtering = false;
+  m_headers = nullptr;
+  m_headersSize = 0;
+
+  m_scope = scope;
+  m_msgHdr = msg;
+  m_db = db;
+
+  // The count is taken to be in body lines unless the flags could be read and
+  // the message carries the Offline flag.
+  uint32_t msgFlags;
+  if (NS_SUCCEEDED(msg->GetFlags(&msgFlags)))
+    m_lineCountInBodyLines = !(msgFlags & nsMsgMessageFlags::Offline);
+  else
+    m_lineCountInBodyLines = true;
+
+  // When the count is not in body lines, account for the added
+  // x-mozilla-status lines and the envelope line.
+  m_numLocalLines = numLines;
+  if (!m_lineCountInBodyLines)
+    m_numLocalLines += 3;
+
+  Initialize();       // common initialization stuff
+  OpenLocalFolder();
+}
+
+/**
+ * Builds a body handler that can also serve lines from an in-memory header
+ * block, as used when filtering.
+ *
+ * @param scope        search scope; later asked for the message input stream
+ * @param numLines     line count supplied by the caller for this message
+ * @param msg          header of the message to read; dereferenced here
+ * @param db           message database the message belongs to
+ * @param headers      NUL-delimited header strings; only kept when Filtering
+ * @param headersSize  size in bytes of the headers block
+ * @param Filtering    true to read lines from headers instead of a stream
+ */
+nsMsgBodyHandler::nsMsgBodyHandler(nsIMsgSearchScopeTerm * scope,
+                                   uint32_t numLines,
+                                   nsIMsgDBHdr* msg, nsIMsgDatabase* db,
+                                   const char * headers, uint32_t headersSize,
+                                   bool Filtering)
+{
+  m_scope = scope;
+  m_numLocalLines = numLines;
+  uint32_t flags;
+  m_lineCountInBodyLines = NS_SUCCEEDED(msg->GetFlags(&flags)) ?
+    !(flags & nsMsgMessageFlags::Offline) : true;
+  // account for added x-mozilla-status lines, and envelope line.
+  if (!m_lineCountInBodyLines)
+    m_numLocalLines += 3;
+  m_msgHdr = msg;
+  m_db = db;
+  m_headersSize = headersSize;
+  m_Filtering = Filtering;
+  // Always give m_headers a defined value. It used to be assigned only when
+  // filtering, which left the member uninitialized on the other path.
+  m_headers = m_Filtering ? headers : nullptr;
+
+  Initialize();
+
+  if (!m_Filtering)
+    OpenLocalFolder(); // if nothing else applies, then we must be a POP folder file
+}
+
+// Common initialization, run by both constructors regardless of the body
+// type being handled: resets the transformation options and parsing state.
+void nsMsgBodyHandler::Initialize()
+{
+  // Default transformations for local message search and MAPI access.
+  m_stripHeaders = true;
+  m_stripHtml = true;
+
+  // Nothing has been read yet.
+  m_headerBytesRead = 0;
+  m_pastMsgHeaders = false;
+  m_pastPartHeaders = false;
+  m_inMessageAttachment = false;
+
+  // Until a header proves otherwise the body is treated as one text/plain
+  // part that is neither multipart nor base64 encoded.
+  m_isMultipart = false;
+  m_base64part = false;
+  m_partIsHtml = false;
+  m_partIsText = true;
+}
+
+nsMsgBodyHandler::~nsMsgBodyHandler()
+{ // Intentionally empty: this destructor performs no explicit cleanup.
+}
+
+/**
+ * Produces the next line of the message after all transformations.
+ *
+ * Raw lines come from the filter header block when filtering, otherwise from
+ * the local message stream when a database is present. Each raw line is run
+ * through ApplyTransformations() until one is not eaten or no more input is
+ * available.
+ *
+ * @param buf      (out) the transformed line
+ * @param charset  (out) set to m_partCharset when a line is returned
+ * @return the length reported for the line, or -1 if no line was transformed
+ */
+int32_t nsMsgBodyHandler::GetNextLine (nsCString &buf, nsCString &charset)
+{
+  nsAutoCString rawLine;
+  int32_t inLength = -1;   // length of the raw line, -1 once input is exhausted
+  int32_t result = -1;     // length handed back to the caller, -1 for eof
+  bool skipLine = true;
+
+  do {
+    if (m_Filtering)
+    {
+      // Filtering: lines come from the in-memory header list.
+      inLength = GetNextFilterLine(rawLine);
+    }
+    else if (m_db)
+    {
+      // Offline IMAP, POP and news are all read like local mail, since
+      // offline messages are stored in berkeley format folders.
+      inLength = GetNextLocalLine(rawLine);
+    }
+
+    if (inLength < 0)
+      break; // eof in
+
+    result = ApplyTransformations(rawLine, inLength, skipLine, buf);
+  } while (skipLine);
+
+  if (result < 0)
+    return -1; // eof out
+
+  // For non-multipart messages the entire message minus headers is encoded,
+  // and ApplyTransformations can only decode a part, so decode here.
+  const bool wholeBodyIsBase64 = !m_isMultipart && m_base64part;
+  if (wholeBodyIsBase64)
+  {
+    Base64Decode(buf);
+    m_base64part = false;
+    // And reapply our transformations to the decoded text.
+    result = ApplyTransformations(buf, buf.Length(), skipLine, buf);
+  }
+
+  charset = m_partCharset;
+  return result;
+}
+
+// Asks the search scope for an input stream on the current message and keeps
+// it in m_fileLineStream for later line-by-line reading.
+void nsMsgBodyHandler::OpenLocalFolder()
+{
+  nsCOMPtr<nsIInputStream> msgStream;
+  const nsresult rv =
+    m_scope->GetInputStream(m_msgHdr, getter_AddRefs(msgStream));
+  // Warn and bail out if the stream could not be obtained; m_fileLineStream
+  // is not assigned in that case.
+  NS_ENSURE_SUCCESS_VOID(rv);
+  m_fileLineStream = do_QueryInterface(msgStream);
+}
+
+/**
+ * Copies the next header string out of the filter header block.
+ *
+ * m_headers always points at the next header in the list; the list is a
+ * sequence of NUL-delimited strings, m_headersSize bytes long.
+ *
+ * @param buf (out) receives the header string; truncated when the block was
+ *            already empty on entry
+ * @return the string length plus one for its terminator, or -1 when no
+ *         header is left
+ */
+int32_t nsMsgBodyHandler::GetNextFilterLine(nsCString &buf)
+{
+  if (m_headersSize == 0)
+  {
+    buf.Truncate();
+    return -1;
+  }
+
+  // #mscott. Ugly hack! filter headers list have CRs & LFs inside the NULL
+  // delimited list of header strings. It is possible to have:
+  // To NULL CR LF From. We want to skip over these CR/LFs if they start at
+  // the beginning of what we think is another header.
+  while (m_headersSize > 0 &&
+         (m_headers[0] == '\r' || m_headers[0] == '\n' ||
+          m_headers[0] == ' ' || m_headers[0] == '\0'))
+  {
+    m_headers++;
+    m_headersSize--;
+  }
+
+  if (m_headersSize == 0)
+    return -1;
+
+  // Look for the terminator only inside the bytes we were given. An
+  // unbounded strlen() would run past the block if the last header is not
+  // NUL-terminated within m_headersSize.
+  const void *terminator = memchr(m_headers, '\0', m_headersSize);
+  const uint32_t lineLength = terminator
+    ? static_cast<uint32_t>(static_cast<const char *>(terminator) - m_headers)
+    : m_headersSize;
+  buf.Assign(m_headers, lineLength);
+
+  // The reported count includes the terminator, as before.
+  const uint32_t numBytesCopied = lineLength + 1;
+
+  // m_headersSize is unsigned: never consume more than is left, so it can
+  // neither wrap around nor move m_headers beyond the end of the block.
+  const uint32_t consumed =
+    numBytesCopied < m_headersSize ? numBytesCopied : m_headersSize;
+  m_headers += consumed;
+  m_headersSize -= consumed;
+
+  return (int32_t) numBytesCopied;
+}
+
+/**
+ * Reads the next line of the message from the local line stream.
+ *
+ * @param buf (out) receives the line that was read
+ * @return the length of the line, or -1 if the line budget is used up, there
+ *         is no stream, or the read failed
+ */
+int32_t nsMsgBodyHandler::GetNextLocalLine(nsCString &buf)
+{
+  if (!m_numLocalLines)
+    return -1;
+
+  // If the line count is in body lines, only decrement once we have
+  // processed all the headers. Otherwise the count covers every line and we
+  // decrement each time.
+  const bool countEveryLine = !m_lineCountInBodyLines;
+  if (countEveryLine || m_pastMsgHeaders)
+    m_numLocalLines--;
+
+  if (!m_fileLineStream)
+    return -1;
+
+  bool more = false;   // required by ReadLine; its value is not used here
+  const nsresult rv = m_fileLineStream->ReadLine(buf, &more);
+  return NS_SUCCEEDED(rv) ? static_cast<int32_t>(buf.Length()) : -1;
+}
+
+/**
+ * This method applies a sequence of transformations to the line.
+ *
+ * It applies the following sequences in order
+ * * Removes headers if the searcher doesn't want them
+ * (sets m_past*Headers)
+ * * Determines the current MIME type.
+ * (via SniffPossibleMIMEHeader)
+ * * Strips any HTML if the searcher doesn't want it
+ * * Strips non-text parts
+ * * Decodes any base64 part
+ * (resetting part variables: m_base64part, m_pastPartHeaders, m_partIsHtml,
+ * m_partIsText)
+ *
+ * Control flow, as implemented below:
+ * 1. While m_pastPartHeaders is false the line is treated as a header line:
+ *    it is copied to buf, sniffed for MIME information, eaten when
+ *    m_stripHeaders is set, and the incoming length is returned unchanged.
+ *    An empty line (or one starting with CR/LF) ends the header phase,
+ *    unless m_inMessageAttachment is set, in which case only that flag is
+ *    cleared and header reading continues.
+ * 2. For multipart messages the line is compared, by prefix, with the
+ *    collected boundaries. On a match a base64 text part accumulated in buf
+ *    is decoded and returned; anything else empties buf and eats the line.
+ *    The per-part state is then reset so that the next part's headers are
+ *    sniffed.
+ * 3. Lines of non-text parts are eaten and 0 is returned.
+ * 4. Lines of a base64 part are appended to buf and eaten; the accumulated
+ *    length is returned.
+ * 5. Any other line is copied to buf, with HTML tags stripped when
+ *    m_stripHtml and m_partIsHtml are both set.
+ *
+ * @param line (in) the current line
+ * @param length (in) the length of said line
+ * @param eatThisLine (out) whether or not to ignore this line
+ * @param buf (inout) if m_base64part, the current part as needed for
+ * decoding; else, it is treated as an out param (a
+ * redundant version of line).
+ * @return the length of the line after applying transformations
+ */
+int32_t nsMsgBodyHandler::ApplyTransformations (const nsCString &line, int32_t length,
+ bool &eatThisLine, nsCString &buf)
+{
+ eatThisLine = false; // default: keep the line unless a branch below eats it
+
+ if (!m_pastPartHeaders) // line is a line from the part headers
+ {
+ if (m_stripHeaders)
+ eatThisLine = true;
+
+ // We have already grabbed all worthwhile information from the headers,
+ // so there is no need to keep track of the current lines
+ buf.Assign(line);
+
+ SniffPossibleMIMEHeader(buf);
+
+ if (buf.IsEmpty() || buf.First() == '\r' || buf.First() == '\n') {
+ if (!m_inMessageAttachment) {
+ m_pastPartHeaders = true;
+ } else {
+ // We're in a message attachment and have just read past the
+ // part header for the attached message. We now need to read
+ // the message headers and any part headers.
+ // We can now forget about the special handling of attached messages.
+ m_inMessageAttachment = false;
+ }
+ }
+
+ // We set m_pastMsgHeaders to 'true' only once.
+ if (m_pastPartHeaders)
+ m_pastMsgHeaders = true;
+
+ return length; // header lines report the incoming length unchanged
+ }
+
+ // Check to see if this is one of our boundary strings.
+ bool matchedBoundary = false;
+ if (m_isMultipart && m_boundaries.Length() > 0) {
+ for (int32_t i = (int32_t)m_boundaries.Length() - 1; i >= 0; i--) { // last collected boundary first
+ if (StringBeginsWith(line, m_boundaries[i])) {
+ matchedBoundary = true;
+ // If we matched a boundary, we won't need the nested/later ones any more.
+ m_boundaries.SetLength(i+1);
+ break;
+ }
+ }
+ }
+ if (matchedBoundary)
+ {
+ if (m_base64part && m_partIsText)
+ {
+ Base64Decode(buf); // buf holds the lines appended by the m_base64part branch further down
+ // Work on the parsed string
+ if (!buf.Length())
+ {
+ NS_WARNING("Trying to transform an empty buffer");
+ eatThisLine = true;
+ }
+ else
+ {
+ // It is wrong to call ApplyTransformations() here since this will
+ // lead to the buffer being doubled-up at |buf.Append(line.get());| below.
+ // ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
+ // Avoid spurious failures
+ eatThisLine = false;
+ }
+ }
+ else
+ {
+ buf.Truncate();
+ eatThisLine = true; // We have no content...
+ }
+
+ // Reset all assumed headers
+ m_base64part = false;
+ // Get ready to sniff new part headers, but do not reset m_pastMsgHeaders
+ // since it will screw the body line count.
+ m_pastPartHeaders = false;
+ m_partIsHtml = false;
+ // If we ever see a multipart message, each part needs to set 'm_partIsText',
+ // so no more defaulting to 'true' when the part is done.
+ m_partIsText = false;
+
+ return buf.Length(); // 0 when the part was discarded, else the decoded length
+ }
+
+ if (!m_partIsText)
+ {
+ // Ignore non-text parts
+ buf.Truncate();
+ eatThisLine = true;
+ return 0;
+ }
+
+ if (m_base64part)
+ {
+ // We need to keep track of all lines to parse base64encoded...
+ buf.Append(line.get());
+ eatThisLine = true; // nothing to hand out until the whole part has been collected
+ return buf.Length();
+ }
+
+ // ... but there's no point if we're not parsing base64.
+ buf.Assign(line);
+ if (m_stripHtml && m_partIsHtml)
+ {
+ StripHtml (buf);
+ }
+
+ return buf.Length();
+}
+
+/**
+ * Removes everything between '<' and '>' (inclusive) from the given string.
+ *
+ * The tag state starts fresh on every call. If the scratch buffer cannot be
+ * allocated the string is left unchanged.
+ *
+ * @param pBufInOut (inout) the text to strip; replaced by the stripped copy
+ */
+void nsMsgBodyHandler::StripHtml (nsCString &pBufInOut)
+{
+  char *stripped = (char*) PR_Malloc (pBufInOut.Length() + 1);
+  if (!stripped)
+    return;
+
+  char *dest = stripped;
+  bool insideTag = false;
+  for (const char *src = pBufInOut.get(); *src; ++src)
+  {
+    const char ch = *src;
+    if (insideTag)
+    {
+      // Throw away everything up to and including the closing '>'.
+      if (ch == '>')
+        insideTag = false;
+    }
+    else if (ch == '<')
+    {
+      insideTag = true;
+    }
+    else
+    {
+      *dest++ = ch;
+    }
+  }
+  *dest = 0; // null terminator
+
+  // The string takes ownership of the scratch buffer.
+  pBufInOut.Adopt(stripped);
+}
+
+/**
+ * Determines the MIME type, if present, from the current line.
+ *
+ * m_partIsHtml, m_isMultipart, m_partIsText, m_base64part, m_partCharset and
+ * the list of boundaries are all set by this method at various points in time.
+ *
+ * The boundary value is delimited by its own closing quote when quoted, and
+ * by the next ';' (minus trailing blanks) when unquoted, so that parameters
+ * following it on the same line are not swallowed into the boundary.
+ *
+ * @param line (in) a header line that may contain a MIME header
+ */
+void nsMsgBodyHandler::SniffPossibleMIMEHeader(const nsCString &line)
+{
+  // Some parts of MIME are case-sensitive and other parts are case-insensitive;
+  // specifically, the headers are all case-insensitive and the values we care
+  // about are also case-insensitive, with the sole exception of the boundary
+  // string, so we can't just take the input line and make it lower case.
+  // Offsets found in the lower-cased copy are used to index the original line.
+  nsCString lowerCaseLine(line);
+  ToLowerCase(lowerCaseLine);
+
+  const int32_t lineLength = (int32_t) line.Length();
+
+  if (StringBeginsWith(lowerCaseLine, NS_LITERAL_CSTRING("content-type:")))
+  {
+    if (lowerCaseLine.Find("text/html", CaseInsensitiveCompare) != -1)
+    {
+      m_partIsText = true;
+      m_partIsHtml = true;
+    }
+    else if (lowerCaseLine.Find("multipart/", CaseInsensitiveCompare) != -1)
+    {
+      if (m_isMultipart)
+      {
+        // Nested multipart, get ready for new headers.
+        m_base64part = false;
+        m_pastPartHeaders = false;
+        m_partIsHtml = false;
+        m_partIsText = false;
+      }
+      m_isMultipart = true;
+      m_partCharset.Truncate();
+    }
+    else if (lowerCaseLine.Find("message/", CaseInsensitiveCompare) != -1)
+    {
+      // Initialise again.
+      m_base64part = false;
+      m_pastPartHeaders = false;
+      m_partIsHtml = false;
+      m_partIsText = true; // Default is text/plain, maybe proven otherwise later.
+      m_inMessageAttachment = true;
+    }
+    else if (lowerCaseLine.Find("text/", CaseInsensitiveCompare) != -1)
+      m_partIsText = true;
+    else
+      m_partIsText = false; // We have disproven our assumption.
+  }
+
+  int32_t start;
+  if (m_isMultipart &&
+      (start = lowerCaseLine.Find("boundary=", CaseInsensitiveCompare)) != -1)
+  {
+    start += 9; // strlen("boundary=")
+    const bool quoted = start < lineLength && line[start] == '\"';
+    if (quoted)
+      start++;
+
+    // A quoted value ends at the next quote, an unquoted one at the next ';'.
+    // Searching from the end of the line instead would pull any later quoted
+    // parameter (e.g. charset="...") into the boundary.
+    int32_t end = line.FindChar(quoted ? '\"' : ';', start);
+    if (end == -1)
+      end = lineLength;
+    if (!quoted)
+    {
+      // Drop blanks between an unquoted value and the following ';'.
+      while (end > start && (line[end - 1] == ' ' || line[end - 1] == '\t'))
+        end--;
+    }
+
+    // Collect all boundaries. Since we only react to crossing a boundary,
+    // we can simply collect the boundaries instead of forming a tree
+    // structure from the message. Keep it simple ;-)
+    // An empty value is skipped: a bare "--" would match every line that
+    // merely starts with two dashes.
+    if (end > start)
+    {
+      nsCString boundary;
+      boundary.Assign("--");
+      boundary.Append(Substring(line, start, end-start));
+      if (!m_boundaries.Contains(boundary))
+        m_boundaries.AppendElement(boundary);
+    }
+  }
+
+  if (m_isMultipart &&
+      (start = lowerCaseLine.Find("charset=", CaseInsensitiveCompare)) != -1)
+  {
+    start += 8; // strlen("charset=")
+    bool foundQuote = false;
+    if (start < lineLength && line[start] == '\"') {
+      start++;
+      foundQuote = true;
+    }
+    int32_t end = line.FindChar(foundQuote ? '\"' : ';', start);
+    if (end == -1)
+      end = lineLength;
+
+    m_partCharset.Assign(Substring(line, start, end-start));
+  }
+
+  if (StringBeginsWith(lowerCaseLine,
+                       NS_LITERAL_CSTRING("content-transfer-encoding:")) &&
+      lowerCaseLine.Find(ENCODING_BASE64, CaseInsensitiveCompare) != kNotFound)
+    m_base64part = true;
+}
+
+/**
+ * Decodes the given base64 string in place.
+ *
+ * When decoding yields no result the buffer keeps its previous contents.
+ * In either case every LF and CR left in the buffer is then replaced by a
+ * space.
+ *
+ * @param pBufInOut (inout) base64 text on entry, decoded text on return
+ */
+void nsMsgBodyHandler::Base64Decode (nsCString &pBufInOut)
+{
+  char *decoded = PL_Base64Decode(pBufInOut.get(), pBufInOut.Length(), nullptr);
+  if (decoded)
+    pBufInOut.Adopt(decoded);
+
+  // Line feeds are handled first, then carriage returns.
+  static const char kLineBreaks[] = { '\n', '\r' };
+  for (char lineBreak : kLineBreaks)
+  {
+    for (int32_t pos = pBufInOut.FindChar(lineBreak);
+         pos != -1;
+         pos = pBufInOut.FindChar(lineBreak, pos))
+    {
+      pBufInOut.Replace(pos, 1, ' ');
+    }
+  }
+}
+