diff options
Diffstat (limited to 'mailnews/base/search/src/nsMsgBodyHandler.cpp')
-rw-r--r-- | mailnews/base/search/src/nsMsgBodyHandler.cpp | 487 |
1 files changed, 487 insertions, 0 deletions
diff --git a/mailnews/base/search/src/nsMsgBodyHandler.cpp b/mailnews/base/search/src/nsMsgBodyHandler.cpp new file mode 100644 index 000000000..873713bbb --- /dev/null +++ b/mailnews/base/search/src/nsMsgBodyHandler.cpp @@ -0,0 +1,487 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "msgCore.h" +#include "nsMsgSearchCore.h" +#include "nsMsgUtils.h" +#include "nsMsgBodyHandler.h" +#include "nsMsgSearchTerm.h" +#include "nsIMsgHdr.h" +#include "nsMsgMessageFlags.h" +#include "nsISeekableStream.h" +#include "nsIInputStream.h" +#include "nsIFile.h" +#include "plbase64.h" +#include "prmem.h" +#include "nsMimeTypes.h" + +nsMsgBodyHandler::nsMsgBodyHandler (nsIMsgSearchScopeTerm * scope, + uint32_t numLines, + nsIMsgDBHdr* msg, nsIMsgDatabase * db) +{ + m_scope = scope; + m_numLocalLines = numLines; + uint32_t flags; + m_lineCountInBodyLines = NS_SUCCEEDED(msg->GetFlags(&flags)) ? + !(flags & nsMsgMessageFlags::Offline) : true; + // account for added x-mozilla-status lines, and envelope line. + if (!m_lineCountInBodyLines) + m_numLocalLines += 3; + m_msgHdr = msg; + m_db = db; + + // the following are variables used when the body handler is handling stuff from filters....through this constructor, that is not the + // case so we set them to NULL. + m_headers = NULL; + m_headersSize = 0; + m_Filtering = false; // make sure we set this before we call initialize... + + Initialize(); // common initialization stuff + OpenLocalFolder(); +} + +nsMsgBodyHandler::nsMsgBodyHandler(nsIMsgSearchScopeTerm * scope, + uint32_t numLines, + nsIMsgDBHdr* msg, nsIMsgDatabase* db, + const char * headers, uint32_t headersSize, + bool Filtering) +{ + m_scope = scope; + m_numLocalLines = numLines; + uint32_t flags; + m_lineCountInBodyLines = NS_SUCCEEDED(msg->GetFlags(&flags)) ? + !(flags & nsMsgMessageFlags::Offline) : true; + // account for added x-mozilla-status lines, and envelope line. + if (!m_lineCountInBodyLines) + m_numLocalLines += 3; + m_msgHdr = msg; + m_db = db; + m_headersSize = headersSize; + m_Filtering = Filtering; + + Initialize(); + + if (m_Filtering) + m_headers = headers; + else + OpenLocalFolder(); // if nothing else applies, then we must be a POP folder file +} + +void nsMsgBodyHandler::Initialize() +// common initialization code regardless of what body type we are handling... +{ + // Default transformations for local message search and MAPI access + m_stripHeaders = true; + m_stripHtml = true; + m_partIsHtml = false; + m_base64part = false; + m_isMultipart = false; + m_partIsText = true; // Default is text/plain, maybe proven otherwise later. + m_pastMsgHeaders = false; + m_pastPartHeaders = false; + m_inMessageAttachment = false; + m_headerBytesRead = 0; +} + +nsMsgBodyHandler::~nsMsgBodyHandler() +{ +} + +int32_t nsMsgBodyHandler::GetNextLine (nsCString &buf, nsCString &charset) +{ + int32_t length = -1; // length of incoming line or -1 eof + int32_t outLength = -1; // length of outgoing line or -1 eof + bool eatThisLine = true; + nsAutoCString nextLine; + + while (eatThisLine) { + // first, handle the filtering case...this is easy.... + if (m_Filtering) + length = GetNextFilterLine(nextLine); + else + { + // 3 cases: Offline IMAP, POP, or we are dealing with a news message.... + // Offline cases should be same as local mail cases, since we're going + // to store offline messages in berkeley format folders. + if (m_db) + { + length = GetNextLocalLine (nextLine); // (2) POP + } + } + + if (length < 0) + break; // eof in + + outLength = ApplyTransformations(nextLine, length, eatThisLine, buf); + } + + if (outLength < 0) + return -1; // eof out + + // For non-multipart messages, the entire message minus headers is encoded + // ApplyTransformations can only decode a part + if (!m_isMultipart && m_base64part) + { + Base64Decode(buf); + m_base64part = false; + // And reapply our transformations... + outLength = ApplyTransformations(buf, buf.Length(), eatThisLine, buf); + } + + charset = m_partCharset; + return outLength; +} + +void nsMsgBodyHandler::OpenLocalFolder() +{ + nsCOMPtr <nsIInputStream> inputStream; + nsresult rv = m_scope->GetInputStream(m_msgHdr, getter_AddRefs(inputStream)); + // Warn and return if GetInputStream fails + NS_ENSURE_SUCCESS_VOID(rv); + m_fileLineStream = do_QueryInterface(inputStream); +} + +int32_t nsMsgBodyHandler::GetNextFilterLine(nsCString &buf) +{ + // m_nextHdr always points to the next header in the list....the list is NULL terminated... + uint32_t numBytesCopied = 0; + if (m_headersSize > 0) + { + // #mscott. Ugly hack! filter headers list have CRs & LFs inside the NULL delimited list of header + // strings. It is possible to have: To NULL CR LF From. We want to skip over these CR/LFs if they start + // at the beginning of what we think is another header. + + while (m_headersSize > 0 && (m_headers[0] == '\r' || m_headers[0] == '\n' || m_headers[0] == ' ' || m_headers[0] == '\0')) + { + m_headers++; // skip over these chars... + m_headersSize--; + } + + if (m_headersSize > 0) + { + numBytesCopied = strlen(m_headers) + 1 ; + buf.Assign(m_headers); + m_headers += numBytesCopied; + // be careful...m_headersSize is unsigned. Don't let it go negative or we overflow to 2^32....*yikes* + if (m_headersSize < numBytesCopied) + m_headersSize = 0; + else + m_headersSize -= numBytesCopied; // update # bytes we have read from the headers list + + return (int32_t) numBytesCopied; + } + } + else if (m_headersSize == 0) { + buf.Truncate(); + } + return -1; +} + +// return -1 if no more local lines, length of next line otherwise. + +int32_t nsMsgBodyHandler::GetNextLocalLine(nsCString &buf) +// returns number of bytes copied +{ + if (m_numLocalLines) + { + // I the line count is in body lines, only decrement once we have + // processed all the headers. Otherwise the line is not in body + // lines and we want to decrement for every line. + if (m_pastMsgHeaders || !m_lineCountInBodyLines) + m_numLocalLines--; + // do we need to check the return value here? + if (m_fileLineStream) + { + bool more = false; + nsresult rv = m_fileLineStream->ReadLine(buf, &more); + if (NS_SUCCEEDED(rv)) + return buf.Length(); + } + } + + return -1; +} + +/** + * This method applies a sequence of transformations to the line. + * + * It applies the following sequences in order + * * Removes headers if the searcher doesn't want them + * (sets m_past*Headers) + * * Determines the current MIME type. + * (via SniffPossibleMIMEHeader) + * * Strips any HTML if the searcher doesn't want it + * * Strips non-text parts + * * Decodes any base64 part + * (resetting part variables: m_base64part, m_pastPartHeaders, m_partIsHtml, + * m_partIsText) + * + * @param line (in) the current line + * @param length (in) the length of said line + * @param eatThisLine (out) whether or not to ignore this line + * @param buf (inout) if m_base64part, the current part as needed for + * decoding; else, it is treated as an out param (a + * redundant version of line). + * @return the length of the line after applying transformations + */ +int32_t nsMsgBodyHandler::ApplyTransformations (const nsCString &line, int32_t length, + bool &eatThisLine, nsCString &buf) +{ + eatThisLine = false; + + if (!m_pastPartHeaders) // line is a line from the part headers + { + if (m_stripHeaders) + eatThisLine = true; + + // We have already grabbed all worthwhile information from the headers, + // so there is no need to keep track of the current lines + buf.Assign(line); + + SniffPossibleMIMEHeader(buf); + + if (buf.IsEmpty() || buf.First() == '\r' || buf.First() == '\n') { + if (!m_inMessageAttachment) { + m_pastPartHeaders = true; + } else { + // We're in a message attachment and have just read past the + // part header for the attached message. We now need to read + // the message headers and any part headers. + // We can now forget about the special handling of attached messages. + m_inMessageAttachment = false; + } + } + + // We set m_pastMsgHeaders to 'true' only once. + if (m_pastPartHeaders) + m_pastMsgHeaders = true; + + return length; + } + + // Check to see if this is one of our boundary strings. + bool matchedBoundary = false; + if (m_isMultipart && m_boundaries.Length() > 0) { + for (int32_t i = (int32_t)m_boundaries.Length() - 1; i >= 0; i--) { + if (StringBeginsWith(line, m_boundaries[i])) { + matchedBoundary = true; + // If we matched a boundary, we won't need the nested/later ones any more. + m_boundaries.SetLength(i+1); + break; + } + } + } + if (matchedBoundary) + { + if (m_base64part && m_partIsText) + { + Base64Decode(buf); + // Work on the parsed string + if (!buf.Length()) + { + NS_WARNING("Trying to transform an empty buffer"); + eatThisLine = true; + } + else + { + // It is wrong to call ApplyTransformations() here since this will + // lead to the buffer being doubled-up at |buf.Append(line.get());| below. + // ApplyTransformations(buf, buf.Length(), eatThisLine, buf); + // Avoid spurious failures + eatThisLine = false; + } + } + else + { + buf.Truncate(); + eatThisLine = true; // We have no content... + } + + // Reset all assumed headers + m_base64part = false; + // Get ready to sniff new part headers, but do not reset m_pastMsgHeaders + // since it will screw the body line count. + m_pastPartHeaders = false; + m_partIsHtml = false; + // If we ever see a multipart message, each part needs to set 'm_partIsText', + // so no more defaulting to 'true' when the part is done. + m_partIsText = false; + + return buf.Length(); + } + + if (!m_partIsText) + { + // Ignore non-text parts + buf.Truncate(); + eatThisLine = true; + return 0; + } + + if (m_base64part) + { + // We need to keep track of all lines to parse base64encoded... + buf.Append(line.get()); + eatThisLine = true; + return buf.Length(); + } + + // ... but there's no point if we're not parsing base64. + buf.Assign(line); + if (m_stripHtml && m_partIsHtml) + { + StripHtml (buf); + } + + return buf.Length(); +} + +void nsMsgBodyHandler::StripHtml (nsCString &pBufInOut) +{ + char *pBuf = (char*) PR_Malloc (pBufInOut.Length() + 1); + if (pBuf) + { + char *pWalk = pBuf; + + char *pWalkInOut = (char *) pBufInOut.get(); + bool inTag = false; + while (*pWalkInOut) // throw away everything inside < > + { + if (!inTag) + if (*pWalkInOut == '<') + inTag = true; + else + *pWalk++ = *pWalkInOut; + else + if (*pWalkInOut == '>') + inTag = false; + pWalkInOut++; + } + *pWalk = 0; // null terminator + + pBufInOut.Adopt(pBuf); + } +} + +/** + * Determines the MIME type, if present, from the current line. + * + * m_partIsHtml, m_isMultipart, m_partIsText, m_base64part, and boundary are + * all set by this method at various points in time. + * + * @param line (in) a header line that may contain a MIME header + */ +void nsMsgBodyHandler::SniffPossibleMIMEHeader(const nsCString &line) +{ + // Some parts of MIME are case-sensitive and other parts are case-insensitive; + // specifically, the headers are all case-insensitive and the values we care + // about are also case-insensitive, with the sole exception of the boundary + // string, so we can't just take the input line and make it lower case. + nsCString lowerCaseLine(line); + ToLowerCase(lowerCaseLine); + + if (StringBeginsWith(lowerCaseLine, NS_LITERAL_CSTRING("content-type:"))) + { + if (lowerCaseLine.Find("text/html", CaseInsensitiveCompare) != -1) + { + m_partIsText = true; + m_partIsHtml = true; + } + else if (lowerCaseLine.Find("multipart/", CaseInsensitiveCompare) != -1) + { + if (m_isMultipart) + { + // Nested multipart, get ready for new headers. + m_base64part = false; + m_pastPartHeaders = false; + m_partIsHtml = false; + m_partIsText = false; + } + m_isMultipart = true; + m_partCharset.Truncate(); + } + else if (lowerCaseLine.Find("message/", CaseInsensitiveCompare) != -1) + { + // Initialise again. + m_base64part = false; + m_pastPartHeaders = false; + m_partIsHtml = false; + m_partIsText = true; // Default is text/plain, maybe proven otherwise later. + m_inMessageAttachment = true; + } + else if (lowerCaseLine.Find("text/", CaseInsensitiveCompare) != -1) + m_partIsText = true; + else if (lowerCaseLine.Find("text/", CaseInsensitiveCompare) == -1) + m_partIsText = false; // We have disproven our assumption. + } + + int32_t start; + if (m_isMultipart && + (start = lowerCaseLine.Find("boundary=", CaseInsensitiveCompare)) != -1) + { + start += 9; // strlen("boundary=") + if (line[start] == '\"') + start++; + int32_t end = line.RFindChar('\"'); + if (end == -1) + end = line.Length(); + + // Collect all boundaries. Since we only react to crossing a boundary, + // we can simply collect the boundaries instead of forming a tree + // structure from the message. Keep it simple ;-) + nsCString boundary; + boundary.Assign("--"); + boundary.Append(Substring(line, start, end-start)); + if (!m_boundaries.Contains(boundary)) + m_boundaries.AppendElement(boundary); + } + + if (m_isMultipart && + (start = lowerCaseLine.Find("charset=", CaseInsensitiveCompare)) != -1) + { + start += 8; // strlen("charset=") + bool foundQuote = false; + if (line[start] == '\"') { + start++; + foundQuote = true; + } + int32_t end = line.FindChar(foundQuote ? '\"' : ';', start); + if (end == -1) + end = line.Length(); + + m_partCharset.Assign(Substring(line, start, end-start)); + } + + if (StringBeginsWith(lowerCaseLine, + NS_LITERAL_CSTRING("content-transfer-encoding:")) && + lowerCaseLine.Find(ENCODING_BASE64, CaseInsensitiveCompare) != kNotFound) + m_base64part = true; +} + +/** + * Decodes the given base64 string. + * + * It returns its decoded string in its input. + * + * @param pBufInOut (inout) a buffer of the string + */ +void nsMsgBodyHandler::Base64Decode (nsCString &pBufInOut) +{ + char *decodedBody = PL_Base64Decode(pBufInOut.get(), pBufInOut.Length(), nullptr); + if (decodedBody) + pBufInOut.Adopt(decodedBody); + + int32_t offset = pBufInOut.FindChar('\n'); + while (offset != -1) { + pBufInOut.Replace(offset, 1, ' '); + offset = pBufInOut.FindChar('\n', offset); + } + offset = pBufInOut.FindChar('\r'); + while (offset != -1) { + pBufInOut.Replace(offset, 1, ' '); + offset = pBufInOut.FindChar('\r', offset); + } +} + |