/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "msgCore.h"
#include "nsMsgSearchCore.h"
#include "nsMsgUtils.h"
#include "nsMsgBodyHandler.h"
#include "nsMsgSearchTerm.h"
#include "nsIMsgHdr.h"
#include "nsMsgMessageFlags.h"
#include "nsISeekableStream.h"
#include "nsIInputStream.h"
#include "nsIFile.h"
#include "plbase64.h"
#include "prmem.h"
#include "nsMimeTypes.h"

/**
 * Constructs a body handler that reads the message through the search scope.
 *
 * @param scope    (in) search scope used to obtain the message input stream
 * @param numLines (in) number of lines to read from the message
 * @param msg      (in) header of the message being handled; must not be null
 * @param db       (in) message database; lines are only read when it is set
 */
nsMsgBodyHandler::nsMsgBodyHandler(nsIMsgSearchScopeTerm *scope,
                                   uint32_t numLines,
                                   nsIMsgDBHdr *msg, nsIMsgDatabase *db)
{
  m_scope = scope;
  m_numLocalLines = numLines;

  // If the flags cannot be read, assume the line count is in body lines.
  uint32_t flags = 0;
  m_lineCountInBodyLines = NS_SUCCEEDED(msg->GetFlags(&flags)) ?
    !(flags & nsMsgMessageFlags::Offline) : true;

  // account for added x-mozilla-status lines, and envelope line.
  if (!m_lineCountInBodyLines)
    m_numLocalLines += 3;

  m_msgHdr = msg;
  m_db = db;

  // The following are variables used when the body handler is handling stuff
  // from filters....through this constructor, that is not the case so we
  // clear them.
  m_headers = nullptr;
  m_headersSize = 0;
  m_Filtering = false; // make sure we set this before we call initialize...

  Initialize();  // common initialization stuff
  OpenLocalFolder();
}

/**
 * Constructs a body handler that either walks a caller-supplied header list
 * (when filtering) or reads the message through the search scope.
 *
 * @param scope       (in) search scope used to obtain the message input stream
 * @param numLines    (in) number of lines to read from the message
 * @param msg         (in) header of the message being handled; must not be null
 * @param db          (in) message database; lines are only read when it is set
 * @param headers     (in) NUL-delimited list of header strings, used only
 *                    when Filtering is true
 * @param headersSize (in) size in bytes of the headers list
 * @param Filtering   (in) true to read lines from headers instead of the
 *                    message stream
 */
nsMsgBodyHandler::nsMsgBodyHandler(nsIMsgSearchScopeTerm *scope,
                                   uint32_t numLines,
                                   nsIMsgDBHdr *msg, nsIMsgDatabase *db,
                                   const char *headers, uint32_t headersSize,
                                   bool Filtering)
{
  m_scope = scope;
  m_numLocalLines = numLines;

  // If the flags cannot be read, assume the line count is in body lines.
  uint32_t flags = 0;
  m_lineCountInBodyLines = NS_SUCCEEDED(msg->GetFlags(&flags)) ?
    !(flags & nsMsgMessageFlags::Offline) : true;

  // account for added x-mozilla-status lines, and envelope line.
  if (!m_lineCountInBodyLines)
    m_numLocalLines += 3;

  m_msgHdr = msg;
  m_db = db;
  m_headersSize = headersSize;
  m_Filtering = Filtering;

  Initialize();

  if (m_Filtering)
  {
    m_headers = headers;
  }
  else
  {
    // Never leave the header cursor uninitialized when it is not in use.
    m_headers = nullptr;
    OpenLocalFolder();  // if nothing else applies, then we must be a POP folder file
  }
}

/**
 * Common initialization code regardless of what body type we are handling.
 * Resets the transformation switches and the MIME parsing state.
 */
void nsMsgBodyHandler::Initialize()
{
  // Default transformations for local message search and MAPI access
  m_stripHeaders = true;
  m_stripHtml = true;
  m_partIsHtml = false;
  m_base64part = false;
  m_isMultipart = false;
  m_partIsText = true;  // Default is text/plain, maybe proven otherwise later.
  m_pastMsgHeaders = false;
  m_pastPartHeaders = false;
  m_inMessageAttachment = false;
  m_headerBytesRead = 0;
}

nsMsgBodyHandler::~nsMsgBodyHandler()
{
}

/**
 * Fetches the next line that survives the transformations.
 *
 * Lines are pulled from the filter header list or from the local message
 * stream and passed through ApplyTransformations() until one is not eaten or
 * the input is exhausted.
 *
 * @param buf     (out) the transformed line
 * @param charset (out) the charset sniffed for the current part, if any
 * @return the length of the transformed line, or -1 at end of input
 */
int32_t nsMsgBodyHandler::GetNextLine(nsCString &buf, nsCString &charset)
{
  int32_t length = -1;     // length of incoming line or -1 eof
  int32_t outLength = -1;  // length of outgoing line or -1 eof
  bool eatThisLine = true;
  nsAutoCString nextLine;

  while (eatThisLine)
  {
    // first, handle the filtering case...this is easy....
    if (m_Filtering)
    {
      length = GetNextFilterLine(nextLine);
    }
    else
    {
      // 3 cases: Offline IMAP, POP, or we are dealing with a news message....
      // Offline cases should be same as local mail cases, since we're going
      // to store offline messages in berkeley format folders.
      if (m_db)
      {
        length = GetNextLocalLine(nextLine);  // (2) POP
      }
    }

    if (length < 0)
      break;  // eof in

    outLength = ApplyTransformations(nextLine, length, eatThisLine, buf);
  }

  if (outLength < 0)
    return -1;  // eof out

  // For non-multipart messages, the entire message minus headers is encoded
  // ApplyTransformations can only decode a part
  if (!m_isMultipart && m_base64part)
  {
    Base64Decode(buf);
    m_base64part = false;
    // And reapply our transformations...
    outLength = ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
  }

  charset = m_partCharset;
  return outLength;
}

/**
 * Asks the search scope for the message's input stream and keeps it as the
 * line stream that GetNextLocalLine() reads from.
 */
void nsMsgBodyHandler::OpenLocalFolder()
{
  nsCOMPtr<nsIInputStream> inputStream;
  nsresult rv = m_scope->GetInputStream(m_msgHdr,
                                        getter_AddRefs(inputStream));
  // Warn and return if GetInputStream fails
  NS_ENSURE_SUCCESS_VOID(rv);
  m_fileLineStream = do_QueryInterface(inputStream);
}

/**
 * Copies the next header string out of the filter header list.
 *
 * m_headers always points to the next header in the list; the list is
 * NUL delimited and m_headersSize is the number of bytes still unread.
 *
 * @param buf (out) the next header string; truncated when the list is empty
 * @return the number of bytes consumed including the terminator, or -1 when
 *         there is nothing left to read
 */
int32_t nsMsgBodyHandler::GetNextFilterLine(nsCString &buf)
{
  if (!m_headers || m_headersSize == 0)
  {
    buf.Truncate();
    return -1;
  }

  // #mscott. Ugly hack! filter headers list have CRs & LFs inside the NULL
  // delimited list of header strings. It is possible to have:
  // To NULL CR LF From. We want to skip over these CR/LFs if they start
  // at the beginning of what we think is another header.
  while (m_headersSize > 0 &&
         (m_headers[0] == '\r' || m_headers[0] == '\n' ||
          m_headers[0] == ' ' || m_headers[0] == '\0'))
  {
    m_headers++;  // skip over these chars...
    m_headersSize--;
  }

  if (m_headersSize == 0)
    return -1;

  // Look for the terminator only inside the bytes we own, so a list whose
  // last entry is not NUL terminated cannot make us read past its end.
  const void *terminator = memchr(m_headers, '\0', m_headersSize);
  const uint32_t lineLength = terminator ?
    static_cast<uint32_t>(static_cast<const char *>(terminator) - m_headers) :
    m_headersSize;
  buf.Assign(m_headers, lineLength);

  const uint32_t numBytesCopied = lineLength + 1;  // include the terminator

  // be careful...m_headersSize is unsigned. Don't let it go negative or we
  // overflow to 2^32....*yikes*
  if (m_headersSize < numBytesCopied)
  {
    m_headers += m_headersSize;
    m_headersSize = 0;
  }
  else
  {
    m_headers += numBytesCopied;
    m_headersSize -= numBytesCopied;  // update # bytes we have read from the headers list
  }

  return (int32_t) numBytesCopied;
}

/**
 * Reads the next line from the local message stream.
 *
 * @param buf (out) the line that was read
 * @return the number of bytes in the line, or -1 if there are no more local
 *         lines or the line could not be read
 */
int32_t nsMsgBodyHandler::GetNextLocalLine(nsCString &buf)
{
  if (m_numLocalLines)
  {
    // If the line count is in body lines, only decrement once we have
    // processed all the headers. Otherwise the line is not in body
    // lines and we want to decrement for every line.
    if (m_pastMsgHeaders || !m_lineCountInBodyLines)
      m_numLocalLines--;

    // do we need to check the return value here?
    if (m_fileLineStream)
    {
      bool more = false;
      nsresult rv = m_fileLineStream->ReadLine(buf, &more);
      if (NS_SUCCEEDED(rv))
        return buf.Length();
    }
  }

  return -1;
}

/**
 * This method applies a sequence of transformations to the line.
 *
 * It applies the following sequences in order
 * * Removes headers if the searcher doesn't want them
 *   (sets m_past*Headers)
 * * Determines the current MIME type.
 *   (via SniffPossibleMIMEHeader)
 * * Strips any HTML if the searcher doesn't want it
 * * Strips non-text parts
 * * Decodes any base64 part
 *   (resetting part variables: m_base64part, m_pastPartHeaders, m_partIsHtml,
 *    m_partIsText)
 *
 * @param line        (in)    the current line
 * @param length      (in)    the length of said line
 * @param eatThisLine (out)   whether or not to ignore this line
 * @param buf         (inout) if m_base64part, the current part as needed for
 *                            decoding; else, it is treated as an out param (a
 *                            redundant version of line).
 * @return            the length of the line after applying transformations
 */
int32_t nsMsgBodyHandler::ApplyTransformations(const nsCString &line,
                                               int32_t length,
                                               bool &eatThisLine,
                                               nsCString &buf)
{
  eatThisLine = false;

  if (!m_pastPartHeaders)  // line is a line from the part headers
  {
    if (m_stripHeaders)
      eatThisLine = true;

    // We have already grabbed all worthwhile information from the headers,
    // so there is no need to keep track of the current lines
    buf.Assign(line);

    SniffPossibleMIMEHeader(buf);

    // An empty line (or a bare line break) ends the header block.
    if (buf.IsEmpty() || buf.First() == '\r' || buf.First() == '\n')
    {
      if (!m_inMessageAttachment)
      {
        m_pastPartHeaders = true;
      }
      else
      {
        // We're in a message attachment and have just read past the
        // part header for the attached message. We now need to read
        // the message headers and any part headers.
        // We can now forget about the special handling of attached messages.
        m_inMessageAttachment = false;
      }
    }

    // We set m_pastMsgHeaders to 'true' only once.
    if (m_pastPartHeaders)
      m_pastMsgHeaders = true;

    return length;
  }

  // Check to see if this is one of our boundary strings.
  bool matchedBoundary = false;
  if (m_isMultipart && m_boundaries.Length() > 0)
  {
    // Walk from the most recently collected boundary to the oldest.
    for (int32_t i = (int32_t) m_boundaries.Length() - 1; i >= 0; i--)
    {
      if (StringBeginsWith(line, m_boundaries[i]))
      {
        matchedBoundary = true;
        // If we matched a boundary, we won't need the nested/later ones any more.
        m_boundaries.SetLength(i + 1);
        break;
      }
    }
  }

  if (matchedBoundary)
  {
    if (m_base64part && m_partIsText)
    {
      Base64Decode(buf);
      // Work on the parsed string
      if (!buf.Length())
      {
        NS_WARNING("Trying to transform an empty buffer");
        eatThisLine = true;
      }
      else
      {
        // It is wrong to call ApplyTransformations() here since this will
        // lead to the buffer being doubled-up at |buf.Append(line.get());| below.
        // ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
        // Avoid spurious failures
        eatThisLine = false;
      }
    }
    else
    {
      buf.Truncate();
      eatThisLine = true;  // We have no content...
    }

    // Reset all assumed headers
    m_base64part = false;
    // Get ready to sniff new part headers, but do not reset m_pastMsgHeaders
    // since it will screw the body line count.
    m_pastPartHeaders = false;
    m_partIsHtml = false;
    // If we ever see a multipart message, each part needs to set 'm_partIsText',
    // so no more defaulting to 'true' when the part is done.
    m_partIsText = false;

    return buf.Length();
  }

  if (!m_partIsText)
  {
    // Ignore non-text parts
    buf.Truncate();
    eatThisLine = true;
    return 0;
  }

  if (m_base64part)
  {
    // We need to keep track of all lines to parse base64encoded...
    buf.Append(line.get());
    eatThisLine = true;
    return buf.Length();
  }

  // ... but there's no point if we're not parsing base64.
  buf.Assign(line);
  if (m_stripHtml && m_partIsHtml)
  {
    StripHtml(buf);
  }

  return buf.Length();
}

/**
 * Removes everything between '<' and '>' (inclusive) from the string.
 *
 * The string is left untouched if the scratch buffer cannot be allocated.
 *
 * @param pBufInOut (inout) the string to strip
 */
void nsMsgBodyHandler::StripHtml(nsCString &pBufInOut)
{
  char *pBuf = (char *) PR_Malloc(pBufInOut.Length() + 1);
  if (!pBuf)
    return;

  char *pWalk = pBuf;
  const char *pWalkInOut = pBufInOut.get();
  bool inTag = false;

  while (*pWalkInOut)  // throw away everything inside < >
  {
    if (!inTag)
    {
      if (*pWalkInOut == '<')
        inTag = true;
      else
        *pWalk++ = *pWalkInOut;
    }
    else if (*pWalkInOut == '>')
    {
      inTag = false;
    }
    pWalkInOut++;
  }
  *pWalk = 0;  // null terminator

  // Hand the stripped copy over to the string.
  pBufInOut.Adopt(pBuf);
}

/**
 * Determines the MIME type, if present, from the current line.
 *
 * m_partIsHtml, m_isMultipart, m_partIsText, m_base64part, and boundary are
 * all set by this method at various points in time.
 *
 * @param line (in) a header line that may contain a MIME header
 */
void nsMsgBodyHandler::SniffPossibleMIMEHeader(const nsCString &line)
{
  // Some parts of MIME are case-sensitive and other parts are case-insensitive;
  // specifically, the headers are all case-insensitive and the values we care
  // about are also case-insensitive, with the sole exception of the boundary
  // string, so we can't just take the input line and make it lower case.
  nsCString lowerCaseLine(line);
  ToLowerCase(lowerCaseLine);

  if (StringBeginsWith(lowerCaseLine, NS_LITERAL_CSTRING("content-type:")))
  {
    if (lowerCaseLine.Find("text/html", CaseInsensitiveCompare) != -1)
    {
      m_partIsText = true;
      m_partIsHtml = true;
    }
    else if (lowerCaseLine.Find("multipart/", CaseInsensitiveCompare) != -1)
    {
      if (m_isMultipart)
      {
        // Nested multipart, get ready for new headers.
        m_base64part = false;
        m_pastPartHeaders = false;
        m_partIsHtml = false;
        m_partIsText = false;
      }
      m_isMultipart = true;
      m_partCharset.Truncate();
    }
    else if (lowerCaseLine.Find("message/", CaseInsensitiveCompare) != -1)
    {
      // Initialise again.
      m_base64part = false;
      m_pastPartHeaders = false;
      m_partIsHtml = false;
      m_partIsText = true;  // Default is text/plain, maybe proven otherwise later.
      m_inMessageAttachment = true;
    }
    else if (lowerCaseLine.Find("text/", CaseInsensitiveCompare) != -1)
    {
      m_partIsText = true;
    }
    else
    {
      m_partIsText = false;  // We have disproven our assumption.
    }
  }

  int32_t start;
  if (m_isMultipart &&
      (start = lowerCaseLine.Find("boundary=", CaseInsensitiveCompare)) != -1)
  {
    start += 9;  // strlen("boundary=")
    // Only look at the character after '=' if the line actually has one.
    if (static_cast<uint32_t>(start) < line.Length() && line[start] == '\"')
      start++;
    int32_t end = line.RFindChar('\"');
    // No quote at all, or the last quote sits before the boundary value:
    // the value runs to the end of the line.
    if (end < start)
      end = line.Length();

    // Collect all boundaries. Since we only react to crossing a boundary,
    // we can simply collect the boundaries instead of forming a tree
    // structure from the message. Keep it simple ;-)
    nsCString boundary;
    boundary.Assign("--");
    boundary.Append(Substring(line, start, end - start));
    if (!m_boundaries.Contains(boundary))
      m_boundaries.AppendElement(boundary);
  }

  if (m_isMultipart &&
      (start = lowerCaseLine.Find("charset=", CaseInsensitiveCompare)) != -1)
  {
    start += 8;  // strlen("charset=")
    bool foundQuote = false;
    // Only look at the character after '=' if the line actually has one.
    if (static_cast<uint32_t>(start) < line.Length() && line[start] == '\"')
    {
      start++;
      foundQuote = true;
    }
    // A quoted value ends at the closing quote, an unquoted one at ';'.
    int32_t end = line.FindChar(foundQuote ? '\"' : ';', start);
    if (end == -1)
      end = line.Length();

    m_partCharset.Assign(Substring(line, start, end - start));
  }

  if (StringBeginsWith(lowerCaseLine,
                       NS_LITERAL_CSTRING("content-transfer-encoding:")) &&
      lowerCaseLine.Find(ENCODING_BASE64, CaseInsensitiveCompare) != kNotFound)
    m_base64part = true;
}

/**
 * Decodes the given base64 string.
 *
 * It returns its decoded string in its input. If decoding fails the input is
 * kept as it was. In both cases every LF and CR in the result is replaced by
 * a space.
 *
 * @param pBufInOut (inout) a buffer of the string
 */
void nsMsgBodyHandler::Base64Decode(nsCString &pBufInOut)
{
  char *decodedBody = PL_Base64Decode(pBufInOut.get(),
                                      pBufInOut.Length(), nullptr);
  if (decodedBody)
    pBufInOut.Adopt(decodedBody);

  int32_t offset = pBufInOut.FindChar('\n');
  while (offset != -1)
  {
    pBufInOut.Replace(offset, 1, ' ');
    offset = pBufInOut.FindChar('\n', offset);
  }

  offset = pBufInOut.FindChar('\r');
  while (offset != -1)
  {
    pBufInOut.Replace(offset, 1, ' ');
    offset = pBufInOut.FindChar('\r', offset);
  }
}