diff options
Diffstat (limited to 'mailnews/mime/src/mimeTextHTMLParsed.cpp')
-rw-r--r-- | mailnews/mime/src/mimeTextHTMLParsed.cpp | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/mailnews/mime/src/mimeTextHTMLParsed.cpp b/mailnews/mime/src/mimeTextHTMLParsed.cpp new file mode 100644 index 000000000..2d45bd2c1 --- /dev/null +++ b/mailnews/mime/src/mimeTextHTMLParsed.cpp @@ -0,0 +1,150 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Most of this code is copied from mimethsa. If you find a bug here, check that class, too. */ + +/* This runs the entire HTML document through the Mozilla HTML parser, and + then outputs it as string again. This ensures that the HTML document is + syntactically correct and complete and all tags and attributes are closed. + + That prevents "MIME in the middle" attacks like efail.de. + The base problem is that we concatenate different MIME parts in the output + and render them all together as a single HTML document in the display. + + The better solution would be to put each MIME part into its own <iframe type="content">. + during rendering. Unfortunately, we'd need <iframe seamless> for that. + That would remove the need for this workaround, and stop even more attack classes. +*/ + +#include "mimeTextHTMLParsed.h" +#include "prmem.h" +#include "prlog.h" +#include "msgCore.h" +#include "nsIDOMParser.h" +#include "nsIDOMDocument.h" +#include "nsIDocument.h" +#include "nsIDocumentEncoder.h" +#include "mozilla/ErrorResult.h" +#include "nsIPrefBranch.h" +#include "mimethtm.h" + +#define MIME_SUPERCLASS mimeInlineTextHTMLClass +MimeDefClass(MimeInlineTextHTMLParsed, MimeInlineTextHTMLParsedClass, + mimeInlineTextHTMLParsedClass, &MIME_SUPERCLASS); + +static int MimeInlineTextHTMLParsed_parse_line(const char *, int32_t, + MimeObject *); +static int MimeInlineTextHTMLParsed_parse_begin(MimeObject *obj); +static int MimeInlineTextHTMLParsed_parse_eof(MimeObject *, bool); +static void MimeInlineTextHTMLParsed_finalize(MimeObject *obj); + +static int +MimeInlineTextHTMLParsedClassInitialize(MimeInlineTextHTMLParsedClass *clazz) +{ + MimeObjectClass *oclass = (MimeObjectClass *)clazz; + NS_ASSERTION(!oclass->class_initialized, "problem with superclass"); + oclass->parse_line = MimeInlineTextHTMLParsed_parse_line; + oclass->parse_begin = MimeInlineTextHTMLParsed_parse_begin; + oclass->parse_eof = MimeInlineTextHTMLParsed_parse_eof; + oclass->finalize = MimeInlineTextHTMLParsed_finalize; + + return 0; +} + +static int +MimeInlineTextHTMLParsed_parse_begin(MimeObject *obj) +{ + MimeInlineTextHTMLParsed *me = (MimeInlineTextHTMLParsed *)obj; + me->complete_buffer = new nsString(); + int status = ((MimeObjectClass*)&MIME_SUPERCLASS)->parse_begin(obj); + if (status < 0) + return status; + + return 0; +} + +static int +MimeInlineTextHTMLParsed_parse_eof(MimeObject *obj, bool abort_p) +{ + + if (obj->closed_p) + return 0; + int status = ((MimeObjectClass*)&MIME_SUPERCLASS)->parse_eof(obj, abort_p); + if (status < 0) + return status; + MimeInlineTextHTMLParsed *me = (MimeInlineTextHTMLParsed *)obj; + + // We have to cache all lines and parse the whole document at once. + // There's a useful sounding function parseFromStream(), but it only allows XML + // mimetypes, not HTML. Methinks that's because the HTML soup parser + // needs the entire doc to make sense of the gibberish that people write. + if (!me || !me->complete_buffer) + return 0; + + nsString& rawHTML = *(me->complete_buffer); + if (rawHTML.IsEmpty()) + return 0; + nsString parsed; + nsresult rv; + + // Parse the HTML source. + nsCOMPtr<nsIDOMDocument> document; + nsCOMPtr<nsIDOMParser> parser = do_GetService(NS_DOMPARSER_CONTRACTID); + rv = parser->ParseFromString(rawHTML.get(), "text/html", + getter_AddRefs(document)); + NS_ENSURE_SUCCESS(rv, -1); + + // Serialize it back to HTML source again. + nsCOMPtr<nsIDocumentEncoder> encoder = do_CreateInstance( + "@mozilla.org/layout/documentEncoder;1?type=text/html"); + uint32_t aFlags = nsIDocumentEncoder::OutputRaw | + nsIDocumentEncoder::OutputDisallowLineBreaking; + rv = encoder->Init(document, NS_LITERAL_STRING("text/html"), aFlags); + NS_ENSURE_SUCCESS(rv, -1); + rv = encoder->EncodeToString(parsed); + NS_ENSURE_SUCCESS(rv, -1); + + // Write it out. + NS_ConvertUTF16toUTF8 resultCStr(parsed); + MimeInlineTextHTML_insert_lang_div(obj, resultCStr); + MimeInlineTextHTML_remove_plaintext_tag(obj, resultCStr); + status = ((MimeObjectClass*)&MIME_SUPERCLASS)->parse_line( + resultCStr.BeginWriting(), resultCStr.Length(), obj); + rawHTML.Truncate(); + return status; +} + +void +MimeInlineTextHTMLParsed_finalize(MimeObject *obj) +{ + MimeInlineTextHTMLParsed *me = (MimeInlineTextHTMLParsed *)obj; + + if (me && me->complete_buffer) + { + obj->clazz->parse_eof(obj, false); + delete me->complete_buffer; + me->complete_buffer = NULL; + } + + ((MimeObjectClass*)&MIME_SUPERCLASS)->finalize(obj); +} + +static int +MimeInlineTextHTMLParsed_parse_line(const char *line, int32_t length, + MimeObject *obj) +{ + MimeInlineTextHTMLParsed *me = (MimeInlineTextHTMLParsed *)obj; + + if (!me || !(me->complete_buffer)) + return -1; + + nsCString linestr(line, length); + NS_ConvertUTF8toUTF16 line_ucs2(linestr.get()); + if (length && line_ucs2.IsEmpty()) + CopyASCIItoUTF16(linestr, line_ucs2); + (me->complete_buffer)->Append(line_ucs2); + + return 0; +} |