diff options
Diffstat (limited to 'mailnews/mime/src/mimetext.cpp')
-rw-r--r-- | mailnews/mime/src/mimetext.cpp | 544 |
1 files changed, 544 insertions, 0 deletions
diff --git a/mailnews/mime/src/mimetext.cpp b/mailnews/mime/src/mimetext.cpp new file mode 100644 index 000000000..a854348c5 --- /dev/null +++ b/mailnews/mime/src/mimetext.cpp @@ -0,0 +1,544 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * This Original Code has been modified by IBM Corporation. Modifications made by IBM + * described herein are Copyright (c) International Business Machines Corporation, 2000. + * Modifications to Mozilla code or documentation identified per MPL Section 3.3 + * + * Date Modified by Description of modification + * 04/20/2000 IBM Corp. OS/2 VisualAge build. + */ +#include "mimetext.h" +#include "mimebuf.h" +#include "mimethtm.h" +#include "comi18n.h" +#include "mimemoz2.h" + +#include "prlog.h" +#include "prmem.h" +#include "plstr.h" +#include "nsIPrefService.h" +#include "nsIPrefBranch.h" +#include "nsIServiceManager.h" +#include "nsIPrefLocalizedString.h" +#include "nsMsgUtils.h" +#include "nsMimeTypes.h" +#include "nsServiceManagerUtils.h" + +#define MIME_SUPERCLASS mimeLeafClass +MimeDefClass(MimeInlineText, MimeInlineTextClass, mimeInlineTextClass, + &MIME_SUPERCLASS); + +static int MimeInlineText_initialize (MimeObject *); +static void MimeInlineText_finalize (MimeObject *); +static int MimeInlineText_rot13_line (MimeObject *, char *line, int32_t length); +static int MimeInlineText_parse_eof (MimeObject *obj, bool abort_p); +static int MimeInlineText_parse_end (MimeObject *, bool); +static int MimeInlineText_parse_decoded_buffer (const char *, int32_t, MimeObject *); +static int MimeInlineText_rotate_convert_and_parse_line(char *, int32_t, + MimeObject *); +static int MimeInlineText_open_dam(char *line, int32_t length, MimeObject *obj); +static int MimeInlineText_initializeCharset(MimeObject *obj); + +static int +MimeInlineTextClassInitialize(MimeInlineTextClass *clazz) +{ + MimeObjectClass *oclass = (MimeObjectClass *) clazz; + MimeLeafClass *lclass = (MimeLeafClass *) clazz; + PR_ASSERT(!oclass->class_initialized); + oclass->initialize = MimeInlineText_initialize; + oclass->finalize = MimeInlineText_finalize; + oclass->parse_eof = MimeInlineText_parse_eof; + oclass->parse_end = MimeInlineText_parse_end; + clazz->rot13_line = MimeInlineText_rot13_line; + clazz->initialize_charset = MimeInlineText_initializeCharset; + lclass->parse_decoded_buffer = MimeInlineText_parse_decoded_buffer; + return 0; +} + +static int +MimeInlineText_initialize (MimeObject *obj) +{ + /* This is an abstract class; it shouldn't be directly instantiated. */ + PR_ASSERT(obj->clazz != (MimeObjectClass *) &mimeInlineTextClass); + + ((MimeInlineText *) obj)->initializeCharset = false; + ((MimeInlineText *) obj)->needUpdateMsgWinCharset = false; + return ((MimeObjectClass*)&MIME_SUPERCLASS)->initialize(obj); +} + +static int MimeInlineText_initializeCharset(MimeObject *obj) +{ + MimeInlineText *text = (MimeInlineText *) obj; + + text->inputAutodetect = false; + text->charsetOverridable = false; + + /* Figure out an appropriate charset for this object. + */ + if (!text->charset && obj->headers) + { + if (obj->options && obj->options->override_charset) + { + text->charset = strdup(obj->options->default_charset); + } + else + { + char *ct = MimeHeaders_get (obj->headers, HEADER_CONTENT_TYPE, + false, false); + if (ct) + { + text->charset = MimeHeaders_get_parameter (ct, "charset", NULL, NULL); + PR_Free(ct); + } + + if (!text->charset) + { + /* If we didn't find "Content-Type: ...; charset=XX" then look + for "X-Sun-Charset: XX" instead. (Maybe this should be done + in MimeSunAttachmentClass, but it's harder there than here.) + */ + text->charset = MimeHeaders_get (obj->headers, + HEADER_X_SUN_CHARSET, + false, false); + } + + /* iMIP entities without an explicit charset parameter default to + US-ASCII (RFC 2447, section 2.4). However, Microsoft Outlook generates + UTF-8 but omits the charset parameter. + When no charset is defined by the container (e.g. iMIP), iCalendar + files default to UTF-8 (RFC 2445, section 4.1.4). + */ + if (!text->charset && + obj->content_type && + !PL_strcasecmp(obj->content_type, TEXT_CALENDAR)) + text->charset = strdup("UTF-8"); + + if (!text->charset) + { + nsresult res; + + text->charsetOverridable = true; + + nsCOMPtr<nsIPrefBranch> prefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID, &res)); + if (NS_SUCCEEDED(res)) + { + nsCOMPtr<nsIPrefLocalizedString> str; + if (NS_SUCCEEDED(prefBranch->GetComplexValue("intl.charset.detector", NS_GET_IID(nsIPrefLocalizedString), getter_AddRefs(str)))) { + //only if we can get autodetector name correctly, do we set this to true + text->inputAutodetect = true; + } + } + + if (obj->options && obj->options->default_charset) + text->charset = strdup(obj->options->default_charset); + else + { + if (NS_SUCCEEDED(res)) + { + nsString value; + NS_GetLocalizedUnicharPreferenceWithDefault(prefBranch, "mailnews.view_default_charset", EmptyString(), value); + text->charset = ToNewUTF8String(value); + } + else + text->charset = strdup(""); + } + } + } + } + + if (text->inputAutodetect) + { + //we need to prepare lineDam for charset detection + text->lineDamBuffer = (char*)PR_Malloc(DAM_MAX_BUFFER_SIZE); + text->lineDamPtrs = (char**)PR_Malloc(DAM_MAX_LINES*sizeof(char*)); + text->curDamOffset = 0; + text->lastLineInDam = 0; + if (!text->lineDamBuffer || !text->lineDamPtrs) + { + text->inputAutodetect = false; + PR_FREEIF(text->lineDamBuffer); + PR_FREEIF(text->lineDamPtrs); + } + } + + text->initializeCharset = true; + + return 0; +} + +static void +MimeInlineText_finalize (MimeObject *obj) +{ + MimeInlineText *text = (MimeInlineText *) obj; + + obj->clazz->parse_eof (obj, false); + obj->clazz->parse_end (obj, false); + + text->inputDecoder = nullptr; + text->utf8Encoder = nullptr; + PR_FREEIF(text->charset); + + /* Should have been freed by parse_eof, but just in case... */ + PR_ASSERT(!text->cbuffer); + PR_FREEIF (text->cbuffer); + + if (text->inputAutodetect) { + PR_FREEIF(text->lineDamBuffer); + PR_FREEIF(text->lineDamPtrs); + text->inputAutodetect = false; + } + + ((MimeObjectClass*)&MIME_SUPERCLASS)->finalize (obj); +} + + +static int +MimeInlineText_parse_eof (MimeObject *obj, bool abort_p) +{ + int status; + + if (obj->closed_p) return 0; + NS_ASSERTION(!obj->parsed_p, "obj already parsed"); + + MimeInlineText *text = (MimeInlineText *) obj; + + /* Flush any buffered data from the MimeLeaf's decoder */ + status = ((MimeLeafClass*)&MIME_SUPERCLASS)->close_decoder(obj); + if (status < 0) return status; + + /* If there is still data in the ibuffer, that means that the last + line of this part didn't end in a newline; so push it out anyway + (this means that the parse_line method will be called with a string + with no trailing newline, which isn't the usual case). We do this + here, rather than in MimeObject_parse_eof, because MimeObject isn't + aware of the rotating-and-converting / charset detection we need to + do first. + */ + if (!abort_p && obj->ibuffer_fp > 0) + { + status = MimeInlineText_rotate_convert_and_parse_line (obj->ibuffer, + obj->ibuffer_fp, + obj); + obj->ibuffer_fp = 0; + if (status < 0) + { + //we haven't find charset yet? Do it before return + if (text->inputAutodetect) + status = MimeInlineText_open_dam(nullptr, 0, obj); + + obj->closed_p = true; + return status; + } + } + + //we haven't find charset yet? now its the time + if (text->inputAutodetect) + status = MimeInlineText_open_dam(nullptr, 0, obj); + + return ((MimeObjectClass*)&MIME_SUPERCLASS)->parse_eof (obj, abort_p); +} + +static int +MimeInlineText_parse_end (MimeObject *obj, bool abort_p) +{ + MimeInlineText *text = (MimeInlineText *) obj; + + if (obj->parsed_p) + { + PR_ASSERT(obj->closed_p); + return 0; + } + + /* We won't be needing this buffer any more; nuke it. */ + PR_FREEIF(text->cbuffer); + text->cbuffer_size = 0; + + return ((MimeObjectClass*)&MIME_SUPERCLASS)->parse_end (obj, abort_p); +} + + +/* This maps A-M to N-Z and N-Z to A-M. All other characters are left alone. + (Comments in GNUS imply that for Japanese, one should rotate by 47?) + */ +static const unsigned char MimeInlineText_rot13_table[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 91, 92, 93, 94, 95, 96, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 123, 124, 125, 126, + 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, + 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, + 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, + 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, + 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, + 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, + 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255 }; + +static int +MimeInlineText_rot13_line (MimeObject *obj, char *line, int32_t length) +{ + unsigned char *s, *end; + PR_ASSERT(line); + if (!line) return -1; + s = (unsigned char *) line; + end = s + length; + while (s < end) + { + *s = MimeInlineText_rot13_table[*s]; + s++; + } + return 0; +} + + +static int +MimeInlineText_parse_decoded_buffer (const char *buf, int32_t size, MimeObject *obj) +{ + PR_ASSERT(!obj->closed_p); + if (obj->closed_p) return -1; + + /* MimeLeaf takes care of this. */ + PR_ASSERT(obj->output_p && obj->options && obj->options->output_fn); + if (!obj->options) return -1; + + /* If we're supposed to write this object, but aren't supposed to convert + it to HTML, simply pass it through unaltered. */ + if (!obj->options->write_html_p && obj->options->format_out != nsMimeOutput::nsMimeMessageAttach) + return MimeObject_write(obj, buf, size, true); + + /* This is just like the parse_decoded_buffer method we inherit from the + MimeLeaf class, except that we line-buffer to our own wrapper on the + `parse_line' method instead of calling the `parse_line' method directly. + */ + return mime_LineBuffer (buf, size, + &obj->ibuffer, &obj->ibuffer_size, &obj->ibuffer_fp, + true, + ((int (*) (char *, int32_t, void *)) + /* This cast is to turn void into MimeObject */ + MimeInlineText_rotate_convert_and_parse_line), + obj); +} + + +#define MimeInlineText_grow_cbuffer(text, desired_size) \ + (((desired_size) >= (text)->cbuffer_size) ? \ + mime_GrowBuffer ((desired_size), sizeof(char), 100, \ + &(text)->cbuffer, &(text)->cbuffer_size) \ + : 0) + +static int +MimeInlineText_convert_and_parse_line(char *line, int32_t length, MimeObject *obj) +{ + int status; + char *converted = 0; + int32_t converted_len = 0; + + MimeInlineText *text = (MimeInlineText *) obj; + + //in case of charset autodetection, charset can be override by meta charset + if (text->charsetOverridable) { + if (mime_typep(obj, (MimeObjectClass *) &mimeInlineTextHTMLClass)) + { + MimeInlineTextHTML *textHTML = (MimeInlineTextHTML *) obj; + if (textHTML->charset && + *textHTML->charset && + strcmp(textHTML->charset, text->charset)) + { + //if meta tag specified charset is different from our detected result, use meta charset. + //but we don't want to redo previous lines + MIME_get_unicode_decoder(textHTML->charset, getter_AddRefs(text->inputDecoder)); + PR_FREEIF(text->charset); + text->charset = strdup(textHTML->charset); + + //update MsgWindow charset if we are instructed to do so + if (text->needUpdateMsgWinCharset && *text->charset) + SetMailCharacterSetToMsgWindow(obj, text->charset); + } + } + } + + //initiate decoder if not yet + if (text->inputDecoder == nullptr) + MIME_get_unicode_decoder(text->charset, getter_AddRefs(text->inputDecoder)); + // If no decoder found, use ""UTF-8"", that will map most non-US-ASCII chars as invalid + // A pure-ASCII only decoder would be better, but there is none + if (text->inputDecoder == nullptr) + MIME_get_unicode_decoder("UTF-8", getter_AddRefs(text->inputDecoder)); + if (text->utf8Encoder == nullptr) + MIME_get_unicode_encoder("UTF-8", getter_AddRefs(text->utf8Encoder)); + + bool useInputCharsetConverter = obj->options->m_inputCharsetToUnicodeDecoder && !PL_strcasecmp(text->charset, obj->options->charsetForCachedInputDecoder.get()); + + if (useInputCharsetConverter) + status = obj->options->charset_conversion_fn(line, length, + text->charset, + "UTF-8", + &converted, + &converted_len, + obj->options->stream_closure, obj->options->m_inputCharsetToUnicodeDecoder, + obj->options->m_unicodeToUTF8Encoder); + else + status = obj->options->charset_conversion_fn(line, length, + text->charset, + "UTF-8", + &converted, + &converted_len, + obj->options->stream_closure, (nsIUnicodeDecoder*)text->inputDecoder, + (nsIUnicodeEncoder*)text->utf8Encoder); + + if (status < 0) + { + PR_FREEIF(converted); + return status; + } + + if (converted) + { + line = converted; + length = converted_len; + } + + /* Now that the line has been converted, call the subclass's parse_line + method with the decoded data. */ + status = obj->clazz->parse_line(line, length, obj); + PR_FREEIF(converted); + + return status; +} + +//In this function call, all buffered lines in lineDam will be sent to charset detector +// and a charset will be used to parse all those line and following lines in this mime obj. +static int +MimeInlineText_open_dam(char *line, int32_t length, MimeObject *obj) +{ + MimeInlineText *text = (MimeInlineText *) obj; + const char* detectedCharset = nullptr; + nsresult res = NS_OK; + int status = 0; + int32_t i; + + if (text->curDamOffset <= 0) { + //there is nothing in dam, use current line for detection + if (length > 0) { + res = MIME_detect_charset(line, length, &detectedCharset); + } + } else { + //we have stuff in dam, use the one + res = MIME_detect_charset(text->lineDamBuffer, text->curDamOffset, &detectedCharset); + } + + //set the charset for this obj + if (NS_SUCCEEDED(res) && detectedCharset && *detectedCharset) { + PR_FREEIF(text->charset); + text->charset = strdup(detectedCharset); + + //update MsgWindow charset if we are instructed to do so + if (text->needUpdateMsgWinCharset && *text->charset) + SetMailCharacterSetToMsgWindow(obj, text->charset); + } + + //process dam and line using the charset + if (text->curDamOffset) { + for (i = 0; i < text->lastLineInDam-1; i++) + { + status = MimeInlineText_convert_and_parse_line( + text->lineDamPtrs[i], + text->lineDamPtrs[i+1] - text->lineDamPtrs[i], + obj ); + } + status = MimeInlineText_convert_and_parse_line( + text->lineDamPtrs[i], + text->lineDamBuffer + text->curDamOffset - text->lineDamPtrs[i], + obj ); + } + + if (length) + status = MimeInlineText_convert_and_parse_line(line, length, obj); + + PR_Free(text->lineDamPtrs); + PR_Free(text->lineDamBuffer); + text->lineDamPtrs = nullptr; + text->lineDamBuffer = nullptr; + text->inputAutodetect = false; + + return status; +} + + +static int +MimeInlineText_rotate_convert_and_parse_line(char *line, int32_t length, + MimeObject *obj) +{ + int status = 0; + MimeInlineTextClass *textc = (MimeInlineTextClass *) obj->clazz; + + PR_ASSERT(!obj->closed_p); + if (obj->closed_p) return -1; + + /* Rotate the line, if desired (this happens on the raw data, before any + charset conversion.) */ + if (obj->options && obj->options->rot13_p) + { + status = textc->rot13_line(obj, line, length); + if (status < 0) return status; + } + + // Now convert to the canonical charset, if desired. + // + bool doConvert = true; + // Don't convert vCard data + if ( ( (obj->content_type) && (!PL_strcasecmp(obj->content_type, TEXT_VCARD)) ) || + (obj->options->format_out == nsMimeOutput::nsMimeMessageSaveAs) + || obj->options->format_out == nsMimeOutput::nsMimeMessageAttach) + doConvert = false; + + // Only convert if the user prefs is false + if ( (obj->options && obj->options->charset_conversion_fn) && + (!obj->options->force_user_charset) && + (doConvert) + ) + { + MimeInlineText *text = (MimeInlineText *) obj; + + if (!text->initializeCharset) + { + MimeInlineText_initializeCharset(obj); + //update MsgWindow charset if we are instructed to do so + if (text->needUpdateMsgWinCharset && *text->charset) + SetMailCharacterSetToMsgWindow(obj, text->charset); + } + + //if autodetect is on, push line to dam + if (text->inputAutodetect) + { + //see if we reach the lineDam buffer limit, if so, there is no need to keep buffering + if (text->lastLineInDam >= DAM_MAX_LINES || + DAM_MAX_BUFFER_SIZE - text->curDamOffset <= length) { + //we let open dam process this line as well as thing that already in Dam + //In case there is nothing in dam because this line is too big, we need to + //perform autodetect on this line + status = MimeInlineText_open_dam(line, length, obj); + } + else { + //buffering current line + text->lineDamPtrs[text->lastLineInDam] = text->lineDamBuffer + text->curDamOffset; + memcpy(text->lineDamPtrs[text->lastLineInDam], line, length); + text->lastLineInDam++; + text->curDamOffset += length; + } + } + else + status = MimeInlineText_convert_and_parse_line(line, length, obj); + } + else + status = obj->clazz->parse_line(line, length, obj); + + return status; +} |