(function (root, fn) { if (typeof define === 'function' && define.amd) { define(fn); } else if (typeof module !== 'undefined' && module.exports) { module.exports = fn(); } else { root.jsmime = fn(); } }(this, function() { var mods = {}; function req(id) { return mods[id.replace(/^\.\//, '')]; } function def(id, fn) { mods[id] = fn(req); } def('mimeutils', function() { "use strict"; /** * Decode a quoted-printable buffer into a binary string. * * @param buffer {BinaryString} The string to decode. * @param more {Boolean} This argument is ignored. * @returns {Array(BinaryString, BinaryString)} The first element of the array * is the decoded string. The second element is always the empty * string. */ function decode_qp(buffer, more) { // Unlike base64, quoted-printable isn't stateful across multiple lines, so // there is no need to buffer input, so we can always ignore more. let decoded = buffer.replace( // Replace either = or =CRLF /=([0-9A-F][0-9A-F]|[ \t]*(\r\n|[\r\n]|$))/gi, function replace_chars(match, param) { // If trailing text matches [ \t]*CRLF, drop everything, since it's a // soft line break. if (param.trim().length == 0) return ''; return String.fromCharCode(parseInt(param, 16)); }); return [decoded, '']; } /** * Decode a base64 buffer into a binary string. Unlike window.atob, the buffer * may contain non-base64 characters that will be ignored. * * @param buffer {BinaryString} The string to decode. * @param more {Boolean} If true, we expect that this function could be * called again and should retain extra data. If * false, we should flush all pending output. * @returns {Array(BinaryString, BinaryString)} The first element of the array * is the decoded string. The second element contains the data that * could not be decoded and needs to be retained for the next call. */ function decode_base64(buffer, more) { // Drop all non-base64 characters let sanitize = buffer.replace(/[^A-Za-z0-9+\/=]/g,''); // Remove harmful `=' chars in the middle. sanitize = sanitize.replace(/=+([A-Za-z0-9+\/])/g, '$1'); // We need to encode in groups of 4 chars. If we don't have enough, leave the // excess for later. If there aren't any more, drop enough to make it 4. let excess = sanitize.length % 4; if (excess != 0 && more) buffer = sanitize.slice(-excess); else buffer = ''; sanitize = sanitize.substring(0, sanitize.length - excess); // Delete all unnecessary '====' in padding. sanitize = sanitize.replace(/(====)+$/g, ''); // Use the atob function we (ought to) have in global scope. return [atob(sanitize), buffer]; } /** * Converts a binary string into a Uint8Array buffer. * * @param buffer {BinaryString} The string to convert. * @returns {Uint8Array} The converted data. */ function stringToTypedArray(buffer) { var typedarray = new Uint8Array(buffer.length); for (var i = 0; i < buffer.length; i++) typedarray[i] = buffer.charCodeAt(i); return typedarray; } /** * Converts a Uint8Array buffer to a binary string. * * @param buffer {BinaryString} The string to convert. * @returns {Uint8Array} The converted data. */ function typedArrayToString(buffer) { var string = ''; for (var i = 0; i < buffer.length; i+= 100) string += String.fromCharCode.apply(undefined, buffer.subarray(i, i + 100)); return string; } /** A list of month names for Date parsing. */ var kMonthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]; return { decode_base64: decode_base64, decode_qp: decode_qp, kMonthNames: kMonthNames, stringToTypedArray: stringToTypedArray, typedArrayToString: typedArrayToString, }; }); /** * This file implements knowledge of how to encode or decode structured headers * for several key headers. It is not meant to be used externally to jsmime. */ def('structuredHeaders', function (require) { "use strict"; var structuredDecoders = new Map(); var structuredEncoders = new Map(); var preferredSpellings = new Map(); function addHeader(name, decoder, encoder) { var lowerName = name.toLowerCase(); structuredDecoders.set(lowerName, decoder); structuredEncoders.set(lowerName, encoder); preferredSpellings.set(lowerName, name); } // Addressing headers: We assume that they can be specified in 1* form (this is // false for From, but it's close enough to the truth that it shouldn't matter). // There is no need to specialize the results for the header, so just pun it // back to parseAddressingHeader. function parseAddress(value) { let results = []; let headerparser = this; return value.reduce(function (results, header) { return results.concat(headerparser.parseAddressingHeader(header, true)); }, []); } function writeAddress(value) { // Make sure the input is an array (accept a single entry) if (!Array.isArray(value)) value = [value]; this.addAddresses(value); } // Addressing headers from RFC 5322: addHeader("Bcc", parseAddress, writeAddress); addHeader("Cc", parseAddress, writeAddress); addHeader("From", parseAddress, writeAddress); addHeader("Reply-To", parseAddress, writeAddress); addHeader("Resent-Bcc", parseAddress, writeAddress); addHeader("Resent-Cc", parseAddress, writeAddress); addHeader("Resent-From", parseAddress, writeAddress); addHeader("Resent-Reply-To", parseAddress, writeAddress); addHeader("Resent-Sender", parseAddress, writeAddress); addHeader("Resent-To", parseAddress, writeAddress); addHeader("Sender", parseAddress, writeAddress); addHeader("To", parseAddress, writeAddress); // From RFC 5536: addHeader("Approved", parseAddress, writeAddress); // From RFC 3798: addHeader("Disposition-Notification-To", parseAddress, writeAddress); // Non-standard headers: addHeader("Delivered-To", parseAddress, writeAddress); addHeader("Return-Receipt-To", parseAddress, writeAddress); // http://cr.yp.to/proto/replyto.html addHeader("Mail-Reply-To", parseAddress, writeAddress); addHeader("Mail-Followup-To", parseAddress, writeAddress); // Parameter-based headers. Note that all parameters are slightly different, so // we use slightly different variants here. function parseParameterHeader(value, do2231, do2047) { // Only use the first header for parameters; ignore subsequent redefinitions. return this.parseParameterHeader(value[0], do2231, do2047); } // RFC 2045 function parseContentType(value) { let params = parseParameterHeader.call(this, value, false, false); let origtype = params.preSemi; let parts = origtype.split('/'); if (parts.length != 2) { // Malformed. Return to text/plain. Evil, ain't it? params = new Map(); parts = ["text", "plain"]; } let mediatype = parts[0].toLowerCase(); let subtype = parts[1].toLowerCase(); let type = mediatype + '/' + subtype; let structure = new Map(); structure.mediatype = mediatype; structure.subtype = subtype; structure.type = type; params.forEach(function (value, name) { structure.set(name.toLowerCase(), value); }); return structure; } structuredDecoders.set("Content-Type", parseContentType); // Unstructured headers (just decode RFC 2047 for the first header value) function parseUnstructured(values) { return this.decodeRFC2047Words(values[0]); } function writeUnstructured(value) { this.addUnstructured(value); } // Message-ID headers. function parseMessageID(values) { // TODO: Proper parsing support for these headers is currently unsupported). return this.decodeRFC2047Words(values[0]); } function writeMessageID(value) { // TODO: Proper parsing support for these headers is currently unsupported). this.addUnstructured(value); } // RFC 5322 addHeader("Comments", parseUnstructured, writeUnstructured); addHeader("Keywords", parseUnstructured, writeUnstructured); addHeader("Subject", parseUnstructured, writeUnstructured); // RFC 2045 addHeader("MIME-Version", parseUnstructured, writeUnstructured); addHeader("Content-Description", parseUnstructured, writeUnstructured); // RFC 7231 addHeader("User-Agent", parseUnstructured, writeUnstructured); // Date headers function parseDate(values) { return this.parseDateHeader(values[0]); } function writeDate(value) { this.addDate(value); } // RFC 5322 addHeader("Date", parseDate, writeDate); addHeader("Resent-Date", parseDate, writeDate); // RFC 5536 addHeader("Expires", parseDate, writeDate); addHeader("Injection-Date", parseDate, writeDate); addHeader("NNTP-Posting-Date", parseDate, writeDate); // RFC 5322 addHeader("Message-ID", parseMessageID, writeMessageID); addHeader("Resent-Message-ID", parseMessageID, writeMessageID); // Miscellaneous headers (those that don't fall under the above schemes): // RFC 2047 structuredDecoders.set("Content-Transfer-Encoding", function (values) { return values[0].toLowerCase(); }); structuredEncoders.set("Content-Transfer-Encoding", writeUnstructured); // Some clients like outlook.com send non-compliant References headers that // separate values using commas. Also, some clients don't separate References // with spaces, since these are optional according to RFC2822. So here we // preprocess these headers (see bug 1154521 and bug 1197686). function preprocessMessageIDs(values) { let msgId = /<[^>]*>/g; let match, ids = []; while ((match = msgId.exec(values)) !== null) { ids.push(match[0]); } return ids.join(' '); } structuredDecoders.set("References", preprocessMessageIDs); structuredDecoders.set("In-Reply-To", preprocessMessageIDs); return Object.freeze({ decoders: structuredDecoders, encoders: structuredEncoders, spellings: preferredSpellings, }); }); def('headerparser', function(require) { /** * This file implements the structured decoding of message header fields. It is * part of the same system as found in mimemimeutils.js, and occasionally makes * references to globals defined in that file or other dependencies thereof. See * documentation in that file for more information about external dependencies. */ "use strict"; var mimeutils = require('./mimeutils'); /** * This is the API that we ultimately return. * * We define it as a global here, because we need to pass it as a |this| * argument to a few functions. */ var headerparser = {}; /** * Tokenizes a message header into a stream of tokens as a generator. * * The low-level tokens are meant to be loosely correspond to the tokens as * defined in RFC 5322. For reasons of saner error handling, however, the two * definitions are not exactly equivalent. The tokens we emit are the following: * 1. Special delimiters: Any char in the delimiters string is emitted as a * string by itself. Parsing parameter headers, for example, would use ";=" * for the delimiter string. * 2. Quoted-strings (if opt.qstring is true): A string which is surrounded by * double quotes. Escapes in the string are omitted when returning. * 3. Domain Literals (if opt.dliteral is true): A string which matches the * dliteral construct in RFC 5322. Escapes here are NOT omitted. * 4. Comments (if opt.comments is true): Comments are handled specially. In * practice, decoding the comments in To headers appears to be necessary, so * comments are not stripped in the output value. Instead, they are emitted * as if they are a special delimiter. However, all delimiters found within a * comment are returned as if they were a quoted string, so that consumers * ignore delimiters within comments. If ignoring comment text completely is * desired, upon seeing a "(" token, consumers should ignore all tokens until * a matching ")" is found (note that comments can be nested). * 5. RFC 2047 encoded-words (if opts.rfc2047 is true): These are strings which * are the decoded contents of RFC 2047's =?UTF-8?Q?blah?=-style words. * 6. Atoms: Atoms are defined not in the RFC 5322 sense, but rather as the * longest sequence of characters that is neither whitespace nor any of the * special characters above. * * The intended interpretation of the stream of output tokens is that they are * the portions of text which can be safely wrapped in whitespace with no ill * effect. The output tokens are either strings (which represent individual * delimiter tokens) or instances of a class that has a customized .toString() * for output (for quoted strings, atoms, domain literals, and encoded-words). * Checking for a delimiter MUST use the strictly equals operator (===). For * example, the proper way to call this method is as follows: * * for (let token of getHeaderTokens(rest, ";=", opts)) { * if (token === ';') { * // This represents a literal ';' in the string * } else if (token === '=') { * // This represents a literal '=' in the string * } else { * // If a ";" qstring was parsed, we fall through to here! * token = token.toString(); * } * } * * This method does not properly tokenize 5322 in all corner cases; however, * this is equivalent in those corner cases to an older header parsing * algorithm, so the algorithm should be correct for all real-world cases. The * corner cases are as follows: * 1. Quoted-strings and domain literals are parsed even if they are within a * comment block (we effectively treat ctext as containing qstring). * 2. WSP need not be between a qstring and an atom (a"b" produces two tokens, * a and b). This is an error case, though. * 3. Legacy comments as display names: We recognize address fields with * comments, and (a) either drop them if inside addr-spec or (b) preserve * them as part of the display-name if not. If the display-name is empty * while the last comment is not, we assume it's the legacy form above and * take the comment content as the display-name. * * @param {String} value The header value, post charset conversion but * before RFC 2047 decoding, to be parsed. * @param {String} delimiters A set of delimiters to include as individual * tokens. * @param {Object} opts A set of options selecting what to parse. * @param {Boolean} [opts.qstring] If true, recognize quoted strings. * @param {Boolean} [opts.dliteral] If true, recognize domain literals. * @param {Boolean} [opts.comments] If true, recognize comments. * @param {Boolean} [opts.rfc2047] If true, parse and decode RFC 2047 * encoded-words. * @returns {(Token|String)[]} An array of Token objects (which have a toString * method returning their value) or String objects * (representing delimiters). */ function getHeaderTokens(value, delimiters, opts) { // The array of parsed tokens. This method used to be a generator, but it // appears that generators are poorly optimized in current engines, so it was // converted to not be one. let tokenList = []; // Represents a non-delimiter token. function Token(token) { // Replace problematic characters so we don't get unexpected behavior // down the line. These fall into a few categories: // A) "Separator, space" (Zs), // B) "Mark, Nonspacing" (Mn) // C) "Other, Control" (Cc) // D) "Other, Format" (Cf) // Unfortuantely, no support for the needed regexp Unicode property escapes // in our engine. So we need to hand-roll it. Used the regexpu tool for // that: https://mothereff.in/regexpu. // This should be updated regularly, to take into account new additions // to the unicode standard. Last updated July 2019. // For a full list of categories, see http://unicode.org/Public//5.0.0/ucd/UCD.html. // -- case A: /\p{Zs}/u // https://www.fileformat.info/info/unicode/category/Zs/list.htm // https://mothereff.in/regexpu#input=/\p{Zs}/u&unicodePropertyEscape=1 token = token.replace(/[\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, " "); // -- case B: /\p{Mn}/u // https://www.fileformat.info/info/unicode/category/Mn/list.htm // https://mothereff.in/regexpu#input=/\p{Mn}/u&unicodePropertyEscape=1 // This is a bit more complicated as some of them could be "real", so we'll // only remove the ones that are known to show as blank. token = token.replace(/[\u034F\u17B4\u17B5\u180B-\u180D\uFE00-\uFE0F]/g, ""); // \uE0100-\uE01EF need to be written using their surrogate code point pairs // until extended Unicode escapes are supported in regexps. // https://www.fileformat.info/info/unicode/char/e0100/index.htm says \uDB40\uDD00. // https://www.fileformat.info/info/unicode/char/e01ef/index.htm says \uDB40\uDDEF. token = token.replace(/\uDB40[\uDD00-\uDDEF]/g, ""); // -- case C: /\p{Cc}/u, except Tab/LF/CR // https://www.fileformat.info/info/unicode/category/Cc/list.htm // https://mothereff.in/regexpu#input=/\p{Cc}/u&unicodePropertyEscape=1 // eslint-disable-next-line no-control-regex token = token.replace(/(?![\t\n\r])[\0-\x1F\x7F-\x9F]/g, ""); // -- case D: /\p{Cf}/u // https://www.fileformat.info/info/unicode/category/Cf/list.htm // https://mothereff.in/regexpu#input=/\p{Cf}/u&unicodePropertyEscape=1 // Remove all of these except for \u0600-\u0605. token = token.replace(/(?:[\xAD\u061C\u06DD\u070F\u08E2\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\uFFF9-\uFFFB]|\uD804[\uDCBD\uDCCD]|\uD80D[\uDC30-\uDC38]|\uD82F[\uDCA0-\uDCA3]|\uD834[\uDD73-\uDD7A]|\uDB40[\uDC01\uDC20-\uDC7F])/g, ""); // Unescape all quoted pairs. Any trailing \ is deleted. this.token = token.replace(/\\(.?)/g, "$1"); } Token.prototype.toString = function () { return this.token; }; // The start of the current token (e.g., atoms, strings) let tokenStart = undefined; // The set of whitespace characters, as defined by RFC 5322 let wsp = " \t\r\n"; // If we are a domain literal ([]) or a quoted string ("), this is set to the // character to look for at the end. let endQuote = undefined; // The current depth of comments, since they can be nested. A value 0 means we // are not in a comment. let commentDepth = 0; // Iterate over every character one character at a time. let length = value.length; for (let i = 0; i < length; i++) { let ch = value[i]; // If we see a \, no matter what context we are in, ignore the next // character. if (ch == '\\') { i++; continue; } // If we are in a qstring or a dliteral, process the character only if it is // what we are looking for to end the quote. if (endQuote !== undefined) { if (ch == endQuote && ch == '"') { // Quoted strings don't include their delimiters. let text = value.slice(tokenStart + 1, i); // If RFC 2047 is enabled, always decode the qstring. if (opts.rfc2047) text = decodeRFC2047Words(text); tokenList.push(new Token(text)); endQuote = undefined; tokenStart = undefined; } else if (ch == endQuote && ch == ']') { // Domain literals include their delimiters. tokenList.push(new Token(value.slice(tokenStart, i + 1))); endQuote = undefined; tokenStart = undefined; } // Avoid any further processing. continue; } // If we can match the RFC 2047 encoded-word pattern, we need to decode the // entire word or set of words. if (opts.rfc2047 && ch == '=' && i + 1 < value.length && value[i + 1] == '?') { // RFC 2047 tokens separated only by whitespace are conceptually part of // the same output token, so we need to decode them all at once. let encodedWordsRE = /([ \t\r\n]*=\?[^?]*\?[BbQq]\?[^?]*\?=)+/; let result = encodedWordsRE.exec(value.slice(i)); if (result !== null) { // If we were in the middle of a prior token (i.e., something like // foobar=?UTF-8?Q?blah?=), yield the previous segment as a token. if (tokenStart !== undefined) { tokenList.push(new Token(value.slice(tokenStart, i))); tokenStart = undefined; } // Find out how much we need to decode... let encWordsLen = result[0].length; let string = decodeRFC2047Words(value.slice(i, i + encWordsLen), "UTF-8"); // Don't make a new Token variable, since we do not want to unescape the // decoded string. tokenList.push({ toString: function() { return string; }}); // Skip everything we decoded. The -1 is because we don't want to // include the starting character. i += encWordsLen - 1; continue; } // If we are here, then we failed to match the simple 2047 encoded-word // regular expression, despite the fact that it matched the =? at the // beginning. Fall through and treat the text as if we aren't trying to // decode RFC 2047. } // If we reach this point, we're not inside of quoted strings, domain // literals, or RFC 2047 encoded-words. This means that the characters we // parse are potential delimiters (unless we're in comments, where // everything starts to go really wonky). Several things could happen, // depending on the kind of character we read and whether or not we were in // the middle of a token. The three values here tell us what we could need // to do at this point: // tokenIsEnding: The current character is not able to be accumulated to an // atom, so we need to flush the atom if there is one. // tokenIsStarting: The current character could begin an atom (or // anything that requires us to mark the starting point), so we need to save // the location. // isSpecial: The current character is a delimiter that needs to be output. let tokenIsEnding = false, tokenIsStarting = false, isSpecial = false; if (wsp.includes(ch)) { // Whitespace ends current tokens, doesn't emit anything. tokenIsEnding = true; } else if (commentDepth == 0 && delimiters.includes(ch)) { // Delimiters end the current token, and need to be output. They do not // apply within comments. tokenIsEnding = true; isSpecial = true; } else if (opts.qstring && ch == '"') { // Quoted strings end the last token and start a new one. tokenIsEnding = true; tokenIsStarting = true; endQuote = ch; } else if (opts.dliteral && ch == '[') { // Domain literals end the last token and start a new one. tokenIsEnding = true; tokenIsStarting = true; endQuote = ']'; } else if (opts.comments && ch == '(') { // Comments are nested (oh joy). We only really care for the outer // delimiter, though, which also ends the prior token and needs to be // output if the consumer requests it. commentDepth++; if (commentDepth == 1) { tokenIsEnding = true; isSpecial = true; } else { tokenIsStarting = true; } } else if (opts.comments && ch == ')') { // Comments are nested (oh joy). We only really care for the outer // delimiter, though, which also ends the prior token and needs to be // output if the consumer requests it. if (commentDepth > 0) commentDepth--; if (commentDepth == 0) { tokenIsEnding = true; isSpecial = true; } else { tokenIsStarting = true; } } else { // Not a delimiter, whitespace, comment, domain literal, or quoted string. // Must be part of an atom then! tokenIsStarting = true; } // If our analysis concluded that we closed an open token, and there is an // open token, then yield that token. if (tokenIsEnding && tokenStart !== undefined) { tokenList.push(new Token(value.slice(tokenStart, i))); tokenStart = undefined; } // If we need to output a delimiter, do so. if (isSpecial) tokenList.push(ch); // If our analysis concluded that we could open a token, and no token is // opened yet, then start the token. if (tokenIsStarting && tokenStart === undefined) { tokenStart = i; } } // That concludes the loop! If there is a currently open token, close that // token now. if (tokenStart !== undefined) { // Error case: a partially-open quoted string is assumed to have a trailing // " character. if (endQuote == '"') tokenList.push(new Token(value.slice(tokenStart + 1))); else tokenList.push(new Token(value.slice(tokenStart))); } return tokenList; } /** * Convert a header value into UTF-16 strings by attempting to decode as UTF-8 * or another legacy charset. If the header is valid UTF-8, it will be decoded * as UTF-8; if it is not, the fallbackCharset will be attempted instead. * * @param {String} headerValue The header (as a binary string) to attempt * to convert to UTF-16. * @param {String} [fallbackCharset] The optional charset to try if UTF-8 * doesn't work. * @returns {String} The UTF-16 representation of the string above. */ function convert8BitHeader(headerValue, fallbackCharset) { // Only attempt to convert the headerValue if it contains non-ASCII // characters. if (/[\x80-\xff]/.exec(headerValue)) { // First convert the value to a typed-array for TextDecoder. let typedarray = mimeutils.stringToTypedArray(headerValue); // Don't try UTF-8 as fallback (redundant), and don't try UTF-16 or UTF-32 // either, since they radically change header interpretation. // If we have a fallback charset, we want to know if decoding will fail; // otherwise, we want to replace with substitution chars. let hasFallback = fallbackCharset && !fallbackCharset.toLowerCase().startsWith("utf"); let utf8Decoder = new TextDecoder("utf-8", {fatal: hasFallback}); try { headerValue = utf8Decoder.decode(typedarray); } catch (e) { // Failed, try the fallback let decoder = new TextDecoder(fallbackCharset, {fatal: false}); headerValue = decoder.decode(typedarray); } } return headerValue; } /** * Decodes all RFC 2047 encoded-words in the input string. The string does not * necessarily have to contain any such words. This is useful, for example, for * parsing unstructured headers. * * @param {String} headerValue The header which may contain RFC 2047 encoded- * words. * @returns {String} A full UTF-16 string with all encoded words expanded. */ function decodeRFC2047Words(headerValue) { // Unfortunately, many implementations of RFC 2047 encoding are actually wrong // in that they split over-long encoded words without regard for whether or // not the split point is in the middle of a multibyte character. Therefore, // we need to be able to handle these situations gracefully. This is done by // using the decoder in streaming mode so long as the next token is another // 2047 token with the same charset. let lastCharset = '', currentDecoder = undefined; /** * Decode a single RFC 2047 token. This function is inline so that we can * easily close over the lastCharset/currentDecoder variables, needed for * handling bad RFC 2047 productions properly. */ function decode2047Token(token, isLastToken) { let tokenParts = token.split("?"); // If it's obviously not a valid token, return false immediately. if (tokenParts.length != 5 || tokenParts[4] != '=') return false; // The charset parameter is defined in RFC 2231 to be charset or // charset*language. We only care about the charset here, so ignore any // language parameter that gets passed in. let charset = tokenParts[1].split('*', 1)[0]; let encoding = tokenParts[2], text = tokenParts[3]; let buffer; if (encoding == 'B' || encoding == 'b') { // Decode base64. If there's any non-base64 data, treat the string as // an illegal token. if (/[^ A-Za-z0-9+\/=]/.exec(text)) return false; // Decode the string buffer = mimeutils.decode_base64(text, false)[0]; } else if (encoding == 'Q' || encoding == 'q') { // Q encoding here looks a lot like quoted-printable text. The differences // between quoted-printable and this are that quoted-printable allows you // to quote newlines (this doesn't), while this replaces spaces with _. // We can reuse the decode_qp code here, since newlines are already // stripped from the header. There is one edge case that could trigger a // false positive, namely when you have a single = or an = followed by // whitespace at the end of the string. Such an input string is already // malformed to begin with, so stripping the = and following input in that // case should not be an important loss. buffer = mimeutils.decode_qp(text.replace(/_/g, ' '), false)[0]; } else { return false; } // Make the buffer be a typed array for what follows let stringBuffer = buffer; buffer = mimeutils.stringToTypedArray(buffer); // If we cannot reuse the last decoder, flush out whatever remains. var output = ''; if (charset != lastCharset && currentDecoder) { output += currentDecoder.decode(); currentDecoder = null; } // Initialize the decoder for this token. lastCharset = charset; if (!currentDecoder) { try { currentDecoder = new TextDecoder(charset, {fatal: false}); } catch (e) { // We don't recognize the charset, so give up. return false; } } // Convert this token with the buffer. Note the stream parameter--although // RFC 2047 tokens aren't supposed to break in the middle of a multibyte // character, a lot of software messes up and does so because it's hard not // to (see headeremitter.js for exactly how hard!). // We must not stream ISO-2022-JP if the buffer switches back to // the ASCII state, that is, ends in "ESC(B". // Also, we shouldn't do streaming on the last token. let doStreaming; if (isLastToken || (charset.toUpperCase() == "ISO-2022-JP" && stringBuffer.endsWith("\x1B(B"))) doStreaming = {stream: false}; else doStreaming = {stream: true}; return output + currentDecoder.decode(buffer, doStreaming); } // The first step of decoding is to split the string into RFC 2047 and // non-RFC 2047 tokens. RFC 2047 tokens look like the following: // =?charset?c?text?=, where c is one of B, b, Q, and q. The split regex does // some amount of semantic checking, so that malformed RFC 2047 tokens will // get ignored earlier. let components = headerValue.split(/(=\?[^?]*\?[BQbq]\?[^?]*\?=)/); // Find last RFC 2047 token. let lastRFC2047Index = -1; for (let i = 0; i < components.length; i++) { if (components[i].substring(0, 2) == "=?") lastRFC2047Index = i; } for (let i = 0; i < components.length; i++) { if (components[i].substring(0, 2) == "=?") { let decoded = decode2047Token(components[i], i == lastRFC2047Index); if (decoded !== false) { // If 2047 decoding succeeded for this bit, rewrite the original value // with the proper decoding. components[i] = decoded; // We're done processing, so continue to the next link. continue; } } else if (/^[ \t\r\n]*$/.exec(components[i])) { // Whitespace-only tokens get squashed into nothing, so 2047 tokens will // be concatenated together. components[i] = ''; continue; } // If there was stuff left over from decoding the last 2047 token, flush it // out. lastCharset = ''; if (currentDecoder) { components[i] = currentDecoder.decode() + components[i]; currentDecoder = null; } } // After the for loop, we'll have a set of decoded strings. Concatenate them // together to make the return value. return components.join(''); } /////////////////////////////// // Structured field decoders // /////////////////////////////// /** * Extract a list of addresses from a header which matches the RFC 5322 * address-list production, possibly doing RFC 2047 decoding along the way. * * The output of this method is an array of elements corresponding to the * addresses and the groups in the input header. An address is represented by * an object of the form: * { * name: The display name of the address * email: The address of the object * } * while a group is represented by an object of the form: * { * name: The display name of the group * group: An array of address object for members in the group. * } * * @param {String} header The MIME header text to be parsed * @param {Boolean} doRFC2047 If true, decode RFC 2047 parameters found in the * header. * @returns {(Address|Group)[]} An array of the addresses found in the header, * where each element is of the form mentioned * above. */ function parseAddressingHeader(header, doRFC2047) { // Default to true if (doRFC2047 === undefined) doRFC2047 = true; // The final (top-level) results list to append to. let results = []; // Temporary results let addrlist = []; // Build up all of the values let name = '', groupName = '', localPart = '', address = '', comment = ''; // Indicators of current state let inAngle = false, inComment = false, needsSpace = false; let preserveSpace = false; let commentClosed = false; // RFC 5322 §3.4 notes that legacy implementations exist which use a simple // recipient form where the addr-spec appears without the angle brackets, // but includes the name of the recipient in parentheses as a comment // following the addr-spec. While we do not create this format, we still // want to recognize it, though. // Furthermore, despite allowing comments in addresses, RFC 5322 §3.4 notes // that legacy implementations may interpret the comment, and thus it // recommends not to use them. (Also, they may be illegal as per RFC 5321.) // While we do not create address fields with comments, we recognize such // comments during parsing and (a) either drop them if inside addr-spec or // (b) preserve them as part of the display-name if not. // If the display-name is empty while the last comment is not, we assume it's // the legacy form above and take the comment content as the display-name. // // When parsing the address field, we at first do not know whether any // strings belong to the display-name (which may include comments) or to the // local-part of an addr-spec (where we ignore comments) until we find an // '@' or an '<' token. Thus, we collect both variants until the fog lifts, // plus the last comment seen. let lastComment = ''; /** * Add the parsed mailbox object to the address list. * If it's in the legacy form above, correct the display-name. * Also reset any faked flags. * @param {String} displayName display-name as per RFC 5322 * @param {String} addrSpec addr-spec as per RFC 5322 */ function addToAddrList(displayName, addrSpec) { // Keep the local-part quoted if it needs to be. let lp = addrSpec.substring(0, addrSpec.lastIndexOf("@")); if (/[ !()<>\[\]:;@\\,"]/.exec(lp) !== null) { addrSpec = '"' + lp.replace(/([\\"])/g, "\\$1") + '"' + addrSpec.substring(addrSpec.lastIndexOf("@")); } // Replace all whitespace characters with a single whitespace, // to avoid consecutive whitespace and also to normalize tabs and newlines. displayName = displayName.replace(/\s+/g, " ").trim(); if (displayName === '' && lastComment !== '') { // Take last comment content as the display-name. let offset = lastComment[0] === ' ' ? 2 : 1; displayName = lastComment.substr(offset, lastComment.length - offset - 1); } if (displayName !== '' || addrSpec !== '') { addrlist.push({name: displayName, email: addrSpec}); } // Clear pending flags and variables. name = localPart = address = lastComment = ''; inAngle = inComment = needsSpace = false; } // Main parsing loop for (let token of getHeaderTokens(header, ":,;<>@", {qstring: true, comments: true, dliteral: true, rfc2047: doRFC2047})) { if (token === ':') { groupName = name; name = ''; localPart = ''; // If we had prior email address results, commit them to the top-level. if (addrlist.length > 0) results = results.concat(addrlist); addrlist = []; } else if (token === '<') { if (inAngle) { // Interpret the address we were parsing as a name. if (address.length > 0) { name = address; } localPart = address = ''; } else { inAngle = true; } } else if (token === '>') { inAngle = false; // Forget addr-spec comments. lastComment = ''; } else if (token === '(') { inComment = true; // The needsSpace flag may not always be set even if it should be, // e.g. for a comment behind an angle-addr. // Also, we need to restore the needsSpace flag if we ignore the comment. preserveSpace = needsSpace; if (!needsSpace) needsSpace = name !== '' && name.substr(-1) !== ' '; comment = needsSpace ? ' (' : '('; commentClosed = false; } else if (token === ')') { inComment = false; comment += ')'; lastComment = comment; // The comment may be part of the name, but not of the local-part. // Enforce a space behind the comment only when not ignoring it. if (inAngle) { needsSpace = preserveSpace; } else { name += comment; needsSpace = true; } commentClosed = true; continue; } else if (token === '@') { // An @ means we see an email address. If we're not within <> brackets, // then we just parsed an email address instead of a display name. Empty // out the display name for the current production. if (!inAngle) { address = localPart; name = ''; localPart = ''; // The remainder of this mailbox is part of an addr-spec. inAngle = true; } address += '@'; } else if (token === ',') { // A comma ends the current name. If we have something that's kind of a // name, add it to the result list. If we don't, then our input looks like // To: , , -> don't bother adding an empty entry. addToAddrList(name, address); } else if (token === ';') { // Add pending name to the list addToAddrList(name, address); // If no group name was found, treat the ';' as a ','. In any case, we // need to copy the results of addrlist into either a new group object or // the main list. if (groupName === '') { results = results.concat(addrlist); } else { results.push({ name: groupName, group: addrlist }); } // ... and reset every other variable. addrlist = []; groupName = ''; } else { // This is either comment content, a quoted-string, or some span of // dots and atoms. // Ignore the needs space if we're a "close" delimiter token. let spacedToken = token; if (needsSpace && (token.toString().length > 0) && token.toString()[0] != ".") spacedToken = ' ' + spacedToken; // Which field do we add this data to? if (inComment) { comment += spacedToken; } else if (inAngle) { address += spacedToken; } else { name += spacedToken; // Never add a space to the local-part, if we just ignored a comment. if (commentClosed) { localPart += token; commentClosed = false; } else { localPart += spacedToken; } } // We need space for the next token if we aren't some kind of comment or // . delimiter. needsSpace = token.toString()[0] != '.'; // The fall-through case after this resets needsSpace to false, and we // don't want that! continue; } // If we just parsed a delimiter, we don't need any space for the next // token. needsSpace = false; } // If we're missing the final ';' of a group, assume it was present. Also, add // in the details of any email/address that we previously saw. addToAddrList(name, address); if (groupName !== '') { results.push({name: groupName, group: addrlist}); addrlist = []; } // Add the current address list build-up to the list of addresses, and return // the whole array to the caller. return results.concat(addrlist); } /** * Extract parameters from a header which is a series of ;-separated * attribute=value tokens. * * @param {String} headerValue The MIME header value to parse. * @param {Boolean} doRFC2047 If true, decode RFC 2047 encoded-words. * @param {Boolean} doRFC2231 If true, decode RFC 2231 encoded parameters. * @return {Map(String -> String)} A map of parameter names to parameter values. * The property preSemi is set to the token that * precedes the first semicolon. */ function parseParameterHeader(headerValue, doRFC2047, doRFC2231) { // The basic syntax of headerValue is token [; token = token-or-qstring]* // Copying more or less liberally from nsMIMEHeaderParamImpl: // The first token is the text to the first whitespace or semicolon. var semi = headerValue.indexOf(";"); if (semi < 0) { var start = headerValue; var rest = ''; } else { var start = headerValue.substring(0, semi); var rest = headerValue.substring(semi); // Include the semicolon } // Strip start to be . start = start.trim().split(/[ \t\r\n]/)[0]; // Decode the the parameter tokens. let opts = {qstring: true, rfc2047: doRFC2047}; // Name is the name of the parameter, inName is true iff we don't have a name // yet. let name = '', inName = true; // Matches is a list of [name, value] pairs, where we found something that // looks like name=value in the input string. let matches = []; for (let token of getHeaderTokens(rest, ";=", opts)) { if (token === ';') { // If we didn't find a name yet (we have ... tokenA; tokenB), push the // name with an empty token instead. if (name != '' && inName == false) matches.push([name, '']); name = ''; inName = true; } else if (token === '=') { inName = false; } else if (inName && name == '') { name = token.toString(); } else if (!inName && name != '') { token = token.toString(); // RFC 2231 doesn't make it clear if %-encoding is supposed to happen // within a quoted string, but this is very much required in practice. If // it ends with a '*', then the string is an extended-value, which means // that its value may be %-encoded. if (doRFC2231 && name.endsWith('*')) { token = token.replace(/%([0-9A-Fa-f]{2})/g, function percent_deencode(match, hexchars) { return String.fromCharCode(parseInt(hexchars, 16)); }); } matches.push([name, token]); // Clear the name, so we ignore anything afterwards. name = ''; } else if (inName) { // We have ...; tokenA tokenB ... -> ignore both tokens name = ''; // Error recovery, ignore this one } } // If we have a leftover ...; tokenA, push the tokenA if (name != '' && inName == false) matches.push([name, '']); // Now matches holds the parameters, so clean up for RFC 2231. There are three // cases: param=val, param*=us-ascii'en-US'blah, and param*n= variants. The // order of preference is to pick the middle, then the last, then the first. // Note that we already unpacked %-encoded values. // simpleValues is just a straight parameter -> value map. // charsetValues is the parameter -> value map, although values are stored // before charset decoding happens. // continuationValues maps parameter -> array of values, with extra properties // valid (if we decided we couldn't do anything anymore) and hasCharset (which // records if we need to decode the charset parameter or not). var simpleValues = new Map(), charsetValues = new Map(), continuationValues = new Map(); for (let pair of matches) { let name = pair[0]; let value = pair[1]; // Get first index, not last index, so we match param*0*= like param*0=. let star = name.indexOf('*'); if (star == -1) { // This is the case of param=val. Select the first value here, if there // are multiple ones. if (!simpleValues.has(name)) simpleValues.set(name, value); } else if (star == name.length - 1) { // This is the case of param*=us-ascii'en-US'blah. name = name.substring(0, star); // Again, select only the first value here. if (!charsetValues.has(name)) charsetValues.set(name, value); } else { // This is the case of param*0= or param*0*=. let param = name.substring(0, star); let entry = continuationValues.get(param); // Did we previously find this one to be bungled? Then ignore it. if (continuationValues.has(param) && !entry.valid) continue; // If we haven't seen it yet, set up entry already. Note that entries are // not straight string values but rather [valid, hasCharset, param0, ... ] if (!continuationValues.has(param)) { entry = new Array(); entry.valid = true; entry.hasCharset = undefined; continuationValues.set(param, entry); } // When the string ends in *, we need to charset decoding. // Note that the star is only meaningful for the *0*= case. let lastStar = name[name.length - 1] == '*'; let number = name.substring(star + 1, name.length - (lastStar ? 1 : 0)); if (number == '0') entry.hasCharset = lastStar; // Is the continuation number illegal? else if ((number[0] == '0' && number != '0') || !(/^[0-9]+$/.test(number))) { entry.valid = false; continue; } // Normalize to an integer number = parseInt(number, 10); // Is this a repeat? If so, bail. if (entry[number] !== undefined) { entry.valid = false; continue; } // Set the value for this continuation index. JS's magic array setter will // expand the array if necessary. entry[number] = value; } } // Build the actual parameter array from the parsed values var values = new Map(); // Simple values have lowest priority, so just add everything into the result // now. for (let pair of simpleValues) { values.set(pair[0], pair[1]); } if (doRFC2231) { // Continuation values come next for (let pair of continuationValues) { let name = pair[0]; let entry = pair[1]; // If we never saw a param*0= or param*0*= value, then we can't do any // reasoning about what it looks like, so bail out now. if (entry.hasCharset === undefined) continue; // Use as many entries in the array as are valid--if we are missing an // entry, stop there. let valid = true; for (var i = 0; valid && i < entry.length; i++) if (entry[i] === undefined) valid = false; // Concatenate as many parameters as are valid. If we need to decode thec // charset, do so now. var value = entry.slice(0, i).join(''); if (entry.hasCharset) { try { value = decode2231Value(value); } catch (e) { // Bad charset, don't add anything. continue; } } // Finally, add this to the output array. values.set(name, value); } // Highest priority is the charset conversion. for (let pair of charsetValues) { try { values.set(pair[0], decode2231Value(pair[1])); } catch (e) { // Bad charset, don't add anything. } } } // Finally, return the values computed above. values.preSemi = start; return values; } /** * Convert a RFC 2231-encoded string parameter into a Unicode version of the * string. This assumes that percent-decoding has already been applied. * * @param {String} value The RFC 2231-encoded string to decode. * @return The Unicode version of the string. */ function decode2231Value(value) { let quote1 = value.indexOf("'"); let quote2 = quote1 >= 0 ? value.indexOf("'", quote1 + 1) : -1; let charset = (quote1 >= 0 ? value.substring(0, quote1) : ""); // It turns out that the language isn't useful anywhere in our codebase for // the present time, so we will safely ignore it. //var language = (quote2 >= 0 ? value.substring(quote1 + 2, quote2) : ""); value = value.substring(Math.max(quote1, quote2) + 1); // Convert the value into a typed array for decoding let typedarray = mimeutils.stringToTypedArray(value); // Decode the charset. If the charset isn't found, we throw an error. Try to // fallback in that case. return new TextDecoder(charset, {fatal: true}) .decode(typedarray, {stream: false}); } // This is a map of known timezone abbreviations, for fallback in obsolete Date // productions. var kKnownTZs = { // The following timezones are explicitly listed in RFC 5322. "UT": "+0000", "GMT": "+0000", "EST": "-0500", "EDT": "-0400", "CST": "-0600", "CDT": "-0500", "MST": "-0700", "MDT": "-0600", "PST": "-0800", "PDT": "-0700", // The following are time zones copied from NSPR's prtime.c "AST": "-0400", // Atlantic Standard Time "NST": "-0330", // Newfoundland Standard Time "BST": "+0100", // British Summer Time "MET": "+0100", // Middle Europe Time "EET": "+0200", // Eastern Europe Time "JST": "+0900" // Japan Standard Time }; /** * Parse a header that contains a date-time definition according to RFC 5322. * The result is a JS date object with the same timestamp as the header. * * The dates returned by this parser cannot be reliably converted back into the * original header for two reasons. First, JS date objects cannot retain the * timezone information they were initialized with, so reserializing a date * header would necessarily produce a date in either the current timezone or in * UTC. Second, JS dates measure time as seconds elapsed from the POSIX epoch * excluding leap seconds. Any timestamp containing a leap second is instead * converted into one that represents the next second. * * Dates that do not match the RFC 5322 production are instead attempted to * parse using the Date.parse function. The strings that are accepted by * Date.parse are not fully defined by the standard, but most implementations * should accept strings that look rather close to RFC 5322 strings. Truly * invalid dates produce a formulation that results in an invalid date, * detectable by having its .getTime() method return NaN. * * @param {String} header The MIME header value to parse. * @returns {Date} The date contained within the header, as described * above. */ function parseDateHeader(header) { let tokens = getHeaderTokens(header, ",:", {}).map(x => x.toString()); // What does a Date header look like? In practice, most date headers devolve // into Date: [dow ,] dom mon year hh:mm:ss tzoff [(abbrev)], with the day of // week mostly present and the timezone abbreviation mostly absent. // First, ignore the day-of-the-week if present. This would be the first two // tokens. if (tokens.length > 1 && tokens[1] === ',') tokens = tokens.slice(2); // If there are too few tokens, the date is obviously invalid. if (tokens.length < 8) return new Date(NaN); // Save off the numeric tokens let day = parseInt(tokens[0]); // month is tokens[1] let year = parseInt(tokens[2]); let hours = parseInt(tokens[3]); // tokens[4] === ':' let minutes = parseInt(tokens[5]); // tokens[6] === ':' let seconds = parseInt(tokens[7]); // Compute the month. Check only the first three digits for equality; this // allows us to accept, e.g., "January" in lieu of "Jan." let month = mimeutils.kMonthNames.indexOf(tokens[1].slice(0, 3)); // If the month name is not recognized, make the result illegal. if (month < 0) month = NaN; // Compute the full year if it's only 2 digits. RFC 5322 states that the // cutoff is 50 instead of 70. if (year < 100) { year += year < 50 ? 2000 : 1900; } // Compute the timezone offset. If it's not in the form ±hhmm, convert it to // that form. let tzoffset = tokens[8]; if (tzoffset in kKnownTZs) tzoffset = kKnownTZs[tzoffset]; let decompose = /^([+-])(\d\d)(\d\d)$/.exec(tzoffset); // Unknown? Make it +0000 if (decompose === null) decompose = ['+0000', '+', '00', '00']; let tzOffsetInMin = parseInt(decompose[2]) * 60 + parseInt(decompose[3]); if (decompose[1] == '-') tzOffsetInMin = -tzOffsetInMin; // How do we make the date at this point? Well, the JS date's constructor // builds the time in terms of the local timezone. To account for the offset // properly, we need to build in UTC. let finalDate = new Date(Date.UTC(year, month, day, hours, minutes, seconds) - tzOffsetInMin * 60 * 1000); // Suppose our header was mangled and we couldn't read it--some of the fields // became undefined. In that case, the date would become invalid, and the // indication that it is so is that the underlying number is a NaN. In that // scenario, we could build attempt to use JS Date parsing as a last-ditch // attempt. But it's not clear that such messages really exist in practice, // and the valid formats for Date in ES6 are unspecified. return finalDate; } //////////////////////////////////////// // Structured header decoding support // //////////////////////////////////////// // Load the default structured decoders var structuredDecoders = new Map(); var structuredHeaders = require('./structuredHeaders'); var preferredSpellings = structuredHeaders.spellings; var forbiddenHeaders = new Set(); for (let pair of structuredHeaders.decoders) { addStructuredDecoder(pair[0], pair[1]); forbiddenHeaders.add(pair[0].toLowerCase()); } /** * Use an already-registered structured decoder to parse the value of the header * into a structured representation. * * As this method is designed to be used for the internal MIME Parser to convert * the raw header values to well-structured values, value is intended to be an * array consisting of all occurences of the header in order. However, for ease * of use by other callers, it can also be treated as a string. * * If the decoder for the header is not found, an exception will be thrown. * * A large set of headers have pre-defined structured decoders; these decoders * cannot be overrided with addStructuredDecoder, as doing so could prevent the * MIME or message parsers from working properly. The pre-defined structured * headers break down into five clases of results, plus some ad-hoc * representations. They are: * * Addressing headers (results are the same as parseAddressingHeader): * - Approved * - Bcc * - Cc * - Delivered-To * - Disposition-Notification-To * - From * - Mail-Reply-To * - Mail-Followup-To * - Reply-To * - Resent-Bcc * - Resent-Cc * - Resent-From * - Resent-Reply-To * - Resent-Sender * - Resent-To * - Return-Receipt-To * - Sender * - To * * Date headers (results are the same as parseDateHeader): * - Date * - Expires * - Injection-Date * - NNTP-Posting-Date * - Resent-Date * * References headers (results are the same as parseReferencesHeader): * - (TODO: Parsing support for these headers is currently unsupported) * * Message-ID headers (results are the first entry of the result of * parseReferencesHeader): * - (TODO: Parsing support for these headers is currently unsupported) * * Unstructured headers (results are merely decoded according to RFC 2047): * - Comments * - Content-Description * - Keywords * - Subject * * The ad-hoc headers and their resulting formats are as follows: * Content-Type: returns a JS Map of parameter names (in lower case) to their * values, along with the following extra properties defined on the map: * - mediatype: the type to the left of '/' (e.g., 'text', 'message') * - subtype: the type to the right of '/' (e.g., 'plain', 'rfc822') * - type: the full typename (e.g., 'text/plain') * RFC 2047 and RFC 2231 decoding is applied where appropriate. The values of * the type, mediatype, and subtype attributes are all normalized to lower-case, * as are the names of all parameters. * * Content-Transfer-Encoding: the first value is converted to lower-case. * * @param {String} header The name of the header of the values. * @param {String|Array} value The value(s) of the headers, after charset * conversion (if any) has been applied. If it is * an array, the headers are listed in the order * they appear in the message. * @returns {Object} A structured representation of the header values. */ function parseStructuredHeader(header, value) { // Enforce that the parameter is an array. If it's a string, make it a // 1-element array. if (typeof value === "string" || value instanceof String) value = [value]; if (!Array.isArray(value)) throw new TypeError("Header value is not an array: " + value); // Lookup the header in our decoders; if present, use that to decode the // header. let lowerHeader = header.toLowerCase(); if (structuredDecoders.has(lowerHeader)) { return structuredDecoders.get(lowerHeader).call(headerparser, value); } // If not present, throw an exception. throw new Error("Unknown structured header: " + header); } /** * Add a custom structured MIME decoder to the set of known decoders. These * decoders are used for {@link parseStructuredHeader} and similar functions to * encode richer, more structured values instead of relying on string * representations everywhere. * * Structured decoders are functions which take in a single parameter consisting * of an array of the string values of the header, in order that they appear in * the message. These headers have had the charset conversion (if necessary) * applied to them already. The this parameter of the function is set to be the * jsmime.headerparser module. * * There is a large set of structured decoders built-in to the jsmime library * already. As these headers are fundamental to the workings of jsmime, * attempting to replace them with a custom version will instead produce an * exception. * * @param {String} header The header name (in any case) * for which the decoder will be * used. * @param {Function(String[] -> Object)} decoder The structured decoder * function. */ function addStructuredDecoder(header, decoder) { let lowerHeader = header.toLowerCase(); if (forbiddenHeaders.has(lowerHeader)) throw new Error("Cannot override header: " + header); structuredDecoders.set(lowerHeader, decoder); if (!preferredSpellings.has(lowerHeader)) preferredSpellings.set(lowerHeader, header); } headerparser.addStructuredDecoder = addStructuredDecoder; headerparser.convert8BitHeader = convert8BitHeader; headerparser.decodeRFC2047Words = decodeRFC2047Words; headerparser.getHeaderTokens = getHeaderTokens; headerparser.parseAddressingHeader = parseAddressingHeader; headerparser.parseDateHeader = parseDateHeader; headerparser.parseParameterHeader = parseParameterHeader; headerparser.parseStructuredHeader = parseStructuredHeader; return Object.freeze(headerparser); }); //////////////////////////////////////////////////////////////////////////////// // JavaScript Raw MIME Parser // //////////////////////////////////////////////////////////////////////////////// /** * The parser implemented in this file produces a MIME part tree for a given * input message via a streaming callback interface. It does not, by itself, * understand concepts like attachments (hence the term 'Raw'); the consumer * must translate output into such a format. * * Charsets: * The MIME specifications permit a single message to contain multiple charsets * (or perhaps none) as raw octets. As JavaScript strings are implicitly * implemented in UTF-16, it is possible that some engines will attempt to * convert these strings using an incorrect charset or simply fail to convert * them at all. This parser assumes that its input is in the form of a "binary * string", a string that uses only the first 256 characters of Unicode to * represent the individual octets. To verify that charsets are not getting * mangled elsewhere in the pipeline, the auxiliary test file test/data/charsets * can be used. * * This parser attempts to hide the charset details from clients as much as * possible. The resulting values of structured headers are always converted * into proper Unicode strings before being exposed to clients; getting at the * raw binary string data can only be done via getRawHeader. The .charset * parameter on header objects, if changed, changes the fallback charset used * for headers. It is initialized to the presumed charset of the corresponding * part, taking into account the charset and force-charset options of the * parser. Body parts are only converted into Unicode strings if the strformat * option is set to Unicode. Even then, only the bodies of parts with a media * type of text are converted to Unicode strings using available charset data; * other parts are retained as Uint8Array objects. * * Part numbering: * Since the output is a streaming format, individual parts are identified by a * numbering scheme. The intent of the numbering scheme for parts is to comply * with the part numbers as dictated by RFC 3501 as much possible; however, * that scheme does have several edge cases which would, if strictly followed, * make it impossible to refer to certain parts of the message. In addition, we * wish to make it possible to refer to parts which are not discoverable in the * original MIME tree but are still viewable as parts. The part numbering * scheme is as follows: * - Individual sections of a multipart/* body are numbered in increasing order * sequentially, starting from 1. Note that the prologue and the epilogue of * a multipart/* body are not considered entities and are therefore not * included in the part numbering scheme (there is no way to refer to them). * - The numbers of multipart/* parts are separated by `.' characters. * - The outermost message is referred to by use of the empty string. * --> The following segments are not accounted for by IMAP part numbering. <-- * - The body of any message/rfc822 or similar part is distinguished from the * message part as a whole by appending a `$' character. This does not apply * to the outermost message/rfc822 envelope. */ def('mimeparser', function(require) { "use strict"; var mimeutils = require('./mimeutils'); var headerparser = require('./headerparser'); var spellings = require('./structuredHeaders').spellings; /** * An object that represents the structured MIME headers for a message. * * This class is primarily used as the 'headers' parameter in the startPart * callback on handlers for MimeParser. As such, it is designed to do the right * thing in common cases as much as possible, with some advanced customization * possible for clients that need such flexibility. * * In a nutshell, this class stores the raw headers as an internal Map. The * structured headers are not computed until they are actually used, which means * that potentially expensive structuring (e.g., doing manual DKIM validation) * can be performed as a structured decoder without impeding performance for * those who just want a few common headers. * * The outer API of this class is intended to be similar to a read-only Map * object (complete with iterability support), with a few extra properties to * represent things that are hard to determine properly from headers. The keys * used are "preferred spellings" of the headers, although the get and has * methods will accept header parameters of any case. Preferred spellings are * derived from the name passed to addStructuredDecoder/addStructuredEncoder; if * no structured decoder has been registered, then the name capitalizes the * first letter of every word in the header name. * * Extra properties compared to a Map object are: * - charset: This field represents the assumed charset of the associated MIME * body. It is prefilled using a combination of the charset and force-charset * options on the associated MimeParser instance as well as attempting to find * a charset parameter in the Content-Type header. * * If the force-charset option is false, the charset is guessed first using * the Content-Type header's charset parameter, falling back to the charset * option if it is present. If the force-charset option is true, the charset * is initially set to the charset option. This initial guessed value can be * overridden at any time by simply setting the field on this object. * * The charset is better reflected as a parameter of the body rather than the * headers; this is ultimately the charset parameter that will be used if a * body part is being converted to a Unicode strformat. Headers are converted * using headerparser.convert8BitHeader, and this field is used as the * fallbackCharset parameter, which will always to attempt to decode as UTF-8 * first (in accordance with RFC 6532) and will refuse to decode as UTF-16 or * UTF-32, as ASCII is not a subset of those charsets. * * - rawHeaderText: This read-only field contains the original header text from * which headers were parsed, preserving case and whitespace (including * alternate line endings instead of CRLF) exactly. If the header text begins * with the mbox delimiter (i.e., a line that begins with "From "), then that * is excluded from the rawHeaderText value and is not reflected anywhere in * this object. * * - contentType: This field contains the structured representation of the * Content-Type header, if it is present. If it is not present, it is set to * the structured representation of the default Content-Type for a part (as * this data is not easily guessed given only MIME tree events). * * The constructor for these objects is not externally exported, and thus they * can only be created via MimeParser. * * @param rawHeaderText {BinaryString} The contents of the MIME headers to be * parsed. * @param options {Object} Options for the header parser. * @param options.stripcontinuations {Boolean} If true, elide CRLFs from the * raw header output. */ function StructuredHeaders(rawHeaderText, options) { // An individual header is terminated by a CRLF, except if the CRLF is // followed by a SP or TAB. Use negative lookahead to capture the latter case, // and don't capture the strings or else split results get nasty. let values = rawHeaderText.split(/(?:\r\n|\n)(?![ \t])|\r(?![ \t\n])/); // Ignore the first "header" if it begins with an mbox delimiter if (values.length > 0 && values[0].substring(0, 5) == "From ") { values.shift(); // Elide the mbox delimiter from this._headerData if (values.length == 0) rawHeaderText = ''; else rawHeaderText = rawHeaderText.substring(rawHeaderText.indexOf(values[0])); } let headers = new Map(); for (let i = 0; i < values.length; i++) { // Look for a colon. If it's not present, this header line is malformed, // perhaps by premature EOF or similar. let colon = values[i].indexOf(":"); if (colon >= 0) { var header = values[i].substring(0, colon); var val = values[i].substring(colon + 1).trim(); if (options.stripcontinuations) val = val.replace(/[\r\n]/g, ''); } else { var header = values[i]; var val = ''; } // Canonicalize the header in lower-case form. header = header.trim().toLowerCase(); // Omit "empty" headers if (header == '') continue; // We keep an array of values for each header, since a given header may be // repeated multiple times. if (headers.has(header)) { headers.get(header).push(val); } else { headers.set(header, [val]); } } /** * A map of header names to arrays of raw values found in this header block. * @private */ this._rawHeaders = headers; /** * Cached results of structured header parsing. * @private */ this._cachedHeaders = new Map(); Object.defineProperty(this, "rawHeaderText", {get: function () { return rawHeaderText; }}); Object.defineProperty(this, "size", {get: function () { return this._rawHeaders.size; }}); Object.defineProperty(this, "charset", { get: function () { return this._charset; }, set: function (value) { this._charset = value; // Clear the cached headers, since this could change their values this._cachedHeaders.clear(); } }); // Default to the charset, until the message parser overrides us. if ('charset' in options) this._charset = options.charset; else this._charset = null; // If we have a Content-Type header, set contentType to return the structured // representation. We don't set the value off the bat, since we want to let // someone who changes the charset affect the values of 8-bit parameters. Object.defineProperty(this, "contentType", { configurable: true, get: function () { return this.get('Content-Type'); } }); } /** * Get a raw header. * * Raw headers are an array of the header values, listed in order that they were * specified in the header block, and without any attempt to convert charsets or * apply RFC 2047 decoding. For example, in the following message (where the * is meant to represent binary-octets): * * X-Header: Value A * X-Header: Vlue B * Header2: Q * * the result of calling getRawHeader('X-Header') or getRawHeader('x-header') * would be ['Value A', 'V\xC3\xA5lue B'] and the result of * getRawHeader('Header2') would be ['Q']. * * @param headerName {String} The header name for which to get header values. * @returns {BinaryString[]} The raw header values (with no charset conversion * applied). */ StructuredHeaders.prototype.getRawHeader = function (headerName) { return this._rawHeaders.get(headerName.toLowerCase()); }; /** * Retrieve a structured version of the header. * * If there is a registered structured decoder (registration happens via * headerparser.addStructuredDecoder), then the result of calling that decoder * on the charset-corrected version of the header is returned. Otherwise, the * values are charset-corrected and RFC 2047 decoding is applied as if the * header were an unstructured header. * * A substantial set of headers have pre-registed structured decoders, which, in * some cases, are unable to be overridden due to their importance in the * functioning of the parser code itself. * * @param headerName {String} The header name for which to get the header value. * @returns The structured header value of the output. */ StructuredHeaders.prototype.get = function (headerName) { // Normalize the header name to lower case headerName = headerName.toLowerCase(); // First, check the cache for the header value if (this._cachedHeaders.has(headerName)) return this._cachedHeaders.get(headerName); // Not cached? Grab it [propagating lack of header to caller] let headerValue = this._rawHeaders.get(headerName); if (headerValue === undefined) return headerValue; // Convert the header to Unicode let charset = this.charset; headerValue = headerValue.map(function (value) { return headerparser.convert8BitHeader(value, charset); }); // If there is a structured decoder, use that; otherwise, assume that the // header is unstructured and only do RFC 2047 conversion let structured; try { structured = headerparser.parseStructuredHeader(headerName, headerValue); } catch (e) { structured = headerValue.map(function (value) { return headerparser.decodeRFC2047Words(value); }); } // Cache the result and return it this._cachedHeaders.set(headerName, structured); return structured; }; /** * Check if the message has the given header. * * @param headerName {String} The header name for which to get the header value. * @returns {Boolean} True if the header is present in this header block. */ StructuredHeaders.prototype.has = function (headerName) { // Check for presence in the raw headers instead of cached headers. return this._rawHeaders.has(headerName.toLowerCase()); }; // Make a custom iterator. Presently, support for Symbol isn't yet present in // SpiderMonkey (or V8 for that matter), so type-pun the name for now. var JS_HAS_SYMBOLS = typeof Symbol === "function"; var ITERATOR_SYMBOL = JS_HAS_SYMBOLS ? Symbol.iterator : "@@iterator"; /** * An equivalent of Map.@@iterator, applied to the structured header * representations. This is the function that makes * for (let [header, value] of headers) work properly. */ StructuredHeaders.prototype[ITERATOR_SYMBOL] = function*() { // Iterate over all the raw headers, and use the cached headers to retrieve // them. for (let headerName of this.keys()) { yield [headerName, this.get(headerName)]; } }; /** * An equivalent of Map.forEach, applied to the structured header * representations. * * @param callback {Function(value, name, headers)} The callback to call for * each header/value combo. * @param thisarg {Object} The parameter that will be * the |this| of the callback. */ StructuredHeaders.prototype.forEach = function (callback, thisarg) { for (let [header, value] of this) { callback.call(thisarg, value, header, this); } }; /** * An equivalent of Map.entries, applied to the structured header * representations. */ StructuredHeaders.prototype.entries = StructuredHeaders.prototype[Symbol.iterator]; /// This function maps lower case names to a pseudo-preferred spelling. function capitalize(headerName) { return headerName.replace(/\b[a-z]/g, function (match) { return match.toUpperCase(); }); } /** * An equivalent of Map.keys, applied to the structured header representations. */ StructuredHeaders.prototype.keys = function*() { for (let name of this._rawHeaders.keys()) { yield spellings.get(name) || capitalize(name); } }; /** * An equivalent of Map.values, applied to the structured header * representations. */ StructuredHeaders.prototype.values = function* () { for (let [, value] of this) { yield value; } }; /** * A MIME parser. * * The inputs to the constructor consist of a callback object which receives * information about the output data and an optional object containing the * settings for the parser. * * The first parameter, emitter, is an object which contains several callbacks. * Note that any and all of these methods are optional; the parser will not * crash if one is missing. The callbacks are as follows: * startMessage() * Called when the stream to be parsed has started delivering data. This * will be called exactly once, before any other call. * endMessage() * Called after all data has been delivered and the message parsing has * been completed. This will be called exactly once, after any other call. * startPart(string partNum, object headers) * Called after the headers for a body part (including the top-level * message) have been parsed. The first parameter is the part number (see * the discussion on part numbering). The second parameter is an instance * of StructuredHeaders that represents all of the headers for the part. * endPart(string partNum) * Called after all of the data for a body part (including sub-parts) has * been parsed. The first parameter is the part number. * deliverPartData(string partNum, {string,typedarray} data) * Called when some data for a body part has been delivered. The first * parameter is the part number. The second parameter is the data which is * being delivered; the exact type of this data depends on the options * used. Note that data is only delivered for leaf body parts. * * The second parameter, options, is an optional object containing the options * for the parser. The following are the options that the parser may use: * pruneat: [default=""] * Treat the message as starting at the given part number, so that no parts * above are returned. * bodyformat: one of {none, raw, nodecode, decode} [default=nodecode] * How to return the bodies of parts: * none: no part data is returned * raw: the body of the part is passed through raw * nodecode: the body is passed through without decoding QP/Base64 * decode: quoted-printable and base64 are fully decoded * strformat: one of {binarystring, unicode, typedarray} [default=binarystring] * How to treat output strings: * binarystring: Data is a JS string with chars in the range [\x00-\xff] * unicode: Data for text parts is converted to UTF-16; data for other * parts is a typed array buffer, akin to typedarray. * typedarray: Data is a JS typed array buffer * charset: [default=""] * What charset to assume if no charset information is explicitly provided. * This only matters if strformat is unicode. See above note on charsets * for more details. * force-charset: [default=false] * If true, this coerces all types to use the charset option, even if the * message specifies a different content-type. * stripcontinuations: [default=true] * If true, then the newlines in headers are removed in the returned * header objects. * onerror: [default = nop-function] * An error function that is called if an emitter callback throws an error. * By default, such errors are swallowed by the parser. If you want the * parser itself to throw an error, rethrow it via the onerror function. */ function MimeParser(emitter, options) { /// The actual emitter this._emitter = emitter; /// Options for the parser (those listed here are defaults) this._options = { pruneat: "", bodyformat: "nodecode", strformat: "binarystring", stripcontinuations: true, charset: "", "force-charset": false, onerror: function swallow(error) {} }; // Load the options as a copy here (prevents people from changing on the fly). if (options) for (var opt in options) { this._options[opt] = options[opt]; } // Ensure that the error function is in fact a function if (typeof this._options.onerror != "function") throw new Exception("onerror callback must be a function"); // Reset the parser this.resetParser(); } /** * Resets the parser to read a new message. This method need not be called * immediately after construction. */ MimeParser.prototype.resetParser = function () { /// Current parser state this._state = PARSING_HEADERS; /// Input data that needs to be held for buffer conditioning this._holdData = ''; /// Complete collection of headers (also used to accumulate _headerData) this._headerData = ''; /// Whether or not emitter.startMessage has been called this._triggeredCall = false; /// Splitting input this._splitRegex = this._handleSplit = undefined; /// Subparsing this._subparser = this._subPartNum = undefined; /// Data that has yet to be consumed by _convertData this._savedBuffer = ''; /// Convert data this._convertData = undefined; /// String decoder this._decoder = undefined; }; /** * Deliver a buffer of data to the parser. * * @param buffer {BinaryString} The raw data to add to the message. */ MimeParser.prototype.deliverData = function (buffer) { // In ideal circumstances, we'd like to parse the message all at once. In // reality, though, data will be coming to us in packets. To keep the amount // of saved state low, we want to make basic guarantees about how packets get // delivered. Our basic model is a twist on line-buffering, as the format of // MIME and messages make it hard to not do so: we can handle multiple lines // at once. To ensure this, we start by conditioning the packet by // withholding data to make sure that the internal deliveries have the // guarantees. This implies that we need to do the following steps: // 1. We don't know if a `\r' comes from `\r\n' or the old mac line ending // until we see the next character. So withhold the last `\r'. // 2. Ensure that every packet ends on a newline. So scan for the end of the // line and withhold until the \r\n comes through. // [Note that this means that an input message that uses \r line endings and // is being passed to us via a line-buffered input is going to have most of // its data being withhold until the next buffer. Since \r is so uncommon of // a line ending in modern times, this is acceptable lossage.] // 3. Eliminate empty packets. // Add in previously saved data if (this._holdData) { buffer = this._holdData + buffer; this._holdData = ''; } // Condition the input, so that we get the multiline-buffering mentioned in // the above comment. if (buffer.length > 0) { [buffer, this._holdData] = conditionToEndOnCRLF(buffer); } // Ignore 0-length buffers. if (buffer.length == 0) return; // Signal the beginning, if we haven't done so. if (!this._triggeredCall) { this._callEmitter("startMessage"); this._triggeredCall = true; } // Finally, send it the internal parser. this._dispatchData("", buffer, true); } /** * Ensure that a set of data always ends in an end-of-line character. * * @param buffer {BinaryString} The data with no guarantees about where it ends. * @returns {BinaryString[]} An array of 2 binary strings where the first string * ends in a newline and the last string contains the * text in buffer following the first string. */ function conditionToEndOnCRLF(buffer) { // Find the last occurrence of '\r' or '\n' to split the string. However, we // don't want to consider '\r' if it is the very last character, as we need // the next packet to tell if the '\r' is the beginning of a CRLF or a line // ending by itself. let lastCR = buffer.lastIndexOf('\r', buffer.length - 2); let lastLF = buffer.lastIndexOf('\n'); let end = lastLF > lastCR ? lastLF : lastCR; return [buffer.substring(0, end + 1), buffer.substring(end + 1)]; }; /** * Tell the parser that all of the data has been delivered. * * This will flush all of the internal state of the parser. */ MimeParser.prototype.deliverEOF = function () { // Start of input buffered too long? Call start message now. if (!this._triggeredCall) { this._triggeredCall = true; this._callEmitter("startMessage"); } // Force a flush of all of the data. if (this._holdData) this._dispatchData("", this._holdData, true); this._dispatchEOF(""); // Signal to the emitter that we're done. this._callEmitter("endMessage"); }; /** * Calls a method on the emitter safely. * * This method ensures that errors in the emitter call won't cause the parser * to exit with an error, unless the user wants it to. * * @param funcname {String} The function name to call on the emitter. * @param args... Extra arguments to pass into the emitter callback. */ MimeParser.prototype._callEmitter = function (funcname) { if (this._emitter && funcname in this._emitter) { let args = Array.prototype.splice.call(arguments, 1); if (args.length > 0 && this._willIgnorePart(args[0])) { // partNum is always the first argument, so check to make sure that it // satisfies our emitter's pruneat requirement. return; } try { this._emitter[funcname].apply(this._emitter, args); } catch (e) { // We ensure that the onerror attribute in options is a function, so this // is always safe. this._options.onerror(e); } } }; /** * Helper function to decide if a part's output will never be seen. * * @param part {String} The number of the part. * @returns {Boolean} True if the emitter is not interested in this part. */ MimeParser.prototype._willIgnorePart = function (part) { if (this._options["pruneat"]) { let match = this._options["pruneat"]; let start = part.substr(0, match.length); // It needs to start with and follow with a new part indicator // (i.e., don't let 10 match with 1, but let 1.1 or 1$ do so) if (start != match || (match.length < part.length && "$.".indexOf(part[match.length]) == -1)) return true; } return false; }; ////////////////////// // MIME parser core // ////////////////////// // This MIME parser is a stateful parser; handling of the MIME tree is mostly // done by creating new parsers and feeding data to them manually. In parallel // to the externally-visible deliverData and deliverEOF, the two methods // _dispatchData and _dispatchEOF are the internal counterparts that do the // main work of moving data to where it needs to go; helper functions are used // to handle translation. // // The overall flow of the parser is this. First, it buffers all of the data // until the dual-CRLF pattern is noticed. Once that is found, it parses the // entire header chunk at once. As a result of header parsing, the parser enters // one of three modes for handling data, and uses a special regex to change // modes and handle state changes. Specific details about the states the parser // can be in are as follows: // PARSING_HEADERS: The input buffer is concatenated to the currently-received // text, which is then searched for the CRLFCRLF pattern. If found, the data // is split at this boundary; the first chunk is parsed using _parseHeaders, // and the second chunk will fall through to buffer processing. After // splitting, the headers are deliverd via the emitter, and _startBody is // called to set up state for the parser. // SEND_TO_BLACK_HOLE: All data in the input is ignored. // SEND_TO_EMITTER: All data is passed into the emitter, if it is desired. // Data can be optionally converted with this._convertData. // SEND_TO_SUBPARSER: All data is passed into the subparser's _dispatchData // method, using _subPartNum as the part number and _subparser as the object // to call. Data can be optionally converted first with this._convertData. // // Additional state modifications can be done using a regex in _splitRegex and // the callback method this._handleSplit(partNum, regexResult). The _handleSplit // callback is free to do any modification to the current parser, including // modifying the _splitRegex value. Packet conditioning guarantees that every // buffer string passed into _dispatchData will have started immediately after a // newline character in the fully assembled message. // // The this._convertData method, if present, is expected to return an array of // two values, [{typedarray, string} decoded_buffer, string unused_buffer], and // has as its arguments (string buffer, bool moreToCome). // // The header parsing by itself does very little parsing, only parsing as if all // headers were unstructured fields. Values are munged so that embedded newlines // are stripped and the result is also trimmed. Headers themselves are // canonicalized into lower-case. // Parser states. See the large comment above. var PARSING_HEADERS = 1; var SEND_TO_BLACK_HOLE = 2; var SEND_TO_EMITTER = 3; var SEND_TO_SUBPARSER = 4; /** * Main dispatch for incoming packet data. * * The incoming data needs to have been sanitized so that each packet begins on * a newline boundary. The part number for the current parser also needs to be * passed in. The checkSplit parameter controls whether or not the data in * buffer needs to be checked against _splitRegex; this is used internally for * the mechanics of splitting and should otherwise always be true. * * @param partNum {String} The part number being currently parsed. * @param buffer {BinaryString} The text (conditioned as mentioned above) to * pass to the parser. * @param checkSplit {Boolean} If true, split the text using _splitRegex. * This is set to false internally to handle * low-level splitting details. */ MimeParser.prototype._dispatchData = function (partNum, buffer, checkSplit) { // Are we parsing headers? if (this._state == PARSING_HEADERS) { this._headerData += buffer; // Find the end of the headers--either it's a CRLF at the beginning (in // which case we have no headers), or it's a pair of CRLFs. let result = /(?:^(?:\r\n|[\r\n]))|(\r\n|[\r\n])\1/.exec(this._headerData); if (result != null) { // If we found the end of headers, split the data at this point and send // the stuff after the double-CRLF into the later body parsing. let headers = this._headerData.substr(0, result.index); buffer = this._headerData.substring(result.index + result[0].length); this._headerData = headers; this._headers = this._parseHeaders(); this._callEmitter("startPart", partNum, this._headers); this._startBody(partNum); } else { return; } } // We're in the middle of the body. Start by testing the split regex, to see // if there are many things that need to be done. if (checkSplit && this._splitRegex) { let splitResult = this._splitRegex.exec(buffer); if (splitResult) { // Pass the text before the split through the current state. let start = splitResult.index, len = splitResult[0].length; if (start > 0) this._dispatchData(partNum, buffer.substr(0, start), false); // Tell the handler that we've seen the split. Note that this can change // any method on `this'. this._handleSplit(partNum, splitResult); // Send the rest of the data to where it needs to go. There could be more // splits in the data, so watch out! buffer = buffer.substring(start + len); if (buffer.length > 0) this._dispatchData(partNum, buffer, true); return; } } // Where does the data go? if (this._state == SEND_TO_BLACK_HOLE) { // Don't send any data when going to the black hole. return; } else if (this._state == SEND_TO_EMITTER) { // Don't pass body data if the format is to be none let passData = this._options["bodyformat"] != "none"; if (!passData || this._willIgnorePart(partNum)) return; buffer = this._applyDataConversion(buffer, this._options["strformat"]); if (buffer.length > 0) this._callEmitter("deliverPartData", partNum, buffer); } else if (this._state == SEND_TO_SUBPARSER) { buffer = this._applyDataConversion(buffer, "binarystring"); if (buffer.length > 0) this._subparser._dispatchData(this._subPartNum, buffer, true); } }; /** * Output data using the desired output format, saving data if data conversion * needs extra data to be saved. * * @param buf {BinaryString} The data to be sent to the output. * @param type {String} The type of the data to output. Valid values are * the same as the strformat option. * @returns Coerced and converted data that can be sent to the emitter or * subparser. */ MimeParser.prototype._applyDataConversion = function (buf, type) { // If we need to convert data, do so. if (this._convertData) { // Prepend leftover data from the last conversion. buf = this._savedBuffer + buf; [buf, this._savedBuffer] = this._convertData(buf, true); } return this._coerceData(buf, type, true); }; /** * Coerce the input buffer into the given output type. * * @param buffer {BinaryString|Uint8Array} The data to be converted. * @param type {String} The type to convert the data to. * @param more {boolean} If true, this function will never be * called again. * @returns {BinaryString|String|Uint8Array} The desired output format. */ /// Coerces the buffer (a string or typedarray) into a given type MimeParser.prototype._coerceData = function (buffer, type, more) { if (typeof buffer == "string") { // string -> binarystring is a nop if (type == "binarystring") return buffer; // Either we're going to array or unicode. Both people need the array var typedarray = mimeutils.stringToTypedArray(buffer); // If it's unicode, do the coercion from the array // If its typedarray, just return the synthesized one return type == "unicode" ? this._coerceData(typedarray, "unicode", more) : typedarray; } else if (type == "binarystring") { // Doing array -> binarystring return mimeutils.typedArrayToString(buffer); } else if (type == "unicode") { // Doing array-> unicode: Use the decoder set up earlier to convert if (this._decoder) return this._decoder.decode(buffer, {stream: more}); // If there is no charset, just return the typed array instead. return buffer; } throw new Error("Invalid type: " + type); }; /** * Signal that no more data will be dispatched to this parser. * * @param partNum {String} The part number being currently parsed. */ MimeParser.prototype._dispatchEOF = function (partNum) { if (this._state == PARSING_HEADERS) { // Unexpected EOF in headers. Parse them now and call startPart/endPart this._headers = this._parseHeaders(); this._callEmitter("startPart", partNum, this._headers); } else if (this._state == SEND_TO_SUBPARSER) { // Pass in any lingering data if (this._convertData && this._savedBuffer) this._subparser._dispatchData(this._subPartNum, this._convertData(this._savedBuffer, false)[0], true); this._subparser._dispatchEOF(this._subPartNum); // Clean up after ourselves this._subparser = null; } else if (this._convertData && this._savedBuffer) { // Convert lingering data let [buffer, ] = this._convertData(this._savedBuffer, false); buffer = this._coerceData(buffer, this._options["strformat"], false); if (buffer.length > 0) this._callEmitter("deliverPartData", partNum, buffer); } // We've reached EOF for this part; tell the emitter this._callEmitter("endPart", partNum); }; /** * Produce a dictionary of all headers as if they were unstructured fields. * * @returns {StructuredHeaders} The structured header objects for the header * block. */ MimeParser.prototype._parseHeaders = function () { let headers = new StructuredHeaders(this._headerData, this._options); // Fill the headers.contentType parameter of headers. let contentType = headers.get('Content-Type'); if (typeof contentType === "undefined") { contentType = headerparser.parseStructuredHeader('Content-Type', this._defaultContentType || 'text/plain'); Object.defineProperty(headers, "contentType", { get: function () { return contentType; } }); } else { Object.defineProperty(headers, "contentType", { configurable: false }); } // Find the charset for the current part. If the user requested a forced // conversion, use that first. Otherwise, check the content-type for one and // fallback to a default if it is not present. let charset = ''; if (this._options["force-charset"]) charset = this._options["charset"]; else if (contentType.has("charset")) charset = contentType.get("charset"); else charset = this._options["charset"]; headers.charset = charset; // Retain a copy of the charset so that users don't override our decision for // decoding body parts. this._charset = charset; return headers; }; /** * Initialize the parser state for the body of this message. * * @param partNum {String} The part number being currently parsed. */ MimeParser.prototype._startBody = function Parser_startBody(partNum) { let contentType = this._headers.contentType; // Should the bodyformat be raw, we just want to pass through all data without // trying to interpret it. if (this._options["bodyformat"] == "raw" && partNum == this._options["pruneat"]) { this._state = SEND_TO_EMITTER; return; } // The output depents on the content-type. Basic rule of thumb: // 1. Discrete media types (text, video, audio, image, application) are passed // through with no alterations beyond Content-Transfer-Encoding unpacking. // 2. Everything with a media type of multipart is treated the same. // 3. Any message/* type that acts like a mail message (rfc822, news, global) // is parsed as a header/body pair again. Most of the other message/* types // have similar structures, but they don't have cascading child subparts, // so it's better to pass their entire contents to the emitter and let the // consumer deal with them. // 4. For untyped data, there needs to be no Content-Type header. This helps // avoid false positives. if (contentType.mediatype == 'multipart') { // If there's no boundary type, everything will be part of the prologue of // the multipart message, so just feed everything into a black hole. if (!contentType.has('boundary')) { this._state = SEND_TO_BLACK_HOLE; return; } // The boundary of a multipart message needs to start with -- and be at the // beginning of the line. If -- is after the boundary, it represents the // terminator of the multipart. After the line, there may be only whitespace // and then the CRLF at the end. Since the CRLFs in here are necessary for // distinguishing the parts, they are not included in the subparts, so we // need to capture them in the regex as well to prevent them leaking out. this._splitRegex = new RegExp('(\r\n|[\r\n]|^)--' + contentType.get('boundary').replace(/[\\^$*+?.()|{}[\]]/g, '\\$&') + '(--)?[ \t]*(?:\r\n|[\r\n]|$)'); this._handleSplit = this._whenMultipart; this._subparser = new MimeParser(this._emitter, this._options); // multipart/digest defaults to message/rfc822 instead of text/plain if (contentType.subtype == "digest") this._subparser._defaultContentType = "message/rfc822"; // All text before the first boundary and after the closing boundary are // supposed to be ignored ("must be ignored", according to RFC 2046 §5.1.1); // in accordance with these wishes, ensure they don't get passed to any // deliverPartData. this._state = SEND_TO_BLACK_HOLE; // Multipart MIME messages stipulate that the final CRLF before the boundary // delimiter is not matched. When the packet ends on a CRLF, we don't know // if the next text could be the boundary. Therefore, we need to withhold // the last line of text to be sure of what's going on. The _convertData is // how we do this, even though we're not really converting any data. this._convertData = function mpart_no_leak_crlf(buffer, more) { let splitPoint = buffer.length; if (more) { if (buffer.charAt(splitPoint - 1) == '\n') splitPoint--; if (splitPoint >= 0 && buffer.charAt(splitPoint - 1) == '\r') splitPoint--; } let res = conditionToEndOnCRLF(buffer.substring(0, splitPoint)); let preLF = res[0]; let rest = res[1]; return [preLF, rest + buffer.substring(splitPoint)]; } } else if (contentType.type == 'message/rfc822' || contentType.type == 'message/global' || contentType.type == 'message/news') { // The subpart is just another header/body pair that goes to EOF, so just // return the parse from that blob this._state = SEND_TO_SUBPARSER; this._subPartNum = partNum + "$"; this._subparser = new MimeParser(this._emitter, this._options); // So, RFC 6532 happily allows message/global types to have CTE applied. // This means that subparts would need to be decoded to determine their // contents properly. There seems to be some evidence that message/rfc822 // that is illegally-encoded exists in the wild, so be lenient and decode // for any message/* type that gets here. let cte = this._extractHeader('content-transfer-encoding', ''); if (cte in ContentDecoders) this._convertData = ContentDecoders[cte]; } else { // Okay, we just have to feed the data into the output this._state = SEND_TO_EMITTER; if (this._options["bodyformat"] == "decode") { // If we wish to decode, look it up in one of our decoders. let cte = this._extractHeader('content-transfer-encoding', ''); if (cte in ContentDecoders) this._convertData = ContentDecoders[cte]; } } // Set up the encoder for charset conversions; only do this for text parts. // Other parts are almost certainly binary, so no translation should be // applied to them. if (this._options["strformat"] == "unicode" && contentType.mediatype == "text") { // If the charset is nonempty, initialize the decoder if (this._charset !== "") { this._decoder = new TextDecoder(this._charset); } else { // There's no charset we can use for decoding, so pass through as an // identity encoder or otherwise this._coerceData will complain. this._decoder = { decode: function identity_decoder(buffer) { return MimeParser.prototype._coerceData(buffer, "binarystring", true); } }; } } else { this._decoder = null; } }; // Internal split handling for multipart messages. /** * When a multipary boundary is found, handle the process of managing the * subparser state. This is meant to be used as a value for this._handleSplit. * * @param partNum {String} The part number being currently parsed. * @param lastResult {Array} The result of the regular expression match. */ MimeParser.prototype._whenMultipart = function (partNum, lastResult) { // Fix up the part number (don't do '' -> '.4' and don't do '1' -> '14') if (partNum != "") partNum += "."; if (!this._subPartNum) { // No count? This means that this is the first time we've seen the boundary, // so do some initialization for later here. this._count = 1; } else { // If we did not match a CRLF at the beginning of the line, strip CRLF from // the saved buffer. We do this in the else block because it is not // necessary for the prologue, since that gets ignored anyways. if (this._savedBuffer != '' && lastResult[1] === '') { let useEnd = this._savedBuffer.length - 1; if (this._savedBuffer[useEnd] == '\n') useEnd--; if (useEnd >= 0 && this._savedBuffer[useEnd] == '\r') useEnd--; this._savedBuffer = this._savedBuffer.substring(0, useEnd + 1); } // If we have saved data and we matched a CRLF, pass the saved data in. if (this._savedBuffer != '') this._subparser._dispatchData(this._subPartNum, this._savedBuffer, true); // We've seen the boundary at least once before, so this must end a subpart. // Tell that subpart that it has reached EOF. this._subparser._dispatchEOF(this._subPartNum); } this._savedBuffer = ''; // The regex feeder has a capture on the (--)?, so if its result is present, // then we have seen the terminator. Alternatively, the message may have been // mangled to exclude the terminator, so also check if EOF has occurred. if (lastResult[2] == undefined) { this._subparser.resetParser(); this._state = SEND_TO_SUBPARSER; this._subPartNum = partNum + this._count; this._count += 1; } else { // Ignore the epilogue this._splitRegex = null; this._state = SEND_TO_BLACK_HOLE; } }; /** * Return the structured header from the current header block, or a default if * it is not present. * * @param name {String} The header name to get. * @param dflt {String} The default MIME value of the header. * @returns The structured representation of the header. */ MimeParser.prototype._extractHeader = function (name, dflt) { name = name.toLowerCase(); // Normalize name return this._headers.has(name) ? this._headers.get(name) : headerparser.parseStructuredHeader(name, [dflt]); }; var ContentDecoders = {}; ContentDecoders['quoted-printable'] = mimeutils.decode_qp; ContentDecoders['base64'] = mimeutils.decode_base64; return MimeParser; }); def('headeremitter', function(require) { /** * This module implements the code for emitting structured representations of * MIME headers into their encoded forms. The code here is a companion to, * but completely independent of, jsmime.headerparser: the structured * representations that are used as input to the functions in this file are the * same forms that would be parsed. */ "use strict"; var mimeutils = require('./mimeutils'); // Get the default structured encoders and add them to the map var structuredHeaders = require('./structuredHeaders'); var encoders = new Map(); var preferredSpellings = structuredHeaders.spellings; for (let [header, encoder] of structuredHeaders.encoders) { addStructuredEncoder(header, encoder); } /// Clamp a value in the range [min, max], defaulting to def if it is undefined. function clamp(value, min, max, def) { if (value === undefined) return def; if (value < min) return min; if (value > max) return max; return value; } /** * An object that can assemble structured header representations into their MIME * representation. * * The character-counting portion of this class operates using individual JS * characters as its representation of logical character, which is not the same * as the number of octets used as UTF-8. If non-ASCII characters are to be * included in headers without some form of encoding, then care should be taken * to set the maximum line length to account for the mismatch between character * counts and octet counts: the maximum line is 998 octets, which could be as * few as 332 JS characters (non-BMP characters, although they take up 4 octets * in UTF-8, count as 2 in JS strings). * * This code takes care to only insert line breaks at the higher-level breaking * points in a header (as recommended by RFC 5322), but it may need to resort to * including them more aggressively if this is not possible. If even aggressive * line-breaking cannot allow a header to be emitted without violating line * length restrictions, the methods will throw an exception to indicate this * situation. * * In general, this code does not attempt to modify its input; for example, it * does not attempt to change the case of any input characters, apply any * Unicode normalization algorithms, or convert email addresses to ACE where * applicable. The biggest exception to this rule is that most whitespace is * collapsed to a single space, even in unstructured headers, while most leading * and trailing whitespace is trimmed from inputs. * * @param {StreamHandler} handler The handler to which all output is sent. * @param {Function(String)} handler.deliverData Receives encoded data. * @param {Function()} handler.deliverEOF Sent when all text is sent. * @param {Object} options Options for the emitter. * @param [options.softMargin=78] {30 <= Integer <= 900} * The ideal maximum number of logical characters to include in a line, not * including the final CRLF pair. Lines may exceed this margin if parameters * are excessively long. * @param [options.hardMargin=332] {softMargin <= Integer <= 998} * The maximum number of logical characters that can be included in a line, * not including the final CRLF pair. If this count would be exceeded, then * an error will be thrown and encoding will not be possible. * @param [options.useASCII=true] {Boolean} * If true, then RFC 2047 and RFC 2231 encoding of headers will be performed * as needed to retain headers as ASCII. */ function HeaderEmitter(handler, options) { /// The inferred value of options.useASCII this._useASCII = options.useASCII === undefined ? true : options.useASCII; /// The handler to use. this._handler = handler; /** * The current line being built; note that we may insert a line break in the * middle to keep under the maximum line length. * * @type String * @private */ this._currentLine = ""; // Our bounds for soft and margins are not completely arbitrary. The minimum // amount we need to encode is 20 characters, which can encode a single // non-BMP character with RFC 2047. The value of 30 is chosen to give some // breathing room for delimiters or other unbreakable characters. The maximum // length is 998 octets, per RFC 5322; soft margins are slightly lower to // allow for breathing room as well. The default of 78 for the soft margin is // recommended by RFC 5322; the default of 332 for the hard margin ensures // that UTF-8 encoding the output never violates the 998 octet limit. this._softMargin = clamp(options.softMargin, 30, 900, 78); this._hardMargin = clamp(options.hardMargin, this._softMargin, 998, 332); /** * The index of the last preferred breakable position in the current line. * * @type Integer * @private */ this._preferredBreakpoint = 0; } /////////////////////// // Low-level methods // /////////////////////// // Explanation of the emitter internals: // RFC 5322 requires that we wrap our lines, ideally at 78 characters and at // least by 998 octets. We can't wrap in arbitrary places, but wherever CFWS is // valid... and ideally wherever clients are likely to expect it. In theory, we // can break between every token (this is how RFC 822 operates), but, in RFC // 5322, many of those breaks are relegated to obsolete productions, mostly // because it is common to not properly handle breaks in those locations. // // So how do we do line breaking? The algorithm we implement is greedy, to // simplify implementation. There are two margins: the soft margin, which we // want to keep within, and the hard margin, which we absolutely have to keep // within. There are also two kinds of break points: preferred and emergency. // As long as we keep the line within the hard margin, we will only break at // preferred breakpoints; emergency breakpoints are only used if we would // otherwise exceed the hard margin. // // For illustration, here is an example header and where these break points are // located: // // To: John "The Rock" Smith // Preferred: ^ ^ ^ // Emergency: ^ ^ ^ ^^ ^ ^ ^ ^ ^ // // Preferred breakpoints are indicated by setting the mayBreakAfter parameter of // addText to true, while emergency breakpoints are set after every token passed // into addText. This is handled implicitly by only adding text to _currentLine // if it ends in an emergency breakpoint. // // Internally, the code keeps track of margins by use of two variables. The // _softMargin and _hardMargin variables encode the positions at which code must // absolutely break, and are set up from the initial options parameter. Breaking // happens when _currentLine.length approaches these values, as mentioned above. /** * Send a header line consisting of the first N characters to the handler. * * If the count parameter is missing, then we presume that the current header * value being emitted is done and therefore we should not send a continuation * space. Otherwise, we presume that we're still working, so we will send the * continuation space. * * @private * @param [count] {Integer} The number of characters in the current line to * include before wrapping. */ HeaderEmitter.prototype._commitLine = function (count) { let isContinuing = typeof count !== "undefined"; // Split at the point, and lop off whitespace immediately before and after. if (isContinuing) { var firstN = this._currentLine.slice(0, count).trimRight(); var lastN = this._currentLine.slice(count).trimLeft(); } else { var firstN = this._currentLine.trimRight(); var lastN = ""; } // How many characters do we need to shift preferred/emergency breakpoints? let shift = this._currentLine.length - lastN.length; // Send the line plus the final CRLF. this._handler.deliverData(firstN + '\r\n'); // Fill the start of the line with the new data. this._currentLine = lastN; // If this is a continuation, add an extra space at the beginning of the line. // Adjust the breakpoint shift amount as well. if (isContinuing) { this._currentLine = ' ' + this._currentLine; shift++; } // We will always break at a point at or after the _preferredBreakpoint, if it // exists, so this always gets reset to 0. this._preferredBreakpoint = 0; }; /** * Reserve at least length characters in the current line. If there aren't * enough characters, insert a line break. * * @private * @param length {Integer} The number of characters to reserve space for. * @return {Boolean} Whether or not there is enough space for length characters. */ HeaderEmitter.prototype._reserveTokenSpace = function (length) { // We are not going to do a sanity check that length is within the wrap // margins. The rationale is that this lets code simply call this function to // force a higher-level line break than normal preferred line breaks (see // addAddress for an example use). The text that would be added may need to be // itself broken up, so it might not need all the length anyways, but it // starts the break already. // If we have enough space, we don't need to do anything. if (this._currentLine.length + length <= this._softMargin) return true; // If we have a preferred breakpoint, commit the line at that point, and see // if that is sufficient line-breaking. if (this._preferredBreakpoint > 0) { this._commitLine(this._preferredBreakpoint); if (this._currentLine.length + length <= this._softMargin) return true; } // At this point, we can no longer keep within the soft margin. Let us see if // we can fit within the hard margin. if (this._currentLine.length + length <= this._hardMargin) { return true; } // Adding the text to length would violate the hard margin as well. Break at // the last emergency breakpoint. if (this._currentLine.length > 0) { this._commitLine(this._currentLine.length); } // At this point, if there is still insufficient room in the hard margin, we // can no longer do anything to encode this word. Bail. return this._currentLine.length + length <= this._hardMargin; }; /** * Adds a block of text to the current header, inserting a break if necessary. * If mayBreakAfter is true and text does not end in whitespace, a single space * character may be added to the output. If the text could not be added without * violating line length restrictions, an error is thrown instead. * * @protected * @param {String} text The text to add to the output. * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred * breakpoint. */ HeaderEmitter.prototype.addText = function (text, mayBreakAfter) { // Try to reserve space for the tokens. If we can't, give up. if (!this._reserveTokenSpace(text.length)) throw new Error("Cannot encode " + text + " due to length."); this._currentLine += text; if (mayBreakAfter) { // Make sure that there is an extra space if text could break afterwards. this._preferredBreakpoint = this._currentLine.length; if (text[text.length - 1] != ' ') { this._currentLine += ' '; } } }; /** * Adds a block of text that may need quoting if it contains some character in * qchars. If it is already quoted, no quoting will be applied. If the text * cannot be added without violating maximum line length, an error is thrown * instead. * * @protected * @param {String} text The text to add to the output. * @param {String} qchars The set of characters that cannot appear * outside of a quoted string. * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred * breakpoint. */ HeaderEmitter.prototype.addQuotable = function (text, qchars, mayBreakAfter) { // No text -> no need to be quoted (prevents strict warning errors). if (text.length == 0) return; // Figure out if we need to quote the string. Don't quote a string which // already appears to be quoted. let needsQuote = false; if (!(text[0] == '"' && text[text.length - 1] == '"') && qchars != '') { for (let i = 0; i < text.length; i++) { if (qchars.includes(text[i])) { needsQuote = true; break; } } } if (needsQuote) text = '"' + text.replace(/["\\]/g, "\\$&") + '"'; this.addText(text, mayBreakAfter); }; /** * Adds a block of text that corresponds to the phrase production in RFC 5322. * Such text is a sequence of atoms, quoted-strings, or RFC-2047 encoded-words. * This method will preprocess input to normalize all space sequences to a * single space. If the text cannot be added without violating maximum line * length, an error is thrown instead. * * @protected * @param {String} text The text to add to the output. * @param {String} qchars The set of characters that cannot appear * outside of a quoted string. * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred * breakpoint. */ HeaderEmitter.prototype.addPhrase = function (text, qchars, mayBreakAfter) { // Collapse all whitespace spans into a single whitespace node. text = text.replace(/[ \t\r\n]+/g, " "); // If we have non-ASCII text, encode it using RFC 2047. if (this._useASCII && nonAsciiRe.test(text)) { this.encodeRFC2047Phrase(text, mayBreakAfter); return; } // If quoting the entire string at once could fit in the line length, then do // so. The check here is very loose, but this will inform is if we are going // to definitely overrun the soft margin. if ((this._currentLine.length + text.length) < this._softMargin) { try { this.addQuotable(text, qchars, mayBreakAfter); // If we don't have a breakpoint, and the text is encoded as a sequence of // atoms (and not a quoted-string), then make the last space we added a // breakpoint, regardless of the mayBreakAfter setting. if (this._preferredBreakpoint == 0 && text.includes(" ")) { if (this._currentLine[this._currentLine.length - 1] != '"') this._preferredBreakpoint = this._currentLine.lastIndexOf(" "); } return; } catch (e) { // If we get an error at this point, we failed to add the quoted string // because the string was too long. Fall through to the case where we know // that the input was too long to begin with. } } // If the text is too long, split the quotable string at space boundaries and // add each word invidually. If we still can't add all those words, there is // nothing that we can do. let words = text.split(' '); for (let i = 0; i < words.length; i++) { this.addQuotable(words[i], qchars, i == words.length - 1 ? mayBreakAfter : true); } }; /// A regular expression for characters that need to be encoded. var nonAsciiRe = /[^\x20-\x7e]/; /// The beginnings of RFC 2047 encoded-word var b64Prelude = "=?UTF-8?B?", qpPrelude = "=?UTF-8?Q?"; /// A list of ASCII characters forbidden in RFC 2047 encoded-words var qpForbidden = "\"#$%&'(),.:;<=>?@[\\]^_`{|}~"; var hexString = "0123456789abcdef"; /** * Add a block of text as a single RFC 2047 encoded word. This does not try to * split words if they are too long. * * @private * @param {Uint8Array} encodedText The octets to encode. * @param {Boolean} useQP If true, use quoted-printable; if false, * use base64. * @param {Boolean} mayBreakAfter If true, the end of this text is a * preferred breakpoint. */ HeaderEmitter.prototype._addRFC2047Word = function (encodedText, useQP, mayBreakAfter) { let binaryString = mimeutils.typedArrayToString(encodedText); if (useQP) { var token = qpPrelude; for (let i = 0; i < encodedText.length; i++) { if (encodedText[i] < 0x20 || encodedText[i] >= 0x7F || qpForbidden.includes(binaryString[i])) { let ch = encodedText[i]; token += "=" + hexString[(ch & 0xf0) >> 4] + hexString[ch & 0x0f]; } else if (binaryString[i] == " ") { token += "_"; } else { token += binaryString[i]; } } token += "?="; } else { var token = b64Prelude + btoa(binaryString) + "?="; } this.addText(token, mayBreakAfter); }; /** * Add a block of text as potentially several RFC 2047 encoded-word tokens. * * @protected * @param {String} text The text to add to the output. * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred * breakpoint. */ HeaderEmitter.prototype.encodeRFC2047Phrase = function (text, mayBreakAfter) { // Start by encoding the text into UTF-8 directly. let encodedText = new TextEncoder("UTF-8").encode(text); // Make sure there's enough room for a single token. let minLineLen = b64Prelude.length + 10; // Eight base64 characters plus ?= if (!this._reserveTokenSpace(minLineLen)) { this._commitLine(this._currentLine.length); } // Try to encode as much UTF-8 text as possible in each go. let b64Len = 0, qpLen = 0, start = 0; let maxChars = (this._softMargin - this._currentLine.length) - (b64Prelude.length + 2); for (let i = 0; i < encodedText.length; i++) { let b64Inc = 0, qpInc = 0; // The length we need for base64 is ceil(length / 3) * 4... if ((i - start) % 3 == 0) b64Inc += 4; // The length for quoted-printable is 3 chars only if encoded if (encodedText[i] < 0x20 || encodedText[i] >= 0x7f || qpForbidden.includes(String.fromCharCode(encodedText[i]))) { qpInc = 3; } else { qpInc = 1; } if (b64Len + b64Inc > maxChars && qpLen + qpInc > maxChars) { // Oops, we have too many characters! We need to encode everything through // the current character. However, we can't split in the middle of a // multibyte character. In UTF-8, characters that start with 10xx xxxx are // the middle of multibyte characters, so backtrack until the start // character is legal. while ((encodedText[i] & 0xC0) == 0x80) --i; // Add this part of the word and then make a continuation. this._addRFC2047Word(encodedText.subarray(start, i), b64Len >= qpLen, true); // Reset the array for parsing. start = i; --i; // Reparse this character as well b64Len = qpLen = 0; maxChars = this._softMargin - b64Prelude.length - 3; } else { // Add the counts for the current variable to the count to encode. b64Len += b64Inc; qpLen += qpInc; } } // Add the entire array at this point. this._addRFC2047Word(encodedText.subarray(start), b64Len >= qpLen, mayBreakAfter); }; //////////////////////// // High-level methods // //////////////////////// /** * Add the header name, with the colon and trailing space, to the output. * * @public * @param {String} name The name of the header. */ HeaderEmitter.prototype.addHeaderName = function (name) { this._currentLine = this._currentLine.trimRight(); if (this._currentLine.length > 0) { this._commitLine(); } this.addText(name + ": ", false); }; /** * Add a header and its structured value to the output. * * The name can be any case-insensitive variant of a known structured header; * the output will include the preferred name of the structure instead of the * case put into the name. If no structured encoder can be found, and the input * value is a string, then the header is assumed to be unstructured and the * value is added as if {@link addUnstructured} were called. * * @public * @param {String} name The name of the header. * @param value The structured value of the header. */ HeaderEmitter.prototype.addStructuredHeader = function (name, value) { let lowerName = name.toLowerCase(); if (encoders.has(lowerName)) { this.addHeaderName(preferredSpellings.get(lowerName)); encoders.get(lowerName).call(this, value); } else if (typeof value === "string") { // Assume it's an unstructured header. // All-lower-case-names are ugly, so capitalize first letters. name = name.replace(/(^|-)[a-z]/g, function(match) { return match.toUpperCase(); }); this.addHeaderName(name); this.addUnstructured(value); } else { throw new Error("Unknown header " + name); } }; /** * Add a single address to the header. The address is an object consisting of a * possibly-empty display name and an email address. * * @public * @param Address addr The address to be added. * @param {String} addr.name The (possibly-empty) name of the address to add. * @param {String} addr.email The email of the address to add. * @see headerparser.parseAddressingHeader */ HeaderEmitter.prototype.addAddress = function (addr) { // If we have a display name, add that first. if (addr.name) { // This is a simple estimate that keeps names on one line if possible. this._reserveTokenSpace(addr.name.length + addr.email.length + 3); this.addPhrase(addr.name, ",()<>[]:;@.\"", true); // If we don't have an email address, don't write out the angle brackets for // the address. It's already an abnormal situation should this appear, and // this has better round-tripping properties. if (!addr.email) return; this.addText("<", false); } // Find the local-part and domain of the address, since the local-part may // need to be quoted separately. Note that the @ goes to the domain, so that // the local-part may be quoted if it needs to be. let at = addr.email.lastIndexOf("@"); let localpart = "", domain = "" if (at == -1) localpart = addr.email; else { localpart = addr.email.slice(0, at); domain = addr.email.slice(at); } this.addQuotable(localpart, "()<>[]:;@\\,\" !", false); this.addText(domain + (addr.name ? ">" : ""), false); }; /** * Add an array of addresses and groups to the output. Such an array may be * found as the output of {@link headerparser.parseAddressingHeader}. Each * element is either an address (an object with properties name and email), or a * group (an object with properties name and group). * * @public * @param {(Address|Group)[]} addrs A collection of addresses to add. * @param {String} addrs[i].name The (possibly-empty) name of the * address or the group to add. * @param {String} [addrs[i].email] The email of the address to add. * @param {Address[]} [addrs[i].group] A list of email addresses in the group. * @see HeaderEmitter.addAddress * @see headerparser.parseAddressingHeader */ HeaderEmitter.prototype.addAddresses = function (addresses) { let needsComma = false; for (let addr of addresses) { // Add a comma if this is not the first element. if (needsComma) this.addText(", ", true); needsComma = true; if ("email" in addr) { this.addAddress(addr); } else { // A group has format name: member, member; // Note that we still add a comma after the group is completed. this.addPhrase(addr.name, ",()<>[]:;@.\"", false); this.addText(":", true); this.addAddresses(addr.group); this.addText(";", true); } } }; /** * Add an unstructured header value to the output. This effectively means only * inserting line breaks were necessary, and using RFC 2047 encoding where * necessary. * * @public * @param {String} text The text to add to the output. */ HeaderEmitter.prototype.addUnstructured = function (text) { if (text.length == 0) return; // Unstructured text is basically a phrase that can't be quoted. So, if we // have nothing in qchars, nothing should be quoted. this.addPhrase(text, "", false); }; /** RFC 822 labels for days of the week. */ var kDaysOfWeek = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]; /** * Formatting helper to output numbers between 0-9 as 00-09 instead. */ function padTo2Digits(num) { return num < 10 ? "0" + num : num.toString(); } /** * Add a date/time field to the output, using the JS date object as the time * representation. The value will be output using the timezone offset of the * date object, which is usually the timezone of the user (modulo timezone and * DST changes). * * Note that if the date is an invalid date (its internal date parameter is a * NaN value), this method throws an error instead of generating an invalid * string. * * @public * @param {Date} date The date to be added to the output string. */ HeaderEmitter.prototype.addDate = function (date) { // Rather than make a header plastered with NaN values, throw an error on // specific invalid dates. if (isNaN(date.getTime())) throw new Error("Cannot encode an invalid date"); // RFC 5322 says years can't be before 1900. The after 9999 is a bit that // derives from the specification saying that years have 4 digits. if (date.getFullYear() < 1900 || date.getFullYear() > 9999) throw new Error("Date year is out of encodable range"); // Start by computing the timezone offset for a day. We lack a good format, so // the the 0-padding is done by hand. Note that the tzoffset we output is in // the form ±hhmm, so we need to separate the offset (in minutes) into an hour // and minute pair. let tzOffset = date.getTimezoneOffset(); let tzOffHours = Math.abs(Math.trunc(tzOffset / 60)); let tzOffMinutes = Math.abs(tzOffset) % 60; let tzOffsetStr = (tzOffset > 0 ? "-" : "+") + padTo2Digits(tzOffHours) + padTo2Digits(tzOffMinutes); // Convert the day-time figure into a single value to avoid unwanted line // breaks in the middle. let dayTime = [ kDaysOfWeek[date.getDay()] + ",", date.getDate(), mimeutils.kMonthNames[date.getMonth()], date.getFullYear(), padTo2Digits(date.getHours()) + ":" + padTo2Digits(date.getMinutes()) + ":" + padTo2Digits(date.getSeconds()), tzOffsetStr ].join(" "); this.addText(dayTime, false); }; /** * Signal that the current header has been finished encoding. * * @public * @param {Boolean} deliverEOF If true, signal to the handler that no more text * will be arriving. */ HeaderEmitter.prototype.finish = function (deliverEOF) { this._commitLine(); if (deliverEOF) this._handler.deliverEOF(); }; /** * Make a streaming header emitter that outputs on the given handler. * * @param {StreamHandler} handler The handler to consume output * @param options Options to pass into the HeaderEmitter * constructor. * @returns {HeaderEmitter} A header emitter constructed with the given options. */ function makeStreamingEmitter(handler, options) { return new HeaderEmitter(handler, options); } function StringHandler() { this.value = ""; this.deliverData = function (str) { this.value += str; }; this.deliverEOF = function () { }; } /** * Given a header name and its structured value, output a string containing its * MIME-encoded value. The trailing CRLF for the header is included. * * @param {String} name The name of the structured header. * @param value The value of the structured header. * @param options Options for the HeaderEmitter constructor. * @returns {String} A MIME-encoded representation of the structured header. * @see HeaderEmitter.addStructuredHeader */ function emitStructuredHeader(name, value, options) { let handler = new StringHandler(); let emitter = new HeaderEmitter(handler, options); emitter.addStructuredHeader(name, value); emitter.finish(true); return handler.value; } /** * Given a map of header names and their structured values, output a string * containing all of their headers and their MIME-encoded values. * * This method is designed to be able to emit header values given the headerData * values produced by MIME parsing. Thus, the values of the map are arrays * corresponding to header multiplicity. * * @param {Map(String->Object[])} headerValues A map of header names to arrays * of their structured values. * @param options Options for the HeaderEmitter * constructor. * @returns {String} A MIME-encoded representation of the structured header. * @see HeaderEmitter.addStructuredHeader */ function emitStructuredHeaders(headerValues, options) { let handler = new StringHandler(); let emitter = new HeaderEmitter(handler, options); for (let instance of headerValues) { instance[1].forEach(function (e) { emitter.addStructuredHeader(instance[0], e) }); } emitter.finish(true); return handler.value; } /** * Add a custom structured MIME encoder to the set of known encoders. These * encoders are used for {@link emitStructuredHeader} and similar functions to * encode richer, more structured values instead of relying on string * representations everywhere. * * Structured encoders are functions which take in a single parameter * representing their structured value. The this parameter is set to be an * instance of {@link HeaderEmitter}, and it is intended that the several public * or protected methods on that class are useful for encoding values. * * There is a large set of structured encoders built-in to the jsmime library * already. * * @param {String} header The header name (in its preferred case) for * which the encoder will be used. * @param {Function(Value)} encoder The structured encoder function. */ function addStructuredEncoder(header, encoder) { let lowerName = header.toLowerCase(); encoders.set(lowerName, encoder); if (!preferredSpellings.has(lowerName)) preferredSpellings.set(lowerName, header); } return Object.freeze({ addStructuredEncoder: addStructuredEncoder, emitStructuredHeader: emitStructuredHeader, emitStructuredHeaders: emitStructuredHeaders, makeStreamingEmitter: makeStreamingEmitter }); }); def('jsmime', function(require) { return { MimeParser: require('./mimeparser'), headerparser: require('./headerparser'), headeremitter: require('./headeremitter') } }); return mods['jsmime']; }));