diff options
Diffstat (limited to 'mailnews/imap/src/nsIMAPGenericParser.cpp')
-rw-r--r-- | mailnews/imap/src/nsIMAPGenericParser.cpp | 484 |
1 files changed, 484 insertions, 0 deletions
diff --git a/mailnews/imap/src/nsIMAPGenericParser.cpp b/mailnews/imap/src/nsIMAPGenericParser.cpp new file mode 100644 index 000000000..3053ab540 --- /dev/null +++ b/mailnews/imap/src/nsIMAPGenericParser.cpp @@ -0,0 +1,484 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "msgCore.h" // for pre-compiled headers + +#include "nsImapCore.h" +#include "nsImapProtocol.h" +#include "nsIMAPGenericParser.h" +#include "nsStringGlue.h" + +////////////////// nsIMAPGenericParser ///////////////////////// + + +nsIMAPGenericParser::nsIMAPGenericParser() : +fNextToken(nullptr), +fCurrentLine(nullptr), +fLineOfTokens(nullptr), +fStartOfLineOfTokens(nullptr), +fCurrentTokenPlaceHolder(nullptr), +fAtEndOfLine(false), +fParserState(stateOK) +{ +} + +nsIMAPGenericParser::~nsIMAPGenericParser() +{ + PR_FREEIF( fCurrentLine ); + PR_FREEIF( fStartOfLineOfTokens); +} + +void nsIMAPGenericParser::HandleMemoryFailure() +{ + SetConnected(false); +} + +void nsIMAPGenericParser::ResetLexAnalyzer() +{ + PR_FREEIF( fCurrentLine ); + PR_FREEIF( fStartOfLineOfTokens ); + + fNextToken = fCurrentLine = fLineOfTokens = fStartOfLineOfTokens = fCurrentTokenPlaceHolder = nullptr; + fAtEndOfLine = false; +} + +bool nsIMAPGenericParser::LastCommandSuccessful() +{ + return fParserState == stateOK; +} + +void nsIMAPGenericParser::SetSyntaxError(bool error, const char *msg) +{ + if (error) + fParserState |= stateSyntaxErrorFlag; + else + fParserState &= ~stateSyntaxErrorFlag; + NS_ASSERTION(!error, "syntax error in generic parser"); +} + +void nsIMAPGenericParser::SetConnected(bool connected) +{ + if (connected) + fParserState &= ~stateDisconnectedFlag; + else + fParserState |= stateDisconnectedFlag; +} + +void nsIMAPGenericParser::skip_to_CRLF() +{ + while (Connected() && !fAtEndOfLine) + AdvanceToNextToken(); +} + +// fNextToken initially should point to +// a string after the initial open paren ("(") +// After this call, fNextToken points to the +// first character after the matching close +// paren. Only call AdvanceToNextToken() to get the NEXT +// token after the one returned in fNextToken. +void nsIMAPGenericParser::skip_to_close_paren() +{ + int numberOfCloseParensNeeded = 1; + while (ContinueParse()) + { + // go through fNextToken, account for nested parens + const char *loc; + for (loc = fNextToken; loc && *loc; loc++) + { + if (*loc == '(') + numberOfCloseParensNeeded++; + else if (*loc == ')') + { + numberOfCloseParensNeeded--; + if (numberOfCloseParensNeeded == 0) + { + fNextToken = loc + 1; + if (!fNextToken || !*fNextToken) + AdvanceToNextToken(); + return; + } + } + else if (*loc == '{' || *loc == '"') { + // quoted or literal + fNextToken = loc; + char *a = CreateString(); + PR_FREEIF(a); + break; // move to next token + } + } + if (ContinueParse()) + AdvanceToNextToken(); + } +} + +void nsIMAPGenericParser::AdvanceToNextToken() +{ + if (!fCurrentLine || fAtEndOfLine) + AdvanceToNextLine(); + if (Connected()) + { + if (!fStartOfLineOfTokens) + { + // this is the first token of the line; setup tokenizer now + fStartOfLineOfTokens = PL_strdup(fCurrentLine); + if (!fStartOfLineOfTokens) + { + HandleMemoryFailure(); + return; + } + fLineOfTokens = fStartOfLineOfTokens; + fCurrentTokenPlaceHolder = fStartOfLineOfTokens; + } + fNextToken = NS_strtok(WHITESPACE, &fCurrentTokenPlaceHolder); + if (!fNextToken) + { + fAtEndOfLine = true; + fNextToken = CRLF; + } + } +} + +void nsIMAPGenericParser::AdvanceToNextLine() +{ + PR_FREEIF( fCurrentLine ); + PR_FREEIF( fStartOfLineOfTokens); + + bool ok = GetNextLineForParser(&fCurrentLine); + if (!ok) + { + SetConnected(false); + fStartOfLineOfTokens = nullptr; + fLineOfTokens = nullptr; + fCurrentTokenPlaceHolder = nullptr; + fAtEndOfLine = true; + fNextToken = CRLF; + } + else if (!fCurrentLine) + { + HandleMemoryFailure(); + } + else + { + fNextToken = nullptr; + // determine if there are any tokens (without calling AdvanceToNextToken); + // otherwise we are already at end of line + NS_ASSERTION(strlen(WHITESPACE) == 3, "assume 3 chars of whitespace"); + char *firstToken = fCurrentLine; + while (*firstToken && (*firstToken == WHITESPACE[0] || + *firstToken == WHITESPACE[1] || *firstToken == WHITESPACE[2])) + firstToken++; + fAtEndOfLine = (*firstToken == '\0'); + } +} + +// advances |fLineOfTokens| by |bytesToAdvance| bytes +void nsIMAPGenericParser::AdvanceTokenizerStartingPoint(int32_t bytesToAdvance) +{ + NS_PRECONDITION(bytesToAdvance>=0, "bytesToAdvance must not be negative"); + if (!fStartOfLineOfTokens) + { + AdvanceToNextToken(); // the tokenizer was not yet initialized, do it now + if (!fStartOfLineOfTokens) + return; + } + + if(!fStartOfLineOfTokens) + return; + // The last call to AdvanceToNextToken() cleared the token separator to '\0' + // iff |fCurrentTokenPlaceHolder|. We must recover this token separator now. + if (fCurrentTokenPlaceHolder) + { + int endTokenOffset = fCurrentTokenPlaceHolder - fStartOfLineOfTokens - 1; + if (endTokenOffset >= 0) + fStartOfLineOfTokens[endTokenOffset] = fCurrentLine[endTokenOffset]; + } + + NS_ASSERTION(bytesToAdvance + (fLineOfTokens-fStartOfLineOfTokens) <= + (int32_t)strlen(fCurrentLine), "cannot advance beyond end of fLineOfTokens"); + fLineOfTokens += bytesToAdvance; + fCurrentTokenPlaceHolder = fLineOfTokens; +} + +// RFC3501: astring = 1*ASTRING-CHAR / string +// string = quoted / literal +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the Astring. Call AdvanceToNextToken() to get the token after it. +char *nsIMAPGenericParser::CreateAstring() +{ + if (*fNextToken == '{') + return CreateLiteral(); // literal + else if (*fNextToken == '"') + return CreateQuoted(); // quoted + else + return CreateAtom(true); // atom +} + +// Create an atom +// This function does not advance the parser. +// Call AdvanceToNextToken() to get the next token after the atom. +// RFC3501: atom = 1*ATOM-CHAR +// ASTRING-CHAR = ATOM-CHAR / resp-specials +// ATOM-CHAR = <any CHAR except atom-specials> +// atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / +// quoted-specials / resp-specials +// list-wildcards = "%" / "*" +// quoted-specials = DQUOTE / "\" +// resp-specials = "]" +// "Characters are 7-bit US-ASCII unless otherwise specified." [RFC3501, 1.2.] +char *nsIMAPGenericParser::CreateAtom(bool isAstring) +{ + char *rv = PL_strdup(fNextToken); + if (!rv) + { + HandleMemoryFailure(); + return nullptr; + } + // We wish to stop at the following characters (in decimal ascii) + // 1-31 (CTL), 32 (SP), 34 '"', 37 '%', 40-42 "()*", 92 '\\', 123 '{' + // also, ']' is only allowed in astrings + char *last = rv; + char c = *last; + while ((c > 42 || c == 33 || c == 35 || c == 36 || c == 38 || c == 39) + && c != '\\' && c != '{' && (isAstring || c != ']')) + c = *++last; + if (rv == last) { + SetSyntaxError(true, "no atom characters found"); + PL_strfree(rv); + return nullptr; + } + if (*last) + { + // not the whole token was consumed + *last = '\0'; + AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + (last-rv)); + } + return rv; +} + +// CreateNilString return either NULL (for "NIL") or a string +// Call with fNextToken pointing to the thing which we think is the nilstring. +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the string. +// Regardless of type, call AdvanceToNextToken() to get the token after it. +// RFC3501: nstring = string / nil +// nil = "NIL" +char *nsIMAPGenericParser::CreateNilString() +{ + if (!PL_strncasecmp(fNextToken, "NIL", 3)) + { + // check if there is text after "NIL" in fNextToken, + // equivalent handling as in CreateQuoted + if (fNextToken[3]) + AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + 3); + return NULL; + } + else + return CreateString(); +} + + +// Create a string, which can either be quoted or literal, +// but not an atom. +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the String. Call AdvanceToNextToken() to get the token after it. +char *nsIMAPGenericParser::CreateString() +{ + if (*fNextToken == '{') + { + char *rv = CreateLiteral(); // literal + return (rv); + } + else if (*fNextToken == '"') + { + char *rv = CreateQuoted(); // quoted + return (rv); + } + else + { + SetSyntaxError(true, "string does not start with '{' or '\"'"); + return NULL; + } +} + +// This function sets fCurrentTokenPlaceHolder immediately after the end of the +// closing quote. Call AdvanceToNextToken() to get the token after it. +// QUOTED_CHAR ::= <any TEXT_CHAR except quoted_specials> / +// "\" quoted_specials +// TEXT_CHAR ::= <any CHAR except CR and LF> +// quoted_specials ::= <"> / "\" +// Note that according to RFC 1064 and RFC 2060, CRs and LFs are not allowed +// inside a quoted string. It is sufficient to read from the current line only. +char *nsIMAPGenericParser::CreateQuoted(bool /*skipToEnd*/) +{ + // one char past opening '"' + char *currentChar = fCurrentLine + (fNextToken - fStartOfLineOfTokens) + 1; + + int escapeCharsCut = 0; + nsCString returnString(currentChar); + int charIndex; + for (charIndex = 0; returnString.CharAt(charIndex) != '"'; charIndex++) + { + if (!returnString.CharAt(charIndex)) + { + SetSyntaxError(true, "no closing '\"' found in quoted"); + return nullptr; + } + else if (returnString.CharAt(charIndex) == '\\') + { + // eat the escape character, but keep the escaped character + returnString.Cut(charIndex, 1); + escapeCharsCut++; + } + } + // +2 because of the start and end quotes + AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + + charIndex + escapeCharsCut + 2); + + returnString.SetLength(charIndex); + return ToNewCString(returnString); +} + + +// This function leaves us off with fCurrentTokenPlaceHolder immediately after +// the end of the literal string. Call AdvanceToNextToken() to get the token +// after the literal string. +// RFC3501: literal = "{" number "}" CRLF *CHAR8 +// ; Number represents the number of CHAR8s +// CHAR8 = %x01-ff +// ; any OCTET except NUL, %x00 +char *nsIMAPGenericParser::CreateLiteral() +{ + int32_t numberOfCharsInMessage = atoi(fNextToken + 1); + uint32_t numBytes = numberOfCharsInMessage + 1; + NS_ASSERTION(numBytes, "overflow!"); + if (!numBytes) + return nullptr; + char *returnString = (char *)PR_Malloc(numBytes); + if (!returnString) + { + HandleMemoryFailure(); + return nullptr; + } + + int32_t currentLineLength = 0; + int32_t charsReadSoFar = 0; + int32_t bytesToCopy = 0; + while (charsReadSoFar < numberOfCharsInMessage) + { + AdvanceToNextLine(); + if (!ContinueParse()) + break; + + currentLineLength = strlen(fCurrentLine); + bytesToCopy = (currentLineLength > numberOfCharsInMessage - charsReadSoFar ? + numberOfCharsInMessage - charsReadSoFar : currentLineLength); + NS_ASSERTION(bytesToCopy, "zero-length line?"); + memcpy(returnString + charsReadSoFar, fCurrentLine, bytesToCopy); + charsReadSoFar += bytesToCopy; + } + + if (ContinueParse()) + { + if (currentLineLength == bytesToCopy) + { + // We have consumed the entire line. + // Consider the input "{4}\r\n" "L1\r\n" " A2\r\n" which is read + // line-by-line. Reading an Astring, this should result in "L1\r\n". + // Note that the second line is "L1\r\n", where the "\r\n" is part of + // the literal. Hence, we now read the next line to ensure that the + // next call to AdvanceToNextToken() leads to fNextToken=="A2" in our + // example. + AdvanceToNextLine(); + } + else + AdvanceTokenizerStartingPoint(bytesToCopy); + } + + returnString[charsReadSoFar] = 0; + return returnString; +} + + +// Call this to create a buffer containing all characters within +// a given set of parentheses. +// Call this with fNextToken[0]=='(', that is, the open paren +// of the group. +// It will allocate and return all characters up to and including the corresponding +// closing paren, and leave the parser in the right place afterwards. +char *nsIMAPGenericParser::CreateParenGroup() +{ + NS_ASSERTION(fNextToken[0] == '(', "we don't have a paren group!"); + + int numOpenParens = 0; + AdvanceTokenizerStartingPoint(fNextToken - fLineOfTokens); + + // Build up a buffer containing the paren group. + nsCString returnString; + char *parenGroupStart = fCurrentTokenPlaceHolder; + NS_ASSERTION(parenGroupStart[0] == '(', "we don't have a paren group (2)!"); + while (*fCurrentTokenPlaceHolder) + { + if (*fCurrentTokenPlaceHolder == '{') // literal + { + // Ensure it is a properly formatted literal. + NS_ASSERTION(!strcmp("}\r\n", fCurrentTokenPlaceHolder + strlen(fCurrentTokenPlaceHolder) - 3), "not a literal"); + + // Append previous characters and the "{xx}\r\n" to buffer. + returnString.Append(parenGroupStart); + + // Append literal itself. + AdvanceToNextToken(); + if (!ContinueParse()) + break; + char *lit = CreateLiteral(); + NS_ASSERTION(lit, "syntax error or out of memory"); + if (!lit) + break; + returnString.Append(lit); + PR_Free(lit); + if (!ContinueParse()) + break; + parenGroupStart = fCurrentTokenPlaceHolder; + } + else if (*fCurrentTokenPlaceHolder == '"') // quoted + { + // Append the _escaped_ version of the quoted string: + // just skip it (because the quoted string must be on the same line). + AdvanceToNextToken(); + if (!ContinueParse()) + break; + char *q = CreateQuoted(); + if (!q) + break; + PR_Free(q); + if (!ContinueParse()) + break; + } + else + { + // Append this character to the buffer. + char c = *fCurrentTokenPlaceHolder++; + if (c == '(') + numOpenParens++; + else if (c == ')') + { + numOpenParens--; + if (numOpenParens == 0) + break; + } + } + } + + if (numOpenParens != 0 || !ContinueParse()) + { + SetSyntaxError(true, "closing ')' not found in paren group"); + return nullptr; + } + + returnString.Append(parenGroupStart, fCurrentTokenPlaceHolder - parenGroupStart); + AdvanceToNextToken(); + return ToNewCString(returnString); +} + |