/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "msgCore.h" // for pre-compiled headers #include "nsImapCore.h" #include "nsImapProtocol.h" #include "nsIMAPGenericParser.h" #include "nsStringGlue.h" ////////////////// nsIMAPGenericParser ///////////////////////// nsIMAPGenericParser::nsIMAPGenericParser() : fNextToken(nullptr), fCurrentLine(nullptr), fLineOfTokens(nullptr), fStartOfLineOfTokens(nullptr), fCurrentTokenPlaceHolder(nullptr), fAtEndOfLine(false), fParserState(stateOK) { } nsIMAPGenericParser::~nsIMAPGenericParser() { PR_FREEIF( fCurrentLine ); PR_FREEIF( fStartOfLineOfTokens); } void nsIMAPGenericParser::HandleMemoryFailure() { SetConnected(false); } void nsIMAPGenericParser::ResetLexAnalyzer() { PR_FREEIF( fCurrentLine ); PR_FREEIF( fStartOfLineOfTokens ); fNextToken = fCurrentLine = fLineOfTokens = fStartOfLineOfTokens = fCurrentTokenPlaceHolder = nullptr; fAtEndOfLine = false; } bool nsIMAPGenericParser::LastCommandSuccessful() { return fParserState == stateOK; } void nsIMAPGenericParser::SetSyntaxError(bool error, const char *msg) { if (error) fParserState |= stateSyntaxErrorFlag; else fParserState &= ~stateSyntaxErrorFlag; NS_ASSERTION(!error, "syntax error in generic parser"); } void nsIMAPGenericParser::SetConnected(bool connected) { if (connected) fParserState &= ~stateDisconnectedFlag; else fParserState |= stateDisconnectedFlag; } void nsIMAPGenericParser::skip_to_CRLF() { while (Connected() && !fAtEndOfLine) AdvanceToNextToken(); } // fNextToken initially should point to // a string after the initial open paren ("(") // After this call, fNextToken points to the // first character after the matching close // paren. Only call AdvanceToNextToken() to get the NEXT // token after the one returned in fNextToken. void nsIMAPGenericParser::skip_to_close_paren() { int numberOfCloseParensNeeded = 1; while (ContinueParse()) { // go through fNextToken, account for nested parens const char *loc; for (loc = fNextToken; loc && *loc; loc++) { if (*loc == '(') numberOfCloseParensNeeded++; else if (*loc == ')') { numberOfCloseParensNeeded--; if (numberOfCloseParensNeeded == 0) { fNextToken = loc + 1; if (!fNextToken || !*fNextToken) AdvanceToNextToken(); return; } } else if (*loc == '{' || *loc == '"') { // quoted or literal fNextToken = loc; char *a = CreateString(); PR_FREEIF(a); break; // move to next token } } if (ContinueParse()) AdvanceToNextToken(); } } void nsIMAPGenericParser::AdvanceToNextToken() { if (!fCurrentLine || fAtEndOfLine) AdvanceToNextLine(); if (Connected()) { if (!fStartOfLineOfTokens) { // this is the first token of the line; setup tokenizer now fStartOfLineOfTokens = PL_strdup(fCurrentLine); if (!fStartOfLineOfTokens) { HandleMemoryFailure(); return; } fLineOfTokens = fStartOfLineOfTokens; fCurrentTokenPlaceHolder = fStartOfLineOfTokens; } fNextToken = NS_strtok(WHITESPACE, &fCurrentTokenPlaceHolder); if (!fNextToken) { fAtEndOfLine = true; fNextToken = CRLF; } } } void nsIMAPGenericParser::AdvanceToNextLine() { PR_FREEIF( fCurrentLine ); PR_FREEIF( fStartOfLineOfTokens); bool ok = GetNextLineForParser(&fCurrentLine); if (!ok) { SetConnected(false); fStartOfLineOfTokens = nullptr; fLineOfTokens = nullptr; fCurrentTokenPlaceHolder = nullptr; fAtEndOfLine = true; fNextToken = CRLF; } else if (!fCurrentLine) { HandleMemoryFailure(); } else { fNextToken = nullptr; // determine if there are any tokens (without calling AdvanceToNextToken); // otherwise we are already at end of line NS_ASSERTION(strlen(WHITESPACE) == 3, "assume 3 chars of whitespace"); char *firstToken = fCurrentLine; while (*firstToken && (*firstToken == WHITESPACE[0] || *firstToken == WHITESPACE[1] || *firstToken == WHITESPACE[2])) firstToken++; fAtEndOfLine = (*firstToken == '\0'); } } // advances |fLineOfTokens| by |bytesToAdvance| bytes void nsIMAPGenericParser::AdvanceTokenizerStartingPoint(int32_t bytesToAdvance) { NS_PRECONDITION(bytesToAdvance>=0, "bytesToAdvance must not be negative"); if (!fStartOfLineOfTokens) { AdvanceToNextToken(); // the tokenizer was not yet initialized, do it now if (!fStartOfLineOfTokens) return; } if(!fStartOfLineOfTokens) return; // The last call to AdvanceToNextToken() cleared the token separator to '\0' // iff |fCurrentTokenPlaceHolder|. We must recover this token separator now. if (fCurrentTokenPlaceHolder) { int endTokenOffset = fCurrentTokenPlaceHolder - fStartOfLineOfTokens - 1; if (endTokenOffset >= 0) fStartOfLineOfTokens[endTokenOffset] = fCurrentLine[endTokenOffset]; } NS_ASSERTION(bytesToAdvance + (fLineOfTokens-fStartOfLineOfTokens) <= (int32_t)strlen(fCurrentLine), "cannot advance beyond end of fLineOfTokens"); fLineOfTokens += bytesToAdvance; fCurrentTokenPlaceHolder = fLineOfTokens; } // RFC3501: astring = 1*ASTRING-CHAR / string // string = quoted / literal // This function leaves us off with fCurrentTokenPlaceHolder immediately after // the end of the Astring. Call AdvanceToNextToken() to get the token after it. char *nsIMAPGenericParser::CreateAstring() { if (*fNextToken == '{') return CreateLiteral(); // literal else if (*fNextToken == '"') return CreateQuoted(); // quoted else return CreateAtom(true); // atom } // Create an atom // This function does not advance the parser. // Call AdvanceToNextToken() to get the next token after the atom. // RFC3501: atom = 1*ATOM-CHAR // ASTRING-CHAR = ATOM-CHAR / resp-specials // ATOM-CHAR = // atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / // quoted-specials / resp-specials // list-wildcards = "%" / "*" // quoted-specials = DQUOTE / "\" // resp-specials = "]" // "Characters are 7-bit US-ASCII unless otherwise specified." [RFC3501, 1.2.] char *nsIMAPGenericParser::CreateAtom(bool isAstring) { char *rv = PL_strdup(fNextToken); if (!rv) { HandleMemoryFailure(); return nullptr; } // We wish to stop at the following characters (in decimal ascii) // 1-31 (CTL), 32 (SP), 34 '"', 37 '%', 40-42 "()*", 92 '\\', 123 '{' // also, ']' is only allowed in astrings char *last = rv; char c = *last; while ((c > 42 || c == 33 || c == 35 || c == 36 || c == 38 || c == 39) && c != '\\' && c != '{' && (isAstring || c != ']')) c = *++last; if (rv == last) { SetSyntaxError(true, "no atom characters found"); PL_strfree(rv); return nullptr; } if (*last) { // not the whole token was consumed *last = '\0'; AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + (last-rv)); } return rv; } // CreateNilString return either NULL (for "NIL") or a string // Call with fNextToken pointing to the thing which we think is the nilstring. // This function leaves us off with fCurrentTokenPlaceHolder immediately after // the end of the string. // Regardless of type, call AdvanceToNextToken() to get the token after it. // RFC3501: nstring = string / nil // nil = "NIL" char *nsIMAPGenericParser::CreateNilString() { if (!PL_strncasecmp(fNextToken, "NIL", 3)) { // check if there is text after "NIL" in fNextToken, // equivalent handling as in CreateQuoted if (fNextToken[3]) AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + 3); return NULL; } else return CreateString(); } // Create a string, which can either be quoted or literal, // but not an atom. // This function leaves us off with fCurrentTokenPlaceHolder immediately after // the end of the String. Call AdvanceToNextToken() to get the token after it. char *nsIMAPGenericParser::CreateString() { if (*fNextToken == '{') { char *rv = CreateLiteral(); // literal return (rv); } else if (*fNextToken == '"') { char *rv = CreateQuoted(); // quoted return (rv); } else { SetSyntaxError(true, "string does not start with '{' or '\"'"); return NULL; } } // This function sets fCurrentTokenPlaceHolder immediately after the end of the // closing quote. Call AdvanceToNextToken() to get the token after it. // QUOTED_CHAR ::= / // "\" quoted_specials // TEXT_CHAR ::= // quoted_specials ::= <"> / "\" // Note that according to RFC 1064 and RFC 2060, CRs and LFs are not allowed // inside a quoted string. It is sufficient to read from the current line only. char *nsIMAPGenericParser::CreateQuoted(bool /*skipToEnd*/) { // one char past opening '"' char *currentChar = fCurrentLine + (fNextToken - fStartOfLineOfTokens) + 1; int escapeCharsCut = 0; nsCString returnString(currentChar); int charIndex; for (charIndex = 0; returnString.CharAt(charIndex) != '"'; charIndex++) { if (!returnString.CharAt(charIndex)) { SetSyntaxError(true, "no closing '\"' found in quoted"); return nullptr; } else if (returnString.CharAt(charIndex) == '\\') { // eat the escape character, but keep the escaped character returnString.Cut(charIndex, 1); escapeCharsCut++; } } // +2 because of the start and end quotes AdvanceTokenizerStartingPoint((fNextToken - fLineOfTokens) + charIndex + escapeCharsCut + 2); returnString.SetLength(charIndex); return ToNewCString(returnString); } // This function leaves us off with fCurrentTokenPlaceHolder immediately after // the end of the literal string. Call AdvanceToNextToken() to get the token // after the literal string. // RFC3501: literal = "{" number "}" CRLF *CHAR8 // ; Number represents the number of CHAR8s // CHAR8 = %x01-ff // ; any OCTET except NUL, %x00 char *nsIMAPGenericParser::CreateLiteral() { int32_t numberOfCharsInMessage = atoi(fNextToken + 1); uint32_t numBytes = numberOfCharsInMessage + 1; NS_ASSERTION(numBytes, "overflow!"); if (!numBytes) return nullptr; char *returnString = (char *)PR_Malloc(numBytes); if (!returnString) { HandleMemoryFailure(); return nullptr; } int32_t currentLineLength = 0; int32_t charsReadSoFar = 0; int32_t bytesToCopy = 0; while (charsReadSoFar < numberOfCharsInMessage) { AdvanceToNextLine(); if (!ContinueParse()) break; currentLineLength = strlen(fCurrentLine); bytesToCopy = (currentLineLength > numberOfCharsInMessage - charsReadSoFar ? numberOfCharsInMessage - charsReadSoFar : currentLineLength); NS_ASSERTION(bytesToCopy, "zero-length line?"); memcpy(returnString + charsReadSoFar, fCurrentLine, bytesToCopy); charsReadSoFar += bytesToCopy; } if (ContinueParse()) { if (currentLineLength == bytesToCopy) { // We have consumed the entire line. // Consider the input "{4}\r\n" "L1\r\n" " A2\r\n" which is read // line-by-line. Reading an Astring, this should result in "L1\r\n". // Note that the second line is "L1\r\n", where the "\r\n" is part of // the literal. Hence, we now read the next line to ensure that the // next call to AdvanceToNextToken() leads to fNextToken=="A2" in our // example. AdvanceToNextLine(); } else AdvanceTokenizerStartingPoint(bytesToCopy); } returnString[charsReadSoFar] = 0; return returnString; } // Call this to create a buffer containing all characters within // a given set of parentheses. // Call this with fNextToken[0]=='(', that is, the open paren // of the group. // It will allocate and return all characters up to and including the corresponding // closing paren, and leave the parser in the right place afterwards. char *nsIMAPGenericParser::CreateParenGroup() { NS_ASSERTION(fNextToken[0] == '(', "we don't have a paren group!"); int numOpenParens = 0; AdvanceTokenizerStartingPoint(fNextToken - fLineOfTokens); // Build up a buffer containing the paren group. nsCString returnString; char *parenGroupStart = fCurrentTokenPlaceHolder; NS_ASSERTION(parenGroupStart[0] == '(', "we don't have a paren group (2)!"); while (*fCurrentTokenPlaceHolder) { if (*fCurrentTokenPlaceHolder == '{') // literal { // Ensure it is a properly formatted literal. NS_ASSERTION(!strcmp("}\r\n", fCurrentTokenPlaceHolder + strlen(fCurrentTokenPlaceHolder) - 3), "not a literal"); // Append previous characters and the "{xx}\r\n" to buffer. returnString.Append(parenGroupStart); // Append literal itself. AdvanceToNextToken(); if (!ContinueParse()) break; char *lit = CreateLiteral(); NS_ASSERTION(lit, "syntax error or out of memory"); if (!lit) break; returnString.Append(lit); PR_Free(lit); if (!ContinueParse()) break; parenGroupStart = fCurrentTokenPlaceHolder; } else if (*fCurrentTokenPlaceHolder == '"') // quoted { // Append the _escaped_ version of the quoted string: // just skip it (because the quoted string must be on the same line). AdvanceToNextToken(); if (!ContinueParse()) break; char *q = CreateQuoted(); if (!q) break; PR_Free(q); if (!ContinueParse()) break; } else { // Append this character to the buffer. char c = *fCurrentTokenPlaceHolder++; if (c == '(') numOpenParens++; else if (c == ')') { numOpenParens--; if (numOpenParens == 0) break; } } } if (numOpenParens != 0 || !ContinueParse()) { SetSyntaxError(true, "closing ')' not found in paren group"); return nullptr; } returnString.Append(parenGroupStart, fCurrentTokenPlaceHolder - parenGroupStart); AdvanceToNextToken(); return ToNewCString(returnString); }