/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * nsIContentSerializer implementation that can be used with an * nsIDocumentEncoder to convert a DOM into plaintext in a nice way * (eg for copy/paste as plaintext). */ #include "nsPlainTextSerializer.h" #include "nsLWBrkCIID.h" #include "nsIServiceManager.h" #include "nsGkAtoms.h" #include "nsNameSpaceManager.h" #include "nsTextFragment.h" #include "nsContentUtils.h" #include "nsReadableUtils.h" #include "nsUnicharUtils.h" #include "nsCRT.h" #include "mozilla/dom/Element.h" #include "mozilla/Preferences.h" #include "mozilla/BinarySearch.h" #include "nsComputedDOMStyle.h" using namespace mozilla; using namespace mozilla::dom; #define PREF_STRUCTS "converter.html2txt.structs" #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" #define PREF_ALWAYS_INCLUDE_RUBY "converter.html2txt.always_include_ruby" static const int32_t kTabSize=4; static const int32_t kIndentSizeHeaders = 2; /* Indention of h1, if mHeaderStrategy = 1 or = 2. Indention of other headers is derived from that. XXX center h1? */ static const int32_t kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, indent h(x+1) this many columns more than h(x) */ static const int32_t kIndentSizeList = kTabSize; // Indention of non-first lines of ul and ol static const int32_t kIndentSizeDD = kTabSize; // Indention of

static const char16_t kNBSP = 160; static const char16_t kSPACE = ' '; static int32_t HeaderLevel(nsIAtom* aTag); static int32_t GetUnicharWidth(char16_t ucs); static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); // Someday may want to make this non-const: static const uint32_t TagStackSize = 500; static const uint32_t OLStackSize = 100; nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) { RefPtr it = new nsPlainTextSerializer(); it.forget(aSerializer); return NS_OK; } nsPlainTextSerializer::nsPlainTextSerializer() : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" { mOutputString = nullptr; mHeadLevel = 0; mAtFirstColumn = true; mIndent = 0; mCiteQuoteLevel = 0; mStructs = true; // will be read from prefs later mHeaderStrategy = 1 /*indent increasingly*/; // ditto mHasWrittenCiteBlockquote = false; mSpanLevel = 0; for (int32_t i = 0; i <= 6; i++) { mHeaderCounter[i] = 0; } // Line breaker mWrapColumn = 72; // XXX magic number, we expect someone to reset this mCurrentLineWidth = 0; // Flow mEmptyLines = 1; // The start of the document is an "empty line" in itself, mInWhitespace = false; mPreFormattedMail = false; mStartedOutput = false; mPreformattedBlockBoundary = false; mWithRubyAnnotation = false; // will be read from pref and flag later // initialize the tag stack to zero: // The stack only ever contains pointers to static atoms, so they don't // need refcounting. 
mTagStack = new nsIAtom*[TagStackSize]; mTagStackIndex = 0; mIgnoreAboveIndex = (uint32_t)kNotFound; // initialize the OL stack, where numbers for ordered lists are kept mOLStack = new int32_t[OLStackSize]; mOLStackIndex = 0; mULCount = 0; mIgnoredChildNodeLevel = 0; } nsPlainTextSerializer::~nsPlainTextSerializer() { delete[] mTagStack; delete[] mOLStack; NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!"); } NS_IMPL_ISUPPORTS(nsPlainTextSerializer, nsIContentSerializer) NS_IMETHODIMP nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, const char* aCharSet, bool aIsCopying, bool aIsWholeDocument) { #ifdef DEBUG // Check if the major control flags are set correctly. if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, "If you want format=flowed, you must combine it with " "nsIDocumentEncoder::OutputFormatted"); } if (aFlags & nsIDocumentEncoder::OutputFormatted) { NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), "Can't do formatted and preformatted output at the same time!"); } #endif mFlags = aFlags; mWrapColumn = aWrapColumn; // Only create a linebreaker if we will handle wrapping. 
if (MayWrap() && MayBreakLines()) { mLineBreaker = nsContentUtils::LineBreaker(); } // Set the line break character: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows mLineBreak.AssignLiteral("\r\n"); } else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac mLineBreak.Assign(char16_t('\r')); } else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM mLineBreak.Assign(char16_t('\n')); } else { // Platform/default mLineBreak.AssignLiteral(NS_LINEBREAK); } mLineBreakDue = false; mFloatingLines = -1; mPreformattedBlockBoundary = false; if (mFlags & nsIDocumentEncoder::OutputFormatted) { // Get some prefs that controls how we do formatted output mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); mHeaderStrategy = Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); } // The pref is default inited to false in libpref, but we use true // as fallback value because we don't want to affect behavior in // other places which use this serializer currently. 
mWithRubyAnnotation = Preferences::GetBool(PREF_ALWAYS_INCLUDE_RUBY, true) || (mFlags & nsIDocumentEncoder::OutputRubyAnnotation); // XXX We should let the caller decide whether to do this or not mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; return NS_OK; } bool nsPlainTextSerializer::GetLastBool(const nsTArray& aStack) { uint32_t size = aStack.Length(); if (size == 0) { return false; } return aStack.ElementAt(size-1); } void nsPlainTextSerializer::SetLastBool(nsTArray& aStack, bool aValue) { uint32_t size = aStack.Length(); if (size > 0) { aStack.ElementAt(size-1) = aValue; } else { NS_ERROR("There is no \"Last\" value"); } } void nsPlainTextSerializer::PushBool(nsTArray& aStack, bool aValue) { aStack.AppendElement(bool(aValue)); } bool nsPlainTextSerializer::PopBool(nsTArray& aStack) { bool returnValue = false; uint32_t size = aStack.Length(); if (size > 0) { returnValue = aStack.ElementAt(size-1); aStack.RemoveElementAt(size-1); } return returnValue; } bool nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) { // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, // non-textual container element should be serialized as placeholder // character and its child nodes should be ignored. See bug 895239. 
if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { return false; } return (aTag == nsGkAtoms::audio) || (aTag == nsGkAtoms::canvas) || (aTag == nsGkAtoms::iframe) || (aTag == nsGkAtoms::meter) || (aTag == nsGkAtoms::progress) || (aTag == nsGkAtoms::object) || (aTag == nsGkAtoms::svg) || (aTag == nsGkAtoms::video); } bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(nsIAtom* aTag) { if (mWithRubyAnnotation) { return false; } return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt || aTag == nsGkAtoms::rtc; } NS_IMETHODIMP nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset, int32_t aEndOffset, nsAString& aStr) { if (mIgnoreAboveIndex != (uint32_t)kNotFound) { return NS_OK; } NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); if ( aStartOffset < 0 ) return NS_ERROR_INVALID_ARG; NS_ENSURE_ARG(aText); nsresult rv = NS_OK; nsIContent* content = aText; const nsTextFragment* frag; if (!content || !(frag = content->GetText())) { return NS_ERROR_FAILURE; } int32_t fragLength = frag->GetLength(); int32_t endoffset = (aEndOffset == -1) ? 
fragLength : std::min(aEndOffset, fragLength); NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); int32_t length = endoffset - aStartOffset; if (length <= 0) { return NS_OK; } nsAutoString textstr; if (frag->Is2b()) { textstr.Assign(frag->Get2b() + aStartOffset, length); } else { // AssignASCII is for 7-bit character only, so don't use it const char *data = frag->Get1b(); CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); } mOutputString = &aStr; // We have to split the string across newlines // to match parser behavior int32_t start = 0; int32_t offset = textstr.FindCharInSet("\n\r"); while (offset != kNotFound) { if (offset>start) { // Pass in the line DoAddText(false, Substring(textstr, start, offset-start)); } // Pass in a newline DoAddText(true, mLineBreak); start = offset+1; offset = textstr.FindCharInSet("\n\r", start); } // Consume the last bit of the string if there's any left if (start < length) { if (start) { DoAddText(false, Substring(textstr, start, length - start)); } else { DoAddText(false, textstr); } } mOutputString = nullptr; return rv; } NS_IMETHODIMP nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset, int32_t aEndOffset, nsAString& aStr) { return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); } NS_IMETHODIMP nsPlainTextSerializer::AppendElementStart(Element* aElement, Element* aOriginalElement, nsAString& aStr) { NS_ENSURE_ARG(aElement); mElement = aElement; nsresult rv; nsIAtom* id = GetIdForContent(mElement); bool isContainer = !FragmentOrElement::IsHTMLVoid(id); mOutputString = &aStr; if (isContainer) { rv = DoOpenContainer(id); mPreformatStack.push(IsElementPreformatted(mElement)); } else { rv = DoAddLeaf(id); } mElement = nullptr; mOutputString = nullptr; if (id == nsGkAtoms::head) { ++mHeadLevel; } return rv; } NS_IMETHODIMP nsPlainTextSerializer::AppendElementEnd(Element* aElement, nsAString& aStr) { 
NS_ENSURE_ARG(aElement); mElement = aElement; nsresult rv; nsIAtom* id = GetIdForContent(mElement); bool isContainer = !FragmentOrElement::IsHTMLVoid(id); mOutputString = &aStr; rv = NS_OK; if (isContainer) { rv = DoCloseContainer(id); mPreformatStack.pop(); } mElement = nullptr; mOutputString = nullptr; if (id == nsGkAtoms::head) { NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0"); --mHeadLevel; } return rv; } NS_IMETHODIMP nsPlainTextSerializer::Flush(nsAString& aStr) { mOutputString = &aStr; FlushLine(); mOutputString = nullptr; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, nsAString& aStr) { return NS_OK; } nsresult nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) { // Check if we need output current node as placeholder character and ignore // child nodes. if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) { if (mIgnoredChildNodeLevel == 0) { // Serialize current node as placeholder character Write(NS_LITERAL_STRING(u"\xFFFC")); } // Ignore child nodes. mIgnoredChildNodeLevel++; return NS_OK; } if (IsIgnorableRubyAnnotation(aTag)) { // Ignorable ruby annotation shouldn't be replaced by a placeholder // character, neither any of its descendants. mIgnoredChildNodeLevel++; return NS_OK; } if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) { if (mPreformattedBlockBoundary && DoOutput()) { // Should always end a line, but get no more whitespace if (mFloatingLines < 0) mFloatingLines = 0; mLineBreakDue = true; } mPreformattedBlockBoundary = false; } if (mFlags & nsIDocumentEncoder::OutputRaw) { // Raw means raw. Don't even think about doing anything fancy // here like indenting, adding line breaks or any other // characters such as list item bullets, quote characters // around , etc. I mean it! Don't make me smack you! 
return NS_OK; } if (mTagStackIndex < TagStackSize) { mTagStack[mTagStackIndex++] = aTag; } if (mIgnoreAboveIndex != (uint32_t)kNotFound) { return NS_OK; } // Reset this so that

doesn't affect the whitespace // above random

s below it.
  mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
                              aTag == nsGkAtoms::pre;

  bool isInCiteBlockquote = false;

  // XXX special-case  so that we don't add additional
  // newlines before the text.
  if (aTag == nsGkAtoms::blockquote) {
    nsAutoString value;
    nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
  }

  if (mLineBreakDue && !isInCiteBlockquote)
    EnsureVerticalSpace(mFloatingLines);

  // Check if this tag's content that should not be output
  if ((aTag == nsGkAtoms::noscript &&
       !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
       !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
    // Ignore everything that follows the current tag in 
    // question until a matching end tag is encountered.
    mIgnoreAboveIndex = mTagStackIndex - 1;
    return NS_OK;
  }

  if (aTag == nsGkAtoms::body) {
    // Try to figure out here whether we have a
    // preformatted style attribute set by Thunderbird.
    //
    // Trigger on the presence of a "pre-wrap" in the
    // style attribute. That's a very simplistic way to do
    // it, but better than nothing.
    // Also set mWrapColumn to the value given there
    // (which arguably we should only do if told to do so).
    nsAutoString style;
    int32_t whitespace;
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
       (kNotFound != (whitespace = style.Find("white-space:")))) {

      if (kNotFound != style.Find("pre-wrap", true, whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre-wrap\n");
#endif
        mPreFormattedMail = true;
        int32_t widthOffset = style.Find("width:");
        if (widthOffset >= 0) {
          // We have to search for the ch before the semicolon,
          // not for the semicolon itself, because nsString::ToInteger()
          // considers 'c' to be a valid numeric char (even if radix=10)
          // but then gets confused if it sees it next to the number
          // when the radix specified was 10, and returns an error code.
          int32_t semiOffset = style.Find("ch", false, widthOffset+6);
          int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6
                            : style.Length() - widthOffset);
          nsAutoString widthstr;
          style.Mid(widthstr, widthOffset+6, length);
          nsresult err;
          int32_t col = widthstr.ToInteger(&err);

          if (NS_SUCCEEDED(err)) {
            mWrapColumn = (uint32_t)col;
#ifdef DEBUG_preformatted
            printf("Set wrap column to %d based on style\n", mWrapColumn);
#endif
          }
        }
      }
      else if (kNotFound != style.Find("pre", true, whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre\n");
#endif
        mPreFormattedMail = true;
        mWrapColumn = 0;
      }
    } 
    else {
      /* See comment at end of function. */
      mInWhitespace = true;
      mPreFormattedMail = false;
    }

    return NS_OK;
  }

  // Keep this in sync with DoCloseContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::p)
    EnsureVerticalSpace(1);
  else if (aTag == nsGkAtoms::pre) {
    if (GetLastBool(mIsInCiteBlockquote))
      EnsureVerticalSpace(0);
    else if (mHasWrittenCiteBlockquote) {
      EnsureVerticalSpace(0);
      mHasWrittenCiteBlockquote = false;
    }
    else
      EnsureVerticalSpace(1);
  }
  else if (aTag == nsGkAtoms::tr) {
    PushBool(mHasWrittenCellsForRow, false);
  }
  else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
    // We must make sure that the content of two table cells get a
    // space between them.

    // To make the separation between cells most obvious and
    // importable, we use a TAB.
    if (GetLastBool(mHasWrittenCellsForRow)) {
      // Bypass |Write| so that the TAB isn't compressed away.
      AddToLine(u"\t", 1);
      mInWhitespace = true;
    }
    else if (mHasWrittenCellsForRow.IsEmpty()) {
      // We don't always see a  (nor a ) before the  if we're
      // copying part of a table
      PushBool(mHasWrittenCellsForRow, true); // will never be popped
    }
    else {
      SetLastBool(mHasWrittenCellsForRow, true);
    }
  }
  else if (aTag == nsGkAtoms::ul) {
    // Indent here to support nested lists, which aren't included in li :-(
    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
         // Must end the current line before we change indention
    mIndent += kIndentSizeList;
    mULCount++;
  }
  else if (aTag == nsGkAtoms::ol) {
    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
      // Must end the current line before we change indention
      if (mOLStackIndex < OLStackSize) {
        nsAutoString startAttr;
        int32_t startVal = 1;
        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
          nsresult rv = NS_OK;
          startVal = startAttr.ToInteger(&rv);
          if (NS_FAILED(rv))
            startVal = 1;
        }
        mOLStack[mOLStackIndex++] = startVal;
      }
    } else {
      mOLStackIndex++;
    }
    mIndent += kIndentSizeList;  // see ul
  }
  else if (aTag == nsGkAtoms::li &&
           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
    if (mTagStackIndex > 1 && IsInOL()) {
      if (mOLStackIndex > 0) {
        nsAutoString valueAttr;
        if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
          nsresult rv = NS_OK;
          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
          if (NS_SUCCEEDED(rv))
            mOLStack[mOLStackIndex-1] = valueAttrVal;
        }
        // This is what nsBulletFrame does for OLs:
        mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
      }
      else {
        mInIndentString.Append(char16_t('#'));
      }

      mInIndentString.Append(char16_t('.'));

    }
    else {
      static char bulletCharArray[] = "*o+#";
      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
      char bulletChar = bulletCharArray[index % 4];
      mInIndentString.Append(char16_t(bulletChar));
    }

    mInIndentString.Append(char16_t(' '));
  }
  else if (aTag == nsGkAtoms::dl) {
    EnsureVerticalSpace(1);
  }
  else if (aTag == nsGkAtoms::dt) {
    EnsureVerticalSpace(0);
  }
  else if (aTag == nsGkAtoms::dd) {
    EnsureVerticalSpace(0);
    mIndent += kIndentSizeDD;
  }
  else if (aTag == nsGkAtoms::span) {
    ++mSpanLevel;
  }
  else if (aTag == nsGkAtoms::blockquote) {
    // Push
    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
    if (isInCiteBlockquote) {
      EnsureVerticalSpace(0);
      mCiteQuoteLevel++;
    }
    else {
      EnsureVerticalSpace(1);
      mIndent += kTabSize; // Check for some maximum value?
    }
  }
  else if (aTag == nsGkAtoms::q) {
    Write(NS_LITERAL_STRING("\""));
  }

  // Else make sure we'll separate block level tags,
  // even if we're about to leave, before doing any other formatting.
  else if (IsElementBlock(mElement)) {
    EnsureVerticalSpace(0);
  }

  //////////////////////////////////////////////////////////////
  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
    return NS_OK;
  }
  //////////////////////////////////////////////////////////////
  // The rest of this routine is formatted output stuff,
  // which we should skip if we're not formatted:
  //////////////////////////////////////////////////////////////

  // Push on stack
  bool currentNodeIsConverted = IsCurrentNodeConverted();

  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6)
  {
    EnsureVerticalSpace(2);
    if (mHeaderStrategy == 2) {  // numbered
      mIndent += kIndentSizeHeaders;
      // Caching
      int32_t level = HeaderLevel(aTag);
      // Increase counter for current level
      mHeaderCounter[level]++;
      // Reset all lower levels
      int32_t i;

      for (i = level + 1; i <= 6; i++) {
        mHeaderCounter[i] = 0;
      }

      // Construct numbers
      nsAutoString leadup;
      for (i = 1; i <= level; i++) {
        leadup.AppendInt(mHeaderCounter[i]);
        leadup.Append(char16_t('.'));
      }
      leadup.Append(char16_t(' '));
      Write(leadup);
    }
    else if (mHeaderStrategy == 1) { // indent increasingly
      mIndent += kIndentSizeHeaders;
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
           // for h(x), run x-1 times
        mIndent += kIndentIncrementHeaders;
      }
    }
  }
  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
    nsAutoString url;
    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url))
        && !url.IsEmpty()) {
      mURL = url;
    }
  }
  else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("^"));
  }
  else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("_"));
  }
  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("|"));
  }
  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("*"));
  }
  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("/"));
  }
  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("_"));
  }

  /* Container elements are always block elements, so we shouldn't
     output any whitespace immediately after the container tag even if
     there's extra whitespace there because the HTML is pretty-printed
     or something. To ensure that happens, tell the serializer we're
     already in whitespace so it won't output more. */
  mInWhitespace = true;

  return NS_OK;
}

/**
 * Performs the plain-text bookkeeping and output for the closing of the
 * container element |aTag| (the element itself is available as mElement).
 *
 * Mirrors DoOpenContainer: leaves ignored placeholder/ruby subtrees,
 * records a preformatted-block boundary for clipboard copies, returns
 * early for raw output, pops the tag stack (ending an ignored subtree when
 * its opening tag is reached), terminates the output at </body> / </html>,
 * then undoes indentation and schedules line breaks for block structure,
 * and finally (formatted output only) restores header indentation and
 * emits the closing inline structure markers and link URLs.
 *
 * Keep this in sync with DoOpenContainer.
 *
 * @return NS_OK.
 */
nsresult
nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag)
{
  if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }
  if (IsIgnorableRubyAnnotation(aTag)) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }

  if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) {
    if (DoOutput() && IsInPre() && IsElementBlock(mElement)) {
      // If we're closing a preformatted block element, output a line break
      // when we find a new container.
      mPreformattedBlockBoundary = true;
    }
  }

  if (mFlags & nsIDocumentEncoder::OutputRaw) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.  I mean it!  Don't make me smack you!

    return NS_OK;
  }

  if (mTagStackIndex > 0) {
    --mTagStackIndex;
  }

  if (mTagStackIndex >= mIgnoreAboveIndex) {
    if (mTagStackIndex == mIgnoreAboveIndex) {
      // We're dealing with the close tag whose matching
      // open tag had set the mIgnoreAboveIndex value.
      // Reset mIgnoreAboveIndex before discarding this tag.
      mIgnoreAboveIndex = (uint32_t)kNotFound;
    }
    return NS_OK;
  }

  // End current line if we're ending a block level tag
  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
    // We want the output to end with a new line,
    // but in preformatted areas like text fields,
    // we can't emit newlines that weren't there.
    // So add the newline only in the case of formatted output.
    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
      EnsureVerticalSpace(0);
    }
    else {
      FlushLine();
    }
    // We won't want to do anything with these in formatted mode either,
    // so just return now:
    return NS_OK;
  }

  // Keep this in sync with DoOpenContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::tr) {
    PopBool(mHasWrittenCellsForRow);
    // Should always end a line, but get no more whitespace
    if (mFloatingLines < 0)
      mFloatingLines = 0;
    mLineBreakDue = true;
  }
  else if (((aTag == nsGkAtoms::li) ||
            (aTag == nsGkAtoms::dt)) &&
           (mFlags & nsIDocumentEncoder::OutputFormatted)) {
    // Items that should always end a line, but get no more whitespace
    if (mFloatingLines < 0)
      mFloatingLines = 0;
    mLineBreakDue = true;
  }
  else if (aTag == nsGkAtoms::pre) {
    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
    mLineBreakDue = true;
  }
  else if (aTag == nsGkAtoms::ul) {
    FlushLine();
    mIndent -= kIndentSizeList;
    if (--mULCount + mOLStackIndex == 0) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  }
  else if (aTag == nsGkAtoms::ol) {
    FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
    mIndent -= kIndentSizeList;
    NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!");
    // DoOpenContainer stops incrementing mOLStackIndex once the OL stack
    // is full (formatted output), so closes can outnumber the recorded
    // opens. Never step below zero: a wrapped-around index would later be
    // used to address mOLStack when a list item is opened.
    if (mOLStackIndex > 0) {
      mOLStackIndex--;
    }
    if (mULCount + mOLStackIndex == 0) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  }
  else if (aTag == nsGkAtoms::dl) {
    mFloatingLines = 1;
    mLineBreakDue = true;
  }
  else if (aTag == nsGkAtoms::dd) {
    FlushLine();
    mIndent -= kIndentSizeDD;
  }
  else if (aTag == nsGkAtoms::span) {
    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
    --mSpanLevel;
  }
  else if (aTag == nsGkAtoms::div) {
    if (mFloatingLines < 0)
      mFloatingLines = 0;
    mLineBreakDue = true;
  }
  else if (aTag == nsGkAtoms::blockquote) {
    FlushLine();    // Is this needed?

    // Pop
    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);

    if (isInCiteBlockquote) {
      NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!");
      mCiteQuoteLevel--;
      mFloatingLines = 0;
      mHasWrittenCiteBlockquote = true;
    }
    else {
      mIndent -= kTabSize;
      mFloatingLines = 1;
    }
    mLineBreakDue = true;
  }
  else if (aTag == nsGkAtoms::q) {
    Write(NS_LITERAL_STRING("\""));
  }
  else if (IsElementBlock(mElement) && aTag != nsGkAtoms::script) {
    // All other blocks get 1 vertical space after them
    // in formatted mode, otherwise 0.
    // This is hard. Sometimes 0 is a better number, but
    // how to know?
    if (mFlags & nsIDocumentEncoder::OutputFormatted)
      EnsureVerticalSpace(1);
    else {
      if (mFloatingLines < 0)
        mFloatingLines = 0;
      mLineBreakDue = true;
    }
  }

  //////////////////////////////////////////////////////////////
  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
    return NS_OK;
  }
  //////////////////////////////////////////////////////////////
  // The rest of this routine is formatted output stuff,
  // which we should skip if we're not formatted:
  //////////////////////////////////////////////////////////////

  // Pop the currentConverted stack
  bool currentNodeIsConverted = IsCurrentNodeConverted();

  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
      aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
      aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {

    // Undo exactly the indentation DoOpenContainer added for this header.
    if (mHeaderStrategy) {  /*numbered or indent increasingly*/
      mIndent -= kIndentSizeHeaders;
    }
    if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
           // for h(x), run x-1 times
        mIndent -= kIndentIncrementHeaders;
      }
    }
    EnsureVerticalSpace(1);
  }
  else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) {
    // Append the link target remembered when the <a> was opened.
    nsAutoString temp;
    temp.AssignLiteral(" <");
    temp += mURL;
    temp.Append(char16_t('>'));
    Write(temp);
    mURL.Truncate();
  }
  else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub)
           && mStructs && !currentNodeIsConverted) {
    Write(kSpace);
  }
  else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("|"));
  }
  else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("*"));
  }
  else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("/"));
  }
  else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("_"));
  }

  return NS_OK;
}

bool
nsPlainTextSerializer::MustSuppressLeaf()
{
  if (mIgnoredChildNodeLevel > 0) {
    return true;
  }

  if ((mTagStackIndex > 1 &&
       mTagStack[mTagStackIndex-2] == nsGkAtoms::select) ||
      (mTagStackIndex > 0 &&
        mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) {
    // Don't output the contents of SELECT elements;
    // Might be nice, eventually, to output just the selected element.
    // Read more in bug 31994.
    return true;
  }

  if (mTagStackIndex > 0 &&
      (mTagStack[mTagStackIndex-1] == nsGkAtoms::script ||
       mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) {
    // Don't output the contents of