From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Fri, 2 Feb 2018 04:16:08 -0500 Subject: Add m-esr52 at 52.6.0 --- dom/base/nsPlainTextSerializer.cpp | 2034 ++++++++++++++++++++++++++++++++++++ 1 file changed, 2034 insertions(+) create mode 100644 dom/base/nsPlainTextSerializer.cpp (limited to 'dom/base/nsPlainTextSerializer.cpp') diff --git a/dom/base/nsPlainTextSerializer.cpp b/dom/base/nsPlainTextSerializer.cpp new file mode 100644 index 000000000..ef6bdcac7 --- /dev/null +++ b/dom/base/nsPlainTextSerializer.cpp @@ -0,0 +1,2034 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * nsIContentSerializer implementation that can be used with an + * nsIDocumentEncoder to convert a DOM into plaintext in a nice way + * (eg for copy/paste as plaintext). + */ + +#include "nsPlainTextSerializer.h" +#include "nsLWBrkCIID.h" +#include "nsIServiceManager.h" +#include "nsGkAtoms.h" +#include "nsNameSpaceManager.h" +#include "nsTextFragment.h" +#include "nsContentUtils.h" +#include "nsReadableUtils.h" +#include "nsUnicharUtils.h" +#include "nsCRT.h" +#include "mozilla/dom/Element.h" +#include "mozilla/Preferences.h" +#include "mozilla/BinarySearch.h" +#include "nsComputedDOMStyle.h" + +using namespace mozilla; +using namespace mozilla::dom; + +#define PREF_STRUCTS "converter.html2txt.structs" +#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy" +#define PREF_ALWAYS_INCLUDE_RUBY "converter.html2txt.always_include_ruby" + +static const int32_t kTabSize=4; +static const int32_t kIndentSizeHeaders = 2; /* Indention of h1, if + mHeaderStrategy = 1 or = 2. + Indention of other headers + is derived from that. + XXX center h1? 
*/ +static const int32_t kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1, + indent h(x+1) this many + columns more than h(x) */ +static const int32_t kIndentSizeList = kTabSize; + // Indention of non-first lines of ul and ol +static const int32_t kIndentSizeDD = kTabSize; // Indention of <dd>
+static const char16_t kNBSP = 160; +static const char16_t kSPACE = ' '; + +static int32_t HeaderLevel(nsIAtom* aTag); +static int32_t GetUnicharWidth(char16_t ucs); +static int32_t GetUnicharStringWidth(const char16_t* pwcs, int32_t n); + +// Someday may want to make this non-const: +static const uint32_t TagStackSize = 500; +static const uint32_t OLStackSize = 100; + +nsresult +NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) +{ + RefPtr it = new nsPlainTextSerializer(); + it.forget(aSerializer); + return NS_OK; +} + +nsPlainTextSerializer::nsPlainTextSerializer() + : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" +{ + + mOutputString = nullptr; + mHeadLevel = 0; + mAtFirstColumn = true; + mIndent = 0; + mCiteQuoteLevel = 0; + mStructs = true; // will be read from prefs later + mHeaderStrategy = 1 /*indent increasingly*/; // ditto + mHasWrittenCiteBlockquote = false; + mSpanLevel = 0; + for (int32_t i = 0; i <= 6; i++) { + mHeaderCounter[i] = 0; + } + + // Line breaker + mWrapColumn = 72; // XXX magic number, we expect someone to reset this + mCurrentLineWidth = 0; + + // Flow + mEmptyLines = 1; // The start of the document is an "empty line" in itself, + mInWhitespace = false; + mPreFormattedMail = false; + mStartedOutput = false; + + mPreformattedBlockBoundary = false; + mWithRubyAnnotation = false; // will be read from pref and flag later + + // initialize the tag stack to zero: + // The stack only ever contains pointers to static atoms, so they don't + // need refcounting. 
+ mTagStack = new nsIAtom*[TagStackSize]; + mTagStackIndex = 0; + mIgnoreAboveIndex = (uint32_t)kNotFound; + + // initialize the OL stack, where numbers for ordered lists are kept + mOLStack = new int32_t[OLStackSize]; + mOLStackIndex = 0; + + mULCount = 0; + + mIgnoredChildNodeLevel = 0; +} + +nsPlainTextSerializer::~nsPlainTextSerializer() +{ + delete[] mTagStack; + delete[] mOLStack; + NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!"); +} + +NS_IMPL_ISUPPORTS(nsPlainTextSerializer, + nsIContentSerializer) + + +NS_IMETHODIMP +nsPlainTextSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn, + const char* aCharSet, bool aIsCopying, + bool aIsWholeDocument) +{ +#ifdef DEBUG + // Check if the major control flags are set correctly. + if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) { + NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, + "If you want format=flowed, you must combine it with " + "nsIDocumentEncoder::OutputFormatted"); + } + + if (aFlags & nsIDocumentEncoder::OutputFormatted) { + NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), + "Can't do formatted and preformatted output at the same time!"); + } +#endif + + mFlags = aFlags; + mWrapColumn = aWrapColumn; + + // Only create a linebreaker if we will handle wrapping. 
+ if (MayWrap() && MayBreakLines()) { + mLineBreaker = nsContentUtils::LineBreaker(); + } + + // Set the line break character: + if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) + && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { + // Windows + mLineBreak.AssignLiteral("\r\n"); + } + else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { + // Mac + mLineBreak.Assign(char16_t('\r')); + } + else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { + // Unix/DOM + mLineBreak.Assign(char16_t('\n')); + } + else { + // Platform/default + mLineBreak.AssignLiteral(NS_LINEBREAK); + } + + mLineBreakDue = false; + mFloatingLines = -1; + + mPreformattedBlockBoundary = false; + + if (mFlags & nsIDocumentEncoder::OutputFormatted) { + // Get some prefs that controls how we do formatted output + mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs); + + mHeaderStrategy = + Preferences::GetInt(PREF_HEADER_STRATEGY, mHeaderStrategy); + } + + // The pref is default inited to false in libpref, but we use true + // as fallback value because we don't want to affect behavior in + // other places which use this serializer currently. 
+ mWithRubyAnnotation = + Preferences::GetBool(PREF_ALWAYS_INCLUDE_RUBY, true) || + (mFlags & nsIDocumentEncoder::OutputRubyAnnotation); + + // XXX We should let the caller decide whether to do this or not + mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; + + return NS_OK; +} + +bool +nsPlainTextSerializer::GetLastBool(const nsTArray& aStack) +{ + uint32_t size = aStack.Length(); + if (size == 0) { + return false; + } + return aStack.ElementAt(size-1); +} + +void +nsPlainTextSerializer::SetLastBool(nsTArray& aStack, bool aValue) +{ + uint32_t size = aStack.Length(); + if (size > 0) { + aStack.ElementAt(size-1) = aValue; + } + else { + NS_ERROR("There is no \"Last\" value"); + } +} + +void +nsPlainTextSerializer::PushBool(nsTArray& aStack, bool aValue) +{ + aStack.AppendElement(bool(aValue)); +} + +bool +nsPlainTextSerializer::PopBool(nsTArray& aStack) +{ + bool returnValue = false; + uint32_t size = aStack.Length(); + if (size > 0) { + returnValue = aStack.ElementAt(size-1); + aStack.RemoveElementAt(size-1); + } + return returnValue; +} + +bool +nsPlainTextSerializer::ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag) +{ + // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, + // non-textual container element should be serialized as placeholder + // character and its child nodes should be ignored. See bug 895239. 
+ if (!(mFlags & nsIDocumentEncoder::OutputNonTextContentAsPlaceholder)) { + return false; + } + + return + (aTag == nsGkAtoms::audio) || + (aTag == nsGkAtoms::canvas) || + (aTag == nsGkAtoms::iframe) || + (aTag == nsGkAtoms::meter) || + (aTag == nsGkAtoms::progress) || + (aTag == nsGkAtoms::object) || + (aTag == nsGkAtoms::svg) || + (aTag == nsGkAtoms::video); +} + +bool +nsPlainTextSerializer::IsIgnorableRubyAnnotation(nsIAtom* aTag) +{ + if (mWithRubyAnnotation) { + return false; + } + + return + aTag == nsGkAtoms::rp || + aTag == nsGkAtoms::rt || + aTag == nsGkAtoms::rtc; +} + +NS_IMETHODIMP +nsPlainTextSerializer::AppendText(nsIContent* aText, + int32_t aStartOffset, + int32_t aEndOffset, + nsAString& aStr) +{ + if (mIgnoreAboveIndex != (uint32_t)kNotFound) { + return NS_OK; + } + + NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); + if ( aStartOffset < 0 ) + return NS_ERROR_INVALID_ARG; + + NS_ENSURE_ARG(aText); + + nsresult rv = NS_OK; + + nsIContent* content = aText; + const nsTextFragment* frag; + if (!content || !(frag = content->GetText())) { + return NS_ERROR_FAILURE; + } + + int32_t fragLength = frag->GetLength(); + int32_t endoffset = (aEndOffset == -1) ? 
fragLength : std::min(aEndOffset, fragLength); + NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); + + int32_t length = endoffset - aStartOffset; + if (length <= 0) { + return NS_OK; + } + + nsAutoString textstr; + if (frag->Is2b()) { + textstr.Assign(frag->Get2b() + aStartOffset, length); + } + else { + // AssignASCII is for 7-bit character only, so don't use it + const char *data = frag->Get1b(); + CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr); + } + + mOutputString = &aStr; + + // We have to split the string across newlines + // to match parser behavior + int32_t start = 0; + int32_t offset = textstr.FindCharInSet("\n\r"); + while (offset != kNotFound) { + + if (offset>start) { + // Pass in the line + DoAddText(false, + Substring(textstr, start, offset-start)); + } + + // Pass in a newline + DoAddText(true, mLineBreak); + + start = offset+1; + offset = textstr.FindCharInSet("\n\r", start); + } + + // Consume the last bit of the string if there's any left + if (start < length) { + if (start) { + DoAddText(false, Substring(textstr, start, length - start)); + } + else { + DoAddText(false, textstr); + } + } + + mOutputString = nullptr; + + return rv; +} + +NS_IMETHODIMP +nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection, + int32_t aStartOffset, + int32_t aEndOffset, + nsAString& aStr) +{ + return AppendText(aCDATASection, aStartOffset, aEndOffset, aStr); +} + +NS_IMETHODIMP +nsPlainTextSerializer::AppendElementStart(Element* aElement, + Element* aOriginalElement, + nsAString& aStr) +{ + NS_ENSURE_ARG(aElement); + + mElement = aElement; + + nsresult rv; + nsIAtom* id = GetIdForContent(mElement); + + bool isContainer = !FragmentOrElement::IsHTMLVoid(id); + + mOutputString = &aStr; + + if (isContainer) { + rv = DoOpenContainer(id); + mPreformatStack.push(IsElementPreformatted(mElement)); + } + else { + rv = DoAddLeaf(id); + } + + mElement = nullptr; + mOutputString = 
nullptr; + + if (id == nsGkAtoms::head) { + ++mHeadLevel; + } + + return rv; +} + +NS_IMETHODIMP +nsPlainTextSerializer::AppendElementEnd(Element* aElement, + nsAString& aStr) +{ + NS_ENSURE_ARG(aElement); + + mElement = aElement; + + nsresult rv; + nsIAtom* id = GetIdForContent(mElement); + + bool isContainer = !FragmentOrElement::IsHTMLVoid(id); + + mOutputString = &aStr; + + rv = NS_OK; + if (isContainer) { + rv = DoCloseContainer(id); + mPreformatStack.pop(); + } + + mElement = nullptr; + mOutputString = nullptr; + + if (id == nsGkAtoms::head) { + NS_ASSERTION(mHeadLevel != 0, + "mHeadLevel being decremented below 0"); + --mHeadLevel; + } + + return rv; +} + +NS_IMETHODIMP +nsPlainTextSerializer::Flush(nsAString& aStr) +{ + mOutputString = &aStr; + FlushLine(); + mOutputString = nullptr; + return NS_OK; +} + +NS_IMETHODIMP +nsPlainTextSerializer::AppendDocumentStart(nsIDocument *aDocument, + nsAString& aStr) +{ + return NS_OK; +} + +nsresult +nsPlainTextSerializer::DoOpenContainer(nsIAtom* aTag) +{ + // Check if we need output current node as placeholder character and ignore + // child nodes. + if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) { + if (mIgnoredChildNodeLevel == 0) { + // Serialize current node as placeholder character + Write(NS_LITERAL_STRING(u"\xFFFC")); + } + // Ignore child nodes. + mIgnoredChildNodeLevel++; + return NS_OK; + } + if (IsIgnorableRubyAnnotation(aTag)) { + // Ignorable ruby annotation shouldn't be replaced by a placeholder + // character, neither any of its descendants. + mIgnoredChildNodeLevel++; + return NS_OK; + } + + if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) { + if (mPreformattedBlockBoundary && DoOutput()) { + // Should always end a line, but get no more whitespace + if (mFloatingLines < 0) + mFloatingLines = 0; + mLineBreakDue = true; + } + mPreformattedBlockBoundary = false; + } + + if (mFlags & nsIDocumentEncoder::OutputRaw) { + // Raw means raw. 
Don't even think about doing anything fancy + // here like indenting, adding line breaks or any other + // characters such as list item bullets, quote characters + // around <q>, etc. I mean it! Don't make me smack you! + + return NS_OK; + } + + if (mTagStackIndex < TagStackSize) { + mTagStack[mTagStackIndex++] = aTag; + } + + if (mIgnoreAboveIndex != (uint32_t)kNotFound) { + return NS_OK; + } + + // Reset this so that <blockquote type=cite> doesn't affect the whitespace + // above random <pre>s below it.
+  mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote &&
+                              aTag == nsGkAtoms::pre;
+
+  bool isInCiteBlockquote = false;
+
+  // XXX special-case <blockquote type=cite>
so that we don't add additional + // newlines before the text. + if (aTag == nsGkAtoms::blockquote) { + nsAutoString value; + nsresult rv = GetAttributeValue(nsGkAtoms::type, value); + isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite"); + } + + if (mLineBreakDue && !isInCiteBlockquote) + EnsureVerticalSpace(mFloatingLines); + + // Check if this tag's content that should not be output + if ((aTag == nsGkAtoms::noscript && + !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) || + ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) && + !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) { + // Ignore everything that follows the current tag in + // question until a matching end tag is encountered. + mIgnoreAboveIndex = mTagStackIndex - 1; + return NS_OK; + } + + if (aTag == nsGkAtoms::body) { + // Try to figure out here whether we have a + // preformatted style attribute set by Thunderbird. + // + // Trigger on the presence of a "pre-wrap" in the + // style attribute. That's a very simplistic way to do + // it, but better than nothing. + // Also set mWrapColumn to the value given there + // (which arguably we should only do if told to do so). + nsAutoString style; + int32_t whitespace; + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) && + (kNotFound != (whitespace = style.Find("white-space:")))) { + + if (kNotFound != style.Find("pre-wrap", true, whitespace)) { +#ifdef DEBUG_preformatted + printf("Set mPreFormattedMail based on style pre-wrap\n"); +#endif + mPreFormattedMail = true; + int32_t widthOffset = style.Find("width:"); + if (widthOffset >= 0) { + // We have to search for the ch before the semicolon, + // not for the semicolon itself, because nsString::ToInteger() + // considers 'c' to be a valid numeric char (even if radix=10) + // but then gets confused if it sees it next to the number + // when the radix specified was 10, and returns an error code. 
+ int32_t semiOffset = style.Find("ch", false, widthOffset+6); + int32_t length = (semiOffset > 0 ? semiOffset - widthOffset - 6 + : style.Length() - widthOffset); + nsAutoString widthstr; + style.Mid(widthstr, widthOffset+6, length); + nsresult err; + int32_t col = widthstr.ToInteger(&err); + + if (NS_SUCCEEDED(err)) { + mWrapColumn = (uint32_t)col; +#ifdef DEBUG_preformatted + printf("Set wrap column to %d based on style\n", mWrapColumn); +#endif + } + } + } + else if (kNotFound != style.Find("pre", true, whitespace)) { +#ifdef DEBUG_preformatted + printf("Set mPreFormattedMail based on style pre\n"); +#endif + mPreFormattedMail = true; + mWrapColumn = 0; + } + } + else { + /* See comment at end of function. */ + mInWhitespace = true; + mPreFormattedMail = false; + } + + return NS_OK; + } + + // Keep this in sync with DoCloseContainer! + if (!DoOutput()) { + return NS_OK; + } + + if (aTag == nsGkAtoms::p) + EnsureVerticalSpace(1); + else if (aTag == nsGkAtoms::pre) { + if (GetLastBool(mIsInCiteBlockquote)) + EnsureVerticalSpace(0); + else if (mHasWrittenCiteBlockquote) { + EnsureVerticalSpace(0); + mHasWrittenCiteBlockquote = false; + } + else + EnsureVerticalSpace(1); + } + else if (aTag == nsGkAtoms::tr) { + PushBool(mHasWrittenCellsForRow, false); + } + else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) { + // We must make sure that the content of two table cells get a + // space between them. + + // To make the separation between cells most obvious and + // importable, we use a TAB. + if (GetLastBool(mHasWrittenCellsForRow)) { + // Bypass |Write| so that the TAB isn't compressed away. + AddToLine(u"\t", 1); + mInWhitespace = true; + } + else if (mHasWrittenCellsForRow.IsEmpty()) { + // We don't always see a <tr> (nor a <table>) before the <td>
if we're + // copying part of a table + PushBool(mHasWrittenCellsForRow, true); // will never be popped + } + else { + SetLastBool(mHasWrittenCellsForRow, true); + } + } + else if (aTag == nsGkAtoms::ul) { + // Indent here to support nested lists, which aren't included in li :-( + EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); + // Must end the current line before we change indention + mIndent += kIndentSizeList; + mULCount++; + } + else if (aTag == nsGkAtoms::ol) { + EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0); + if (mFlags & nsIDocumentEncoder::OutputFormatted) { + // Must end the current line before we change indention + if (mOLStackIndex < OLStackSize) { + nsAutoString startAttr; + int32_t startVal = 1; + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) { + nsresult rv = NS_OK; + startVal = startAttr.ToInteger(&rv); + if (NS_FAILED(rv)) + startVal = 1; + } + mOLStack[mOLStackIndex++] = startVal; + } + } else { + mOLStackIndex++; + } + mIndent += kIndentSizeList; // see ul + } + else if (aTag == nsGkAtoms::li && + (mFlags & nsIDocumentEncoder::OutputFormatted)) { + if (mTagStackIndex > 1 && IsInOL()) { + if (mOLStackIndex > 0) { + nsAutoString valueAttr; + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) { + nsresult rv = NS_OK; + int32_t valueAttrVal = valueAttr.ToInteger(&rv); + if (NS_SUCCEEDED(rv)) + mOLStack[mOLStackIndex-1] = valueAttrVal; + } + // This is what nsBulletFrame does for OLs: + mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10); + } + else { + mInIndentString.Append(char16_t('#')); + } + + mInIndentString.Append(char16_t('.')); + + } + else { + static char bulletCharArray[] = "*o+#"; + uint32_t index = mULCount > 0 ? 
(mULCount - 1) : 3; + char bulletChar = bulletCharArray[index % 4]; + mInIndentString.Append(char16_t(bulletChar)); + } + + mInIndentString.Append(char16_t(' ')); + } + else if (aTag == nsGkAtoms::dl) { + EnsureVerticalSpace(1); + } + else if (aTag == nsGkAtoms::dt) { + EnsureVerticalSpace(0); + } + else if (aTag == nsGkAtoms::dd) { + EnsureVerticalSpace(0); + mIndent += kIndentSizeDD; + } + else if (aTag == nsGkAtoms::span) { + ++mSpanLevel; + } + else if (aTag == nsGkAtoms::blockquote) { + // Push + PushBool(mIsInCiteBlockquote, isInCiteBlockquote); + if (isInCiteBlockquote) { + EnsureVerticalSpace(0); + mCiteQuoteLevel++; + } + else { + EnsureVerticalSpace(1); + mIndent += kTabSize; // Check for some maximum value? + } + } + else if (aTag == nsGkAtoms::q) { + Write(NS_LITERAL_STRING("\"")); + } + + // Else make sure we'll separate block level tags, + // even if we're about to leave, before doing any other formatting. + else if (IsElementBlock(mElement)) { + EnsureVerticalSpace(0); + } + + ////////////////////////////////////////////////////////////// + if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { + return NS_OK; + } + ////////////////////////////////////////////////////////////// + // The rest of this routine is formatted output stuff, + // which we should skip if we're not formatted: + ////////////////////////////////////////////////////////////// + + // Push on stack + bool currentNodeIsConverted = IsCurrentNodeConverted(); + + if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || + aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || + aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) + { + EnsureVerticalSpace(2); + if (mHeaderStrategy == 2) { // numbered + mIndent += kIndentSizeHeaders; + // Caching + int32_t level = HeaderLevel(aTag); + // Increase counter for current level + mHeaderCounter[level]++; + // Reset all lower levels + int32_t i; + + for (i = level + 1; i <= 6; i++) { + mHeaderCounter[i] = 0; + } + + // Construct numbers + nsAutoString 
leadup; + for (i = 1; i <= level; i++) { + leadup.AppendInt(mHeaderCounter[i]); + leadup.Append(char16_t('.')); + } + leadup.Append(char16_t(' ')); + Write(leadup); + } + else if (mHeaderStrategy == 1) { // indent increasingly + mIndent += kIndentSizeHeaders; + for (int32_t i = HeaderLevel(aTag); i > 1; i--) { + // for h(x), run x-1 times + mIndent += kIndentIncrementHeaders; + } + } + } + else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) { + nsAutoString url; + if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) + && !url.IsEmpty()) { + mURL = url; + } + } + else if (aTag == nsGkAtoms::sup && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("^")); + } + else if (aTag == nsGkAtoms::sub && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("_")); + } + else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("|")); + } + else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) + && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("*")); + } + else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) + && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("/")); + } + else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("_")); + } + + /* Container elements are always block elements, so we shouldn't + output any whitespace immediately after the container tag even if + there's extra whitespace there because the HTML is pretty-printed + or something. To ensure that happens, tell the serializer we're + already in whitespace so it won't output more. 
*/ + mInWhitespace = true; + + return NS_OK; +} + +nsresult +nsPlainTextSerializer::DoCloseContainer(nsIAtom* aTag) +{ + if (ShouldReplaceContainerWithPlaceholder(mElement->NodeInfo()->NameAtom())) { + mIgnoredChildNodeLevel--; + return NS_OK; + } + if (IsIgnorableRubyAnnotation(aTag)) { + mIgnoredChildNodeLevel--; + return NS_OK; + } + + if (mFlags & nsIDocumentEncoder::OutputForPlainTextClipboardCopy) { + if (DoOutput() && IsInPre() && IsElementBlock(mElement)) { + // If we're closing a preformatted block element, output a line break + // when we find a new container. + mPreformattedBlockBoundary = true; + } + } + + if (mFlags & nsIDocumentEncoder::OutputRaw) { + // Raw means raw. Don't even think about doing anything fancy + // here like indenting, adding line breaks or any other + // characters such as list item bullets, quote characters + // around , etc. I mean it! Don't make me smack you! + + return NS_OK; + } + + if (mTagStackIndex > 0) { + --mTagStackIndex; + } + + if (mTagStackIndex >= mIgnoreAboveIndex) { + if (mTagStackIndex == mIgnoreAboveIndex) { + // We're dealing with the close tag whose matching + // open tag had set the mIgnoreAboveIndex value. + // Reset mIgnoreAboveIndex before discarding this tag. + mIgnoreAboveIndex = (uint32_t)kNotFound; + } + return NS_OK; + } + + // End current line if we're ending a block level tag + if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) { + // We want the output to end with a new line, + // but in preformatted areas like text fields, + // we can't emit newlines that weren't there. + // So add the newline only in the case of formatted output. + if (mFlags & nsIDocumentEncoder::OutputFormatted) { + EnsureVerticalSpace(0); + } + else { + FlushLine(); + } + // We won't want to do anything with these in formatted mode either, + // so just return now: + return NS_OK; + } + + // Keep this in sync with DoOpenContainer! 
+ if (!DoOutput()) { + return NS_OK; + } + + if (aTag == nsGkAtoms::tr) { + PopBool(mHasWrittenCellsForRow); + // Should always end a line, but get no more whitespace + if (mFloatingLines < 0) + mFloatingLines = 0; + mLineBreakDue = true; + } + else if (((aTag == nsGkAtoms::li) || + (aTag == nsGkAtoms::dt)) && + (mFlags & nsIDocumentEncoder::OutputFormatted)) { + // Items that should always end a line, but get no more whitespace + if (mFloatingLines < 0) + mFloatingLines = 0; + mLineBreakDue = true; + } + else if (aTag == nsGkAtoms::pre) { + mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1; + mLineBreakDue = true; + } + else if (aTag == nsGkAtoms::ul) { + FlushLine(); + mIndent -= kIndentSizeList; + if (--mULCount + mOLStackIndex == 0) { + mFloatingLines = 1; + mLineBreakDue = true; + } + } + else if (aTag == nsGkAtoms::ol) { + FlushLine(); // Doing this after decreasing OLStackIndex would be wrong. + mIndent -= kIndentSizeList; + NS_ASSERTION(mOLStackIndex, "Wrong OLStack level!"); + mOLStackIndex--; + if (mULCount + mOLStackIndex == 0) { + mFloatingLines = 1; + mLineBreakDue = true; + } + } + else if (aTag == nsGkAtoms::dl) { + mFloatingLines = 1; + mLineBreakDue = true; + } + else if (aTag == nsGkAtoms::dd) { + FlushLine(); + mIndent -= kIndentSizeDD; + } + else if (aTag == nsGkAtoms::span) { + NS_ASSERTION(mSpanLevel, "Span level will be negative!"); + --mSpanLevel; + } + else if (aTag == nsGkAtoms::div) { + if (mFloatingLines < 0) + mFloatingLines = 0; + mLineBreakDue = true; + } + else if (aTag == nsGkAtoms::blockquote) { + FlushLine(); // Is this needed? 
+ + // Pop + bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote); + + if (isInCiteBlockquote) { + NS_ASSERTION(mCiteQuoteLevel, "CiteQuote level will be negative!"); + mCiteQuoteLevel--; + mFloatingLines = 0; + mHasWrittenCiteBlockquote = true; + } + else { + mIndent -= kTabSize; + mFloatingLines = 1; + } + mLineBreakDue = true; + } + else if (aTag == nsGkAtoms::q) { + Write(NS_LITERAL_STRING("\"")); + } + else if (IsElementBlock(mElement) && aTag != nsGkAtoms::script) { + // All other blocks get 1 vertical space after them + // in formatted mode, otherwise 0. + // This is hard. Sometimes 0 is a better number, but + // how to know? + if (mFlags & nsIDocumentEncoder::OutputFormatted) + EnsureVerticalSpace(1); + else { + if (mFloatingLines < 0) + mFloatingLines = 0; + mLineBreakDue = true; + } + } + + ////////////////////////////////////////////////////////////// + if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) { + return NS_OK; + } + ////////////////////////////////////////////////////////////// + // The rest of this routine is formatted output stuff, + // which we should skip if we're not formatted: + ////////////////////////////////////////////////////////////// + + // Pop the currentConverted stack + bool currentNodeIsConverted = IsCurrentNodeConverted(); + + if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || + aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 || + aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) { + + if (mHeaderStrategy) { /*numbered or indent increasingly*/ + mIndent -= kIndentSizeHeaders; + } + if (mHeaderStrategy == 1 /*indent increasingly*/ ) { + for (int32_t i = HeaderLevel(aTag); i > 1; i--) { + // for h(x), run x-1 times + mIndent -= kIndentIncrementHeaders; + } + } + EnsureVerticalSpace(1); + } + else if (aTag == nsGkAtoms::a && !currentNodeIsConverted && !mURL.IsEmpty()) { + nsAutoString temp; + temp.AssignLiteral(" <"); + temp += mURL; + temp.Append(char16_t('>')); + Write(temp); + mURL.Truncate(); + } + else if ((aTag == 
nsGkAtoms::sup || aTag == nsGkAtoms::sub) + && mStructs && !currentNodeIsConverted) { + Write(kSpace); + } + else if (aTag == nsGkAtoms::code && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("|")); + } + else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) + && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("*")); + } + else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) + && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("/")); + } + else if (aTag == nsGkAtoms::u && mStructs && !currentNodeIsConverted) { + Write(NS_LITERAL_STRING("_")); + } + + return NS_OK; +} + +bool +nsPlainTextSerializer::MustSuppressLeaf() +{ + if (mIgnoredChildNodeLevel > 0) { + return true; + } + + if ((mTagStackIndex > 1 && + mTagStack[mTagStackIndex-2] == nsGkAtoms::select) || + (mTagStackIndex > 0 && + mTagStack[mTagStackIndex-1] == nsGkAtoms::select)) { + // Don't output the contents of SELECT elements; + // Might be nice, eventually, to output just the selected element. + // Read more in bug 31994. + return true; + } + + if (mTagStackIndex > 0 && + (mTagStack[mTagStackIndex-1] == nsGkAtoms::script || + mTagStack[mTagStackIndex-1] == nsGkAtoms::style)) { + // Don't output the contents of