summaryrefslogtreecommitdiffstats
path: root/dom/base/nsPlainTextSerializer.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/base/nsPlainTextSerializer.h')
-rw-r--r--dom/base/nsPlainTextSerializer.h245
1 files changed, 245 insertions, 0 deletions
diff --git a/dom/base/nsPlainTextSerializer.h b/dom/base/nsPlainTextSerializer.h
new file mode 100644
index 000000000..95cf5590c
--- /dev/null
+++ b/dom/base/nsPlainTextSerializer.h
@@ -0,0 +1,245 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
+ * (eg for copy/paste as plaintext).
+ */
+
+#ifndef nsPlainTextSerializer_h__
+#define nsPlainTextSerializer_h__
+
+#include "mozilla/Attributes.h"
+#include "nsCOMPtr.h"
+#include "nsIAtom.h"
+#include "nsIContentSerializer.h"
+#include "nsIDocumentEncoder.h"
+#include "nsILineBreaker.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+#include <stack>
+
+class nsIContent;
+
+namespace mozilla {
+namespace dom {
+class Element;
+} // namespace dom
+} // namespace mozilla
+
+class nsPlainTextSerializer final : public nsIContentSerializer // serializes a DOM into plaintext
+{
+public:
+ nsPlainTextSerializer();
+
+ NS_DECL_ISUPPORTS
+
+ // nsIContentSerializer (the PI, comment and doctype hooks below are inline no-ops)
+ NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
+ const char* aCharSet, bool aIsCopying,
+ bool aIsWholeDocument) override;
+
+ NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
+ int32_t aEndOffset, nsAString& aStr) override;
+ NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
+ int32_t aStartOffset, int32_t aEndOffset,
+ nsAString& aStr) override;
+ NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI,
+ int32_t aStartOffset,
+ int32_t aEndOffset,
+ nsAString& aStr) override { return NS_OK; }
+ NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset,
+ int32_t aEndOffset, nsAString& aStr) override { return NS_OK; }
+ NS_IMETHOD AppendDoctype(nsIContent *aDoctype,
+ nsAString& aStr) override { return NS_OK; }
+ NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement,
+ nsAString& aStr) override;
+ NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
+ nsAString& aStr) override;
+ NS_IMETHOD Flush(nsAString& aStr) override;
+
+ NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
+ nsAString& aStr) override;
+
+private:
+ ~nsPlainTextSerializer();
+
+ nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet);
+ void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
+ void EndLine(bool softlinebreak, bool aBreakBySpace = false);
+ void EnsureVerticalSpace(int32_t noOfRows);
+ void FlushLine();
+ void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
+ void Output(nsString& aString);
+ void Write(const nsAString& aString);
+ bool IsInPre();
+ bool IsInOL();
+ bool IsCurrentNodeConverted();
+ bool MustSuppressLeaf();
+
+ /**
+ * Returns the local name of the element as an atom if the element is an
+ * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
+ */
+ static nsIAtom* GetIdForContent(nsIContent* aContent);
+ nsresult DoOpenContainer(nsIAtom* aTag);
+ nsresult DoCloseContainer(nsIAtom* aTag);
+ nsresult DoAddLeaf(nsIAtom* aTag);
+ void DoAddText(bool aIsWhitespace, const nsAString& aText);
+
+ // Inlined functions
+ inline bool MayWrap() // true if mWrapColumn is non-zero and OutputFormatted or OutputWrap is set
+ {
+ return mWrapColumn &&
+ ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
+ (mFlags & nsIDocumentEncoder::OutputWrap));
+ }
+ inline bool MayBreakLines() // false when OutputDisallowLineBreaking is set in mFlags
+ {
+ return !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking);
+ }
+
+ inline bool DoOutput() // true only while mHeadLevel is 0
+ {
+ return mHeadLevel == 0;
+ }
+
+ inline bool IsQuotedLine(const nsAString& aLine) // true if aLine is non-empty and starts with '>'
+ {
+ return !aLine.IsEmpty() && aLine.First() == char16_t('>');
+ }
+
+ // Stack handling functions
+ bool GetLastBool(const nsTArray<bool>& aStack);
+ void SetLastBool(nsTArray<bool>& aStack, bool aValue);
+ void PushBool(nsTArray<bool>& aStack, bool aValue);
+ bool PopBool(nsTArray<bool>& aStack);
+
+ bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag);
+ bool IsIgnorableRubyAnnotation(nsIAtom* aTag);
+
+ bool IsElementPreformatted(mozilla::dom::Element* aElement);
+ bool IsElementBlock(mozilla::dom::Element* aElement);
+
+private:
+ nsString mCurrentLine;
+ uint32_t mHeadLevel;
+ bool mAtFirstColumn;
+
+ bool mStructs; // Output structs (pref)
+
+ // If we've just written out a cite blockquote, we need to remember it
+ // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
+ // old messages).
+ bool mHasWrittenCiteBlockquote;
+
+ int32_t mIndent;
+ // mInIndentString keeps a header that has to be written in the indent.
+ // That could be, for instance, the bullet in a bulleted list.
+ nsString mInIndentString;
+ int32_t mCiteQuoteLevel;
+ int32_t mFlags;
+ int32_t mFloatingLines; // To store the number of lazy line breaks
+
+ // The wrap column is how many standard sized chars (western languages)
+ // should be allowed on a line. There could be fewer chars if the chars
+ // are wider than latin chars, or more if the chars are narrower.
+ uint32_t mWrapColumn;
+
+ // The width of the line as it will appear on the screen (approx.)
+ uint32_t mCurrentLineWidth;
+
+ // NOTE(review): the original note here ("treat quoted text as preformatted; the
+ // pref is temporary, see bug 69638") does not match mSpanLevel -- confirm/replace.
+ int32_t mSpanLevel;
+
+
+ int32_t mEmptyLines; // Will be the number of empty lines before
+ // the current. 0 if we are starting a new
+ // line and -1 if we are in a line.
+
+ bool mInWhitespace;
+ bool mPreFormattedMail; // we're dealing with special DOM
+ // used by Thunderbird code.
+ bool mStartedOutput; // we've produced at least a character
+
+ // While handling a new tag, this variable records whether a line break is
+ // due because of a closing tag. It is set to true while closing tags, so
+ // opening tags are guaranteed to start with appropriate line breaks.
+ bool mLineBreakDue;
+
+ bool mPreformattedBlockBoundary;
+
+ // Whether the output should include ruby annotations.
+ bool mWithRubyAnnotation;
+
+ nsString mURL;
+ int32_t mHeaderStrategy; /* Header strategy (pref)
+ 0 = no indentation
+ 1 = indentation, increased with
+ header level (default)
+ 2 = numbering and slight indentation */
+ int32_t mHeaderCounter[7]; /* For header-numbering:
+ Number of previous headers of
+ the same depth and in the same
+ section.
+ mHeaderCounter[1] for <h1> etc. */
+
+ RefPtr<mozilla::dom::Element> mElement;
+
+ // For handling table rows
+ AutoTArray<bool, 8> mHasWrittenCellsForRow;
+
+ // Values obtained in OpenContainer that are (also) needed in CloseContainer
+ AutoTArray<bool, 8> mIsInCiteBlockquote;
+
+ // The output data
+ nsAString* mOutputString;
+
+ // The tag stack: the stack of tags we're operating on, so we can nest.
+ // The stack only ever points to static atoms, so they don't need to be
+ // refcounted.
+ nsIAtom** mTagStack;
+ uint32_t mTagStackIndex;
+
+ // The stack indicating whether the elements we've been operating on are
+ // CSS preformatted elements, so that we can tell if the text inside them
+ // should be formatted.
+ std::stack<bool> mPreformatStack;
+
+ // Content in the stack above this index should be ignored:
+ uint32_t mIgnoreAboveIndex;
+
+ // The stack for ordered lists
+ int32_t *mOLStack;
+ uint32_t mOLStackIndex;
+
+ uint32_t mULCount;
+
+ nsString mLineBreak;
+ nsCOMPtr<nsILineBreaker> mLineBreaker;
+
+ // Convenience constant. It would be nice to have it as a const static
+ // variable, but that causes issues with OpenBSD and module unloading.
+ const nsString kSpace;
+
+ // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child
+ // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored.
+ // mIgnoredChildNodeLevel is used to tell if the current node is an ignorable
+ // child node. The initial value of mIgnoredChildNodeLevel is 0. When the
+ // serializer enters those specific nodes, mIgnoredChildNodeLevel increases
+ // and is greater than 0. Otherwise, when the serializer leaves those nodes,
+ // mIgnoredChildNodeLevel decreases.
+ uint32_t mIgnoredChildNodeLevel;
+};
+
+nsresult
+NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer); // defined elsewhere; presumably hands back a new serializer via aSerializer -- confirm
+
+#endif