diff options
Diffstat (limited to 'dom/base/nsPlainTextSerializer.h')
-rw-r--r-- | dom/base/nsPlainTextSerializer.h | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/dom/base/nsPlainTextSerializer.h b/dom/base/nsPlainTextSerializer.h new file mode 100644 index 000000000..95cf5590c --- /dev/null +++ b/dom/base/nsPlainTextSerializer.h @@ -0,0 +1,245 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * nsIContentSerializer implementation that can be used with an + * nsIDocumentEncoder to convert a DOM into plaintext in a nice way + * (eg for copy/paste as plaintext). + */ + +#ifndef nsPlainTextSerializer_h__ +#define nsPlainTextSerializer_h__ + +#include "mozilla/Attributes.h" +#include "nsCOMPtr.h" +#include "nsIAtom.h" +#include "nsIContentSerializer.h" +#include "nsIDocumentEncoder.h" +#include "nsILineBreaker.h" +#include "nsString.h" +#include "nsTArray.h" + +#include <stack> + +class nsIContent; + +namespace mozilla { +namespace dom { +class Element; +} // namespace dom +} // namespace mozilla + +class nsPlainTextSerializer final : public nsIContentSerializer +{ +public: + nsPlainTextSerializer(); + + NS_DECL_ISUPPORTS + + // nsIContentSerializer + NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn, + const char* aCharSet, bool aIsCopying, + bool aIsWholeDocument) override; + + NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset, + int32_t aEndOffset, nsAString& aStr) override; + NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, + int32_t aStartOffset, int32_t aEndOffset, + nsAString& aStr) override; + NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI, + int32_t aStartOffset, + int32_t aEndOffset, + nsAString& aStr) override { return NS_OK; } + NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset, + int32_t aEndOffset, nsAString& aStr) override { return NS_OK; } + NS_IMETHOD AppendDoctype(nsIContent *aDoctype, + nsAString& aStr) override { return NS_OK; } + NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement, + mozilla::dom::Element* aOriginalElement, + nsAString& aStr) override; + NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement, + nsAString& aStr) override; + NS_IMETHOD Flush(nsAString& aStr) override; + + NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument, + nsAString& aStr) override; + +private: + ~nsPlainTextSerializer(); + + nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet); + void AddToLine(const char16_t* aStringToAdd, int32_t aLength); + void EndLine(bool softlinebreak, bool aBreakBySpace = false); + void EnsureVerticalSpace(int32_t noOfRows); + void FlushLine(); + void OutputQuotesAndIndent(bool stripTrailingSpaces=false); + void Output(nsString& aString); + void Write(const nsAString& aString); + bool IsInPre(); + bool IsInOL(); + bool IsCurrentNodeConverted(); + bool MustSuppressLeaf(); + + /** + * Returns the local name of the element as an atom if the element is an + * HTML element and the atom is a static atom. Otherwise, nullptr is returned. + */ + static nsIAtom* GetIdForContent(nsIContent* aContent); + nsresult DoOpenContainer(nsIAtom* aTag); + nsresult DoCloseContainer(nsIAtom* aTag); + nsresult DoAddLeaf(nsIAtom* aTag); + void DoAddText(bool aIsWhitespace, const nsAString& aText); + + // Inlined functions + inline bool MayWrap() + { + return mWrapColumn && + ((mFlags & nsIDocumentEncoder::OutputFormatted) || + (mFlags & nsIDocumentEncoder::OutputWrap)); + } + inline bool MayBreakLines() + { + return !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking); + } + + inline bool DoOutput() + { + return mHeadLevel == 0; + } + + inline bool IsQuotedLine(const nsAString& aLine) + { + return !aLine.IsEmpty() && aLine.First() == char16_t('>'); + } + + // Stack handling functions + bool GetLastBool(const nsTArray<bool>& aStack); + void SetLastBool(nsTArray<bool>& aStack, bool aValue); + void PushBool(nsTArray<bool>& aStack, bool aValue); + bool PopBool(nsTArray<bool>& aStack); + + bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag); + bool IsIgnorableRubyAnnotation(nsIAtom* aTag); + + bool IsElementPreformatted(mozilla::dom::Element* aElement); + bool IsElementBlock(mozilla::dom::Element* aElement); + +private: + nsString mCurrentLine; + uint32_t mHeadLevel; + bool mAtFirstColumn; + + bool mStructs; // Output structs (pref) + + // If we've just written out a cite blockquote, we need to remember it + // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote + // old messages). + bool mHasWrittenCiteBlockquote; + + int32_t mIndent; + // mInIndentString keeps a header that has to be written in the indent. + // That could be, for instance, the bullet in a bulleted list. + nsString mInIndentString; + int32_t mCiteQuoteLevel; + int32_t mFlags; + int32_t mFloatingLines; // To store the number of lazy line breaks + + // The wrap column is how many standard sized chars (western languages) + // should be allowed on a line. There could be less chars if the chars + // are wider than latin chars of more if the chars are more narrow. + uint32_t mWrapColumn; + + // The width of the line as it will appear on the screen (approx.) + uint32_t mCurrentLineWidth; + + // Treat quoted text as though it's preformatted -- don't wrap it. + // Having it on a pref is a temporary measure, See bug 69638. + int32_t mSpanLevel; + + + int32_t mEmptyLines; // Will be the number of empty lines before + // the current. 0 if we are starting a new + // line and -1 if we are in a line. + + bool mInWhitespace; + bool mPreFormattedMail; // we're dealing with special DOM + // used by Thunderbird code. + bool mStartedOutput; // we've produced at least a character + + // While handling a new tag, this variable should remind if any line break + // is due because of a closing tag. Setting it to "TRUE" while closing the tags. + // Hence opening tags are guaranteed to start with appropriate line breaks. + bool mLineBreakDue; + + bool mPreformattedBlockBoundary; + + // Whether the output should include ruby annotations. + bool mWithRubyAnnotation; + + nsString mURL; + int32_t mHeaderStrategy; /* Header strategy (pref) + 0 = no indention + 1 = indention, increased with + header level (default) + 2 = numbering and slight indention */ + int32_t mHeaderCounter[7]; /* For header-numbering: + Number of previous headers of + the same depth and in the same + section. + mHeaderCounter[1] for <h1> etc. */ + + RefPtr<mozilla::dom::Element> mElement; + + // For handling table rows + AutoTArray<bool, 8> mHasWrittenCellsForRow; + + // Values gotten in OpenContainer that is (also) needed in CloseContainer + AutoTArray<bool, 8> mIsInCiteBlockquote; + + // The output data + nsAString* mOutputString; + + // The tag stack: the stack of tags we're operating on, so we can nest. + // The stack only ever points to static atoms, so they don't need to be + // refcounted. + nsIAtom** mTagStack; + uint32_t mTagStackIndex; + + // The stack indicating whether the elements we've been operating on are + // CSS preformatted elements, so that we can tell if the text inside them + // should be formatted. + std::stack<bool> mPreformatStack; + + // Content in the stack above this index should be ignored: + uint32_t mIgnoreAboveIndex; + + // The stack for ordered lists + int32_t *mOLStack; + uint32_t mOLStackIndex; + + uint32_t mULCount; + + nsString mLineBreak; + nsCOMPtr<nsILineBreaker> mLineBreaker; + + // Conveniance constant. It would be nice to have it as a const static + // variable, but that causes issues with OpenBSD and module unloading. + const nsString kSpace; + + // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child + // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored. + // mIgnoredChildNodeLevel is used to tell if current node is an ignorable + // child node. The initial value of mIgnoredChildNodeLevel is 0. When + // serializer enters those specific nodes, mIgnoredChildNodeLevel increases + // and is greater than 0. Otherwise when serializer leaves those nodes, + // mIgnoredChildNodeLevel decreases. + uint32_t mIgnoredChildNodeLevel; +}; + +nsresult +NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer); + +#endif |