1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* nsIContentSerializer implementation that can be used with an
* nsIDocumentEncoder to convert a DOM into plaintext in a nice way
* (eg for copy/paste as plaintext).
*/
#ifndef nsPlainTextSerializer_h__
#define nsPlainTextSerializer_h__
#include "mozilla/Attributes.h"
#include "nsCOMPtr.h"
#include "nsIAtom.h"
#include "nsIContentSerializer.h"
#include "nsIDocumentEncoder.h"
#include "nsILineBreaker.h"
#include "nsString.h"
#include "nsTArray.h"
#include <stack>
// Forward declarations. In this header nsIContent and mozilla::dom::Element
// are only referred to through raw pointers or RefPtr, so the declarations
// below are sufficient here and the full definitions are not included.
class nsIContent;
namespace mozilla {
namespace dom {
class Element;
} // namespace dom
} // namespace mozilla
/**
 * nsIContentSerializer implementation that can be used with an
 * nsIDocumentEncoder to convert a DOM into plaintext (e.g. for copy/paste as
 * plaintext).
 *
 * Only the small predicates defined in the class body are implemented here;
 * every other member function is defined out of line.
 */
class nsPlainTextSerializer final : public nsIContentSerializer
{
public:
  nsPlainTextSerializer();

  NS_DECL_ISUPPORTS

  // nsIContentSerializer
  NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
                  const char* aCharSet, bool aIsCopying,
                  bool aIsWholeDocument) override;

  NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
                        int32_t aEndOffset, nsAString& aStr) override;
  NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
                                int32_t aStartOffset, int32_t aEndOffset,
                                nsAString& aStr) override;

  // Processing instructions, comments and doctypes are accepted but add
  // nothing to the output: these three overrides return NS_OK without
  // touching aStr.
  NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI,
                                         int32_t aStartOffset,
                                         int32_t aEndOffset,
                                         nsAString& aStr) override { return NS_OK; }
  NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset,
                           int32_t aEndOffset, nsAString& aStr) override { return NS_OK; }
  NS_IMETHOD AppendDoctype(nsIContent *aDoctype,
                           nsAString& aStr) override { return NS_OK; }

  NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
                                mozilla::dom::Element* aOriginalElement,
                                nsAString& aStr) override;
  NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
                              nsAString& aStr) override;
  NS_IMETHOD Flush(nsAString& aStr) override;

  NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
                                 nsAString& aStr) override;

private:
  // Private destructor: the class declares XPCOM reference counting via
  // NS_DECL_ISUPPORTS above.
  // NOTE(review): presumably instances are destroyed only through Release()
  // -- confirm against the implementation.
  ~nsPlainTextSerializer();

  nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet);

  // Line building and output helpers (defined out of line).
  void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
  void EndLine(bool softlinebreak, bool aBreakBySpace = false);
  void EnsureVerticalSpace(int32_t noOfRows);
  void FlushLine();
  void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
  void Output(nsString& aString);
  void Write(const nsAString& aString);

  // State queries (defined out of line).
  bool IsInPre();
  bool IsInOL();
  bool IsCurrentNodeConverted();
  bool MustSuppressLeaf();

  /**
   * Returns the local name of the element as an atom if the element is an
   * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
   */
  static nsIAtom* GetIdForContent(nsIContent* aContent);

  nsresult DoOpenContainer(nsIAtom* aTag);
  nsresult DoCloseContainer(nsIAtom* aTag);
  nsresult DoAddLeaf(nsIAtom* aTag);
  void DoAddText(bool aIsWhitespace, const nsAString& aText);

  // Inlined functions.
  // These only read member state, so they are const-qualified; being defined
  // in the class body they are implicitly inline.

  // True when a non-zero wrap column is set and the flags request either
  // formatted output or wrapping.
  bool MayWrap() const
  {
    return mWrapColumn &&
      ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
       (mFlags & nsIDocumentEncoder::OutputWrap));
  }

  // True unless the flags contain OutputDisallowLineBreaking.
  bool MayBreakLines() const
  {
    return !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking);
  }

  // True while mHeadLevel is zero.
  // NOTE(review): presumably mHeadLevel counts nesting inside <head>, so
  // output is suppressed there -- confirm against the implementation.
  bool DoOutput() const
  {
    return mHeadLevel == 0;
  }

  // True when aLine is non-empty and its first character is '>'.
  bool IsQuotedLine(const nsAString& aLine) const
  {
    return !aLine.IsEmpty() && aLine.First() == char16_t('>');
  }

  // Stack handling functions
  bool GetLastBool(const nsTArray<bool>& aStack);
  void SetLastBool(nsTArray<bool>& aStack, bool aValue);
  void PushBool(nsTArray<bool>& aStack, bool aValue);
  bool PopBool(nsTArray<bool>& aStack);

  bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag);
  bool IsIgnorableRubyAnnotation(nsIAtom* aTag);

  bool IsElementPreformatted(mozilla::dom::Element* aElement);
  bool IsElementBlock(mozilla::dom::Element* aElement);

private:
  // Data members. Their declaration order is kept exactly as it was because
  // it determines the order in which the constructor initializes them.
  nsString         mCurrentLine;
  uint32_t         mHeadLevel;
  bool             mAtFirstColumn;

  bool             mStructs;            // Output structs (pref)

  // If we've just written out a cite blockquote, we need to remember it
  // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
  // old messages).
  bool             mHasWrittenCiteBlockquote;

  int32_t          mIndent;
  // mInIndentString keeps a header that has to be written in the indent.
  // That could be, for instance, the bullet in a bulleted list.
  nsString         mInIndentString;
  int32_t          mCiteQuoteLevel;
  // Bit flags tested against nsIDocumentEncoder::Output* constants (see
  // MayWrap() and MayBreakLines()).
  // NOTE(review): presumably the flags passed to Init() -- confirm.
  int32_t          mFlags;
  int32_t          mFloatingLines;      // To store the number of lazy line breaks

  // The wrap column is how many standard sized chars (western languages)
  // should be allowed on a line. There could be fewer chars if the chars
  // are wider than latin chars, or more if the chars are narrower.
  uint32_t         mWrapColumn;

  // The width of the line as it will appear on the screen (approx.)
  uint32_t         mCurrentLineWidth;

  // NOTE(review): the original comment at this position read "Treat quoted
  // text as though it's preformatted -- don't wrap it. Having it on a pref is
  // a temporary measure, See bug 69638." That describes a pref-controlled
  // switch, not a level counter, so it does not appear to document
  // mSpanLevel; it looks like a leftover from a member that is no longer
  // declared here -- confirm and drop.
  int32_t          mSpanLevel;

  int32_t          mEmptyLines;         // Will be the number of empty lines before
                                        // the current. 0 if we are starting a new
                                        // line and -1 if we are in a line.

  bool             mInWhitespace;
  bool             mPreFormattedMail;   // we're dealing with special DOM
                                        // used by Thunderbird code.
  bool             mStartedOutput;      // we've produced at least a character

  // While handling a new tag, this variable records whether a line break
  // is due because of a closing tag. It is set to true while closing tags,
  // so opening tags are guaranteed to start with appropriate line breaks.
  bool             mLineBreakDue;

  bool             mPreformattedBlockBoundary;

  // Whether the output should include ruby annotations.
  bool             mWithRubyAnnotation;

  nsString         mURL;
  int32_t          mHeaderStrategy;     /* Header strategy (pref)
                                           0 = no indentation
                                           1 = indentation, increased with
                                               header level (default)
                                           2 = numbering and slight indentation */
  int32_t          mHeaderCounter[7];   /* For header-numbering:
                                           Number of previous headers of
                                           the same depth and in the same
                                           section.
                                           mHeaderCounter[1] for <h1> etc. */

  RefPtr<mozilla::dom::Element> mElement;

  // For handling table rows
  AutoTArray<bool, 8> mHasWrittenCellsForRow;

  // Values gotten in OpenContainer that are (also) needed in CloseContainer
  AutoTArray<bool, 8> mIsInCiteBlockquote;

  // The output data
  nsAString*       mOutputString;

  // The tag stack: the stack of tags we're operating on, so we can nest.
  // The stack only ever points to static atoms, so they don't need to be
  // refcounted.
  // NOTE(review): raw pointer plus index; allocation and release are not
  // visible in this header -- confirm ownership in the implementation.
  nsIAtom**        mTagStack;
  uint32_t         mTagStackIndex;

  // The stack indicating whether the elements we've been operating on are
  // CSS preformatted elements, so that we can tell if the text inside them
  // should be formatted.
  std::stack<bool> mPreformatStack;

  // Content in the stack above this index should be ignored:
  uint32_t         mIgnoreAboveIndex;

  // The stack for ordered lists
  // NOTE(review): raw pointer plus index, same ownership question as
  // mTagStack -- confirm in the implementation.
  int32_t         *mOLStack;
  uint32_t         mOLStackIndex;

  uint32_t         mULCount;

  nsString                     mLineBreak;
  nsCOMPtr<nsILineBreaker>     mLineBreaker;

  // Convenience constant. It would be nice to have it as a const static
  // variable, but that causes issues with OpenBSD and module unloading.
  const nsString          kSpace;

  // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child
  // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored.
  // mIgnoredChildNodeLevel is used to tell if current node is an ignorable
  // child node. The initial value of mIgnoredChildNodeLevel is 0. When
  // serializer enters those specific nodes, mIgnoredChildNodeLevel increases
  // and is greater than 0. Otherwise when serializer leaves those nodes,
  // mIgnoredChildNodeLevel decreases.
  uint32_t mIgnoredChildNodeLevel;
};
// Factory entry point for the plain text serializer; only the declaration is
// visible in this header.
// NOTE(review): presumably creates an nsPlainTextSerializer and hands it back
// through aSerializer, returning an nsresult status -- confirm the
// reference-counting convention of the out-parameter against the definition.
nsresult
NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
#endif
|