netwerk/streamconv/converters/mozTXTToHTMLConv.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
  Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct.
*/

#ifndef _mozTXTToHTMLConv_h__
#define _mozTXTToHTMLConv_h__

#include "mozITXTToHTMLConv.h"
#include "nsString.h"
#include "nsCOMPtr.h"

class nsIIOService;

class mozTXTToHTMLConv : public mozITXTToHTMLConv
{

  virtual ~mozTXTToHTMLConv();

//////////////////////////////////////////////////////////
public:
//////////////////////////////////////////////////////////

  mozTXTToHTMLConv();
  NS_DECL_ISUPPORTS

  NS_DECL_MOZITXTTOHTMLCONV
  NS_DECL_NSIREQUESTOBSERVER
  NS_DECL_NSISTREAMLISTENER
  NS_DECL_NSISTREAMCONVERTER

/**
  see mozITXTToHTMLConv::ScanTXT
 */
  void ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString);

/**
  see mozITXTToHTMLConv::ScanHTML. We will modify aInString potentially...
 */
  void ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString);

/**
  see mozITXTToHTMLConv::CiteLevelTXT
 */
  int32_t CiteLevelTXT(const char16_t * line,uint32_t& logLineStart);


//////////////////////////////////////////////////////////
protected:
//////////////////////////////////////////////////////////
  nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service...
/**
  Completes<ul>
  <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
  <li>Case 2: http:   "www.mozilla.org"     -> "http://www.mozilla.org"
  <li>Case 3: ftp:    "ftp.mozilla.org"     -> "ftp://www.mozilla.org"
  </ul>
  It does no check, if the resulting URL is valid.
  @param text (in): abbreviated URL
  @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
  @return Completed URL at success and empty string at failure
 */
  void CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength, 
                              const uint32_t pos, nsString& aOutString);


//////////////////////////////////////////////////////////
private:
//////////////////////////////////////////////////////////

  enum LIMTYPE
  {
    LT_IGNORE,     // limitation not checked
    LT_DELIMITER,  // not alphanumeric and not rep[0]. End of text is also ok.
    LT_ALPHA,      // alpha char
    LT_DIGIT
  };

/**
  @param text (in): the string to search through.<p>
         If before = IGNORE,<br>
           rep is compared starting at 1. char of text (text[0]),<br>
           else starting at 2. char of text (text[1]).
         Chars after "after"-delimiter are ignored.
  @param rep (in): the string to look for
  @param aRepLen (in): the number of bytes in the string to look for
  @param before (in): limitation before rep
  @param after (in): limitation after rep
  @return true, if rep is found and limitation spec is met or rep is empty
*/
  bool ItMatchesDelimited(const char16_t * aInString, int32_t aInLength,
      const char16_t * rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);

/**
  @param see ItMatchesDelimited
  @return Number of ItMatchesDelimited in text
*/
  uint32_t NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
      const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after);

/**
  Currently only changes "<", ">" and "&". All others stay as they are.<p>
  "Char" in function name to avoid side effects with nsString(ch)
  constructors.
  @param ch (in) 
  @param aStringToAppendto (out) - the string to append the escaped
                                   string to.
  @param inAttribute (in) - will escape quotes, too (which is
                            only needed for attribute values)
*/
  void EscapeChar(const char16_t ch, nsString& aStringToAppendto,
                  bool inAttribute);

/**
  See EscapeChar. Escapes the string in place.
*/
  void EscapeStr(nsString& aInString, bool inAttribute);

/**
  Currently only reverts "<", ">" and "&". All others stay as they are.<p>
  @param aInString (in) HTML string
  @param aStartPos (in) start index into the buffer
  @param aLength (in) length of the buffer
  @param aOutString (out) unescaped buffer
*/
  void UnescapeStr(const char16_t * aInString, int32_t aStartPos,
                   int32_t aLength, nsString& aOutString);

/**
  <em>Note</em>: I use different strategies to pass context between the
  functions (full text and pos vs. cutted text and col0, glphyTextLen vs.
  replaceBefore/-After). It makes some sense, but is hard to understand
  (maintain) :-(.
*/

/**
  <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
  in text should be replaced by outputHTML.</p>
  <p><em>Note:</em> This function should be able to process a URL on multiple
  lines, but currently, ScanForURLs is called for every line, so it can't.</p>
  @param text (in): includes possibly a URL
  @param pos (in): position in text, where either ":", "." or "@" are found
  @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
              (not-linkified) text, i.e. usually the "whattodo" parameter.
              (Needed to calculate replaceBefore.) NOT what will be done with
              the content of the link.
  @param outputHTML (out): URL with HTML-a tag
  @param replaceBefore (out): Number of chars of URL before pos
  @param replaceAfter (out): Number of chars of URL after pos
  @return URL found
*/
  bool FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
          const uint32_t whathasbeendone,
          nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter);

  enum modetype {
         unknown,
         RFC1738,          /* Check, if RFC1738, APPENDIX compliant,
                              like "<URL:http://www.mozilla.org>". */
         RFC2396E,         /* RFC2396, APPENDIX E allows anglebrackets (like
                              "<http://www.mozilla.org>") (without "URL:") or
                              quotation marks(like ""http://www.mozilla.org"").
                              Also allow email addresses without scheme,
                              e.g. "<mozilla@bucksch.org>" */
         freetext,         /* assume heading scheme
                              with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
                              (see RFC2396, Section 3.1).
                              Certain characters (see code) or any whitespace
                              (including linebreaks) end the URL.
                              Other certain (punctation) characters (see code)
                              at the end are stripped off. */
         abbreviated       /* Similar to freetext, but without scheme, e.g.
	                      "www.mozilla.org", "ftp.mozilla.org" and
                              "mozilla@bucksch.org". */
      /* RFC1738 and RFC2396E type URLs may use multiple lines,
         whitespace is stripped. Special characters like ")" stay intact.*/
  };

/**
 * @param text (in), pos (in): see FindURL
 * @param check (in): Start must be conform with this mode
 * @param start (out): Position in text, where URL (including brackets or
 *             similar) starts
 * @return |check|-conform start has been found
 */
  bool FindURLStart(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
            	               const modetype check, uint32_t& start);

/**
 * @param text (in), pos (in): see FindURL
 * @param check (in): End must be conform with this mode
 * @param start (in): see FindURLStart
 * @param end (out): Similar to |start| param of FindURLStart
 * @return |check|-conform end has been found
 */
  bool FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
           const modetype check, const uint32_t start, uint32_t& end);

/**
 * @param text (in), pos (in), whathasbeendone (in): see FindURL
 * @param check (in): Current mode
 * @param start (in), end (in): see FindURLEnd
 * @param txtURL (out): Guessed (raw) URL.
 *             Without whitespace, but not completed.
 * @param desc (out): Link as shown to the user, but already escaped.
 *             Should be placed between the <a> and </a> tags.
 * @param replaceBefore(out), replaceAfter (out): see FindURL
 */
  void CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength, 
     const uint32_t pos, const uint32_t whathasbeendone,
     const modetype check, const uint32_t start, const uint32_t end,
     nsString& txtURL, nsString& desc,
     int32_t& replaceBefore, int32_t& replaceAfter);

/**
 * @param txtURL (in), desc (in): see CalculateURLBoundaries
 * @param outputHTML (out): see FindURL
 * @return A valid URL could be found (and creation of HTML successful)
 */
  bool CheckURLAndCreateHTML(
       const nsString& txtURL, const nsString& desc, const modetype mode,
       nsString& outputHTML);

/**
  @param text (in): line of text possibly with tagTXT.<p>
              if col0 is true,
                starting with tagTXT<br>
              else
                starting one char before tagTXT
  @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
              open must be 0 then.
  @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
  @param aTagTxtLen (in): length of tagTXT.
  @param tagHTML (in): HTML-Tag to replace tagTXT with,
              without "<" and ">", e.g. "strong"
  @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
              e.g. "class=txt_star"
  @param aOutString: string to APPEND the converted html into
  @param open (in/out): Number of currently open tags of type tagHTML
  @return Conversion succeeded
*/
  bool StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
     const char16_t* tagTXT,
     int32_t aTagTxtLen, 
     const char* tagHTML, const char* attributeHTML,
     nsString& aOutputString, uint32_t& openTags);

/**
  @param text (in), col0 (in): see GlyphHit
  @param tagTXT (in): Smily, see also StructPhraseHit
  @param imageName (in): the basename of the file that contains the image for this smilie
  @param outputHTML (out): new string containing the html for the smily
  @param glyphTextLen (out): see GlyphHit
*/
  bool
         SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
         const char* tagTXT, const char* imageName,
         nsString& outputHTML, int32_t& glyphTextLen);

/**
  Checks, if we can replace some chars at the start of line with prettier HTML
  code.<p>
  If success is reported, replace the first glyphTextLen chars with outputHTML

  @param text (in): line of text possibly with Glyph.<p>
              If col0 is true,
                starting with Glyph <br><!-- (br not part of text) -->
              else
                starting one char before Glyph
  @param col0 (in): text starts at the beginning of the line (or paragraph)
  @param aOutString (out): APPENDS html for the glyph to this string
  @param glyphTextLen (out): Length of original text to replace
  @return see StructPhraseHit
*/
  bool GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
       nsString& aOutString, int32_t& glyphTextLen);

/**
  Check if a given url should be linkified.
  @param aURL (in): url to be checked on.
*/
  bool ShouldLinkify(const nsCString& aURL);
};

// It's said, that Win32 and Mac don't like static const members
const int32_t mozTXTToHTMLConv_lastMode = 4;
	                        // Needed (only) by mozTXTToHTMLConv::FindURL
const int32_t mozTXTToHTMLConv_numberOfModes = 4;  // dito; unknown not counted

#endif