summaryrefslogtreecommitdiffstats
path: root/intl/uconv/nsIUnicodeDecoder.h
blob: 36a46759b5b9b77c7973802f8077db41571959a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsIUnicodeDecoder_h___
#define nsIUnicodeDecoder_h___

#include "nscore.h"
#include "nsISupports.h"

// IID of the nsIUnicodeDecoder interface, written as a brace initializer.
// UUID form: {25359602-fc70-4d13-a9ab-8086d3827c0d}
#define NS_IUNICODEDECODER_IID                           \
  { 0x25359602, 0xfc70, 0x4d13,                          \
    { 0xa9, 0xab, 0x80, 0x86, 0xd3, 0x82, 0x7c, 0x0d } }

// Common prefix of the decoder contract IDs. It ends in "charset=", so the
// charset name is presumably appended by the caller -- confirm at call sites.
#define NS_UNICODEDECODER_CONTRACTID_BASE \
  "@mozilla.org/intl/unicode/decoder;1?charset="

/**
 * Interface for a converter (decoder) from a charset into Unicode.
 *
 * A decoder keeps state between Convert() calls: incomplete trailing input is
 * cached internally. Call Reset() before reusing it for unrelated data.
 *
 * @created         23/Nov/1998
 * @author  Catalin Rotaru [CATA]
 */
class nsIUnicodeDecoder : public nsISupports
{
public:
  // Static IID accessor declaration keyed on NS_IUNICODEDECODER_IID (the
  // macro itself is defined outside this file).
  NS_DECLARE_STATIC_IID_ACCESSOR(NS_IUNICODEDECODER_IID)

  // Error-handling modes; pass one of these to SetInputErrorBehavior().
  enum {
    kOnError_Recover,       // on an error, recover and continue
    kOnError_Signal         // on an error, stop and signal
  };

  /**
   * Converts data from a charset to Unicode.
   *
   * Byte ordering:
   * - Input: if the converter cares about byte order (this depends on the
   *   charset; a single-byte charset, for example, ignores it), it should
   *   assume network order. If necessary and requested, a method
   *   SetInputByteOrder() could be added so that the reverse order can be
   *   used too; its default would be the assumed network order.
   * - Output: Unicode, in the native byte order of the machine the converter
   *   is running on.
   *
   * Unless there is not enough output space, this method must consume all the
   * available input data! Any incomplete final character is stored internally
   * in the converter and used when the method is called again to continue the
   * conversion. This way the caller does not have to manage incomplete input
   * data by merging it with the next buffer.
   *
   * Error conditions:
   * If a value that was read does not belong to this character set, it
   * should be replaced with the Unicode replacement character U+FFFD. When an
   * actual input error is encountered, such as a format error, the converter
   * stops and returns an error. However, keep in mind that decoding needs to
   * be lax. When a decoding error is returned to the caller, it is the
   * caller's responsibility to advance over the bad byte (unless aSrcLength
   * is -1, in which case the caller should call the decoder again with offset
   * 0) and to reset the decoder before calling it again.
   *
   * Required converter behavior, in this order:
   * 1. When the output space is full, return right away.
   * 2. When the input data is wrong, return with the input position right
   *    after the wrong byte.
   * 3. When the input is partial, consume and cache it.
   * In all cases the in/out length parameters report how much input was
   * actually consumed and how much output was actually written.
   *
   * @param aSrc        [IN] the source data buffer
   * @param aSrcLength  [IN/OUT] the length of source data buffer; after
   *                    conversion will contain the number of bytes read or
   *                    -1 on error to indicate that the caller should re-push
   *                    the same buffer after resetting the decoder
   * @param aDest       [OUT] the destination data buffer
   * @param aDestLength [IN/OUT] the length of the destination data buffer;
   *                    after conversion will contain the number of Unicode
   *                    characters written
   * @return            NS_ERROR_UDEC_ILLEGALINPUT if an illegal input sequence
   *                    was encountered and the behavior was set to "signal";
   *                    the caller must skip over one byte, reset the decoder
   *                    and retry.
   *                    NS_OK_UDEC_MOREOUTPUT if only a partial conversion
   *                    was done; more output space is needed to continue.
   *                    NS_OK_UDEC_MOREINPUT if the input ended in the middle
   *                    of an input code unit sequence. If this is the last
   *                    result the caller has at the end of the stream, the
   *                    caller must append one U+FFFD to the output.
   *                    NS_OK if the input ended after a complete input code
   *                    unit sequence.
   */
  NS_IMETHOD Convert(const char * aSrc, int32_t * aSrcLength, 
      char16_t * aDest, int32_t * aDestLength) = 0;

  /**
   * Returns a quick estimate of the size of the buffer needed to hold the
   * converted data. The estimate is always >= the size actually needed; it
   * is computed for the worst case.
   *
   * The result is marked MOZ_MUST_USE, so callers are expected to check it.
   *
   * @param aSrc        [IN] the source data buffer
   * @param aSrcLength  [IN] the length of source data buffer
   * @param aDestLength [OUT] the needed size of the destination buffer
   *                    (NOTE(review): presumably counted in char16_t units,
   *                    matching Convert() -- confirm against implementations)
   * @return            NS_EXACT_LENGTH if an exact length was computed
   *                    NS_ERROR_OUT_OF_MEMORY if OOM
   *                    NS_OK if all we have is an approximation
   */
   MOZ_MUST_USE NS_IMETHOD GetMaxLength(const char* aSrc,
                                        int32_t aSrcLength,
                                        int32_t* aDestLength) = 0;

  /**
   * Resets the charset converter so that it may be recycled for a completely
   * different and unrelated buffer of data.
   */
  NS_IMETHOD Reset() = 0;

  /**
   * Specifies what to do when a character cannot be mapped into Unicode.
   *
   * @param aBehavior [IN] the desired behavior: kOnError_Recover or
   *                  kOnError_Signal
   * @see kOnError_Recover
   * @see kOnError_Signal
   */
  virtual void SetInputErrorBehavior(int32_t aBehavior) = 0;

  /**
   * Returns the Unicode character used for an unmapped character.
   */
  virtual char16_t GetCharacterForUnMapped() = 0;
};

// Pairs with the NS_DECLARE_STATIC_IID_ACCESSOR line inside nsIUnicodeDecoder;
// both are given NS_IUNICODEDECODER_IID. NOTE(review): presumably emits the
// out-of-class part of the IID accessor -- confirm against the macro's
// definition.
NS_DEFINE_STATIC_IID_ACCESSOR(nsIUnicodeDecoder, NS_IUNICODEDECODER_IID)

#endif /* nsIUnicodeDecoder_h___ */