summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/unicode/ucnvsel.h
blob: 3eed081a37b4bd3e6d27f1ca72e54a8f58a9840e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2008-2011, International Business Machines
*   Corporation, Google and others.  All Rights Reserved.
*
*******************************************************************************
*/
/*
 * Author : eldawy@google.com (Mohamed Eldawy)
 * ucnvsel.h
 *
 * Purpose: To generate a list of encodings capable of handling
 * a given Unicode text
 *
 * Started 09-April-2008
 */

#ifndef __ICU_UCNV_SEL_H__
#define __ICU_UCNV_SEL_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/uset.h"
#include "unicode/utf16.h"
#include "unicode/uenum.h"
#include "unicode/ucnv.h"
#include "unicode/localpointer.h"

/**
 * \file
 *
 * A converter selector is built with a set of encoding/charset names
 * and given an input string returns the set of names of the
 * corresponding converters which can convert the string.
 *
 * A converter selector can be serialized into a buffer and reopened
 * from the serialized form.
 */

/**
 * @{
 * The selector data structure
 */
struct UConverterSelector;
typedef struct UConverterSelector UConverterSelector;
/** @} */

/**
 * Open a selector.
 * If converterListSize is 0, build for all available converters.
 * If excludedCodePoints is NULL, don't exclude any code points.
 *
 * @param converterList a pointer to encoding names needed to be involved. 
 *                      Can be NULL if converterListSize==0.
 *                      The list and the names will be cloned, and the caller
 *                      retains ownership of the original.
 * @param converterListSize number of encodings in above list.
 *                          If 0, builds a selector for all available converters.
 * @param excludedCodePoints a set of code points to be excluded from consideration.
 *                           That is, excluded code points in a string do not change
 *                           the selection result. (They might be handled by a callback.)
 *                           Use NULL to exclude nothing.
 * @param whichSet what converter set to use? Use this to determine whether
 *                 to consider only roundtrip mappings or also fallbacks.
 * @param status an in/out ICU UErrorCode
 * @return the new selector
 *
 * @stable ICU 4.2
 */
U_STABLE UConverterSelector* U_EXPORT2
ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
             const USet* excludedCodePoints,
             const UConverterUnicodeSet whichSet, UErrorCode* status);

/**
 * Closes a selector.
 * If any Enumerations were returned by ucnv_select*, they become invalid.
 * They can be closed before or after calling ucnv_closeSelector,
 * but should never be used after the selector is closed.
 *
 * @see ucnv_selectForString
 * @see ucnv_selectForUTF8
 *
 * @param sel selector to close
 *
 * @stable ICU 4.2
 */
U_STABLE void U_EXPORT2
ucnvsel_close(UConverterSelector *sel);

#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUConverterSelectorPointer
 * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);

U_NAMESPACE_END

#endif

/**
 * Open a selector from its serialized form.
 * The buffer must remain valid and unchanged for the lifetime of the selector.
 * This is much faster than creating a selector from scratch.
 * Using a serialized form from a different machine (endianness/charset) is supported.
 *
 * @param buffer pointer to the serialized form of a converter selector;
 *               must be 32-bit-aligned
 * @param length the capacity of this buffer (can be equal to or larger than
 *               the actual data length)
 * @param status an in/out ICU UErrorCode
 * @return the new selector
 *
 * @stable ICU 4.2
 */
U_STABLE UConverterSelector* U_EXPORT2
ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);

/**
 * Serialize a selector into a linear buffer.
 * The serialized form is portable to different machines.
 *
 * @param sel selector to consider
 * @param buffer pointer to 32-bit-aligned memory to be filled with the
 *               serialized form of this converter selector
 * @param bufferCapacity the capacity of this buffer
 * @param status an in/out ICU UErrorCode
 * @return the required buffer capacity to hold serialize data (even if the call fails
 *         with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
 *
 * @stable ICU 4.2
 */
U_STABLE int32_t U_EXPORT2
ucnvsel_serialize(const UConverterSelector* sel,
                  void* buffer, int32_t bufferCapacity, UErrorCode* status);

/**
 * Select converters that can map all characters in a UTF-16 string,
 * ignoring the excluded code points.
 *
 * @param sel a selector
 * @param s UTF-16 string
 * @param length length of the string, or -1 if NUL-terminated
 * @param status an in/out ICU UErrorCode
 * @return an enumeration containing encoding names.
 *         The returned encoding names and their order will be the same as
 *         supplied when building the selector.
 *
 * @stable ICU 4.2
 */
U_STABLE UEnumeration * U_EXPORT2
ucnvsel_selectForString(const UConverterSelector* sel,
                        const UChar *s, int32_t length, UErrorCode *status);

/**
 * Select converters that can map all characters in a UTF-8 string,
 * ignoring the excluded code points.
 *
 * @param sel a selector
 * @param s UTF-8 string
 * @param length length of the string, or -1 if NUL-terminated
 * @param status an in/out ICU UErrorCode
 * @return an enumeration containing encoding names.
 *         The returned encoding names and their order will be the same as
 *         supplied when building the selector.
 *
 * @stable ICU 4.2
 */
U_STABLE UEnumeration * U_EXPORT2
ucnvsel_selectForUTF8(const UConverterSelector* sel,
                      const char *s, int32_t length, UErrorCode *status);

#endif  /* !UCONFIG_NO_CONVERSION */

#endif  /* __ICU_UCNV_SEL_H__ */