intl/icu/source/i18n/unicode/sortkey.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340

// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *****************************************************************************
 * Copyright (C) 1996-2014, International Business Machines Corporation and others.
 * All Rights Reserved.
 *****************************************************************************
 *
 * File sortkey.h
 *
 * Created by: Helena Shih
 *
 * Modification History:
 *
 *  Date         Name          Description
 *
 *  6/20/97     helena      Java class name change.
 *  8/18/97     helena      Added internal API documentation.
 *  6/26/98     erm         Changed to use byte arrays and memcmp.
 *****************************************************************************
 */

#ifndef SORTKEY_H
#define SORTKEY_H

#include "unicode/utypes.h"

/**
 * \file 
 * \brief C++ API: Keys for comparing strings multiple times. 
 */
 
#if !UCONFIG_NO_COLLATION

#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/coll.h"

U_NAMESPACE_BEGIN

/* forward declaration */
class RuleBasedCollator;
class CollationKeyByteSink;

/**
 *
 * Collation keys are generated by the Collator class.  Use the CollationKey objects
 * instead of Collator to compare strings multiple times.  A CollationKey
 * preprocesses the comparison information from the Collator object to
 * make the comparison faster.  If you are not going to compare strings
 * multiple times, then using the Collator object is generally faster,
 * since it only processes as much of the string as needed to make a
 * comparison.
 * <p> For example (with strength == tertiary)
 * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
 * only needs to process a couple of characters, while a comparison
 * with CollationKeys will process all of the characters.  On the other hand,
 * if you are doing a sort of a number of fields, it is much faster to use
 * CollationKeys, since you will be comparing strings multiple times.
 * <p>A typical use of CollationKeys is in databases, where you store a CollationKey
 * in a hidden field, and use it for sorting or indexing.
 *
 * <p>Example of use:
 * <pre>
 * \code
 *     UErrorCode success = U_ZERO_ERROR;
 *     Collator* myCollator = Collator::createInstance(success);
 *     CollationKey* keys = new CollationKey [3];
 *     myCollator->getCollationKey("Tom", keys[0], success );
 *     myCollator->getCollationKey("Dick", keys[1], success );
 *     myCollator->getCollationKey("Harry", keys[2], success );
 *
 *     // Inside body of sort routine, compare keys this way:
 *     CollationKey tmp;
 *     if(keys[0].compareTo( keys[1] ) > 0 ) {
 *         tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
 *     }
 *     //...
 * \endcode
 * </pre>
 * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
 * long lists of words by retrieving collation keys with Collator::getCollationKey().
 * You can then cache the collation keys and compare them using CollationKey::compareTo().
 * <p>
 * <strong>Note:</strong> <code>Collator</code>s with different Locale,
 * CollationStrength and DecompositionMode settings will return different
 * CollationKeys for the same set of strings. Locales have specific
 * collation rules, and the way in which secondary and tertiary differences
 * are taken into account, for example, will result in different CollationKeys
 * for the same strings.
 * <p>

 * @see          Collator
 * @see          RuleBasedCollator
 * @version      1.3 12/18/96
 * @author       Helena Shih
 * @stable ICU 2.0
 */
class U_I18N_API CollationKey : public UObject {
public:
    /**
     * Constructs an empty collation key, i.e. one based on the null string.
     * An empty key carries no sorting information: two empty keys compare as
     * Collator::EQUAL, and an empty key compared with a non-empty key is
     * always Collator::LESS.
     * @stable ICU 2.0
     */
    CollationKey();

    /**
     * Constructs a collation key from the given collation key values.
     * @param values the collation key values
     * @param count  number of collation key values, including trailing nulls.
     * @stable ICU 2.0
     */
    CollationKey(const uint8_t* values, int32_t count);

    /**
     * Copy constructor.
     * @param other the collation key to copy.
     * @stable ICU 2.0
     */
    CollationKey(const CollationKey& other);

    /**
     * Destructor.
     * @stable ICU 2.0
     */
    virtual ~CollationKey();

    /**
     * Assignment operator.
     * @param other the collation key to copy.
     * @stable ICU 2.0
     */
    const CollationKey& operator=(const CollationKey& other);

    /**
     * Tests whether two collation keys are the same.
     * @param source the collation key to compare to.
     * @return TRUE if the two collation keys are equal, FALSE otherwise.
     * @stable ICU 2.0
     */
    UBool operator==(const CollationKey& source) const;

    /**
     * Tests whether two collation keys are not the same.
     * @param source the collation key to compare to.
     * @return TRUE if the two collation keys are different, FALSE otherwise.
     * @stable ICU 2.0
     */
    UBool operator!=(const CollationKey& source) const;

    /**
     * Tests whether the key is in an invalid state. A key becomes invalid
     * when it could not allocate memory for some operation.
     * @return TRUE if the key is in an invalid state, FALSE otherwise.
     * @stable ICU 2.0
     */
    UBool isBogus() const;

    /**
     * Gives read-only access to the collation key values. The storage is
     * owned by the collation key, so the returned pointer becomes invalid
     * once the key is deleted.
     * @param count output parameter: the number of collation key values,
     *              including any trailing nulls.
     * @return a pointer to the collation key values.
     * @stable ICU 2.0
     */
    const uint8_t* getByteArray(int32_t& count) const;

#ifdef U_USE_COLLATION_KEY_DEPRECATES
    /**
     * Copies the collation key values into a newly allocated array. The
     * caller owns the returned storage and should free it.
     * @param count output parameter: the number of collation key values,
     *              including any trailing nulls.
     * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
     */
    uint8_t* toByteArray(int32_t& count) const;
#endif

#ifndef U_HIDE_DEPRECATED_API
    /**
     * Convenience method that performs a string (bit-wise) comparison of
     * this collation key with another one.
     * @param target the collation key to compare with
     * @return Collator::LESS if sourceKey &lt; targetKey,
     *         Collator::GREATER if sourceKey > targetKey, and
     *         Collator::EQUAL otherwise.
     * @deprecated ICU 2.6 use the overload with error code
     */
    Collator::EComparisonResult compareTo(const CollationKey& target) const;
#endif  /* U_HIDE_DEPRECATED_API */

    /**
     * Convenience method that performs a string (bit-wise) comparison of
     * this collation key with another one.
     * @param target the collation key to compare with
     * @param status error code
     * @return UCOL_LESS if sourceKey &lt; targetKey,
     *         UCOL_GREATER if sourceKey > targetKey, and
     *         UCOL_EQUAL otherwise.
     * @stable ICU 2.6
     */
    UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;

    /**
     * Produces an integer that is unique to the collation key.  NOTE: this
     * is not the same as String.hashCode.
     * <p>Example of use:
     * <pre>
     * .    UErrorCode status = U_ZERO_ERROR;
     * .    Collator *myCollation = Collator::createInstance(Locale::US, status);
     * .    if (U_FAILURE(status)) return;
     * .    CollationKey key1, key2;
     * .    UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
     * .    myCollation->getCollationKey("abc", key1, status1);
     * .    if (U_FAILURE(status1)) { delete myCollation; return; }
     * .    myCollation->getCollationKey("ABC", key2, status2);
     * .    if (U_FAILURE(status2)) { delete myCollation; return; }
     * .    // key1.hashCode() != key2.hashCode()
     * </pre>
     * @return the hash value based on the string's collation order.
     * @see UnicodeString#hashCode
     * @stable ICU 2.0
     */
    int32_t hashCode() const;

    /**
     * ICU "poor man's RTTI": returns a UClassID for the actual class.
     * @stable ICU 2.2
     */
    virtual UClassID getDynamicClassID() const;

    /**
     * ICU "poor man's RTTI": returns a UClassID for this class.
     * @stable ICU 2.2
     */
    static UClassID U_EXPORT2 getStaticClassID();

private:
    /**
     * Swaps the current bytes buffer for a new one of newCapacity bytes and
     * copies length bytes from the old buffer into it.
     * @return the new buffer, or NULL if the allocation failed
     */
    uint8_t *reallocate(int32_t newCapacity, int32_t length);

    /**
     * Sets a new length for a new sort key in the existing fBytes.
     */
    void setLength(int32_t newLength);

    /**
     * @return the start of the key bytes: the heap pointer in fUnion.fFields
     *         when the flag bit makes fFlagAndLength negative, otherwise the
     *         in-object stack buffer.
     */
    uint8_t *getBytes() {
        if (fFlagAndLength < 0) {
            return fUnion.fFields.fBytes;
        }
        return fUnion.fStackBuffer;
    }

    /** Const overload of getBytes(); same buffer selection. */
    const uint8_t *getBytes() const {
        if (fFlagAndLength < 0) {
            return fUnion.fFields.fBytes;
        }
        return fUnion.fStackBuffer;
    }

    /**
     * @return the stored fCapacity when fFlagAndLength is negative,
     *         otherwise the size of the whole union (the stack buffer case).
     */
    int32_t getCapacity() const {
        if (fFlagAndLength < 0) {
            return fUnion.fFields.fCapacity;
        }
        return static_cast<int32_t>(sizeof(fUnion));
    }

    /** @return the sort key length, i.e. fFlagAndLength with bit 31 masked off. */
    int32_t getLength() const {
        return fFlagAndLength & 0x7fffffff;
    }

    /**
     * Puts the CollationKey into a "bogus" or invalid state.
     * @return this CollationKey
     */
    CollationKey& setToBogus();

    /**
     * Resets this CollationKey to an empty state.
     * @return this CollationKey
     */
    CollationKey& reset();

    /**
     * Grant private access to RuleBasedCollator and CollationKeyByteSink.
     */
    friend class RuleBasedCollator;
    friend class CollationKeyByteSink;

    // Data members. sizeof(CollationKey) is intended to be 48 bytes
    // on a machine with 64-bit pointers.
    // A union is used to maximize the size of the internal buffer,
    // similar to UnicodeString but not as tight and complex.

    // (implicit) *vtable;

    /**
     * Sort key length and flag.
     * Bit 31 is set if the buffer is heap-allocated.
     * Bits 30..0 contain the sort key length.
     */
    int32_t fFlagAndLength;

    /**
     * Unique hash value of this CollationKey.
     * Special value 2 if the key is bogus.
     */
    mutable int32_t fHashCode;

    /**
     * fUnion provides 32 bytes that hold either the internal buffer or
     * the pointer+capacity pair.
     */
    union StackBufferOrFields {
        /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */
        uint8_t fStackBuffer[32];
        struct {
            uint8_t *fBytes;
            int32_t fCapacity;
        } fFields;
    } fUnion;
};

/**
 * Inequality is defined as the negation of operator==.
 */
inline UBool
CollationKey::operator!=(const CollationKey& other) const
{
    const UBool same = (*this == other);
    return !same;
}

/**
 * A key is bogus exactly when its cached hash code holds the reserved
 * value 2 (kBogusHashCode).
 */
inline UBool
CollationKey::isBogus() const
{
    const int32_t bogusHash = 2;  // kBogusHashCode
    return fHashCode == bogusHash;
}

inline const uint8_t*
CollationKey::getByteArray(int32_t &count) const
{
    count = getLength();
    return getBytes();
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_COLLATION */

#endif