summaryrefslogtreecommitdiffstats
path: root/intl/unicharutil/util/nsUnicodeProperties.h
blob: ba5526a4da6b82478a25a247cb88428d5cc339e8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* vim:set ts=4 sw=4 sts=4 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef NS_UNICODEPROPERTIES_H
#define NS_UNICODEPROPERTIES_H

#include "nsBidiUtils.h"
#include "nsIUGenCategory.h"
#include "nsUnicodeScriptCodes.h"
#include "harfbuzz/hb.h"

#if ENABLE_INTL_API
#include "unicode/uchar.h"
#include "unicode/uscript.h"
#endif

const nsCharProps2& GetCharProps2(uint32_t aCh);

namespace mozilla {

namespace unicode {

extern const nsIUGenCategory::nsUGenCategory sDetailedToGeneralCategory[];

/* This MUST match the values assigned by genUnicodePropertyData.pl! */
enum VerticalOrientation {
  VERTICAL_ORIENTATION_U  = 0,
  VERTICAL_ORIENTATION_R  = 1,
  VERTICAL_ORIENTATION_Tu = 2,
  VERTICAL_ORIENTATION_Tr = 3
};

/* This MUST match the values assigned by genUnicodePropertyData.pl! */
enum PairedBracketType {
  PAIRED_BRACKET_TYPE_NONE = 0,
  PAIRED_BRACKET_TYPE_OPEN = 1,
  PAIRED_BRACKET_TYPE_CLOSE = 2
};

enum XidmodType {
  XIDMOD_RECOMMENDED,
  XIDMOD_INCLUSION,
  XIDMOD_UNCOMMON_USE,
  XIDMOD_TECHNICAL,
  XIDMOD_OBSOLETE,
  XIDMOD_ASPIRATIONAL,
  XIDMOD_LIMITED_USE,
  XIDMOD_EXCLUSION,
  XIDMOD_NOT_XID,
  XIDMOD_NOT_NFKC,
  XIDMOD_DEFAULT_IGNORABLE,
  XIDMOD_DEPRECATED,
  XIDMOD_NOT_CHARS
};

#if ENABLE_INTL_API // ICU is available, so simply forward to its API

extern const hb_unicode_general_category_t sICUtoHBcategory[];

inline uint32_t
GetMirroredChar(uint32_t aCh)
{
  return u_charMirror(aCh);
}

inline bool
HasMirroredChar(uint32_t aCh)
{
  return u_isMirrored(aCh);
}

inline uint8_t
GetCombiningClass(uint32_t aCh)
{
  return u_getCombiningClass(aCh);
}

inline uint8_t
GetGeneralCategory(uint32_t aCh)
{
  return sICUtoHBcategory[u_charType(aCh)];
}

inline nsCharType
GetBidiCat(uint32_t aCh)
{
  return nsCharType(u_charDirection(aCh));
}

inline int8_t
GetNumericValue(uint32_t aCh)
{
  UNumericType type =
    UNumericType(u_getIntPropertyValue(aCh, UCHAR_NUMERIC_TYPE));
  return type == U_NT_DECIMAL || type == U_NT_DIGIT
         ? int8_t(u_getNumericValue(aCh)) : -1;
}

inline uint8_t
GetLineBreakClass(uint32_t aCh)
{
  return u_getIntPropertyValue(aCh, UCHAR_LINE_BREAK);
}

inline Script
GetScriptCode(uint32_t aCh)
{
  UErrorCode err = U_ZERO_ERROR;
  return Script(uscript_getScript(aCh, &err));
}

inline uint32_t
GetScriptTagForCode(Script aScriptCode)
{
  const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
  return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
}

inline PairedBracketType
GetPairedBracketType(uint32_t aCh)
{
  return PairedBracketType
           (u_getIntPropertyValue(aCh, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
}

inline uint32_t
GetPairedBracket(uint32_t aCh)
{
  return u_getBidiPairedBracket(aCh);
}

inline uint32_t
GetUppercase(uint32_t aCh)
{
  return u_toupper(aCh);
}

inline uint32_t
GetLowercase(uint32_t aCh)
{
  return u_tolower(aCh);
}

inline uint32_t
GetTitlecaseForLower(uint32_t aCh) // maps LC to titlecase, UC unchanged
{
  return u_isULowercase(aCh) ? u_totitle(aCh) : aCh;
}

inline uint32_t
GetTitlecaseForAll(uint32_t aCh) // maps both UC and LC to titlecase
{
  return u_totitle(aCh);
}

inline bool
IsEastAsianWidthFWH(uint32_t aCh)
{
  switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
    case U_EA_FULLWIDTH:
    case U_EA_WIDE:
    case U_EA_HALFWIDTH:
      return true;
    case U_EA_AMBIGUOUS:
    case U_EA_NARROW:
    case U_EA_NEUTRAL:
      return false;
  }
  return false;
}

#else // not ENABLE_INTL_API

// Return whether the char has a mirrored-pair counterpart.
uint32_t GetMirroredChar(uint32_t aCh);

bool HasMirroredChar(uint32_t aChr);

uint8_t GetCombiningClass(uint32_t aCh);

// returns the detailed General Category in terms of HB_UNICODE_* values
uint8_t GetGeneralCategory(uint32_t aCh);

nsCharType GetBidiCat(uint32_t aCh);

uint8_t GetLineBreakClass(uint32_t aCh);

Script GetScriptCode(uint32_t aCh);

uint32_t GetScriptTagForCode(Script aScriptCode);

PairedBracketType GetPairedBracketType(uint32_t aCh);
uint32_t GetPairedBracket(uint32_t aCh);

/**
 * Return the numeric value of the character. The value returned is the value
 * of the Numeric_Value in field 7 of the UCD, or -1 if field 7 is empty.
 * To restrict to decimal digits, the caller should also check whether
 * GetGeneralCategory returns HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER
 */
int8_t GetNumericValue(uint32_t aCh);

uint32_t GetUppercase(uint32_t aCh);
uint32_t GetLowercase(uint32_t aCh);
uint32_t GetTitlecaseForLower(uint32_t aCh); // maps LC to titlecase, UC unchanged
uint32_t GetTitlecaseForAll(uint32_t aCh); // maps both UC and LC to titlecase

// Return whether the char has EastAsianWidth class F or W or H.
bool IsEastAsianWidthFWH(uint32_t aCh);

#endif // !ENABLE_INTL_API

// returns the simplified Gen Category as defined in nsIUGenCategory
inline nsIUGenCategory::nsUGenCategory GetGenCategory(uint32_t aCh) {
  return sDetailedToGeneralCategory[GetGeneralCategory(aCh)];
}

inline VerticalOrientation GetVerticalOrientation(uint32_t aCh) {
  return VerticalOrientation(GetCharProps2(aCh).mVertOrient);
}

inline XidmodType GetIdentifierModification(uint32_t aCh) {
  return XidmodType(GetCharProps2(aCh).mXidmod);
}

uint32_t GetFullWidth(uint32_t aCh);
// This is the reverse function of GetFullWidth which guarantees that
// for every codepoint c, GetFullWidthInverse(GetFullWidth(c)) == c.
// Note that, this function does not guarantee to convert all wide
// form characters to their possible narrow form.
uint32_t GetFullWidthInverse(uint32_t aCh);

bool IsClusterExtender(uint32_t aCh, uint8_t aCategory);

inline bool IsClusterExtender(uint32_t aCh) {
  return IsClusterExtender(aCh, GetGeneralCategory(aCh));
}

// A simple iterator for a string of char16_t codepoints that advances
// by Unicode grapheme clusters
class ClusterIterator
{
public:
    ClusterIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText), mLimit(aText + aLength)
#ifdef DEBUG
        , mText(aText)
#endif
    { }

    operator const char16_t* () const {
        return mPos;
    }

    bool AtEnd() const {
        return mPos >= mLimit;
    }

    void Next();

private:
    const char16_t* mPos;
    const char16_t* mLimit;
#ifdef DEBUG
    const char16_t* mText;
#endif
};

// Count the number of grapheme clusters in the given string
uint32_t CountGraphemeClusters(const char16_t* aText, uint32_t aLength);

// A simple reverse iterator for a string of char16_t codepoints that
// advances by Unicode grapheme clusters
class ClusterReverseIterator
{
public:
    ClusterReverseIterator(const char16_t* aText, uint32_t aLength)
        : mPos(aText + aLength), mLimit(aText)
    { }

    operator const char16_t* () const {
        return mPos;
    }

    bool AtEnd() const {
        return mPos <= mLimit;
    }

    void Next();

private:
    const char16_t* mPos;
    const char16_t* mLimit;
};

} // end namespace unicode

} // end namespace mozilla

#endif /* NS_UNICODEPROPERTIES_H */