summaryrefslogtreecommitdiffstats
path: root/intl/unicharutil/util/GreekCasing.cpp
blob: 5c805c275662c6f8a4d198ae0611d4de019ef63d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "GreekCasing.h"
#include "nsUnicharUtils.h"

// Custom uppercase mapping for Greek; see bug 307039 for details
#define GREEK_LOWER_ALPHA                      0x03B1
#define GREEK_LOWER_ALPHA_TONOS                0x03AC
#define GREEK_LOWER_ALPHA_OXIA                 0x1F71
#define GREEK_LOWER_EPSILON                    0x03B5
#define GREEK_LOWER_EPSILON_TONOS              0x03AD
#define GREEK_LOWER_EPSILON_OXIA               0x1F73
#define GREEK_LOWER_ETA                        0x03B7
#define GREEK_LOWER_ETA_TONOS                  0x03AE
#define GREEK_LOWER_ETA_OXIA                   0x1F75
#define GREEK_LOWER_IOTA                       0x03B9
#define GREEK_LOWER_IOTA_TONOS                 0x03AF
#define GREEK_LOWER_IOTA_OXIA                  0x1F77
#define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
#define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
#define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
#define GREEK_LOWER_OMICRON                    0x03BF
#define GREEK_LOWER_OMICRON_TONOS              0x03CC
#define GREEK_LOWER_OMICRON_OXIA               0x1F79
#define GREEK_LOWER_UPSILON                    0x03C5
#define GREEK_LOWER_UPSILON_TONOS              0x03CD
#define GREEK_LOWER_UPSILON_OXIA               0x1F7B
#define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
#define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
#define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
#define GREEK_LOWER_OMEGA                      0x03C9
#define GREEK_LOWER_OMEGA_TONOS                0x03CE
#define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
#define GREEK_UPPER_ALPHA                      0x0391
#define GREEK_UPPER_EPSILON                    0x0395
#define GREEK_UPPER_ETA                        0x0397
#define GREEK_UPPER_IOTA                       0x0399
#define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
#define GREEK_UPPER_OMICRON                    0x039F
#define GREEK_UPPER_UPSILON                    0x03A5
#define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
#define GREEK_UPPER_OMEGA                      0x03A9
#define GREEK_UPPER_ALPHA_TONOS                0x0386
#define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
#define GREEK_UPPER_EPSILON_TONOS              0x0388
#define GREEK_UPPER_EPSILON_OXIA               0x1FC9
#define GREEK_UPPER_ETA_TONOS                  0x0389
#define GREEK_UPPER_ETA_OXIA                   0x1FCB
#define GREEK_UPPER_IOTA_TONOS                 0x038A
#define GREEK_UPPER_IOTA_OXIA                  0x1FDB
#define GREEK_UPPER_OMICRON_TONOS              0x038C
#define GREEK_UPPER_OMICRON_OXIA               0x1FF9
#define GREEK_UPPER_UPSILON_TONOS              0x038E
#define GREEK_UPPER_UPSILON_OXIA               0x1FEB
#define GREEK_UPPER_OMEGA_TONOS                0x038F
#define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
#define COMBINING_ACUTE_ACCENT                 0x0301
#define COMBINING_DIAERESIS                    0x0308
#define COMBINING_ACUTE_TONE_MARK              0x0341
#define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344

namespace mozilla {

uint32_t
GreekCasing::UpperCase(uint32_t aCh, GreekCasing::State& aState)
{
  switch (aCh) {
  case GREEK_UPPER_ALPHA:
  case GREEK_LOWER_ALPHA:
    aState = kAlpha;
    return GREEK_UPPER_ALPHA;

  case GREEK_UPPER_EPSILON:
  case GREEK_LOWER_EPSILON:
    aState = kEpsilon;
    return GREEK_UPPER_EPSILON;

  case GREEK_UPPER_ETA:
  case GREEK_LOWER_ETA:
    aState = kEta;
    return GREEK_UPPER_ETA;

  case GREEK_UPPER_IOTA:
    aState = kIota;
    return GREEK_UPPER_IOTA;

  case GREEK_UPPER_OMICRON:
  case GREEK_LOWER_OMICRON:
    aState = kOmicron;
    return GREEK_UPPER_OMICRON;

  case GREEK_UPPER_UPSILON:
    switch (aState) {
    case kOmicron:
      aState = kOmicronUpsilon;
      break;
    default:
      aState = kUpsilon;
      break;
    }
    return GREEK_UPPER_UPSILON;

  case GREEK_UPPER_OMEGA:
  case GREEK_LOWER_OMEGA:
    aState = kOmega;
    return GREEK_UPPER_OMEGA;

  // iota and upsilon may be the second vowel of a diphthong
  case GREEK_LOWER_IOTA:
    switch (aState) {
    case kAlphaAcc:
    case kEpsilonAcc:
    case kOmicronAcc:
    case kUpsilonAcc:
      aState = kStart;
      return GREEK_UPPER_IOTA_DIALYTIKA;
    default:
      break;
    }
    aState = kIota;
    return GREEK_UPPER_IOTA;

  case GREEK_LOWER_UPSILON:
    switch (aState) {
    case kAlphaAcc:
    case kEpsilonAcc:
    case kEtaAcc:
    case kOmicronAcc:
      aState = kStart;
      return GREEK_UPPER_UPSILON_DIALYTIKA;
    case kOmicron:
      aState = kOmicronUpsilon;
      break;
    default:
      aState = kUpsilon;
      break;
    }
    return GREEK_UPPER_UPSILON;

  case GREEK_UPPER_IOTA_DIALYTIKA:
  case GREEK_LOWER_IOTA_DIALYTIKA:
  case GREEK_UPPER_UPSILON_DIALYTIKA:
  case GREEK_LOWER_UPSILON_DIALYTIKA:
  case COMBINING_DIAERESIS:
    aState = kDiaeresis;
    return ToUpperCase(aCh);

  // remove accent if it follows a vowel or diaeresis,
  // and set appropriate state for diphthong detection
  case COMBINING_ACUTE_ACCENT:
  case COMBINING_ACUTE_TONE_MARK:
    switch (aState) {
    case kAlpha:
      aState = kAlphaAcc;
      return uint32_t(-1); // omit this char from result string
    case kEpsilon:
      aState = kEpsilonAcc;
      return uint32_t(-1);
    case kEta:
      aState = kEtaAcc;
      return uint32_t(-1);
    case kIota:
      aState = kIotaAcc;
      return uint32_t(-1);
    case kOmicron:
      aState = kOmicronAcc;
      return uint32_t(-1);
    case kUpsilon:
      aState = kUpsilonAcc;
      return uint32_t(-1);
    case kOmicronUpsilon:
      aState = kStart; // this completed a diphthong
      return uint32_t(-1);
    case kOmega:
      aState = kOmegaAcc;
      return uint32_t(-1);
    case kDiaeresis:
      aState = kStart;
      return uint32_t(-1);
    default:
      break;
    }
    break;

  // combinations with dieresis+accent just strip the accent,
  // and reset to start state (don't form diphthong with following vowel)
  case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
  case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
    aState = kStart;
    return GREEK_UPPER_IOTA_DIALYTIKA;

  case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
  case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
    aState = kStart;
    return GREEK_UPPER_UPSILON_DIALYTIKA;

  case COMBINING_GREEK_DIALYTIKA_TONOS:
    aState = kStart;
    return COMBINING_DIAERESIS;

  // strip accents from vowels, and note the vowel seen so that we can detect
  // diphthongs where diaeresis needs to be added
  case GREEK_LOWER_ALPHA_TONOS:
  case GREEK_LOWER_ALPHA_OXIA:
  case GREEK_UPPER_ALPHA_TONOS:
  case GREEK_UPPER_ALPHA_OXIA:
    aState = kAlphaAcc;
    return GREEK_UPPER_ALPHA;

  case GREEK_LOWER_EPSILON_TONOS:
  case GREEK_LOWER_EPSILON_OXIA:
  case GREEK_UPPER_EPSILON_TONOS:
  case GREEK_UPPER_EPSILON_OXIA:
    aState = kEpsilonAcc;
    return GREEK_UPPER_EPSILON;

  case GREEK_LOWER_ETA_TONOS:
  case GREEK_LOWER_ETA_OXIA:
  case GREEK_UPPER_ETA_TONOS:
  case GREEK_UPPER_ETA_OXIA:
    aState = kEtaAcc;
    return GREEK_UPPER_ETA;

  case GREEK_LOWER_IOTA_TONOS:
  case GREEK_LOWER_IOTA_OXIA:
  case GREEK_UPPER_IOTA_TONOS:
  case GREEK_UPPER_IOTA_OXIA:
    aState = kIotaAcc;
    return GREEK_UPPER_IOTA;

  case GREEK_LOWER_OMICRON_TONOS:
  case GREEK_LOWER_OMICRON_OXIA:
  case GREEK_UPPER_OMICRON_TONOS:
  case GREEK_UPPER_OMICRON_OXIA:
    aState = kOmicronAcc;
    return GREEK_UPPER_OMICRON;

  case GREEK_LOWER_UPSILON_TONOS:
  case GREEK_LOWER_UPSILON_OXIA:
  case GREEK_UPPER_UPSILON_TONOS:
  case GREEK_UPPER_UPSILON_OXIA:
    switch (aState) {
    case kOmicron:
      aState = kStart; // this completed a diphthong
      break;
    default:
      aState = kUpsilonAcc;
      break;
    }
    return GREEK_UPPER_UPSILON;

  case GREEK_LOWER_OMEGA_TONOS:
  case GREEK_LOWER_OMEGA_OXIA:
  case GREEK_UPPER_OMEGA_TONOS:
  case GREEK_UPPER_OMEGA_OXIA:
    aState = kOmegaAcc;
    return GREEK_UPPER_OMEGA;
  }

  // all other characters just reset the state, and use standard mappings
  aState = kStart;
  return ToUpperCase(aCh);
}

} // namespace mozilla