diff options
author | Matt A. Tobin <email@mattatobin.com> | 2018-02-02 03:32:58 -0500 |
---|---|---|
committer | Matt A. Tobin <email@mattatobin.com> | 2018-02-02 03:32:58 -0500 |
commit | e72ef92b5bdc43cd2584198e2e54e951b70299e8 (patch) | |
tree | 01ceb4a897c33eca9e7ccf2bc3aefbe530169fe5 /application/basilisk/components/translation/cld2/public/encodings.h | |
parent | 0d19b77d3eaa5b8d837bf52c19759e68e42a1c4c (diff) | |
download | UXP-e72ef92b5bdc43cd2584198e2e54e951b70299e8.tar UXP-e72ef92b5bdc43cd2584198e2e54e951b70299e8.tar.gz UXP-e72ef92b5bdc43cd2584198e2e54e951b70299e8.tar.lz UXP-e72ef92b5bdc43cd2584198e2e54e951b70299e8.tar.xz UXP-e72ef92b5bdc43cd2584198e2e54e951b70299e8.zip |
Add Basilisk
Diffstat (limited to 'application/basilisk/components/translation/cld2/public/encodings.h')
-rw-r--r-- | application/basilisk/components/translation/cld2/public/encodings.h | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/application/basilisk/components/translation/cld2/public/encodings.h b/application/basilisk/components/translation/cld2/public/encodings.h new file mode 100644 index 000000000..1eb8f0a15 --- /dev/null +++ b/application/basilisk/components/translation/cld2/public/encodings.h @@ -0,0 +1,169 @@ +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +// Author: dsites@google.com (Dick Sites) +// + +#ifndef I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__ +#define I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__ + +namespace CLD2 { + +enum Encoding { + ISO_8859_1 = 0, // ASCII + ISO_8859_2 = 1, // Latin2 + ISO_8859_3 = 2, // + ISO_8859_4 = 3, // Latin4 + ISO_8859_5 = 4, // ISO-8859-5 + ISO_8859_6 = 5, // Arabic + ISO_8859_7 = 6, // Greek + ISO_8859_8 = 7, // Hebrew + ISO_8859_9 = 8, // + ISO_8859_10 = 9, // + JAPANESE_EUC_JP = 10, // EUC_JP + JAPANESE_SHIFT_JIS = 11, // SJS + JAPANESE_JIS = 12, // JIS + CHINESE_BIG5 = 13, // BIG5 + CHINESE_GB = 14, // GB + CHINESE_EUC_CN = 15, // Misnamed. Should be EUC_TW. Was Basis Tech + // CNS11643EUC, before that EUC-CN(!) + KOREAN_EUC_KR = 16, // KSC + UNICODE_UNUSED = 17, // Unicode + CHINESE_EUC_DEC = 18, // Misnamed. Should be EUC_TW. Was + // CNS11643EUC, before that EUC. + CHINESE_CNS = 19, // Misnamed. Should be EUC_TW. Was + // CNS11643EUC, before that CNS. + CHINESE_BIG5_CP950 = 20, // BIG5_CP950 + JAPANESE_CP932 = 21, // CP932 + UTF8 = 22, + UNKNOWN_ENCODING = 23, + ASCII_7BIT = 24, // ISO_8859_1 with all characters <= 127. + RUSSIAN_KOI8_R = 25, // KOI8R + RUSSIAN_CP1251 = 26, // CP1251 + + //---------------------------------------------------------- + MSFT_CP1252 = 27, // 27: CP1252 aka MSFT euro ascii + RUSSIAN_KOI8_RU = 28, // CP21866 aka KOI8-U, used for Ukrainian. + // Misnamed, this is _not_ KOI8-RU but KOI8-U. + // KOI8-U is used much more often than KOI8-RU. + MSFT_CP1250 = 29, // CP1250 aka MSFT eastern european + ISO_8859_15 = 30, // aka ISO_8859_0 aka ISO_8859_1 euroized + //---------------------------------------------------------- + + //---------------------------------------------------------- + MSFT_CP1254 = 31, // used for Turkish + MSFT_CP1257 = 32, // used in Baltic countries + //---------------------------------------------------------- + + //---------------------------------------------------------- + //---------------------------------------------------------- + ISO_8859_11 = 33, // aka TIS-620, used for Thai + MSFT_CP874 = 34, // used for Thai + MSFT_CP1256 = 35, // used for Arabic + + //---------------------------------------------------------- + MSFT_CP1255 = 36, // Logical Hebrew Microsoft + ISO_8859_8_I = 37, // Iso Hebrew Logical + HEBREW_VISUAL = 38, // Iso Hebrew Visual + //---------------------------------------------------------- + + //---------------------------------------------------------- + CZECH_CP852 = 39, + CZECH_CSN_369103 = 40, // aka ISO_IR_139 aka KOI8_CS + MSFT_CP1253 = 41, // used for Greek + RUSSIAN_CP866 = 42, + //---------------------------------------------------------- + + //---------------------------------------------------------- + // Handled by iconv in glibc + ISO_8859_13 = 43, + ISO_2022_KR = 44, + GBK = 45, + GB18030 = 46, + BIG5_HKSCS = 47, + ISO_2022_CN = 48, + + //----------------------------------------------------------- + // Following 4 encodings are deprecated (font encodings) + TSCII = 49, + TAMIL_MONO = 50, + TAMIL_BI = 51, + JAGRAN = 52, + + + MACINTOSH_ROMAN = 53, + UTF7 = 54, + + //----------------------------------------------------------- + // Following 2 encodings are deprecated (font encodings) + BHASKAR = 55, // Indic encoding - Devanagari + HTCHANAKYA = 56, // 56 Indic encoding - Devanagari + + //----------------------------------------------------------- + UTF16BE = 57, // big-endian UTF-16 + UTF16LE = 58, // little-endian UTF-16 + UTF32BE = 59, // big-endian UTF-32 + UTF32LE = 60, // little-endian UTF-32 + //----------------------------------------------------------- + + //----------------------------------------------------------- + // An encoding that means "This is not text, but it may have some + // simple ASCII text embedded". Intended input conversion + // is to keep strings of >=4 seven-bit ASCII characters + BINARYENC = 61, + //----------------------------------------------------------- + + //----------------------------------------------------------- + // Some Web pages allow a mixture of HZ-GB and GB-2312 by using + // ~{ ... ~} for 2-byte pairs, and the browsers support this. + HZ_GB_2312 = 62, + //----------------------------------------------------------- + + //----------------------------------------------------------- + // Some external vendors make the common input error of + // converting MSFT_CP1252 to UTF8 *twice*. + UTF8UTF8 = 63, + //----------------------------------------------------------- + + //----------------------------------------------------------- + // Following 6 encodings are deprecated (font encodings) + TAM_ELANGO = 64, // Elango - Tamil + TAM_LTTMBARANI = 65, // Barani - Tamil + TAM_SHREE = 66, // Shree - Tamil + TAM_TBOOMIS = 67, // TBoomis - Tamil + TAM_TMNEWS = 68, // TMNews - Tamil + TAM_WEBTAMIL = 69, // Webtamil - Tamil + //----------------------------------------------------------- + + //----------------------------------------------------------- + // Shift_JIS variants used by Japanese cell phone carriers. + KDDI_SHIFT_JIS = 70, + DOCOMO_SHIFT_JIS = 71, + SOFTBANK_SHIFT_JIS = 72, + // ISO-2022-JP variants used by KDDI and SoftBank. + KDDI_ISO_2022_JP = 73, + SOFTBANK_ISO_2022_JP = 74, + //----------------------------------------------------------- + + NUM_ENCODINGS = 75, // Always keep this at the end. It is not a + // valid Encoding enum, it is only used to + // indicate the total number of Encodings. +}; + +} // End namespace CLD2 + +#endif // I18N_ENCODINGS_CLD2_PUBLIC_ENCODINGS_H__ + + |