diff options
Diffstat (limited to 'intl/icu/source/i18n/esctrn.cpp')
-rw-r--r-- | intl/icu/source/i18n/esctrn.cpp | 181 |
1 files changed, 181 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/esctrn.cpp b/intl/icu/source/i18n/esctrn.cpp new file mode 100644 index 000000000..9be926f13 --- /dev/null +++ b/intl/icu/source/i18n/esctrn.cpp @@ -0,0 +1,181 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2001-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/19/2001 aliu Creation. +********************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_TRANSLITERATION + +#include "unicode/utf16.h" +#include "esctrn.h" +#include "util.h" + +U_NAMESPACE_BEGIN + +static const UChar UNIPRE[] = {85,43,0}; // "U+" +static const UChar BS_u[] = {92,117,0}; // "\\u" +static const UChar BS_U[] = {92,85,0}; // "\\U" +static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" +static const UChar XML10PRE[] = {38,35,0}; // "&#" +static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" +static const UChar SEMI[] = {59,0}; // ";" +static const UChar RBRACE[] = {125,0}; // "}" + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) + +/** + * Factory methods + */ +static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { + // Unicode: "U+10FFFF" hex, min=4, max=6 + return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL); +} +static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { + // Java: "\\uFFFF" hex, min=4, max=4 + return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL); +} +static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { + // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 + return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE, + new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL)); +} +static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { + // XML: "" hex, min=1, max=6 + return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL); +} +static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { + // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") + return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL); +} +static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { + // Perl: "\\x{263A}" hex, min=1, max=6 + return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL); +} + +/** + * Registers standard variants with the system. Called by + * Transliterator during initialization. + */ +void EscapeTransliterator::registerIDs() { + Token t = integerToken(0); + + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); + + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); + + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); + + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); + + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); + + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); + + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); +} + +/** + * Constructs an escape transliterator with the given ID and + * parameters. See the class member documentation for details. + */ +EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, + const UnicodeString& _prefix, const UnicodeString& _suffix, + int32_t _radix, int32_t _minDigits, + UBool _grokSupplementals, + EscapeTransliterator* adoptedSupplementalHandler) : + Transliterator(newID, NULL) +{ + this->prefix = _prefix; + this->suffix = _suffix; + this->radix = _radix; + this->minDigits = _minDigits; + this->grokSupplementals = _grokSupplementals; + this->supplementalHandler = adoptedSupplementalHandler; +} + +/** + * Copy constructor. + */ +EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : + Transliterator(o), + prefix(o.prefix), + suffix(o.suffix), + radix(o.radix), + minDigits(o.minDigits), + grokSupplementals(o.grokSupplementals) { + supplementalHandler = (o.supplementalHandler != 0) ? + new EscapeTransliterator(*o.supplementalHandler) : NULL; +} + +EscapeTransliterator::~EscapeTransliterator() { + delete supplementalHandler; +} + +/** + * Transliterator API. + */ +Transliterator* EscapeTransliterator::clone() const { + return new EscapeTransliterator(*this); +} + +/** + * Implements {@link Transliterator#handleTransliterate}. + */ +void EscapeTransliterator::handleTransliterate(Replaceable& text, + UTransPosition& pos, + UBool /*isIncremental*/) const +{ + /* TODO: Verify that isIncremental can be ignored */ + int32_t start = pos.start; + int32_t limit = pos.limit; + + UnicodeString buf(prefix); + int32_t prefixLen = prefix.length(); + UBool redoPrefix = FALSE; + + while (start < limit) { + int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); + int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; + + if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { + buf.truncate(0); + buf.append(supplementalHandler->prefix); + ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, + supplementalHandler->minDigits); + buf.append(supplementalHandler->suffix); + redoPrefix = TRUE; + } else { + if (redoPrefix) { + buf.truncate(0); + buf.append(prefix); + redoPrefix = FALSE; + } else { + buf.truncate(prefixLen); + } + ICU_Utility::appendNumber(buf, c, radix, minDigits); + buf.append(suffix); + } + + text.handleReplaceBetween(start, start + charLen, buf); + start += buf.length(); + limit += buf.length() - charLen; + } + + pos.contextLimit += limit - pos.limit; + pos.limit = limit; + pos.start = start; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ + +//eof |