diff options
Diffstat (limited to 'intl/icu/source/tools/genrb/prscmnts.cpp')
-rw-r--r-- | intl/icu/source/tools/genrb/prscmnts.cpp | 248 |
1 files changed, 248 insertions, 0 deletions
diff --git a/intl/icu/source/tools/genrb/prscmnts.cpp b/intl/icu/source/tools/genrb/prscmnts.cpp new file mode 100644 index 000000000..03cad7f32 --- /dev/null +++ b/intl/icu/source/tools/genrb/prscmnts.cpp @@ -0,0 +1,248 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2003-2014, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************************* + * + * File prscmnts.cpp + * + * Modification History: + * + * Date Name Description + * 08/22/2003 ram Creation. + ******************************************************************************* + */ + +// Safer use of UnicodeString. +#ifndef UNISTR_FROM_CHAR_EXPLICIT +# define UNISTR_FROM_CHAR_EXPLICIT explicit +#endif + +// Less important, but still a good idea. +#ifndef UNISTR_FROM_STRING_EXPLICIT +# define UNISTR_FROM_STRING_EXPLICIT explicit +#endif + +#include "unicode/regex.h" +#include "unicode/unistr.h" +#include "unicode/parseerr.h" +#include "prscmnts.h" +#include <stdio.h> +#include <stdlib.h> + +U_NAMESPACE_USE + +#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ + +#define MAX_SPLIT_STRINGS 20 + +const char *patternStrings[UPC_LIMIT]={ + "^translate\\s*(.*)", + "^note\\s*(.*)" +}; + +U_CFUNC int32_t +removeText(UChar *source, int32_t srcLen, + UnicodeString patString,uint32_t options, + UnicodeString replaceText, UErrorCode *status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + + UnicodeString src(source, srcLen); + + RegexMatcher myMatcher(patString, src, options, *status); + if(U_FAILURE(*status)){ + return 0; + } + UnicodeString dest; + + + dest = myMatcher.replaceAll(replaceText,*status); + + + return dest.extract(source, srcLen, *status); + +} +U_CFUNC int32_t +trim(UChar *src, int32_t srcLen, UErrorCode *status){ + srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines + srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces + srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes + return srcLen; +} + +U_CFUNC int32_t +removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ + srcLen = trim(source, srcLen, status); + UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line + srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); + return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; +} + +U_CFUNC int32_t +getText(const UChar* source, int32_t srcLen, + UChar** dest, int32_t destCapacity, + UnicodeString patternString, + UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + + UnicodeString stringArray[MAX_SPLIT_STRINGS]; + RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); + UnicodeString src (source,srcLen); + + if (U_FAILURE(*status)) { + return 0; + } + pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); + + RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); + if (U_FAILURE(*status)) { + return 0; + } + for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ + matcher.reset(stringArray[i]); + if(matcher.lookingAt(*status)){ + UnicodeString out = matcher.group(1, *status); + + return out.extract(*dest, destCapacity,*status); + } + } + return 0; +} + + +#define AT_SIGN 0x0040 + +U_CFUNC int32_t +getDescription( const UChar* source, int32_t srcLen, + UChar** dest, int32_t destCapacity, + UErrorCode* status){ + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + + UnicodeString stringArray[MAX_SPLIT_STRINGS]; + RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); + UnicodeString src(source, srcLen); + + if (U_FAILURE(*status)) { + return 0; + } + pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); + + if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ + int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); + return trim(*dest, destLen, status); + } + return 0; +} + +U_CFUNC int32_t +getCount(const UChar* source, int32_t srcLen, + UParseCommentsOption option, UErrorCode *status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + + UnicodeString stringArray[MAX_SPLIT_STRINGS]; + RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); + UnicodeString src (source, srcLen); + + + if (U_FAILURE(*status)) { + return 0; + } + int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); + + UnicodeString patternString(patternStrings[option]); + RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); + if (U_FAILURE(*status)) { + return 0; + } + int32_t count = 0; + for(int32_t i=0; i<retLen; i++){ + matcher.reset(stringArray[i]); + if(matcher.lookingAt(*status)){ + count++; + } + } + if(option == UPC_TRANSLATE && count > 1){ + fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); + exit(U_UNSUPPORTED_ERROR); + } + return count; +} + +U_CFUNC int32_t +getAt(const UChar* source, int32_t srcLen, + UChar** dest, int32_t destCapacity, + int32_t index, + UParseCommentsOption option, + UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + + UnicodeString stringArray[MAX_SPLIT_STRINGS]; + RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); + UnicodeString src (source, srcLen); + + + if (U_FAILURE(*status)) { + return 0; + } + int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); + + UnicodeString patternString(patternStrings[option]); + RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); + if (U_FAILURE(*status)) { + return 0; + } + int32_t count = 0; + for(int32_t i=0; i<retLen; i++){ + matcher.reset(stringArray[i]); + if(matcher.lookingAt(*status)){ + if(count == index){ + UnicodeString out = matcher.group(1, *status); + return out.extract(*dest, destCapacity,*status); + } + count++; + + } + } + return 0; + +} + +U_CFUNC int32_t +getTranslate( const UChar* source, int32_t srcLen, + UChar** dest, int32_t destCapacity, + UErrorCode* status){ + UnicodeString notePatternString("^translate\\s*?(.*)"); + + int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); + return trim(*dest, destLen, status); +} + +U_CFUNC int32_t +getNote(const UChar* source, int32_t srcLen, + UChar** dest, int32_t destCapacity, + UErrorCode* status){ + + UnicodeString notePatternString("^note\\s*?(.*)"); + int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); + return trim(*dest, destLen, status); + +} + +#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ + |