diff options
Diffstat (limited to 'intl/icu/source/tools/genrb/derb.cpp')
-rw-r--r-- | intl/icu/source/tools/genrb/derb.cpp | 657 |
1 files changed, 657 insertions, 0 deletions
diff --git a/intl/icu/source/tools/genrb/derb.cpp b/intl/icu/source/tools/genrb/derb.cpp new file mode 100644 index 000000000..22e275ef3 --- /dev/null +++ b/intl/icu/source/tools/genrb/derb.cpp @@ -0,0 +1,657 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: derb.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000sep6 +* created by: Vladimir Weinstein as an ICU workshop example +* maintained by: Yves Arrouye <yves@realnames.com> +*/ + +#include "unicode/stringpiece.h" +#include "unicode/ucnv.h" +#include "unicode/unistr.h" +#include "unicode/ustring.h" +#include "unicode/putil.h" +#include "unicode/ustdio.h" + +#include "charstr.h" +#include "uresimp.h" +#include "cmemory.h" +#include "cstring.h" +#include "uoptions.h" +#include "toolutil.h" +#include "ustrfmt.h" + +#if !UCONFIG_NO_FORMATTING + +#define DERB_VERSION "1.1" + +#define DERB_DEFAULT_TRUNC 80 + +static const int32_t indentsize = 4; +static int32_t truncsize = DERB_DEFAULT_TRUNC; +static UBool opt_truncate = FALSE; + +static const char *getEncodingName(const char *encoding); +static void reportError(const char *pname, UErrorCode *status, const char *when); +static UChar *quotedString(const UChar *string); +static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status); +static void printString(UFILE *out, const UChar *str, int32_t len); +static void printCString(UFILE *out, const char *str, int32_t len); +static void printIndent(UFILE *out, int32_t indent); +static void printHex(UFILE *out, uint8_t what); + +static UOption options[]={ + UOPTION_HELP_H, + UOPTION_HELP_QUESTION_MARK, +/* 2 */ UOPTION_ENCODING, +/* 3 */ { "to-stdout", NULL, NULL, NULL, 'c', UOPT_NO_ARG, 0 } , +/* 4 */ { "truncate", NULL, NULL, NULL, 't', UOPT_OPTIONAL_ARG, 0 }, +/* 5 */ UOPTION_VERBOSE, +/* 6 */ UOPTION_DESTDIR, +/* 7 */ UOPTION_SOURCEDIR, +/* 8 */ { "bom", NULL, NULL, NULL, 0, UOPT_NO_ARG, 0 }, +/* 9 */ UOPTION_ICUDATADIR, +/* 10 */ UOPTION_VERSION, +/* 11 */ { "suppressAliases", NULL, NULL, NULL, 'A', UOPT_NO_ARG, 0 }, +}; + +static UBool verbose = FALSE; +static UBool suppressAliases = FALSE; +static UFILE *ustderr = NULL; + +extern int +main(int argc, char* argv[]) { + const char *encoding = NULL; + const char *outputDir = NULL; /* NULL = no output directory, use current */ + const char *inputDir = "."; + int tostdout = 0; + int prbom = 0; + + const char *pname; + + UResourceBundle *bundle = NULL; + int32_t i = 0; + + const char* arg; + + /* Get the name of tool. */ + pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); +#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR + if (!pname) { + pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR); + } +#endif + if (!pname) { + pname = *argv; + } else { + ++pname; + } + + /* error handling, printing usage message */ + argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); + + /* error handling, printing usage message */ + if(argc<0) { + fprintf(stderr, + "%s: error in command line argument \"%s\"\n", pname, + argv[-argc]); + } + if(argc<0 || options[0].doesOccur || options[1].doesOccur) { + fprintf(argc < 0 ? stderr : stdout, + "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n" + " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n" + " [ -t, --truncate [ size ] ]\n" + " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n" + " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n" + " [ -A, --suppressAliases]\n" + " bundle ...\n", argc < 0 ? 'u' : 'U', + pname); + return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; + } + + if(options[10].doesOccur) { + fprintf(stderr, + "%s version %s (ICU version %s).\n" + "%s\n", + pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING); + return U_ZERO_ERROR; + } + if(options[2].doesOccur) { + encoding = options[2].value; + } + + if (options[3].doesOccur) { + if(options[2].doesOccur) { + fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname); + return 3; + } + tostdout = 1; + } + + if(options[4].doesOccur) { + opt_truncate = TRUE; + if(options[4].value != NULL) { + truncsize = atoi(options[4].value); /* user defined printable size */ + } else { + truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */ + } + } else { + opt_truncate = FALSE; + } + + if(options[5].doesOccur) { + verbose = TRUE; + } + + if (options[6].doesOccur) { + outputDir = options[6].value; + } + + if(options[7].doesOccur) { + inputDir = options[7].value; /* we'll use users resources */ + } + + if (options[8].doesOccur) { + prbom = 1; + } + + if (options[9].doesOccur) { + u_setDataDirectory(options[9].value); + } + + if (options[11].doesOccur) { + suppressAliases = TRUE; + } + + fflush(stderr); // use ustderr now. + ustderr = u_finit(stderr, NULL, NULL); + + for (i = 1; i < argc; ++i) { + static const UChar sp[] = { 0x0020 }; /* " " */ + + arg = getLongPathname(argv[i]); + + if (verbose) { + u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]); + } + + icu::CharString locale; + UErrorCode status = U_ZERO_ERROR; + { + const char *p = findBasename(arg); + const char *q = uprv_strrchr(p, '.'); + if (q == NULL) { + locale.append(p, status); + } else { + locale.append(p, (int32_t)(q - p), status); + } + } + if (U_FAILURE(status)) { + return status; + } + + icu::CharString infile; + const char *thename = NULL; + UBool fromICUData = !uprv_strcmp(inputDir, "-"); + if (!fromICUData) { + UBool absfilename = *arg == U_FILE_SEP_CHAR; +#if U_PLATFORM_HAS_WIN32_API + if (!absfilename) { + absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0]) + && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR); + } +#endif + if (absfilename) { + thename = arg; + } else { + const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR); +#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR + if (q == NULL) { + q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR); + } +#endif + infile.append(inputDir, status); + if(q != NULL) { + infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status); + } + if (U_FAILURE(status)) { + return status; + } + thename = infile.data(); + } + } + if (thename) { + bundle = ures_openDirect(thename, locale.data(), &status); + } else { + bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status); + } + if (U_SUCCESS(status)) { + UFILE *out = NULL; + + const char *filename = 0; + const char *ext = 0; + + if (locale.isEmpty() || !tostdout) { + filename = findBasename(arg); + ext = uprv_strrchr(filename, '.'); + if (!ext) { + ext = uprv_strchr(filename, 0); + } + } + + if (tostdout) { + out = u_get_stdout(); + } else { + icu::CharString thefile; + if (outputDir) { + thefile.append(outputDir, status); + } + thefile.appendPathPart(filename, status); + if (*ext) { + thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext)); + } + thefile.append(".txt", status); + if (U_FAILURE(status)) { + return status; + } + + out = u_fopen(thefile.data(), "w", NULL, encoding); + if (!out) { + u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data()); + u_fclose(ustderr); + return 4; + } + } + + // now, set the callback. + ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status); + if (U_FAILURE(status)) { + u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname); + u_fclose(ustderr); + if(!tostdout) { + u_fclose(out); + } + return 3; + } + + if (prbom) { /* XXX: Should be done only for UTFs */ + u_fputc(0xFEFF, out); + } + u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName())); + u_fprintf(out, "// This file was dumped by derb(8) from "); + if (thename) { + u_fprintf(out, "%s", thename); + } else if (fromICUData) { + u_fprintf(out, "the ICU internal %s locale", locale.data()); + } + + u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n"); + + if (!locale.isEmpty()) { + u_fprintf(out, "%s", locale.data()); + } else { + u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp); + } + printOutBundle(out, bundle, 0, pname, &status); + + if (!tostdout) { + u_fclose(out); + } + } + else { + reportError(pname, &status, "opening resource file"); + } + + ures_close(bundle); + } + + return 0; +} + +static UChar *quotedString(const UChar *string) { + int len = u_strlen(string); + int alen = len; + const UChar *sp; + UChar *newstr, *np; + + for (sp = string; *sp; ++sp) { + switch (*sp) { + case '\n': + case 0x0022: + ++alen; + break; + } + } + + newstr = (UChar *) uprv_malloc((1 + alen) * U_SIZEOF_UCHAR); + for (sp = string, np = newstr; *sp; ++sp) { + switch (*sp) { + case '\n': + *np++ = 0x005C; + *np++ = 0x006E; + break; + + case 0x0022: + *np++ = 0x005C; + U_FALLTHROUGH; + default: + *np++ = *sp; + break; + } + } + *np = 0; + + return newstr; +} + + +static void printString(UFILE *out, const UChar *str, int32_t len) { + u_file_write(str, len, out); +} + +static void printCString(UFILE *out, const char *str, int32_t len) { + if(len==-1) { + u_fprintf(out, "%s", str); + } else { + u_fprintf(out, "%.*s", len, str); + } +} + +static void printIndent(UFILE *out, int32_t indent) { + icu::UnicodeString inchar(indent, 0x20, indent); + printString(out, inchar.getBuffer(), indent); +} + +static void printHex(UFILE *out, uint8_t what) { + static const char map[] = "0123456789ABCDEF"; + UChar hex[2]; + + hex[0] = map[what >> 4]; + hex[1] = map[what & 0xf]; + + printString(out, hex, 2); +} + +static void printOutAlias(UFILE *out, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) { + static const UChar cr[] = { 0xA }; // LF + int32_t len = 0; + const UChar* thestr = res_getAlias(&(parent->fResData), r, &len); + UChar *string = quotedString(thestr); + if(opt_truncate && len > truncsize) { + char msg[128]; + printIndent(out, indent); + sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n", + (long)len, (long)truncsize/2); + printCString(out, msg, -1); + len = truncsize; + } + if(U_SUCCESS(*status)) { + static const UChar openStr[] = { 0x003A, 0x0061, 0x006C, 0x0069, 0x0061, 0x0073, 0x0020, 0x007B, 0x0020, 0x0022 }; /* ":alias { \"" */ + static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D, 0x0020 }; /* "\" } " */ + printIndent(out, indent); + if(key != NULL) { + printCString(out, key, -1); + } + printString(out, openStr, UPRV_LENGTHOF(openStr)); + printString(out, string, len); + printString(out, closeStr, UPRV_LENGTHOF(closeStr)); + if(verbose) { + printCString(out, " // ALIAS", -1); + } + printString(out, cr, UPRV_LENGTHOF(cr)); + } else { + reportError(pname, status, "getting binary value"); + } + uprv_free(string); +} + +static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status) +{ + static const UChar cr[] = { 0xA }; // LF + +/* int32_t noOfElements = ures_getSize(resource);*/ + int32_t i = 0; + const char *key = ures_getKey(resource); + + switch(ures_getType(resource)) { + case URES_STRING : + { + int32_t len=0; + const UChar* thestr = ures_getString(resource, &len, status); + UChar *string = quotedString(thestr); + + /* TODO: String truncation */ + if(opt_truncate && len > truncsize) { + char msg[128]; + printIndent(out, indent); + sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n", + (long)len, (long)(truncsize/2)); + printCString(out, msg, -1); + len = truncsize/2; + } + printIndent(out, indent); + if(key != NULL) { + static const UChar openStr[] = { 0x0020, 0x007B, 0x0020, 0x0022 }; /* " { \"" */ + static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D }; /* "\" }" */ + printCString(out, key, (int32_t)uprv_strlen(key)); + printString(out, openStr, UPRV_LENGTHOF(openStr)); + printString(out, string, len); + printString(out, closeStr, UPRV_LENGTHOF(closeStr)); + } else { + static const UChar openStr[] = { 0x0022 }; /* "\"" */ + static const UChar closeStr[] = { 0x0022, 0x002C }; /* "\"," */ + + printString(out, openStr, UPRV_LENGTHOF(openStr)); + printString(out, string, (int32_t)(u_strlen(string))); + printString(out, closeStr, UPRV_LENGTHOF(closeStr)); + } + + if(verbose) { + printCString(out, "// STRING", -1); + } + printString(out, cr, UPRV_LENGTHOF(cr)); + + uprv_free(string); + } + break; + + case URES_INT : + { + static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */ + static const UChar closeStr[] = { 0x0020, 0x007D }; /* " }" */ + UChar num[20]; + + printIndent(out, indent); + if(key != NULL) { + printCString(out, key, -1); + } + printString(out, openStr, UPRV_LENGTHOF(openStr)); + uprv_itou(num, 20, ures_getInt(resource, status), 10, 0); + printString(out, num, u_strlen(num)); + printString(out, closeStr, UPRV_LENGTHOF(closeStr)); + + if(verbose) { + printCString(out, "// INT", -1); + } + printString(out, cr, UPRV_LENGTHOF(cr)); + break; + } + case URES_BINARY : + { + int32_t len = 0; + const int8_t *data = (const int8_t *)ures_getBinary(resource, &len, status); + if(opt_truncate && len > truncsize) { + char msg[128]; + printIndent(out, indent); + sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n", + (long)len, (long)(truncsize/2)); + printCString(out, msg, -1); + len = truncsize; + } + if(U_SUCCESS(*status)) { + static const UChar openStr[] = { 0x003A, 0x0062, 0x0069, 0x006E, 0x0061, 0x0072, 0x0079, 0x0020, 0x007B, 0x0020 }; /* ":binary { " */ + static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */ + printIndent(out, indent); + if(key != NULL) { + printCString(out, key, -1); + } + printString(out, openStr, UPRV_LENGTHOF(openStr)); + for(i = 0; i<len; i++) { + printHex(out, *data++); + } + printString(out, closeStr, UPRV_LENGTHOF(closeStr)); + if(verbose) { + printCString(out, " // BINARY", -1); + } + printString(out, cr, UPRV_LENGTHOF(cr)); + } else { + reportError(pname, status, "getting binary value"); + } + } + break; + case URES_INT_VECTOR : + { + int32_t len = 0; + const int32_t *data = ures_getIntVector(resource, &len, status); + if(U_SUCCESS(*status)) { + static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0076, 0x0065, 0x0063, 0x0074, 0x006F, 0x0072, 0x0020, 0x007B, 0x0020 }; /* ":intvector { " */ + static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */ + UChar num[20]; + + printIndent(out, indent); + if(key != NULL) { + printCString(out, key, -1); + } + printString(out, openStr, UPRV_LENGTHOF(openStr)); + for(i = 0; i < len - 1; i++) { + int32_t numLen = uprv_itou(num, 20, data[i], 10, 0); + num[numLen++] = 0x002C; /* ',' */ + num[numLen++] = 0x0020; /* ' ' */ + num[numLen] = 0; + printString(out, num, u_strlen(num)); + } + if(len > 0) { + uprv_itou(num, 20, data[len - 1], 10, 0); + printString(out, num, u_strlen(num)); + } + printString(out, closeStr, UPRV_LENGTHOF(closeStr)); + if(verbose) { + printCString(out, "// INTVECTOR", -1); + } + printString(out, cr, UPRV_LENGTHOF(cr)); + } else { + reportError(pname, status, "getting int vector"); + } + } + break; + case URES_TABLE : + case URES_ARRAY : + { + static const UChar openStr[] = { 0x007B }; /* "{" */ + static const UChar closeStr[] = { 0x007D, '\n' }; /* "}\n" */ + + UResourceBundle *t = NULL; + ures_resetIterator(resource); + printIndent(out, indent); + if(key != NULL) { + printCString(out, key, -1); + } + printString(out, openStr, UPRV_LENGTHOF(openStr)); + if(verbose) { + if(ures_getType(resource) == URES_TABLE) { + printCString(out, "// TABLE", -1); + } else { + printCString(out, "// ARRAY", -1); + } + } + printString(out, cr, UPRV_LENGTHOF(cr)); + + if(suppressAliases == FALSE) { + while(U_SUCCESS(*status) && ures_hasNext(resource)) { + t = ures_getNextResource(resource, t, status); + if(U_SUCCESS(*status)) { + printOutBundle(out, t, indent+indentsize, pname, status); + } else { + reportError(pname, status, "While processing table"); + *status = U_ZERO_ERROR; + } + } + } else { /* we have to use low level access to do this */ + Resource r; + int32_t resSize = ures_getSize(resource); + UBool isTable = (UBool)(ures_getType(resource) == URES_TABLE); + for(i = 0; i < resSize; i++) { + /* need to know if it's an alias */ + if(isTable) { + r = res_getTableItemByIndex(&resource->fResData, resource->fRes, i, &key); + } else { + r = res_getArrayItem(&resource->fResData, resource->fRes, i); + } + if(U_SUCCESS(*status)) { + if(res_getPublicType(r) == URES_ALIAS) { + printOutAlias(out, resource, r, key, indent+indentsize, pname, status); + } else { + t = ures_getByIndex(resource, i, t, status); + printOutBundle(out, t, indent+indentsize, pname, status); + } + } else { + reportError(pname, status, "While processing table"); + *status = U_ZERO_ERROR; + } + } + } + + printIndent(out, indent); + printString(out, closeStr, UPRV_LENGTHOF(closeStr)); + ures_close(t); + } + break; + default: + break; + } + +} + +static const char *getEncodingName(const char *encoding) { + UErrorCode err; + const char *enc; + + err = U_ZERO_ERROR; + if (!(enc = ucnv_getStandardName(encoding, "MIME", &err))) { + err = U_ZERO_ERROR; + if (!(enc = ucnv_getStandardName(encoding, "IANA", &err))) { + ; + } + } + + return enc; +} + +static void reportError(const char *pname, UErrorCode *status, const char *when) { + u_fprintf(ustderr, "%s: error %d while %s: %s\n", pname, *status, when, u_errorName(*status)); +} + +#else +extern int +main(int argc, char* argv[]) { + /* Changing stdio.h ustdio.h requires that formatting not be disabled. */ + return 3; +} +#endif /* !UCONFIG_NO_FORMATTING */ + +/* + * Local Variables: + * indent-tabs-mode: nil + * End: + */ |