// Copyright (C) 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2000-2010, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: uparse.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 * * created on: 2000apr18 * created by: Markus W. Scherer * * This file provides a parser for files that are delimited by one single * character like ';' or TAB. Example: the Unicode Character Properties files * like UnicodeData.txt are semicolon-delimited. */ #ifndef __UPARSE_H__ #define __UPARSE_H__ #include "unicode/utypes.h" /** * Is c an invariant-character whitespace? * @param c invariant character */ #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n') U_CDECL_BEGIN /** * Skip space ' ' and TAB '\t' characters. * * @param s Pointer to characters. * @return Pointer to first character at or after s that is not a space or TAB. */ U_CAPI const char * U_EXPORT2 u_skipWhitespace(const char *s); /** * Trim whitespace (including line endings) from the end of the string. * * @param s Pointer to the string. * @return Pointer to the new end of the string. */ U_CAPI char * U_EXPORT2 u_rtrim(char *s); /** Function type for u_parseDelimitedFile(). */ typedef void U_CALLCONV UParseLineFn(void *context, char *fields[][2], int32_t fieldCount, UErrorCode *pErrorCode); /** * Parser for files that are similar to UnicodeData.txt: * This function opens the file and reads it line by line. It skips empty lines * and comment lines that start with a '#'. * All other lines are separated into fields with one delimiter character * (semicolon for Unicode Properties files) between two fields. The last field in * a line does not need to be terminated with a delimiter. * * For each line, after segmenting it, a line function is called. * It gets passed the array of field start and limit pointers that is * passed into this parser and filled by it for each line. * For each field i of the line, the start pointer in fields[i][0] * points to the beginning of the field, while the limit pointer in fields[i][1] * points behind the field, i.e., to the delimiter or the line end. * * The context parameter of the line function is * the same as the one for the parse function. * * The line function may modify the contents of the fields including the * limit characters. * * If the file cannot be opened, or there is a parsing error or a field function * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. */ U_CAPI void U_EXPORT2 u_parseDelimitedFile(const char *filename, char delimiter, char *fields[][2], int32_t fieldCount, UParseLineFn *lineFn, void *context, UErrorCode *pErrorCode); /** * Parse a string of code points like 0061 0308 0300. * s must end with either ';' or NUL. * * @return Number of code points. */ U_CAPI int32_t U_EXPORT2 u_parseCodePoints(const char *s, uint32_t *dest, int32_t destCapacity, UErrorCode *pErrorCode); /** * Parse a list of code points like 0061 0308 0300 * into a UChar * string. * s must end with either ';' or NUL. * * Set the first code point in *pFirst. * * @param s Input char * string. * @param dest Output string buffer. * @param destCapacity Capacity of dest in numbers of UChars. * @param pFirst If pFirst!=NULL the *pFirst will be set to the first * code point in the string. * @param pErrorCode ICU error code. * @return The length of the string in numbers of UChars. */ U_CAPI int32_t U_EXPORT2 u_parseString(const char *s, UChar *dest, int32_t destCapacity, uint32_t *pFirst, UErrorCode *pErrorCode); /** * Parse a code point range like * 0085 or * 4E00..9FA5. * * s must contain such a range and end with either ';' or NUL. * * @return Length of code point range, end-start+1 */ U_CAPI int32_t U_EXPORT2 u_parseCodePointRange(const char *s, uint32_t *pStart, uint32_t *pEnd, UErrorCode *pErrorCode); /** * Same as u_parseCodePointRange() but the range may be terminated by * any character. The position of the terminating character is returned via * the *terminator output parameter. */ U_CAPI int32_t U_EXPORT2 u_parseCodePointRangeAnyTerminator(const char *s, uint32_t *pStart, uint32_t *pEnd, const char **terminator, UErrorCode *pErrorCode); U_CAPI int32_t U_EXPORT2 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); U_CDECL_END #endif