diff options
Diffstat (limited to 'security/nss/lib/base/utf8.c')
-rw-r--r-- | security/nss/lib/base/utf8.c | 679 |
1 files changed, 679 insertions, 0 deletions
diff --git a/security/nss/lib/base/utf8.c b/security/nss/lib/base/utf8.c new file mode 100644 index 000000000..6d7b6a015 --- /dev/null +++ b/security/nss/lib/base/utf8.c @@ -0,0 +1,679 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * utf8.c + * + * This file contains some additional utility routines required for + * handling UTF8 strings. + */ + +#ifndef BASE_H +#include "base.h" +#endif /* BASE_H */ + +#include "plstr.h" + +/* + * NOTES: + * + * There's an "is hex string" function in pki1/atav.c. If we need + * it in more places, pull that one out. + */ + +/* + * nssUTF8_CaseIgnoreMatch + * + * Returns true if the two UTF8-encoded strings pointed to by the + * two specified NSSUTF8 pointers differ only in typcase. + * + * The error may be one of the following values: + * NSS_ERROR_INVALID_POINTER + * + * Return value: + * PR_TRUE if the strings match, ignoring case + * PR_FALSE if they don't + * PR_FALSE upon error + */ + +NSS_IMPLEMENT PRBool +nssUTF8_CaseIgnoreMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) +{ +#ifdef NSSDEBUG + if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_FAILURE; + } + return PR_FALSE; + } +#endif /* NSSDEBUG */ + + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_SUCCESS; + } + + /* + * XXX fgmr + * + * This is, like, so wrong! + */ + if (0 == PL_strcasecmp((const char *)a, (const char *)b)) { + return PR_TRUE; + } else { + return PR_FALSE; + } +} + +/* + * nssUTF8_PrintableMatch + * + * Returns true if the two Printable strings pointed to by the + * two specified NSSUTF8 pointers match when compared with the + * rules for Printable String (leading and trailing spaces are + * disregarded, extents of whitespace match irregardless of length, + * and case is not significant), then PR_TRUE will be returned. + * Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE + * will be returned. If the optional statusOpt argument is not + * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that + * location. + * + * The error may be one of the following values: + * NSS_ERROR_INVALID_POINTER + * + * Return value: + * PR_TRUE if the strings match, ignoring case + * PR_FALSE if they don't + * PR_FALSE upon error + */ + +NSS_IMPLEMENT PRBool +nssUTF8_PrintableMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) +{ + PRUint8 *c; + PRUint8 *d; + +#ifdef NSSDEBUG + if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_FAILURE; + } + return PR_FALSE; + } +#endif /* NSSDEBUG */ + + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_SUCCESS; + } + + c = (PRUint8 *)a; + d = (PRUint8 *)b; + + while (' ' == *c) { + c++; + } + + while (' ' == *d) { + d++; + } + + while (('\0' != *c) && ('\0' != *d)) { + PRUint8 e, f; + + e = *c; + f = *d; + + if (('a' <= e) && (e <= 'z')) { + e -= ('a' - 'A'); + } + + if (('a' <= f) && (f <= 'z')) { + f -= ('a' - 'A'); + } + + if (e != f) { + return PR_FALSE; + } + + c++; + d++; + + if (' ' == *c) { + while (' ' == *c) { + c++; + } + c--; + } + + if (' ' == *d) { + while (' ' == *d) { + d++; + } + d--; + } + } + + while (' ' == *c) { + c++; + } + + while (' ' == *d) { + d++; + } + + if (*c == *d) { + /* And both '\0', btw */ + return PR_TRUE; + } else { + return PR_FALSE; + } +} + +/* + * nssUTF8_Duplicate + * + * This routine duplicates the UTF8-encoded string pointed to by the + * specified NSSUTF8 pointer. If the optional arenaOpt argument is + * not null, the memory required will be obtained from that arena; + * otherwise, the memory required will be obtained from the heap. + * A pointer to the new string will be returned. In case of error, + * an error will be placed on the error stack and NULL will be + * returned. + * + * The error may be one of the following values: + * NSS_ERROR_INVALID_POINTER + * NSS_ERROR_INVALID_ARENA + * NSS_ERROR_NO_MEMORY + */ + +NSS_IMPLEMENT NSSUTF8 * +nssUTF8_Duplicate(const NSSUTF8 *s, NSSArena *arenaOpt) +{ + NSSUTF8 *rv; + PRUint32 len; + +#ifdef NSSDEBUG + if ((const NSSUTF8 *)NULL == s) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + return (NSSUTF8 *)NULL; + } + + if ((NSSArena *)NULL != arenaOpt) { + if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { + return (NSSUTF8 *)NULL; + } + } +#endif /* NSSDEBUG */ + + len = PL_strlen((const char *)s); +#ifdef PEDANTIC + if ('\0' != ((const char *)s)[len]) { + /* must have wrapped, e.g., too big for PRUint32 */ + nss_SetError(NSS_ERROR_NO_MEMORY); + return (NSSUTF8 *)NULL; + } +#endif /* PEDANTIC */ + len++; /* zero termination */ + + rv = nss_ZAlloc(arenaOpt, len); + if ((void *)NULL == rv) { + return (NSSUTF8 *)NULL; + } + + (void)nsslibc_memcpy(rv, s, len); + return rv; +} + +/* + * nssUTF8_Size + * + * This routine returns the length in bytes (including the terminating + * null) of the UTF8-encoded string pointed to by the specified + * NSSUTF8 pointer. Zero is returned on error. + * + * The error may be one of the following values: + * NSS_ERROR_INVALID_POINTER + * NSS_ERROR_VALUE_TOO_LARGE + * + * Return value: + * 0 on error + * nonzero length of the string. + */ + +NSS_IMPLEMENT PRUint32 +nssUTF8_Size(const NSSUTF8 *s, PRStatus *statusOpt) +{ + PRUint32 sv; + +#ifdef NSSDEBUG + if ((const NSSUTF8 *)NULL == s) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_FAILURE; + } + return 0; + } +#endif /* NSSDEBUG */ + + sv = PL_strlen((const char *)s) + 1; +#ifdef PEDANTIC + if ('\0' != ((const char *)s)[sv - 1]) { + /* wrapped */ + nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_FAILURE; + } + return 0; + } +#endif /* PEDANTIC */ + + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_SUCCESS; + } + + return sv; +} + +/* + * nssUTF8_Length + * + * This routine returns the length in characters (not including the + * terminating null) of the UTF8-encoded string pointed to by the + * specified NSSUTF8 pointer. + * + * The error may be one of the following values: + * NSS_ERROR_INVALID_POINTER + * NSS_ERROR_VALUE_TOO_LARGE + * NSS_ERROR_INVALID_STRING + * + * Return value: + * length of the string (which may be zero) + * 0 on error + */ + +NSS_IMPLEMENT PRUint32 +nssUTF8_Length(const NSSUTF8 *s, PRStatus *statusOpt) +{ + PRUint32 l = 0; + const PRUint8 *c = (const PRUint8 *)s; + +#ifdef NSSDEBUG + if ((const NSSUTF8 *)NULL == s) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + goto loser; + } +#endif /* NSSDEBUG */ + + /* + * From RFC 2044: + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx + */ + + while (0 != *c) { + PRUint32 incr; + if ((*c & 0x80) == 0) { + incr = 1; + } else if ((*c & 0xE0) == 0xC0) { + incr = 2; + } else if ((*c & 0xF0) == 0xE0) { + incr = 3; + } else if ((*c & 0xF8) == 0xF0) { + incr = 4; + } else if ((*c & 0xFC) == 0xF8) { + incr = 5; + } else if ((*c & 0xFE) == 0xFC) { + incr = 6; + } else { + nss_SetError(NSS_ERROR_INVALID_STRING); + goto loser; + } + + l += incr; + +#ifdef PEDANTIC + if (l < incr) { + /* Wrapped-- too big */ + nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); + goto loser; + } + + { + PRUint8 *d; + for (d = &c[1]; d < &c[incr]; d++) { + if ((*d & 0xC0) != 0xF0) { + nss_SetError(NSS_ERROR_INVALID_STRING); + goto loser; + } + } + } +#endif /* PEDANTIC */ + + c += incr; + } + + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_SUCCESS; + } + + return l; + +loser: + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_FAILURE; + } + + return 0; +} + +/* + * nssUTF8_Create + * + * This routine creates a UTF8 string from a string in some other + * format. Some types of string may include embedded null characters, + * so for them the length parameter must be used. For string types + * that are null-terminated, the length parameter is optional; if it + * is zero, it will be ignored. If the optional arena argument is + * non-null, the memory used for the new string will be obtained from + * that arena, otherwise it will be obtained from the heap. This + * routine may return NULL upon error, in which case it will have + * placed an error on the error stack. + * + * The error may be one of the following: + * NSS_ERROR_INVALID_POINTER + * NSS_ERROR_NO_MEMORY + * NSS_ERROR_UNSUPPORTED_TYPE + * + * Return value: + * NULL upon error + * A non-null pointer to a new UTF8 string otherwise + */ + +extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */ + +NSS_IMPLEMENT NSSUTF8 * +nssUTF8_Create(NSSArena *arenaOpt, nssStringType type, const void *inputString, + PRUint32 size /* in bytes, not characters */ + ) +{ + NSSUTF8 *rv = NULL; + +#ifdef NSSDEBUG + if ((NSSArena *)NULL != arenaOpt) { + if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { + return (NSSUTF8 *)NULL; + } + } + + if ((const void *)NULL == inputString) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + return (NSSUTF8 *)NULL; + } +#endif /* NSSDEBUG */ + + switch (type) { + case nssStringType_DirectoryString: + /* This is a composite type requiring BER */ + nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); + break; + case nssStringType_TeletexString: + /* + * draft-ietf-pkix-ipki-part1-11 says in part: + * + * In addition, many legacy implementations support names encoded + * in the ISO 8859-1 character set (Latin1String) but tag them as + * TeletexString. The Latin1String includes characters used in + * Western European countries which are not part of the + * TeletexString charcter set. Implementations that process + * TeletexString SHOULD be prepared to handle the entire ISO + * 8859-1 character set.[ISO 8859-1]. + */ + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_PrintableString: + /* + * PrintableString consists of A-Za-z0-9 ,()+,-./:=? + * This is a subset of ASCII, which is a subset of UTF8. + * So we can just duplicate the string over. + */ + + if (0 == size) { + rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); + } else { + rv = nss_ZAlloc(arenaOpt, size + 1); + if ((NSSUTF8 *)NULL == rv) { + return (NSSUTF8 *)NULL; + } + + (void)nsslibc_memcpy(rv, inputString, size); + } + + break; + case nssStringType_UniversalString: + /* 4-byte unicode */ + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_BMPString: + /* Base Multilingual Plane of Unicode */ + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_UTF8String: + if (0 == size) { + rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); + } else { + rv = nss_ZAlloc(arenaOpt, size + 1); + if ((NSSUTF8 *)NULL == rv) { + return (NSSUTF8 *)NULL; + } + + (void)nsslibc_memcpy(rv, inputString, size); + } + + break; + case nssStringType_PHGString: + /* + * PHGString is an IA5String (with case-insensitive comparisons). + * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has + * currency symbol. + */ + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_GeneralString: + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + default: + nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); + break; + } + + return rv; +} + +NSS_IMPLEMENT NSSItem * +nssUTF8_GetEncoding(NSSArena *arenaOpt, NSSItem *rvOpt, nssStringType type, + NSSUTF8 *string) +{ + NSSItem *rv = (NSSItem *)NULL; + PRStatus status = PR_SUCCESS; + +#ifdef NSSDEBUG + if ((NSSArena *)NULL != arenaOpt) { + if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { + return (NSSItem *)NULL; + } + } + + if ((NSSUTF8 *)NULL == string) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + return (NSSItem *)NULL; + } +#endif /* NSSDEBUG */ + + switch (type) { + case nssStringType_DirectoryString: + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_TeletexString: + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_PrintableString: + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_UniversalString: + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_BMPString: + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + case nssStringType_UTF8String: { + NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt); + if ((NSSUTF8 *)NULL == dup) { + return (NSSItem *)NULL; + } + + if ((NSSItem *)NULL == rvOpt) { + rv = nss_ZNEW(arenaOpt, NSSItem); + if ((NSSItem *)NULL == rv) { + (void)nss_ZFreeIf(dup); + return (NSSItem *)NULL; + } + } else { + rv = rvOpt; + } + + rv->data = dup; + dup = (NSSUTF8 *)NULL; + rv->size = nssUTF8_Size(rv->data, &status); + if ((0 == rv->size) && (PR_SUCCESS != status)) { + if ((NSSItem *)NULL == rvOpt) { + (void)nss_ZFreeIf(rv); + } + return (NSSItem *)NULL; + } + } break; + case nssStringType_PHGString: + nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ + break; + default: + nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); + break; + } + + return rv; +} + +/* + * nssUTF8_CopyIntoFixedBuffer + * + * This will copy a UTF8 string into a fixed-length buffer, making + * sure that the all characters are valid. Any remaining space will + * be padded with the specified ASCII character, typically either + * null or space. + * + * Blah, blah, blah. + */ + +NSS_IMPLEMENT PRStatus +nssUTF8_CopyIntoFixedBuffer(NSSUTF8 *string, char *buffer, PRUint32 bufferSize, + char pad) +{ + PRUint32 stringSize = 0; + +#ifdef NSSDEBUG + if ((char *)NULL == buffer) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + return PR_FALSE; + } + + if (0 == bufferSize) { + nss_SetError(NSS_ERROR_INVALID_ARGUMENT); + return PR_FALSE; + } + + if ((pad & 0x80) != 0x00) { + nss_SetError(NSS_ERROR_INVALID_ARGUMENT); + return PR_FALSE; + } +#endif /* NSSDEBUG */ + + if ((NSSUTF8 *)NULL == string) { + string = (NSSUTF8 *)""; + } + + stringSize = nssUTF8_Size(string, (PRStatus *)NULL); + stringSize--; /* don't count the trailing null */ + if (stringSize > bufferSize) { + PRUint32 bs = bufferSize; + (void)nsslibc_memcpy(buffer, string, bufferSize); + + if ((((buffer[bs - 1] & 0x80) == 0x00)) || + ((bs > 1) && ((buffer[bs - 2] & 0xE0) == 0xC0)) || + ((bs > 2) && ((buffer[bs - 3] & 0xF0) == 0xE0)) || + ((bs > 3) && ((buffer[bs - 4] & 0xF8) == 0xF0)) || + ((bs > 4) && ((buffer[bs - 5] & 0xFC) == 0xF8)) || + ((bs > 5) && ((buffer[bs - 6] & 0xFE) == 0xFC))) { + /* It fit exactly */ + return PR_SUCCESS; + } + + /* Too long. We have to trim the last character */ + for (/*bs*/; bs != 0; bs--) { + if ((buffer[bs - 1] & 0xC0) != 0x80) { + buffer[bs - 1] = pad; + break; + } else { + buffer[bs - 1] = pad; + } + } + } else { + (void)nsslibc_memset(buffer, pad, bufferSize); + (void)nsslibc_memcpy(buffer, string, stringSize); + } + + return PR_SUCCESS; +} + +/* + * nssUTF8_Equal + * + */ + +NSS_IMPLEMENT PRBool +nssUTF8_Equal(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) +{ + PRUint32 la, lb; + +#ifdef NSSDEBUG + if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { + nss_SetError(NSS_ERROR_INVALID_POINTER); + if ((PRStatus *)NULL != statusOpt) { + *statusOpt = PR_FAILURE; + } + return PR_FALSE; + } +#endif /* NSSDEBUG */ + + la = nssUTF8_Size(a, statusOpt); + if (0 == la) { + return PR_FALSE; + } + + lb = nssUTF8_Size(b, statusOpt); + if (0 == lb) { + return PR_FALSE; + } + + if (la != lb) { + return PR_FALSE; + } + + return nsslibc_memequal(a, b, la, statusOpt); +} |