From 32b3ed0a1362a4b0798ad71fac3450fb77cb7e41 Mon Sep 17 00:00:00 2001 From: Thomas Groman Date: Thu, 19 Sep 2019 00:41:48 -0700 Subject: merged from 0.6.7 codebase --- libraries/hoedown/src/autolink.c | 414 +++++++++++++++++++-------------------- 1 file changed, 207 insertions(+), 207 deletions(-) (limited to 'libraries/hoedown/src/autolink.c') diff --git a/libraries/hoedown/src/autolink.c b/libraries/hoedown/src/autolink.c index 9bc7fad5..3063b1a0 100644 --- a/libraries/hoedown/src/autolink.c +++ b/libraries/hoedown/src/autolink.c @@ -8,274 +8,274 @@ #ifndef _MSC_VER #include #else -#define strncasecmp _strnicmp +#define strncasecmp _strnicmp #endif int hoedown_autolink_is_safe(const uint8_t *data, size_t size) { - static const size_t valid_uris_count = 6; - static const char *valid_uris[] = { - "http://", "https://", "/", "#", "ftp://", "mailto:" - }; - static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 }; - size_t i; - - for (i = 0; i < valid_uris_count; ++i) { - size_t len = valid_uris_size[i]; - - if (size > len && - strncasecmp((char *)data, valid_uris[i], len) == 0 && - isalnum(data[len])) - return 1; - } - - return 0; + static const size_t valid_uris_count = 6; + static const char *valid_uris[] = { + "http://", "https://", "/", "#", "ftp://", "mailto:" + }; + static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 }; + size_t i; + + for (i = 0; i < valid_uris_count; ++i) { + size_t len = valid_uris_size[i]; + + if (size > len && + strncasecmp((char *)data, valid_uris[i], len) == 0 && + isalnum(data[len])) + return 1; + } + + return 0; } static size_t autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size) { - uint8_t cclose, copen = 0; - size_t i; - - for (i = 0; i < link_end; ++i) - if (data[i] == '<') { - link_end = i; - break; - } - - while (link_end > 0) { - if (strchr("?!.,:", data[link_end - 1]) != NULL) - link_end--; - - else if (data[link_end - 1] == ';') { - size_t new_end = link_end - 2; - - while (new_end > 0 && isalpha(data[new_end])) - new_end--; - - if (new_end < link_end - 2 && data[new_end] == '&') - link_end = new_end; - else - link_end--; - } - else break; - } - - if (link_end == 0) - return 0; - - cclose = data[link_end - 1]; - - switch (cclose) { - case '"': copen = '"'; break; - case '\'': copen = '\''; break; - case ')': copen = '('; break; - case ']': copen = '['; break; - case '}': copen = '{'; break; - } - - if (copen != 0) { - size_t closing = 0; - size_t opening = 0; - size_t i = 0; - - /* Try to close the final punctuation sign in this same line; - * if we managed to close it outside of the URL, that means that it's - * not part of the URL. If it closes inside the URL, that means it - * is part of the URL. - * - * Examples: - * - * foo http://www.pokemon.com/Pikachu_(Electric) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo (http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric)) - * - * (foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => foo http://www.pokemon.com/Pikachu_(Electric) - */ - - while (i < link_end) { - if (data[i] == copen) - opening++; - else if (data[i] == cclose) - closing++; - - i++; - } - - if (closing != opening) - link_end--; - } - - return link_end; + uint8_t cclose, copen = 0; + size_t i; + + for (i = 0; i < link_end; ++i) + if (data[i] == '<') { + link_end = i; + break; + } + + while (link_end > 0) { + if (strchr("?!.,:", data[link_end - 1]) != NULL) + link_end--; + + else if (data[link_end - 1] == ';') { + size_t new_end = link_end - 2; + + while (new_end > 0 && isalpha(data[new_end])) + new_end--; + + if (new_end < link_end - 2 && data[new_end] == '&') + link_end = new_end; + else + link_end--; + } + else break; + } + + if (link_end == 0) + return 0; + + cclose = data[link_end - 1]; + + switch (cclose) { + case '"': copen = '"'; break; + case '\'': copen = '\''; break; + case ')': copen = '('; break; + case ']': copen = '['; break; + case '}': copen = '{'; break; + } + + if (copen != 0) { + size_t closing = 0; + size_t opening = 0; + size_t i = 0; + + /* Try to close the final punctuation sign in this same line; + * if we managed to close it outside of the URL, that means that it's + * not part of the URL. If it closes inside the URL, that means it + * is part of the URL. + * + * Examples: + * + * foo http://www.pokemon.com/Pikachu_(Electric) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo (http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric)) + * + * (foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => foo http://www.pokemon.com/Pikachu_(Electric) + */ + + while (i < link_end) { + if (data[i] == copen) + opening++; + else if (data[i] == cclose) + closing++; + + i++; + } + + if (closing != opening) + link_end--; + } + + return link_end; } static size_t check_domain(uint8_t *data, size_t size, int allow_short) { - size_t i, np = 0; - - if (!isalnum(data[0])) - return 0; - - for (i = 1; i < size - 1; ++i) { - if (strchr(".:", data[i]) != NULL) np++; - else if (!isalnum(data[i]) && data[i] != '-') break; - } - - if (allow_short) { - /* We don't need a valid domain in the strict sense (with - * least one dot; so just make sure it's composed of valid - * domain characters and return the length of the the valid - * sequence. */ - return i; - } else { - /* a valid domain needs to have at least a dot. - * that's as far as we get */ - return np ? i : 0; - } + size_t i, np = 0; + + if (!isalnum(data[0])) + return 0; + + for (i = 1; i < size - 1; ++i) { + if (strchr(".:", data[i]) != NULL) np++; + else if (!isalnum(data[i]) && data[i] != '-') break; + } + + if (allow_short) { + /* We don't need a valid domain in the strict sense (with + * least one dot; so just make sure it's composed of valid + * domain characters and return the length of the the valid + * sequence. */ + return i; + } else { + /* a valid domain needs to have at least a dot. + * that's as far as we get */ + return np ? i : 0; + } } size_t hoedown_autolink__www( - size_t *rewind_p, - hoedown_buffer *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) { - size_t link_end; + size_t link_end; - if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1])) - return 0; + if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1])) + return 0; - if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) - return 0; + if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) + return 0; - link_end = check_domain(data, size, 0); + link_end = check_domain(data, size, 0); - if (link_end == 0) - return 0; + if (link_end == 0) + return 0; - while (link_end < size && !isspace(data[link_end])) - link_end++; + while (link_end < size && !isspace(data[link_end])) + link_end++; - link_end = autolink_delim(data, link_end, max_rewind, size); + link_end = autolink_delim(data, link_end, max_rewind, size); - if (link_end == 0) - return 0; + if (link_end == 0) + return 0; - hoedown_buffer_put(link, data, link_end); - *rewind_p = 0; + hoedown_buffer_put(link, data, link_end); + *rewind_p = 0; - return (int)link_end; + return (int)link_end; } size_t hoedown_autolink__email( - size_t *rewind_p, - hoedown_buffer *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) { - size_t link_end, rewind; - int nb = 0, np = 0; + size_t link_end, rewind; + int nb = 0, np = 0; - for (rewind = 0; rewind < max_rewind; ++rewind) { - uint8_t c = data[-1 - rewind]; + for (rewind = 0; rewind < max_rewind; ++rewind) { + uint8_t c = data[-1 - rewind]; - if (isalnum(c)) - continue; + if (isalnum(c)) + continue; - if (strchr(".+-_", c) != NULL) - continue; + if (strchr(".+-_", c) != NULL) + continue; - break; - } + break; + } - if (rewind == 0) - return 0; + if (rewind == 0) + return 0; - for (link_end = 0; link_end < size; ++link_end) { - uint8_t c = data[link_end]; + for (link_end = 0; link_end < size; ++link_end) { + uint8_t c = data[link_end]; - if (isalnum(c)) - continue; + if (isalnum(c)) + continue; - if (c == '@') - nb++; - else if (c == '.' && link_end < size - 1) - np++; - else if (c != '-' && c != '_') - break; - } + if (c == '@') + nb++; + else if (c == '.' && link_end < size - 1) + np++; + else if (c != '-' && c != '_') + break; + } - if (link_end < 2 || nb != 1 || np == 0 || - !isalpha(data[link_end - 1])) - return 0; + if (link_end < 2 || nb != 1 || np == 0 || + !isalpha(data[link_end - 1])) + return 0; - link_end = autolink_delim(data, link_end, max_rewind, size); + link_end = autolink_delim(data, link_end, max_rewind, size); - if (link_end == 0) - return 0; + if (link_end == 0) + return 0; - hoedown_buffer_put(link, data - rewind, link_end + rewind); - *rewind_p = rewind; + hoedown_buffer_put(link, data - rewind, link_end + rewind); + *rewind_p = rewind; - return link_end; + return link_end; } size_t hoedown_autolink__url( - size_t *rewind_p, - hoedown_buffer *link, - uint8_t *data, - size_t max_rewind, - size_t size, - unsigned int flags) + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) { - size_t link_end, rewind = 0, domain_len; + size_t link_end, rewind = 0, domain_len; - if (size < 4 || data[1] != '/' || data[2] != '/') - return 0; + if (size < 4 || data[1] != '/' || data[2] != '/') + return 0; - while (rewind < max_rewind && isalpha(data[-1 - rewind])) - rewind++; + while (rewind < max_rewind && isalpha(data[-1 - rewind])) + rewind++; - if (!hoedown_autolink_is_safe(data - rewind, size + rewind)) - return 0; + if (!hoedown_autolink_is_safe(data - rewind, size + rewind)) + return 0; - link_end = strlen("://"); + link_end = strlen("://"); - domain_len = check_domain( - data + link_end, - size - link_end, - flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS); + domain_len = check_domain( + data + link_end, + size - link_end, + flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS); - if (domain_len == 0) - return 0; + if (domain_len == 0) + return 0; - link_end += domain_len; - while (link_end < size && !isspace(data[link_end])) - link_end++; + link_end += domain_len; + while (link_end < size && !isspace(data[link_end])) + link_end++; - link_end = autolink_delim(data, link_end, max_rewind, size); + link_end = autolink_delim(data, link_end, max_rewind, size); - if (link_end == 0) - return 0; + if (link_end == 0) + return 0; - hoedown_buffer_put(link, data - rewind, link_end + rewind); - *rewind_p = rewind; + hoedown_buffer_put(link, data - rewind, link_end + rewind); + *rewind_p = rewind; - return link_end; + return link_end; } -- cgit v1.2.3