From b6d455a02bd338e9dc0faa09d4d8177ecd8d569a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Mr=C3=A1zek?= Date: Sun, 10 Apr 2016 15:53:05 +0200 Subject: NOISSUE reorganize and document libraries --- libraries/hoedown/src/autolink.c | 281 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 libraries/hoedown/src/autolink.c (limited to 'libraries/hoedown/src/autolink.c') diff --git a/libraries/hoedown/src/autolink.c b/libraries/hoedown/src/autolink.c new file mode 100644 index 00000000..9bc7fad5 --- /dev/null +++ b/libraries/hoedown/src/autolink.c @@ -0,0 +1,281 @@ +#include "hoedown/autolink.h" + +#include +#include +#include +#include + +#ifndef _MSC_VER +#include +#else +#define strncasecmp _strnicmp +#endif + +int +hoedown_autolink_is_safe(const uint8_t *data, size_t size) +{ + static const size_t valid_uris_count = 6; + static const char *valid_uris[] = { + "http://", "https://", "/", "#", "ftp://", "mailto:" + }; + static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 }; + size_t i; + + for (i = 0; i < valid_uris_count; ++i) { + size_t len = valid_uris_size[i]; + + if (size > len && + strncasecmp((char *)data, valid_uris[i], len) == 0 && + isalnum(data[len])) + return 1; + } + + return 0; +} + +static size_t +autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size) +{ + uint8_t cclose, copen = 0; + size_t i; + + for (i = 0; i < link_end; ++i) + if (data[i] == '<') { + link_end = i; + break; + } + + while (link_end > 0) { + if (strchr("?!.,:", data[link_end - 1]) != NULL) + link_end--; + + else if (data[link_end - 1] == ';') { + size_t new_end = link_end - 2; + + while (new_end > 0 && isalpha(data[new_end])) + new_end--; + + if (new_end < link_end - 2 && data[new_end] == '&') + link_end = new_end; + else + link_end--; + } + else break; + } + + if (link_end == 0) + return 0; + + cclose = data[link_end - 1]; + + switch (cclose) { + case '"': copen = '"'; break; + case '\'': copen = '\''; break; + case ')': copen = '('; break; + case ']': copen = '['; break; + case '}': copen = '{'; break; + } + + if (copen != 0) { + size_t closing = 0; + size_t opening = 0; + size_t i = 0; + + /* Try to close the final punctuation sign in this same line; + * if we managed to close it outside of the URL, that means that it's + * not part of the URL. If it closes inside the URL, that means it + * is part of the URL. + * + * Examples: + * + * foo http://www.pokemon.com/Pikachu_(Electric) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo (http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric)) + * + * (foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => foo http://www.pokemon.com/Pikachu_(Electric) + */ + + while (i < link_end) { + if (data[i] == copen) + opening++; + else if (data[i] == cclose) + closing++; + + i++; + } + + if (closing != opening) + link_end--; + } + + return link_end; +} + +static size_t +check_domain(uint8_t *data, size_t size, int allow_short) +{ + size_t i, np = 0; + + if (!isalnum(data[0])) + return 0; + + for (i = 1; i < size - 1; ++i) { + if (strchr(".:", data[i]) != NULL) np++; + else if (!isalnum(data[i]) && data[i] != '-') break; + } + + if (allow_short) { + /* We don't need a valid domain in the strict sense (with + * least one dot; so just make sure it's composed of valid + * domain characters and return the length of the the valid + * sequence. */ + return i; + } else { + /* a valid domain needs to have at least a dot. + * that's as far as we get */ + return np ? i : 0; + } +} + +size_t +hoedown_autolink__www( + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) +{ + size_t link_end; + + if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1])) + return 0; + + if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) + return 0; + + link_end = check_domain(data, size, 0); + + if (link_end == 0) + return 0; + + while (link_end < size && !isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + hoedown_buffer_put(link, data, link_end); + *rewind_p = 0; + + return (int)link_end; +} + +size_t +hoedown_autolink__email( + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) +{ + size_t link_end, rewind; + int nb = 0, np = 0; + + for (rewind = 0; rewind < max_rewind; ++rewind) { + uint8_t c = data[-1 - rewind]; + + if (isalnum(c)) + continue; + + if (strchr(".+-_", c) != NULL) + continue; + + break; + } + + if (rewind == 0) + return 0; + + for (link_end = 0; link_end < size; ++link_end) { + uint8_t c = data[link_end]; + + if (isalnum(c)) + continue; + + if (c == '@') + nb++; + else if (c == '.' && link_end < size - 1) + np++; + else if (c != '-' && c != '_') + break; + } + + if (link_end < 2 || nb != 1 || np == 0 || + !isalpha(data[link_end - 1])) + return 0; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + hoedown_buffer_put(link, data - rewind, link_end + rewind); + *rewind_p = rewind; + + return link_end; +} + +size_t +hoedown_autolink__url( + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) +{ + size_t link_end, rewind = 0, domain_len; + + if (size < 4 || data[1] != '/' || data[2] != '/') + return 0; + + while (rewind < max_rewind && isalpha(data[-1 - rewind])) + rewind++; + + if (!hoedown_autolink_is_safe(data - rewind, size + rewind)) + return 0; + + link_end = strlen("://"); + + domain_len = check_domain( + data + link_end, + size - link_end, + flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS); + + if (domain_len == 0) + return 0; + + link_end += domain_len; + while (link_end < size && !isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + hoedown_buffer_put(link, data - rewind, link_end + rewind); + *rewind_p = rewind; + + return link_end; +} -- cgit v1.2.3