static cmark_node *url_match(cmark_parser *parser, cmark_node *parent, cmark_inline_parser *inline_parser) { size_t link_end, domain_len; int rewind = 0; cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); int max_rewind = cmark_inline_parser_get_offset(inline_parser); uint8_t *data = chunk->data + max_rewind; size_t size = chunk->len - max_rewind; if (size < 4 || data[1] != '/' || data[2] != '/') return 0; while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1])) rewind++; if (!sd_autolink_issafe(data - rewind, size + rewind)) return 0; link_end = strlen("://"); domain_len = check_domain(data + link_end, size - link_end, 1); if (domain_len == 0) return 0; link_end += domain_len; while (link_end < size && !cmark_isspace(data[link_end])) link_end++; link_end = autolink_delim(data, link_end); if (link_end == 0) return NULL; cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); cmark_node_unput(parent, rewind); cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind, (bufsize_t)(link_end + rewind)); node->as.link.url = url; cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); text->as.literal = url; cmark_node_append_child(node, text); return node; }
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c, unsigned char nextc) { bool needs_escaping = false; bool follows_digit = renderer->buffer->size > 0 && cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); char encoded[ENCODED_SIZE]; needs_escaping = c < 0x80 && escape != LITERAL && ((escape == NORMAL && (c < 0x20 || c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || c == '>' || c == '\\' || c == '`' || c == '!' || (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || (renderer->begin_content && (c == '-' || c == '+' || c == '=') && // begin_content doesn't get set to false til we've passed digits // at the beginning of line, so... !follows_digit) || (renderer->begin_content && (c == '.' || c == ')') && follows_digit && (nextc == 0 || cmark_isspace(nextc))))) || (escape == URL && (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || c == ')' || c == '(')) || (escape == TITLE && (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); if (needs_escaping) { if (escape == URL && cmark_isspace(c)) { // use percent encoding for spaces snprintf(encoded, ENCODED_SIZE, "%%%2X", c); cmark_strbuf_puts(renderer->buffer, encoded); renderer->column += 3; } else if (cmark_ispunct(c)) { cmark_render_ascii(renderer, "\\"); cmark_render_code_point(renderer, c); } else { // render as entity snprintf(encoded, ENCODED_SIZE, "&#%d;", c); cmark_strbuf_puts(renderer->buffer, encoded); renderer->column += strlen(encoded); } } else { cmark_render_code_point(renderer, c); } }
static size_t autolink_delim(uint8_t *data, size_t link_end) { uint8_t cclose, copen; size_t i; for (i = 0; i < link_end; ++i) if (data[i] == '<') { link_end = i; break; } while (link_end > 0) { cclose = data[link_end - 1]; switch (cclose) { case ')': copen = '('; break; default: copen = 0; } if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL) link_end--; else if (data[link_end - 1] == ';') { size_t new_end = link_end - 2; while (new_end > 0 && cmark_isalpha(data[new_end])) new_end--; if (new_end < link_end - 2 && data[new_end] == '&') link_end = new_end; else link_end--; } else if (copen != 0) { size_t closing = 0; size_t opening = 0; i = 0; /* Allow any number of matching brackets (as recognised in copen/cclose) * at the end of the URL. If there is a greater number of closing * brackets than opening ones, we remove one character from the end of * the link. * * Examples (input text => output linked portion): * * http://www.pokemon.com/Pikachu_(Electric) * => http://www.pokemon.com/Pikachu_(Electric) * * http://www.pokemon.com/Pikachu_((Electric) * => http://www.pokemon.com/Pikachu_((Electric) * * http://www.pokemon.com/Pikachu_(Electric)) * => http://www.pokemon.com/Pikachu_(Electric) * * http://www.pokemon.com/Pikachu_((Electric)) * => http://www.pokemon.com/Pikachu_((Electric)) */ while (i < link_end) { if (data[i] == copen) opening++; else if (data[i] == cclose) closing++; i++; } if (closing <= opening) break; link_end--; } else break; } return link_end; }
static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) { size_t link_end; uint8_t *data = text->as.literal.data, *at; size_t size = text->as.literal.len; int rewind, max_rewind, nb = 0, np = 0, ns = 0; if (offset < 0 || (size_t)offset >= size) return; data += offset; size -= offset; at = (uint8_t *)memchr(data, '@', size); if (!at) return; max_rewind = (int)(at - data); data += max_rewind; size -= max_rewind; for (rewind = 0; rewind < max_rewind; ++rewind) { uint8_t c = data[-rewind - 1]; if (cmark_isalnum(c)) continue; if (strchr(".+-_", c) != NULL) continue; if (c == '/') ns++; break; } if (rewind == 0 || ns > 0) { postprocess_text(parser, text, max_rewind + 1 + offset); return; } for (link_end = 0; link_end < size; ++link_end) { uint8_t c = data[link_end]; if (cmark_isalnum(c)) continue; if (c == '@') nb++; else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1])) np++; else if (c != '-' && c != '_') break; } if (link_end < 2 || nb != 1 || np == 0 || (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) { postprocess_text(parser, text, max_rewind + 1 + offset); return; } link_end = autolink_delim(data, link_end); if (link_end == 0) { postprocess_text(parser, text, max_rewind + 1 + offset); return; } cmark_chunk_to_cstr(parser->mem, &text->as.literal); cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); cmark_strbuf buf; cmark_strbuf_init(parser->mem, &buf, 10); cmark_strbuf_puts(&buf, "mailto:"); cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind)); link_node->as.link.url = cmark_chunk_buf_detach(&buf); cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); cmark_chunk email = cmark_chunk_dup( &text->as.literal, offset + max_rewind - rewind, (bufsize_t)(link_end + rewind)); cmark_chunk_to_cstr(parser->mem, &email); link_text->as.literal = email; cmark_node_append_child(link_node, link_text); cmark_node_insert_after(text, link_node); cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); post->as.literal = cmark_chunk_dup(&text->as.literal, (bufsize_t)(offset + max_rewind + link_end), (bufsize_t)(size - link_end)); cmark_chunk_to_cstr(parser->mem, &post->as.literal); cmark_node_insert_after(link_node, post); text->as.literal.len = offset + max_rewind - rewind; text->as.literal.data[text->as.literal.len] = 0; postprocess_text(parser, post, 0); }