// Walks the tree behind `iter` and, for every text node whose literal contains
// at least one URL-like match, replaces that node with a sequence of new nodes:
// for each match, a text node with the characters preceding it, a link node
// whose URL is "/history/<match>" and whose label is the match itself, and the
// node returned by superscript("^", "", <match>); finally a text node with
// whatever follows the last match. Text nodes without any match are untouched.
static void md_autolink(cmark_iter *const iter) {
	regex_t linkify[1];
	// <http://daringfireball.net/2010/07/improved_regex_for_matching_urls>
	// Painstakingly ported to POSIX
	int rc = regcomp(linkify, "([a-z][a-z0-9_-]+:(/{1,3}|[a-z0-9%])|www[0-9]{0,3}[.]|[a-z0-9.-]+[.][a-z]{2,4}/)([^[:space:]()<>]+|\\(([^[:space:]()<>]+|(\\([^[:space:]()<>]+\\)))*\\))+(\\(([^[:space:]()<>]+|(\\([^[:space:]()<>]+\\)))*\\)|[^][[:space:]`!(){};:'\".,<>?«»“”‘’])", REG_ICASE | REG_EXTENDED);
	assert(0 == rc);

	cmark_event_type event;
	while(CMARK_EVENT_DONE != (event = cmark_iter_next(iter))) {
		if(CMARK_EVENT_ENTER != event) continue;

		cmark_node *const node = cmark_iter_get_node(iter);
		if(CMARK_NODE_TEXT != cmark_node_get_type(node)) continue;

		char const *const literal = cmark_node_get_literal(node);
		char const *cursor = literal;
		regmatch_t hit;

		// Each pass consumes the text up to and including one match.
		while(0 == regexec(linkify, cursor, 1, &hit, 0)) {
			regoff_t const start = hit.rm_so;
			regoff_t const length = hit.rm_eo - hit.rm_so;

			char *before = strndup(cursor, start);
			char *url = strndup(cursor + start, length);
			char *href = aasprintf("/history/%s", url);
			assert(before);
			assert(url);
			assert(href);

			cmark_node *lead = cmark_node_new(CMARK_NODE_TEXT);
			cmark_node_set_literal(lead, before);

			cmark_node *anchor = cmark_node_new(CMARK_NODE_LINK);
			cmark_node_set_url(anchor, href);

			cmark_node *marker = superscript("^", "", url);

			cmark_node *label = cmark_node_new(CMARK_NODE_TEXT);
			cmark_node_set_literal(label, url);
			cmark_node_append_child(anchor, label);

			// The new nodes go in front of the original, preserving text order.
			cmark_node_insert_before(node, lead);
			cmark_node_insert_before(node, anchor);
			cmark_node_insert_before(node, marker);

			free(before); before = NULL;
			free(url); url = NULL;
			free(href); href = NULL;

			cursor += start + length;
		}

		// The cursor never moved: nothing matched, keep the node as it is.
		if(literal == cursor) continue;

		// Emit the remainder after the last match, then drop the original node.
		cmark_node *tail = cmark_node_new(CMARK_NODE_TEXT);
		cmark_node_set_literal(tail, cursor);
		cmark_node_insert_before(node, tail);
		cmark_node_free(node);
	}

	regfree(linkify);
}
// Walks the tree behind `iter` and replaces image nodes with link nodes that
// carry the same URL. Images whose URL passes one of the two
// strncasecmp(URI, STR_LEN(...)) checks for "hash:" / "data:" are left alone
// (STR_LEN is presumably "literal, length of literal" -- confirm against its
// definition). The image's children are moved into the new link, followed by
// an "(external image)" text node; the image node itself is then freed.
static void md_block_external_images(cmark_iter *const iter) {
	for(;;) {
		cmark_event_type const event = cmark_iter_next(iter);
		if(CMARK_EVENT_DONE == event) break;
		// Act on EXIT so the image's children have already been visited.
		if(CMARK_EVENT_EXIT != event) continue;
		cmark_node *const node = cmark_iter_get_node(iter);
		if(CMARK_NODE_IMAGE != cmark_node_get_type(node)) continue;

		char const *const URI = cmark_node_get_url(node);
		if(URI) {
			if(0 == strncasecmp(URI, STR_LEN("hash:"))) continue;
			if(0 == strncasecmp(URI, STR_LEN("data:"))) continue;
		}

		cmark_node *link = cmark_node_new(CMARK_NODE_LINK);
		cmark_node *text = cmark_node_new(CMARK_NODE_TEXT);
		cmark_node_set_url(link, URI);

		// Move the image's children (its description) into the link.
		for(;;) {
			cmark_node *child = cmark_node_first_child(node);
			if(!child) break;
			// If the append is refused, `child` would remain the first child and
			// this loop would never terminate; stop instead. Any children left
			// behind are released together with `node` below.
			if(!cmark_node_append_child(link, child)) break;
		}

		// Separate the marker from preceding description text with a space.
		if(cmark_node_first_child(link)) {
			cmark_node_set_literal(text, " (external image)");
		} else {
			cmark_node_set_literal(text, "(external image)");
		}
		cmark_node_append_child(link, text);

		cmark_node_insert_before(node, link);
		cmark_node_free(node);
	}
}
// Puts `newnode` where `oldnode` is: `newnode` is inserted directly before
// `oldnode`, and `oldnode` is then unlinked.
//
// Returns 1 on success. Returns 0, without unlinking `oldnode`, when
// cmark_node_insert_before() reports failure. Only cmark_node_unlink() is
// applied to `oldnode` here; this function does not free it.
int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) {
  int inserted = cmark_node_insert_before(oldnode, newnode);

  if (inserted) {
    cmark_node_unlink(oldnode);
  }

  return inserted ? 1 : 0;
}
// Collects the last `size` bytes of text at the end of `parent`'s children
// into a single link node.
//
// The children of `parent` are scanned from last to first, subtracting each
// text node's literal length from `size`. If the node where the count is
// reached holds more text than needed, it is split: a new text node with the
// leading surplus is inserted before it, and it keeps only the trailing part.
// The literals of that node and all following siblings are concatenated into
// the name, the following siblings are freed, and the node is turned into a
// CMARK_NODE_LINK whose URL is the name.
//
// inline_parser: not used by this function.
// parent:        node whose trailing text children are consumed.
// size:          number of trailing bytes that make up the name.
//
// Returns the converted node, or NULL if a non-text child is met during the
// backwards scan or the children hold fewer than `size` bytes of text.
static cmark_node *fixup_nodes(cmark_inline_parser *inline_parser,
                               cmark_node *parent, int size) {
  int node_text_len;
  cmark_node *prev = NULL;
  cmark_node *tmp;
  int name_size = size;
  cmark_strbuf *name;

  for (prev = cmark_node_last_child(parent); prev;
       prev = cmark_node_previous(prev)) {
    if (cmark_node_get_type(prev) != CMARK_NODE_TEXT)
      return NULL;

    const char *text = cmark_node_get_literal(prev);
    node_text_len = strlen(text);
    size -= node_text_len;

    if (size > 0)
      continue;

    if (size < 0) {
      // This node has `surplus` leading bytes that are not part of the name.
      int surplus = -size;
      char *split_text = my_strndup(text, surplus);
      cmark_node *split = cmark_node_new(CMARK_NODE_TEXT);

      cmark_node_set_literal(split, split_text);
      free(split_text);

      // Exactly the bytes after the surplus remain; asking for more than that
      // relies on the copy routine stopping at the terminator.
      split_text = my_strndup(text + surplus, node_text_len - surplus);
      cmark_node_set_literal(prev, split_text);
      free(split_text);

      cmark_node_insert_before(prev, split);
    }
    break;
  }

  // Ran out of children before `size` bytes were found: there is no node to
  // convert, so do not hand NULL to the node setters below.
  if (!prev)
    return NULL;

  name = cmark_strbuf_new(name_size + 1);

  tmp = prev;
  while (tmp) {
    cmark_node *next = cmark_node_next(tmp);

    cmark_strbuf_puts(name, cmark_node_get_literal(tmp));
    if (tmp != prev)
      cmark_node_free(tmp);
    tmp = next;
  }

  cmark_node_set_type(prev, CMARK_NODE_LINK);
  cmark_node_set_url(prev, cmark_strbuf_get(name));

  cmark_strbuf_free(name);

  return prev;
}
// Walks the tree behind `iter` and swaps every inline-HTML node for a plain
// text node holding the same literal. The inline-HTML node is freed after its
// replacement has been inserted in front of it.
static void md_escape_inline(cmark_iter *const iter) {
	cmark_event_type event;
	while(CMARK_EVENT_DONE != (event = cmark_iter_next(iter))) {
		if(CMARK_EVENT_ENTER != event) continue;

		cmark_node *const html = cmark_iter_get_node(iter);
		if(CMARK_NODE_INLINE_HTML != cmark_node_get_type(html)) continue;

		// Copy the raw markup into a text node before releasing the original.
		char const *const raw = cmark_node_get_literal(html);
		cmark_node *plain = cmark_node_new(CMARK_NODE_TEXT);
		cmark_node_set_literal(plain, raw);

		cmark_node_insert_before(html, plain);
		cmark_node_free(html);
	}
}
// Collects the last `size` bytes of text at the end of `parent`'s children
// into one node and tries to resolve that text as a named link.
//
// The children of `parent` are scanned from last to first, subtracting each
// text node's literal length from `size`. If the node where the count is
// reached holds more text than needed, it is split: a new text node with the
// leading surplus is inserted before it, and it keeps only the trailing part.
// The literals of that node and all following siblings are concatenated into
// the name and the following siblings are freed.
//
// The name is passed to PRIV(self)->link_resolve_func. If that yields no link
// or a link without `ref`, a "gtk-doc-bad-link" diagnostic is emitted and the
// node is left as text carrying the full name. Otherwise the node is turned
// into a CMARK_NODE_LINK whose URL is the name.
//
// self:          extension whose private data provides link_resolve_func.
// parser:        not used by this function.
// inline_parser: not used by this function.
// parent:        node whose trailing text children are consumed.
// start_offset:  forwarded to translate_sourcepos() for the diagnostic.
// size:          number of trailing bytes that make up the name.
//
// Returns the resulting node, or NULL if a non-text child is met during the
// backwards scan or the children hold fewer than `size` bytes of text.
static cmark_node *fixup_nodes(cmark_syntax_extension *self,
                               cmark_parser *parser,
                               cmark_inline_parser *inline_parser,
                               cmark_node *parent,
                               int start_offset,
                               int size) {
  int node_text_len;
  cmark_node *prev = NULL;
  cmark_node *tmp;
  int name_size = size;
  cmark_strbuf *name;
  NamedLink *named_link;

  for (prev = cmark_node_last_child(parent); prev;
       prev = cmark_node_previous(prev)) {
    if (cmark_node_get_type(prev) != CMARK_NODE_TEXT)
      return NULL;

    const char *text = cmark_node_get_literal(prev);
    node_text_len = strlen(text);
    size -= node_text_len;

    if (size > 0)
      continue;

    if (size < 0) {
      // This node has `surplus` leading bytes that are not part of the name.
      int surplus = -size;
      char *split_text = my_strndup(text, surplus);
      cmark_node *split = cmark_node_new(CMARK_NODE_TEXT);

      cmark_node_set_literal(split, split_text);
      free(split_text);

      // Exactly the bytes after the surplus remain; asking for more than that
      // relies on the copy routine stopping at the terminator.
      split_text = my_strndup(text + surplus, node_text_len - surplus);
      cmark_node_set_literal(prev, split_text);
      free(split_text);

      cmark_node_insert_before(prev, split);
    }
    break;
  }

  // Ran out of children before `size` bytes were found: there is no node to
  // work on, so do not hand NULL to the node setters below.
  if (!prev)
    return NULL;

  name = cmark_strbuf_new(name_size + 1);

  tmp = prev;
  while (tmp) {
    cmark_node *next = cmark_node_next(tmp);

    cmark_strbuf_puts(name, cmark_node_get_literal(tmp));
    if (tmp != prev)
      cmark_node_free(tmp);
    tmp = next;
  }

  named_link = PRIV(self)->link_resolve_func(cmark_strbuf_get(name));
  if (!named_link || !named_link->ref) {
    int actual_line, actual_col;
    cmark_strbuf *message;

    translate_sourcepos(get_first_parent_block(parent),
                        start_offset, &actual_line, &actual_col);

    message = cmark_strbuf_new(0);
    cmark_strbuf_puts(message, "Trying to link to non-existing symbol ‘");
    cmark_strbuf_puts(message, cmark_strbuf_get(name));
    cmark_strbuf_puts(message, "’");
    diagnose("gtk-doc-bad-link", cmark_strbuf_get(message),
             actual_line - 1, actual_col - 1);
    cmark_strbuf_free(message);

    // Keep the node as text, now holding the whole (unresolved) name.
    cmark_node_set_literal(prev, cmark_strbuf_get(name));
    cmark_strbuf_free(name);

    // A link object without `ref` is still ours to release, exactly as on the
    // success path below.
    if (named_link)
      free_named_link(named_link);
    return prev;
  }

  free_named_link(named_link);

  cmark_node_set_type(prev, CMARK_NODE_LINK);
  cmark_node_set_url(prev, cmark_strbuf_get(name));

  cmark_strbuf_free(name);

  return prev;
}
// Return a link, an image, or a literal close bracket. static cmark_node *handle_close_bracket(subject *subj) { bufsize_t initial_pos, after_link_text_pos; bufsize_t endurl, starttitle, endtitle, endall; bufsize_t sps, n; cmark_reference *ref = NULL; cmark_chunk url_chunk, title_chunk; cmark_chunk url, title; bracket *opener; cmark_node *inl; cmark_chunk raw_label; int found_label; cmark_node *tmp, *tmpnext; bool is_image; advance(subj); // advance past ] initial_pos = subj->pos; // get last [ or ![ opener = subj->last_bracket; if (opener == NULL) { return make_str(subj->mem, cmark_chunk_literal("]")); } if (!opener->active) { // take delimiter off stack pop_bracket(subj); return make_str(subj->mem, cmark_chunk_literal("]")); } // If we got here, we matched a potential link/image text. // Now we check to see if it's a link/image. is_image = opener->image; after_link_text_pos = subj->pos; // First, look for an inline link. if (peek_char(subj) == '(' && ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, &url_chunk)) > -1)) { // try to parse an explicit link: endurl = subj->pos + 1 + sps + n; starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? starttitle : starttitle + scan_link_title(&subj->input, starttitle); endall = endtitle + scan_spacechars(&subj->input, endtitle); if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle); url = cmark_clean_url(subj->mem, &url_chunk); title = cmark_clean_title(subj->mem, &title_chunk); cmark_chunk_free(subj->mem, &url_chunk); cmark_chunk_free(subj->mem, &title_chunk); goto match; } else { // it could still be a shortcut reference link subj->pos = after_link_text_pos; } } // Next, look for a following [link label] that matches in refmap. 
// skip spaces raw_label = cmark_chunk_literal(""); found_label = link_label(subj, &raw_label); if (!found_label) { // If we have a shortcut reference link, back up // to before the spacse we skipped. subj->pos = initial_pos; } if ((!found_label || raw_label.len == 0) && !opener->bracket_after) { cmark_chunk_free(subj->mem, &raw_label); raw_label = cmark_chunk_dup(&subj->input, opener->position, initial_pos - opener->position - 1); found_label = true; } if (found_label) { ref = cmark_reference_lookup(subj->refmap, &raw_label); cmark_chunk_free(subj->mem, &raw_label); } if (ref != NULL) { // found url = chunk_clone(subj->mem, &ref->url); title = chunk_clone(subj->mem, &ref->title); goto match; } else { goto noMatch; } noMatch: // If we fall through to here, it means we didn't match a link: pop_bracket(subj); // remove this opener from delimiter list subj->pos = initial_pos; return make_str(subj->mem, cmark_chunk_literal("]")); match: inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK); inl->as.link.url = url; inl->as.link.title = title; cmark_node_insert_before(opener->inl_text, inl); // Add link text: tmp = opener->inl_text->next; while (tmp) { tmpnext = tmp->next; cmark_node_append_child(inl, tmp); tmp = tmpnext; } // Free the bracket [: cmark_node_free(opener->inl_text); process_emphasis(subj, opener->previous_delimiter); pop_bracket(subj); // Now, if we have a link, we also want to deactivate earlier link // delimiters. (This code can be removed if we decide to allow links // inside links.) if (!is_image) { opener = subj->last_bracket; while (opener != NULL) { if (!opener->image) { if (!opener->active) { break; } else { opener->active = false; } } opener = opener->previous; } } return NULL; }
// Builds a small document by hand (a paragraph reading "Hello, *world*!"),
// checks tree consistency after every structural change, verifies the HTML
// rendering, then shuffles the paragraph's children with insert_before /
// insert_after and finally unlinks the emphasis node.
static void create_tree(test_batch_runner *runner) {
  cmark_node *root = cmark_node_new(CMARK_NODE_DOCUMENT);
  cmark_node *para = cmark_node_new(CMARK_NODE_PARAGRAPH);

  // A document root cannot take siblings.
  OK(runner, !cmark_node_insert_before(root, para), "insert before root fails");
  OK(runner, !cmark_node_insert_after(root, para), "insert after root fails");

  OK(runner, cmark_node_append_child(root, para), "append1");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "append1 consistent");
  OK(runner, cmark_node_parent(para) == root, "node_parent");

  cmark_node *em = cmark_node_new(CMARK_NODE_EMPH);
  OK(runner, cmark_node_prepend_child(para, em), "prepend1");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "prepend1 consistent");

  cmark_node *hello = cmark_node_new(CMARK_NODE_TEXT);
  cmark_node_set_literal(hello, "Hello, ");
  OK(runner, cmark_node_prepend_child(para, hello), "prepend2");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "prepend2 consistent");

  cmark_node *bang = cmark_node_new(CMARK_NODE_TEXT);
  cmark_node_set_literal(bang, "!");
  OK(runner, cmark_node_append_child(para, bang), "append2");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "append2 consistent");

  cmark_node *world = cmark_node_new(CMARK_NODE_TEXT);
  cmark_node_set_literal(world, "world");
  OK(runner, cmark_node_append_child(em, world), "append3");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "append3 consistent");

  char *rendered = cmark_render_html(root, CMARK_OPT_DEFAULT);
  STR_EQ(runner, rendered, "<p>Hello, <em>world</em>!</p>\n", "render_html");
  free(rendered);

  // Paragraph children become: "!", "Hello, ", emph
  OK(runner, cmark_node_insert_before(hello, bang), "ins before1");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "ins before1 consistent");
  OK(runner, cmark_node_first_child(para) == bang, "ins before1 works");

  // Paragraph children become: "!", emph, "Hello, "
  OK(runner, cmark_node_insert_before(hello, em), "ins before2");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "ins before2 consistent");
  OK(runner, cmark_node_last_child(para) == hello, "ins before2 works");

  // Paragraph children become: emph, "Hello, ", "!"
  OK(runner, cmark_node_insert_after(hello, bang), "ins after1");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "ins after1 consistent");
  OK(runner, cmark_node_next(hello) == bang, "ins after1 works");

  // Paragraph children become: "Hello, ", emph, "!"
  OK(runner, cmark_node_insert_after(hello, em), "ins after2");
  INT_EQ(runner, cmark_node_check(root, NULL), 0, "ins after2 consistent");
  OK(runner, cmark_node_previous(em) == hello, "ins after2 works");

  // Detach the emphasis node; it must no longer show up in the rendering.
  cmark_node_unlink(em);

  rendered = cmark_render_html(root, CMARK_OPT_DEFAULT);
  STR_EQ(runner, rendered, "<p>Hello, !</p>\n", "render_html after shuffling");
  free(rendered);

  cmark_node_free(root);

  // TODO: Test that the contents of an unlinked inline are valid
  // after the parent block was destroyed. This doesn't work so far.
  // The unlinked node is no longer part of `root`, so it is freed on its own.
  cmark_node_free(em);
}