/*
 * Merge every run of adjacent text nodes below `root` into the first node of
 * the run: the literals of the text siblings that directly follow it are
 * appended to its literal, and those siblings are freed.
 */
void cmark_consolidate_text_nodes(cmark_node *root) {
  cmark_strbuf merged = GH_BUF_INIT;
  cmark_iter *walker = cmark_iter_new(root);

  for (;;) {
    cmark_event_type event = cmark_iter_next(walker);
    if (event == CMARK_EVENT_DONE) {
      break;
    }

    cmark_node *head = cmark_iter_get_node(walker);

    /* A merge starts only on entering a text node that is directly
     * followed by another text node. */
    if (event != CMARK_EVENT_ENTER || head->type != CMARK_NODE_TEXT) {
      continue;
    }
    if (head->next == NULL || head->next->type != CMARK_NODE_TEXT) {
      continue;
    }

    cmark_strbuf_clear(&merged);
    cmark_strbuf_put(&merged, head->as.literal.data, head->as.literal.len);

    cmark_node *sibling = head->next;
    while (sibling != NULL && sibling->type == CMARK_NODE_TEXT) {
      /* Step the iterator once per absorbed sibling, before that sibling
       * is freed. */
      cmark_iter_next(walker);
      cmark_strbuf_put(&merged, sibling->as.literal.data,
                       sibling->as.literal.len);

      cmark_node *following = sibling->next;
      cmark_node_free(sibling);
      sibling = following;
    }

    /* Replace the head's literal with the concatenated text. */
    cmark_chunk_free(&head->as.literal);
    head->as.literal = cmark_chunk_buf_detach(&merged);
  }

  cmark_strbuf_free(&merged);
  cmark_iter_free(walker);
}
/*
 * Render the tree rooted at `root` as an XML document.
 *
 * When CMARK_OPT_NORMALIZE is set in `options`, adjacent text nodes are
 * consolidated first.  The output starts with the XML declaration and the
 * CommonMark DOCTYPE line, followed by whatever S_render_node emits for each
 * iterator event.  Returns the string detached from the output buffer.
 */
char *cmark_render_xml(cmark_node *root, long options) {
  cmark_strbuf xml = GH_BUF_INIT;
  struct render_state state = { &xml, 0 };

  if (options & CMARK_OPT_NORMALIZE) {
    cmark_consolidate_text_nodes(root);
  }

  cmark_iter *iter = cmark_iter_new(root);

  cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  cmark_strbuf_puts(state.xml,
                    "<!DOCTYPE CommonMark SYSTEM \"CommonMark.dtd\">\n");

  cmark_event_type event;
  for (event = cmark_iter_next(iter); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(iter)) {
    cmark_node *node = cmark_iter_get_node(iter);
    S_render_node(node, event, &state, options);
  }

  char *rendered = (char *)cmark_strbuf_detach(&xml);

  cmark_iter_free(iter);
  cmark_strbuf_free(&xml);
  return rendered;
}
/*
 * Rewrite every link or image whose URL starts with "hash:" (compared
 * case-insensitively).
 *
 * For each such node a superscript("#", HASH_INFO_MSG, URI) node is inserted
 * right after it, and the node's URL is replaced by "/sources/" followed by
 * the QSEscape()d original URL.  Nodes are handled on their EXIT event.
 */
static void md_convert_hashes(cmark_iter *const iter) {
	cmark_event_type event;
	while(CMARK_EVENT_DONE != (event = cmark_iter_next(iter))) {
		if(CMARK_EVENT_EXIT != event) continue;

		cmark_node *const target = cmark_iter_get_node(iter);
		cmark_node_type const kind = cmark_node_get_type(target);
		if(!(CMARK_NODE_LINK == kind || CMARK_NODE_IMAGE == kind)) continue;

		char const *const URI = cmark_node_get_url(target);
		if(!URI || 0 != strncasecmp(URI, STR_LEN("hash:"))) continue;

		cmark_node *marker = superscript("#", HASH_INFO_MSG, URI);
		cmark_node_insert_after(target, marker);
		// Reposition the iterator at the EXIT event of the node just inserted.
		cmark_iter_reset(iter, marker, CMARK_EVENT_EXIT);

		char const prefix[] = "/sources/";
		size_t const prefix_len = sizeof(prefix)-1;
		char *quoted = QSEscape(URI, strlen(URI), true);
		size_t const quoted_len = strlen(quoted);

		// Assemble the replacement URL: prefix + escaped original.
		cmark_strbuf url;
		cmark_strbuf_init(&DEFAULT_MEM_ALLOCATOR, &url, prefix_len+quoted_len);
		cmark_strbuf_put(&url, (unsigned char const *)prefix, prefix_len);
		cmark_strbuf_put(&url, (unsigned char const *)quoted, quoted_len);
		free(quoted); quoted = NULL;

		cmark_node_set_url(target, cmark_strbuf_cstr(&url));
		cmark_strbuf_free(&url);
	}
}
/*
 * Post-processing hook: consolidate adjacent text nodes under `root`, then
 * call postprocess_text() on every text node that is not inside a link.
 *
 * `ext` is not used by this function.  Returns `root`.
 */
static cmark_node *postprocess(cmark_syntax_extension *ext,
                               cmark_parser *parser, cmark_node *root) {
  bool inside_link = false;

  cmark_consolidate_text_nodes(root);

  cmark_iter *walker = cmark_iter_new(root);
  for (;;) {
    cmark_event_type event = cmark_iter_next(walker);
    if (event == CMARK_EVENT_DONE) {
      break;
    }

    cmark_node *current = cmark_iter_get_node(walker);

    if (inside_link) {
      /* Ignore everything until the EXIT event of a link node. */
      if (event == CMARK_EVENT_EXIT && current->type == CMARK_NODE_LINK) {
        inside_link = false;
      }
      continue;
    }

    if (event != CMARK_EVENT_ENTER) {
      continue;
    }

    if (current->type == CMARK_NODE_LINK) {
      inside_link = true;
    } else if (current->type == CMARK_NODE_TEXT) {
      postprocess_text(parser, current, 0);
    }
  }

  cmark_iter_free(walker);
  return root;
}
/*
 * Replace every image whose URL does not start with "hash:" or "data:"
 * (compared case-insensitively) by a link to the same URL.
 *
 * The image's children are moved into the new link, followed by a text node
 * reading "(external image)" (with a leading space when the link already has
 * children).  The link is inserted before the image and the image is freed.
 * Images are handled on their EXIT event; an image with no URL is replaced
 * as well.
 */
static void md_block_external_images(cmark_iter *const iter) {
	cmark_event_type event;
	while(CMARK_EVENT_DONE != (event = cmark_iter_next(iter))) {
		if(CMARK_EVENT_EXIT != event) continue;

		cmark_node *const image = cmark_iter_get_node(iter);
		if(CMARK_NODE_IMAGE != cmark_node_get_type(image)) continue;

		char const *const URI = cmark_node_get_url(image);
		if(URI && 0 == strncasecmp(URI, STR_LEN("hash:"))) continue;
		if(URI && 0 == strncasecmp(URI, STR_LEN("data:"))) continue;

		cmark_node *replacement = cmark_node_new(CMARK_NODE_LINK);
		cmark_node *label = cmark_node_new(CMARK_NODE_TEXT);
		cmark_node_set_url(replacement, URI);

		// Move the image's children over, one first-child at a time.
		cmark_node *child;
		while(NULL != (child = cmark_node_first_child(image))) {
			cmark_node_append_child(replacement, child);
		}

		char const *const caption = cmark_node_first_child(replacement)
			? " (external image)"
			: "(external image)";
		cmark_node_set_literal(label, caption);
		cmark_node_append_child(replacement, label);

		cmark_node_insert_before(image, replacement);
		cmark_node_free(image);
	}
}
/*
 * Render the tree rooted at `root` as HTML, using `mem` for the output
 * buffer and for the temporary list of filtering extensions.
 *
 * Every entry of `extensions` that has an html_filter_func is collected into
 * renderer.filter_extensions before rendering.  If renderer.footnote_ix is
 * non-zero after the walk, "</ol>\n</section>\n" is appended.  Returns the
 * string detached from the output buffer.
 */
char *cmark_render_html_with_mem(cmark_node *root, int options,
                                 cmark_llist *extensions, cmark_mem *mem) {
  cmark_strbuf html = CMARK_BUF_INIT(mem);
  cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL};
  cmark_iter *iter = cmark_iter_new(root);

  /* Keep only the extensions that provide an HTML filter. */
  cmark_llist *entry = extensions;
  while (entry != NULL) {
    cmark_syntax_extension *candidate = (cmark_syntax_extension *)entry->data;
    if (candidate->html_filter_func) {
      renderer.filter_extensions =
          cmark_llist_append(mem, renderer.filter_extensions, candidate);
    }
    entry = entry->next;
  }

  cmark_event_type event;
  for (event = cmark_iter_next(iter); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(iter)) {
    cmark_node *node = cmark_iter_get_node(iter);
    S_render_node(&renderer, node, event, options);
  }

  if (renderer.footnote_ix) {
    cmark_strbuf_puts(&html, "</ol>\n</section>\n");
  }

  char *rendered = (char *)cmark_strbuf_detach(&html);

  cmark_llist_free(mem, renderer.filter_extensions);
  cmark_iter_free(iter);
  return rendered;
}
/*
 * Turn URL-looking substrings of text nodes into links.
 *
 * For each match in a text node's literal, three nodes are inserted before
 * the original node: a text node holding the text preceding the match, a link
 * to "/history/<match>" whose child is a text node with the matched string,
 * and superscript("^", "", <match>).  If at least one match was consumed, the
 * remaining tail becomes a final text node and the original node is freed.
 */
static void md_autolink(cmark_iter *const iter) {
	regex_t url_re;
	// <http://daringfireball.net/2010/07/improved_regex_for_matching_urls>
	// Painstakingly ported to POSIX
	int rc = regcomp(&url_re, "([a-z][a-z0-9_-]+:(/{1,3}|[a-z0-9%])|www[0-9]{0,3}[.]|[a-z0-9.-]+[.][a-z]{2,4}/)([^[:space:]()<>]+|\\(([^[:space:]()<>]+|(\\([^[:space:]()<>]+\\)))*\\))+(\\(([^[:space:]()<>]+|(\\([^[:space:]()<>]+\\)))*\\)|[^][[:space:]`!(){};:'\".,<>?«»“”‘’])", REG_ICASE | REG_EXTENDED);
	assert(0 == rc);

	cmark_event_type event;
	while(CMARK_EVENT_DONE != (event = cmark_iter_next(iter))) {
		if(CMARK_EVENT_ENTER != event) continue;

		cmark_node *const source = cmark_iter_get_node(iter);
		if(CMARK_NODE_TEXT != cmark_node_get_type(source)) continue;

		char const *const literal = cmark_node_get_literal(source);
		char const *cursor = literal;
		regmatch_t hit;

		// Each iteration consumes one match; the cursor then moves just past it.
		for(; 0 == regexec(&url_re, cursor, 1, &hit, 0); cursor += hit.rm_eo) {
			regoff_t const start = hit.rm_so;
			regoff_t const length = hit.rm_eo - hit.rm_so;

			char *before = strndup(cursor, start);
			char *url = strndup(cursor+start, length);
			char *history = aasprintf("/history/%s", url);
			assert(before);
			assert(url);
			assert(history);

			cmark_node *lead = cmark_node_new(CMARK_NODE_TEXT);
			cmark_node_set_literal(lead, before);

			cmark_node *anchor = cmark_node_new(CMARK_NODE_LINK);
			cmark_node_set_url(anchor, history);

			cmark_node *marker = superscript("^", "", url);

			cmark_node *caption = cmark_node_new(CMARK_NODE_TEXT);
			cmark_node_set_literal(caption, url);
			cmark_node_append_child(anchor, caption);

			cmark_node_insert_before(source, lead);
			cmark_node_insert_before(source, anchor);
			cmark_node_insert_before(source, marker);

			free(before); before = NULL;
			free(url); url = NULL;
			free(history); history = NULL;
		}

		// Nothing matched: leave the node untouched.
		if(literal == cursor) continue;

		// Keep whatever follows the last match, then drop the original node.
		cmark_node *tail = cmark_node_new(CMARK_NODE_TEXT);
		cmark_node_set_literal(tail, cursor);
		cmark_node_insert_before(source, tail);
		cmark_node_free(source);
	}

	regfree(&url_re);
}
/*
 * Replace every inline-HTML node by a text node carrying the same literal.
 * The text node is inserted before the HTML node, which is then freed.
 */
static void md_escape_inline(cmark_iter *const iter) {
	cmark_event_type event;
	while(CMARK_EVENT_DONE != (event = cmark_iter_next(iter))) {
		if(CMARK_EVENT_ENTER != event) continue;

		cmark_node *const html = cmark_iter_get_node(iter);
		if(CMARK_NODE_INLINE_HTML != cmark_node_get_type(html)) continue;

		char const *const markup = cmark_node_get_literal(html);
		cmark_node *plain = cmark_node_new(CMARK_NODE_TEXT);
		cmark_node_set_literal(plain, markup);

		cmark_node_insert_before(html, plain);
		cmark_node_free(html);
	}
}
/*
 * Generic rendering driver.
 *
 * Walks the tree rooted at `root` and calls `render_node` for every iterator
 * event, passing a renderer set up with `width`, `outc` and the S_cr /
 * S_blankline / S_out callbacks.  The output always ends in a newline.
 *
 * root        - root of the tree to render
 * options     - passed through to `render_node`
 * width       - stored in the renderer
 * outc        - character output callback stored in the renderer
 * render_node - per-event callback; a false (zero) return skips the
 *               contents of the current node
 *
 * Returns the string detached from the output buffer.
 */
char* cmark_render(cmark_node *root, int options, int width,
                   void (*outc)(cmark_renderer*, cmark_escaping, int32_t,
                                unsigned char),
                   int (*render_node)(cmark_renderer *renderer,
                                      cmark_node *node,
                                      cmark_event_type ev_type, int options)) {
  cmark_strbuf pref = GH_BUF_INIT;
  cmark_strbuf buf = GH_BUF_INIT;
  cmark_node *cur;
  cmark_event_type ev_type;
  char *result;
  cmark_iter *iter = cmark_iter_new(root);

  cmark_renderer renderer = { &buf, &pref, 0, width, 0, 0, true, false, false,
                              outc, S_cr, S_blankline, S_out };

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (!render_node(&renderer, cur, ev_type, options)) {
      // a false value causes us to skip processing
      // the node's contents. this is used for
      // autolinks.
      cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
    }
  }

  // ensure final newline; the size check comes first so that an empty
  // buffer is never indexed at position -1
  if (renderer.buffer->size == 0 ||
      renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
    cmark_strbuf_putc(renderer.buffer, '\n');
  }

  result = (char *)cmark_strbuf_detach(renderer.buffer);

  cmark_iter_free(iter);
  cmark_strbuf_free(renderer.prefix);
  cmark_strbuf_free(renderer.buffer);

  return result;
}
// Walk through node and all children, recursively, parsing // string content into inline content where appropriate. static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int options) { cmark_iter *iter = cmark_iter_new(root); cmark_node *cur; cmark_event_type ev_type; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { if (cur->type == NODE_PARAGRAPH || cur->type == NODE_HEADER) { cmark_parse_inlines(cur, refmap, options); } } } cmark_iter_free(iter); }
/*
 * Render the tree rooted at `root` as HTML by passing every iterator event
 * to S_render_node.  Returns the string detached from the output buffer.
 */
char *cmark_render_html(cmark_node *root, int options) {
  cmark_strbuf html = GH_BUF_INIT;
  struct render_state state = { &html, NULL };
  cmark_iter *iter = cmark_iter_new(root);

  cmark_event_type event;
  for (event = cmark_iter_next(iter); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(iter)) {
    cmark_node *node = cmark_iter_get_node(iter);
    S_render_node(node, event, &state, options);
  }

  char *rendered = (char *)cmark_strbuf_detach(&html);

  cmark_iter_free(iter);
  return rendered;
}
/*
 * Test: parse a small document and check that iterating over it enters
 * exactly two paragraph nodes.
 */
static void iterator(test_batch_runner *runner) {
  cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10, CMARK_OPT_DEFAULT);
  cmark_iter *walker = cmark_iter_new(doc);
  int paragraphs_entered = 0;

  for (;;) {
    cmark_event_type event = cmark_iter_next(walker);
    if (event == CMARK_EVENT_DONE) {
      break;
    }

    cmark_node *node = cmark_iter_get_node(walker);
    if (event == CMARK_EVENT_ENTER && node->type == CMARK_NODE_PARAGRAPH) {
      paragraphs_entered++;
    }
  }

  INT_EQ(runner, paragraphs_entered, 2, "iterate correctly counts paragraphs");

  cmark_iter_free(walker);
  cmark_node_free(doc);
}
/*
 * Render the tree rooted at `root` in man-page format by passing every
 * iterator event to S_render_node.  `options` is accepted but not used.
 * Returns the string detached from the output buffer.
 */
char *cmark_render_man(cmark_node *root, long options) {
  cmark_strbuf man = GH_BUF_INIT;
  struct render_state state = { &man, NULL };
  cmark_iter *iter = cmark_iter_new(root);

  (void)options; // avoid warning about unused parameters

  cmark_event_type event;
  for (event = cmark_iter_next(iter); event != CMARK_EVENT_DONE;
       event = cmark_iter_next(iter)) {
    cmark_node *node = cmark_iter_get_node(iter);
    S_render_node(node, event, &state);
  }

  char *rendered = (char *)cmark_strbuf_detach(&man);

  cmark_iter_free(iter);
  cmark_strbuf_free(&man);
  return rendered;
}
/*
 * Test: free list, emph and code nodes while iterating over a document,
 * then check the HTML rendered from what is left.
 */
static void iterator_delete(test_batch_runner *runner) {
  static const char md[] = "a *b* c\n"
                           "\n"
                           "* item1\n"
                           "* item2\n"
                           "\n"
                           "a `b` c\n"
                           "\n"
                           "* item1\n"
                           "* item2\n";
  static const char expected[] = "<p>a c</p>\n"
                                 "<p>a c</p>\n";

  cmark_node *doc = cmark_parse_document(md, sizeof(md) - 1, CMARK_OPT_DEFAULT);
  cmark_iter *walker = cmark_iter_new(doc);

  for (;;) {
    cmark_event_type event = cmark_iter_next(walker);
    if (event == CMARK_EVENT_DONE) {
      break;
    }

    cmark_node *node = cmark_iter_get_node(walker);

    // Lists and emphasis are removed on EXIT, inline code on ENTER.
    int doomed;
    if (event == CMARK_EVENT_EXIT) {
      doomed = node->type == CMARK_NODE_LIST || node->type == CMARK_NODE_EMPH;
    } else if (event == CMARK_EVENT_ENTER) {
      doomed = node->type == CMARK_NODE_CODE;
    } else {
      doomed = 0;
    }

    if (doomed) {
      cmark_node_free(node);
    }
  }

  char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  STR_EQ(runner, html, expected, "iterate and delete nodes");

  free(html);
  cmark_iter_free(walker);
  cmark_node_free(doc);
}
// Convert a single node. static char* S_node_to_pod(cmark_node *node, CFCClass *klass, int header_level) { char *result = CFCUtil_strdup(""); if (node == NULL) { return result; } int found_matching_code_block = false; cmark_iter *iter = cmark_iter_new(node); cmark_event_type ev_type; while (CMARK_EVENT_DONE != (ev_type = cmark_iter_next(iter))) { cmark_node *node = cmark_iter_get_node(iter); cmark_node_type type = cmark_node_get_type(node); switch (type) { case CMARK_NODE_DOCUMENT: break; case CMARK_NODE_PARAGRAPH: if (ev_type == CMARK_EVENT_EXIT) { result = CFCUtil_cat(result, "\n\n", NULL); } break; case CMARK_NODE_BLOCK_QUOTE: case CMARK_NODE_LIST: if (ev_type == CMARK_EVENT_ENTER) { result = CFCUtil_cat(result, "=over\n\n", NULL); } else { result = CFCUtil_cat(result, "=back\n\n", NULL); } break; case CMARK_NODE_ITEM: // TODO: Ordered lists. if (ev_type == CMARK_EVENT_ENTER) { result = CFCUtil_cat(result, "=item *\n\n", NULL); } break; case CMARK_NODE_HEADER: if (ev_type == CMARK_EVENT_ENTER) { int extra_level = cmark_node_get_header_level(node) - 1; char *header = CFCUtil_sprintf("=head%d ", header_level + extra_level); result = CFCUtil_cat(result, header, NULL); FREEMEM(header); } else { result = CFCUtil_cat(result, "\n\n", NULL); } break; case CMARK_NODE_CODE_BLOCK: { int is_host = CFCMarkdown_code_block_is_host(node, "perl"); if (is_host) { found_matching_code_block = true; const char *content = cmark_node_get_literal(node); char *copy = CFCUtil_strdup(content); // Chomp trailing newline. size_t len = strlen(copy); if (len > 0 && copy[len-1] == '\n') { copy[len-1] = '\0'; } char *indented = CFCUtil_global_replace(copy, "\n", "\n "); result = CFCUtil_cat(result, " ", indented, "\n\n", NULL); FREEMEM(indented); FREEMEM(copy); } if (CFCMarkdown_code_block_is_last(node)) { if (!found_matching_code_block) { result = CFCUtil_cat(result, " Code example for Perl is missing\n\n"); } else { // Reset. 
found_matching_code_block = false; } } break; } case CMARK_NODE_HTML: { const char *html = cmark_node_get_literal(node); result = CFCUtil_cat(result, "=begin html\n\n", html, "\n=end\n\n", NULL); break; } case CMARK_NODE_HRULE: break; case CMARK_NODE_TEXT: { const char *content = cmark_node_get_literal(node); char *escaped = S_pod_escape(content); result = CFCUtil_cat(result, escaped, NULL); FREEMEM(escaped); break; } case CMARK_NODE_LINEBREAK: // POD doesn't support line breaks. Start a new paragraph. result = CFCUtil_cat(result, "\n\n", NULL); break; case CMARK_NODE_SOFTBREAK: result = CFCUtil_cat(result, "\n", NULL); break; case CMARK_NODE_CODE: { const char *content = cmark_node_get_literal(node); char *escaped = S_pod_escape(content); result = CFCUtil_cat(result, "C<", escaped, ">", NULL); FREEMEM(escaped); break; } case CMARK_NODE_INLINE_HTML: { const char *html = cmark_node_get_literal(node); CFCUtil_warn("Inline HTML not supported in POD: %s", html); break; } case CMARK_NODE_LINK: if (ev_type == CMARK_EVENT_ENTER) { char *pod = S_convert_link(node, klass, header_level); result = CFCUtil_cat(result, pod, NULL); FREEMEM(pod); cmark_iter_reset(iter, node, CMARK_EVENT_EXIT); } break; case CMARK_NODE_IMAGE: CFCUtil_warn("Images not supported in POD"); break; case CMARK_NODE_STRONG: if (ev_type == CMARK_EVENT_ENTER) { result = CFCUtil_cat(result, "B<", NULL); } else { result = CFCUtil_cat(result, ">", NULL); } break; case CMARK_NODE_EMPH: if (ev_type == CMARK_EVENT_ENTER) { result = CFCUtil_cat(result, "I<", NULL); } else { result = CFCUtil_cat(result, ">", NULL); } break; default: CFCUtil_die("Invalid cmark node type: %d", (int)type); break; } } cmark_iter_free(iter); return result; }