void cmark_consolidate_text_nodes(cmark_node *root) { cmark_iter *iter; cmark_strbuf buf = CMARK_BUF_INIT(NULL); cmark_event_type ev_type; cmark_node *cur, *tmp, *next; if (root == NULL) { return; } iter = cmark_iter_new(root); buf.mem = iter->mem; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT && cur->next && cur->next->type == CMARK_NODE_TEXT) { cmark_strbuf_clear(&buf); cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len); tmp = cur->next; while (tmp && tmp->type == CMARK_NODE_TEXT) { cmark_iter_next(iter); // advance pointer cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len); next = tmp->next; cmark_node_free(tmp); tmp = next; } cmark_chunk_free(iter->mem, &cur->as.literal); cur->as.literal = cmark_chunk_buf_detach(&buf); } } cmark_strbuf_free(&buf); cmark_iter_free(iter); }
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) { char *result; cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL}; cmark_iter *iter = cmark_iter_new(root); for (; extensions; extensions = extensions->next) if (((cmark_syntax_extension *) extensions->data)->html_filter_func) renderer.filter_extensions = cmark_llist_append( mem, renderer.filter_extensions, (cmark_syntax_extension *) extensions->data); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); S_render_node(&renderer, cur, ev_type, options); } if (renderer.footnote_ix) { cmark_strbuf_puts(&html, "</ol>\n</section>\n"); } result = (char *)cmark_strbuf_detach(&html); cmark_llist_free(mem, renderer.filter_extensions); cmark_iter_free(iter); return result; }
// Like make_str, but parses entities. static cmark_node *make_str_with_entities(cmark_mem *mem, cmark_chunk *content) { cmark_strbuf unescaped = CMARK_BUF_INIT(mem); if (houdini_unescape_html(&unescaped, content->data, content->len)) { return make_str(mem, cmark_chunk_buf_detach(&unescaped)); } else { return make_str(mem, *content); } }
char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, void (*outc)(cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options)) { cmark_strbuf pref = CMARK_BUF_INIT(mem); cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_node *cur; cmark_event_type ev_type; char *result; cmark_iter *iter = cmark_iter_new(root); cmark_renderer renderer = {mem, &buf, &pref, 0, width, 0, 0, true, true, false, false, outc, S_cr, S_blankline, S_out, 0}; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (!render_node(&renderer, cur, ev_type, options)) { // a false value causes us to skip processing // the node's contents. this is used for // autolinks. cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT); } } // ensure final newline if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') { cmark_strbuf_putc(renderer.buffer, '\n'); } result = (char *)cmark_strbuf_detach(renderer.buffer); cmark_iter_free(iter); cmark_strbuf_free(renderer.prefix); cmark_strbuf_free(renderer.buffer); return result; }
// Clean a URL: remove surrounding whitespace, and remove \ that escape // punctuation. cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_chunk_trim(url); if (url->len == 0) { cmark_chunk result = CMARK_CHUNK_EMPTY; return result; } houdini_unescape_html_f(&buf, url->data, url->len); cmark_strbuf_unescape(&buf); return cmark_chunk_buf_detach(&buf); }
// Parse an entity or a regular "&" string. // Assumes the subject has an '&' character at the current position. static cmark_node *handle_entity(subject *subj) { cmark_strbuf ent = CMARK_BUF_INIT(subj->mem); bufsize_t len; advance(subj); len = houdini_unescape_ent(&ent, subj->input.data + subj->pos, subj->input.len - subj->pos); if (len == 0) return make_str(subj->mem, cmark_chunk_literal("&")); subj->pos += len; return make_str(subj->mem, cmark_chunk_buf_detach(&ent)); }
static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url, int is_email) { cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_chunk_trim(url); if (url->len == 0) { cmark_chunk result = CMARK_CHUNK_EMPTY; return result; } if (is_email) cmark_strbuf_puts(&buf, "mailto:"); houdini_unescape_html_f(&buf, url->data, url->len); return cmark_chunk_buf_detach(&buf); }
// Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. static cmark_node *handle_backticks(subject *subj) { cmark_chunk openticks = take_while(subj, isbacktick); bufsize_t startpos = subj->pos; bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind return make_str(subj->mem, openticks); } else { cmark_strbuf buf = CMARK_BUF_INIT(subj->mem); cmark_strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); cmark_strbuf_trim(&buf); cmark_strbuf_normalize_whitespace(&buf); return make_code(subj->mem, cmark_chunk_buf_detach(&buf)); } }
// Assumes we have a hyphen at the current position. static cmark_node *handle_hyphen(subject *subj, bool smart) { cmark_strbuf buf = CMARK_BUF_INIT(NULL); int startpos = subj->pos; int en_count = 0; int em_count = 0; int numhyphens; int i; advance(subj); if (!smart || peek_char(subj) != '-') { return make_str(subj->mem, cmark_chunk_literal("-")); } while (smart && peek_char(subj) == '-') { advance(subj); } numhyphens = subj->pos - startpos; buf.mem = subj->mem; if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes em_count = numhyphens / 3; } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes en_count = numhyphens / 2; } else if (numhyphens % 3 == 2) { // use one en dash at end en_count = 1; em_count = (numhyphens - 2) / 3; } else { // use two en dashes at the end en_count = 2; em_count = (numhyphens - 4) / 3; } for (i = em_count; i > 0; i--) { cmark_strbuf_puts(&buf, EMDASH); } for (i = en_count; i > 0; i--) { cmark_strbuf_puts(&buf, ENDASH); } return make_str(subj->mem, cmark_chunk_buf_detach(&buf)); }
cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { cmark_strbuf buf = CMARK_BUF_INIT(mem); unsigned char first, last; if (title->len == 0) { cmark_chunk result = CMARK_CHUNK_EMPTY; return result; } first = title->data[0]; last = title->data[title->len - 1]; // remove surrounding quotes if any: if ((first == '\'' && last == '\'') || (first == '(' && last == ')') || (first == '"' && last == '"')) { houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); } else { houdini_unescape_html_f(&buf, title->data, title->len); } cmark_strbuf_unescape(&buf); return cmark_chunk_buf_detach(&buf); }
// normalize reference: collapse internal whitespace to single space, // remove leading/trailing whitespace, case fold // Return NULL if the reference name is actually empty (i.e. composed // solely from whitespace) static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { cmark_strbuf normalized = CMARK_BUF_INIT(mem); unsigned char *result; if (ref == NULL) return NULL; if (ref->len == 0) return NULL; cmark_utf8proc_case_fold(&normalized, ref->data, ref->len); cmark_strbuf_trim(&normalized); cmark_strbuf_normalize_whitespace(&normalized); result = cmark_strbuf_detach(&normalized); assert(result); if (result[0] == '\0') { free(result); return NULL; } return result; }