// Return a link, an image, or a literal close bracket. static cmark_node *handle_close_bracket(subject *subj) { bufsize_t initial_pos, after_link_text_pos; bufsize_t endurl, starttitle, endtitle, endall; bufsize_t sps, n; cmark_reference *ref = NULL; cmark_chunk url_chunk, title_chunk; cmark_chunk url, title; bracket *opener; cmark_node *inl; cmark_chunk raw_label; int found_label; cmark_node *tmp, *tmpnext; bool is_image; advance(subj); // advance past ] initial_pos = subj->pos; // get last [ or ![ opener = subj->last_bracket; if (opener == NULL) { return make_str(subj->mem, cmark_chunk_literal("]")); } if (!opener->active) { // take delimiter off stack pop_bracket(subj); return make_str(subj->mem, cmark_chunk_literal("]")); } // If we got here, we matched a potential link/image text. // Now we check to see if it's a link/image. is_image = opener->image; after_link_text_pos = subj->pos; // First, look for an inline link. if (peek_char(subj) == '(' && ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, &url_chunk)) > -1)) { // try to parse an explicit link: endurl = subj->pos + 1 + sps + n; starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? starttitle : starttitle + scan_link_title(&subj->input, starttitle); endall = endtitle + scan_spacechars(&subj->input, endtitle); if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle); url = cmark_clean_url(subj->mem, &url_chunk); title = cmark_clean_title(subj->mem, &title_chunk); cmark_chunk_free(subj->mem, &url_chunk); cmark_chunk_free(subj->mem, &title_chunk); goto match; } else { // it could still be a shortcut reference link subj->pos = after_link_text_pos; } } // Next, look for a following [link label] that matches in refmap. // skip spaces raw_label = cmark_chunk_literal(""); found_label = link_label(subj, &raw_label); if (!found_label) { // If we have a shortcut reference link, back up // to before the spacse we skipped. subj->pos = initial_pos; } if ((!found_label || raw_label.len == 0) && !opener->bracket_after) { cmark_chunk_free(subj->mem, &raw_label); raw_label = cmark_chunk_dup(&subj->input, opener->position, initial_pos - opener->position - 1); found_label = true; } if (found_label) { ref = cmark_reference_lookup(subj->refmap, &raw_label); cmark_chunk_free(subj->mem, &raw_label); } if (ref != NULL) { // found url = chunk_clone(subj->mem, &ref->url); title = chunk_clone(subj->mem, &ref->title); goto match; } else { goto noMatch; } noMatch: // If we fall through to here, it means we didn't match a link: pop_bracket(subj); // remove this opener from delimiter list subj->pos = initial_pos; return make_str(subj->mem, cmark_chunk_literal("]")); match: inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK); inl->as.link.url = url; inl->as.link.title = title; cmark_node_insert_before(opener->inl_text, inl); // Add link text: tmp = opener->inl_text->next; while (tmp) { tmpnext = tmp->next; cmark_node_append_child(inl, tmp); tmp = tmpnext; } // Free the bracket [: cmark_node_free(opener->inl_text); process_emphasis(subj, opener->previous_delimiter); pop_bracket(subj); // Now, if we have a link, we also want to deactivate earlier link // delimiters. (This code can be removed if we decide to allow links // inside links.) if (!is_image) { opener = subj->last_bracket; while (opener != NULL) { if (!opener->image) { if (!opener->active) { break; } else { opener->active = false; } } opener = opener->previous; } } return NULL; }
// Parse reference. Assumes string begins with '[' character. // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap) { subject subj; cmark_chunk lab; cmark_chunk url; cmark_chunk title; bufsize_t matchlen = 0; bufsize_t beforetitle; subject_from_buf(mem, &subj, input, NULL); // parse label: if (!link_label(&subj, &lab) || lab.len == 0) return 0; // colon: if (peek_char(&subj) == ':') { advance(&subj); } else { return 0; } // parse link url: spnl(&subj); if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 && url.len > 0) { subj.pos += matchlen; } else { return 0; } // parse optional link_title beforetitle = subj.pos; spnl(&subj); matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { subj.pos = beforetitle; title = cmark_chunk_literal(""); } // parse final spaces and newline: skip_spaces(&subj); if (!skip_line_end(&subj)) { if (matchlen) { // try rewinding before title subj.pos = beforetitle; skip_spaces(&subj); if (!skip_line_end(&subj)) { return 0; } } else { return 0; } } // insert reference into refmap cmark_reference_create(refmap, &lab, &url, &title); return subj.pos; }
static void process_emphasis(subject *subj, delimiter *stack_bottom) { delimiter *closer = subj->last_delim; delimiter *opener; delimiter *old_closer; bool opener_found; int openers_bottom_index; delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom, stack_bottom, stack_bottom, stack_bottom}; // move back to first relevant delim. while (closer != NULL && closer->previous != stack_bottom) { closer = closer->previous; } // now move forward, looking for closers, and handling each while (closer != NULL) { if (closer->can_close) { switch (closer->delim_char) { case '"': openers_bottom_index = 0; break; case '\'': openers_bottom_index = 1; break; case '_': openers_bottom_index = 2; break; case '*': openers_bottom_index = 3 + (closer->length % 3); break; default: assert(false); } // Now look backwards for first matching opener: opener = closer->previous; opener_found = false; while (opener != NULL && opener != openers_bottom[openers_bottom_index]) { if (opener->can_open && opener->delim_char == closer->delim_char) { // interior closer of size 2 can't match opener of size 1 // or of size 1 can't match 2 if (!(closer->can_open || opener->can_close) || ((opener->length + closer->length) % 3) != 0) { opener_found = true; break; } } opener = opener->previous; } old_closer = closer; if (closer->delim_char == '*' || closer->delim_char == '_') { if (opener_found) { closer = S_insert_emph(subj, opener, closer); } else { closer = closer->next; } } else if (closer->delim_char == '\'') { cmark_chunk_free(subj->mem, &closer->inl_text->as.literal); closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE); if (opener_found) { cmark_chunk_free(subj->mem, &opener->inl_text->as.literal); opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE); } closer = closer->next; } else if (closer->delim_char == '"') { cmark_chunk_free(subj->mem, &closer->inl_text->as.literal); closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE); if (opener_found) { cmark_chunk_free(subj->mem, &opener->inl_text->as.literal); opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE); } closer = closer->next; } if (!opener_found) { // set lower bound for future searches for openers openers_bottom[openers_bottom_index] = old_closer->previous; if (!old_closer->can_open) { // we can remove a closer that can't be an // opener, once we've seen there's no // matching opener: remove_delimiter(subj, old_closer); } } } else { closer = closer->next; } } // free all delimiters in list until stack_bottom: while (subj->last_delim != NULL && subj->last_delim != stack_bottom) { remove_delimiter(subj, subj->last_delim); } }
static void process_emphasis(subject *subj, delimiter *stack_bottom) { delimiter *closer = subj->last_delim; delimiter *opener; delimiter *old_closer; bool opener_found; bool odd_match; delimiter *openers_bottom[128]; // initialize openers_bottom: openers_bottom['*'] = stack_bottom; openers_bottom['_'] = stack_bottom; openers_bottom['\''] = stack_bottom; openers_bottom['"'] = stack_bottom; // move back to first relevant delim. while (closer != NULL && closer->previous != stack_bottom) { closer = closer->previous; } // now move forward, looking for closers, and handling each while (closer != NULL) { if (closer->can_close) { // Now look backwards for first matching opener: opener = closer->previous; opener_found = false; odd_match = false; while (opener != NULL && opener != stack_bottom && opener != openers_bottom[closer->delim_char]) { // interior closer of size 2 can't match opener of size 1 // or of size 1 can't match 2 odd_match = (closer->can_open || opener->can_close) && ((opener->inl_text->as.literal.len + closer->inl_text->as.literal.len) % 3 == 0); if (opener->delim_char == closer->delim_char && opener->can_open && !odd_match) { opener_found = true; break; } opener = opener->previous; } old_closer = closer; if (closer->delim_char == '*' || closer->delim_char == '_') { if (opener_found) { closer = S_insert_emph(subj, opener, closer); } else { closer = closer->next; } } else if (closer->delim_char == '\'') { cmark_chunk_free(subj->mem, &closer->inl_text->as.literal); closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE); if (opener_found) { cmark_chunk_free(subj->mem, &opener->inl_text->as.literal); opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE); } closer = closer->next; } else if (closer->delim_char == '"') { cmark_chunk_free(subj->mem, &closer->inl_text->as.literal); closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE); if (opener_found) { cmark_chunk_free(subj->mem, &opener->inl_text->as.literal); opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE); } closer = closer->next; } if (!opener_found && !odd_match) { // set lower bound for future searches for openers // (we don't do this with 'odd_match' set because // a ** that didn't match an earlier * might turn into // an opener, and the * might be matched by something // else. openers_bottom[old_closer->delim_char] = old_closer->previous; if (!old_closer->can_open) { // we can remove a closer that can't be an // opener, once we've seen there's no // matching opener: remove_delimiter(subj, old_closer); } } } else { closer = closer->next; } } // free all delimiters in list until stack_bottom: while (subj->last_delim != stack_bottom) { remove_delimiter(subj, subj->last_delim); } }
// Parse an inline, advancing subject, and add it as a child of parent. // Return 0 if no inline can be parsed, 1 otherwise. static int parse_inline(subject *subj, cmark_node *parent, int options) { cmark_node *new_inl = NULL; cmark_chunk contents; unsigned char c; bufsize_t endpos; c = peek_char(subj); if (c == 0) { return 0; } switch (c) { case '\r': case '\n': new_inl = handle_newline(subj); break; case '`': new_inl = handle_backticks(subj); break; case '\\': new_inl = handle_backslash(subj); break; case '&': new_inl = handle_entity(subj); break; case '<': new_inl = handle_pointy_brace(subj); break; case '*': case '_': case '\'': case '"': new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0); break; case '-': new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0); break; case '.': new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0); break; case '[': advance(subj); new_inl = make_str(subj->mem, cmark_chunk_literal("[")); push_bracket(subj, false, new_inl); break; case ']': new_inl = handle_close_bracket(subj); break; case '!': advance(subj); if (peek_char(subj) == '[') { advance(subj); new_inl = make_str(subj->mem, cmark_chunk_literal("![")); push_bracket(subj, true, new_inl); } else { new_inl = make_str(subj->mem, cmark_chunk_literal("!")); } break; default: endpos = subject_find_special_char(subj, options); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; // if we're at a newline, strip trailing spaces. if (S_is_line_end_char(peek_char(subj))) { cmark_chunk_rtrim(&contents); } new_inl = make_str(subj->mem, contents); } if (new_inl != NULL) { cmark_node_append_child(parent, new_inl); } return 1; }
// Return a link, an image, or a literal close bracket. static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) { bufsize_t initial_pos; bufsize_t starturl, endurl, starttitle, endtitle, endall; bufsize_t n; bufsize_t sps; cmark_reference *ref; bool is_image = false; cmark_chunk url_chunk, title_chunk; cmark_chunk url, title; delimiter *opener; cmark_node *link_text; cmark_node *inl; cmark_chunk raw_label; int found_label; advance(subj); // advance past ] initial_pos = subj->pos; // look through list of delimiters for a [ or ! opener = subj->last_delim; while (opener) { if (opener->delim_char == '[' || opener->delim_char == '!') { break; } opener = opener->previous; } if (opener == NULL) { return make_str(cmark_chunk_literal("]")); } if (!opener->active) { // take delimiter off stack remove_delimiter(subj, opener); return make_str(cmark_chunk_literal("]")); } // If we got here, we matched a potential link/image text. is_image = opener->delim_char == '!'; link_text = opener->inl_text->next; // Now we check to see if it's a link/image. // First, look for an inline link. if (peek_char(subj) == '(' && ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { // try to parse an explicit link: starturl = subj->pos + 1 + sps; // after ( endurl = starturl + n; starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? starttitle : starttitle + scan_link_title(&subj->input, starttitle); endall = endtitle + scan_spacechars(&subj->input, endtitle); if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl); title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle); url = cmark_clean_url(&url_chunk); title = cmark_clean_title(&title_chunk); cmark_chunk_free(&url_chunk); cmark_chunk_free(&title_chunk); goto match; } else { goto noMatch; } } // Next, look for a following [link label] that matches in refmap. // skip spaces subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos); raw_label = cmark_chunk_literal(""); found_label = link_label(subj, &raw_label); if (!found_label || raw_label.len == 0) { cmark_chunk_free(&raw_label); raw_label = cmark_chunk_dup(&subj->input, opener->position, initial_pos - opener->position - 1); } if (!found_label) { // If we have a shortcut reference link, back up // to before the spacse we skipped. subj->pos = initial_pos; } ref = cmark_reference_lookup(subj->refmap, &raw_label); cmark_chunk_free(&raw_label); if (ref != NULL) { // found url = chunk_clone(&ref->url); title = chunk_clone(&ref->title); goto match; } else { goto noMatch; } noMatch: // If we fall through to here, it means we didn't match a link: remove_delimiter(subj, opener); // remove this opener from delimiter list subj->pos = initial_pos; return make_str(cmark_chunk_literal("]")); match: inl = opener->inl_text; inl->type = is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK; cmark_chunk_free(&inl->as.literal); inl->first_child = link_text; process_emphasis(subj, opener); inl->as.link.url = url; inl->as.link.title = title; inl->next = NULL; if (link_text) { cmark_node *tmp; link_text->prev = NULL; for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) { tmp->parent = inl; } tmp->parent = inl; inl->last_child = tmp; } parent->last_child = inl; // Now, if we have a link, we also want to deactivate earlier link // delimiters. (This code can be removed if we decide to allow links // inside links.) remove_delimiter(subj, opener); if (!is_image) { opener = subj->last_delim; while (opener != NULL) { if (opener->delim_char == '[') { if (!opener->active) { break; } else { opener->active = false; } } opener = opener->previous; } } return NULL; }
static void process_emphasis(subject *subj, delimiter *stack_bottom) { delimiter *closer = subj->last_delim; delimiter *opener; delimiter *old_closer; bool opener_found; delimiter *openers_bottom[128]; // initialize openers_bottom: openers_bottom['*'] = stack_bottom; openers_bottom['_'] = stack_bottom; openers_bottom['\''] = stack_bottom; openers_bottom['"'] = stack_bottom; // move back to first relevant delim. while (closer != NULL && closer->previous != stack_bottom) { closer = closer->previous; } // now move forward, looking for closers, and handling each while (closer != NULL) { if (closer->can_close && (closer->delim_char == '*' || closer->delim_char == '_' || closer->delim_char == '"' || closer->delim_char == '\'')) { // Now look backwards for first matching opener: opener = closer->previous; opener_found = false; while (opener != NULL && opener != stack_bottom && opener != openers_bottom[closer->delim_char]) { if (opener->delim_char == closer->delim_char && opener->can_open) { opener_found = true; break; } opener = opener->previous; } old_closer = closer; if (closer->delim_char == '*' || closer->delim_char == '_') { if (opener_found) { closer = S_insert_emph(subj, opener, closer); } else { closer = closer->next; } } else if (closer->delim_char == '\'') { cmark_chunk_free(&closer->inl_text->as.literal); closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE); if (opener_found) { cmark_chunk_free(&opener->inl_text->as.literal); opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE); } closer = closer->next; } else if (closer->delim_char == '"') { cmark_chunk_free(&closer->inl_text->as.literal); closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE); if (opener_found) { cmark_chunk_free(&opener->inl_text->as.literal); opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE); } closer = closer->next; } if (!opener_found) { // set lower bound for future searches for openers: openers_bottom[old_closer->delim_char] = old_closer->previous; if (!old_closer->can_open) { // we can remove a closer that can't be an // opener, once we've seen there's no // matching opener: remove_delimiter(subj, old_closer); } } } else { closer = closer->next; } } // free all delimiters in list until stack_bottom: while (subj->last_delim != stack_bottom) { remove_delimiter(subj, subj->last_delim); } }
static void S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) { cmark_node* last_matched_container; int offset = 0; int matched = 0; int lev = 0; int i; cmark_list *data = NULL; bool all_matched = true; cmark_node* container; cmark_node* cur = parser->current; bool blank = false; int first_nonspace; int indent; cmark_chunk input; utf8proc_detab(parser->curline, buffer, bytes); // Add a newline to the end if not present: // TODO this breaks abstraction: if (parser->curline->ptr[parser->curline->size - 1] != '\n') { cmark_strbuf_putc(parser->curline, '\n'); } input.data = parser->curline->ptr; input.len = parser->curline->size; // container starts at the document root. container = parser->root; parser->line_number++; // for each containing cmark_node, try to parse the associated line start. // bail out on failure: container will point to the last matching cmark_node. while (container->last_child && container->last_child->open) { container = container->last_child; first_nonspace = offset; while (peek_at(&input, first_nonspace) == ' ') { first_nonspace++; } indent = first_nonspace - offset; blank = peek_at(&input, first_nonspace) == '\n'; if (container->type == NODE_BLOCK_QUOTE) { matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; if (matched) { offset = first_nonspace + 1; if (peek_at(&input, offset) == ' ') offset++; } else { all_matched = false; } } else if (container->type == NODE_LIST_ITEM) { if (indent >= container->as.list.marker_offset + container->as.list.padding) { offset += container->as.list.marker_offset + container->as.list.padding; } else if (blank) { offset = first_nonspace; } else { all_matched = false; } } else if (container->type == NODE_CODE_BLOCK) { if (!container->as.code.fenced) { // indented if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { offset = first_nonspace; } else { all_matched = false; } } else { // skip optional spaces of fence offset i = container->as.code.fence_offset; while (i > 0 && peek_at(&input, offset) == ' ') { offset++; i--; } } } else if (container->type == NODE_HEADER) { // a header can never contain more than one line all_matched = false; if (blank) { container->last_line_blank = true; } } else if (container->type == NODE_HTML) { if (blank) { container->last_line_blank = true; all_matched = false; } } else if (container->type == NODE_PARAGRAPH) { if (blank) { container->last_line_blank = true; all_matched = false; } } if (!all_matched) { container = container->parent; // back up to last matching cmark_node break; } } last_matched_container = container; // check to see if we've hit 2nd blank line, break out of list: if (blank && container->last_line_blank) { break_out_of_lists(parser, &container, parser->line_number); } // unless last matched container is code cmark_node, try new container starts: while (container->type != NODE_CODE_BLOCK && container->type != NODE_HTML) { first_nonspace = offset; while (peek_at(&input, first_nonspace) == ' ') first_nonspace++; indent = first_nonspace - offset; blank = peek_at(&input, first_nonspace) == '\n'; if (indent >= CODE_INDENT) { if (cur->type != NODE_PARAGRAPH && !blank) { offset += CODE_INDENT; container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, offset + 1); container->as.code.fenced = false; container->as.code.fence_char = 0; container->as.code.fence_length = 0; container->as.code.fence_offset = 0; container->as.code.info = cmark_chunk_literal(""); } else { // indent > 4 in lazy line break; } } else if (peek_at(&input, first_nonspace) == '>') { offset = first_nonspace + 1; // optional following character if (peek_at(&input, offset) == ' ') offset++; container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->line_number, offset + 1); } else if ((matched = scan_atx_header_start(&input, first_nonspace))) { offset = first_nonspace + matched; container = add_child(parser, container, NODE_HEADER, parser->line_number, offset + 1); int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace); int level = 0; while (peek_at(&input, hashpos) == '#') { level++; hashpos++; } container->as.header.level = level; container->as.header.setext = false; } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, first_nonspace + 1); container->as.code.fenced = true; container->as.code.fence_char = peek_at(&input, first_nonspace); container->as.code.fence_length = matched; container->as.code.fence_offset = first_nonspace - offset; container->as.code.info = cmark_chunk_literal(""); offset = first_nonspace + matched; } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { container = add_child(parser, container, NODE_HTML, parser->line_number, first_nonspace + 1); // note, we don't adjust offset because the tag is part of the text } else if (container->type == NODE_PARAGRAPH && (lev = scan_setext_header_line(&input, first_nonspace)) && // check that there is only one line in the paragraph: cmark_strbuf_strrchr(&container->string_content, '\n', cmark_strbuf_len(&container->string_content) - 2) < 0) { container->type = NODE_HEADER; container->as.header.level = lev; container->as.header.setext = true; offset = input.len - 1; } else if (!(container->type == NODE_PARAGRAPH && !all_matched) && (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: container = add_child(parser, container, NODE_HRULE, parser->line_number, first_nonspace + 1); container = finalize(parser, container, parser->line_number); offset = input.len - 1; } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) { // compute padding: offset = first_nonspace + matched; i = 0; while (i <= 5 && peek_at(&input, offset + i) == ' ') { i++; } // i = number of spaces after marker, up to 5 if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') { data->padding = matched + 1; if (i > 0) { offset += 1; } } else { data->padding = matched + i; offset += i; } // check container; if it's a list, see if this list item // can continue the list; otherwise, create a list container. data->marker_offset = indent; if (container->type != NODE_LIST || !lists_match(&container->as.list, data)) { container = add_child(parser, container, NODE_LIST, parser->line_number, first_nonspace + 1); memcpy(&container->as.list, data, sizeof(*data)); } // add the list item container = add_child(parser, container, NODE_LIST_ITEM, parser->line_number, first_nonspace + 1); /* TODO: static */ memcpy(&container->as.list, data, sizeof(*data)); free(data); } else { break; } if (accepts_lines(container->type)) { // if it's a line container, it can't contain other containers break; } } // what remains at offset is a text line. add the text to the // appropriate container. first_nonspace = offset; while (peek_at(&input, first_nonspace) == ' ') first_nonspace++; indent = first_nonspace - offset; blank = peek_at(&input, first_nonspace) == '\n'; // cmark_node quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. container->last_line_blank = (blank && container->type != NODE_BLOCK_QUOTE && container->type != NODE_HEADER && !(container->type == NODE_CODE_BLOCK && container->as.code.fenced) && !(container->type == NODE_LIST_ITEM && container->first_child == NULL && container->start_line == parser->line_number)); cmark_node *cont = container; while (cont->parent) { cont->parent->last_line_blank = false; cont = cont->parent; } if (cur != last_matched_container && container == last_matched_container && !blank && cur->type == NODE_PARAGRAPH && cmark_strbuf_len(&cur->string_content) > 0) { add_line(cur, &input, offset); } else { // not a lazy continuation // finalize any blocks that were not matched and set cur to container: while (cur != last_matched_container) { cur = finalize(parser, cur, parser->line_number); assert(cur != NULL); } if (container->type == NODE_CODE_BLOCK && !container->as.code.fenced) { add_line(container, &input, offset); } else if (container->type == NODE_CODE_BLOCK && container->as.code.fenced) { matched = 0; if (indent <= 3 && peek_at(&input, first_nonspace) == container->as.code.fence_char) { int fence_len = scan_close_code_fence(&input, first_nonspace); if (fence_len > container->as.code.fence_length) matched = 1; } if (matched) { // if closing fence, don't add line to container; instead, close it: container = finalize(parser, container, parser->line_number); } else { add_line(container, &input, offset); } } else if (container->type == NODE_HTML) { add_line(container, &input, offset); } else if (blank) { // ??? do nothing } else if (container->type == NODE_HEADER) { chop_trailing_hashtags(&input); add_line(container, &input, first_nonspace); container = finalize(parser, container, parser->line_number); } else if (accepts_lines(container->type)) { add_line(container, &input, first_nonspace); } else if (container->type != NODE_HRULE && container->type != NODE_HEADER) { // create paragraph container for line container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1); add_line(container, &input, first_nonspace); } else { assert(false); } parser->current = container; } cmark_strbuf_clear(parser->curline); }
// Parse an inline, advancing subject, and add it as a child of parent. // Return 0 if no inline can be parsed, 1 otherwise. static int parse_inline(subject* subj, cmark_node * parent) { cmark_node* new_inl = NULL; cmark_chunk contents; unsigned char c; int endpos; c = peek_char(subj); if (c == 0) { return 0; } switch(c) { case '\n': new_inl = handle_newline(subj); break; case '`': new_inl = handle_backticks(subj); break; case '\\': new_inl = handle_backslash(subj); break; case '&': new_inl = handle_entity(subj); break; case '<': new_inl = handle_pointy_brace(subj); break; case '*': case '_': new_inl = handle_strong_emph(subj, c); break; case '[': advance(subj); new_inl = make_str(cmark_chunk_literal("[")); push_delimiter(subj, '[', true, false, new_inl); break; case ']': new_inl = handle_close_bracket(subj, parent); break; case '!': advance(subj); if (peek_char(subj) == '[') { advance(subj); new_inl = make_str(cmark_chunk_literal("![")); push_delimiter(subj, '!', false, true, new_inl); } else { new_inl = make_str(cmark_chunk_literal("!")); } break; default: endpos = subject_find_special_char(subj); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; // if we're at a newline, strip trailing spaces. if (peek_char(subj) == '\n') { cmark_chunk_rtrim(&contents); } new_inl = make_str(contents); } if (new_inl != NULL) { cmark_node_append_child(parent, new_inl); } return 1; }
static void S_out(cmark_renderer *renderer, const char *source, bool wrap, cmark_escaping escape) { int length = cmark_strbuf_safe_strlen(source); unsigned char nextc; int32_t c; int i = 0; int last_nonspace; int len; cmark_chunk remainder = cmark_chunk_literal(""); int k = renderer->buffer->size - 1; wrap = wrap && !renderer->no_wrap; if (renderer->in_tight_list_item && renderer->need_cr > 1) { renderer->need_cr = 1; } while (renderer->need_cr) { if (k < 0 || renderer->buffer->ptr[k] == '\n') { k -= 1; } else { cmark_strbuf_putc(renderer->buffer, '\n'); if (renderer->need_cr > 1) { cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, renderer->prefix->size); } } renderer->column = 0; renderer->begin_line = true; renderer->begin_content = true; renderer->need_cr -= 1; } while (i < length) { if (renderer->begin_line) { cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, renderer->prefix->size); // note: this assumes prefix is ascii: renderer->column = renderer->prefix->size; } len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c); if (len == -1) { // error condition return; // return without rendering rest of string } nextc = source[i + len]; if (c == 32 && wrap) { if (!renderer->begin_line) { last_nonspace = renderer->buffer->size; cmark_strbuf_putc(renderer->buffer, ' '); renderer->column += 1; renderer->begin_line = false; renderer->begin_content = false; // skip following spaces while (source[i + 1] == ' ') { i++; } // We don't allow breaks that make a digit the first character // because this causes problems with commonmark output. if (!cmark_isdigit(source[i + 1])) { renderer->last_breakable = last_nonspace; } } } else if (c == 10) { cmark_strbuf_putc(renderer->buffer, '\n'); renderer->column = 0; renderer->begin_line = true; renderer->begin_content = true; renderer->last_breakable = 0; } else if (escape == LITERAL) { cmark_render_code_point(renderer, c); renderer->begin_line = false; // we don't set 'begin_content' to false til we've // finished parsing a digit. Reason: in commonmark // we need to escape a potential list marker after // a digit: renderer->begin_content = renderer->begin_content && cmark_isdigit(c) == 1; } else { (renderer->outc)(renderer, escape, c, nextc); renderer->begin_line = false; renderer->begin_content = renderer->begin_content && cmark_isdigit(c) == 1; } // If adding the character went beyond width, look for an // earlier place where the line could be broken: if (renderer->width > 0 && renderer->column > renderer->width && !renderer->begin_line && renderer->last_breakable > 0) { // copy from last_breakable to remainder cmark_chunk_set_cstr(&remainder, (char *)renderer->buffer->ptr + renderer->last_breakable + 1); // truncate at last_breakable cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable); // add newline, prefix, and remainder cmark_strbuf_putc(renderer->buffer, '\n'); cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr, renderer->prefix->size); cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len); renderer->column = renderer->prefix->size + remainder.len; cmark_chunk_free(&remainder); renderer->last_breakable = 0; renderer->begin_line = false; renderer->begin_content = false; } i += len; } }