// Scan a link destination starting at `offset` in `input`.
//
// The destination runs until the first whitespace character or the first
// unescaped `)` that does not close a `(` opened inside the destination.
// A backslash followed by a punctuation character is consumed as a pair,
// so escaped parentheses are never counted.
//
// On success, `*output` is set to a chunk that points into `input->data`
// (no copy is made) and the number of bytes scanned is returned.
// Returns -1, leaving `*output` untouched, when the input ends before a
// terminator is found or when the destination stops while parentheses
// are still open (link destinations must have balanced parentheses).
static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
                                        cmark_chunk *output) {
  bufsize_t i = offset;
  size_t nb_p = 0; // number of currently open, unescaped '('

  while (i < input->len) {
    if (input->data[i] == '\\' && i + 1 < input->len &&
        cmark_ispunct(input->data[i + 1])) {
      // Backslash escape: skip both the backslash and the escaped char.
      i += 2;
    } else if (input->data[i] == '(') {
      ++nb_p;
      ++i;
    } else if (input->data[i] == ')') {
      if (nb_p == 0) {
        // Unmatched ')': this belongs to the caller, not the destination.
        break;
      }
      --nb_p;
      ++i;
    } else if (cmark_isspace(input->data[i])) {
      break;
    } else {
      ++i;
    }
  }

  // Reject a destination that ran off the end of the input, and one that
  // was cut short by whitespace while a '(' was still unclosed.
  if (i >= input->len || nb_p != 0) {
    return -1;
  }

  {
    cmark_chunk result = {input->data + offset, i - offset, 0};
    *output = result;
  }
  return i - offset;
}
// Destructively unescape a string: remove backslashes before punctuation chars. extern void cmark_strbuf_unescape(cmark_strbuf *buf) { bufsize_t r, w; for (r = 0, w = 0; r < buf->size; ++r) { if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) r++; buf->ptr[w++] = buf->ptr[r]; } cmark_strbuf_truncate(buf, w); }
// Parse backslash-escape or just a backslash, returning an inline. static cmark_node *handle_backslash(subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); if (cmark_ispunct( nextchar)) { // only ascii symbols and newline can be escaped advance(subj); return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (!is_eof(subj) && skip_line_end(subj)) { return make_linebreak(subj->mem); } else { return make_str(subj->mem, cmark_chunk_literal("\\")); } }
// Emit code point `c` through `renderer`, escaping it when the escaping
// mode requires it. `nextc` is the byte that follows `c` in the input
// (0 when there is none); it is consulted for context-dependent cases.
//
// Only code points below 0x80 are ever escaped, and never in LITERAL mode.
//   NORMAL: control characters (< 0x20), the characters * _ [ ] # < > \ ` !,
//           '&' when followed by a letter, '-' '+' '=' at the start of
//           content when not preceded by a digit, and '.' or ')' at the
//           start of content when preceded by a digit and followed by
//           whitespace or nothing.
//   URL:    ` < > \ ( ) and whitespace.
//   TITLE:  ` < > " and backslash.
//
// Escaped output takes one of three forms: percent-encoding (whitespace
// in URLs), a preceding backslash (punctuation), or a decimal numeric
// character reference (everything else).
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];

  // '!' is escaped unconditionally in NORMAL mode, so no separate check
  // for '!' followed by '[' is needed.
  needs_escaping =
      c < 0x80 && escape != LITERAL &&
      ((escape == NORMAL &&
        (c < 0x20 || c == '*' || c == '_' || c == '[' || c == ']' ||
         c == '#' || c == '<' || c == '>' || c == '\\' || c == '`' ||
         c == '!' || (c == '&' && cmark_isalpha(nextc)) ||
         (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
          // begin_content doesn't get set to false til we've passed digits
          // at the beginning of line, so...
          !follows_digit) ||
         (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
          (nextc == 0 || cmark_isspace(nextc))))) ||
       (escape == URL &&
        (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' ||
         c == ')' || c == '(')) ||
       (escape == TITLE &&
        (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

  if (needs_escaping) {
    if (escape == URL && cmark_isspace(c)) {
      // Percent-encode whitespace. The value must be zero-padded to two
      // hex digits: characters such as tab (0x09) or newline (0x0A) would
      // otherwise be written as "% 9" / "% A" instead of "%09" / "%0A".
      snprintf(encoded, ENCODED_SIZE, "%%%02X", (unsigned int)c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += 3; // '%' plus two hex digits
    } else if (cmark_ispunct(c)) {
      cmark_render_ascii(renderer, "\\");
      cmark_render_code_point(renderer, c);
    } else { // render as entity
      snprintf(encoded, ENCODED_SIZE, "&#%d;", (int)c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += strlen(encoded);
    }
  } else {
    cmark_render_code_point(renderer, c);
  }
}
// Parse a link label. Returns 1 if successful. // Note: unescaped brackets are not allowed in labels. // The label begins with `[` and ends with the first `]` character // encountered. Backticks in labels do not start code spans. static int link_label(subject* subj, cmark_chunk *raw_label) { bufsize_t startpos = subj->pos; int length = 0; unsigned char c; // advance past [ if (peek_char(subj) == '[') { advance(subj); } else { return 0; } while ((c = peek_char(subj)) && c != '[' && c != ']') { if (c == '\\') { advance(subj); length++; if (cmark_ispunct(peek_char(subj))) { advance(subj); length++; } } else { advance(subj); length++; } if (length > MAX_LINK_LABEL_LENGTH) { goto noMatch; } } if (c == ']') { // match found *raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); cmark_chunk_trim(raw_label); advance(subj); // advance past ] return 1; } noMatch: subj->pos = startpos; // rewind return 0; }