Beispiel #1
0
void cmark_consolidate_text_nodes(cmark_node *root) {
  cmark_iter *iter;
  cmark_strbuf buf = CMARK_BUF_INIT(NULL);
  cmark_event_type ev_type;
  cmark_node *cur, *tmp, *next;
  
  if (root == NULL) {
    return;
  }
  iter = cmark_iter_new(root);
  buf.mem = iter->mem;

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
        cur->next && cur->next->type == CMARK_NODE_TEXT) {
      cmark_strbuf_clear(&buf);
      cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
      tmp = cur->next;
      while (tmp && tmp->type == CMARK_NODE_TEXT) {
        cmark_iter_next(iter); // advance pointer
        cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
        next = tmp->next;
        cmark_node_free(tmp);
        tmp = next;
      }
      cmark_chunk_free(iter->mem, &cur->as.literal);
      cur->as.literal = cmark_chunk_buf_detach(&buf);
    }
  }

  cmark_strbuf_free(&buf);
  cmark_iter_free(iter);
}
Beispiel #2
0
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) {
  char *result;
  cmark_strbuf html = CMARK_BUF_INIT(mem);
  cmark_event_type ev_type;
  cmark_node *cur;
  cmark_html_renderer renderer = {&html, NULL, NULL, 0, 0, NULL};
  cmark_iter *iter = cmark_iter_new(root);

  for (; extensions; extensions = extensions->next)
    if (((cmark_syntax_extension *) extensions->data)->html_filter_func)
      renderer.filter_extensions = cmark_llist_append(
          mem,
          renderer.filter_extensions,
          (cmark_syntax_extension *) extensions->data);

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    S_render_node(&renderer, cur, ev_type, options);
  }

  if (renderer.footnote_ix) {
    cmark_strbuf_puts(&html, "</ol>\n</section>\n");
  }

  result = (char *)cmark_strbuf_detach(&html);

  cmark_llist_free(mem, renderer.filter_extensions);

  cmark_iter_free(iter);
  return result;
}
Beispiel #3
0
// Like make_str, but parses entities.
static cmark_node *make_str_with_entities(cmark_mem *mem,
                                          cmark_chunk *content) {
  cmark_strbuf unescaped = CMARK_BUF_INIT(mem);

  if (houdini_unescape_html(&unescaped, content->data, content->len)) {
    return make_str(mem, cmark_chunk_buf_detach(&unescaped));
  } else {
    return make_str(mem, *content);
  }
}
Beispiel #4
0
char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width,
                   void (*outc)(cmark_renderer *, cmark_node *,
                                cmark_escaping, int32_t,
                                unsigned char),
                   int (*render_node)(cmark_renderer *renderer,
                                      cmark_node *node,
                                      cmark_event_type ev_type, int options)) {
  cmark_strbuf pref = CMARK_BUF_INIT(mem);
  cmark_strbuf buf = CMARK_BUF_INIT(mem);
  cmark_node *cur;
  cmark_event_type ev_type;
  char *result;
  cmark_iter *iter = cmark_iter_new(root);

  cmark_renderer renderer = {mem,   &buf, &pref, 0,           width,
                             0,     0,    true,  true,        false,
                             false, outc, S_cr,  S_blankline, S_out,
                             0};

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (!render_node(&renderer, cur, ev_type, options)) {
      // a false value causes us to skip processing
      // the node's contents.  this is used for
      // autolinks.
      cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
    }
  }

  // ensure final newline
  if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
    cmark_strbuf_putc(renderer.buffer, '\n');
  }

  result = (char *)cmark_strbuf_detach(renderer.buffer);

  cmark_iter_free(iter);
  cmark_strbuf_free(renderer.prefix);
  cmark_strbuf_free(renderer.buffer);

  return result;
}
Beispiel #5
0
// Clean a URL: remove surrounding whitespace, and remove \ that escape
// punctuation.
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
  cmark_strbuf buf = CMARK_BUF_INIT(mem);

  cmark_chunk_trim(url);

  if (url->len == 0) {
    cmark_chunk result = CMARK_CHUNK_EMPTY;
    return result;
  }

  houdini_unescape_html_f(&buf, url->data, url->len);

  cmark_strbuf_unescape(&buf);
  return cmark_chunk_buf_detach(&buf);
}
Beispiel #6
0
// Parse an entity or a regular "&" string.
// Assumes the subject has an '&' character at the current position.
static cmark_node *handle_entity(subject *subj) {
  cmark_strbuf ent = CMARK_BUF_INIT(subj->mem);
  bufsize_t len;

  advance(subj);

  len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
                             subj->input.len - subj->pos);

  if (len == 0)
    return make_str(subj->mem, cmark_chunk_literal("&"));

  subj->pos += len;
  return make_str(subj->mem, cmark_chunk_buf_detach(&ent));
}
Beispiel #7
0
static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
                                        int is_email) {
  cmark_strbuf buf = CMARK_BUF_INIT(mem);

  cmark_chunk_trim(url);

  if (url->len == 0) {
    cmark_chunk result = CMARK_CHUNK_EMPTY;
    return result;
  }

  if (is_email)
    cmark_strbuf_puts(&buf, "mailto:");

  houdini_unescape_html_f(&buf, url->data, url->len);
  return cmark_chunk_buf_detach(&buf);
}
Beispiel #8
0
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
static cmark_node *handle_backticks(subject *subj) {
  cmark_chunk openticks = take_while(subj, isbacktick);
  bufsize_t startpos = subj->pos;
  bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);

  if (endpos == 0) {      // not found
    subj->pos = startpos; // rewind
    return make_str(subj->mem, openticks);
  } else {
    cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);

    cmark_strbuf_set(&buf, subj->input.data + startpos,
                     endpos - startpos - openticks.len);
    cmark_strbuf_trim(&buf);
    cmark_strbuf_normalize_whitespace(&buf);

    return make_code(subj->mem, cmark_chunk_buf_detach(&buf));
  }
}
Beispiel #9
0
// Assumes we have a hyphen at the current position.
static cmark_node *handle_hyphen(subject *subj, bool smart) {
  cmark_strbuf buf = CMARK_BUF_INIT(NULL);
  int startpos = subj->pos;
  int en_count = 0;
  int em_count = 0;
  int numhyphens;
  int i;

  advance(subj);

  if (!smart || peek_char(subj) != '-') {
    return make_str(subj->mem, cmark_chunk_literal("-"));
  }

  while (smart && peek_char(subj) == '-') {
    advance(subj);
  }

  numhyphens = subj->pos - startpos;
  buf.mem = subj->mem;

  if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
    em_count = numhyphens / 3;
  } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
    en_count = numhyphens / 2;
  } else if (numhyphens % 3 == 2) { // use one en dash at end
    en_count = 1;
    em_count = (numhyphens - 2) / 3;
  } else { // use two en dashes at the end
    en_count = 2;
    em_count = (numhyphens - 4) / 3;
  }

  for (i = em_count; i > 0; i--) {
    cmark_strbuf_puts(&buf, EMDASH);
  }

  for (i = en_count; i > 0; i--) {
    cmark_strbuf_puts(&buf, ENDASH);
  }

  return make_str(subj->mem, cmark_chunk_buf_detach(&buf));
}
Beispiel #10
0
cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
  cmark_strbuf buf = CMARK_BUF_INIT(mem);
  unsigned char first, last;

  if (title->len == 0) {
    cmark_chunk result = CMARK_CHUNK_EMPTY;
    return result;
  }

  first = title->data[0];
  last = title->data[title->len - 1];

  // remove surrounding quotes if any:
  if ((first == '\'' && last == '\'') || (first == '(' && last == ')') ||
      (first == '"' && last == '"')) {
    houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
  } else {
    houdini_unescape_html_f(&buf, title->data, title->len);
  }

  cmark_strbuf_unescape(&buf);
  return cmark_chunk_buf_detach(&buf);
}
Beispiel #11
0
// normalize reference:  collapse internal whitespace to single space,
// remove leading/trailing whitespace, case fold
// Return NULL if the reference name is actually empty (i.e. composed
// solely from whitespace)
static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) {
  cmark_strbuf normalized = CMARK_BUF_INIT(mem);
  unsigned char *result;

  if (ref == NULL)
    return NULL;

  if (ref->len == 0)
    return NULL;

  cmark_utf8proc_case_fold(&normalized, ref->data, ref->len);
  cmark_strbuf_trim(&normalized);
  cmark_strbuf_normalize_whitespace(&normalized);

  result = cmark_strbuf_detach(&normalized);
  assert(result);

  if (result[0] == '\0') {
    free(result);
    return NULL;
  }

  return result;
}