示例#1
0
static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
                             cmark_inline_parser *inline_parser) {
  size_t link_end, domain_len;
  int rewind = 0;

  cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
  int max_rewind = cmark_inline_parser_get_offset(inline_parser);
  uint8_t *data = chunk->data + max_rewind;
  size_t size = chunk->len - max_rewind;

  if (size < 4 || data[1] != '/' || data[2] != '/')
    return 0;

  while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1]))
    rewind++;

  if (!sd_autolink_issafe(data - rewind, size + rewind))
    return 0;

  link_end = strlen("://");

  domain_len = check_domain(data + link_end, size - link_end, 1);

  if (domain_len == 0)
    return 0;

  link_end += domain_len;
  while (link_end < size && !cmark_isspace(data[link_end]))
    link_end++;

  link_end = autolink_delim(data, link_end);

  if (link_end == 0)
    return NULL;

  cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
  cmark_node_unput(parent, rewind);

  cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);

  cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind,
                                    (bufsize_t)(link_end + rewind));
  node->as.link.url = url;

  cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  text->as.literal = url;
  cmark_node_append_child(node, text);

  return node;
}
示例#2
0
文件: commonmark.c 项目: PKRoma/cmark
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];

  needs_escaping =
      c < 0x80 && escape != LITERAL &&
      ((escape == NORMAL &&
        (c < 0x20 ||
	 c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
         c == '>' || c == '\\' || c == '`' || c == '!' ||
         (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
         (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
          // begin_content doesn't get set to false til we've passed digits
          // at the beginning of line, so...
          !follows_digit) ||
         (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
          (nextc == 0 || cmark_isspace(nextc))))) ||
       (escape == URL &&
        (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' ||
         c == ')' || c == '(')) ||
       (escape == TITLE &&
        (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

  if (needs_escaping) {
    if (escape == URL && cmark_isspace(c)) {
      // use percent encoding for spaces
      snprintf(encoded, ENCODED_SIZE, "%%%2X", c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += 3;
    } else if (cmark_ispunct(c)) {
      cmark_render_ascii(renderer, "\\");
      cmark_render_code_point(renderer, c);
    } else { // render as entity
      snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += strlen(encoded);
    }
  } else {
    cmark_render_code_point(renderer, c);
  }
}
示例#3
0
static size_t autolink_delim(uint8_t *data, size_t link_end) {
  uint8_t cclose, copen;
  size_t i;

  for (i = 0; i < link_end; ++i)
    if (data[i] == '<') {
      link_end = i;
      break;
    }

  while (link_end > 0) {
    cclose = data[link_end - 1];

    switch (cclose) {
    case ')':
      copen = '(';
      break;
    default:
      copen = 0;
    }

    if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL)
      link_end--;

    else if (data[link_end - 1] == ';') {
      size_t new_end = link_end - 2;

      while (new_end > 0 && cmark_isalpha(data[new_end]))
        new_end--;

      if (new_end < link_end - 2 && data[new_end] == '&')
        link_end = new_end;
      else
        link_end--;
    } else if (copen != 0) {
      size_t closing = 0;
      size_t opening = 0;
      i = 0;

      /* Allow any number of matching brackets (as recognised in copen/cclose)
       * at the end of the URL.  If there is a greater number of closing
       * brackets than opening ones, we remove one character from the end of
       * the link.
       *
       * Examples (input text => output linked portion):
       *
       *	http://www.pokemon.com/Pikachu_(Electric)
       *		=> http://www.pokemon.com/Pikachu_(Electric)
       *
       *	http://www.pokemon.com/Pikachu_((Electric)
       *		=> http://www.pokemon.com/Pikachu_((Electric)
       *
       *	http://www.pokemon.com/Pikachu_(Electric))
       *		=> http://www.pokemon.com/Pikachu_(Electric)
       *
       *	http://www.pokemon.com/Pikachu_((Electric))
       *		=> http://www.pokemon.com/Pikachu_((Electric))
       */

      while (i < link_end) {
        if (data[i] == copen)
          opening++;
        else if (data[i] == cclose)
          closing++;

        i++;
      }

      if (closing <= opening)
        break;

      link_end--;
    } else
      break;
  }

  return link_end;
}
示例#4
0
static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) {
  size_t link_end;
  uint8_t *data = text->as.literal.data,
    *at;
  size_t size = text->as.literal.len;
  int rewind, max_rewind,
      nb = 0, np = 0, ns = 0;

  if (offset < 0 || (size_t)offset >= size)
    return;

  data += offset;
  size -= offset;

  at = (uint8_t *)memchr(data, '@', size);
  if (!at)
    return;

  max_rewind = (int)(at - data);
  data += max_rewind;
  size -= max_rewind;

  for (rewind = 0; rewind < max_rewind; ++rewind) {
    uint8_t c = data[-rewind - 1];

    if (cmark_isalnum(c))
      continue;

    if (strchr(".+-_", c) != NULL)
      continue;

    if (c == '/')
      ns++;

    break;
  }

  if (rewind == 0 || ns > 0) {
    postprocess_text(parser, text, max_rewind + 1 + offset);
    return;
  }

  for (link_end = 0; link_end < size; ++link_end) {
    uint8_t c = data[link_end];

    if (cmark_isalnum(c))
      continue;

    if (c == '@')
      nb++;
    else if (c == '.' && link_end < size - 1 && cmark_isalnum(data[link_end + 1]))
      np++;
    else if (c != '-' && c != '_')
      break;
  }

  if (link_end < 2 || nb != 1 || np == 0 ||
      (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) {
    postprocess_text(parser, text, max_rewind + 1 + offset);
    return;
  }

  link_end = autolink_delim(data, link_end);

  if (link_end == 0) {
    postprocess_text(parser, text, max_rewind + 1 + offset);
    return;
  }

  cmark_chunk_to_cstr(parser->mem, &text->as.literal);

  cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
  cmark_strbuf buf;
  cmark_strbuf_init(parser->mem, &buf, 10);
  cmark_strbuf_puts(&buf, "mailto:");
  cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind));
  link_node->as.link.url = cmark_chunk_buf_detach(&buf);

  cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  cmark_chunk email = cmark_chunk_dup(
      &text->as.literal,
      offset + max_rewind - rewind,
      (bufsize_t)(link_end + rewind));
  cmark_chunk_to_cstr(parser->mem, &email);
  link_text->as.literal = email;
  cmark_node_append_child(link_node, link_text);

  cmark_node_insert_after(text, link_node);

  cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  post->as.literal = cmark_chunk_dup(&text->as.literal,
    (bufsize_t)(offset + max_rewind + link_end),
    (bufsize_t)(size - link_end));
  cmark_chunk_to_cstr(parser->mem, &post->as.literal);

  cmark_node_insert_after(link_node, post);

  text->as.literal.len = offset + max_rewind - rewind;
  text->as.literal.data[text->as.literal.len] = 0;

  postprocess_text(parser, post, 0);
}