Exemple #1
0
// Return a link, an image, or a literal close bracket.
//
// Called with the subject positioned on a ']'.  The most recent entry on
// the bracket stack (subj->last_bracket) is taken as the candidate opener.
//
// Returns:
//   - a new text node containing "]" when there is no opener, the opener is
//     inactive, or neither an inline link nor a reference could be matched;
//   - NULL when a link/image was matched.  In that case the link/image node
//     has already been inserted into the tree in front of the opener's text
//     node, so there is nothing left for the caller to append.
static cmark_node *handle_close_bracket(subject *subj) {
  bufsize_t initial_pos, after_link_text_pos;
  bufsize_t endurl, starttitle, endtitle, endall;
  bufsize_t sps, n;
  cmark_reference *ref = NULL;
  cmark_chunk url_chunk, title_chunk;
  cmark_chunk url, title;
  bracket *opener;
  cmark_node *inl;
  cmark_chunk raw_label;
  int found_label;
  cmark_node *tmp, *tmpnext;
  bool is_image;

  advance(subj); // advance past ]
  initial_pos = subj->pos; // position just after the ']'

  // get last [ or ![
  opener = subj->last_bracket;

  if (opener == NULL) {
    // no opener at all: the ']' is plain text
    return make_str(subj->mem, cmark_chunk_literal("]"));
  }

  if (!opener->active) {
    // take delimiter off stack
    pop_bracket(subj);
    return make_str(subj->mem, cmark_chunk_literal("]"));
  }

  // If we got here, we matched a potential link/image text.
  // Now we check to see if it's a link/image.
  is_image = opener->image;

  // Same value as initial_pos at this point; used to rewind after a failed
  // inline-link attempt.
  after_link_text_pos = subj->pos;

  // First, look for an inline link.
  // sps: result of scanning space characters after '(';
  // n:   result of scanning the URL (url_chunk receives the URL itself).
  if (peek_char(subj) == '(' &&
      ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, &url_chunk)) > -1)) {

    // try to parse an explicit link:
    endurl = subj->pos + 1 + sps + n;
    starttitle = endurl + scan_spacechars(&subj->input, endurl);

    // ensure there are spaces btw url and title
    // (when there are none, the title is treated as empty)
    endtitle = (starttitle == endurl)
                   ? starttitle
                   : starttitle + scan_link_title(&subj->input, starttitle);

    endall = endtitle + scan_spacechars(&subj->input, endtitle);

    if (peek_at(subj, endall) == ')') {
      // complete inline link: consume through the ')'
      subj->pos = endall + 1;

      title_chunk =
          cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
      url = cmark_clean_url(subj->mem, &url_chunk);
      title = cmark_clean_title(subj->mem, &title_chunk);
      cmark_chunk_free(subj->mem, &url_chunk);
      cmark_chunk_free(subj->mem, &title_chunk);
      goto match;

    } else {
      // it could still be a shortcut reference link
      subj->pos = after_link_text_pos;
    }
  }

  // Next, look for a following [link label] that matches in refmap.
  // skip spaces
  raw_label = cmark_chunk_literal("");
  found_label = link_label(subj, &raw_label);
  if (!found_label) {
    // If we have a shortcut reference link, back up
    // to before the spaces we skipped.
    subj->pos = initial_pos;
  }

  // No label, or an empty one ("[]"): use the text between the opener and
  // this ']' as the label, unless the opener is flagged bracket_after.
  if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
    cmark_chunk_free(subj->mem, &raw_label);
    raw_label = cmark_chunk_dup(&subj->input, opener->position,
                                initial_pos - opener->position - 1);
    found_label = true;
  }

  if (found_label) {
    ref = cmark_reference_lookup(subj->refmap, &raw_label);
    cmark_chunk_free(subj->mem, &raw_label);
  }

  if (ref != NULL) { // found
    url = chunk_clone(subj->mem, &ref->url);
    title = chunk_clone(subj->mem, &ref->title);
    goto match;
  } else {
    goto noMatch;
  }

noMatch:
  // If we fall through to here, it means we didn't match a link:
  pop_bracket(subj); // remove this opener from delimiter list
  subj->pos = initial_pos; // resume right after the ']'
  return make_str(subj->mem, cmark_chunk_literal("]"));

match:
  // Build the link/image node and place it just before the opener's text.
  inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
  inl->as.link.url = url;
  inl->as.link.title = title;
  cmark_node_insert_before(opener->inl_text, inl);
  // Add link text:
  // every sibling following the opener's text node becomes a child of inl.
  // tmp->next is saved first because it is read again after tmp is moved.
  tmp = opener->inl_text->next;
  while (tmp) {
    tmpnext = tmp->next;
    cmark_node_append_child(inl, tmp);
    tmp = tmpnext;
  }

  // Free the bracket [:
  cmark_node_free(opener->inl_text);

  // Resolve emphasis delimiters above the one recorded on the opener,
  // then drop the opener itself from the bracket stack.
  process_emphasis(subj, opener->previous_delimiter);
  pop_bracket(subj);

  // Now, if we have a link, we also want to deactivate earlier link
  // delimiters. (This code can be removed if we decide to allow links
  // inside links.)
  if (!is_image) {
    opener = subj->last_bracket;
    while (opener != NULL) {
      if (!opener->image) {
        if (!opener->active) {
          // stop at the first link opener that is already inactive
          break;
        } else {
          opener->active = false;
        }
      }
      opener = opener->previous;
    }
  }

  return NULL;
}
Exemple #2
0
// Parse reference.  Assumes string begins with '[' character.
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
//
// The accepted shape is: a non-empty link label, a ':', a non-empty URL,
// an optional title, then optional spaces and a line end.
//
// If a candidate title is found but is not followed by a line end, the
// parser rewinds to just after the URL and accepts the definition only if
// the URL itself is followed by a line end.  In that case the candidate
// title is not part of the definition, so the reference is stored with an
// empty title.
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
                                       cmark_reference_map *refmap) {
  subject subj;

  cmark_chunk lab;
  cmark_chunk url;
  cmark_chunk title;

  bufsize_t matchlen = 0;
  bufsize_t beforetitle;

  subject_from_buf(mem, &subj, input, NULL);

  // parse label:
  if (!link_label(&subj, &lab) || lab.len == 0)
    return 0;

  // colon:
  if (peek_char(&subj) == ':') {
    advance(&subj);
  } else {
    return 0;
  }

  // parse link url:
  spnl(&subj);
  if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
      url.len > 0) {
    subj.pos += matchlen;
  } else {
    return 0;
  }

  // parse optional link_title
  beforetitle = subj.pos; // remembered so the title can be un-consumed
  spnl(&subj);
  matchlen = scan_link_title(&subj.input, subj.pos);
  if (matchlen) {
    title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
    subj.pos += matchlen;
  } else {
    subj.pos = beforetitle;
    title = cmark_chunk_literal("");
  }

  // parse final spaces and newline:
  skip_spaces(&subj);
  if (!skip_line_end(&subj)) {
    if (matchlen) { // try rewinding before title
      subj.pos = beforetitle;
      // The text that looked like a title belongs to whatever follows the
      // definition, so it must not be recorded as the reference's title.
      title = cmark_chunk_literal("");
      skip_spaces(&subj);
      if (!skip_line_end(&subj)) {
        return 0;
      }
    } else {
      return 0;
    }
  }
  // insert reference into refmap
  cmark_reference_create(refmap, &lab, &url, &title);
  return subj.pos;
}
Exemple #3
0
// Resolve the delimiter runs that sit above `stack_bottom` on the subject's
// delimiter list (subj->last_delim).
//
// For each delimiter that can close, the list is searched backwards for a
// matching opener with the same delimiter character:
//   - '*' and '_' : a match is handed to S_insert_emph;
//   - '\'' and '"': the closer's literal is replaced by the right quote and,
//                   if an opener was found, the opener's by the left quote.
// When no opener is found, the position is remembered in openers_bottom so
// later searches for the same kind of delimiter stop earlier.
//
// On return every delimiter above `stack_bottom` has been removed.
// `stack_bottom` may be NULL, in which case the whole list is processed.
static void process_emphasis(subject *subj, delimiter *stack_bottom) {
  delimiter *closer = subj->last_delim;
  delimiter *opener;
  delimiter *old_closer;
  bool opener_found;
  // Initialised so the index is well defined even if assert() is compiled
  // out (NDEBUG) and an unexpected delimiter character reaches the switch.
  int openers_bottom_index = 0;
  // Slots: 0 = '"', 1 = '\'', 2 = '_', 3..5 = '*' keyed by (length % 3).
  delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom,
                                  stack_bottom, stack_bottom, stack_bottom};

  // move back to first relevant delim.
  while (closer != NULL && closer->previous != stack_bottom) {
    closer = closer->previous;
  }

  // now move forward, looking for closers, and handling each
  while (closer != NULL) {
    if (closer->can_close) {
      switch (closer->delim_char) {
      case '"':
        openers_bottom_index = 0;
        break;
      case '\'':
        openers_bottom_index = 1;
        break;
      case '_':
        openers_bottom_index = 2;
        break;
      case '*':
        openers_bottom_index = 3 + (closer->length % 3);
        break;
      default:
        assert(false);
        break;
      }

      // Now look backwards for first matching opener:
      opener = closer->previous;
      opener_found = false;
      while (opener != NULL && opener != openers_bottom[openers_bottom_index]) {
        if (opener->can_open && opener->delim_char == closer->delim_char) {
          // interior closer of size 2 can't match opener of size 1
          // or of size 1 can't match 2
          if (!(closer->can_open || opener->can_close) ||
              ((opener->length + closer->length) % 3) != 0) {
            opener_found = true;
            break;
          }
        }
        opener = opener->previous;
      }
      // closer is advanced below; keep the original for the bookkeeping
      // done when no opener was found.
      old_closer = closer;
      if (closer->delim_char == '*' || closer->delim_char == '_') {
        if (opener_found) {
          closer = S_insert_emph(subj, opener, closer);
        } else {
          closer = closer->next;
        }
      } else if (closer->delim_char == '\'') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
        }
        closer = closer->next;
      } else if (closer->delim_char == '"') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
        }
        closer = closer->next;
      }
      if (!opener_found) {
        // set lower bound for future searches for openers
        openers_bottom[openers_bottom_index] = old_closer->previous;
        if (!old_closer->can_open) {
          // we can remove a closer that can't be an
          // opener, once we've seen there's no
          // matching opener:
          remove_delimiter(subj, old_closer);
        }
      }
    } else {
      closer = closer->next;
    }
  }
  // free all delimiters in list until stack_bottom:
  while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
    remove_delimiter(subj, subj->last_delim);
  }
}
Exemple #4
0
// Resolve the delimiter runs that sit above `stack_bottom` on the subject's
// delimiter list (subj->last_delim).
//
// For each delimiter that can close, the list is searched backwards for an
// opener with the same delimiter character:
//   - '*' and '_' : a match is handed to S_insert_emph;
//   - '\'' and '"': the closer's literal is replaced by the right quote and,
//                   if an opener was found, the opener's by the left quote.
// openers_bottom records, per delimiter character, how far back a search
// for an opener needs to go.  Only the four delimiter-character slots are
// initialised and used.
//
// On return every delimiter above `stack_bottom` has been removed.
// `stack_bottom` may be NULL, in which case the whole list is processed.
static void process_emphasis(subject *subj, delimiter *stack_bottom) {
  delimiter *closer = subj->last_delim;
  delimiter *opener;
  delimiter *old_closer;
  bool opener_found;
  bool odd_match;
  delimiter *openers_bottom[128];

  // initialize openers_bottom:
  openers_bottom['*'] = stack_bottom;
  openers_bottom['_'] = stack_bottom;
  openers_bottom['\''] = stack_bottom;
  openers_bottom['"'] = stack_bottom;

  // move back to first relevant delim.
  while (closer != NULL && closer->previous != stack_bottom) {
    closer = closer->previous;
  }

  // now move forward, looking for closers, and handling each
  while (closer != NULL) {
    if (closer->can_close) {
      // Now look backwards for first matching opener:
      opener = closer->previous;
      opener_found = false;
      odd_match = false;
      while (opener != NULL && opener != stack_bottom &&
             opener != openers_bottom[closer->delim_char]) {
        // interior closer of size 2 can't match opener of size 1
        // or of size 1 can't match 2
        odd_match = (closer->can_open || opener->can_close) &&
                    ((opener->inl_text->as.literal.len +
                      closer->inl_text->as.literal.len) %
                         3 ==
                     0);
        if (opener->delim_char == closer->delim_char && opener->can_open &&
            !odd_match) {
          opener_found = true;
          break;
        }
        opener = opener->previous;
      }
      // closer is advanced below; keep the original for the bookkeeping
      // done when no opener was found.
      old_closer = closer;
      if (closer->delim_char == '*' || closer->delim_char == '_') {
        if (opener_found) {
          closer = S_insert_emph(subj, opener, closer);
        } else {
          closer = closer->next;
        }
      } else if (closer->delim_char == '\'') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
        }
        closer = closer->next;
      } else if (closer->delim_char == '"') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
        }
        closer = closer->next;
      }
      if (!opener_found && !odd_match) {
        // set lower bound for future searches for openers
        // (we don't do this with 'odd_match' set because
        // a ** that didn't match an earlier * might turn into
        // an opener, and the * might be matched by something
        // else.
        openers_bottom[old_closer->delim_char] = old_closer->previous;
        if (!old_closer->can_open) {
          // we can remove a closer that can't be an
          // opener, once we've seen there's no
          // matching opener:
          remove_delimiter(subj, old_closer);
        }
      }
    } else {
      closer = closer->next;
    }
  }
  // free all delimiters in list until stack_bottom.
  // The NULL check stops the loop once the list is empty, so a non-NULL
  // stack_bottom that is not on the list cannot cause remove_delimiter to
  // be called with NULL.
  while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
    remove_delimiter(subj, subj->last_delim);
  }
}
Exemple #5
0
// Parse one inline element at the subject's current position and attach
// whatever node it produces to `parent`.
// Returns 0 when the subject is exhausted (the next character is 0),
// and 1 in every other case.
static int parse_inline(subject *subj, cmark_node *parent, int options) {
  const bool smart = (options & CMARK_OPT_SMART) != 0;
  cmark_node *node = NULL;
  unsigned char ch = peek_char(subj);

  if (ch == 0) {
    return 0;
  }

  switch (ch) {
  case '\r':
  case '\n':
    node = handle_newline(subj);
    break;

  case '`':
    node = handle_backticks(subj);
    break;

  case '\\':
    node = handle_backslash(subj);
    break;

  case '&':
    node = handle_entity(subj);
    break;

  case '<':
    node = handle_pointy_brace(subj);
    break;

  case '*':
  case '_':
  case '\'':
  case '"':
    node = handle_delim(subj, ch, smart);
    break;

  case '-':
    node = handle_hyphen(subj, smart);
    break;

  case '.':
    node = handle_period(subj, smart);
    break;

  case '[':
    // link opener: emit the text now and remember it on the bracket stack
    advance(subj);
    node = make_str(subj->mem, cmark_chunk_literal("["));
    push_bracket(subj, false, node);
    break;

  case ']':
    // may yield NULL when the handler has already placed a node itself
    node = handle_close_bracket(subj);
    break;

  case '!':
    advance(subj);
    if (peek_char(subj) != '[') {
      // a lone '!' is ordinary text
      node = make_str(subj->mem, cmark_chunk_literal("!"));
      break;
    }
    // image opener "!["
    advance(subj);
    node = make_str(subj->mem, cmark_chunk_literal("!["));
    push_bracket(subj, true, node);
    break;

  default: {
    // plain text run up to the next special character
    bufsize_t stop = subject_find_special_char(subj, options);
    cmark_chunk text =
        cmark_chunk_dup(&subj->input, subj->pos, stop - subj->pos);
    subj->pos = stop;

    // trailing spaces before a line end are dropped
    if (S_is_line_end_char(peek_char(subj))) {
      cmark_chunk_rtrim(&text);
    }

    node = make_str(subj->mem, text);
    break;
  }
  }

  if (node != NULL) {
    cmark_node_append_child(parent, node);
  }

  return 1;
}
Exemple #6
0
// Return a link, an image, or a literal close bracket.
//
// Called with the subject positioned on a ']'.  The delimiter list is
// searched backwards from subj->last_delim for the nearest '[' or '!'
// delimiter, which becomes the candidate opener.
//
// Parameters:
//   subj   - the inline subject being parsed
//   parent - node whose last_child is set to the link/image on a match
//
// Returns:
//   - a new text node containing "]" when there is no opener, the opener is
//     inactive, or no link could be matched;
//   - NULL when a link/image was matched.  In that case the opener's own
//     text node has been converted in place into the link/image node.
static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
{
	bufsize_t initial_pos;
	bufsize_t starturl, endurl, starttitle, endtitle, endall;
	bufsize_t n;
	bufsize_t sps;
	cmark_reference *ref;
	bool is_image = false;
	cmark_chunk url_chunk, title_chunk;
	cmark_chunk url, title;
	delimiter *opener;
	cmark_node *link_text;
	cmark_node *inl;
	cmark_chunk raw_label;
	int found_label;

	advance(subj);  // advance past ]
	initial_pos = subj->pos;  // position just after the ']'

	// look through list of delimiters for a [ or !
	opener = subj->last_delim;
	while (opener) {
		if (opener->delim_char == '[' || opener->delim_char == '!') {
			break;
		}
		opener = opener->previous;
	}

	if (opener == NULL) {
		// no opener at all: the ']' is plain text
		return make_str(cmark_chunk_literal("]"));
	}

	if (!opener->active) {
		// take delimiter off stack
		remove_delimiter(subj, opener);
		return make_str(cmark_chunk_literal("]"));
	}

	// If we got here, we matched a potential link/image text.
	is_image = opener->delim_char == '!';
	// first node of the link text (may be NULL if there is none)
	link_text = opener->inl_text->next;

	// Now we check to see if it's a link/image.

	// First, look for an inline link.
	// sps: result of scanning space characters after '(';
	// n:   result of scanning the URL.
	if (peek_char(subj) == '(' &&
	    ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
	    ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {

		// try to parse an explicit link:
		starturl = subj->pos + 1 + sps; // after (
		endurl = starturl + n;
		starttitle = endurl + scan_spacechars(&subj->input, endurl);

		// ensure there are spaces btw url and title
		// (when there are none, the title is treated as empty)
		endtitle = (starttitle == endurl) ? starttitle :
		           starttitle + scan_link_title(&subj->input, starttitle);

		endall = endtitle + scan_spacechars(&subj->input, endtitle);

		if (peek_at(subj, endall) == ')') {
			// complete inline link: consume through the ')'
			subj->pos = endall + 1;

			url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
			title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
			url = cmark_clean_url(&url_chunk);
			title = cmark_clean_title(&title_chunk);
			cmark_chunk_free(&url_chunk);
			cmark_chunk_free(&title_chunk);
			goto match;

		} else {
			// a '(' with a scannable URL but no closing ')': in this
			// version no reference lookup is attempted afterwards
			goto noMatch;
		}
	}

	// Next, look for a following [link label] that matches in refmap.
	// skip spaces
	subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
	raw_label = cmark_chunk_literal("");
	found_label = link_label(subj, &raw_label);
	if (!found_label || raw_label.len == 0) {
		// no label, or an empty one: use the text between the opener
		// and this ']' as the label
		cmark_chunk_free(&raw_label);
		raw_label = cmark_chunk_dup(&subj->input, opener->position,
		                            initial_pos - opener->position - 1);
	}

	if (!found_label) {
		// If we have a shortcut reference link, back up
		// to before the spaces we skipped.
		subj->pos = initial_pos;
	}

	ref = cmark_reference_lookup(subj->refmap, &raw_label);
	cmark_chunk_free(&raw_label);

	if (ref != NULL) { // found
		url   = chunk_clone(&ref->url);
		title = chunk_clone(&ref->title);
		goto match;
	} else {
		goto noMatch;
	}

noMatch:
	// If we fall through to here, it means we didn't match a link:
	remove_delimiter(subj, opener);  // remove this opener from delimiter list
	subj->pos = initial_pos;  // resume right after the ']'
	return make_str(cmark_chunk_literal("]"));

match:
	// Turn the opener's text node into the link/image node itself.
	inl = opener->inl_text;
	inl->type = is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK;
	cmark_chunk_free(&inl->as.literal);
	inl->first_child = link_text;
	// resolve emphasis delimiters that sit above the opener
	process_emphasis(subj, opener);
	inl->as.link.url   = url;
	inl->as.link.title = title;
	inl->next = NULL;
	if (link_text) {
		// re-parent every node of the link text under inl; the loop
		// stops on the last node, which is handled after it
		cmark_node *tmp;
		link_text->prev = NULL;
		for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
			tmp->parent = inl;
		}
		tmp->parent = inl;
		inl->last_child = tmp;
	}
	parent->last_child = inl;

	// Now, if we have a link, we also want to deactivate earlier link
	// delimiters. (This code can be removed if we decide to allow links
	// inside links.)
	remove_delimiter(subj, opener);
	if (!is_image) {
		opener = subj->last_delim;
		while (opener != NULL) {
			if (opener->delim_char == '[') {
				if (!opener->active) {
					// stop at the first '[' that is already inactive
					break;
				} else {
					opener->active = false;
				}
			}
			opener = opener->previous;
		}
	}

	return NULL;
}
Exemple #7
0
// Resolve the emphasis and smart-quote delimiters that sit above
// `stack_bottom` on the subject's delimiter list (subj->last_delim).
//
// Only delimiters whose character is '*', '_', '"' or '\'' are treated as
// closers here; any other delimiter on the list is stepped over.
// For each such closer the list is searched backwards for an opener with the
// same character:
//   - '*' and '_' : a match is handed to S_insert_emph;
//   - '\'' and '"': the closer's literal is replaced by the right quote and,
//                   if an opener was found, the opener's by the left quote.
// openers_bottom records, per delimiter character, how far back a search
// for an opener needs to go.  Only the four delimiter-character slots are
// initialised and used.
//
// On return every delimiter above `stack_bottom` has been removed.
// `stack_bottom` may be NULL, in which case the whole list is processed.
static void process_emphasis(subject *subj, delimiter *stack_bottom)
{
	delimiter *closer = subj->last_delim;
	delimiter *opener;
	delimiter *old_closer;
	bool opener_found;
	delimiter *openers_bottom[128];

	// initialize openers_bottom:
	openers_bottom['*'] = stack_bottom;
	openers_bottom['_'] = stack_bottom;
	openers_bottom['\''] = stack_bottom;
	openers_bottom['"'] = stack_bottom;

	// move back to first relevant delim.
	while (closer != NULL && closer->previous != stack_bottom) {
		closer = closer->previous;
	}

	// now move forward, looking for closers, and handling each
	while (closer != NULL) {
		if (closer->can_close &&
		    (closer->delim_char == '*' || closer->delim_char == '_' ||
		     closer->delim_char == '"' || closer->delim_char == '\'')) {
			// Now look backwards for first matching opener:
			opener = closer->previous;
			opener_found = false;
			while (opener != NULL && opener != stack_bottom &&
			       opener != openers_bottom[closer->delim_char]) {
				if (opener->delim_char == closer->delim_char &&
				    opener->can_open) {
					opener_found = true;
					break;
				}
				opener = opener->previous;
			}
			// closer is advanced below; keep the original for the
			// bookkeeping done when no opener was found.
			old_closer = closer;
			if (closer->delim_char == '*' || closer->delim_char == '_') {
				if (opener_found) {
					closer = S_insert_emph(subj, opener, closer);
				} else {
					closer = closer->next;
				}
			} else if (closer->delim_char == '\'') {
				cmark_chunk_free(&closer->inl_text->as.literal);
				closer->inl_text->as.literal =
				    cmark_chunk_literal(RIGHTSINGLEQUOTE);
				if (opener_found) {
					cmark_chunk_free(&opener->inl_text->as.literal);
					opener->inl_text->as.literal =
					    cmark_chunk_literal(LEFTSINGLEQUOTE);
				}
				closer = closer->next;
			} else if (closer->delim_char == '"') {
				cmark_chunk_free(&closer->inl_text->as.literal);
				closer->inl_text->as.literal =
				    cmark_chunk_literal(RIGHTDOUBLEQUOTE);
				if (opener_found) {
					cmark_chunk_free(&opener->inl_text->as.literal);
					opener->inl_text->as.literal =
					    cmark_chunk_literal(LEFTDOUBLEQUOTE);
				}
				closer = closer->next;
			}
			if (!opener_found) {
				// set lower bound for future searches for openers:
				openers_bottom[old_closer->delim_char] = old_closer->previous;
				if (!old_closer->can_open) {
					// we can remove a closer that can't be an
					// opener, once we've seen there's no
					// matching opener:
					remove_delimiter(subj, old_closer);
				}
			}
		} else {
			closer = closer->next;
		}
	}
	// free all delimiters in list until stack_bottom.
	// The NULL check stops the loop once the list is empty, so a non-NULL
	// stack_bottom that is not on the list cannot cause remove_delimiter
	// to be called with NULL.
	while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
		remove_delimiter(subj, subj->last_delim);
	}
}
Exemple #8
0
// Incorporate one line of input into the block tree held by `parser`.
//
// Parameters:
//   parser - parser state (root, current block, line counter, line buffer)
//   buffer - bytes of the line
//   bytes  - number of bytes in `buffer`
//
// The work is done in three phases:
//   1. Walk down the chain of open last children from the root and check
//      that each still matches this line, consuming its line prefix.
//   2. Starting from the last matched container, try to open new containers
//      (indented code, block quote, ATX/setext header, fenced code, HTML,
//      hrule, list / list item).
//   3. Add what remains of the line to the appropriate container, either as
//      a lazy paragraph continuation or after finalizing unmatched blocks.
// parser->curline is cleared before returning.
static void
S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
{
	cmark_node* last_matched_container;
	int offset = 0;
	int matched = 0;
	int lev = 0;
	int i;
	cmark_list *data = NULL;
	bool all_matched = true;
	cmark_node* container;
	cmark_node* cur = parser->current;
	bool blank = false;
	int first_nonspace;
	int indent;
	cmark_chunk input;

	utf8proc_detab(parser->curline, buffer, bytes);

	// Add a newline to the end if not present:
	// TODO this breaks abstraction:
	// NOTE(review): this reads index size - 1; it assumes curline is
	// non-empty after detab -- confirm for zero-length input.
	if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
		cmark_strbuf_putc(parser->curline, '\n');
	}
	input.data = parser->curline->ptr;
	input.len = parser->curline->size;

	// container starts at the document root.
	container = parser->root;

	parser->line_number++;

	// for each containing cmark_node, try to parse the associated line start.
	// bail out on failure:  container will point to the last matching cmark_node.

	while (container->last_child && container->last_child->open) {
		container = container->last_child;

		// locate the first non-space character at or after offset
		first_nonspace = offset;
		while (peek_at(&input, first_nonspace) == ' ') {
			first_nonspace++;
		}

		indent = first_nonspace - offset;
		blank = peek_at(&input, first_nonspace) == '\n';

		if (container->type == NODE_BLOCK_QUOTE) {
			// continues with '>' indented at most 3, plus one optional space
			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
			if (matched) {
				offset = first_nonspace + 1;
				if (peek_at(&input, offset) == ' ')
					offset++;
			} else {
				all_matched = false;
			}

		} else if (container->type == NODE_LIST_ITEM) {

			// continues when indented to the item's content column,
			// or when the line is blank
			if (indent >= container->as.list.marker_offset +
					container->as.list.padding) {
				offset += container->as.list.marker_offset +
					container->as.list.padding;
			} else if (blank) {
				offset = first_nonspace;
			} else {
				all_matched = false;
			}

		} else if (container->type == NODE_CODE_BLOCK) {

			if (!container->as.code.fenced) { // indented
				if (indent >= CODE_INDENT) {
					offset += CODE_INDENT;
				} else if (blank) {
					offset = first_nonspace;
				} else {
					all_matched = false;
				}
			} else {
				// skip optional spaces of fence offset
				i = container->as.code.fence_offset;
				while (i > 0 && peek_at(&input, offset) == ' ') {
					offset++;
					i--;
				}
			}

		} else if (container->type == NODE_HEADER) {

			// a header can never contain more than one line
			all_matched = false;
			if (blank) {
				container->last_line_blank = true;
			}

		} else if (container->type == NODE_HTML) {

			// a blank line ends an HTML block
			if (blank) {
				container->last_line_blank = true;
				all_matched = false;
			}

		} else if (container->type == NODE_PARAGRAPH) {

			// a blank line ends a paragraph
			if (blank) {
				container->last_line_blank = true;
				all_matched = false;
			}

		}

		if (!all_matched) {
			container = container->parent;  // back up to last matching cmark_node
			break;
		}
	}

	last_matched_container = container;

	// check to see if we've hit 2nd blank line, break out of list:
	if (blank && container->last_line_blank) {
		break_out_of_lists(parser, &container, parser->line_number);
	}

	// unless last matched container is code cmark_node, try new container starts:
	while (container->type != NODE_CODE_BLOCK &&
			container->type != NODE_HTML) {

		first_nonspace = offset;
		while (peek_at(&input, first_nonspace) == ' ')
			first_nonspace++;

		indent = first_nonspace - offset;
		blank = peek_at(&input, first_nonspace) == '\n';

		if (indent >= CODE_INDENT) {
			// indented code cannot interrupt a paragraph (cur is the
			// block that was current when this line started)
			if (cur->type != NODE_PARAGRAPH && !blank) {
				offset += CODE_INDENT;
				container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, offset + 1);
				container->as.code.fenced = false;
				container->as.code.fence_char = 0;
				container->as.code.fence_length = 0;
				container->as.code.fence_offset = 0;
				container->as.code.info = cmark_chunk_literal("");
			} else { // indent > 4 in lazy line
				break;
			}

		} else if (peek_at(&input, first_nonspace) == '>') {

			offset = first_nonspace + 1;
			// optional following character
			if (peek_at(&input, offset) == ' ')
				offset++;
			container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->line_number, offset + 1);

		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {

			offset = first_nonspace + matched;
			container = add_child(parser, container, NODE_HEADER, parser->line_number, offset + 1);

			// header level = number of consecutive '#' characters
			int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace);
			int level = 0;

			while (peek_at(&input, hashpos) == '#') {
				level++;
				hashpos++;
			}
			container->as.header.level = level;
			container->as.header.setext = false;

		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {

			container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, first_nonspace + 1);
			container->as.code.fenced = true;
			container->as.code.fence_char = peek_at(&input, first_nonspace);
			container->as.code.fence_length = matched;
			container->as.code.fence_offset = first_nonspace - offset;
			container->as.code.info = cmark_chunk_literal("");
			offset = first_nonspace + matched;

		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {

			container = add_child(parser, container, NODE_HTML, parser->line_number, first_nonspace + 1);
			// note, we don't adjust offset because the tag is part of the text

		} else if (container->type == NODE_PARAGRAPH &&
				(lev = scan_setext_header_line(&input, first_nonspace)) &&
				// check that there is only one line in the paragraph:
				cmark_strbuf_strrchr(&container->string_content, '\n',
					cmark_strbuf_len(&container->string_content) - 2) < 0) {

			// the paragraph is converted in place into a setext header;
			// the underline itself is consumed entirely
			container->type = NODE_HEADER;
			container->as.header.level = lev;
			container->as.header.setext = true;
			offset = input.len - 1;

		} else if (!(container->type == NODE_PARAGRAPH && !all_matched) &&
				(matched = scan_hrule(&input, first_nonspace))) {

			// it's only now that we know the line is not part of a setext header:
			container = add_child(parser, container, NODE_HRULE, parser->line_number, first_nonspace + 1);
			container = finalize(parser, container,
					     parser->line_number);
			offset = input.len - 1;

		} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {

			// compute padding:
			offset = first_nonspace + matched;
			i = 0;
			while (i <= 5 && peek_at(&input, offset + i) == ' ') {
				i++;
			}
			// i = number of spaces after marker, up to 5
			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
				// 5+ spaces, no space, or an empty item: padding is the
				// marker width plus one, and at most one space is consumed
				data->padding = matched + 1;
				if (i > 0) {
					offset += 1;
				}
			} else {
				data->padding = matched + i;
				offset += i;
			}

			// check container; if it's a list, see if this list item
			// can continue the list; otherwise, create a list container.

			data->marker_offset = indent;

			if (container->type != NODE_LIST ||
					!lists_match(&container->as.list, data)) {
				container = add_child(parser, container, NODE_LIST, parser->line_number,
						first_nonspace + 1);

				memcpy(&container->as.list, data, sizeof(*data));
			}

			// add the list item
			container = add_child(parser, container, NODE_LIST_ITEM, parser->line_number,
					first_nonspace + 1);
			/* TODO: static */
			memcpy(&container->as.list, data, sizeof(*data));
			// data was filled in by parse_list_marker and has now been
			// copied into the node(s); release it
			free(data);
		} else {
			break;
		}

		if (accepts_lines(container->type)) {
			// if it's a line container, it can't contain other containers
			break;
		}
	}

	// what remains at offset is a text line.  add the text to the
	// appropriate container.

	first_nonspace = offset;
	while (peek_at(&input, first_nonspace) == ' ')
		first_nonspace++;

	indent = first_nonspace - offset;
	blank = peek_at(&input, first_nonspace) == '\n';

	// cmark_node quote lines are never blank as they start with >
	// and we don't count blanks in fenced code for purposes of tight/loose
	// lists or breaking out of lists.  we also don't set last_line_blank
	// on an empty list item.
	container->last_line_blank = (blank &&
			container->type != NODE_BLOCK_QUOTE &&
			container->type != NODE_HEADER &&
			!(container->type == NODE_CODE_BLOCK &&
				container->as.code.fenced) &&
			!(container->type == NODE_LIST_ITEM &&
				container->first_child == NULL &&
				container->start_line == parser->line_number));

	// clear last_line_blank on every ancestor of the container
	cmark_node *cont = container;
	while (cont->parent) {
		cont->parent->last_line_blank = false;
		cont = cont->parent;
	}

	// lazy continuation: no new container was opened, the line is not
	// blank, and the previously current block is a non-empty paragraph
	// that was not matched
	if (cur != last_matched_container &&
			container == last_matched_container &&
			!blank &&
			cur->type == NODE_PARAGRAPH &&
			cmark_strbuf_len(&cur->string_content) > 0) {

		add_line(cur, &input, offset);

	} else { // not a lazy continuation

		// finalize any blocks that were not matched and set cur to container:
		while (cur != last_matched_container) {
			cur = finalize(parser, cur, parser->line_number);
			assert(cur != NULL);
		}

		if (container->type == NODE_CODE_BLOCK &&
		    !container->as.code.fenced) {

			add_line(container, &input, offset);

		} else if (container->type == NODE_CODE_BLOCK &&
			   container->as.code.fenced) {
			matched = 0;

			// NOTE(review): strict '>' -- whether a closing fence of
			// the same length as the opening fence is accepted depends
			// on what scan_close_code_fence returns; confirm.
			if (indent <= 3 &&
					peek_at(&input, first_nonspace) == container->as.code.fence_char) {
				int fence_len = scan_close_code_fence(&input, first_nonspace);
				if (fence_len > container->as.code.fence_length)
					matched = 1;
			}

			if (matched) {
				// if closing fence, don't add line to container; instead, close it:
				container = finalize(parser, container,
						     parser->line_number);
			} else {
				add_line(container, &input, offset);
			}

		} else if (container->type == NODE_HTML) {

			add_line(container, &input, offset);

		} else if (blank) {

			// blank line in a container that is neither a code block
			// nor an HTML block: no text is added

		} else if (container->type == NODE_HEADER) {

			// a header holds exactly one line, so it is closed at once
			chop_trailing_hashtags(&input);
			add_line(container, &input, first_nonspace);
			container = finalize(parser, container,
					     parser->line_number);

		} else if (accepts_lines(container->type)) {

			add_line(container, &input, first_nonspace);

		} else if (container->type != NODE_HRULE &&
			   container->type != NODE_HEADER) {

			// create paragraph container for line
			container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1);
			add_line(container, &input, first_nonspace);

		} else {
			assert(false);
		}

		parser->current = container;
	}
	cmark_strbuf_clear(parser->curline);

}
// Parse one inline element at the subject's current position and attach
// whatever node it produces to `parent`.
// Returns 0 when the subject is exhausted (the next character is 0),
// and 1 in every other case.
static int parse_inline(subject* subj, cmark_node * parent)
{
	cmark_node *node = NULL;
	unsigned char ch = peek_char(subj);

	if (ch == 0) {
		return 0;
	}

	switch (ch) {
	case '\n':
		node = handle_newline(subj);
		break;

	case '`':
		node = handle_backticks(subj);
		break;

	case '\\':
		node = handle_backslash(subj);
		break;

	case '&':
		node = handle_entity(subj);
		break;

	case '<':
		node = handle_pointy_brace(subj);
		break;

	case '*':
	case '_':
		node = handle_strong_emph(subj, ch);
		break;

	case '[':
		// link opener: emit the text now and remember it as a delimiter
		advance(subj);
		node = make_str(cmark_chunk_literal("["));
		push_delimiter(subj, '[', true, false, node);
		break;

	case ']':
		// may yield NULL when the handler has already placed a node itself
		node = handle_close_bracket(subj, parent);
		break;

	case '!':
		advance(subj);
		if (peek_char(subj) != '[') {
			// a lone '!' is ordinary text
			node = make_str(cmark_chunk_literal("!"));
			break;
		}
		// image opener "!["
		advance(subj);
		node = make_str(cmark_chunk_literal("!["));
		push_delimiter(subj, '!', false, true, node);
		break;

	default: {
		// plain text run up to the next special character
		int stop = subject_find_special_char(subj);
		cmark_chunk text = cmark_chunk_dup(&subj->input, subj->pos, stop - subj->pos);
		subj->pos = stop;

		// trailing spaces before a newline are dropped
		if (peek_char(subj) == '\n') {
			cmark_chunk_rtrim(&text);
		}

		node = make_str(text);
		break;
	}
	}

	if (node != NULL) {
		cmark_node_append_child(parent, node);
	}

	return 1;
}
Exemple #10
0
// Append `source` to the renderer's output buffer.
//
// Steps, in order:
//   1. Flush pending line breaks counted in renderer->need_cr (capped at one
//      inside a tight list item).  Newlines already at the end of the buffer
//      count toward the requested number.
//   2. Emit `source` one code point at a time:
//        - a space, when wrapping is in effect (`wrap` is true and
//          renderer->no_wrap is false): dropped at the beginning of a line;
//          otherwise written once, with the spaces that follow it skipped,
//          and its buffer offset recorded as a possible break position;
//        - a newline: written as-is, resetting the column and line state;
//        - anything else: written directly when escape == LITERAL, or
//          handed to renderer->outc otherwise.
//   3. After each code point, if renderer->width > 0 and the column has
//      passed it, re-break the current line at the last recorded breakable
//      space.
//
// If cmark_utf8proc_iterate reports an error (-1), the function returns
// immediately and the rest of `source` is not rendered.
static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
                  cmark_escaping escape) {
    int length = cmark_strbuf_safe_strlen(source);
    unsigned char nextc;
    int32_t c;
    int i = 0;
    int last_nonspace;
    int len;
    cmark_chunk remainder = cmark_chunk_literal("");
    // index of the last byte in the buffer; walked backwards over
    // trailing newlines while flushing need_cr below.
    int k = renderer->buffer->size - 1;

    wrap = wrap && !renderer->no_wrap;

    if (renderer->in_tight_list_item && renderer->need_cr > 1) {
        renderer->need_cr = 1;
    }
    while (renderer->need_cr) {
        if (k < 0 || renderer->buffer->ptr[k] == '\n') {
            // an existing newline (or the empty buffer) satisfies this one
            k -= 1;
        } else {
            cmark_strbuf_putc(renderer->buffer, '\n');
            if (renderer->need_cr > 1) {
                cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
                                 renderer->prefix->size);
            }
        }
        renderer->column = 0;
        // Any break position recorded so far lies on a line that is now
        // finished.  Forget it, so the width check below cannot truncate
        // the buffer there and re-flow text across this line break.
        renderer->last_breakable = 0;
        renderer->begin_line = true;
        renderer->begin_content = true;
        renderer->need_cr -= 1;
    }

    while (i < length) {
        if (renderer->begin_line) {
            cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
                             renderer->prefix->size);
            // note: this assumes prefix is ascii:
            renderer->column = renderer->prefix->size;
        }

        len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
        if (len == -1) { // error condition
            return;        // return without rendering rest of string
        }
        // byte following the current code point (the terminating NUL at
        // the end of the string); passed to outc as lookahead.
        nextc = source[i + len];
        if (c == 32 && wrap) {
            // a space at the beginning of a line is dropped entirely
            if (!renderer->begin_line) {
                last_nonspace = renderer->buffer->size;
                cmark_strbuf_putc(renderer->buffer, ' ');
                renderer->column += 1;
                renderer->begin_line = false;
                renderer->begin_content = false;
                // skip following spaces
                while (source[i + 1] == ' ') {
                    i++;
                }
                // We don't allow breaks that make a digit the first character
                // because this causes problems with commonmark output.
                if (!cmark_isdigit(source[i + 1])) {
                    renderer->last_breakable = last_nonspace;
                }
            }

        } else if (c == 10) {
            cmark_strbuf_putc(renderer->buffer, '\n');
            renderer->column = 0;
            renderer->begin_line = true;
            renderer->begin_content = true;
            renderer->last_breakable = 0;
        } else if (escape == LITERAL) {
            cmark_render_code_point(renderer, c);
            renderer->begin_line = false;
            // we don't set 'begin_content' to false til we've
            // finished parsing a digit.  Reason:  in commonmark
            // we need to escape a potential list marker after
            // a digit:
            renderer->begin_content = renderer->begin_content &&
                                      cmark_isdigit(c) == 1;
        } else {
            (renderer->outc)(renderer, escape, c, nextc);
            renderer->begin_line = false;
            renderer->begin_content = renderer->begin_content &&
                                      cmark_isdigit(c) == 1;
        }

        // If adding the character went beyond width, look for an
        // earlier place where the line could be broken:
        if (renderer->width > 0 && renderer->column > renderer->width &&
                !renderer->begin_line && renderer->last_breakable > 0) {

            // copy from last_breakable to remainder
            // (+ 1 skips the space stored at last_breakable itself)
            cmark_chunk_set_cstr(&remainder, (char *)renderer->buffer->ptr +
                                 renderer->last_breakable + 1);
            // truncate at last_breakable
            cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
            // add newline, prefix, and remainder
            cmark_strbuf_putc(renderer->buffer, '\n');
            cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
                             renderer->prefix->size);
            cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
            renderer->column = renderer->prefix->size + remainder.len;
            cmark_chunk_free(&remainder);
            renderer->last_breakable = 0;
            renderer->begin_line = false;
            renderer->begin_content = false;
        }

        i += len;
    }
}