// Scan forward from parser->offset over spaces and tabs and record where the
// first other character sits.
//
// Writes:
//   parser->first_nonspace         byte position of that character
//   parser->first_nonspace_column  its column, with each tab advancing to the
//                                  next multiple of TAB_STOP
//   parser->indent                 columns between parser->column and it
//   parser->blank                  result of S_is_line_end_char() on it
// parser->offset and parser->column themselves are left untouched.
static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
  // Columns remaining until the next tab stop, starting from parser->column.
  int to_next_stop = TAB_STOP - (parser->column % TAB_STOP);

  parser->first_nonspace = parser->offset;
  parser->first_nonspace_column = parser->column;

  for (;;) {
    const char ch = peek_at(input, parser->first_nonspace);

    if (ch == ' ') {
      parser->first_nonspace_column += 1;
      to_next_stop -= 1;
      if (to_next_stop == 0) {
        // Landed exactly on a tab stop; the next one is a full stop away.
        to_next_stop = TAB_STOP;
      }
    } else if (ch == '\t') {
      parser->first_nonspace_column += to_next_stop;
      to_next_stop = TAB_STOP;
    } else {
      // Anything else (including a NUL byte) ends the scan.
      break;
    }

    parser->first_nonspace += 1;
  }

  parser->indent = parser->first_nonspace_column - parser->column;
  parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace));
}
// Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a newline at the current position. static cmark_node* handle_newline(subject *subj) { bufsize_t nlpos = subj->pos; // skip over newline advance(subj); // skip spaces at beginning of line skip_spaces(subj); if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' && peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(); } else { return make_softbreak(); } }
// Strip a closing run of '#' characters from the chunk, in place.
// The chunk is right-trimmed first. If what remains ends in one or more '#'
// and the character just before that run is a space, the run and that space
// are cut off and the chunk is right-trimmed again. Otherwise only the
// initial right-trim is applied.
static void chop_trailing_hashtags(cmark_chunk *ch) {
  int last, cursor;

  cmark_chunk_rtrim(ch);
  last = ch->len - 1;

  // walk backwards over the trailing '#' run (if any)
  for (cursor = last; cursor >= 0 && peek_at(ch, cursor) == '#'; cursor--) {
  }

  // nothing to remove unless at least one '#' was seen and the run is
  // preceded by a space inside the chunk
  if (cursor == last || cursor < 0 || peek_at(ch, cursor) != ' ') {
    return;
  }

  ch->len = cursor;
  cmark_chunk_rtrim(ch);
}
// Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a cr or newline at the current position. static cmark_node *handle_newline(subject *subj) { bufsize_t nlpos = subj->pos; // skip over cr, crlf, or lf: if (peek_at(subj, subj->pos) == '\r') { advance(subj); } if (peek_at(subj, subj->pos) == '\n') { advance(subj); } // skip spaces at beginning of line skip_spaces(subj); if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' && peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(subj->mem); } else { return make_softbreak(subj->mem); } }
// Advance parser->offset (a byte position in `input`) and parser->column
// (the tab-expanded column) together.
//
//   count    how far to advance
//   columns  true:  `count` is measured in columns
//            false: `count` is measured in bytes
//
// Stops early when a NUL byte is reached.
//
// Tab handling:
//   * column mode: a tab is worth the distance to the next tab stop. If
//     `count` runs out part-way through a tab, the column advances by what
//     is left but the offset stays on the tab.
//   * byte mode: a tab is exactly one byte. It always moves the offset by
//     one and the column to the next tab stop. (The earlier implementation
//     capped the advance at `count` in this mode too, so a request for one
//     byte on a tab consumed the count without moving the offset.)
static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
                             bufsize_t count, bool columns) {
  char c;
  int chars_to_tab;
  int chars_to_advance;

  while (count > 0 && (c = peek_at(input, parser->offset))) {
    if (c == '\t') {
      chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
      if (columns) {
        // consume at most the remaining column budget
        chars_to_advance = chars_to_tab > count ? count : chars_to_tab;
        parser->column += chars_to_advance;
        // only step over the tab byte once it has been fully consumed
        parser->offset += chars_to_advance < chars_to_tab ? 0 : 1;
        count -= chars_to_advance;
      } else {
        // one byte requested, one byte consumed
        parser->column += chars_to_tab;
        parser->offset += 1;
        count -= 1;
      }
    } else {
      parser->offset += 1;
      parser->column += 1; // assume ascii; block starts are ascii
      count -= 1;
    }
  }
}
int main(int argc, char* argv[]) { if(argc != 2) { std::cout << "Usage: " << argv[0] << " /path/to/lua/script.lua" << std::endl; exit(1); } std::cout.unsetf(std::ios::floatfield); std::cout.precision(5); std::cout << std::fixed; std::cout << "==================== SETUP ====================" << std::endl; // TODO: detect unusual currency bases for accurate profit calculation auto config = std::make_shared<LuaScript>(argv[1]); std::cout << "Simulating a maximum of " << config->steps() << " steps" << std::endl; std::cout << config->optimizations() << " optimizations per in sample" << std::endl; auto charts = Chart::load_from_string(config->charts()); auto ts = std::make_unique<TickSource>(config->csv_path()); auto vars = Variable::load_from_string(config->variables()); auto optimizer = std::make_unique<Optimizer>(vars); ts->fill_charts(charts); ts->advance_charts_to_next_sunday(charts); int step_number = 1; int weeks_in_sample = std::stoi(config->in_sample_time()); int weeks_out_of_sample = std::stoi(config->out_of_sample_time()); std::vector<float> optimization_scores; std::vector<float> execution_scores; // ----- MAIN LOOP ------------------------------------------------------------------------- for(;;) { // ----- FIND IN AND OUT OF SAMPLE START AND END POINTS ---------------------------- // in_sample_record_end is just out_of_sample_record_start - 1 unsigned long in_sample_record_start = ts->next_record_id(); unsigned long out_of_sample_record_start = 0; unsigned long out_of_sample_record_end = 0; bool tick_is_sunday = true; bool on_sunday = true; int sundays_seen = 0; for(auto tick = ts->next(); tick; tick = ts->next()) { tick_is_sunday = tick->is_sunday(); if(!on_sunday && tick_is_sunday) { on_sunday = true; sundays_seen++; } else if(on_sunday && !tick_is_sunday) { on_sunday = false; } if(sundays_seen == weeks_in_sample) { out_of_sample_record_start = ts->next_record_id() - 1; break; } } if(!out_of_sample_record_start) { bail("ran out of tick source when 
searching for out of sample start"); } sundays_seen = 0; on_sunday = true; for(auto tick = ts->next(); tick; tick = ts->next()) { tick_is_sunday = tick->is_sunday(); if(!on_sunday && tick_is_sunday) { on_sunday = true; sundays_seen++; } else if(on_sunday && !tick_is_sunday) { on_sunday = false; } if(sundays_seen == weeks_out_of_sample) { // stop on the tick before the current tick out_of_sample_record_end = ts->next_record_id() - 2; break; } } if(!out_of_sample_record_end) { bail("ran out of tick source when searching for out of sample end"); } std::cout << "In sample: " << ts->peek_at(in_sample_record_start)->show() << " through " << ts->peek_at(out_of_sample_record_start - 1)->show() << std::endl; std::cout << "Out of sample: " << ts->peek_at(out_of_sample_record_start)->show() << " through " << ts->peek_at(out_of_sample_record_end)->show() << std::endl; // ----- OPTIMIZE ON IN SAMPLE PERIOD ---------------------------------------------- auto subset = ts->subset(in_sample_record_start, out_of_sample_record_start - 1); auto winner = optimizer->optimize_variables_on(config, charts, subset); // optimizer->print_scores(); // ----- IF OPTIMIZATION SUCCEEDED, EXECUTE ON OUT OF SAMPLE PERIOD ---------------- if(winner->get_score() < config->minimum_optimization_score()) { bail("Failed to find a variables above the cutoff"); } optimization_scores.push_back(winner->get_score()); std::cout << "==================== EXECUTING ON ====================" << std::endl; std::cout << "Winning variables!" 
<< std::endl; std::cout << "Score: " << winner->get_score() << std::endl; for(auto var: winner->get_variables()) { std::cout << "\t" << var->get_name() << " -> " << var->show() << std::endl; } auto new_vars = winner->get_variables(); auto new_config = config; auto new_charts = charts; auto oos = ts->subset(out_of_sample_record_start, out_of_sample_record_end); Simulation sim(new_config, new_charts, new_vars); sim.run(oos); std::cout << "Execution score: " << sim.get_score() << std::endl; std::cout << "% profitable: " << sim.percentage_of_profitable_trades() << "%" << std::endl; std::cout << "Average trade duration: " << sim.average_trade_duration() << std::endl; std::cout << "Worst DD: " << sim.get_worst_drawdown() << "%" << std::endl; std::cout << "Equity high: " << sim.get_equity_high() << std::endl; std::cout << "Equity low: " << sim.get_equity_low() << std::endl; std::cout << "Trades: " << sim.get_trade_count() << std::endl; std::cout << "Winners: " << sim.get_winning_trade_count() << std::endl; std::cout << "Losers: " << sim.get_losing_trade_count() << std::endl; std::cout << "Best: " << sim.best_winning_trade()->profit() << std::endl; std::cout << "Worst: " << sim.worst_losing_trade()->profit() << std::endl; // ----- IF EXECUTION SUCCEEDED, RECORD RESULTS ------------------------------------ if(sim.get_score() < config->minimum_execution_score()) { bail("Failed at optimization step"); } execution_scores.push_back(sim.get_score()); // ----- EXIT IF WE'RE AT MAX STEPS ------------------------------------------------ step_number++; if(step_number > config->steps()) { std::cout << "Reached max steps, breaking" << std::endl; break; } // ----- ADVANCE CHARTS TO NEXT IN SAMPLE START ------------------------------------ ts->seek_to_record(in_sample_record_start); ts->advance_charts_to_next_sunday(charts); } std::cout << "==================== RESULTS ====================" << std::endl; // ----- SHOW PROFIT FOR EACH PERIOD 
------------------------------------------------------- std::cout << "Scores:" << std::endl; for(unsigned long i = 0; i < optimization_scores.size(); i++) { std::cout << i + 1 << " - "; std::cout << "Opti: " << optimization_scores[i]; std::cout << " - "; std::cout << "Exec: " << execution_scores[i]; std::cout << std::endl; } // ----- SHOW WALK FORWARD EFFICIENCY ------------------------------------------------------ float optimization_total = sum_vector(optimization_scores); float execution_total = sum_vector(execution_scores); float efficiency = execution_total / optimization_total; std::cout << "Optimization total: " << optimization_total << " - "; std::cout << "Execution total: " << execution_total << " - "; std::cout << "WFA efficiency: " << efficiency << "% - Verdict: "; if(efficiency < 0.5) { std::cout << "FAIL"; } else if(efficiency >= 0.5 && efficiency < 1.0) { std::cout << "ACCEPTABLE"; } else if(efficiency >= 1.0 && efficiency <= 1.5) { std::cout << "GREAT"; } else { std::cout << "TOO GOOD - SUSPICIOUS"; } std::cout << std::endl; // ----- SHOW PROFIT FACTOR ---------------------------------------------------------------- float gross_profit = 0.0; float gross_loss = 0.0; for(auto score: execution_scores) { if(score >= 0.0) gross_profit += score; else gross_loss += score; } gross_loss = std::abs(gross_loss); float profit_factor = gross_profit / gross_loss; std::cout << "Gross profit: " << gross_profit << " - "; std::cout << "Gross loss: " << gross_loss << " - "; std::cout << "Profit factor: " << profit_factor << " - Verdict: "; if(profit_factor < 1.5) { std::cout << "FAIL"; } else if(profit_factor >= 1.5 && profit_factor < 2.0) { std::cout << "ACCEPTABLE"; } else if(profit_factor >= 2.0 && profit_factor <= 3.0) { std::cout << "GREAT"; } else { std::cout << "TOO GOOD - SUSPICIOUS"; } std::cout << std::endl; // ----- SHOW FINAL SCORE ------------------------------------------------------------------ // TODO: final score should be some function of two or more 
of the following: // execution_total, WFA efficiency, max drawdown, profit factor, % profitable std::cout << "Final score: " << execution_total << " - Minimum score: " << config->minimum_overall_score() << " - Verdict: "; std::cout << (execution_total >= config->minimum_overall_score() ? "PASS" : "FAIL") << std::endl; std::cout << "==================== SHUTDOWN ====================" << std::endl; return 0; }
// Incorporate one line of input into the block tree rooted at parser->root.
//
//   parser  parser state; curline, line_number, current and
//           last_line_length are updated here
//   buffer  the raw bytes of the line
//   bytes   number of bytes in buffer
//
// Phases, in order:
//   1. Load the line into parser->curline and make it end in exactly one
//      '\n' (a trailing "\r\n" or "\r" is replaced, a missing ending added).
//   2. Starting at the root, follow the chain of open last children and
//      check, per container type, whether this line continues each one.
//      `offset` is moved past whatever each matched container consumes.
//   3. From the last matched container, repeatedly try to open new
//      containers (code block, block quote, header, fence, HTML, setext
//      header, hrule, list item) at `offset`.
//   4. Add what is left of the line to the right container, either as a lazy
//      paragraph continuation or after finalizing the unmatched blocks.
//   5. Record the line length without its line ending and clear curline.
static void S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) {
  cmark_node* last_matched_container;
  int offset = 0;   // position in the line consumed so far by containers
  int matched = 0;
  int lev = 0;
  int i;
  cmark_list *data = NULL;
  bool all_matched = true;
  cmark_node* container;
  bool blank = false;
  int first_nonspace;
  int indent;
  bool indented;
  cmark_chunk input;
  bool maybe_lazy;
  int trim = 0;     // number of line-ending bytes found at the end of curline
  bool cr = false;
  bool lf = false;

  // presumably copies buffer into curline while expanding tabs - the rest of
  // this function only ever looks for spaces; confirm against utf8proc_detab
  utf8proc_detab(parser->curline, buffer, bytes);

  // Add a newline to the end if not present:
  // TODO this breaks abstraction:
  if (parser->curline->size > trim && parser->curline->ptr[parser->curline->size - 1 - trim] == '\n') {
    trim += 1;
    lf = true;
  }
  if (parser->curline->size > trim && parser->curline->ptr[parser->curline->size - 1 - trim] == '\r') {
    trim += 1;
    cr = true;
  }
  // a CR was present: drop the whole ending ("\r" or "\r\n") ...
  if (cr) {
    cmark_strbuf_truncate(parser->curline, parser->curline->size - trim);
  }
  // ... and terminate with a single '\n' (also when there was no ending)
  if (cr || !lf) {
    cmark_strbuf_putc(parser->curline, '\n');
  }

  input.data = parser->curline->ptr;
  input.len = parser->curline->size;

  // container starts at the document root.
  container = parser->root;

  parser->line_number++;

  // for each containing node, try to parse the associated line start.
  // bail out on failure: container will point to the last matching node.
  while (container->last_child && container->last_child->open) {
    container = container->last_child;

    first_nonspace = offset;
    while (peek_at(&input, first_nonspace) == ' ') {
      first_nonspace++;
    }

    indent = first_nonspace - offset;
    blank = peek_at(&input, first_nonspace) == '\n' || peek_at(&input, first_nonspace) == '\r';

    if (container->type == NODE_BLOCK_QUOTE) {
      // continues with '>' at an indent of at most 3; one space after the
      // '>' is consumed as well
      matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
      if (matched) {
        offset = first_nonspace + 1;
        if (peek_at(&input, offset) == ' ')
          offset++;
      } else {
        all_matched = false;
      }

    } else if (container->type == NODE_ITEM) {
      // continues when indented at least to the item's content column, or
      // when the line is blank
      if (indent >= container->as.list.marker_offset + container->as.list.padding) {
        offset += container->as.list.marker_offset + container->as.list.padding;
      } else if (blank) {
        offset = first_nonspace;
      } else {
        all_matched = false;
      }

    } else if (container->type == NODE_CODE_BLOCK) {

      if (!container->as.code.fenced) { // indented
        if (indent >= CODE_INDENT) {
          offset += CODE_INDENT;
        } else if (blank) {
          offset = first_nonspace;
        } else {
          all_matched = false;
        }
      } else { // fenced
        matched = 0;
        if (indent <= 3 && (peek_at(&input, first_nonspace) == container->as.code.fence_char)) {
          matched = scan_close_code_fence(&input, first_nonspace);
        }
        if (matched >= container->as.code.fence_length) {
          // closing fence - and since we're at
          // the end of a line, we can return:
          all_matched = false;
          offset += matched;
          parser->current = finalize(parser, container);
          goto finished;
        } else {
          // skip opt. spaces of fence offset
          i = container->as.code.fence_offset;
          while (i > 0 && peek_at(&input, offset) == ' ') {
            offset++;
            i--;
          }
        }
      }
    } else if (container->type == NODE_HEADER) {

      // a header can never contain more than one line
      all_matched = false;

    } else if (container->type == NODE_HTML) {

      if (blank) {
        all_matched = false;
      }

    } else if (container->type == NODE_PARAGRAPH) {

      if (blank) {
        all_matched = false;
      }
    }

    if (!all_matched) {
      container = container->parent; // back up to last matching node
      break;
    }
  }

  last_matched_container = container;

  // check to see if we've hit 2nd blank line, break out of list:
  if (blank && container->last_line_blank) {
    break_out_of_lists(parser, &container);
  }

  // the line may still turn out to be a lazy continuation if the innermost
  // open block before this line was a paragraph
  maybe_lazy = parser->current->type == NODE_PARAGRAPH;
  // try new container starts:
  while (container->type != NODE_CODE_BLOCK && container->type != NODE_HTML) {

    first_nonspace = offset;
    while (peek_at(&input, first_nonspace) == ' ')
      first_nonspace++;

    indent = first_nonspace - offset;
    indented = indent >= CODE_INDENT;
    blank = peek_at(&input, first_nonspace) == '\n' || peek_at(&input, first_nonspace) == '\r';

    if (indented && !maybe_lazy && !blank) {
      // indented code block
      offset += CODE_INDENT;
      container = add_child(parser, container, NODE_CODE_BLOCK, offset + 1);
      container->as.code.fenced = false;
      container->as.code.fence_char = 0;
      container->as.code.fence_length = 0;
      container->as.code.fence_offset = 0;
      container->as.code.info = cmark_chunk_literal("");

    } else if (!indented && peek_at(&input, first_nonspace) == '>') {
      // block quote
      offset = first_nonspace + 1;
      // optional following character
      if (peek_at(&input, offset) == ' ')
        offset++;
      container = add_child(parser, container, NODE_BLOCK_QUOTE, offset + 1);

    } else if (!indented && (matched = scan_atx_header_start(&input, first_nonspace))) {
      // ATX header: the level is the length of the run of '#' that starts
      // at the first '#' found from first_nonspace on
      offset = first_nonspace + matched;
      container = add_child(parser, container, NODE_HEADER, offset + 1);

      int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace);
      int level = 0;

      while (peek_at(&input, hashpos) == '#') {
        level++;
        hashpos++;
      }
      container->as.header.level = level;
      container->as.header.setext = false;

    } else if (!indented && (matched = scan_open_code_fence(&input, first_nonspace))) {
      // fenced code block; the fence's own indent is remembered so that
      // continuation lines can have up to that many spaces skipped
      container = add_child(parser, container, NODE_CODE_BLOCK, first_nonspace + 1);
      container->as.code.fenced = true;
      container->as.code.fence_char = peek_at(&input, first_nonspace);
      container->as.code.fence_length = matched;
      container->as.code.fence_offset = first_nonspace - offset;
      container->as.code.info = cmark_chunk_literal("");
      offset = first_nonspace + matched;

    } else if (!indented && (matched = scan_html_block_tag(&input, first_nonspace))) {

      container = add_child(parser, container, NODE_HTML, first_nonspace + 1);
      // note, we don't adjust offset because the tag is part of the text

    } else if (!indented &&
               container->type == NODE_PARAGRAPH &&
               (lev = scan_setext_header_line(&input, first_nonspace)) &&
               // check that there is only one line in the paragraph:
               (cmark_strbuf_strrchr(&container->string_content, '\n',
                                     cmark_strbuf_len(&container->string_content) - 2) < 0 &&
                cmark_strbuf_strrchr(&container->string_content, '\r',
                                     cmark_strbuf_len(&container->string_content) - 2) < 0)) {

      // setext underline: the paragraph node itself becomes the header
      container->type = NODE_HEADER;
      container->as.header.level = lev;
      container->as.header.setext = true;
      offset = input.len - 1;

    } else if (!indented &&
               !(container->type == NODE_PARAGRAPH && !all_matched) &&
               (matched = scan_hrule(&input, first_nonspace))) {

      // it's only now that we know the line is not part of a setext header:
      container = add_child(parser, container, NODE_HRULE, first_nonspace + 1);
      container = finalize(parser, container);
      offset = input.len - 1;

    } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {

      // compute padding:
      offset = first_nonspace + matched;
      i = 0;
      while (i <= 5 && peek_at(&input, offset + i) == ' ') {
        i++;
      }
      // i = number of spaces after marker, up to 5
      if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n' || peek_at(&input, offset) == '\r') {
        // five or more spaces, or none (line ends after the marker):
        // the padding is the marker width plus one
        data->padding = matched + 1;
        if (i > 0) {
          offset += 1;
        }
      } else {
        data->padding = matched + i;
        offset += i;
      }

      // check container; if it's a list, see if this list item
      // can continue the list; otherwise, create a list container.

      data->marker_offset = indent;

      if (container->type != NODE_LIST || !lists_match(&container->as.list, data)) {
        container = add_child(parser, container, NODE_LIST, first_nonspace + 1);

        memcpy(&container->as.list, data, sizeof(*data));
      }

      // add the list item
      container = add_child(parser, container, NODE_ITEM, first_nonspace + 1);
      /* TODO: static */
      memcpy(&container->as.list, data, sizeof(*data));
      // data was handed over by parse_list_marker; both nodes hold copies
      free(data);
    } else {
      break;
    }

    if (accepts_lines(container->type)) {
      // if it's a line container, it can't contain other containers
      break;
    }
    maybe_lazy = false;
  }

  // what remains at offset is a text line. add the text to the
  // appropriate container.

  first_nonspace = offset;
  while (peek_at(&input, first_nonspace) == ' ')
    first_nonspace++;

  indent = first_nonspace - offset;
  blank = peek_at(&input, first_nonspace) == '\n' || peek_at(&input, first_nonspace) == '\r';

  if (blank && container->last_child) {
    container->last_child->last_line_blank = true;
  }

  // block quote lines are never blank as they start with >
  // and we don't count blanks in fenced code for purposes of tight/loose
  // lists or breaking out of lists. we also don't set last_line_blank
  // on an empty list item.
  container->last_line_blank = (blank &&
                                container->type != NODE_BLOCK_QUOTE &&
                                container->type != NODE_HEADER &&
                                !(container->type == NODE_CODE_BLOCK && container->as.code.fenced) &&
                                !(container->type == NODE_ITEM &&
                                  container->first_child == NULL &&
                                  container->start_line == parser->line_number));

  // the flag is cleared on every ancestor of the container
  cmark_node *cont = container;
  while (cont->parent) {
    cont->parent->last_line_blank = false;
    cont = cont->parent;
  }

  // lazy continuation: no new container was opened, the line is not blank,
  // and the innermost open block is a non-empty paragraph that was not
  // itself matched
  if (parser->current != last_matched_container &&
      container == last_matched_container &&
      !blank &&
      parser->current->type == NODE_PARAGRAPH &&
      cmark_strbuf_len(&parser->current->string_content) > 0) {

    add_line(parser->current, &input, offset);

  } else { // not a lazy continuation

    // finalize any blocks that were not matched and set cur to container:
    while (parser->current != last_matched_container) {
      parser->current = finalize(parser, parser->current);
      assert(parser->current != NULL);
    }

    if (container->type == NODE_CODE_BLOCK || container->type == NODE_HTML) {

      // added from offset, so leading spaces beyond it are kept
      add_line(container, &input, offset);

    } else if (blank) {

      // ??? do nothing

    } else if (accepts_lines(container->type)) {

      if (container->type == NODE_HEADER && container->as.header.setext == false) {
        chop_trailing_hashtags(&input);
      }
      add_line(container, &input, first_nonspace);

    } else {
      // create paragraph container for line
      container = add_child(parser, container, NODE_PARAGRAPH, first_nonspace + 1);
      add_line(container, &input, first_nonspace);
    }

    parser->current = container;
  }
finished:
  // remember the length of this line without its '\n' / '\r' ending
  parser->last_line_length = parser->curline->size;
  if (parser->last_line_length && parser->curline->ptr[parser->last_line_length - 1] == '\n')
    parser->last_line_length--;
  if (parser->last_line_length && parser->curline->ptr[parser->last_line_length - 1] == '\r')
    parser->last_line_length--;

  cmark_strbuf_clear(parser->curline);
}
// Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr) { unsigned char c; int startpos; cmark_list *data; startpos = pos; c = peek_at(input, pos); if (c == '*' || c == '-' || c == '+') { pos++; if (!cmark_isspace(peek_at(input, pos))) { return 0; } data = (cmark_list *)calloc(1, sizeof(*data)); if(data == NULL) { return 0; } else { data->marker_offset = 0; // will be adjusted later data->list_type = CMARK_BULLET_LIST; data->bullet_char = c; data->start = 1; data->delimiter = CMARK_PERIOD_DELIM; data->tight = false; } } else if (cmark_isdigit(c)) { int start = 0; do { start = (10 * start) + (peek_at(input, pos) - '0'); pos++; } while (cmark_isdigit(peek_at(input, pos))); c = peek_at(input, pos); if (c == '.' || c == ')') { pos++; if (!cmark_isspace(peek_at(input, pos))) { return 0; } data = (cmark_list *)calloc(1, sizeof(*data)); if(data == NULL) { return 0; } else { data->marker_offset = 0; // will be adjusted later data->list_type = CMARK_ORDERED_LIST; data->bullet_char = 0; data->start = start; data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM); data->tight = false; } } else { return 0; } } else { return 0; } *dataptr = data; return (pos - startpos); }
// Handle a ']' at the subject's current position.
//
//   subj    the inline subject; its position and delimiter list are updated
//   parent  the node whose last_child is set to the new link/image on a match
//
// Returns a literal "]" string node when the bracket does not close a link
// or image. Returns NULL when it does: in that case the opener's own text
// node is converted in place into the link/image node and the nodes that
// followed it become its children, so there is no new node to hand back.
static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) {
  int initial_pos;
  int starturl, endurl, starttitle, endtitle, endall;
  int n;
  int sps;
  cmark_reference *ref;
  bool is_image = false;
  cmark_chunk url_chunk, title_chunk;
  unsigned char *url, *title;
  delimiter *opener;
  cmark_node *link_text;
  cmark_node *inl;
  cmark_chunk raw_label;
  int found_label;

  advance(subj); // advance past ]
  initial_pos = subj->pos;

  // look through list of delimiters for a [ or !
  opener = subj->last_delim;
  while (opener) {
    if (opener->delim_char == '[' || opener->delim_char == '!') {
      break;
    }
    opener = opener->previous;
  }

  // no opener at all: the bracket is plain text
  if (opener == NULL) {
    return make_str(cmark_chunk_literal("]"));
  }

  if (!opener->active) {
    // take delimiter off stack
    remove_delimiter(subj, opener);
    return make_str(cmark_chunk_literal("]"));
  }

  // If we got here, we matched a potential link/image text.
  is_image = opener->delim_char == '!';
  link_text = opener->inl_text->next;

  // Now we check to see if it's a link/image.

  // First, look for an inline link.
  if (peek_char(subj) == '(' &&
      ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
      ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {

    // try to parse an explicit link:
    starturl = subj->pos + 1 + sps; // after (
    endurl = starturl + n;
    starttitle = endurl + scan_spacechars(&subj->input, endurl);

    // ensure there are spaces btw url and title
    endtitle = (starttitle == endurl) ? starttitle :
               starttitle + scan_link_title(&subj->input, starttitle);

    endall = endtitle + scan_spacechars(&subj->input, endtitle);

    if (peek_at(subj, endall) == ')') {
      subj->pos = endall + 1;

      // the chunks are only needed to produce the cleaned url and title
      url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
      title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
      url = cmark_clean_url(&url_chunk);
      title = cmark_clean_title(&title_chunk);
      cmark_chunk_free(&url_chunk);
      cmark_chunk_free(&title_chunk);

      goto match;

    } else {
      goto noMatch;
    }
  }

  // Next, look for a following [link label] that matches in refmap.
  // skip spaces
  subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
  raw_label = cmark_chunk_literal("");
  found_label = link_label(subj, &raw_label);
  if (!found_label || raw_label.len == 0) {
    // no label, or an empty one: use the text between the brackets as the
    // label instead
    cmark_chunk_free(&raw_label);
    raw_label = cmark_chunk_dup(&subj->input, opener->position,
                                initial_pos - opener->position - 1);
  }

  if (!found_label) {
    // If we have a shortcut reference link, back up
    // to before the spaces we skipped.
    subj->pos = initial_pos;
  }

  ref = cmark_reference_lookup(subj->refmap, &raw_label);
  cmark_chunk_free(&raw_label);

  if (ref != NULL) { // found
    url = bufdup(ref->url);
    title = bufdup(ref->title);
    goto match;
  } else {
    goto noMatch;
  }

noMatch:
  // If we fall through to here, it means we didn't match a link:
  remove_delimiter(subj, opener); // remove this opener from delimiter list
  subj->pos = initial_pos;
  return make_str(cmark_chunk_literal("]"));

match:
  // reuse the opener's text node as the link/image node
  inl = opener->inl_text;
  inl->type = is_image ? NODE_IMAGE : NODE_LINK;
  cmark_chunk_free(&inl->as.literal);
  inl->first_child = link_text;
  process_emphasis(subj, opener->previous);
  inl->as.link.url = url;
  inl->as.link.title = title;
  inl->next = NULL;
  if (link_text) {
    // reparent every node that followed the opener; tmp ends on the last one
    cmark_node *tmp;
    link_text->prev = NULL;
    for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) {
      tmp->parent = inl;
    }
    tmp->parent = inl;
    inl->last_child = tmp;
  }
  parent->last_child = inl;

  // process_emphasis will remove this delimiter and all later ones.
  // Now, if we have a link, we also want to deactivate earlier link
  // delimiters. (This code can be removed if we decide to allow links
  // inside links.)
  if (!is_image) {
    opener = subj->last_delim;
    while (opener != NULL) {
      if (opener->delim_char == '[') {
        if (!opener->active) {
          // stop at the first '[' that is already inactive
          break;
        } else {
          opener->active = false;
        }
      }
      opener = opener->previous;
    }
  }

  return NULL;
}
static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes) { cmark_node *last_matched_container; bufsize_t matched = 0; int lev = 0; int i; cmark_list *data = NULL; bool all_matched = true; cmark_node *container; bool indented; cmark_chunk input; bool maybe_lazy; if (parser->options & CMARK_OPT_VALIDATE_UTF8) { cmark_utf8proc_check(parser->curline, buffer, bytes); } else { cmark_strbuf_put(parser->curline, buffer, bytes); } // ensure line ends with a newline: if (bytes == 0 || !S_is_line_end_char(parser->curline->ptr[bytes - 1])) { cmark_strbuf_putc(parser->curline, '\n'); } parser->offset = 0; parser->column = 0; parser->blank = false; input.data = parser->curline->ptr; input.len = parser->curline->size; // container starts at the document root. container = parser->root; parser->line_number++; // for each containing node, try to parse the associated line start. // bail out on failure: container will point to the last matching node. while (container->last_child && container->last_child->open) { container = container->last_child; S_find_first_nonspace(parser, &input); if (container->type == CMARK_NODE_BLOCK_QUOTE) { matched = parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>'; if (matched) { S_advance_offset(parser, &input, parser->indent + 1, true); if (peek_at(&input, parser->offset) == ' ') parser->offset++; } else { all_matched = false; } } else if (container->type == CMARK_NODE_ITEM) { if (parser->indent >= container->as.list.marker_offset + container->as.list.padding) { S_advance_offset(parser, &input, container->as.list.marker_offset + container->as.list.padding, true); } else if (parser->blank && container->first_child != NULL) { // if container->first_child is NULL, then the opening line // of the list item was blank after the list marker; in this // case, we are done with the list item. 
S_advance_offset(parser, &input, parser->first_nonspace - parser->offset, false); } else { all_matched = false; } } else if (container->type == CMARK_NODE_CODE_BLOCK) { if (!container->as.code.fenced) { // indented if (parser->indent >= CODE_INDENT) { S_advance_offset(parser, &input, CODE_INDENT, true); } else if (parser->blank) { S_advance_offset(parser, &input, parser->first_nonspace - parser->offset, false); } else { all_matched = false; } } else { // fenced matched = 0; if (parser->indent <= 3 && (peek_at(&input, parser->first_nonspace) == container->as.code.fence_char)) { matched = scan_close_code_fence(&input, parser->first_nonspace); } if (matched >= container->as.code.fence_length) { // closing fence - and since we're at // the end of a line, we can return: all_matched = false; S_advance_offset(parser, &input, matched, false); parser->current = finalize(parser, container); goto finished; } else { // skip opt. spaces of fence parser->offset i = container->as.code.fence_offset; while (i > 0 && peek_at(&input, parser->offset) == ' ') { S_advance_offset(parser, &input, 1, false); i--; } } } } else if (container->type == CMARK_NODE_HEADING) { // a heading can never contain more than one line all_matched = false; } else if (container->type == CMARK_NODE_HTML_BLOCK) { switch (container->as.html_block_type) { case 1: case 2: case 3: case 4: case 5: // these types of blocks can accept blanks break; case 6: case 7: if (parser->blank) { all_matched = false; } break; default: fprintf(stderr, "Error (%s:%d): Unknown HTML block type %d\n", __FILE__, __LINE__, container->as.html_block_type); exit(1); } } else if (container->type == CMARK_NODE_PARAGRAPH) { if (parser->blank) { all_matched = false; } } if (!all_matched) { container = container->parent; // back up to last matching node break; } } last_matched_container = container; // check to see if we've hit 2nd blank line, break out of list: if (parser->blank && container->last_line_blank) { break_out_of_lists(parser, 
&container); } maybe_lazy = parser->current->type == CMARK_NODE_PARAGRAPH; // try new container starts: while (container->type != CMARK_NODE_CODE_BLOCK && container->type != CMARK_NODE_HTML_BLOCK) { S_find_first_nonspace(parser, &input); indented = parser->indent >= CODE_INDENT; if (!indented && peek_at(&input, parser->first_nonspace) == '>') { S_advance_offset(parser, &input, parser->first_nonspace + 1 - parser->offset, false); // optional following character if (peek_at(&input, parser->offset) == ' ') S_advance_offset(parser, &input, 1, false); container = add_child(parser, container, CMARK_NODE_BLOCK_QUOTE, parser->offset + 1); } else if (!indented && (matched = scan_atx_heading_start( &input, parser->first_nonspace))) { S_advance_offset(parser, &input, parser->first_nonspace + matched - parser->offset, false); container = add_child(parser, container, CMARK_NODE_HEADING, parser->offset + 1); bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); int level = 0; while (peek_at(&input, hashpos) == '#') { level++; hashpos++; } container->as.heading.level = level; container->as.heading.setext = false; } else if (!indented && (matched = scan_open_code_fence( &input, parser->first_nonspace))) { container = add_child(parser, container, CMARK_NODE_CODE_BLOCK, parser->first_nonspace + 1); container->as.code.fenced = true; container->as.code.fence_char = peek_at(&input, parser->first_nonspace); container->as.code.fence_length = matched; container->as.code.fence_offset = (int8_t)(parser->first_nonspace - parser->offset); container->as.code.info = cmark_chunk_literal(""); S_advance_offset(parser, &input, parser->first_nonspace + matched - parser->offset, false); } else if (!indented && ((matched = scan_html_block_start( &input, parser->first_nonspace)) || (container->type != CMARK_NODE_PARAGRAPH && (matched = scan_html_block_start_7( &input, parser->first_nonspace))))) { container = add_child(parser, container, CMARK_NODE_HTML_BLOCK, 
parser->first_nonspace + 1); container->as.html_block_type = matched; // note, we don't adjust parser->offset because the tag is part of the // text } else if (!indented && container->type == CMARK_NODE_PARAGRAPH && (lev = scan_setext_heading_line(&input, parser->first_nonspace))) { container->type = CMARK_NODE_HEADING; container->as.heading.level = lev; container->as.heading.setext = true; S_advance_offset(parser, &input, input.len - 1 - parser->offset, false); } else if (!indented && !(container->type == CMARK_NODE_PARAGRAPH && !all_matched) && (matched = scan_thematic_break(&input, parser->first_nonspace))) { // it's only now that we know the line is not part of a setext heading: container = add_child(parser, container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); S_advance_offset(parser, &input, input.len - 1 - parser->offset, false); } else if ((matched = parse_list_marker(&input, parser->first_nonspace, &data)) && (!indented || container->type == CMARK_NODE_LIST)) { // Note that we can have new list items starting with >= 4 // spaces indent, as long as the list container is still open. // compute padding: S_advance_offset(parser, &input, parser->first_nonspace + matched - parser->offset, false); i = 0; while (i <= 5 && peek_at(&input, parser->offset + i) == ' ') { i++; } // i = number of spaces after marker, up to 5 if (i >= 5 || i < 1 || S_is_line_end_char(peek_at(&input, parser->offset))) { data->padding = matched + 1; if (i > 0) { S_advance_offset(parser, &input, 1, false); } } else { data->padding = matched + i; S_advance_offset(parser, &input, i, true); } // check container; if it's a list, see if this list item // can continue the list; otherwise, create a list container. 
data->marker_offset = parser->indent; if (container->type != CMARK_NODE_LIST || !lists_match(&container->as.list, data)) { container = add_child(parser, container, CMARK_NODE_LIST, parser->first_nonspace + 1); memcpy(&container->as.list, data, sizeof(*data)); } // add the list item container = add_child(parser, container, CMARK_NODE_ITEM, parser->first_nonspace + 1); /* TODO: static */ memcpy(&container->as.list, data, sizeof(*data)); free(data); } else if (indented && !maybe_lazy && !parser->blank) { S_advance_offset(parser, &input, CODE_INDENT, true); container = add_child(parser, container, CMARK_NODE_CODE_BLOCK, parser->offset + 1); container->as.code.fenced = false; container->as.code.fence_char = 0; container->as.code.fence_length = 0; container->as.code.fence_offset = 0; container->as.code.info = cmark_chunk_literal(""); } else { break; } if (accepts_lines(container->type)) { // if it's a line container, it can't contain other containers break; } maybe_lazy = false; } // what remains at parser->offset is a text line. add the text to the // appropriate container. S_find_first_nonspace(parser, &input); if (parser->blank && container->last_child) { container->last_child->last_line_blank = true; } // block quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. 
container->last_line_blank = (parser->blank && container->type != CMARK_NODE_BLOCK_QUOTE && container->type != CMARK_NODE_HEADING && container->type != CMARK_NODE_THEMATIC_BREAK && !(container->type == CMARK_NODE_CODE_BLOCK && container->as.code.fenced) && !(container->type == CMARK_NODE_ITEM && container->first_child == NULL && container->start_line == parser->line_number)); cmark_node *cont = container; while (cont->parent) { cont->parent->last_line_blank = false; cont = cont->parent; } if (parser->current != last_matched_container && container == last_matched_container && !parser->blank && parser->current->type == CMARK_NODE_PARAGRAPH && cmark_strbuf_len(&parser->current->string_content) > 0) { add_line(parser->current, &input, parser->offset); } else { // not a lazy continuation // finalize any blocks that were not matched and set cur to container: while (parser->current != last_matched_container) { parser->current = finalize(parser, parser->current); assert(parser->current != NULL); } if (container->type == CMARK_NODE_CODE_BLOCK) { add_line(container, &input, parser->offset); } else if (container->type == CMARK_NODE_HTML_BLOCK) { add_line(container, &input, parser->offset); int matches_end_condition; switch (container->as.html_block_type) { case 1: // </script>, </style>, </pre> matches_end_condition = scan_html_block_end_1(&input, parser->first_nonspace); break; case 2: // --> matches_end_condition = scan_html_block_end_2(&input, parser->first_nonspace); break; case 3: // ?> matches_end_condition = scan_html_block_end_3(&input, parser->first_nonspace); break; case 4: // > matches_end_condition = scan_html_block_end_4(&input, parser->first_nonspace); break; case 5: // ]]> matches_end_condition = scan_html_block_end_5(&input, parser->first_nonspace); break; default: matches_end_condition = 0; break; } if (matches_end_condition) { container = finalize(parser, container); assert(parser->current != NULL); } } else if (parser->blank) { // ??? 
do nothing } else if (accepts_lines(container->type)) { if (container->type == CMARK_NODE_HEADING && container->as.heading.setext == false) { chop_trailing_hashtags(&input); } add_line(container, &input, parser->first_nonspace); } else { // create paragraph container for line container = add_child(parser, container, CMARK_NODE_PARAGRAPH, parser->first_nonspace + 1); add_line(container, &input, parser->first_nonspace); } parser->current = container; } finished: parser->last_line_length = input.len; if (parser->last_line_length && input.data[parser->last_line_length - 1] == '\n') parser->last_line_length -= 1; if (parser->last_line_length && input.data[parser->last_line_length - 1] == '\r') parser->last_line_length -= 1; cmark_strbuf_clear(parser->curline); }
// Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr) { unsigned char c; bufsize_t startpos; cmark_list *data; startpos = pos; c = peek_at(input, pos); if (c == '*' || c == '-' || c == '+') { pos++; if (!cmark_isspace(peek_at(input, pos))) { return 0; } data = (cmark_list *)calloc(1, sizeof(*data)); if (data == NULL) { return 0; } else { data->marker_offset = 0; // will be adjusted later data->list_type = CMARK_BULLET_LIST; data->bullet_char = c; data->start = 1; data->delimiter = CMARK_PERIOD_DELIM; data->tight = false; } } else if (cmark_isdigit(c)) { int start = 0; int digits = 0; do { start = (10 * start) + (peek_at(input, pos) - '0'); pos++; digits++; // We limit to 9 digits to avoid overflow, // assuming max int is 2^31 - 1 // This also seems to be the limit for 'start' in some browsers. } while (digits < 9 && cmark_isdigit(peek_at(input, pos))); c = peek_at(input, pos); if (c == '.' || c == ')') { pos++; if (!cmark_isspace(peek_at(input, pos))) { return 0; } data = (cmark_list *)calloc(1, sizeof(*data)); if (data == NULL) { return 0; } else { data->marker_offset = 0; // will be adjusted later data->list_type = CMARK_ORDERED_LIST; data->bullet_char = 0; data->start = start; data->delimiter = (c == '.' ? CMARK_PERIOD_DELIM : CMARK_PAREN_DELIM); data->tight = false; } } else { return 0; } } else { return 0; } *dataptr = data; return (pos - startpos); }
// Return a link, an image, or a literal close bracket. static cmark_node *handle_close_bracket(subject *subj) { bufsize_t initial_pos, after_link_text_pos; bufsize_t endurl, starttitle, endtitle, endall; bufsize_t sps, n; cmark_reference *ref = NULL; cmark_chunk url_chunk, title_chunk; cmark_chunk url, title; bracket *opener; cmark_node *inl; cmark_chunk raw_label; int found_label; cmark_node *tmp, *tmpnext; bool is_image; advance(subj); // advance past ] initial_pos = subj->pos; // get last [ or ![ opener = subj->last_bracket; if (opener == NULL) { return make_str(subj->mem, cmark_chunk_literal("]")); } if (!opener->active) { // take delimiter off stack pop_bracket(subj); return make_str(subj->mem, cmark_chunk_literal("]")); } // If we got here, we matched a potential link/image text. // Now we check to see if it's a link/image. is_image = opener->image; after_link_text_pos = subj->pos; // First, look for an inline link. if (peek_char(subj) == '(' && ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, &url_chunk)) > -1)) { // try to parse an explicit link: endurl = subj->pos + 1 + sps + n; starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? starttitle : starttitle + scan_link_title(&subj->input, starttitle); endall = endtitle + scan_spacechars(&subj->input, endtitle); if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle); url = cmark_clean_url(subj->mem, &url_chunk); title = cmark_clean_title(subj->mem, &title_chunk); cmark_chunk_free(subj->mem, &url_chunk); cmark_chunk_free(subj->mem, &title_chunk); goto match; } else { // it could still be a shortcut reference link subj->pos = after_link_text_pos; } } // Next, look for a following [link label] that matches in refmap. 
// skip spaces raw_label = cmark_chunk_literal(""); found_label = link_label(subj, &raw_label); if (!found_label) { // If we have a shortcut reference link, back up // to before the spacse we skipped. subj->pos = initial_pos; } if ((!found_label || raw_label.len == 0) && !opener->bracket_after) { cmark_chunk_free(subj->mem, &raw_label); raw_label = cmark_chunk_dup(&subj->input, opener->position, initial_pos - opener->position - 1); found_label = true; } if (found_label) { ref = cmark_reference_lookup(subj->refmap, &raw_label); cmark_chunk_free(subj->mem, &raw_label); } if (ref != NULL) { // found url = chunk_clone(subj->mem, &ref->url); title = chunk_clone(subj->mem, &ref->title); goto match; } else { goto noMatch; } noMatch: // If we fall through to here, it means we didn't match a link: pop_bracket(subj); // remove this opener from delimiter list subj->pos = initial_pos; return make_str(subj->mem, cmark_chunk_literal("]")); match: inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK); inl->as.link.url = url; inl->as.link.title = title; cmark_node_insert_before(opener->inl_text, inl); // Add link text: tmp = opener->inl_text->next; while (tmp) { tmpnext = tmp->next; cmark_node_append_child(inl, tmp); tmp = tmpnext; } // Free the bracket [: cmark_node_free(opener->inl_text); process_emphasis(subj, opener->previous_delimiter); pop_bracket(subj); // Now, if we have a link, we also want to deactivate earlier link // delimiters. (This code can be removed if we decide to allow links // inside links.) if (!is_image) { opener = subj->last_bracket; while (opener != NULL) { if (!opener->image) { if (!opener->active) { break; } else { opener->active = false; } } opener = opener->previous; } } return NULL; }