// Append one traceback entry to a native exception object.
//
// Each entry occupies three consecutive size_t slots in self->traceback_data:
// the file qstr, the line number and the block qstr.
//
// All allocations here are of the "maybe" kind: if storage cannot be obtained
// (eg because gc is locked) the function returns without recording the entry
// and leaves the exception's existing traceback untouched.
void mp_obj_exception_add_traceback(mp_obj_t self_in, qstr file, size_t line, qstr block) {
    GET_NATIVE_EXCEPTION(self, self_in);

    if (self->traceback_data == NULL) {
        // first entry for this exception: allocate room for exactly one entry
        size_t *first = m_new_maybe(size_t, 3);
        if (first == NULL) {
            return;
        }
        self->traceback_data = first;
        self->traceback_alloc = 3;
        self->traceback_len = 0;
    } else if (self->traceback_len + 3 > self->traceback_alloc) {
        // buffer is full: grow by a single entry only, to stay conservative
        // with memory
        size_t *grown = m_renew_maybe(size_t, self->traceback_data,
            self->traceback_alloc, self->traceback_alloc + 3, true);
        if (grown == NULL) {
            return;
        }
        self->traceback_data = grown;
        self->traceback_alloc += 3;
    }

    // write the new entry into the next free slot and account for it
    size_t *entry = self->traceback_data + self->traceback_len;
    entry[0] = file;
    entry[1] = line;
    entry[2] = block;
    self->traceback_len += 3;
}
// Bump allocator for parse nodes: hands out num_bytes of storage from the
// parser's current chunk, so that nodes are stored sequentially in large
// chunks.
//
// If the current chunk is too small it is first grown in place; if that
// fails the chunk is trimmed to its used size, linked onto parser->tree.chunk
// and a fresh chunk is started.
//
// Returns a pointer to num_bytes of storage inside the current chunk.
// NOTE(review): the result of m_new is used unchecked, so m_new presumably
// does not return NULL on failure -- confirm against its definition.
STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
    mp_parse_chunk_t *cur = parser->cur_chunk;

    if (cur != NULL && cur->union_.used + num_bytes > cur->alloc) {
        // current chunk is too small; try to extend it without moving it
        mp_parse_chunk_t *grown = (mp_parse_chunk_t*)m_renew_maybe(byte, cur,
            sizeof(mp_parse_chunk_t) + cur->alloc,
            sizeof(mp_parse_chunk_t) + cur->alloc + num_bytes,
            false);
        if (grown != NULL) {
            // extended in place, the chunk now has num_bytes more capacity
            cur->alloc += num_bytes;
        } else {
            // could not extend: trim the chunk down to what is used, retire
            // it onto the tree's chunk list and fall through to start a new one
            (void)m_renew_maybe(byte, cur,
                sizeof(mp_parse_chunk_t) + cur->alloc,
                sizeof(mp_parse_chunk_t) + cur->union_.used,
                false);
            cur->alloc = cur->union_.used;
            cur->union_.next = parser->tree.chunk;
            parser->tree.chunk = cur;
            cur = NULL;
        }
    }

    if (cur == NULL) {
        // no usable chunk: allocate one that is at least the default size and
        // large enough for this request
        size_t default_alloc = MICROPY_ALLOC_PARSE_CHUNK_INIT;
        size_t alloc = (default_alloc < num_bytes) ? num_bytes : default_alloc;
        cur = (mp_parse_chunk_t*)m_new(byte, sizeof(mp_parse_chunk_t) + alloc);
        cur->alloc = alloc;
        cur->union_.used = 0;
        parser->cur_chunk = cur;
    }

    // carve the requested bytes off the end of the used region
    byte *result = cur->data + cur->union_.used;
    cur->union_.used += num_bytes;
    return result;
}
// Append one traceback entry (file, line, block) to a native exception.
//
// Storage for the traceback is obtained with "maybe" allocations.  When the
// very first allocation fails and MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF is
// enabled, the emergency exception buffer is used instead, provided it can
// hold EMG_TRACEBACK_ALLOC size_t values.  A traceback that lives in the
// emergency buffer is never resized.
//
// If no storage is available the entry is dropped and the function returns.
void mp_obj_exception_add_traceback(mp_obj_t self_in, qstr file, size_t line, qstr block) {
    GET_NATIVE_EXCEPTION(self, self_in);

    if (self->traceback_data == NULL) {
        // no traceback storage yet: try the heap first
        size_t *heap_tb = m_new_maybe(size_t, TRACEBACK_ENTRY_LEN);
        if (heap_tb != NULL) {
            self->traceback_data = heap_tb;
            self->traceback_alloc = TRACEBACK_ENTRY_LEN;
        } else {
            #if MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF
            if (mp_emergency_exception_buf_size < EMG_TRACEBACK_ALLOC * sizeof(size_t)) {
                // heap allocation failed and the emergency buffer is too small
                return;
            }
            // fall back to the emergency buffer for the traceback data
            self->traceback_data = (size_t*)MP_STATE_VM(mp_emergency_exception_buf);
            self->traceback_alloc = EMG_TRACEBACK_ALLOC;
            #else
            // heap allocation failed and there is no fallback
            return;
            #endif
        }
        self->traceback_len = 0;
    } else if (self->traceback_len + TRACEBACK_ENTRY_LEN > self->traceback_alloc) {
        #if MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF
        if (self->traceback_data == (size_t*)MP_STATE_VM(mp_emergency_exception_buf)) {
            // the emergency buffer has a fixed size and cannot be grown
            return;
        }
        #endif
        // grow by a single entry only, to stay conservative with memory
        size_t *grown = m_renew_maybe(size_t, self->traceback_data,
            self->traceback_alloc, self->traceback_alloc + TRACEBACK_ENTRY_LEN, true);
        if (grown == NULL) {
            return;
        }
        self->traceback_data = grown;
        self->traceback_alloc += TRACEBACK_ENTRY_LEN;
    }

    // write the new entry into the next free slot and account for it
    size_t *entry = self->traceback_data + self->traceback_len;
    entry[0] = file;
    entry[1] = line;
    entry[2] = block;
    self->traceback_len += TRACEBACK_ENTRY_LEN;
}
// Push parse node pn onto the parser's result stack.
//
// Does nothing if the parser already has parse_error set.  When the stack is
// full it is grown by MICROPY_ALLOC_PARSE_RESULT_INC entries; if that fails,
// parse_error is set to PARSE_ERROR_MEMORY and pn is not pushed.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->parse_error) {
        // an earlier error is pending, so don't touch the stack
        return;
    }

    if (parser->result_stack_top >= parser->result_stack_alloc) {
        // out of room, extend the stack by one increment
        mp_parse_node_t *grown = m_renew_maybe(mp_parse_node_t, parser->result_stack,
            parser->result_stack_alloc,
            parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC,
            true);
        if (grown == NULL) {
            parser->parse_error = PARSE_ERROR_MEMORY;
            return;
        }
        parser->result_stack = grown;
        parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
    }

    parser->result_stack[parser->result_stack_top] = pn;
    parser->result_stack_top += 1;
}
// Push parse node pn onto the parser's result stack.
//
// Does nothing if the parser has already recorded a memory error.  When the
// stack is full it is grown by MICROPY_ALLOC_PARSE_RESULT_INC entries; if
// that fails, memory_error(parser) is called and pn is not pushed.
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
    if (parser->had_memory_error) {
        return;
    }
    if (parser->result_stack_top >= parser->result_stack_alloc) {
        // The grown stack gets its own name: calling it `pn` would shadow the
        // node parameter and make the final store below easy to misread.
        // allow_move is passed as true, matching the other stack-growing
        // m_renew_maybe calls in this file.
        mp_parse_node_t *new_stack = m_renew_maybe(mp_parse_node_t, parser->result_stack,
            parser->result_stack_alloc,
            parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC,
            true);
        if (new_stack == NULL) {
            memory_error(parser);
            return;
        }
        parser->result_stack = new_stack;
        parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
    }
    parser->result_stack[parser->result_stack_top++] = pn;
}
// Push a (source line, rule id, argument index) record onto the parser's
// rule stack.
//
// Does nothing if the parser already has parse_error set.  When the stack is
// full it is grown by MICROPY_ALLOC_PARSE_RULE_INC entries; if that fails,
// parse_error is set to PARSE_ERROR_MEMORY and nothing is pushed.
STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t arg_i) {
    if (parser->parse_error) {
        // an earlier error is pending, so don't touch the stack
        return;
    }

    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        // out of room, extend the stack by one increment
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack,
            parser->rule_stack_alloc,
            parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC,
            true);
        if (grown == NULL) {
            parser->parse_error = PARSE_ERROR_MEMORY;
            return;
        }
        parser->rule_stack = grown;
        parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
    }

    // claim the next slot and fill it in
    rule_stack_t *slot = parser->rule_stack + parser->rule_stack_top;
    parser->rule_stack_top += 1;
    slot->src_line = src_line;
    slot->rule_id = rule->rule_id;
    slot->arg_i = arg_i;
}
// Push a (source line, rule id, argument index) record onto the parser's
// rule stack.
//
// Does nothing if the parser has already recorded a memory error.  When the
// stack is full it is grown by MICROPY_ALLOC_PARSE_RULE_INC entries; if that
// fails, memory_error(parser) is called and nothing is pushed.
STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
    if (parser->had_memory_error) {
        // an earlier allocation failed, so don't touch the stack
        return;
    }

    if (parser->rule_stack_top >= parser->rule_stack_alloc) {
        // out of room, extend the stack by one increment
        rule_stack_t *grown = m_renew_maybe(rule_stack_t, parser->rule_stack,
            parser->rule_stack_alloc,
            parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC,
            true);
        if (grown == NULL) {
            memory_error(parser);
            return;
        }
        parser->rule_stack = grown;
        parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
    }

    // claim the next slot and fill it in
    rule_stack_t *slot = parser->rule_stack + parser->rule_stack_top;
    parser->rule_stack_top += 1;
    slot->src_line = src_line;
    slot->rule_id = rule->rule_id;
    slot->arg_i = arg_i;
}
STATIC void exc_add_strn(void *data, const char *str, size_t len) { struct _exc_printer_t *pr = data; if (pr->len + len >= pr->alloc) { // Not enough room for data plus a null byte so try to grow the buffer if (pr->allow_realloc) { size_t new_alloc = pr->alloc + len + 16; byte *new_buf = m_renew_maybe(byte, pr->buf, pr->alloc, new_alloc, true); if (new_buf == NULL) { pr->allow_realloc = false; len = pr->alloc - pr->len - 1; } else { pr->alloc = new_alloc; pr->buf = new_buf; } } else { len = pr->alloc - pr->len - 1; } } memcpy(pr->buf + pr->len, str, len); pr->len += len; }
// Parse the token stream supplied by `lex` and return the resulting parse tree.
//
// Parameters:
//   lex        - lexer providing the tokens; on success it is freed here on
//                behalf of the caller (see the end of the function).
//   input_kind - selects the top-level grammar rule: RULE_single_input,
//                RULE_eval_input, or RULE_file_input for anything else.
//
// Returns parser.tree, whose `root` is the single node left on the result
// stack and whose `chunk` is the list of chunks linked during parsing.
//
// Errors: on a syntax error an IndentationError or SyntaxError is created,
// given one traceback entry (source name and current token line) and raised
// with nlr_raise.  On that path the two stacks and the lexer are not freed by
// this function.
//
// The parser is non-recursive: it keeps an explicit stack of pending rules
// (push_rule / pop_rule) and a stack of result nodes.  Each iteration of the
// main loop pops one rule and handles it according to its kind (or-rule,
// and-rule or list-rule).  `backtrack` carries "the rule just tried did not
// match" from a child rule back to the rule that pushed it.
//
// NOTE(review): this function neither initialises nor checks an error flag on
// `parser`; if the push helpers in use consult one (eg parse_error or
// had_memory_error) confirm that it is set up and handled elsewhere.
mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {

    // initialise parser and allocate memory for its stacks

    parser_t parser;

    parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
    parser.rule_stack_top = 0;
    parser.rule_stack = m_new(rule_stack_t, parser.rule_stack_alloc);

    parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
    parser.result_stack_top = 0;
    parser.result_stack = m_new(mp_parse_node_t, parser.result_stack_alloc);

    parser.lexer = lex;

    // no chunks have been allocated or linked into the tree yet
    parser.tree.chunk = NULL;
    parser.cur_chunk = NULL;

    #if MICROPY_COMP_CONST
    mp_map_init(&parser.consts, 0);
    #endif

    // work out the top-level rule to use, and push it on the stack
    size_t top_level_rule;
    switch (input_kind) {
        case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
        case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
        default: top_level_rule = RULE_file_input;
    }
    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);

    // parse!

    size_t n, i; // state for the current rule
    size_t rule_src_line; // source line for the first token matched by the current rule
    bool backtrack = false;
    const rule_t *rule = NULL;

    for (;;) {
        next_rule:
        // parsing is finished once there are no more pending rules
        if (parser.rule_stack_top == 0) {
            break;
        }

        // resume the most recently pushed rule at its saved argument index i
        pop_rule(&parser, &rule, &i, &rule_src_line);
        // n is used below as the number of entries in rule->arg
        n = rule->act & RULE_ACT_ARG_MASK;

        /*
        // debugging
        printf("depth=%d ", parser.rule_stack_top);
        for (int j = 0; j < parser.rule_stack_top; ++j) {
            printf(" ");
        }
        printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
        */

        switch (rule->act & RULE_ACT_KIND_MASK) {
            case RULE_ACT_OR:
                if (i > 0 && !backtrack) {
                    // we came back to this or-rule after one of its
                    // alternatives matched, so the or-rule itself is done
                    goto next_rule;
                } else {
                    // either the first visit, or the previous alternative
                    // failed; try the next alternative
                    backtrack = false;
                }
                for (; i < n; ++i) {
                    uint16_t kind = rule->arg[i] & RULE_ARG_KIND_MASK;
                    if (kind == RULE_ARG_TOK) {
                        if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
                            push_result_token(&parser, rule);
                            mp_lexer_to_next(lex);
                            goto next_rule;
                        }
                    } else {
                        assert(kind == RULE_ARG_RULE);
                        if (i + 1 < n) {
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
                        }
                        push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
                        goto next_rule;
                    }
                }
                // no alternative matched; report failure to the rule below
                backtrack = true;
                break;

            case RULE_ACT_AND: {

                // failed, backtrack if we can, else syntax error
                if (backtrack) {
                    assert(i > 0);
                    if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
                        // an optional rule that failed, so continue with next arg
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
                        backtrack = false;
                    } else {
                        // a mandatory rule that failed, so propagate backtrack
                        if (i > 1) {
                            // already eaten tokens so can't backtrack
                            goto syntax_error;
                        } else {
                            goto next_rule;
                        }
                    }
                }

                // progress through the rule
                for (; i < n; ++i) {
                    if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                        // need to match a token
                        mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
                        if (lex->tok_kind == tok_kind) {
                            // matched token
                            if (tok_kind == MP_TOKEN_NAME) {
                                push_result_token(&parser, rule);
                            }
                            mp_lexer_to_next(lex);
                        } else {
                            // failed to match token
                            if (i > 0) {
                                // already eaten tokens so can't backtrack
                                goto syntax_error;
                            } else {
                                // this rule failed, so backtrack
                                backtrack = true;
                                goto next_rule;
                            }
                        }
                    } else {
                        push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
                        push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
                        goto next_rule;
                    }
                }

                assert(i == n);

                // matched the rule, so now build the corresponding parse_node

                #if !MICROPY_ENABLE_DOC_STRING
                // this code discards lonely statements, such as doc strings
                if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
                    mp_parse_node_t p = peek_result(&parser, 1);
                    if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) {
                        pop_result(&parser); // MP_PARSE_NODE_NULL
                        pop_result(&parser); // const expression (leaf or RULE_const_object)
                        // Pushing the "pass" rule here will overwrite any RULE_const_object
                        // entry that was on the result stack, allowing the GC to reclaim
                        // the memory from the const object when needed.
                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
                        break;
                    }
                }
                #endif

                // count number of arguments for the parse node
                // (i is reused here: it becomes the number of entries this
                // rule left on the result stack)
                i = 0;
                size_t num_not_nil = 0;
                for (size_t x = n; x > 0;) {
                    --x;
                    if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                        mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
                        if (tok_kind == MP_TOKEN_NAME) {
                            // only tokens which were names are pushed to stack
                            i += 1;
                            num_not_nil += 1;
                        }
                    } else {
                        // rules are always pushed
                        if (peek_result(&parser, i) != MP_PARSE_NODE_NULL) {
                            num_not_nil += 1;
                        }
                        i += 1;
                    }
                }

                if (num_not_nil == 1 && (rule->act & RULE_ACT_ALLOW_IDENT)) {
                    // this rule has only 1 argument and should not be emitted
                    // pop all i entries and push back just the non-null one
                    mp_parse_node_t pn = MP_PARSE_NODE_NULL;
                    for (size_t x = 0; x < i; ++x) {
                        mp_parse_node_t pn2 = pop_result(&parser);
                        if (pn2 != MP_PARSE_NODE_NULL) {
                            pn = pn2;
                        }
                    }
                    push_result_node(&parser, pn);
                } else {
                    // this rule must be emitted

                    if (rule->act & RULE_ACT_ADD_BLANK) {
                        // and add an extra blank node at the end (used by the compiler to store data)
                        push_result_node(&parser, MP_PARSE_NODE_NULL);
                        i += 1;
                    }

                    push_result_rule(&parser, rule_src_line, rule, i);
                }
                break;
            }

            default: {
                assert((rule->act & RULE_ACT_KIND_MASK) == RULE_ACT_LIST);

                // n=2 is: item item*
                // n=1 is: item (sep item)*
                // n=3 is: item (sep item)* [sep]
                // had_trailing_sep is always assigned at list_backtrack before
                // it is read: the for(;;) loop below only leaves via goto
                bool had_trailing_sep;
                if (backtrack) {
                    list_backtrack:
                    had_trailing_sep = false;
                    if (n == 2) {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else {
                            // fail on item, in later rounds; finish with this rule
                            backtrack = false;
                        }
                    } else {
                        if (i == 1) {
                            // fail on item, first time round; propagate backtrack
                            goto next_rule;
                        } else if ((i & 1) == 1) {
                            // fail on item, in later rounds; have eaten tokens so can't backtrack
                            if (n == 3) {
                                // list allows trailing separator; finish parsing list
                                had_trailing_sep = true;
                                backtrack = false;
                            } else {
                                // list doesn't allow trailing separator; fail
                                goto syntax_error;
                            }
                        } else {
                            // fail on separator; finish parsing list
                            backtrack = false;
                        }
                    }
                } else {
                    for (;;) {
                        // for odd n (1 or 3) the index alternates between
                        // arg[0] (item) and arg[1] (separator) with the parity
                        // of i; for n == 2 it is always arg[0]
                        size_t arg = rule->arg[i & 1 & n];
                        if ((arg & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                            if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
                                if (i & 1 & n) {
                                    // separators which are tokens are not pushed to result stack
                                } else {
                                    push_result_token(&parser, rule);
                                }
                                mp_lexer_to_next(lex);
                                // got element of list, so continue parsing list
                                i += 1;
                            } else {
                                // couldn't get element of list
                                i += 1;
                                backtrack = true;
                                goto list_backtrack;
                            }
                        } else {
                            assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE);
                            push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
                            push_rule_from_arg(&parser, arg); // push child of list-rule
                            goto next_rule;
                        }
                    }
                }
                assert(i >= 1);

                // compute number of elements in list, result in i
                i -= 1;
                if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
                    // don't count separators when they are tokens
                    i = (i + 1) / 2;
                }

                if (i == 1) {
                    // list matched single item
                    if (had_trailing_sep) {
                        // if there was a trailing separator, make a list of a single item
                        push_result_rule(&parser, rule_src_line, rule, i);
                    } else {
                        // just leave single item on stack (ie don't wrap in a list)
                    }
                } else {
                    push_result_rule(&parser, rule_src_line, rule, i);
                }
                break;
            }
        }
    }

    #if MICROPY_COMP_CONST
    mp_map_deinit(&parser.consts);
    #endif

    // truncate final chunk and link into chain of chunks
    if (parser.cur_chunk != NULL) {
        (void)m_renew_maybe(byte, parser.cur_chunk,
            sizeof(mp_parse_chunk_t) + parser.cur_chunk->alloc,
            sizeof(mp_parse_chunk_t) + parser.cur_chunk->union_.used,
            false);
        parser.cur_chunk->alloc = parser.cur_chunk->union_.used;
        parser.cur_chunk->union_.next = parser.tree.chunk;
        parser.tree.chunk = parser.cur_chunk;
    }

    if (
        lex->tok_kind != MP_TOKEN_END // check we are at the end of the token stream
        || parser.result_stack_top == 0 // check that we got a node (can fail on empty input)
        ) {
    // The parse loop jumps straight to this label on a syntax error, which
    // bypasses the mp_map_deinit and chunk-truncation steps above.
    syntax_error:;
        mp_obj_t exc;
        if (lex->tok_kind == MP_TOKEN_INDENT) {
            exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
                "unexpected indent");
        } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
            exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
                "unindent does not match any outer indentation level");
        } else {
            exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
                "invalid syntax");
        }
        // add traceback to give info about file name and location
        // we don't have a 'block' name, so just pass the NULL qstr to indicate this
        mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
        nlr_raise(exc);
    }

    // get the root parse node that we created
    assert(parser.result_stack_top == 1);
    parser.tree.root = parser.result_stack[0];

    // free the memory that we don't need anymore
    m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
    m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
    // we also free the lexer on behalf of the caller
    mp_lexer_free(lex);

    return parser.tree;
}