void mp_parse_node_show(mp_parse_node_t pn, int indent) { for (int i = 0; i < indent; i++) { printf(" "); } if (MP_PARSE_NODE_IS_NULL(pn)) { printf("NULL\n"); } else if (MP_PARSE_NODE_IS_LEAF(pn)) { int arg = MP_PARSE_NODE_LEAF_ARG(pn); switch (MP_PARSE_NODE_LEAF_KIND(pn)) { case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_SMALL_INT: printf("int(%d)\n", arg); break; case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_TOKEN: printf("tok(%d)\n", arg); break; default: assert(0); } } else { mp_parse_node_struct_t *pns2 = (mp_parse_node_struct_t*)pn; int n = pns2->kind_num_nodes >> 8; #ifdef USE_RULE_NAME printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns2), n); #else printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns2), n); #endif for (int i = 0; i < n; i++) { mp_parse_node_show(pns2->nodes[i], indent + 2); } } }
STATIC mp_uint_t get_arg_reglist(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) { // a register list looks like {r0, r1, r2} and is parsed as a Python set if (!MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_atom_brace)) { goto bad_arg; } mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; assert(MP_PARSE_NODE_STRUCT_NUM_NODES(pns) == 1); // should always be pn = pns->nodes[0]; mp_uint_t reglist = 0; if (MP_PARSE_NODE_IS_ID(pn)) { // set with one element reglist |= 1 << get_arg_reg(emit, op, pn, 15); } else if (MP_PARSE_NODE_IS_STRUCT(pn)) { pns = (mp_parse_node_struct_t*)pn; if (MP_PARSE_NODE_STRUCT_KIND(pns) == PN_dictorsetmaker) { assert(MP_PARSE_NODE_IS_STRUCT(pns->nodes[1])); // should succeed mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pns->nodes[1]; if (MP_PARSE_NODE_STRUCT_KIND(pns1) == PN_dictorsetmaker_list) { // set with multiple elements // get first element of set (we rely on get_arg_reg to catch syntax errors) reglist |= 1 << get_arg_reg(emit, op, pns->nodes[0], 15); // get tail elements (2nd, 3rd, ...) mp_parse_node_t *nodes; int n = mp_parse_node_extract_list(&pns1->nodes[0], PN_dictorsetmaker_list2, &nodes); // process rest of elements for (int i = 0; i < n; i++) { reglist |= 1 << get_arg_reg(emit, op, nodes[i], 15); } } else { goto bad_arg; } } else { goto bad_arg; } } else { goto bad_arg; } return reglist; bad_arg: emit_inline_thumb_error_exc(emit, mp_obj_new_exception_msg_varg(&mp_type_SyntaxError, "'%s' expects {r0, r1, ...}", op)); return 0; }
void mp_parse_node_print(mp_parse_node_t pn, size_t indent) { if (MP_PARSE_NODE_IS_STRUCT(pn)) { printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line); } else { printf(" "); } for (size_t i = 0; i < indent; i++) { printf(" "); } if (MP_PARSE_NODE_IS_NULL(pn)) { printf("NULL\n"); } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) { mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn); printf("int(" INT_FMT ")\n", arg); } else if (MP_PARSE_NODE_IS_LEAF(pn)) { uintptr_t arg = MP_PARSE_NODE_LEAF_ARG(pn); switch (MP_PARSE_NODE_LEAF_KIND(pn)) { case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_TOKEN: printf("tok(%u)\n", (uint)arg); break; default: assert(0); } } else { // node must be a mp_parse_node_struct_t mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) { printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]); } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) { printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]); } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) { #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32)); #else printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]); #endif } else { size_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); #ifdef USE_RULE_NAME printf("%s(%u) (n=%u)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n); #else printf("rule(%u) (n=%u)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n); #endif for (size_t i = 0; i < n; i++) { mp_parse_node_print(pns->nodes[i], indent + 2); } } } }
void mp_parse_node_free(mp_parse_node_t pn) { if (MP_PARSE_NODE_IS_STRUCT(pn)) { mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); mp_uint_t rule_id = MP_PARSE_NODE_STRUCT_KIND(pns); if (rule_id == RULE_string || rule_id == RULE_bytes) { m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]); } else if (rule_id == RULE_const_object) {
void mp_parse_node_print(mp_parse_node_t pn, int indent) { if (MP_PARSE_NODE_IS_STRUCT(pn)) { printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line); } else { printf(" "); } for (int i = 0; i < indent; i++) { printf(" "); } if (MP_PARSE_NODE_IS_NULL(pn)) { printf("NULL\n"); } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) { machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn); printf("int(" INT_FMT ")\n", arg); } else if (MP_PARSE_NODE_IS_LEAF(pn)) { machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn); switch (MP_PARSE_NODE_LEAF_KIND(pn)) { case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break; case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", arg); break; default: assert(0); } } else { // node must be a mp_parse_node_struct_t mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) { printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]); } else { uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); #ifdef USE_RULE_NAME printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n); #else printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n); #endif for (uint i = 0; i < n; i++) { mp_parse_node_print(pns->nodes[i], indent + 2); } } } }
int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes) { if (MP_PARSE_NODE_IS_NULL(*pn)) { *nodes = NULL; return 0; } else if (MP_PARSE_NODE_IS_LEAF(*pn)) { *nodes = pn; return 1; } else { mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)(*pn); if (MP_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) { *nodes = pn; return 1; } else { *nodes = pns->nodes; return MP_PARSE_NODE_STRUCT_NUM_NODES(pns); } } }
void mp_parse_node_free(mp_parse_node_t pn) { if (MP_PARSE_NODE_IS_STRUCT(pn)) { mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); mp_uint_t rule_id = MP_PARSE_NODE_STRUCT_KIND(pns); if (rule_id == RULE_string) { m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]); return; } bool adjust = ADD_BLANK_NODE(rule_id); if (adjust) { n--; } for (mp_uint_t i = 0; i < n; i++) { mp_parse_node_free(pns->nodes[i]); } if (adjust) { n++; } m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns); }
mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { // initialise parser and allocate memory for its stacks parser_t parser; parser.parse_error = PARSE_ERROR_NONE; parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT; parser.rule_stack_top = 0; parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc); parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT; parser.result_stack_top = 0; parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc); parser.lexer = lex; parser.tree.chunk = NULL; parser.cur_chunk = NULL; #if MICROPY_COMP_CONST mp_map_init(&parser.consts, 0); #endif // check if we could allocate the stacks if (parser.rule_stack == NULL || parser.result_stack == NULL) { goto memory_error; } // work out the top-level rule to use, and push it on the stack size_t top_level_rule; switch (input_kind) { case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break; case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break; default: top_level_rule = RULE_file_input; } push_rule(&parser, lex->tok_line, rules[top_level_rule], 0); // parse! size_t n, i; // state for the current rule size_t rule_src_line; // source line for the first token matched by the current rule bool backtrack = false; const rule_t *rule = NULL; for (;;) { next_rule: if (parser.rule_stack_top == 0 || parser.parse_error) { break; } pop_rule(&parser, &rule, &i, &rule_src_line); n = rule->act & RULE_ACT_ARG_MASK; /* // debugging printf("depth=%d ", parser.rule_stack_top); for (int j = 0; j < parser.rule_stack_top; ++j) { printf(" "); } printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack); */ switch (rule->act & RULE_ACT_KIND_MASK) { case RULE_ACT_OR: if (i > 0 && !backtrack) { goto next_rule; } else { backtrack = false; } for (; i < n; ++i) { uint16_t kind = rule->arg[i] & RULE_ARG_KIND_MASK; if (kind == RULE_ARG_TOK) { if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) { push_result_token(&parser); mp_lexer_to_next(lex); goto next_rule; } } else { assert(kind == RULE_ARG_RULE); if (i + 1 < n) { push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule } push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule goto next_rule; } } backtrack = true; break; case RULE_ACT_AND: { // failed, backtrack if we can, else syntax error if (backtrack) { assert(i > 0); if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) { // an optional rule that failed, so continue with next arg push_result_node(&parser, MP_PARSE_NODE_NULL); backtrack = false; } else { // a mandatory rule that failed, so propagate backtrack if (i > 1) { // already eaten tokens so can't backtrack goto syntax_error; } else { goto next_rule; } } } // progress through the rule for (; i < n; ++i) { switch (rule->arg[i] & RULE_ARG_KIND_MASK) { case RULE_ARG_TOK: { // need to match a token mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK; if (lex->tok_kind == tok_kind) { // matched token if (tok_kind == MP_TOKEN_NAME) { push_result_token(&parser); } mp_lexer_to_next(lex); } else { // failed to match token if (i > 0) { // already eaten tokens so can't backtrack goto syntax_error; } else { // this rule failed, so backtrack backtrack = true; goto next_rule; } } break; } case RULE_ARG_RULE: case RULE_ARG_OPT_RULE: rule_and_no_other_choice: push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule goto next_rule; default: assert(0); goto rule_and_no_other_choice; // to help flow control analysis } } assert(i == n); // matched the rule, so now build the corresponding parse_node #if !MICROPY_ENABLE_DOC_STRING // this code discards lonely statements, such as doc strings if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) { mp_parse_node_t p = peek_result(&parser, 1); if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) { pop_result(&parser); // MP_PARSE_NODE_NULL mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string if (MP_PARSE_NODE_IS_STRUCT(pn)) { mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) { m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]); } } push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0); break; } } #endif // count number of arguments for the parse node i = 0; size_t num_not_nil = 0; for (size_t x = n; x > 0;) { --x; if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK; if (tok_kind == MP_TOKEN_NAME) { // only tokens which were names are pushed to stack i += 1; num_not_nil += 1; } } else { // rules are always pushed if (peek_result(&parser, i) != MP_PARSE_NODE_NULL) { num_not_nil += 1; } i += 1; } } if (num_not_nil == 1 && (rule->act & RULE_ACT_ALLOW_IDENT)) { // this rule has only 1 argument and should not be emitted mp_parse_node_t pn = MP_PARSE_NODE_NULL; for (size_t x = 0; x < i; ++x) { mp_parse_node_t pn2 = pop_result(&parser); if (pn2 != MP_PARSE_NODE_NULL) { pn = pn2; } } push_result_node(&parser, pn); } else { // this rule must be emitted if (rule->act & RULE_ACT_ADD_BLANK) { // and add an extra blank node at the end (used by the compiler to store data) push_result_node(&parser, MP_PARSE_NODE_NULL); i += 1; } push_result_rule(&parser, rule_src_line, rule, i); } break; }