// Build a parse node for the lexer's current token and push it onto the
// parser's result stack.
//
//   parser - parser state; its lexer supplies the token kind, text and line
//   rule   - rule being parsed; only consulted when MICROPY_COMP_CONST is
//            enabled, to decide whether a name may be replaced by a constant
//
// Names become ID leaves (or the stored value of a dynamic constant),
// integers become small-int nodes or constant-object nodes, float/imaginary
// literals become constant-object nodes, strings/bytes become qstr leaves or
// constant-object nodes, and every other token becomes a TOKEN leaf.
STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
    mp_lexer_t *lex = parser->lexer;
    mp_parse_node_t node;

    if (lex->tok_kind == MP_TOKEN_NAME) {
        qstr name = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
        #if MICROPY_COMP_CONST
        // Only a name parsed under RULE_atom is looked up in the table of
        // dynamic constants; any other name is left as a plain identifier.
        mp_map_elem_t *found = NULL;
        if (rule->rule_id == RULE_atom) {
            found = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(name), MP_MAP_LOOKUP);
        }
        if (found == NULL) {
            node = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, name);
        } else if (MP_OBJ_IS_SMALL_INT(found->value)) {
            node = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(found->value));
        } else {
            node = make_node_const_object(parser, lex->tok_line, found->value);
        }
        #else
        (void)rule;
        node = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, name);
        #endif
    } else if (lex->tok_kind == MP_TOKEN_INTEGER) {
        mp_obj_t int_obj = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
        if (!MP_OBJ_IS_SMALL_INT(int_obj)) {
            // does not qualify as a small int: keep the object itself in the tree
            node = make_node_const_object(parser, lex->tok_line, int_obj);
        } else {
            node = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(int_obj));
        }
    } else if (lex->tok_kind == MP_TOKEN_FLOAT_OR_IMAG) {
        mp_obj_t num_obj = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
        node = make_node_const_object(parser, lex->tok_line, num_obj);
    } else if (lex->tok_kind == MP_TOKEN_STRING || lex->tok_kind == MP_TOKEN_BYTES) {
        const bool is_str = lex->tok_kind == MP_TOKEN_STRING;

        // Not every string/bytes literal is interned: doc strings (which are
        // usually large) will be discarded by the compiler, so only short
        // literals are interned; longer ones merely reuse an existing qstr.
        qstr interned = lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN
            ? qstr_from_strn(lex->vstr.buf, lex->vstr.len)
            : qstr_find_strn(lex->vstr.buf, lex->vstr.len);

        if (interned == MP_QSTR_NULL) {
            // no qstr available: the node holds a pointer to a new str/bytes object
            mp_obj_t str_obj = mp_obj_new_str_of_type(
                is_str ? &mp_type_str : &mp_type_bytes,
                (const byte*)lex->vstr.buf, lex->vstr.len);
            node = make_node_const_object(parser, lex->tok_line, str_obj);
        } else {
            // a qstr exists: a leaf node is enough
            node = mp_parse_node_new_leaf(is_str ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, interned);
        }
    } else {
        // any other token is stored by its kind
        node = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
    }

    push_result_node(parser, node);
}
// Fold the logical operators "or", "and" and "not" when their operands are
// constant true/false nodes.
//
//   parser   - parser state whose result stack holds the operands
//   rule     - rule being reduced; only RULE_or_test, RULE_and_test and
//              RULE_not_test_2 are handled
//   num_args - in/out: operand count for or/and; reduced by the number of
//              operands that were discarded
//
// Returns true when the rule was folded completely (a single node is left
// for or/and, or the "not" operand was replaced), false otherwise.
STATIC bool fold_logical_constants(parser_t *parser, const rule_t *rule, size_t *num_args) {
    const bool is_or = rule->rule_id == RULE_or_test;
    const bool is_and = rule->rule_id == RULE_and_test;

    if (is_or || is_and) {
        // Binary logical ops.  Operands are visited from peek index
        // (*num_args - 1) down to 0; each visited operand is written into the
        // slot selected by keep_slot, and keep_slot only advances when that
        // operand has to stay in the expression.
        size_t keep_slot = *num_args;
        size_t idx = keep_slot;
        while (idx > 0) {
            idx -= 1;
            mp_parse_node_t operand = peek_result(parser, idx);
            parser->result_stack[parser->result_stack_top - keep_slot] = operand;
            if (idx == 0) {
                // the last operand is always kept
                break;
            }
            // "or" stops at a constant true, "and" stops at a constant false
            bool decides = is_or
                ? mp_parse_node_is_const_true(operand)
                : mp_parse_node_is_const_false(operand);
            if (decides) {
                break;
            }
            // the opposite constant can be dropped (its slot gets overwritten
            // by the next operand); anything else keeps its slot
            bool droppable = is_or
                ? mp_parse_node_is_const_false(operand)
                : mp_parse_node_is_const_true(operand);
            if (!droppable) {
                keep_slot -= 1;
            }
        }

        // number of result-stack entries that are no longer needed
        const size_t discard = keep_slot - 1;
        for (size_t remaining = discard; remaining > 0; --remaining) {
            pop_result(parser);
        }
        *num_args -= discard;

        // folding is complete only when a single operand remains
        return *num_args == 1;
    }

    if (rule->rule_id != RULE_not_test_2) {
        return false;
    }

    // Unary "not": a constant operand is replaced by the opposite keyword token.
    mp_parse_node_t operand = peek_result(parser, 0);
    mp_parse_node_t negated;
    if (mp_parse_node_is_const_false(operand)) {
        negated = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, MP_TOKEN_KW_TRUE);
    } else if (mp_parse_node_is_const_true(operand)) {
        negated = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, MP_TOKEN_KW_FALSE);
    } else {
        return false;
    }
    pop_result(parser);
    push_result_node(parser, negated);
    return true;
}
// Build a parse-node leaf for the lexer's current token and push it onto the
// parser's result stack.
//
//   parser - parser state that receives the new node
//   lex    - lexer whose current token is converted
//
// Names, strings and bytes become leaves carrying a qstr made from the token
// text.  Numbers are classified as:
//   - DECIMAL   when a '.', 'e', 'E', 'j' or 'J' is met while scanning
//   - SMALL_INT when every character is a digit of the detected base and the
//               value fits (MP_FIT_SMALL_INT) without overflowing
//   - INTEGER   otherwise (the literal text is kept as a qstr)
// Any other token becomes a TOKEN leaf holding the token kind.
static void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
    const mp_token_t *tok = mp_lexer_cur(lex);
    mp_parse_node_t pn;
    if (tok->kind == MP_TOKEN_NAME) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len));
    } else if (tok->kind == MP_TOKEN_NUMBER) {
        bool dec = false;
        bool small_int = true;
        bool overflow = false;
        machine_int_t int_val = 0;
        // Largest positive machine_int_t, used to test for overflow *before*
        // accumulating.  NOTE(review): assumes WORD_MSBIT_HIGH is the mask of
        // the top bit of a machine word, as its previous use in the
        // sign-change test implied - confirm against its definition.
        const machine_int_t int_max = (machine_int_t)~(WORD_MSBIT_HIGH);
        int len = tok->len;
        const char *str = tok->str;
        int base = 10;
        int i = 0;
        if (len >= 3 && str[0] == '0') {
            if (str[1] == 'o' || str[1] == 'O') {
                // octal
                base = 8;
                i = 2;
            } else if (str[1] == 'x' || str[1] == 'X') {
                // hexadecimal
                base = 16;
                i = 2;
            } else if (str[1] == 'b' || str[1] == 'B') {
                // binary
                base = 2;
                i = 2;
            }
        }
        for (; i < len; i++) {
            int dig;
            if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
                dig = str[i] - '0';
            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
                dig = str[i] - 'a' + 10;
            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
                dig = str[i] - 'A' + 10;
            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
                dec = true;
                break;
            } else {
                small_int = false;
                break;
            }
            if (overflow) {
                // The value is already unrepresentable; keep scanning only so
                // that a decimal marker or an invalid character is still seen.
                continue;
            }
            // Check before multiplying: signed overflow is undefined
            // behaviour, and a wrapped result can end up larger than the
            // previous value with the same sign, so comparing old and new
            // values afterwards does not catch every overflow.
            if (int_val > (int_max - dig) / base) {
                overflow = true;
            } else {
                int_val = base * int_val + dig;
            }
        }
        if (dec) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len));
        } else if (small_int && !overflow && MP_FIT_SMALL_INT(int_val)) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
        } else {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len));
        }
    } else if (tok->kind == MP_TOKEN_STRING) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len));
    } else if (tok->kind == MP_TOKEN_BYTES) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len));
    } else {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
    }
    push_result_node(parser, pn);
}
// Try to fold the rule that is about to be reduced into a single constant node.
//
//   parser   - parser state whose result stack holds the rule's arguments
//   rule     - rule being reduced; selects which kind of folding is attempted
//   num_args - number of result-stack entries that belong to the rule
//
// Returns true when the arguments were popped and replaced by one node (a
// folded integer, or a "pass" statement for a private dynamic constant), and
// false when the result stack must still be reduced by the caller in the
// normal way.
STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args) {
    // this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
    // it does not do partial folding, eg 1 + 2 + x -> 3 + x

    mp_obj_t arg0;
    if (rule->rule_id == RULE_expr
        || rule->rule_id == RULE_xor_expr
        || rule->rule_id == RULE_and_expr) {
        // folding for binary ops: | ^ &
        // the first operand is the entry at peek index num_args - 1; the rest
        // are combined into it one by one down to peek index 0
        mp_parse_node_t pn = peek_result(parser, num_args - 1);
        if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
            return false;
        }
        // every operand of one of these rules uses the same operator
        mp_binary_op_t op;
        if (rule->rule_id == RULE_expr) {
            op = MP_BINARY_OP_OR;
        } else if (rule->rule_id == RULE_xor_expr) {
            op = MP_BINARY_OP_XOR;
        } else {
            op = MP_BINARY_OP_AND;
        }
        for (ssize_t i = num_args - 2; i >= 0; --i) {
            pn = peek_result(parser, i);
            mp_obj_t arg1;
            if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
                return false;
            }
            arg0 = mp_binary_op(op, arg0, arg1);
        }
    } else if (rule->rule_id == RULE_shift_expr
        || rule->rule_id == RULE_arith_expr
        || rule->rule_id == RULE_term) {
        // folding for binary ops: << >> + - * / % //
        // here the arguments alternate: the operator token is read from peek
        // index i and its right-hand operand from peek index i - 1
        mp_parse_node_t pn = peek_result(parser, num_args - 1);
        if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
            return false;
        }
        for (ssize_t i = num_args - 2; i >= 1; i -= 2) {
            pn = peek_result(parser, i - 1);
            mp_obj_t arg1;
            if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
                return false;
            }
            mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
            // table indexed by (token kind - MP_TOKEN_OP_PLUS); an entry of 255
            // marks an operator that is not folded
            // NOTE(review): relies on the operator token kinds being laid out
            // consecutively from MP_TOKEN_OP_PLUS in this order - confirm
            // against the token enum
            static const uint8_t token_to_op[] = {
                MP_BINARY_OP_ADD,
                MP_BINARY_OP_SUBTRACT,
                MP_BINARY_OP_MULTIPLY,
                255,//MP_BINARY_OP_POWER,
                255,//MP_BINARY_OP_TRUE_DIVIDE,
                MP_BINARY_OP_FLOOR_DIVIDE,
                MP_BINARY_OP_MODULO,
                255,//MP_BINARY_OP_LESS
                MP_BINARY_OP_LSHIFT,
                255,//MP_BINARY_OP_MORE
                MP_BINARY_OP_RSHIFT,
            };
            mp_binary_op_t op = token_to_op[tok - MP_TOKEN_OP_PLUS];
            if (op == (mp_binary_op_t)255) {
                return false;
            }
            int rhs_sign = mp_obj_int_sign(arg1);
            // NOTE(review): the two range tests below depend on the numeric
            // ordering of the mp_binary_op_t values - confirm against the enum
            if (op <= MP_BINARY_OP_RSHIFT) {
                // << and >> can't have negative rhs
                if (rhs_sign < 0) {
                    return false;
                }
            } else if (op >= MP_BINARY_OP_FLOOR_DIVIDE) {
                // % and // can't have zero rhs
                if (rhs_sign == 0) {
                    return false;
                }
            }
            arg0 = mp_binary_op(op, arg0, arg1);
        }
    } else if (rule->rule_id == RULE_factor_2) {
        // folding for unary ops: + - ~
        // the operand is at peek index 0 and the operator token at peek index 1
        mp_parse_node_t pn = peek_result(parser, 0);
        if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
            return false;
        }
        mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
        mp_unary_op_t op;
        if (tok == MP_TOKEN_OP_PLUS) {
            op = MP_UNARY_OP_POSITIVE;
        } else if (tok == MP_TOKEN_OP_MINUS) {
            op = MP_UNARY_OP_NEGATIVE;
        } else {
            assert(tok == MP_TOKEN_OP_TILDE); // should be
            op = MP_UNARY_OP_INVERT;
        }
        arg0 = mp_unary_op(op, arg0);

    #if MICROPY_COMP_CONST
    } else if (rule->rule_id == RULE_expr_stmt) {
        // this branch always returns by itself and never reaches the common
        // "success" code at the end of the function
        mp_parse_node_t pn1 = peek_result(parser, 0);
        if (!MP_PARSE_NODE_IS_NULL(pn1)
            && !(MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_augassign)
            || MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_assign_list))) {
            // this node is of the form <x> = <y>
            mp_parse_node_t pn0 = peek_result(parser, 1);
            if (MP_PARSE_NODE_IS_ID(pn0)
                && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_atom_expr_normal)
                && MP_PARSE_NODE_IS_ID(((mp_parse_node_struct_t*)pn1)->nodes[0])
                && MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn1)->nodes[0]) == MP_QSTR_const
                && MP_PARSE_NODE_IS_STRUCT_KIND(((mp_parse_node_struct_t*)pn1)->nodes[1], RULE_trailer_paren)
                ) {
                // code to assign dynamic constants: id = const(value)

                // get the id
                qstr id = MP_PARSE_NODE_LEAF_ARG(pn0);

                // get the value
                mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0];
                if (!MP_PARSE_NODE_IS_SMALL_INT(pn_value)) {
                    // only a small-int argument is accepted; anything else is
                    // recorded as a parse error
                    parser->parse_error = PARSE_ERROR_CONST;
                    return false;
                }
                mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn_value);

                // store the value in the table of dynamic constants
                mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
                // the slot is expected to be empty, ie the id has not been
                // stored as a constant before (checked only when asserts are
                // enabled)
                assert(elem->value == MP_OBJ_NULL);
                elem->value = MP_OBJ_NEW_SMALL_INT(value);

                // If the constant starts with an underscore then treat it as a private
                // variable and don't emit any code to store the value to the id.
                if (qstr_str(id)[0] == '_') {
                    pop_result(parser); // pop const(value)
                    pop_result(parser); // pop id
                    push_result_rule(parser, 0, rules[RULE_pass_stmt], 0); // replace with "pass"
                    return true;
                }

                // replace const(value) with value
                pop_result(parser);
                push_result_node(parser, pn_value);

                // finished folding this assignment, but we still want it to be part of the tree
                return false;
            }
        }
        return false;
    #endif

    #if MICROPY_COMP_MODULE_CONST
    } else if (rule->rule_id == RULE_atom_expr_normal) {
        mp_parse_node_t pn0 = peek_result(parser, 1);
        mp_parse_node_t pn1 = peek_result(parser, 0);
        if (!(MP_PARSE_NODE_IS_ID(pn0)
            && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_trailer_period))) {
            return false;
        }
        // id1.id2
        // look it up in constant table, see if it can be replaced with an integer
        mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pn1;
        assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0]));
        qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0);
        qstr q_attr = MP_PARSE_NODE_LEAF_ARG(pns1->nodes[0]);
        mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&mp_constants_map, MP_OBJ_NEW_QSTR(q_base), MP_MAP_LOOKUP);
        if (elem == NULL) {
            return false;
        }
        mp_obj_t dest[2];
        mp_load_method_maybe(elem->value, q_attr, dest);
        // fold only when the lookup produced an int in dest[0] and left
        // dest[1] unset
        if (!(dest[0] != MP_OBJ_NULL && MP_OBJ_IS_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
            return false;
        }
        arg0 = dest[0];
    #endif

    } else {
        return false;
    }

    // success folding this rule
    // drop all of the rule's arguments and push the folded value in their place

    for (size_t i = num_args; i > 0; i--) {
        pop_result(parser);
    }
    if (MP_OBJ_IS_SMALL_INT(arg0)) {
        push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(arg0)));
    } else {
        // TODO reuse memory for parse node struct?
        push_result_node(parser, make_node_const_object(parser, 0, arg0));
    }

    return true;
}
// Build a parse node for the lexer's current token and push it onto the
// parser's result stack.
//
//   parser - parser state that receives the new node
//   lex    - lexer whose current token is converted
//
// Names and bytes become leaves carrying a qstr made from the token text.
// Numbers are classified as:
//   - DECIMAL   when a '.', 'e', 'E', 'j' or 'J' is met while scanning
//   - SMALL_INT when every character is a digit of the detected base and the
//               value fits (MP_SMALL_INT_FITS) without overflowing
//   - INTEGER   otherwise (the literal text is kept as a qstr)
// Strings become a qstr leaf when short or already interned; otherwise the
// string data is pushed through push_result_string.  Any other token becomes
// a TOKEN leaf holding the token kind.
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
    const mp_token_t *tok = mp_lexer_cur(lex);
    mp_parse_node_t pn;
    if (tok->kind == MP_TOKEN_NAME) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
    } else if (tok->kind == MP_TOKEN_NUMBER) {
        bool dec = false;
        bool small_int = true;
        machine_int_t int_val = 0;
        int len = tok->len;
        const char *str = tok->str;
        int base = 0;
        // mp_parse_num_base fills in the base and returns the index at which
        // the digit scan below starts
        int i = mp_parse_num_base(str, len, &base);
        bool overflow = false;
        for (; i < len; i++) {
            int dig;
            if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
                dig = str[i] - '0';
            } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
                dig = str[i] - 'a' + 10;
            } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
                dig = str[i] - 'A' + 10;
            } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
                dec = true;
                break;
            } else {
                small_int = false;
                break;
            }

            // add next digit and check for overflow
            // Once overflow has been flagged the value can no longer be used
            // (the literal becomes an INTEGER or DECIMAL node), so the
            // multiply-add is skipped: carrying on would overflow the signed
            // machine_int_t, which is undefined behaviour.  Scanning continues
            // so that a decimal marker or an invalid character is still seen.
            if (!overflow) {
                if (mp_small_int_mul_overflow(int_val, base)) {
                    overflow = true;
                } else {
                    int_val = int_val * base + dig;
                    if (!MP_SMALL_INT_FITS(int_val)) {
                        overflow = true;
                    }
                }
            }
        }
        if (dec) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
        } else if (small_int && !overflow && MP_SMALL_INT_FITS(int_val)) {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
        } else {
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
        }
    } else if (tok->kind == MP_TOKEN_STRING) {
        // Don't automatically intern all strings.  doc strings (which are usually large)
        // will be discarded by the compiler, and so we shouldn't intern them.
        qstr qst = MP_QSTR_NULL;
        if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
            // intern short strings
            qst = qstr_from_strn(tok->str, tok->len);
        } else {
            // check if this string is already interned
            qst = qstr_find_strn(tok->str, tok->len);
        }
        if (qst != MP_QSTR_NULL) {
            // qstr exists, make a leaf node
            pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst);
        } else {
            // not interned, make a node holding a pointer to the string data;
            // push_result_string is used instead of the common push below
            push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
            return;
        }
    } else if (tok->kind == MP_TOKEN_BYTES) {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
    } else {
        pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
    }
    push_result_node(parser, pn);
}