// This dispatcher function is expected to be independent of the implementation of long int STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { mp_arg_check_num(n_args, n_kw, 0, 2, false); switch (n_args) { case 0: return MP_OBJ_NEW_SMALL_INT(0); case 1: if (MP_OBJ_IS_INT(args[0])) { // already an int (small or long), just return it return args[0]; } else if (MP_OBJ_IS_STR(args[0])) { // a string, parse it uint l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, 0); #if MICROPY_PY_BUILTINS_FLOAT } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_float)) { return MP_OBJ_NEW_SMALL_INT((machine_int_t)(MICROPY_FLOAT_C_FUN(trunc)(mp_obj_float_get(args[0])))); #endif } else { // try to convert to small int (eg from bool) return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0])); } case 2: default: { // should be a string, parse it // TODO proper error checking of argument types uint l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, mp_obj_get_int(args[1])); } } }
// This dispatcher function is expected to be independent of the implementation of long int STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { // TODO check n_kw == 0 switch (n_args) { case 0: return MP_OBJ_NEW_SMALL_INT(0); case 1: if (MP_OBJ_IS_STR(args[0])) { // a string, parse it uint l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, 0); #if MICROPY_ENABLE_FLOAT } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_float)) { return MP_OBJ_NEW_SMALL_INT((machine_int_t)(MICROPY_FLOAT_C_FUN(trunc)(mp_obj_float_get(args[0])))); #endif } else { return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0])); } case 2: { // should be a string, parse it // TODO proper error checking of argument types uint l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, mp_obj_get_int(args[1])); } default: nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "int takes at most 2 arguments, %d given", n_args)); } }
// This dispatcher function is expected to be independent of the implementation of long int STATIC mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { (void)type_in; mp_arg_check_num(n_args, n_kw, 0, 2, false); switch (n_args) { case 0: return MP_OBJ_NEW_SMALL_INT(0); case 1: if (MP_OBJ_IS_INT(args[0])) { // already an int (small or long), just return it return args[0]; } else if (MP_OBJ_IS_STR_OR_BYTES(args[0])) { // a string, parse it size_t l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, 0, NULL); #if MICROPY_PY_BUILTINS_FLOAT } else if (mp_obj_is_float(args[0])) { return mp_obj_new_int_from_float(mp_obj_float_get(args[0])); #endif } else { return mp_unary_op(MP_UNARY_OP_INT, args[0]); } case 2: default: { // should be a string, parse it size_t l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, mp_obj_get_int(args[1]), NULL); } } }
STATIC void push_result_token(parser_t *parser, const rule_t *rule) { mp_parse_node_t pn; mp_lexer_t *lex = parser->lexer; if (lex->tok_kind == MP_TOKEN_NAME) { qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len); #if MICROPY_COMP_CONST // if name is a standalone identifier, look it up in the table of dynamic constants mp_map_elem_t *elem; if (rule->rule_id == RULE_atom && (elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP)) != NULL) { if (MP_OBJ_IS_SMALL_INT(elem->value)) { pn = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(elem->value)); } else { pn = make_node_const_object(parser, lex->tok_line, elem->value); } } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id); } #else (void)rule; pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id); #endif } else if (lex->tok_kind == MP_TOKEN_INTEGER) { mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex); if (MP_OBJ_IS_SMALL_INT(o)) { pn = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(o)); } else { pn = make_node_const_object(parser, lex->tok_line, o); } } else if (lex->tok_kind == MP_TOKEN_FLOAT_OR_IMAG) { mp_obj_t o = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex); pn = make_node_const_object(parser, lex->tok_line, o); } else if (lex->tok_kind == MP_TOKEN_STRING || lex->tok_kind == MP_TOKEN_BYTES) { // Don't automatically intern all strings/bytes. doc strings (which are usually large) // will be discarded by the compiler, and so we shouldn't intern them. qstr qst = MP_QSTR_NULL; if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) { // intern short strings qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len); } else { // check if this string is already interned qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len); } if (qst != MP_QSTR_NULL) { // qstr exists, make a leaf node pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst); } else { // not interned, make a node holding a pointer to the string/bytes object mp_obj_t o = mp_obj_new_str_of_type( lex->tok_kind == MP_TOKEN_STRING ? &mp_type_str : &mp_type_bytes, (const byte*)lex->vstr.buf, lex->vstr.len); pn = make_node_const_object(parser, lex->tok_line, o); } } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind); } push_result_node(parser, pn); }
STATIC machine_uint_t get_fmt_num(const char **p) { const char *num = *p; uint len = 1; while (unichar_isdigit(*++num)) { len++; } machine_uint_t val = (machine_uint_t)MP_OBJ_SMALL_INT_VALUE(mp_parse_num_integer(*p, len, 10)); *p = num; return val; }
// This dispatcher function is expected to be independent of the implementation of long int STATIC mp_obj_t mp_obj_int_make_new(mp_obj_t type_in, mp_uint_t n_args, mp_uint_t n_kw, const mp_obj_t *args) { (void)type_in; mp_arg_check_num(n_args, n_kw, 0, 2, false); switch (n_args) { case 0: return MP_OBJ_NEW_SMALL_INT(0); case 1: if (MP_OBJ_IS_INT(args[0])) { // already an int (small or long), just return it return args[0]; } else if (MP_OBJ_IS_STR_OR_BYTES(args[0])) { // a string, parse it mp_uint_t l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, 0, NULL); #if MICROPY_PY_BUILTINS_FLOAT } else if (mp_obj_is_float(args[0])) { return mp_obj_new_int_from_float(mp_obj_float_get(args[0])); #endif } else { // try to convert to small int (eg from bool) return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0])); } case 2: default: { // should be a string, parse it // TODO proper error checking of argument types mp_uint_t l; const char *s = mp_obj_str_get_data(args[0], &l); return mp_parse_num_integer(s, l, mp_obj_get_int(args[1]), NULL); } } }
STATIC mp_obj_t load_obj(mp_reader_t *reader) { byte obj_type = read_byte(reader); if (obj_type == 'e') { return MP_OBJ_FROM_PTR(&mp_const_ellipsis_obj); } else { size_t len = read_uint(reader); vstr_t vstr; vstr_init_len(&vstr, len); read_bytes(reader, (byte*)vstr.buf, len); if (obj_type == 's' || obj_type == 'b') { return mp_obj_new_str_from_vstr(obj_type == 's' ? &mp_type_str : &mp_type_bytes, &vstr); } else if (obj_type == 'i') { return mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL); } else { assert(obj_type == 'f' || obj_type == 'c'); return mp_parse_num_decimal(vstr.buf, vstr.len, obj_type == 'c', false, NULL); } } }
STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) { const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ); ujson_stream_t s = {stream_obj, stream_p->read, 0, 0}; vstr_t vstr; vstr_init(&vstr, 8); mp_obj_list_t stack; // we use a list as a simple stack for nested JSON stack.len = 0; stack.items = NULL; mp_obj_t stack_top = MP_OBJ_NULL; mp_obj_type_t *stack_top_type = NULL; mp_obj_t stack_key = MP_OBJ_NULL; S_NEXT(s); for (;;) { cont: if (S_END(s)) { break; } mp_obj_t next = MP_OBJ_NULL; bool enter = false; byte cur = S_CUR(s); S_NEXT(s); switch (cur) { case ',': case ':': case ' ': case '\t': case '\n': case '\r': goto cont; case 'n': if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') { S_NEXT(s); next = mp_const_none; } else { goto fail; } break; case 'f': if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') { S_NEXT(s); next = mp_const_false; } else { goto fail; } break; case 't': if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') { S_NEXT(s); next = mp_const_true; } else { goto fail; } break; case '"': vstr_reset(&vstr); for (; !S_END(s) && S_CUR(s) != '"';) { byte c = S_CUR(s); if (c == '\\') { c = S_NEXT(s); switch (c) { case 'b': c = 0x08; break; case 'f': c = 0x0c; break; case 'n': c = 0x0a; break; case 'r': c = 0x0d; break; case 't': c = 0x09; break; case 'u': { mp_uint_t num = 0; for (int i = 0; i < 4; i++) { c = (S_NEXT(s) | 0x20) - '0'; if (c > 9) { c -= ('a' - ('9' + 1)); } num = (num << 4) | c; } vstr_add_char(&vstr, num); goto str_cont; } } } vstr_add_byte(&vstr, c); str_cont: S_NEXT(s); } if (S_END(s)) { goto fail; } S_NEXT(s); next = mp_obj_new_str(vstr.buf, vstr.len, false); break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { bool flt = false; vstr_reset(&vstr); for (;;) { vstr_add_byte(&vstr, cur); cur = S_CUR(s); if (cur == '.' || cur == 'E' || cur == 'e') { flt = true; } else if (cur == '-' || unichar_isdigit(cur)) { // pass } else { break; } S_NEXT(s); } if (flt) { next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL); } else { next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL); } break; } case '[': next = mp_obj_new_list(0, NULL); enter = true; break; case '{': next = mp_obj_new_dict(0); enter = true; break; case '}': case ']': { if (stack_top == MP_OBJ_NULL) { // no object at all goto fail; } if (stack.len == 0) { // finished; compound object goto success; } stack.len -= 1; stack_top = stack.items[stack.len]; stack_top_type = mp_obj_get_type(stack_top); goto cont; } default: goto fail; } if (stack_top == MP_OBJ_NULL) { stack_top = next; stack_top_type = mp_obj_get_type(stack_top); if (!enter) { // finished; single primitive only goto success; } } else { // append to list or dict if (stack_top_type == &mp_type_list) { mp_obj_list_append(stack_top, next); } else { if (stack_key == MP_OBJ_NULL) { stack_key = next; if (enter) { goto fail; } } else { mp_obj_dict_store(stack_top, stack_key, next); stack_key = MP_OBJ_NULL; } } if (enter) { if (stack.items == NULL) { mp_obj_list_init(&stack, 1); stack.items[0] = stack_top; } else { mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top); } stack_top = next; stack_top_type = mp_obj_get_type(stack_top); } } } success: // eat trailing whitespace while (unichar_isspace(S_CUR(s))) { S_NEXT(s); } if (!S_END(s)) { // unexpected chars goto fail; } if (stack_top == MP_OBJ_NULL || stack.len != 0) { // not exactly 1 object goto fail; } vstr_clear(&vstr); return stack_top; fail: nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON")); }
// This function implements a simple non-recursive JSON parser. // // The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt // The parser here will parse any valid JSON and return the correct // corresponding Python object. It allows through a superset of JSON, since // it treats commas and colons as "whitespace", and doesn't care if // brackets/braces are correctly paired. It will raise a ValueError if the // input is outside it's specs. // // Most of the work is parsing the primitives (null, false, true, numbers, // strings). It does 1 pass over the input string and so is easily extended to // being able to parse from a non-seekable stream. It tries to be fast and // small in code size, while not using more RAM than necessary. STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) { mp_uint_t len; const char *s = mp_obj_str_get_data(obj, &len); const char *top = s + len; vstr_t vstr; vstr_init(&vstr, 8); mp_obj_list_t stack; // we use a list as a simple stack for nested JSON stack.len = 0; stack.items = NULL; mp_obj_t stack_top = MP_OBJ_NULL; mp_obj_type_t *stack_top_type = NULL; mp_obj_t stack_key = MP_OBJ_NULL; for (;;) { cont: if (s == top) { break; } mp_obj_t next = MP_OBJ_NULL; bool enter = false; switch (*s) { case ',': case ':': case ' ': case '\t': case '\n': case '\r': s += 1; goto cont; case 'n': if (s + 3 < top && s[1] == 'u' && s[2] == 'l' && s[3] == 'l') { s += 4; next = mp_const_none; } else { goto fail; } break; case 'f': if (s + 4 < top && s[1] == 'a' && s[2] == 'l' && s[3] == 's' && s[4] == 'e') { s += 5; next = mp_const_false; } else { goto fail; } break; case 't': if (s + 3 < top && s[1] == 'r' && s[2] == 'u' && s[3] == 'e') { s += 4; next = mp_const_true; } else { goto fail; } break; case '"': vstr_reset(&vstr); for (s++; s < top && *s != '"';) { byte c = *s; if (c == '\\') { s++; c = *s; switch (c) { case 'b': c = 0x08; break; case 'f': c = 0x0c; break; case 'n': c = 0x0a; break; case 'r': c = 0x0d; break; case 't': c = 0x09; break; case 'u': { if (s + 4 >= top) { goto fail; } mp_uint_t num = 0; for (int i = 0; i < 4; i++) { c = (*++s | 0x20) - '0'; if (c > 9) { c -= ('a' - ('9' + 1)); } num = (num << 4) | c; } vstr_add_char(&vstr, num); goto str_cont; } } } vstr_add_byte(&vstr, c); str_cont: s++; } if (s == top) { goto fail; } s++; next = mp_obj_new_str(vstr.buf, vstr.len, false); break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { bool flt = false; vstr_reset(&vstr); for (; s < top; s++) { if (*s == '.' || *s == 'E' || *s == 'e') { flt = true; } else if (*s == '-' || unichar_isdigit(*s)) { // pass } else { break; } vstr_add_byte(&vstr, *s); } if (flt) { next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL); } else { next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL); } break; } case '[': next = mp_obj_new_list(0, NULL); enter = true; s += 1; break; case '{': next = mp_obj_new_dict(0); enter = true; s += 1; break; case '}': case ']': { s += 1; if (stack_top == MP_OBJ_NULL) { // no object at all goto fail; } if (stack.len == 0) { // finished; compound object goto success; } stack.len -= 1; stack_top = stack.items[stack.len]; stack_top_type = mp_obj_get_type(stack_top); goto cont; } default: goto fail; } if (stack_top == MP_OBJ_NULL) { stack_top = next; stack_top_type = mp_obj_get_type(stack_top); if (!enter) { // finished; single primitive only goto success; } } else { // append to list or dict if (stack_top_type == &mp_type_list) { mp_obj_list_append(stack_top, next); } else { if (stack_key == MP_OBJ_NULL) { stack_key = next; if (enter) { goto fail; } } else { mp_obj_dict_store(stack_top, stack_key, next); stack_key = MP_OBJ_NULL; } } if (enter) { if (stack.items == NULL) { mp_obj_list_init(&stack, 1); stack.items[0] = stack_top; } else { mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top); } stack_top = next; stack_top_type = mp_obj_get_type(stack_top); } } } success: // eat trailing whitespace while (s < top && unichar_isspace(*s)) { s++; } if (s < top) { // unexpected chars goto fail; } if (stack_top == MP_OBJ_NULL || stack.len != 0) { // not exactly 1 object goto fail; } vstr_clear(&vstr); return stack_top; fail: nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON")); }