STATIC mp_obj_t struct_unpack(mp_obj_t fmt_in, mp_obj_t data_in) { // TODO: "The buffer must contain exactly the amount of data required by the format (len(bytes) must equal calcsize(fmt))." const char *fmt = mp_obj_str_get_str(fmt_in); char fmt_type = get_fmt_type(&fmt); uint size = calcsize_items(fmt); mp_obj_tuple_t *res = mp_obj_new_tuple(size, NULL); mp_buffer_info_t bufinfo; mp_get_buffer_raise(data_in, &bufinfo, MP_BUFFER_READ); byte *p = bufinfo.buf; for (uint i = 0; i < size; i++) { machine_uint_t sz = 1; if (unichar_isdigit(*fmt)) { sz = get_fmt_num(&fmt); } if (sz > 1) { // TODO: size spec support only for string len assert(*fmt == 's'); } mp_obj_t item; if (*fmt == 's') { item = mp_obj_new_bytes(p, sz); p += sz; fmt++; } else { item = mp_binary_get_val(fmt_type, *fmt++, &p); } res->items[i] = item; } return res; }
STATIC mp_obj_t struct_calcsize(mp_obj_t fmt_in) { const char *fmt = mp_obj_str_get_str(fmt_in); char fmt_type = get_fmt_type(&fmt); machine_uint_t size; for (size = 0; *fmt; fmt++) { uint align; machine_uint_t cnt = 1; if (unichar_isdigit(*fmt)) { cnt = get_fmt_num(&fmt); } if (cnt > 1) { // TODO: count spec support only for string len assert(*fmt == 's'); } machine_uint_t sz; if (*fmt == 's') { sz = cnt; } else { sz = (machine_uint_t)mp_binary_get_size(fmt_type, *fmt, &align); } // TODO assert(sz != (machine_uint_t)-1); // Apply alignment size = (size + align - 1) & ~(align - 1); size += sz; } return MP_OBJ_NEW_SMALL_INT(size); }
STATIC uint calcsize_items(const char *fmt) { uint cnt = 0; while (*fmt) { // TODO supports size spec only for "s" if (!unichar_isdigit(*fmt++)) { cnt++; } } return cnt; }
STATIC machine_uint_t get_fmt_num(const char **p) { const char *num = *p; uint len = 1; while (unichar_isdigit(*++num)) { len++; } machine_uint_t val = (machine_uint_t)MP_OBJ_SMALL_INT_VALUE(mp_parse_num_integer(*p, len, 10)); *p = num; return val; }
STATIC mp_obj_t struct_pack(uint n_args, mp_obj_t *args) { // TODO: "The arguments must match the values required by the format exactly." const char *fmt = mp_obj_str_get_str(args[0]); char fmt_type = get_fmt_type(&fmt); int size = MP_OBJ_SMALL_INT_VALUE(struct_calcsize(args[0])); byte *p; mp_obj_t res = mp_obj_str_builder_start(&mp_type_bytes, size, &p); memset(p, 0, size); for (uint i = 1; i < n_args; i++) { machine_uint_t sz = 1; if (unichar_isdigit(*fmt)) { sz = get_fmt_num(&fmt); } if (sz > 1) { // TODO: size spec support only for string len assert(*fmt == 's'); } if (*fmt == 's') { mp_buffer_info_t bufinfo; mp_get_buffer_raise(args[i], &bufinfo, MP_BUFFER_READ); machine_uint_t to_copy = sz; if (bufinfo.len < to_copy) { to_copy = bufinfo.len; } memcpy(p, bufinfo.buf, to_copy); memset(p + to_copy, 0, sz - to_copy); p += sz; fmt++; } else { mp_binary_set_val(fmt_type, *fmt++, args[i], &p); } } return res; }
int main(int argc, char **argv) { /* printf("sizeof(void*)=%u\n", (uint)sizeof(void*)); for (int i = 0; i < sizeof(table); ++i) { byte *ptr = (void*)&table[0]; printf(" %02x", ptr[i]); if ((i + 1)%8 == 0) printf("\n"); } */ mp_stack_set_limit(40000 * (BYTES_PER_WORD / 4)); pre_process_options(argc, argv); #if MICROPY_ENABLE_GC char *heap = malloc(heap_size); gc_init(heap, heap + heap_size); #endif mp_init(); // create keyboard interrupt object MP_STATE_VM(keyboard_interrupt_obj) = mp_obj_new_exception(&mp_type_KeyboardInterrupt); char *home = getenv("HOME"); char *path = getenv("MICROPYPATH"); if (path == NULL) { #ifdef MICROPY_PY_SYS_PATH_DEFAULT path = MICROPY_PY_SYS_PATH_DEFAULT; #else path = "~/.micropython/lib:/usr/lib/micropython"; #endif } mp_uint_t path_num = 1; // [0] is for current dir (or base dir of the script) for (char *p = path; p != NULL; p = strchr(p, PATHLIST_SEP_CHAR)) { path_num++; if (p != NULL) { p++; } } mp_obj_list_init(MP_OBJ_TO_PTR(mp_sys_path), path_num); mp_obj_t *path_items; mp_obj_list_get(mp_sys_path, &path_num, &path_items); path_items[0] = MP_OBJ_NEW_QSTR(MP_QSTR_); { char *p = path; for (mp_uint_t i = 1; i < path_num; i++) { char *p1 = strchr(p, PATHLIST_SEP_CHAR); if (p1 == NULL) { p1 = p + strlen(p); } if (p[0] == '~' && p[1] == '/' && home != NULL) { // Expand standalone ~ to $HOME CHECKBUF(buf, PATH_MAX); CHECKBUF_APPEND(buf, home, strlen(home)); CHECKBUF_APPEND(buf, p + 1, (size_t)(p1 - p - 1)); path_items[i] = MP_OBJ_NEW_QSTR(qstr_from_strn(buf, CHECKBUF_LEN(buf))); } else { path_items[i] = MP_OBJ_NEW_QSTR(qstr_from_strn(p, p1 - p)); } p = p1 + 1; } } mp_obj_list_init(MP_OBJ_TO_PTR(mp_sys_argv), 0); #if defined(MICROPY_UNIX_COVERAGE) { MP_DECLARE_CONST_FUN_OBJ(extra_coverage_obj); mp_store_global(QSTR_FROM_STR_STATIC("extra_coverage"), (mp_obj_t)&extra_coverage_obj); } #endif // Here is some example code to create a class and instance of that class. // First is the Python, then the C code. // // class TestClass: // pass // test_obj = TestClass() // test_obj.attr = 42 // // mp_obj_t test_class_type, test_class_instance; // test_class_type = mp_obj_new_type(QSTR_FROM_STR_STATIC("TestClass"), mp_const_empty_tuple, mp_obj_new_dict(0)); // mp_store_name(QSTR_FROM_STR_STATIC("test_obj"), test_class_instance = mp_call_function_0(test_class_type)); // mp_store_attr(test_class_instance, QSTR_FROM_STR_STATIC("attr"), mp_obj_new_int(42)); /* printf("bytes:\n"); printf(" total %d\n", m_get_total_bytes_allocated()); printf(" cur %d\n", m_get_current_bytes_allocated()); printf(" peak %d\n", m_get_peak_bytes_allocated()); */ const int NOTHING_EXECUTED = -2; int ret = NOTHING_EXECUTED; for (int a = 1; a < argc; a++) { if (argv[a][0] == '-') { if (strcmp(argv[a], "-c") == 0) { if (a + 1 >= argc) { return usage(argv); } ret = do_str(argv[a + 1]); if (ret & FORCED_EXIT) { break; } a += 1; } else if (strcmp(argv[a], "-m") == 0) { if (a + 1 >= argc) { return usage(argv); } mp_obj_t import_args[4]; import_args[0] = mp_obj_new_str(argv[a + 1], strlen(argv[a + 1]), false); import_args[1] = import_args[2] = mp_const_none; // Ask __import__ to handle imported module specially - set its __name__ // to __main__, and also return this leaf module, not top-level package // containing it. import_args[3] = mp_const_false; // TODO: https://docs.python.org/3/using/cmdline.html#cmdoption-m : // "the first element of sys.argv will be the full path to // the module file (while the module file is being located, // the first element will be set to "-m")." set_sys_argv(argv, argc, a + 1); mp_obj_t mod; nlr_buf_t nlr; if (nlr_push(&nlr) == 0) { mod = mp_builtin___import__(MP_ARRAY_SIZE(import_args), import_args); nlr_pop(); } else { // uncaught exception return handle_uncaught_exception(nlr.ret_val) & 0xff; } if (mp_obj_is_package(mod)) { // TODO mp_printf(&mp_stderr_print, "%s: -m for packages not yet implemented\n", argv[0]); exit(1); } ret = 0; break; } else if (strcmp(argv[a], "-X") == 0) { a += 1; #if MICROPY_DEBUG_PRINTERS } else if (strcmp(argv[a], "-v") == 0) { mp_verbose_flag++; #endif } else if (strncmp(argv[a], "-O", 2) == 0) { if (unichar_isdigit(argv[a][2])) { MP_STATE_VM(mp_optimise_value) = argv[a][2] & 0xf; } else { MP_STATE_VM(mp_optimise_value) = 0; for (char *p = argv[a] + 1; *p && *p == 'O'; p++, MP_STATE_VM(mp_optimise_value)++); } } else { return usage(argv); } } else { char *pathbuf = malloc(PATH_MAX); char *basedir = realpath(argv[a], pathbuf); if (basedir == NULL) { mp_printf(&mp_stderr_print, "%s: can't open file '%s': [Errno %d] %s\n", argv[0], argv[a], errno, strerror(errno)); // CPython exits with 2 in such case ret = 2; break; } // Set base dir of the script as first entry in sys.path char *p = strrchr(basedir, '/'); path_items[0] = MP_OBJ_NEW_QSTR(qstr_from_strn(basedir, p - basedir)); free(pathbuf); set_sys_argv(argv, argc, a); ret = do_file(argv[a]); break; } } if (ret == NOTHING_EXECUTED) { if (isatty(0)) { prompt_read_history(); ret = do_repl(); prompt_write_history(); } else { mp_lexer_t *lex = mp_lexer_new_from_fd(MP_QSTR__lt_stdin_gt_, 0, false); ret = execute_from_lexer(lex, MP_PARSE_FILE_INPUT, false); } } #if MICROPY_PY_MICROPYTHON_MEM_INFO if (mp_verbose_flag) { mp_micropython_mem_info(0, NULL); } #endif mp_deinit(); #if MICROPY_ENABLE_GC && !defined(NDEBUG) // We don't really need to free memory since we are about to exit the // process, but doing so helps to find memory leaks. free(heap); #endif //printf("total bytes = %d\n", m_get_total_bytes_allocated()); return ret & 0xff; }
bool unichar_isxdigit(unichar c) { return unichar_isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
STATIC bool is_following_digit(mp_lexer_t *lex) { return unichar_isdigit(lex->chr1); }
size_t mp_repl_autocomplete(const char *str, size_t len, const mp_print_t *print, const char **compl_str) { // scan backwards to find start of "a.b.c" chain const char *org_str = str; const char *top = str + len; for (const char *s = top; --s >= str;) { if (!(unichar_isalpha(*s) || unichar_isdigit(*s) || *s == '_' || *s == '.')) { ++s; str = s; break; } } size_t nqstr = QSTR_TOTAL(); // begin search in outer global dict which is accessed from __main__ mp_obj_t obj = MP_OBJ_FROM_PTR(&mp_module___main__); mp_obj_t dest[2]; for (;;) { // get next word in string to complete const char *s_start = str; while (str < top && *str != '.') { ++str; } size_t s_len = str - s_start; if (str < top) { // a complete word, lookup in current object qstr q = qstr_find_strn(s_start, s_len); if (q == MP_QSTR_NULL) { // lookup will fail return 0; } mp_load_method_maybe(obj, q, dest); obj = dest[0]; // attribute, method, or MP_OBJ_NULL if nothing found if (obj == MP_OBJ_NULL) { // lookup failed return 0; } // skip '.' to move to next word ++str; } else { // end of string, do completion on this partial name // look for matches const char *match_str = NULL; size_t match_len = 0; qstr q_first = 0, q_last; for (qstr q = 1; q < nqstr; ++q) { size_t d_len; const char *d_str = (const char*)qstr_data(q, &d_len); if (s_len <= d_len && strncmp(s_start, d_str, s_len) == 0) { mp_load_method_maybe(obj, q, dest); if (dest[0] != MP_OBJ_NULL) { if (match_str == NULL) { match_str = d_str; match_len = d_len; } else { // search for longest common prefix of match_str and d_str // (assumes these strings are null-terminated) for (size_t j = s_len; j <= match_len && j <= d_len; ++j) { if (match_str[j] != d_str[j]) { match_len = j; break; } } } if (q_first == 0) { q_first = q; } q_last = q; } } } // nothing found if (q_first == 0) { // If there're no better alternatives, and if it's first word // in the line, try to complete "import". if (s_start == org_str) { static const char import_str[] = "import "; if (memcmp(s_start, import_str, s_len) == 0) { *compl_str = import_str + s_len; return sizeof(import_str) - 1 - s_len; } } return 0; } // 1 match found, or multiple matches with a common prefix if (q_first == q_last || match_len > s_len) { *compl_str = match_str + s_len; return match_len - s_len; } // multiple matches found, print them out #define WORD_SLOT_LEN (16) #define MAX_LINE_LEN (4 * WORD_SLOT_LEN) int line_len = MAX_LINE_LEN; // force a newline for first word for (qstr q = q_first; q <= q_last; ++q) { size_t d_len; const char *d_str = (const char*)qstr_data(q, &d_len); if (s_len <= d_len && strncmp(s_start, d_str, s_len) == 0) { mp_load_method_maybe(obj, q, dest); if (dest[0] != MP_OBJ_NULL) { int gap = (line_len + WORD_SLOT_LEN - 1) / WORD_SLOT_LEN * WORD_SLOT_LEN - line_len; if (gap < 2) { gap += WORD_SLOT_LEN; } if (line_len + gap + d_len <= MAX_LINE_LEN) { // TODO optimise printing of gap? for (int j = 0; j < gap; ++j) { mp_print_str(print, " "); } mp_print_str(print, d_str); line_len += gap + d_len; } else { mp_printf(print, "\n%s", d_str); line_len = d_len; } } } } mp_print_str(print, "\n"); return (size_t)(-1); // indicate many matches } } }
MP_NOINLINE int main_(int argc, char **argv) { mp_stack_set_limit(40000 * (BYTES_PER_WORD / 4)); pre_process_options(argc, argv); char *heap = malloc(heap_size); gc_init(heap, heap + heap_size); mp_init(); #ifdef _WIN32 set_fmode_binary(); #endif mp_obj_list_init(mp_sys_path, 0); mp_obj_list_init(mp_sys_argv, 0); // set default compiler configuration mp_dynamic_compiler.small_int_bits = 31; mp_dynamic_compiler.opt_cache_map_lookup_in_bytecode = 0; mp_dynamic_compiler.py_builtins_str_unicode = 1; const char *input_file = NULL; const char *output_file = NULL; const char *source_file = NULL; // parse main options for (int a = 1; a < argc; a++) { if (argv[a][0] == '-') { if (strcmp(argv[a], "-X") == 0) { a += 1; } else if (strcmp(argv[a], "-v") == 0) { mp_verbose_flag++; } else if (strncmp(argv[a], "-O", 2) == 0) { if (unichar_isdigit(argv[a][2])) { MP_STATE_VM(mp_optimise_value) = argv[a][2] & 0xf; } else { MP_STATE_VM(mp_optimise_value) = 0; for (char *p = argv[a] + 1; *p && *p == 'O'; p++, MP_STATE_VM(mp_optimise_value)++); } } else if (strcmp(argv[a], "-o") == 0) { if (a + 1 >= argc) { exit(usage(argv)); } a += 1; output_file = argv[a]; } else if (strcmp(argv[a], "-s") == 0) { if (a + 1 >= argc) { exit(usage(argv)); } a += 1; source_file = argv[a]; } else if (strncmp(argv[a], "-msmall-int-bits=", sizeof("-msmall-int-bits=") - 1) == 0) { char *end; mp_dynamic_compiler.small_int_bits = strtol(argv[a] + sizeof("-msmall-int-bits=") - 1, &end, 0); if (*end) { return usage(argv); } // TODO check that small_int_bits is within range of host's capabilities } else if (strcmp(argv[a], "-mno-cache-lookup-bc") == 0) { mp_dynamic_compiler.opt_cache_map_lookup_in_bytecode = 0; } else if (strcmp(argv[a], "-mcache-lookup-bc") == 0) { mp_dynamic_compiler.opt_cache_map_lookup_in_bytecode = 1; } else if (strcmp(argv[a], "-mno-unicode") == 0) { mp_dynamic_compiler.py_builtins_str_unicode = 0; } else if (strcmp(argv[a], "-municode") == 0) { mp_dynamic_compiler.py_builtins_str_unicode = 1; } else { return usage(argv); } } else { if (input_file != NULL) { mp_printf(&mp_stderr_print, "multiple input files\n"); exit(1); } input_file = argv[a]; } } if (input_file == NULL) { mp_printf(&mp_stderr_print, "no input file\n"); exit(1); } int ret = compile_and_save(input_file, output_file, source_file); #if MICROPY_PY_MICROPYTHON_MEM_INFO if (mp_verbose_flag) { mp_micropython_mem_info(0, NULL); } #endif mp_deinit(); return ret & 0xff; }
STATIC bool is_digit(mp_lexer_t *lex) { return unichar_isdigit(lex->chr0); }
static void push_result_token(parser_t *parser, const mp_lexer_t *lex) { const mp_token_t *tok = mp_lexer_cur(lex); mp_parse_node_t pn; if (tok->kind == MP_TOKEN_NAME) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn_copy(tok->str, tok->len)); } else if (tok->kind == MP_TOKEN_NUMBER) { bool dec = false; bool small_int = true; machine_int_t int_val = 0; int len = tok->len; const char *str = tok->str; int base = 10; int i = 0; if (len >= 3 && str[0] == '0') { if (str[1] == 'o' || str[1] == 'O') { // octal base = 8; i = 2; } else if (str[1] == 'x' || str[1] == 'X') { // hexadecimal base = 16; i = 2; } else if (str[1] == 'b' || str[1] == 'B') { // binary base = 2; i = 2; } } bool overflow = false; for (; i < len; i++) { machine_int_t old_val = int_val; if (unichar_isdigit(str[i]) && str[i] - '0' < base) { int_val = base * int_val + str[i] - '0'; } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') { int_val = base * int_val + str[i] - 'a' + 10; } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') { int_val = base * int_val + str[i] - 'A' + 10; } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') { dec = true; break; } else { small_int = false; break; } if (int_val < old_val) { // If new value became less than previous, it's overflow overflow = true; } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) { // If signed number changed sign - it's overflow overflow = true; } } if (dec) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn_copy(str, len)); } else if (small_int && !overflow && MP_FIT_SMALL_INT(int_val)) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val); } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn_copy(str, len)); } } else if (tok->kind == MP_TOKEN_STRING) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn_copy(tok->str, tok->len)); } else if (tok->kind == MP_TOKEN_BYTES) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn_copy(tok->str, tok->len)); } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind); } push_result_node(parser, pn); }
STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) { const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ); ujson_stream_t s = {stream_obj, stream_p->read, 0, 0}; vstr_t vstr; vstr_init(&vstr, 8); mp_obj_list_t stack; // we use a list as a simple stack for nested JSON stack.len = 0; stack.items = NULL; mp_obj_t stack_top = MP_OBJ_NULL; mp_obj_type_t *stack_top_type = NULL; mp_obj_t stack_key = MP_OBJ_NULL; S_NEXT(s); for (;;) { cont: if (S_END(s)) { break; } mp_obj_t next = MP_OBJ_NULL; bool enter = false; byte cur = S_CUR(s); S_NEXT(s); switch (cur) { case ',': case ':': case ' ': case '\t': case '\n': case '\r': goto cont; case 'n': if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') { S_NEXT(s); next = mp_const_none; } else { goto fail; } break; case 'f': if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') { S_NEXT(s); next = mp_const_false; } else { goto fail; } break; case 't': if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') { S_NEXT(s); next = mp_const_true; } else { goto fail; } break; case '"': vstr_reset(&vstr); for (; !S_END(s) && S_CUR(s) != '"';) { byte c = S_CUR(s); if (c == '\\') { c = S_NEXT(s); switch (c) { case 'b': c = 0x08; break; case 'f': c = 0x0c; break; case 'n': c = 0x0a; break; case 'r': c = 0x0d; break; case 't': c = 0x09; break; case 'u': { mp_uint_t num = 0; for (int i = 0; i < 4; i++) { c = (S_NEXT(s) | 0x20) - '0'; if (c > 9) { c -= ('a' - ('9' + 1)); } num = (num << 4) | c; } vstr_add_char(&vstr, num); goto str_cont; } } } vstr_add_byte(&vstr, c); str_cont: S_NEXT(s); } if (S_END(s)) { goto fail; } S_NEXT(s); next = mp_obj_new_str(vstr.buf, vstr.len, false); break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { bool flt = false; vstr_reset(&vstr); for (;;) { vstr_add_byte(&vstr, cur); cur = S_CUR(s); if (cur == '.' || cur == 'E' || cur == 'e') { flt = true; } else if (cur == '-' || unichar_isdigit(cur)) { // pass } else { break; } S_NEXT(s); } if (flt) { next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL); } else { next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL); } break; } case '[': next = mp_obj_new_list(0, NULL); enter = true; break; case '{': next = mp_obj_new_dict(0); enter = true; break; case '}': case ']': { if (stack_top == MP_OBJ_NULL) { // no object at all goto fail; } if (stack.len == 0) { // finished; compound object goto success; } stack.len -= 1; stack_top = stack.items[stack.len]; stack_top_type = mp_obj_get_type(stack_top); goto cont; } default: goto fail; } if (stack_top == MP_OBJ_NULL) { stack_top = next; stack_top_type = mp_obj_get_type(stack_top); if (!enter) { // finished; single primitive only goto success; } } else { // append to list or dict if (stack_top_type == &mp_type_list) { mp_obj_list_append(stack_top, next); } else { if (stack_key == MP_OBJ_NULL) { stack_key = next; if (enter) { goto fail; } } else { mp_obj_dict_store(stack_top, stack_key, next); stack_key = MP_OBJ_NULL; } } if (enter) { if (stack.items == NULL) { mp_obj_list_init(&stack, 1); stack.items[0] = stack_top; } else { mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top); } stack_top = next; stack_top_type = mp_obj_get_type(stack_top); } } } success: // eat trailing whitespace while (unichar_isspace(S_CUR(s))) { S_NEXT(s); } if (!S_END(s)) { // unexpected chars goto fail; } if (stack_top == MP_OBJ_NULL || stack.len != 0) { // not exactly 1 object goto fail; } vstr_clear(&vstr); return stack_top; fail: nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON")); }
// This function implements a simple non-recursive JSON parser. // // The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt // The parser here will parse any valid JSON and return the correct // corresponding Python object. It allows through a superset of JSON, since // it treats commas and colons as "whitespace", and doesn't care if // brackets/braces are correctly paired. It will raise a ValueError if the // input is outside it's specs. // // Most of the work is parsing the primitives (null, false, true, numbers, // strings). It does 1 pass over the input string and so is easily extended to // being able to parse from a non-seekable stream. It tries to be fast and // small in code size, while not using more RAM than necessary. STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) { mp_uint_t len; const char *s = mp_obj_str_get_data(obj, &len); const char *top = s + len; vstr_t vstr; vstr_init(&vstr, 8); mp_obj_list_t stack; // we use a list as a simple stack for nested JSON stack.len = 0; stack.items = NULL; mp_obj_t stack_top = MP_OBJ_NULL; mp_obj_type_t *stack_top_type = NULL; mp_obj_t stack_key = MP_OBJ_NULL; for (;;) { cont: if (s == top) { break; } mp_obj_t next = MP_OBJ_NULL; bool enter = false; switch (*s) { case ',': case ':': case ' ': case '\t': case '\n': case '\r': s += 1; goto cont; case 'n': if (s + 3 < top && s[1] == 'u' && s[2] == 'l' && s[3] == 'l') { s += 4; next = mp_const_none; } else { goto fail; } break; case 'f': if (s + 4 < top && s[1] == 'a' && s[2] == 'l' && s[3] == 's' && s[4] == 'e') { s += 5; next = mp_const_false; } else { goto fail; } break; case 't': if (s + 3 < top && s[1] == 'r' && s[2] == 'u' && s[3] == 'e') { s += 4; next = mp_const_true; } else { goto fail; } break; case '"': vstr_reset(&vstr); for (s++; s < top && *s != '"';) { byte c = *s; if (c == '\\') { s++; c = *s; switch (c) { case 'b': c = 0x08; break; case 'f': c = 0x0c; break; case 'n': c = 0x0a; break; case 'r': c = 0x0d; break; case 't': c = 0x09; break; case 'u': { if (s + 4 >= top) { goto fail; } mp_uint_t num = 0; for (int i = 0; i < 4; i++) { c = (*++s | 0x20) - '0'; if (c > 9) { c -= ('a' - ('9' + 1)); } num = (num << 4) | c; } vstr_add_char(&vstr, num); goto str_cont; } } } vstr_add_byte(&vstr, c); str_cont: s++; } if (s == top) { goto fail; } s++; next = mp_obj_new_str(vstr.buf, vstr.len, false); break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { bool flt = false; vstr_reset(&vstr); for (; s < top; s++) { if (*s == '.' || *s == 'E' || *s == 'e') { flt = true; } else if (*s == '-' || unichar_isdigit(*s)) { // pass } else { break; } vstr_add_byte(&vstr, *s); } if (flt) { next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL); } else { next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL); } break; } case '[': next = mp_obj_new_list(0, NULL); enter = true; s += 1; break; case '{': next = mp_obj_new_dict(0); enter = true; s += 1; break; case '}': case ']': { s += 1; if (stack_top == MP_OBJ_NULL) { // no object at all goto fail; } if (stack.len == 0) { // finished; compound object goto success; } stack.len -= 1; stack_top = stack.items[stack.len]; stack_top_type = mp_obj_get_type(stack_top); goto cont; } default: goto fail; } if (stack_top == MP_OBJ_NULL) { stack_top = next; stack_top_type = mp_obj_get_type(stack_top); if (!enter) { // finished; single primitive only goto success; } } else { // append to list or dict if (stack_top_type == &mp_type_list) { mp_obj_list_append(stack_top, next); } else { if (stack_key == MP_OBJ_NULL) { stack_key = next; if (enter) { goto fail; } } else { mp_obj_dict_store(stack_top, stack_key, next); stack_key = MP_OBJ_NULL; } } if (enter) { if (stack.items == NULL) { mp_obj_list_init(&stack, 1); stack.items[0] = stack_top; } else { mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top); } stack_top = next; stack_top_type = mp_obj_get_type(stack_top); } } } success: // eat trailing whitespace while (s < top && unichar_isspace(*s)) { s++; } if (s < top) { // unexpected chars goto fail; } if (stack_top == MP_OBJ_NULL || stack.len != 0) { // not exactly 1 object goto fail; } vstr_clear(&vstr); return stack_top; fail: nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON")); }
mp_uint_t mp_repl_autocomplete(const char *str, mp_uint_t len, const mp_print_t *print, const char **compl_str) { // scan backwards to find start of "a.b.c" chain const char *org_str = str; const char *top = str + len; for (const char *s = top; --s >= str;) { if (!(unichar_isalpha(*s) || unichar_isdigit(*s) || *s == '_' || *s == '.')) { ++s; str = s; break; } } // begin search in locals dict mp_obj_dict_t *dict = mp_locals_get(); for (;;) { // get next word in string to complete const char *s_start = str; while (str < top && *str != '.') { ++str; } mp_uint_t s_len = str - s_start; if (str < top) { // a complete word, lookup in current dict mp_obj_t obj = MP_OBJ_NULL; for (mp_uint_t i = 0; i < dict->map.alloc; i++) { if (MP_MAP_SLOT_IS_FILLED(&dict->map, i)) { size_t d_len; const char *d_str = mp_obj_str_get_data(dict->map.table[i].key, &d_len); if (s_len == d_len && strncmp(s_start, d_str, d_len) == 0) { obj = dict->map.table[i].value; break; } } } if (obj == MP_OBJ_NULL) { // lookup failed return 0; } // found an object of this name; try to get its dict if (MP_OBJ_IS_TYPE(obj, &mp_type_module)) { dict = mp_obj_module_get_globals(obj); } else { mp_obj_type_t *type; if (MP_OBJ_IS_TYPE(obj, &mp_type_type)) { type = MP_OBJ_TO_PTR(obj); } else { type = mp_obj_get_type(obj); } if (type->locals_dict != NULL && type->locals_dict->base.type == &mp_type_dict) { dict = type->locals_dict; } else { // obj has no dict return 0; } } // skip '.' to move to next word ++str; } else { // end of string, do completion on this partial name // look for matches int n_found = 0; const char *match_str = NULL; mp_uint_t match_len = 0; for (mp_uint_t i = 0; i < dict->map.alloc; i++) { if (MP_MAP_SLOT_IS_FILLED(&dict->map, i)) { size_t d_len; const char *d_str = mp_obj_str_get_data(dict->map.table[i].key, &d_len); if (s_len <= d_len && strncmp(s_start, d_str, s_len) == 0) { if (match_str == NULL) { match_str = d_str; match_len = d_len; } else { // search for longest common prefix of match_str and d_str // (assumes these strings are null-terminated) for (mp_uint_t j = s_len; j <= match_len && j <= d_len; ++j) { if (match_str[j] != d_str[j]) { match_len = j; break; } } } ++n_found; } } } // nothing found if (n_found == 0) { // If there're no better alternatives, and if it's first word // in the line, try to complete "import". if (s_start == org_str) { static const char import_str[] = "import "; if (memcmp(s_start, import_str, s_len) == 0) { *compl_str = import_str + s_len; return sizeof(import_str) - 1 - s_len; } } return 0; } // 1 match found, or multiple matches with a common prefix if (n_found == 1 || match_len > s_len) { *compl_str = match_str + s_len; return match_len - s_len; } // multiple matches found, print them out #define WORD_SLOT_LEN (16) #define MAX_LINE_LEN (4 * WORD_SLOT_LEN) int line_len = MAX_LINE_LEN; // force a newline for first word for (mp_uint_t i = 0; i < dict->map.alloc; i++) { if (MP_MAP_SLOT_IS_FILLED(&dict->map, i)) { size_t d_len; const char *d_str = mp_obj_str_get_data(dict->map.table[i].key, &d_len); if (s_len <= d_len && strncmp(s_start, d_str, s_len) == 0) { int gap = (line_len + WORD_SLOT_LEN - 1) / WORD_SLOT_LEN * WORD_SLOT_LEN - line_len; if (gap < 2) { gap += WORD_SLOT_LEN; } if (line_len + gap + d_len <= MAX_LINE_LEN) { // TODO optimise printing of gap? for (int j = 0; j < gap; ++j) { mp_print_str(print, " "); } mp_print_str(print, d_str); line_len += gap + d_len; } else { mp_printf(print, "\n%s", d_str); line_len = d_len; } } } } mp_print_str(print, "\n"); return (mp_uint_t)(-1); // indicate many matches } } }
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) { const mp_token_t *tok = mp_lexer_cur(lex); mp_parse_node_t pn; if (tok->kind == MP_TOKEN_NAME) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len)); } else if (tok->kind == MP_TOKEN_NUMBER) { bool dec = false; bool small_int = true; machine_int_t int_val = 0; int len = tok->len; const char *str = tok->str; int base = 0; int i = mp_parse_num_base(str, len, &base); bool overflow = false; for (; i < len; i++) { int dig; if (unichar_isdigit(str[i]) && str[i] - '0' < base) { dig = str[i] - '0'; } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') { dig = str[i] - 'a' + 10; } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') { dig = str[i] - 'A' + 10; } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') { dec = true; break; } else { small_int = false; break; } // add next digi and check for overflow if (mp_small_int_mul_overflow(int_val, base)) { overflow = true; } int_val = int_val * base + dig; if (!MP_SMALL_INT_FITS(int_val)) { overflow = true; } } if (dec) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len)); } else if (small_int && !overflow && MP_SMALL_INT_FITS(int_val)) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val); } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len)); } } else if (tok->kind == MP_TOKEN_STRING) { // Don't automatically intern all strings. doc strings (which are usually large) // will be discarded by the compiler, and so we shouldn't intern them. qstr qst = MP_QSTR_NULL; if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) { // intern short strings qst = qstr_from_strn(tok->str, tok->len); } else { // check if this string is already interned qst = qstr_find_strn(tok->str, tok->len); } if (qst != MP_QSTR_NULL) { // qstr exists, make a leaf node pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst); } else { // not interned, make a node holding a pointer to the string data push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len); return; } } else if (tok->kind == MP_TOKEN_BYTES) { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len)); } else { pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind); } push_result_node(parser, pn); }