Пример #1
0
// Convert the lexer's current token into a parse node and push that node
// onto the parser's result stack.
//
//  - name tokens become ID leaves; when MICROPY_COMP_CONST is enabled and the
//    name is being parsed under RULE_atom, a hit in parser->consts replaces
//    the name with the stored constant value
//  - integer tokens become small-int nodes when the parsed value is a small
//    int, otherwise const-object nodes
//  - float/imaginary tokens always become const-object nodes
//  - string/bytes tokens become qstr leaves when a qstr is available,
//    otherwise const-object nodes wrapping a newly created str/bytes object
//  - any other token becomes a TOKEN leaf carrying the token kind
STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
    mp_lexer_t *lex = parser->lexer;
    mp_parse_node_t node;

    switch (lex->tok_kind) {
        case MP_TOKEN_NAME: {
            qstr name = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
            #if MICROPY_COMP_CONST
            // only a standalone identifier is looked up in the table of
            // dynamic constants
            mp_map_elem_t *const_elem = NULL;
            if (rule->rule_id == RULE_atom) {
                const_elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(name), MP_MAP_LOOKUP);
            }
            if (const_elem == NULL) {
                // not a known constant (or not an atom): plain identifier
                node = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, name);
            } else if (MP_OBJ_IS_SMALL_INT(const_elem->value)) {
                node = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(const_elem->value));
            } else {
                node = make_node_const_object(parser, lex->tok_line, const_elem->value);
            }
            #else
            (void)rule;
            node = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, name);
            #endif
            break;
        }

        case MP_TOKEN_INTEGER: {
            mp_obj_t value = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
            if (MP_OBJ_IS_SMALL_INT(value)) {
                node = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(value));
            } else {
                node = make_node_const_object(parser, lex->tok_line, value);
            }
            break;
        }

        case MP_TOKEN_FLOAT_OR_IMAG: {
            mp_obj_t value = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
            node = make_node_const_object(parser, lex->tok_line, value);
            break;
        }

        case MP_TOKEN_STRING:
        case MP_TOKEN_BYTES: {
            bool is_str = lex->tok_kind == MP_TOKEN_STRING;
            // Strings/bytes are not all interned automatically: doc strings
            // (which are usually large) are discarded by the compiler, so
            // interning them would be wasted.  Short literals are interned;
            // longer ones only reuse a qstr that already exists.
            qstr interned = lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN
                ? qstr_from_strn(lex->vstr.buf, lex->vstr.len)
                : qstr_find_strn(lex->vstr.buf, lex->vstr.len);
            if (interned == MP_QSTR_NULL) {
                // no qstr: the node holds a pointer to a str/bytes object
                mp_obj_t literal = mp_obj_new_str_of_type(
                    is_str ? &mp_type_str : &mp_type_bytes,
                    (const byte*)lex->vstr.buf, lex->vstr.len);
                node = make_node_const_object(parser, lex->tok_line, literal);
            } else {
                // a qstr exists: a leaf node is enough
                node = mp_parse_node_new_leaf(is_str ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, interned);
            }
            break;
        }

        default:
            node = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
            break;
    }

    push_result_node(parser, node);
}
Пример #2
0
// Constructor for complex objects.
//
// Accepted positional argument counts:
//   0 args: returns a complex built from (0, 0)
//   1 arg:  a str is parsed with mp_parse_num_decimal; an existing complex is
//           returned unchanged; anything else is converted with
//           mp_obj_get_float and used as the real part
//   2 args: the result is args[0] + args[1] * 1j, where either argument may
//           itself be a complex
//
// Raises TypeError when keyword arguments are supplied or when more than two
// positional arguments are given.
STATIC mp_obj_t complex_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    // This constructor never reads keyword arguments, so reject them
    // explicitly instead of silently ignoring them.
    if (n_kw != 0) {
        nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "complex does not take keyword arguments"));
    }

    switch (n_args) {
        case 0:
            return mp_obj_new_complex(0, 0);

        case 1:
            if (MP_OBJ_IS_STR(args[0])) {
                // a string, parse it
                uint l;
                const char *s = mp_obj_str_get_data(args[0], &l);
                return mp_parse_num_decimal(s, l, true, true);
            } else if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
                // a complex, just return it
                return args[0];
            } else {
                // something else, try to cast it to a complex
                return mp_obj_new_complex(mp_obj_get_float(args[0]), 0);
            }

        case 2: {
            // start from the first argument's real/imaginary parts
            mp_float_t real, imag;
            if (MP_OBJ_IS_TYPE(args[0], &mp_type_complex)) {
                mp_obj_complex_get(args[0], &real, &imag);
            } else {
                real = mp_obj_get_float(args[0]);
                imag = 0;
            }
            if (MP_OBJ_IS_TYPE(args[1], &mp_type_complex)) {
                // (real2 + imag2*j) * j == -imag2 + real2*j
                mp_float_t real2, imag2;
                mp_obj_complex_get(args[1], &real2, &imag2);
                real -= imag2;
                imag += real2;
            } else {
                imag += mp_obj_get_float(args[1]);
            }
            return mp_obj_new_complex(real, imag);
        }

        default:
            nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "complex takes at most 2 arguments, %d given", n_args));
    }
}
Пример #3
0
// Read one serialized constant object from the reader.
//
// The first byte is a type tag.  'e' yields the ellipsis object and carries
// no payload.  Every other tag is followed by a length (read_uint) and that
// many payload bytes, which are interpreted according to the tag:
//   's' -> str built from the payload
//   'b' -> bytes built from the payload
//   'i' -> integer parsed from the payload in base 10
//   'f' / 'c' -> number parsed by mp_parse_num_decimal (the third argument
//                is true only for 'c')
// Any other tag trips the assertion.
STATIC mp_obj_t load_obj(mp_reader_t *reader) {
    byte tag = read_byte(reader);
    if (tag == 'e') {
        return MP_OBJ_FROM_PTR(&mp_const_ellipsis_obj);
    }

    // all remaining kinds are length-prefixed text/data
    size_t n_bytes = read_uint(reader);
    vstr_t payload;
    vstr_init_len(&payload, n_bytes);
    read_bytes(reader, (byte*)payload.buf, n_bytes);

    switch (tag) {
        case 's':
            return mp_obj_new_str_from_vstr(&mp_type_str, &payload);
        case 'b':
            return mp_obj_new_str_from_vstr(&mp_type_bytes, &payload);
        case 'i':
            return mp_parse_num_integer(payload.buf, payload.len, 10, NULL);
        default:
            assert(tag == 'f' || tag == 'c');
            return mp_parse_num_decimal(payload.buf, payload.len, tag == 'c', false, NULL);
    }
}
Пример #4
0
// Constructor for complex objects.
//
// The argument counts are first passed to mp_arg_check_num with bounds 0..2.
// Then:
//   0 args: returns a complex built from (0, 0)
//   1 arg:  a str is parsed with mp_parse_num_decimal; an existing complex is
//           returned unchanged; anything else is converted with
//           mp_obj_get_float and used as the real part
//   otherwise: the result is args[0] + args[1] * 1j, where either argument
//           may itself be a complex
STATIC mp_obj_t complex_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
    (void)type_in;
    mp_arg_check_num(n_args, n_kw, 0, 2, false);

    if (n_args == 0) {
        return mp_obj_new_complex(0, 0);
    }

    mp_obj_t first = args[0];

    if (n_args == 1) {
        if (MP_OBJ_IS_STR(first)) {
            // textual form: hand it to the number parser
            size_t text_len;
            const char *text = mp_obj_str_get_data(first, &text_len);
            return mp_parse_num_decimal(text, text_len, true, true, NULL);
        }
        if (MP_OBJ_IS_TYPE(first, &mp_type_complex)) {
            // already a complex: hand back the same object
            return first;
        }
        // anything else is converted to a float and becomes the real part
        return mp_obj_new_complex(mp_obj_get_float(first), 0);
    }

    // two-argument form: start from the first argument's components
    mp_float_t re, im;
    if (MP_OBJ_IS_TYPE(first, &mp_type_complex)) {
        mp_obj_complex_get(first, &re, &im);
    } else {
        re = mp_obj_get_float(first);
        im = 0;
    }

    mp_obj_t second = args[1];
    if (MP_OBJ_IS_TYPE(second, &mp_type_complex)) {
        // (re2 + im2*j) * j == -im2 + re2*j
        mp_float_t re2, im2;
        mp_obj_complex_get(second, &re2, &im2);
        re -= im2;
        im += re2;
    } else {
        im += mp_obj_get_float(second);
    }
    return mp_obj_new_complex(re, im);
}
Пример #5
0
// Constructor for float objects.
//
// The argument counts are first passed to mp_arg_check_num with bounds 0..1.
// With no argument the result is a float built from 0.  Otherwise args[0] is
// inspected: a str is parsed with mp_parse_num_decimal, an existing float is
// returned unchanged, and anything else is converted with mp_obj_get_float.
STATIC mp_obj_t float_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    mp_arg_check_num(n_args, n_kw, 0, 1, false);

    if (n_args == 0) {
        return mp_obj_new_float(0);
    }

    mp_obj_t arg = args[0];

    if (MP_OBJ_IS_STR(arg)) {
        // textual form: hand it to the number parser
        uint text_len;
        const char *text = mp_obj_str_get_data(arg, &text_len);
        return mp_parse_num_decimal(text, text_len, false, false);
    }

    if (MP_OBJ_IS_TYPE(arg, &mp_type_float)) {
        // already a float: hand back the same object
        return arg;
    }

    // anything else goes through the generic float conversion
    return mp_obj_new_float(mp_obj_get_float(arg));
}
Пример #6
0
// Parse one JSON value from a readable stream object and return the
// corresponding object (None/False/True, int, float, str, list or dict).
//
// The parser is non-recursive: nesting is tracked with an explicit stack of
// enclosing containers.  Commas and colons are skipped exactly like
// whitespace, and closing brackets/braces are not matched against the kind of
// container they close, so a superset of JSON is accepted.
//
// Raises ValueError("syntax error in JSON") on malformed input, on trailing
// non-whitespace characters, or when the input does not hold exactly one
// complete value.
//
// NOTE(review): the S_NEXT/S_CUR/S_END macros are defined outside this block;
// the code below assumes S_NEXT advances the stream and yields the new current
// byte, S_CUR yields the current byte without advancing, and S_END is true at
// end of input - confirm against their definitions.
STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
    const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ);
    ujson_stream_t s = {stream_obj, stream_p->read, 0, 0};
    vstr_t vstr;
    vstr_init(&vstr, 8);
    mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
    stack.len = 0;
    stack.items = NULL;
    mp_obj_t stack_top = MP_OBJ_NULL;       // innermost open container (or the sole primitive)
    mp_obj_type_t *stack_top_type = NULL;   // cached type of stack_top
    mp_obj_t stack_key = MP_OBJ_NULL;       // pending dict key waiting for its value
    S_NEXT(s);
    for (;;) {
        cont:
        if (S_END(s)) {
            break;
        }
        mp_obj_t next = MP_OBJ_NULL;    // value produced by this iteration
        bool enter = false;             // true when `next` is a newly opened container
        byte cur = S_CUR(s);
        S_NEXT(s);
        switch (cur) {
            case ',':
            case ':':
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                // separators are treated the same as whitespace
                goto cont;
            case 'n':
                if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') {
                    S_NEXT(s);
                    next = mp_const_none;
                } else {
                    goto fail;
                }
                break;
            case 'f':
                if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') {
                    S_NEXT(s);
                    next = mp_const_false;
                } else {
                    goto fail;
                }
                break;
            case 't':
                if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') {
                    S_NEXT(s);
                    next = mp_const_true;
                } else {
                    goto fail;
                }
                break;
            case '"':
                vstr_reset(&vstr);
                for (; !S_END(s) && S_CUR(s) != '"';) {
                    byte c = S_CUR(s);
                    if (c == '\\') {
                        c = S_NEXT(s);
                        // escapes not listed here (e.g. \" \\ \/) fall through
                        // and add the escaped character itself
                        switch (c) {
                            case 'b': c = 0x08; break;
                            case 'f': c = 0x0c; break;
                            case 'n': c = 0x0a; break;
                            case 'r': c = 0x0d; break;
                            case 't': c = 0x09; break;
                            case 'u': {
                                // 4 hex digits; "| 0x20" folds upper case to
                                // lower case before converting to a nibble
                                mp_uint_t num = 0;
                                for (int i = 0; i < 4; i++) {
                                    c = (S_NEXT(s) | 0x20) - '0';
                                    if (c > 9) {
                                        c -= ('a' - ('9' + 1));
                                    }
                                    num = (num << 4) | c;
                                }
                                vstr_add_char(&vstr, num);
                                goto str_cont;
                            }
                        }
                    }
                    vstr_add_byte(&vstr, c);
                str_cont:
                    S_NEXT(s);
                }
                if (S_END(s)) {
                    // unterminated string
                    goto fail;
                }
                S_NEXT(s);
                next = mp_obj_new_str(vstr.buf, vstr.len, false);
                break;
            case '-':
            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
                bool flt = false;
                vstr_reset(&vstr);
                for (;;) {
                    vstr_add_byte(&vstr, cur);
                    cur = S_CUR(s);
                    if (cur == '.' || cur == 'E' || cur == 'e') {
                        flt = true;
                    } else if (cur == '+' || cur == '-' || unichar_isdigit(cur)) {
                        // pass: digits and signs; '+' is needed so that
                        // exponents such as 1e+10 are collected in full
                    } else {
                        break;
                    }
                    S_NEXT(s);
                }
                // the collected text is validated by the number parsers
                if (flt) {
                    next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL);
                } else {
                    next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL);
                }
                break;
            }
            case '[':
                next = mp_obj_new_list(0, NULL);
                enter = true;
                break;
            case '{':
                next = mp_obj_new_dict(0);
                enter = true;
                break;
            case '}':
            case ']': {
                if (stack_top == MP_OBJ_NULL) {
                    // no object at all
                    goto fail;
                }
                if (stack.len == 0) {
                    // finished; compound object
                    goto success;
                }
                // pop back to the enclosing container
                stack.len -= 1;
                stack_top = stack.items[stack.len];
                stack_top_type = mp_obj_get_type(stack_top);
                goto cont;
            }
            default:
                goto fail;
        }
        if (stack_top == MP_OBJ_NULL) {
            // first value seen: it is the result (or the outermost container)
            stack_top = next;
            stack_top_type = mp_obj_get_type(stack_top);
            if (!enter) {
                // finished; single primitive only
                goto success;
            }
        } else {
            // append to list or dict
            if (stack_top_type == &mp_type_list) {
                mp_obj_list_append(stack_top, next);
            } else {
                if (stack_key == MP_OBJ_NULL) {
                    stack_key = next;
                    if (enter) {
                        // a container cannot be used as a dict key
                        goto fail;
                    }
                } else {
                    mp_obj_dict_store(stack_top, stack_key, next);
                    stack_key = MP_OBJ_NULL;
                }
            }
            if (enter) {
                // remember the current container and descend into the new one
                if (stack.items == NULL) {
                    mp_obj_list_init(&stack, 1);
                    stack.items[0] = stack_top;
                } else {
                    mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top);
                }
                stack_top = next;
                stack_top_type = mp_obj_get_type(stack_top);
            }
        }
    }
    success:
    // eat trailing whitespace
    while (unichar_isspace(S_CUR(s))) {
        S_NEXT(s);
    }
    if (!S_END(s)) {
        // unexpected chars
        goto fail;
    }
    if (stack_top == MP_OBJ_NULL || stack.len != 0) {
        // not exactly 1 object
        goto fail;
    }
    vstr_clear(&vstr);
    return stack_top;

    fail:
    nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON"));
}
Пример #7
0
// This function implements a simple non-recursive JSON parser.
//
// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
// The parser here will parse any valid JSON and return the correct
// corresponding Python object.  It allows through a superset of JSON, since
// it treats commas and colons as "whitespace", and doesn't care if
// brackets/braces are correctly paired.  It will raise a ValueError if the
// input is outside its specs.
//
// Most of the work is parsing the primitives (null, false, true, numbers,
// strings).  It does 1 pass over the input string and so is easily extended to
// being able to parse from a non-seekable stream.  It tries to be fast and
// small in code size, while not using more RAM than necessary.
//
// obj is the str/bytes-like object whose data is parsed; the return value is
// the single JSON value it contains.  Raises ValueError("syntax error in
// JSON") on malformed input, on trailing non-whitespace characters, or when
// the input does not hold exactly one complete value.
//
// Invariant: s never moves past top, so every "s == top" test below reliably
// detects end of input.
STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
    mp_uint_t len;
    const char *s = mp_obj_str_get_data(obj, &len);
    const char *top = s + len;
    vstr_t vstr;
    vstr_init(&vstr, 8);
    mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
    stack.len = 0;
    stack.items = NULL;
    mp_obj_t stack_top = MP_OBJ_NULL;       // innermost open container (or the sole primitive)
    mp_obj_type_t *stack_top_type = NULL;   // cached type of stack_top
    mp_obj_t stack_key = MP_OBJ_NULL;       // pending dict key waiting for its value
    for (;;) {
        cont:
        if (s == top) {
            break;
        }
        mp_obj_t next = MP_OBJ_NULL;    // value produced by this iteration
        bool enter = false;             // true when `next` is a newly opened container
        switch (*s) {
            case ',':
            case ':':
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                // separators are treated the same as whitespace
                s += 1;
                goto cont;
            case 'n':
                if (s + 3 < top && s[1] == 'u' && s[2] == 'l' && s[3] == 'l') {
                    s += 4;
                    next = mp_const_none;
                } else {
                    goto fail;
                }
                break;
            case 'f':
                if (s + 4 < top && s[1] == 'a' && s[2] == 'l' && s[3] == 's' && s[4] == 'e') {
                    s += 5;
                    next = mp_const_false;
                } else {
                    goto fail;
                }
                break;
            case 't':
                if (s + 3 < top && s[1] == 'r' && s[2] == 'u' && s[3] == 'e') {
                    s += 4;
                    next = mp_const_true;
                } else {
                    goto fail;
                }
                break;
            case '"':
                vstr_reset(&vstr);
                for (s++; s < top && *s != '"';) {
                    byte c = *s;
                    if (c == '\\') {
                        s++;
                        if (s == top) {
                            // input ends right after the backslash; without
                            // this check s would step past top and the
                            // end-of-input tests would no longer trigger
                            goto fail;
                        }
                        c = *s;
                        // escapes not listed here (e.g. \" \\ \/) fall through
                        // and add the escaped character itself
                        switch (c) {
                            case 'b': c = 0x08; break;
                            case 'f': c = 0x0c; break;
                            case 'n': c = 0x0a; break;
                            case 'r': c = 0x0d; break;
                            case 't': c = 0x09; break;
                            case 'u': {
                                // need 4 hex digits after the 'u'
                                if (s + 4 >= top) { goto fail; }
                                // "| 0x20" folds upper case to lower case
                                // before converting to a nibble
                                mp_uint_t num = 0;
                                for (int i = 0; i < 4; i++) {
                                    c = (*++s | 0x20) - '0';
                                    if (c > 9) {
                                        c -= ('a' - ('9' + 1));
                                    }
                                    num = (num << 4) | c;
                                }
                                vstr_add_char(&vstr, num);
                                goto str_cont;
                            }
                        }
                    }
                    vstr_add_byte(&vstr, c);
                str_cont:
                    s++;
                }
                if (s == top) {
                    // unterminated string
                    goto fail;
                }
                s++;
                next = mp_obj_new_str(vstr.buf, vstr.len, false);
                break;
            case '-':
            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
                bool flt = false;
                vstr_reset(&vstr);
                for (; s < top; s++) {
                    if (*s == '.' || *s == 'E' || *s == 'e') {
                        flt = true;
                    } else if (*s == '+' || *s == '-' || unichar_isdigit(*s)) {
                        // pass: digits and signs; '+' is needed so that
                        // exponents such as 1e+10 are collected in full
                    } else {
                        break;
                    }
                    vstr_add_byte(&vstr, *s);
                }
                // the collected text is validated by the number parsers
                if (flt) {
                    next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL);
                } else {
                    next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL);
                }
                break;
            }
            case '[':
                next = mp_obj_new_list(0, NULL);
                enter = true;
                s += 1;
                break;
            case '{':
                next = mp_obj_new_dict(0);
                enter = true;
                s += 1;
                break;
            case '}':
            case ']': {
                s += 1;
                if (stack_top == MP_OBJ_NULL) {
                    // no object at all
                    goto fail;
                }
                if (stack.len == 0) {
                    // finished; compound object
                    goto success;
                }
                // pop back to the enclosing container
                stack.len -= 1;
                stack_top = stack.items[stack.len];
                stack_top_type = mp_obj_get_type(stack_top);
                goto cont;
            }
            default:
                goto fail;
        }
        if (stack_top == MP_OBJ_NULL) {
            // first value seen: it is the result (or the outermost container)
            stack_top = next;
            stack_top_type = mp_obj_get_type(stack_top);
            if (!enter) {
                // finished; single primitive only
                goto success;
            }
        } else {
            // append to list or dict
            if (stack_top_type == &mp_type_list) {
                mp_obj_list_append(stack_top, next);
            } else {
                if (stack_key == MP_OBJ_NULL) {
                    stack_key = next;
                    if (enter) {
                        // a container cannot be used as a dict key
                        goto fail;
                    }
                } else {
                    mp_obj_dict_store(stack_top, stack_key, next);
                    stack_key = MP_OBJ_NULL;
                }
            }
            if (enter) {
                // remember the current container and descend into the new one
                if (stack.items == NULL) {
                    mp_obj_list_init(&stack, 1);
                    stack.items[0] = stack_top;
                } else {
                    mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top);
                }
                stack_top = next;
                stack_top_type = mp_obj_get_type(stack_top);
            }
        }
    }
    success:
    // eat trailing whitespace
    while (s < top && unichar_isspace(*s)) {
        s++;
    }
    if (s < top) {
        // unexpected chars
        goto fail;
    }
    if (stack_top == MP_OBJ_NULL || stack.len != 0) {
        // not exactly 1 object
        goto fail;
    }
    vstr_clear(&vstr);
    return stack_top;

    fail:
    nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON"));
}