Ejemplo n.º 1
0
STATIC bool is_whitespace(mp_lexer_t *lex) {
    return unichar_isspace(lex->chr0);
}
Ejemplo n.º 2
0
// This function implements a simple non-recursive JSON parser.
//
// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
// The parser here will parse any valid JSON and return the correct
// corresponding Python object.  It allows through a superset of JSON, since
// it treats commas and colons as "whitespace", and doesn't care if
// brackets/braces are correctly paired.  It will raise a ValueError if the
// input is outside it's specs.
//
// Most of the work is parsing the primitives (null, false, true, numbers,
// strings).  It does 1 pass over the input string and so is easily extended to
// being able to parse from a non-seekable stream.  It tries to be fast and
// small in code size, while not using more RAM than necessary.
STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
    mp_uint_t len;
    const char *s = mp_obj_str_get_data(obj, &len);
    const char *top = s + len;
    vstr_t vstr;
    vstr_init(&vstr, 8);
    mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
    stack.len = 0;
    stack.items = NULL;
    mp_obj_t stack_top = MP_OBJ_NULL;
    mp_obj_type_t *stack_top_type = NULL;
    mp_obj_t stack_key = MP_OBJ_NULL;
    for (;;) {
        cont:
        if (s == top) {
            break;
        }
        mp_obj_t next = MP_OBJ_NULL;
        bool enter = false;
        switch (*s) {
            case ',':
            case ':':
            case ' ':
                s += 1;
                goto cont;
            case 'n':
                if (s + 3 < top && s[1] == 'u' && s[2] == 'l' && s[3] == 'l') {
                    s += 4;
                    next = mp_const_none;
                } else {
                    goto fail;
                }
                break;
            case 'f':
                if (s + 4 < top && s[1] == 'a' && s[2] == 'l' && s[3] == 's' && s[4] == 'e') {
                    s += 5;
                    next = mp_const_false;
                } else {
                    goto fail;
                }
                break;
            case 't':
                if (s + 3 < top && s[1] == 'r' && s[2] == 'u' && s[3] == 'e') {
                    s += 4;
                    next = mp_const_true;
                } else {
                    goto fail;
                }
                break;
            case '"':
                vstr_reset(&vstr);
                for (s++; s < top && *s != '"';) {
                    byte c = *s;
                    if (c == '\\') {
                        s++;
                        c = *s;
                        switch (c) {
                            case 'b': c = 0x08; break;
                            case 'f': c = 0x0c; break;
                            case 'n': c = 0x0a; break;
                            case 'r': c = 0x0d; break;
                            case 't': c = 0x09; break;
                            case 'u': {
                                if (s + 4 >= top) { goto fail; }
                                mp_uint_t num = 0;
                                for (int i = 0; i < 4; i++) {
                                    c = (*++s | 0x20) - '0';
                                    if (c > 9) {
                                        c -= ('a' - ('9' + 1));
                                    }
                                    num = (num << 4) | c;
                                }
                                vstr_add_char(&vstr, num);
                                goto str_cont;
                            }
                        }
                    }
                    vstr_add_byte(&vstr, c);
                str_cont:
                    s++;
                }
                if (s == top) {
                    goto fail;
                }
                s++;
                next = mp_obj_new_str(vstr.buf, vstr.len, false);
                break;
            case '-':
            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
                bool flt = false;
                vstr_reset(&vstr);
                for (; s < top; s++) {
                    if (*s == '.' || *s == 'E' || *s == 'e') {
                        flt = true;
                    } else if (*s == '-' || unichar_isdigit(*s)) {
                        // pass
                    } else {
                        break;
                    }
                    vstr_add_byte(&vstr, *s);
                }
                if (flt) {
                    next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false);
                } else {
                    next = mp_parse_num_integer(vstr.buf, vstr.len, 10);
                }
                break;
            }
            case '[':
                next = mp_obj_new_list(0, NULL);
                enter = true;
                s += 1;
                break;
            case '{':
                next = mp_obj_new_dict(0);
                enter = true;
                s += 1;
                break;
            case '}':
            case ']': {
                s += 1;
                if (stack_top == MP_OBJ_NULL) {
                    // no object at all
                    goto fail;
                }
                if (stack.len == 0) {
                    // finished; compound object
                    goto success;
                }
                stack.len -= 1;
                stack_top = stack.items[stack.len];
                stack_top_type = mp_obj_get_type(stack_top);
                goto cont;
            }
            default:
                goto fail;
        }
        if (stack_top == MP_OBJ_NULL) {
            stack_top = next;
            stack_top_type = mp_obj_get_type(stack_top);
            if (!enter) {
                // finished; single primitive only
                goto success;
            }
        } else {
            // append to list or dict
            if (stack_top_type == &mp_type_list) {
                mp_obj_list_append(stack_top, next);
            } else {
                if (stack_key == MP_OBJ_NULL) {
                    stack_key = next;
                    if (enter) {
                        goto fail;
                    }
                } else {
                    mp_obj_dict_store(stack_top, stack_key, next);
                    stack_key = MP_OBJ_NULL;
                }
            }
            if (enter) {
                if (stack.items == NULL) {
                    mp_obj_list_init(&stack, 1);
                    stack.items[0] = stack_top;
                } else {
                    mp_obj_list_append(&stack, stack_top);
                }
                stack_top = next;
                stack_top_type = mp_obj_get_type(stack_top);
            }
        }
    }
    success:
    // eat trailing whitespace
    while (s < top && unichar_isspace(*s)) {
        s++;
    }
    if (s < top) {
        // unexpected chars
        goto fail;
    }
    if (stack.len != 0) {
        goto fail;
    }
    vstr_clear(&vstr);
    return stack_top;

    fail:
    nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "syntax error in JSON"));
}