static int json_skip_all_skippables(struct json_parse_state_s *state) { // skip all whitespace first int result = json_skip_whitespace(state); if (result) { state->error = json_parse_error_premature_end_of_buffer; return result; } // are we allowed to parse c style comments? if (json_parse_flags_allow_c_style_comments & state->flags_bitset) { // then skip all c style comments result = json_skip_c_style_comments(state); if (result) { state->error = json_parse_error_premature_end_of_buffer; return result; } // and skip any whitespace that happened after the comment. We don't check // this skip whitespace because it could be the case that the c-style // comment was at the end of the file which is totally ok! json_skip_whitespace(state); } return 0; }
static int json_get_array_size(struct json_parse_state_s* state) { size_t elements = 0; if ('[' != state->src[state->offset]) { // expected array to begin with leading '[' return 1; } // skip leading '[' state->offset++; state->dom_size += sizeof(struct json_array_s); while (state->offset < state->size) { if (json_skip_whitespace(state)) { // reached end of buffer before array was complete! return 1; } if (']' == state->src[state->offset]) { // skip trailing ']' state->offset++; // finished the object! break; } // if we parsed at least once element previously, grok for a comma if (0 < elements) { if (',' != state->src[state->offset]) { // expected a comma where there was none! return 1; } // skip comma state->offset++; if (json_skip_whitespace(state)) { // reached end of buffer before array was complete! return 1; } } if (json_get_value_size(state)) { // value parsing failed! return 1; } // successfully parsed an array element! elements++; } state->dom_size += sizeof(struct json_value_s) * elements; state->dom_size += sizeof(struct json_array_element_s) * elements; return 0; }
/**
 * Reads the remaining "key : value" items of an object expression, up to and
 * including the closing '}'.
 *
 * For every item one cell is appended after t (via _sett):
 *   - if json_is_constant(env, value) is true, a quoted (key . value) pair;
 *   - otherwise a form headed by the fn_cons native function whose arguments
 *     are the quoted key and the value expression.
 *
 * @param env The muse environment.
 * @param p   The port being read.
 * @param h   The cell that is returned when '}' is reached.
 * @param t   The cell after which the next item is appended.
 * @param sp  Stack position passed to _unwind after each item is linked in.
 *
 * @return h on success; otherwise the result of muse_raise_error with
 *         json:end-of-file-in-object or json:object-syntax-error.
 */
static muse_cell json_read_object_expr_items( muse_env *env, muse_port_t p, muse_cell h, muse_cell t, int sp )
{
	muse_char ch;
	muse_cell key, value, assoc;

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-object"), MUSE_NIL );
	}

	ch = port_getchar(p);
	if ( ch == '}' ) {
		return h;
	}

	/* Not the end of the object: the character belongs to the key. */
	port_ungetchar(ch,p);
	key = json_read_key(p);

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-object"), MUSE_NIL );
	}

	ch = port_getchar(p);
	if ( ch != ':' ) {
		return muse_raise_error( env, _csymbol(L"json:object-syntax-error"), h );
	}

	value = json_read_expr(p);
	if ( json_is_constant(env, value) ) {
		assoc = _cons( muse_quote( env, _cons( key, value ) ), MUSE_NIL );
	} else {
		assoc = _cons( _cons( _mk_nativefn(fn_cons,NULL), _cons( muse_quote(env,key), _cons( value, MUSE_NIL ) ) ), MUSE_NIL );
	}

	/* Link the new item onto the end of the list before unwinding. */
	_sett( t, assoc );
	t = assoc;
	_unwind(sp);

	json_skip_whitespace(p);
	ch = port_getchar(p);
	if ( ch == ',' ) {
		return json_read_object_expr_items( env, p, h, t, sp );
	}
	if ( ch == '}' ) {
		return h;
	}
	return muse_raise_error( env, _csymbol(L"json:object-syntax-error"), h );
}
/**
 * Reads one JSON value from the port.
 *
 * Leading whitespace is skipped, then the first character is peeked (read and
 * pushed back) to pick the reader: string, number, array, object, or - for a
 * lowercase ASCII letter - a keyword.
 *
 * @return The value produced by the selected reader, or the result of
 *         muse_raise_error with json:unexpected-end-of-stream (nothing left
 *         to read) or json:syntax-error (unrecognised first character).
 */
static muse_cell json_read( muse_port_t p )
{
	muse_char c;

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( p->env, muse_csymbol( p->env, L"json:unexpected-end-of-stream" ), MUSE_NIL );
	}

	/* Peek at the next character without consuming it. */
	c = port_getchar(p);
	port_ungetchar(c,p);

	if ( c == '"' ) {
		return json_read_string(p);
	}
	if ( c == '-' || (c >= '0' && c <= '9') ) {
		return json_read_number(p);
	}
	if ( c == '[' ) {
		return json_read_array(p);
	}
	if ( c == '{' ) {
		return json_read_object(p);
	}
	if ( c >= 'a' && c <= 'z' ) {
		return json_read_keyword(p);
	}

	return muse_raise_error( p->env, muse_csymbol( p->env, L"json:syntax-error" ), MUSE_NIL );
}
/**
 * Reads a JSON object whose opening '{' is the next character on the port.
 *
 * The '{' is consumed, whitespace is skipped, and the items are read into a
 * new hashtable created with muse_mk_hashtable(env, 8).
 *
 * @return Whatever json_read_object_items returns for that table.
 */
static muse_cell json_read_object( muse_port_t p )
{
	muse_env *env = p->env;

	/* Consume the '{'. NOTE(review): muse_debug_only presumably keeps the
	   declaration of c only in debug builds, matching the assert - confirm. */
	muse_debug_only(muse_char c =) port_getchar(p);
	assert( c == '{' );

	json_skip_whitespace(p);

	{
		muse_cell table = muse_mk_hashtable( env, 8 );
		return json_read_object_items( env, p, table );
	}
}
/**
 * Reads a JSON array whose opening '[' is the next character on the port.
 *
 * The '[' is consumed, whitespace is skipped, and the elements are read by
 * json_read_array_items starting from an empty list and a count of zero.
 *
 * @return Whatever json_read_array_items returns.
 */
static muse_cell json_read_array( muse_port_t p )
{
	muse_env *env = p->env;

	/* Consume the '['. NOTE(review): muse_debug_only presumably keeps the
	   declaration of c only in debug builds, matching the assert - confirm. */
	muse_debug_only(muse_char c =) port_getchar(p);
	assert( c == '[' );

	json_skip_whitespace(p);

	{
		muse_cell head = MUSE_NIL;
		muse_cell tail = MUSE_NIL;
		return json_read_array_items( env, p, head, tail, 0 );
	}
}
static int json_get_value_size(struct json_parse_state_s* state) { if (json_skip_whitespace(state)) { // consumed the whole buffer when we expected a value! return 1; } state->dom_size += sizeof(struct json_value_s); switch (state->src[state->offset]) { case '"': return json_get_string_size(state); case '{': return json_get_object_size(state); case '[': return json_get_array_size(state); case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return json_get_number_size(state); default: if ((state->offset + 4) < state->size && 't' == state->src[state->offset + 0] && 'r' == state->src[state->offset + 1] && 'u' == state->src[state->offset + 2] && 'e' == state->src[state->offset + 3]) { state->offset += 4; return 0; } else if ((state->offset + 5) < state->size && 'f' == state->src[state->offset + 0] && 'a' == state->src[state->offset + 1] && 'l' == state->src[state->offset + 2] && 's' == state->src[state->offset + 3] && 'e' == state->src[state->offset + 4]) { state->offset += 5; return 0; } else if ((state->offset + 4) < state->size && 'n' == state->src[state->offset + 0] && 'u' == state->src[state->offset + 1] && 'l' == state->src[state->offset + 2] && 'l' == state->src[state->offset + 3]) { state->offset += 4; return 0; } // invalid value! return 1; } }
/**
 * Reads the remaining "key : value" items of a JSON object, up to and
 * including the closing '}', storing each pair in table with
 * muse_hashtable_put.
 *
 * Whitespace is skipped before the key, before the ':' and before the ','
 * or '}' that follows a value. The last of these keeps this reader
 * consistent with the array and object-expression readers, so that input
 * such as {"a": 1 } is accepted.
 *
 * @param env   The muse environment.
 * @param p     The port being read.
 * @param table The hashtable receiving the items.
 *
 * @return table on success; otherwise the result of muse_raise_error with
 *         json:end-of-file-in-object or json:object-syntax-error.
 */
static muse_cell json_read_object_items( muse_env *env, muse_port_t p, muse_cell table )
{
	muse_char c;
	muse_cell key, value;
	int sp;

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-object"), MUSE_NIL );
	}

	c = port_getchar(p);
	if ( c == '}' ) {
		return table;
	}

	/* Remember the stack position so the temporaries created while reading
	   this item can be released once the pair is stored in the table. */
	sp = _spos();

	/* Not the end of the object: the character belongs to the key. */
	port_ungetchar(c,p);
	key = json_read_key(p);

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-object"), MUSE_NIL );
	}

	c = port_getchar(p);
	if ( c != ':' ) {
		return muse_raise_error( env, _csymbol(L"json:object-syntax-error"), table );
	}

	value = json_read(p);
	muse_hashtable_put( env, table, key, value );
	_unwind(sp);

	/* Allow whitespace between the value and the following ',' or '}'. */
	json_skip_whitespace(p);

	c = port_getchar(p);
	if ( c == ',' ) {
		return json_read_object_items( env, p, table );
	}
	if ( c == '}' ) {
		return table;
	}
	return muse_raise_error( env, _csymbol(L"json:object-syntax-error"), table );
}
/**
 * Reads an array expression whose opening '[' is the next character on the
 * port.
 *
 * The element expressions read by json_read_array_expr_items are consed
 * behind the fn_vector_from_args native function, and the resulting form is
 * passed through json_share_array_expr.
 *
 * @return The result of json_share_array_expr for that form.
 */
static muse_cell json_read_array_expr( muse_port_t p )
{
	muse_env *env = p->env;

	/* Consume the '['. NOTE(review): muse_debug_only presumably keeps the
	   declaration of c only in debug builds, matching the assert - confirm. */
	muse_debug_only(muse_char c =) port_getchar(p);
	assert( c == '[' );

	json_skip_whitespace(p);

	{
		muse_cell form = _cons( _mk_nativefn(fn_vector_from_args,NULL), json_read_array_expr_items( env, p, MUSE_NIL, MUSE_NIL, 0 ) );
		return json_share_array_expr( env, form );
	}
}
/**
 * Consumes whitespace from the port: space, tab, carriage return, newline
 * and form feed.
 *
 * Stops at end of stream, or at the first other character, which is pushed
 * back so the next read sees it.
 *
 * Implemented as a loop rather than one recursive call per character, so
 * stack use does not grow with the length of a whitespace run in the input.
 */
static void json_skip_whitespace( muse_port_t p )
{
	while ( !port_eof(p) ) {
		muse_char c = port_getchar(p);
		if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' ) {
			continue;
		}

		/* Not whitespace: leave it for the caller. */
		port_ungetchar( c, p );
		return;
	}
}
/**
 * Reads an object expression whose opening '{' is the next character on the
 * port.
 *
 * Two cells are prepared before the items are read:
 *   - items: a list headed by the fn_list native function, onto which
 *     json_read_object_expr_items appends one entry per key/value pair;
 *   - form: a cell combined (via _setht) with the fn_alist_to_hashtable
 *     native function and a one-element list holding items.
 *     NOTE(review): _setht presumably sets the head and tail of form, giving
 *     (fn_alist_to_hashtable (fn_list ...)) - confirm.
 *
 * form is allocated before the stack position is recorded and items after
 * it; the stack is unwound once items has been linked into form.
 *
 * @return The result of json_share_object_expr applied to what
 *         json_read_object_expr_items returns.
 */
static muse_cell json_read_object_expr( muse_port_t p )
{
	muse_env *env = p->env;

	/* Consume the '{'. NOTE(review): muse_debug_only presumably keeps the
	   declaration of c only in debug builds, matching the assert - confirm. */
	muse_debug_only(muse_char c =) port_getchar(p);
	assert( c == '{' );

	json_skip_whitespace(p);

	{
		muse_cell form = _cons( MUSE_NIL, MUSE_NIL );
		int mark = _spos();
		muse_cell items = _cons( _mk_nativefn(fn_list,NULL), MUSE_NIL );
		muse_cell result;

		_setht( form, _mk_nativefn(fn_alist_to_hashtable,NULL), _cons( items, MUSE_NIL ) );
		_unwind(mark);

		result = json_read_object_expr_items( env, p, form, items, mark );
		return json_share_object_expr( env, result );
	}
}
/**
 * Reads the remaining elements of a JSON array, up to and including the
 * closing ']', and returns them as a vector.
 *
 * Elements are accumulated in a list (h is its first cell, t its last) and
 * counted in N. When ']' is read, a vector of N slots is created and filled
 * from the list in order.
 *
 * @param env The muse environment.
 * @param p   The port being read.
 * @param h   First cell of the list of elements read so far (MUSE_NIL if none).
 * @param t   Last cell of that list.
 * @param N   Number of elements read so far.
 *
 * @return The vector on success; otherwise the result of muse_raise_error
 *         with json:end-of-file-in-array or json:array-syntax-error.
 */
static muse_cell json_read_array_items( muse_env *env, muse_port_t p, muse_cell h, muse_cell t, int N )
{
	int i;
	muse_char c;

	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-array"), MUSE_NIL );
	}

	c = port_getchar(p);
	if ( c == ']' ) {
		/* End of array: copy the accumulated list into a vector. */
		muse_cell v = muse_mk_vector( env, N );
		for ( i = 0; i < N; ++i ) {
			muse_vector_put( env, v, i, _next(&h) );
		}
		return v;
	}
	port_ungetchar( c, p );

	if ( h ) {
		/* Append to the existing list, then release the temporaries. */
		int sp = _spos();
		muse_cell n = _cons( json_read(p), MUSE_NIL );
		_sett( t, n );
		t = n;
		_unwind(sp);
	} else {
		/* First element: it becomes both the head and the tail. */
		h = t = _cons( json_read(p), MUSE_NIL );
	}

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-array"), h );
	}

	c = port_getchar(p);
	if ( c != ',' && c != ']' ) {
		return muse_raise_error( env, _csymbol(L"json:array-syntax-error"), h );
	}

	if ( c == ']' ) {
		/* Push the ']' back so the next call builds the vector. The
		   character was read with port_getchar, so it is returned with the
		   matching port_ungetchar, as at every other call site in this
		   reader, rather than with port_ungetc. */
		port_ungetchar( c, p );
	}
	return json_read_array_items( env, p, h, t, N+1 );
}
/**
 * Reads one value in "expression" mode.
 *
 * Leading whitespace is skipped and the first character is peeked (read and
 * pushed back). Strings, numbers, arrays and objects go to their dedicated
 * readers. Anything else, including a form starting with '(', is read with
 * muse_pread. If muse_pread yields a symbol:
 *   - null becomes MUSE_NIL;
 *   - true and false are returned quoted;
 *   - any other symbol is returned unchanged.
 *
 * @return The value read, or the result of muse_raise_error with
 *         json:unexpected-end-of-stream when nothing is left to read.
 */
static muse_cell json_read_expr( muse_port_t p )
{
	muse_char c;
	muse_cell e;
	const muse_char *name;

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( p->env, muse_csymbol( p->env, L"json:unexpected-end-of-stream" ), MUSE_NIL );
	}

	/* Peek at the next character without consuming it. */
	c = port_getchar(p);
	port_ungetchar(c,p);

	if ( c == '"' ) {
		return json_read_string(p);
	}
	if ( c == '-' || (c >= '0' && c <= '9') ) {
		return json_read_number(p);
	}
	if ( c == '[' ) {
		return json_read_array_expr(p);
	}
	if ( c == '{' ) {
		return json_read_object_expr(p);
	}
	if ( c == '(' ) {
		return muse_pread(p);
	}

	e = muse_pread(p);
	if ( _cellt(e) != MUSE_SYMBOL_CELL ) {
		return e;
	}

	name = muse_symbol_name(p->env,e);
	if ( wcscmp( name, L"null" ) == 0 ) {
		return MUSE_NIL;
	}
	if ( wcscmp( name, L"true" ) == 0 || wcscmp( name, L"false" ) == 0 ) {
		return muse_quote( p->env, e );
	}
	return e;
}
/**
 * Reads the remaining element expressions of an array expression, up to and
 * including the closing ']', and returns them as a list.
 *
 * @param env The muse environment.
 * @param p   The port being read.
 * @param h   First cell of the list built so far (MUSE_NIL if none).
 * @param t   Last cell of that list.
 * @param N   Number of elements read so far; only passed along, incremented
 *            by one per recursive call.
 *
 * @return h once ']' is read; otherwise the result of muse_raise_error with
 *         json:end-of-file-in-array or json:array-syntax-error.
 */
static muse_cell json_read_array_expr_items( muse_env *env, muse_port_t p, muse_cell h, muse_cell t, int N )
{
	muse_char ch;

	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-array"), MUSE_NIL );
	}

	ch = port_getchar(p);
	if ( ch == ']' ) {
		return h;
	}
	port_ungetchar( ch, p );

	if ( !h ) {
		/* First element: it becomes both the head and the tail. */
		h = t = _cons( json_read_expr(p), MUSE_NIL );
	} else {
		/* Append to the existing list, then release the temporaries. */
		int mark = _spos();
		muse_cell cell = _cons( json_read_expr(p), MUSE_NIL );
		_sett( t, cell );
		t = cell;
		_unwind(mark);
	}

	json_skip_whitespace(p);
	if ( port_eof(p) ) {
		return muse_raise_error( env, _csymbol(L"json:end-of-file-in-array"), h );
	}

	ch = port_getchar(p);
	if ( ch != ',' && ch != ']' ) {
		return muse_raise_error( env, _csymbol(L"json:array-syntax-error"), h );
	}

	if ( ch == ']' ) {
		/* Push the ']' back so the next call returns the finished list. */
		port_ungetchar( ch, p );
	}
	return json_read_array_expr_items( env, p, h, t, N+1 );
}
static int json_parse_value(struct json_parse_state_s* state, struct json_value_s* value) { if (json_skip_whitespace(state)) { // consumed the whole buffer when we expected a value! return 1; } switch (state->src[state->offset]) { case '"': value->type = json_type_string; value->payload = state->dom; state->dom += sizeof(struct json_string_s); return json_parse_string(state, value->payload); case '{': value->type = json_type_object; value->payload = state->dom; state->dom += sizeof(struct json_object_s); return json_parse_object(state, value->payload); case '[': value->type = json_type_array; value->payload = state->dom; state->dom += sizeof(struct json_array_s); return json_parse_array(state, value->payload); case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': value->type = json_type_number; value->payload = state->dom; state->dom += sizeof(struct json_number_s); return json_parse_number(state, value->payload); default: if ((state->offset + 4) < state->size && 't' == state->src[state->offset + 0] && 'r' == state->src[state->offset + 1] && 'u' == state->src[state->offset + 2] && 'e' == state->src[state->offset + 3]) { value->type = json_type_true; value->payload = 0; state->offset += 4; return 0; } else if ((state->offset + 5) < state->size && 'f' == state->src[state->offset + 0] && 'a' == state->src[state->offset + 1] && 'l' == state->src[state->offset + 2] && 's' == state->src[state->offset + 3] && 'e' == state->src[state->offset + 4]) { value->type = json_type_false; value->payload = 0; state->offset += 5; return 0; } else if ((state->offset + 4) < state->size && 'n' == state->src[state->offset + 0] && 'u' == state->src[state->offset + 1] && 'l' == state->src[state->offset + 2] && 'l' == state->src[state->offset + 3]) { value->type = json_type_null; value->payload = 0; state->offset += 4; return 0; } // invalid value! return 1; } }
static int json_parse_array(struct json_parse_state_s* state, struct json_array_s* array) { size_t elements = 0; struct json_array_element_s* previous = 0; if ('[' != state->src[state->offset]) { // expected object to begin with leading '[' return 1; } // skip leading '[' state->offset++; if (json_skip_whitespace(state)) { // consumed the whole buffer when we expected a value! return 1; } // reset elements elements = 0; while (state->offset < state->size) { struct json_array_element_s* element = 0; struct json_value_s* value = 0; if (json_skip_whitespace(state)) { // reached end of buffer before array was complete! return 1; } if (']' == state->src[state->offset]) { // skip trailing ']' state->offset++; // finished the array! break; } // if we parsed at least one element previously, grok for a comma if (0 < elements) { if (',' != state->src[state->offset]) { // expected a comma where there was none! return 1; } // skip comma state->offset++; if (json_skip_whitespace(state)) { // reached end of buffer before array was complete! return 1; } } element = (struct json_array_element_s* )state->dom; state->dom += sizeof(struct json_array_element_s); if (0 == previous) { // this is our first element, so record it in our array array->start = element; } else { previous->next = element; } previous = element; value = (struct json_value_s* )state->dom; state->dom += sizeof(struct json_value_s); element->value = value; if (json_parse_value(state, value)) { // value parsing failed! return 1; } // successfully parsed an array element! elements++; } // end the linked list if (previous) { previous->next = 0; } if (0 == elements) { array->start = 0; } array->length = elements; return 0; }
static int json_parse_object(struct json_parse_state_s* state, struct json_object_s* object) { size_t elements = 0; struct json_object_element_s* previous = 0; if ('{' != state->src[state->offset]) { // expected object to begin with leading '{' return 1; } // skip leading '{' state->offset++; if (json_skip_whitespace(state)) { // consumed the whole buffer when we expected a value! return 1; } if ('}' != state->src[state->offset]) { // we have at least one element as we definitely don't have // an empty object { }! elements++; } // reset elements elements = 0; while (state->offset < state->size) { struct json_object_element_s* element = 0; struct json_string_s* string = 0; struct json_value_s* value = 0; if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } if ('}' == state->src[state->offset]) { // skip trailing '}' state->offset++; // finished the object! break; } // if we parsed at least one element previously, grok for a comma if (0 < elements) { if (',' != state->src[state->offset]) { // expected a comma where there was none! return 1; } // skip comma state->offset++; if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } } element = (struct json_object_element_s* )state->dom; state->dom += sizeof(struct json_object_element_s); if (0 == previous) { // this is our first element, so record it in our object object->start = element; } else { previous->next = element; } previous = element; string = (struct json_string_s* )state->dom; state->dom += sizeof(struct json_string_s); element->name = string; if (json_parse_string(state, string)) { // string parsing failed! return 1; } if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } if (':' != state->src[state->offset]) { // colon seperating name/value pair was missing! 
return 1; } // skip colon state->offset++; if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } value = (struct json_value_s* )state->dom; state->dom += sizeof(struct json_value_s); element->value = value; if (json_parse_value(state, value)) { // value parsing failed! return 1; } // successfully parsed a name/value pair! elements++; } // if we had at least one element, end the linked list if (previous) { previous->next = 0; } if (0 == elements) { object->start = 0; } object->length = elements; return 0; }
static int json_get_object_size(struct json_parse_state_s* state) { size_t elements = 0; if ('{' != state->src[state->offset]) { // expected object to begin with leading '{' return 1; } // skip leading '{' state->offset++; state->dom_size += sizeof(struct json_object_s); while (state->offset < state->size) { if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } if ('}' == state->src[state->offset]) { // skip trailing '}' state->offset++; // finished the object! break; } // if we parsed at least once element previously, grok for a comma if (0 < elements) { if (',' != state->src[state->offset]) { // expected a comma where there was none! return 1; } // skip comma state->offset++; if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } } if (json_get_string_size(state)) { // string parsing failed! return 1; } if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } if (':' != state->src[state->offset]) { // colon seperating name/value pair was missing! return 1; } // skip colon state->offset++; if (json_skip_whitespace(state)) { // reached end of buffer before object was complete! return 1; } if (json_get_value_size(state)) { // value parsing failed! return 1; } // successfully parsed a name/value pair! elements++; } state->dom_size += sizeof(struct json_string_s) * elements; state->dom_size += sizeof(struct json_value_s) * elements; state->dom_size += sizeof(struct json_object_element_s) * elements; return 0; }
/** @brief Parse an array. @param text The text we're parsing. @param arr The token buffer. @param maxtoken The length of the token buffer. @param p The parser state. @returns Parser state after parsing the array. */ static struct json_parser json_parse_array(wchar_t *text, struct json_token *arr, size_t maxtoken, struct json_parser p) { size_t array_tokenidx = p.tokenidx, prev_tokenidx, curr_tokenidx, length=0; struct json_token tok = { .type = JSON_ARRAY, .start = p.textidx, .length = 0, .end = 0, .child = 0, .next = 0, }; json_settoken(arr, tok, p, maxtoken); // current char is [, so we need to go past it. p.textidx++; p.tokenidx++; // Skip through whitespace. p = json_skip_whitespace(text, p); while (text[p.textidx] != L']') { if (text[p.textidx] == L'\0') { p.error = JSONERR_PREMATURE_EOF; return p; } // Parse a value. prev_tokenidx = curr_tokenidx; curr_tokenidx = p.tokenidx; p = json_parse_rec(text, arr, maxtoken, p); if (p.error != JSONERR_NO_ERROR) { return p; } // Now set some bookkeeping of previous values. if (tok.child == 0) { // If this is the first element of the list, set the list's child to point // to it. tok.child = curr_tokenidx; json_setchild(arr, array_tokenidx, curr_tokenidx, maxtoken); } else { // Otherwise set the previous element's next pointer to point to it. json_setnext(arr, prev_tokenidx, curr_tokenidx, maxtoken); } length++; // Skip whitespace. p = json_skip_whitespace(text, p); if (text[p.textidx] == L',') { p.textidx++; p = json_skip_whitespace(text, p); } else if (text[p.textidx] != L']') { // If there was no comma, this better be the end of the object. p.error = JSONERR_EXPECTED_TOKEN; p.errorarg = L','; return p; } } // Set the end of the array token to point to the closing bracket, then move // it up. json_setend(arr, array_tokenidx, p.textidx, maxtoken); json_setlength(arr, array_tokenidx, length, maxtoken); p.textidx++; return p; } /** @brief Parse an object. @param text The text we're parsing. @param arr The token buffer. 
@param maxtoken The length of the token buffer. @param p The parser state. @returns Parser state after parsing the object. */ static struct json_parser json_parse_object(wchar_t *text, struct json_token *arr, size_t maxtoken, struct json_parser p) { size_t object_tokenidx = p.tokenidx, prev_keyidx, curr_keyidx, length=0; struct json_token tok = { .type = JSON_OBJECT, .start = p.textidx, .length = 0, .end = 0, .child = 0, .next = 0, }; json_settoken(arr, tok, p, maxtoken); // current char is {, so we need to go past it. p.textidx++; p.tokenidx++; // Skip through whitespace. p = json_skip_whitespace(text, p); while (text[p.textidx] != L'}') { // Make sure the string didn't end. if (text[p.textidx] == L'\0') { p.error = JSONERR_PREMATURE_EOF; return p; } // Parse a string (key) and value. prev_keyidx = curr_keyidx; curr_keyidx = p.tokenidx; p = json_parse_string(text, arr, maxtoken, p); if (p.error != JSONERR_NO_ERROR) { return p; } p = json_skip_whitespace(text, p); if (text[p.textidx] != L':') { p.error = JSONERR_EXPECTED_TOKEN; p.errorarg = L':'; return p; } p.textidx++; p = json_parse_rec(text, arr, maxtoken, p); if (p.error != JSONERR_NO_ERROR) { return p; } // Now set some bookkeeping of previous values. if (tok.child == 0) { // If this is the first element of the list, set the list's child to point // to it. tok.child = curr_keyidx; json_setchild(arr, object_tokenidx, curr_keyidx, maxtoken); } else { // Otherwise set the previous element's next pointer to point to it. json_setnext(arr, prev_keyidx, curr_keyidx, maxtoken); } // Set the key's child pointer to point at its value. Just cause we can. json_setchild(arr, curr_keyidx, curr_keyidx + 1, maxtoken); length++; // Skip whitespace. p = json_skip_whitespace(text, p); if (text[p.textidx] == L',') { p.textidx++; p = json_skip_whitespace(text, p); } else if (text[p.textidx] != L'}') { // If there was no comma, this better be the end of the object. 
p.error = JSONERR_EXPECTED_TOKEN; p.errorarg = L','; return p; } } // Set the end of the array token to point to the closing bracket, then move // it up. json_setend(arr, object_tokenidx, p.textidx, maxtoken); json_setlength(arr, object_tokenidx, length, maxtoken); p.textidx++; return p; } char *parse_number_state[] = { "START", "MINUS", "ZERO", "DIGIT", "DECIMAL", "DECIMAL_ACCEPT", "EXPONENT", "EXPONENT_DIGIT", "EXPONENT_DIGIT_ACCEPT", "END" }; /** @brief Parse a string number. @param text The text we're parsing. @param arr The token buffer. @param maxtoken The length of the token buffer. @param p The parser state. @returns Parser state after parsing the number. */ static struct json_parser json_parse_number(wchar_t *text, struct json_token *arr, size_t maxtoken, struct json_parser p) { struct json_token tok = { .type = JSON_NUMBER, .start = p.textidx, .length = 0, // not used .end = 0, .child = 0, .next = 0 }; enum state { START, MINUS, ZERO, DIGIT, DECIMAL, DECIMAL_ACCEPT, EXPONENT, EXPONENT_DIGIT, EXPONENT_DIGIT_ACCEPT, END } state = START; /* This function is completely described by this FSM. States marked by asterisk are accepting. Unexpected input at accepting states ends the number, and unexpected input at rejecting states causes an error. This state machine is designed to accept any input given by the diagram in the ECMA JSON spec. -----START----- / | (-) \ / v \ (0) | +----MINUS----+ | (1-9) v v (0) (1-9) v v *ZERO* *DIGIT*-------- | \ (.) (.) 
/ |-\ (0-9) \ | --->DECIMAL<--- \ | | \ | v (0-9) /----\ (0-9) | | *DECIMAL_ACCEPT* ----/ | | | / |(e,E) v (e,E) (e,E) / +-----> EXPONENT <------------- / \ (+,-)v v (0-9) EXPONENT_DIGIT *EXPONENT_DIGIT_ACCEPT* \-----------/ \ /(0-9) (0-9) \--/ */ //printf("input: %s\n", text + p.textidx); while (state != END) { wchar_t c = text[p.textidx]; //printf("state: %s\n", parse_number_state[state]); switch (state) { case START: if (c == L'0') { state = ZERO; } else if (c == L'-') { state = MINUS; } else if (L'1' <= c && c <= L'9') { state = DIGIT; } else { p.error = JSONERR_INVALID_NUMBER; state = END; // ERROR } break; case MINUS: if (c == L'0') { state = ZERO; } else if (L'1' <= c && c <= L'9') { state = DIGIT; } else { p.error = JSONERR_INVALID_NUMBER; state = END; // ERROR } break; case ZERO: if (c == L'.') { state = DECIMAL; } else if (c == L'e' || c == L'E') { state = EXPONENT; } else { state = END; } break; case DIGIT: if (c == L'.') { state = DECIMAL; } else if (c == L'e' || c == L'E') { state = EXPONENT; } else if (L'0' <= c && c <= L'9') { state = DIGIT; } else { state = END; } break; case DECIMAL: if (L'0' <= c && c <= L'9') { state = DECIMAL_ACCEPT; } else { p.error = JSONERR_INVALID_NUMBER; state = END; // ERROR } break; case DECIMAL_ACCEPT: if (L'0' <= c && c <= L'9') { state = DECIMAL_ACCEPT; } else if (c == L'e' || c == L'E') { state = EXPONENT; } else { state = END; } break; case EXPONENT: if (c == L'+' || c == L'-') { state = EXPONENT_DIGIT; } else if (L'0' <= c && c <= L'9') { state = EXPONENT_DIGIT_ACCEPT; } else { p.error = JSONERR_INVALID_NUMBER; state = END; // ERROR } break; case EXPONENT_DIGIT: if (L'0' <= c && c <= L'9') { state = EXPONENT_DIGIT_ACCEPT; } else { p.error = JSONERR_INVALID_NUMBER; state = END; // ERROR } break; case EXPONENT_DIGIT_ACCEPT: if (L'0' <= c && c <= L'9') { state = EXPONENT_DIGIT_ACCEPT; } else { state = END; } break; case END: // never happens assert(false); } p.textidx++; } p.textidx--; // the character we failed on 
tok.end = p.textidx - 1; // the previous character json_settoken(arr, tok, p, maxtoken); p.tokenidx++; return p; } /** @brief Parse any JSON value. @param text The text we're parsing. @param arr The token buffer. @param maxtoken The length of the token buffer. @param p The parser state. @returns Parser state after parsing the value. */ static struct json_parser json_parse_rec(wchar_t *text, struct json_token *arr, size_t maxtoken, struct json_parser p) { p = json_skip_whitespace(text, p); if (text[p.textidx] == '\0') { p.error = JSONERR_PREMATURE_EOF; return p; } switch (text[p.textidx]) { case L'{': return json_parse_object(text, arr, maxtoken, p); case L'[': return json_parse_array(text, arr, maxtoken, p); case L'"': return json_parse_string(text, arr, maxtoken, p); case L't': return json_parse_true(text, arr, maxtoken, p); case L'f': return json_parse_false(text, arr, maxtoken, p); case L'n': return json_parse_null(text, arr, maxtoken, p); default: if (json_isnumber(text[p.textidx])) { return json_parse_number(text, arr, maxtoken, p); } else { p.error = JSONERR_UNEXPECTED_TOKEN; return p; } } } char *json_type_str[] = { "object", "array", "number", "string", "true", "false", "null" }; char *json_error_str[] = { "no error", "encountered an invalid numeric literal", "string ended prematurely", "unexpected token", "invalid surrogate pair", "expected token '%c'", }; struct json_parser json_parse(wchar_t *text, struct json_token *arr, size_t maxtoken) { struct json_parser parser = { .textidx = 0, .tokenidx = 0, .error = JSONERR_NO_ERROR, .errorarg = 0 }; return json_parse_rec(text, arr, maxtoken, parser); } void json_print(struct json_token *arr, size_t n) { size_t i; for (i = 0; i < n; i++) { printf("%03lu: %6s\t%04lu-%04lu,\tlength=%lu,\tchild=%lu,\tnext=%lu\n", i, json_type_str[arr[i].type], arr[i].start, arr[i].end, arr[i].length, arr[i].child, arr[i].next); } } void json_print_error(FILE *f, struct json_parser p) { fprintf(f, "at character %lu: ", p.textidx); 
fprintf(f, json_error_str[p.error], p.errorarg); fprintf(f, "\n"); }