static int lex_scan(lex_t *lex, json_error_t *error) { int c; strbuffer_clear(&lex->saved_text); if(lex->token == TOKEN_STRING) { jsonp_free(lex->value.string); lex->value.string = NULL; } c = lex_get(lex, error); while(c == ' ' || c == '\t' || c == '\n' || c == '\r') c = lex_get(lex, error); if(c == STREAM_STATE_EOF) { lex->token = TOKEN_EOF; goto out; } if(c == STREAM_STATE_ERROR) { lex->token = TOKEN_INVALID; goto out; } lex_save(lex, c); if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') lex->token = c; else if(c == '"') lex_scan_string(lex, error); else if(l_isdigit(c) || c == '-') { if(lex_scan_number(lex, c, error)) goto out; } else if(l_isalpha(c)) { /* eat up the whole identifier for clearer error messages */ const char *saved_text; c = lex_get_save(lex, error); while(l_isalpha(c)) c = lex_get_save(lex, error); lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); if(strcmp(saved_text, "true") == 0) lex->token = TOKEN_TRUE; else if(strcmp(saved_text, "false") == 0) lex->token = TOKEN_FALSE; else if(strcmp(saved_text, "null") == 0) lex->token = TOKEN_NULL; else lex->token = TOKEN_INVALID; } else { /* save the rest of the input UTF-8 sequence to get an error message of valid UTF-8 */ lex_save_cached(lex); lex->token = TOKEN_INVALID; } out: return lex->token; }
static int lex_scan_number(lex_t *lex, int c, json_error_t *error) { const char *saved_text; char *end; double value; lex->token = TOKEN_INVALID; if(c == '-') c = lex_get_save(lex, error); if(c == '0') { c = lex_get_save(lex, error); if(l_isdigit(c)) { lex_unget_unsave(lex, c); goto out; } } else if(l_isdigit(c)) { c = lex_get_save(lex, error); while(l_isdigit(c)) c = lex_get_save(lex, error); } else { lex_unget_unsave(lex, c); goto out; } if(c != '.' && c != 'E' && c != 'e') { json_int_t value; lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); errno = 0; value = json_strtoint(saved_text, &end, 10); if(errno == ERANGE) { if(value < 0) error_set(error, lex, "too big negative integer"); else error_set(error, lex, "too big integer"); goto out; } assert(end == saved_text + lex->saved_text.length); lex->token = TOKEN_INTEGER; lex->value.integer = value; return 0; } if(c == '.') { c = lex_get(lex, error); if(!l_isdigit(c)) { lex_unget(lex, c); goto out; } lex_save(lex, c); c = lex_get_save(lex, error); while(l_isdigit(c)) c = lex_get_save(lex, error); } if(c == 'E' || c == 'e') { c = lex_get_save(lex, error); if(c == '+' || c == '-') c = lex_get_save(lex, error); if(!l_isdigit(c)) { lex_unget_unsave(lex, c); goto out; } c = lex_get_save(lex, error); while(l_isdigit(c)) c = lex_get_save(lex, error); } lex_unget_unsave(lex, c); if(jsonp_strtod(&lex->saved_text, &value)) { error_set(error, lex, "real number overflow"); goto out; } lex->token = TOKEN_REAL; lex->value.real = value; return 0; out: return -1; }
static int lex_scan_number(lex_t *lex, char c, json_error_t *error) { const char *saved_text; char *end; double value; lex->token = TOKEN_INVALID; if(c == '-') c = lex_get_save(lex, error); if(c == '0') { c = lex_get_save(lex, error); if(isdigit(c)) { lex_unget_unsave(lex, c); goto out; } } else if(isdigit(c)) { c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } else { lex_unget_unsave(lex, c); goto out; } if(c != '.' && c != 'E' && c != 'e') { long value; lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtol(saved_text, &end, 10); assert(end == saved_text + lex->saved_text.length); if((value == LONG_MAX && errno == ERANGE) || value > INT_MAX) { error_set(error, lex, "too big integer"); goto out; } else if((value == LONG_MIN && errno == ERANGE) || value < INT_MIN) { error_set(error, lex, "too big negative integer"); goto out; } lex->token = TOKEN_INTEGER; lex->value.integer = (int)value; return 0; } if(c == '.') { c = lex_get(lex, error); if(!isdigit(c)) goto out; lex_save(lex, c); c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } if(c == 'E' || c == 'e') { c = lex_get_save(lex, error); if(c == '+' || c == '-') c = lex_get_save(lex, error); if(!isdigit(c)) { lex_unget_unsave(lex, c); goto out; } c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtod(saved_text, &end); assert(end == saved_text + lex->saved_text.length); if(value == 0 && errno == ERANGE) { error_set(error, lex, "real number underflow"); goto out; } /* Cannot test for +/-HUGE_VAL because the HUGE_VAL constant is only defined in C99 mode. So let's trust in sole errno. */ else if(errno == ERANGE) { error_set(error, lex, "real number overflow"); goto out; } lex->token = TOKEN_REAL; lex->value.real = value; return 0; out: return -1; }
static void lex_scan_string(lex_t *lex, json_error_t *error) { int c; const char *p; char *t; int i; lex->value.string = NULL; lex->token = TOKEN_INVALID; c = lex_get_save(lex, error); while(c != '"') { if(c == STREAM_STATE_ERROR) goto out; else if(c == STREAM_STATE_EOF) { error_set(error, lex, "premature end of input"); goto out; } else if(0 <= c && c <= 0x1F) { /* control character */ lex_unget_unsave(lex, c); if(c == '\n') error_set(error, lex, "unexpected newline", c); else error_set(error, lex, "control character 0x%x", c); goto out; } else if(c == '\\') { c = lex_get_save(lex, error); if(c == 'u') { c = lex_get_save(lex, error); for(i = 0; i < 4; i++) { if(!l_isxdigit(c)) { error_set(error, lex, "invalid escape"); goto out; } c = lex_get_save(lex, error); } } else if(c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' || c == 'n' || c == 'r' || c == 't') c = lex_get_save(lex, error); else { error_set(error, lex, "invalid escape"); goto out; } } else c = lex_get_save(lex, error); } /* the actual value is at most of the same length as the source string, because: - shortcut escapes (e.g. "\t") (length 2) are converted to 1 byte - a single \uXXXX escape (length 6) is converted to at most 3 bytes - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair are converted to 4 bytes */ lex->value.string = jsonp_malloc(lex->saved_text.length + 1); if(!lex->value.string) { /* this is not very nice, since TOKEN_INVALID is returned */ goto out; } /* the target */ t = lex->value.string; /* + 1 to skip the " */ p = strbuffer_value(&lex->saved_text) + 1; while(*p != '"') { if(*p == '\\') { p++; if(*p == 'u') { char buffer[4]; int length; int32_t value; value = decode_unicode_escape(p); p += 5; if(0xD800 <= value && value <= 0xDBFF) { /* surrogate pair */ if(*p == '\\' && *(p + 1) == 'u') { int32_t value2 = decode_unicode_escape(++p); p += 5; if(0xDC00 <= value2 && value2 <= 0xDFFF) { /* valid second surrogate */ value = ((value - 0xD800) << 10) + (value2 - 0xDC00) + 0x10000; } else { /* invalid second surrogate */ error_set(error, lex, "invalid Unicode '\\u%04X\\u%04X'", value, value2); goto out; } } else { /* no second surrogate */ error_set(error, lex, "invalid Unicode '\\u%04X'", value); goto out; } } else if(0xDC00 <= value && value <= 0xDFFF) { error_set(error, lex, "invalid Unicode '\\u%04X'", value); goto out; } else if(value == 0) { error_set(error, lex, "\\u0000 is not allowed"); goto out; } if(utf8_encode(value, buffer, &length)) assert(0); memcpy(t, buffer, length); t += length; } else { switch(*p) { case '"': case '\\': case '/': *t = *p; break; case 'b': *t = '\b'; break; case 'f': *t = '\f'; break; case 'n': *t = '\n'; break; case 'r': *t = '\r'; break; case 't': *t = '\t'; break; default: assert(0); } t++; p++; } } else *(t++) = *(p++); } *t = '\0'; lex->token = TOKEN_STRING; return; out: jsonp_free(lex->value.string); }
static int lex_scan_number(lex_t *lex, char c, json_error_t *error) { const char *saved_text; char *end; double value; lex->token = TOKEN_INVALID; if(c == '-') c = lex_get_save(lex, error); if(c == '0') { c = lex_get_save(lex, error); if(isdigit(c)) { lex_unget_unsave(lex, c); goto out; } } else if(isdigit(c)) { c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } else { lex_unget_unsave(lex, c); goto out; } if(c != '.' && c != 'E' && c != 'e') { long value; lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtol(saved_text, &end, 10); assert(end == saved_text + lex->saved_text.length); if((value == LONG_MAX && errno == ERANGE) || value > INT_MAX) { unsigned long uvalue = strtoul(saved_text, &end, 10); if( !(uvalue>value) ) { error_set(error, lex, "too big integer"); goto out; } else { value = uvalue; } } else if((value == LONG_MIN && errno == ERANGE) || value < INT_MIN) { error_set(error, lex, "too big negative integer"); goto out; } lex->token = TOKEN_INTEGER; lex->value.integer = (int)value; return 0; } if(c == '.') { c = lex_get(lex, error); if(!isdigit(c)) goto out; lex_save(lex, c); c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } if(c == 'E' || c == 'e') { c = lex_get_save(lex, error); if(c == '+' || c == '-') c = lex_get_save(lex, error); if(!isdigit(c)) { lex_unget_unsave(lex, c); goto out; } c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtod(saved_text, &end); assert(end == saved_text + lex->saved_text.length); if(errno == ERANGE && value != 0) { error_set(error, lex, "real number overflow"); goto out; } lex->token = TOKEN_REAL; lex->value.real = value; return 0; out: return -1; }