/**
 * Split the textual form of a JSON number into integer, fraction and exponent parts.
 *
 * The text is scanned by a small goto-driven state machine:
 * sign/leading zero -> integer digits -> optional fraction -> optional exponent.
 *
 * @param str                 Length-delimited input (m_str / m_len).
 * @param integerPortion      Required. Receives the signed integer digits.  When
 *                            exponentPortion is NULL the exponent is folded into this
 *                            value (saturating at INT64_MAX / INT64_MIN on overflow).
 * @param exponentPortion     Optional. Receives the power-of-ten exponent: the explicit
 *                            exponent plus one for every integer digit that no longer
 *                            fit into 64 bits.
 * @param decimalPortion      Optional. Receives the fractional digits as a non-negative
 *                            integer; trailing zeros are not included.
 * @param decimalLeadingZeros Optional. Receives the number of fractional digits folded
 *                            into *decimalPortion (leading zeros included).
 *
 * @return CONV_OK, or a combination of CONV_* flags describing precision loss,
 *         overflow or infinity; CONV_NOT_A_NUM for malformed input and
 *         CONV_GENERIC_ERROR for internal state-machine errors.
 *
 * NOTE(review): a lone "-" is still accepted and reported as 0 with CONV_OK, and the
 * sign of a number whose integer part is 0 (e.g. "-0.5") is not recoverable from the
 * outputs - confirm whether callers rely on either.
 */
ConversionResultFlags parseJSONNumber(raw_buffer *str, int64_t *integerPortion,
		int64_t *exponentPortion, int64_t *decimalPortion, int64_t *decimalLeadingZeros)
{
	size_t i = 0;
	int integerMultiplier = 1;
	int exponentMultiplier = 1;
	int digit;
	int64_t exponent = 0;          /* implicit exponent from dropped integer digits, later the total */
	int64_t explicitExponent = 0;  /* signed value written after 'e' / 'E' */
	int64_t fraction = 0;
	int64_t fractionFactor = 0;
	int64_t trailingZeros = 0;
	bool validDecimal;
	bool validExponent = false;
	ConversionResultFlags result = CONV_OK;

	*integerPortion = 0;

	if (str->m_len == 0) {
		result = CONV_NOT_A_NUM;
		goto fast_stop;
	}

	switch (str->m_str[i]) {
	case '-':
		integerMultiplier = -1;
		i++;
		goto parse_integer_portion;
	case '0':
		/* a leading zero may only be followed by a fraction or an exponent */
		i++;
		if (i >= str->m_len)
			goto fast_stop;
		switch (str->m_str[i]) {
		case '.':
			goto parse_decimal_portion;
		case 'e':
		case 'E':
			goto parse_exponent_portion;
		default:
			result = CONV_NOT_A_NUM;
			goto fast_stop;
		}
	case '1'...'9':
		goto parse_integer_portion;
	default:
		goto not_a_number;
	}

parse_integer_portion:
	assert(integerMultiplier == 1 || integerMultiplier == -1);
	assert(exponent == 0);
	for (; i < str->m_len; i++) {
		switch (str->m_str[i]) {
		case 'e':
		case 'E':
			goto parse_exponent_portion;
		case '.':
			goto parse_decimal_portion;
		case '0'...'9':
			digit = str->m_str[i] - '0';
			if (exponent == 0) {
				/*
				 * Accumulate with the sign already applied so INT64_MIN is
				 * representable.  The range is checked before multiplying or
				 * adding: signed overflow is undefined, so it cannot be
				 * detected after the fact.
				 */
				if (integerMultiplier == 1) {
					if (*integerPortion > INT64_MAX / 10 ||
					    digit > INT64_MAX - *integerPortion * 10) {
						/* digit does not fit - count it as a power of ten instead */
						exponent = 1;
					} else {
						*integerPortion = *integerPortion * 10 + digit;
					}
				} else {
					if (*integerPortion < INT64_MIN / 10 ||
					    -digit < INT64_MIN - *integerPortion * 10) {
						exponent = 1;
					} else {
						*integerPortion = *integerPortion * 10 - digit;
					}
				}
			} else {
				/* already past 64 bits: every further digit is one more power of ten */
				if (exponent == INT64_MAX)
					return CONV_PRECISION_LOSS | (integerMultiplier == 1 ? CONV_POSITIVE_INFINITY : CONV_NEGATIVE_INFINITY);
				exponent++;
			}
			break;
		default:
			PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR1", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]), "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
			goto not_a_number;
		}
	}
	goto finish_parse;

parse_decimal_portion:
	validDecimal = false;
	assert(fraction == 0);
	assert(fractionFactor == 0);
	assert(trailingZeros == 0);

	if (str->m_str[i] != '.') {
		assert(false);
		PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR2", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]), "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int) str->m_len, str->m_str, i);
		goto not_a_number;
	}
	i++;

	for (; i < str->m_len; i++) {
		switch (str->m_str[i]) {
		case 'e':
		case 'E':
			goto parse_exponent_portion;
		case '0'...'9':
			validDecimal = true;
			if (str->m_str[i] == '0') {
				/* short-circuit - zeros are only folded in once a non-zero digit
				 * follows; if none does they are trailing zeros and ignored */
				trailingZeros++;
				break;
			}

			if (UNLIKELY(fractionFactor == INT64_MAX)) {
				assert(false);
				/* this will only become an issue if 10^INT64_MAX < (2^((sizeof(fraction)*8) - 1) - 1)
				 * which will never happen */
				PJ_LOG_ERR("PBNJSON_FRACTION_ERR", 1, PMLOGKS("STRING", str->m_str), "Internal error for input: %.*s", (int)str->m_len, str->m_str);
				return CONV_GENERIC_ERROR;
			}

			/* the pending zeros turned out to be significant - shift them in */
			while (trailingZeros != 0) {
				if (fraction > INT64_MAX / 10)
					goto skip_remaining_decimal;
				fraction *= 10;
				trailingZeros--;
				fractionFactor++;
			}

			digit = str->m_str[i] - '0';
			if (UNLIKELY(fraction > INT64_MAX / 10 || digit > INT64_MAX - fraction * 10))
				goto skip_remaining_decimal;
			fraction = fraction * 10 + digit;
			fractionFactor++;
			break;
		default:
			PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR3", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]), "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
			goto not_a_number;
		}
	}

	/* a '.' must be followed by at least one digit */
	if (UNLIKELY(!validDecimal))
		goto unexpected_end_of_string;
	goto finish_parse;

skip_remaining_decimal:
	/* the fraction no longer fits into 64 bits: validate, but drop, the remaining digits */
	assert(str->m_str[i] >= '0');
	assert(str->m_str[i] <= '9');
	result |= CONV_PRECISION_LOSS;

	for (; i < str->m_len; i++) {
		if (str->m_str[i] >= '0' && str->m_str[i] <= '9')
			continue;
		if (str->m_str[i] == 'e' || str->m_str[i] == 'E')
			goto parse_exponent_portion;
		PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR4", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]), "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
		goto not_a_number;
	}
	assert(i == str->m_len);
	goto finish_parse;

parse_exponent_portion:
	assert(exponent >= 0);
	if (UNLIKELY(str->m_str[i] != 'e' && str->m_str[i] != 'E')) {
		/* problem with the state machine */
		assert(false);
		PJ_LOG_ERR("PBNJSON_NO_EXPONENT", 1, PMLOGKS("STRING", str->m_str), "Expecting an exponent but didn't get one at %zu in '%.*s'", i, (int)str->m_len, str->m_str);
		return CONV_GENERIC_ERROR;
	}
	i++;

	/* the buffer is length-delimited: make sure there is a byte to look at */
	if (i >= str->m_len)
		goto unexpected_end_of_string;

	switch (str->m_str[i]) {
	case '-':
		i++;
		exponentMultiplier = -1;
		break;
	case '+':
		i++;
		/* fallthrough */
	case '0'...'9':
		exponentMultiplier = 1;
		break;
	default:
		PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR5", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]), "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
		goto not_a_number;
	}

	assert(exponentMultiplier == 1 || exponentMultiplier == -1);

	/*
	 * The written exponent is accumulated on its own; `exponent` may already hold
	 * the count of integer digits that did not fit and must be added, not scaled.
	 */
	for (; i < str->m_len; i++) {
		switch (str->m_str[i]) {
		case '0'...'9':
			validExponent = true;
			digit = str->m_str[i] - '0';
			if (exponentMultiplier == 1) {
				if (UNLIKELY(explicitExponent > INT64_MAX / 10 ||
				             digit > INT64_MAX - explicitExponent * 10))
					goto exponent_overflow;
				explicitExponent = explicitExponent * 10 + digit;
			} else {
				if (UNLIKELY(explicitExponent < INT64_MIN / 10 ||
				             -digit < INT64_MIN - explicitExponent * 10))
					goto exponent_overflow;
				explicitExponent = explicitExponent * 10 - digit;
			}
			break;
		default:
			PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR6", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]), "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
			goto not_a_number;
		}
	}
	assert(i == str->m_len);

	/* 'e' (and an optional sign) must be followed by at least one digit */
	if (UNLIKELY(!validExponent))
		goto unexpected_end_of_string;

	/* exponent >= 0 here, so only a positive written exponent can push the sum out of range */
	if (UNLIKELY(explicitExponent > 0 && exponent > INT64_MAX - explicitExponent))
		goto exponent_overflow;
	exponent += explicitExponent;
	goto finish_parse;

exponent_overflow:
	/* overflow of a 64-bit exponent - +/- infinity or 0 it is. */
	if (exponentMultiplier == 1) {
		exponent = INT64_MAX;
		if (integerMultiplier == 1) {
			*integerPortion = INT64_MAX;
			result |= CONV_POSITIVE_INFINITY;
		} else {
			*integerPortion = INT64_MIN;
			result |= CONV_NEGATIVE_INFINITY;
		}
	} else {
		result |= CONV_PRECISION_LOSS;
		exponent = INT64_MIN;
		*integerPortion = 0;
	}
	goto finish_parse;

finish_parse:
	if (trailingZeros) {
		PJ_LOG_DBG("%"PRId64 " unnecessary 0s in fraction portion of '%.*s'", trailingZeros, (int)str->m_len, str->m_str);
	}

	if (fraction == 0) {
		assert(fractionFactor == 0);
	}

	if (*integerPortion == 0 && (decimalPortion == NULL || fraction == 0)) {
		/* shortcut - exponent is redundant if the number is 0.something but we're
		 * ignoring the decimal (or there's no fractional portion) */
		exponent = 0;
		if (fraction != 0) {
			result |= CONV_PRECISION_LOSS;
		}
	}

	/* can't really do this anyways - it would require us shifting values into or out
	 * of the fractional component when we adjust the integerPortion by the exponent.
	 * internally, we would never use this case anyways because if we care what the
	 * fraction is (i.e. we're converting to a floating point), we'll provide the exponent
	 * pointer anyways */
	if (exponentPortion == NULL && exponent != 0 && fraction != 0) {
		result |= CONV_PRECISION_LOSS;
		fraction = 0;
		fractionFactor = 0;
	}

	if (!exponentPortion) {
		/* no place to report the exponent: fold it into the integer */
		if (*integerPortion != 0) {
			if (exponent > 0) {
				while (exponent) {
					if (*integerPortion > INT64_MAX / 10) {
						assert(integerMultiplier == 1);
						result |= CONV_POSITIVE_OVERFLOW;
						*integerPortion = INT64_MAX;
						break;
					} else if (*integerPortion < INT64_MIN / 10) {
						assert(integerMultiplier == -1);
						result |= CONV_NEGATIVE_OVERFLOW;
						*integerPortion = INT64_MIN;
						break;
					}
					/* the two range checks above guarantee this cannot overflow */
					*integerPortion *= 10;
					exponent--;
				}
			} else if (exponent < 0) {
				/* divide exactly for as long as the value is a multiple of ten ... */
				while (exponent && *integerPortion % 10 == 0) {
					*integerPortion /= 10;
					exponent++;
				}
				/* ... then truncate toward zero, whatever the sign, and report the loss */
				if (exponent) {
					result |= CONV_PRECISION_LOSS;
					while (exponent && *integerPortion != 0) {
						*integerPortion /= 10;
						exponent++;
					}
				}
			}
		}
	} else {
		*exponentPortion = exponent;
	}

	if (!decimalPortion) {
		if (fraction != 0) {
			result |= CONV_PRECISION_LOSS;
		}
	} else {
		*decimalPortion = fraction;
		if (decimalLeadingZeros)
			*decimalLeadingZeros = fractionFactor;
	}

	return result;

unexpected_end_of_string:
	PJ_LOG_WARN("PBNJSON_UNXPCTD_EOS", 0, "Unexpected end of string at %zu in '%.*s'", i, (int)str->m_len, str->m_str);
	/* fallthrough */
not_a_number:
	return CONV_NOT_A_NUM;

fast_stop:
	/* early exit (empty input, or nothing after a leading zero): outputs are all zero */
	if (exponentPortion) *exponentPortion = exponent;
	if (decimalPortion) *decimalPortion = fraction;
	if (decimalLeadingZeros) *decimalLeadingZeros = fractionFactor;
	return result;
}
/**
 * Parse `input` with yajl, reporting events to `parser`.
 *
 * yajl is given `my_bounce` as its callback table and an internal PJSAXContext as
 * its context; the caller's callbacks are stored in that context's m_handlers.
 * Unless BYPASS_SCHEMA is set, a validation state is created from `schemaInfo`
 * for the duration of the parse.
 *
 * @param parser      Caller callbacks; NULL selects `no_callbacks`.
 * @param input       Buffer to parse (m_str / m_len).
 * @param schemaInfo  Schema, resolver and error handler. A NULL m_errHandler is
 *                    replaced in place with `null_err_handler`.
 * @param ctxt        Optional in/out user context: *ctxt seeds the internal context
 *                    and is overwritten with jsax_getContext() after the parse.
 * @param logError    When true, the yajl error text is logged on failure
 *                    (forced to true when _DEBUG is defined).
 * @param comments    Passed as the first yajl_parser_config field.
 *
 * @return true if yajl reported success or the error handler did not report a
 *         failure for the returned status; false otherwise, including when the
 *         parser or validation state could not be set up.
 */
static bool jsax_parse_internal(PJSAXCallbacks *parser, raw_buffer input, JSchemaInfoRef schemaInfo, void **ctxt, bool logError, bool comments)
{
	yajl_status parseResult;

	PJ_LOG_TRACE("Parsing '%.*s'", RB_PRINTF(input));

	if (parser == NULL)
		parser = &no_callbacks;

	/* jsax_parse_ex() callers are not covered by the assert in jsax_parse() */
	if (schemaInfo == NULL) {
		PJ_LOG_WARN("Cannot parse without schema information");
		return false;
	}

	if (jis_null_schema(schemaInfo->m_schema)) {
		PJ_LOG_WARN("Cannot match against schema that matches nothing: Schema pointer = %p", schemaInfo->m_schema);
		return false;
	}

	if (schemaInfo->m_schema == jschema_all()) {
		PJ_LOG_DBG("Using default empty schema for matching");
	} else if (schemaInfo->m_resolver == NULL) {
		PJ_LOG_DBG("No resolver specified for the schema.  Make sure %p doesn't contain any external references", schemaInfo->m_schema);
	}

	if (schemaInfo->m_errHandler == NULL)
		schemaInfo->m_errHandler = &null_err_handler;

#ifdef _DEBUG
	logError = true;
#endif

	/* Number events go through the textual yajl_number callback only, so the
	 * integer and double callbacks stay unset. */
	yajl_callbacks yajl_cb = {
		(pj_yajl_null)parser->m_null,           // yajl_null
		(pj_yajl_boolean)parser->m_boolean,     // yajl_boolean
		NULL,                                   // yajl_integer
		NULL,                                   // yajl_double
		(pj_yajl_number)parser->m_number,       // yajl_number
		(pj_yajl_string)parser->m_string,       // yajl_string
		(pj_yajl_start_map)parser->m_objStart,  // yajl_start_map
		(pj_yajl_map_key)parser->m_objKey,      // yajl_map_key
		(pj_yajl_end_map)parser->m_objEnd,      // yajl_end_map
		(pj_yajl_start_array)parser->m_arrStart, // yajl_start_array
		(pj_yajl_end_array)parser->m_arrEnd,    // yajl_end_array
	};

	yajl_parser_config yajl_opts = {
		comments, // caller-controlled; presumably yajl's allow-comments switch - confirm against yajl_parser_config
		0,        // currently only UTF-8 will be supported for input.
	};

	PJSAXContext internalCtxt = {
		.ctxt = (ctxt != NULL ? *ctxt : NULL),
		.m_handlers = &yajl_cb,
		.m_errors = schemaInfo->m_errHandler,
	};

#if !BYPASS_SCHEMA
	internalCtxt.m_validation = jschema_init(schemaInfo);
	if (internalCtxt.m_validation == NULL) {
		PJ_LOG_WARN("Failed to initialize validation state machine");
		return false;
	}
#endif

	yajl_handle handle = yajl_alloc(&my_bounce, &yajl_opts, NULL, &internalCtxt);
	if (handle == NULL) {
		/* never hand a NULL handle to yajl, and don't leak the validation state */
		PJ_LOG_WARN("Failed to allocate yajl parser");
#if !BYPASS_SCHEMA
		jschema_state_release(&internalCtxt.m_validation);
#endif
		return false;
	}

	parseResult = yajl_parse(handle, (unsigned char *)input.m_str, input.m_len);
	if (ctxt != NULL)
		*ctxt = jsax_getContext(&internalCtxt);

	switch (parseResult) {
	case yajl_status_ok:
		break;
	case yajl_status_insufficient_data:
		if (ERR_HANDLER_FAILED(schemaInfo->m_errHandler, m_parser, &internalCtxt))
			goto parse_failure;
		PJ_LOG_WARN("Client claims they handled incomplete JSON input provided '%.*s'", (int)input.m_len, input.m_str);
		break;
	case yajl_status_client_canceled:
	case yajl_status_error:
	default:
		/* cancellation and hard errors are both routed to the m_unknown handler */
		if (ERR_HANDLER_FAILED(schemaInfo->m_errHandler, m_unknown, &internalCtxt))
			goto parse_failure;
		PJ_LOG_WARN("Client claims they handled an unknown error in '%.*s'", (int)input.m_len, input.m_str);
		break;
	}

#if !BYPASS_SCHEMA
	jschema_state_release(&internalCtxt.m_validation);
#endif

#ifndef NDEBUG
	assert(yajl_get_error(handle, 0, NULL, 0) == NULL);
#endif

	yajl_free(handle);
	return true;

parse_failure:
	if (UNLIKELY(logError)) {
		unsigned char *errMsg = yajl_get_error(handle, 1, (unsigned char *)input.m_str, input.m_len);
		PJ_LOG_WARN("Parser reason for failure:\n'%s'", errMsg);
		yajl_free_error(handle, errMsg);
	}

#if !BYPASS_SCHEMA
	jschema_state_release(&internalCtxt.m_validation);
#endif

	yajl_free(handle);
	return false;
}

/**
 * Parse `input` without allowing the `comments` option; see jsax_parse_internal()
 * for the meaning of the remaining parameters and the return value.
 */
bool jsax_parse_ex(PJSAXCallbacks *parser, raw_buffer input, JSchemaInfoRef schemaInfo, void **ctxt, bool logError)
{
	return jsax_parse_internal(parser, input, schemaInfo, ctxt, logError, false);
}

/**
 * Convenience wrapper around jsax_parse_ex(): no user context and no error logging
 * requested.  `schema` must not be NULL.
 */
bool jsax_parse(PJSAXCallbacks *parser, raw_buffer input, JSchemaInfoRef schema)
{
	assert(schema != NULL);
	return jsax_parse_ex(parser, input, schema, NULL, false);
}