예제 #1
0
/**
 * Splits the textual JSON number in @p str into integer, fraction and exponent parts.
 *
 * Accepted shape: optional '-', at least one digit, optional '.' followed by at least
 * one digit, optional 'e'/'E' with optional sign and at least one digit.  An unsigned
 * leading '0' may only be followed by '.', 'e'/'E' or the end of input.
 *
 * @param str                 Buffer to parse; only m_str[0 .. m_len-1] is read.
 * @param integerPortion      Required.  Reset to 0 on entry, then receives the signed
 *                            integer digits.  When @p exponentPortion is NULL the
 *                            exponent is folded into this value.
 * @param exponentPortion     Optional.  Receives the power-of-ten exponent, including
 *                            one for every integer digit that did not fit in 64 bits.
 * @param decimalPortion      Optional.  Receives the fraction digits as a non-negative
 *                            integer.  If NULL, a non-zero fraction is reported as
 *                            CONV_PRECISION_LOSS.
 * @param decimalLeadingZeros Optional.  Receives the number of decimal digits that were
 *                            folded into *decimalPortion (only written when
 *                            @p decimalPortion is non-NULL).
 *
 * @return CONV_OK, CONV_NOT_A_NUM for malformed input, or a combination of the
 *         precision-loss / overflow / infinity flags set below.  On the malformed-input
 *         paths that log a warning, only *integerPortion has been written.
 *
 * NOTE(review): the sign is carried solely by *integerPortion, so for inputs such as
 * "-0.5" the outputs do not record that the value is negative - confirm with callers.
 */
ConversionResultFlags parseJSONNumber(raw_buffer *str, int64_t *integerPortion,
		int64_t *exponentPortion, int64_t *decimalPortion, int64_t *decimalLeadingZeros)
{
	size_t i = 0;
	int integerMultiplier = 1;
	int exponentMultiplier = 1;
	int digit;
	int64_t exponent = 0;
	// number of integer digits dropped because they did not fit; kept apart from the
	// explicit exponent while that one is being parsed
	int64_t implicitExponent = 0;
	int64_t fraction = 0;
	int64_t fractionFactor = 0;
	int64_t trailingZeros = 0;
	bool validDecimal;

	ConversionResultFlags result = CONV_OK;

	*integerPortion = 0;

	if (str->m_len == 0) {
		result = CONV_NOT_A_NUM;
		goto fast_stop;
	}

	switch (str->m_str[i]) {
		case '-':
			integerMultiplier = -1;
			i++;
			// a lone '-' (or '-' followed by '.', 'e', ...) is not a number
			if (UNLIKELY(i >= str->m_len)) {
				PJ_LOG_WARN("PBNJSON_UNXPCTD_EOS", 0, "Unexpected end of string at %zu in '%.*s'", i, (int)str->m_len, str->m_str);
				goto not_a_number;
			}
			if (UNLIKELY(str->m_str[i] < '0' || str->m_str[i] > '9')) {
				PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR1", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]),
				            "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
				goto not_a_number;
			}
			goto parse_integer_portion;
		case '0':
			i++;
			if (i >= str->m_len)
				goto fast_stop;
			switch(str->m_str[i])
			{
			case '.':
				goto parse_decimal_portion;
			case 'e':
			case 'E':
				goto parse_exponent_portion;
			default:
				result = CONV_NOT_A_NUM;
				goto fast_stop;
			}
		case '1'...'9':
			goto parse_integer_portion;
		default:
			goto not_a_number;
	}

parse_integer_portion:
	assert(integerMultiplier == 1 || integerMultiplier == -1);
	assert(exponent == 0);
	for (; i  < str->m_len; i++) {
		switch (str->m_str[i]) {
			case 'e':
			case 'E':
				goto parse_exponent_portion;
			case '.':
				goto parse_decimal_portion;
			case '0'...'9':
				if (exponent == 0) {
					bool wouldOverflow;

					digit = str->m_str[i] - '0';
					// test the bound before multiplying: signed overflow is undefined,
					// so it must never be detected after the fact
					if (integerMultiplier == 1)
						wouldOverflow = *integerPortion > (INT64_MAX - digit) / 10;
					else
						wouldOverflow = *integerPortion < (INT64_MIN + digit) / 10;

					if (UNLIKELY(wouldOverflow)) {
						// digit does not fit - from here on only count magnitude
						exponent = 1;
					} else {
						*integerPortion = *integerPortion * 10 + integerMultiplier * digit;
					}
				} else {
					if (exponent == INT64_MAX)
						return CONV_PRECISION_LOSS | (integerMultiplier == 1 ? CONV_POSITIVE_INFINITY : CONV_NEGATIVE_INFINITY);
					exponent++;
				}
				break;
			default:
				PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR1", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]),
				            "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
				goto not_a_number;
		}
	}
	goto finish_parse;

parse_decimal_portion:
	validDecimal = false;
	assert(fraction == 0);
	assert(fractionFactor == 0);
	assert(trailingZeros == 0);

	if (str->m_str[i] != '.') {
		assert(false);
		PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR2", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]),
		            "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int) str->m_len, str->m_str, i);
		goto not_a_number;
	}
	i++;

	for (; i < str->m_len; i++) {
		switch(str->m_str[i]) {
			case 'e':
			case 'E':
				goto parse_exponent_portion;
			case '0'...'9':
				validDecimal = true;
				if (str->m_str[i] == '0') {
					// short-circuit - trailing 0s are ignored if that's what they are.
					trailingZeros ++;
					break;
				}
				if (UNLIKELY(fractionFactor == INT64_MAX)) {
					assert(false);
					// this will only become an issue if 10^INT64_MAX < (2^((sizeof(fraction)*8) - 1) - 1)
					// which will never happen
					PJ_LOG_ERR("PBNJSON_FRACTION_ERR", 1, PMLOGKS("STRING", str->m_str), "Internal error for input: %.*s", (int)str->m_len, str->m_str);
					return CONV_GENERIC_ERROR;
				}

				// the zeros seen so far turned out to be significant: shift them in
				while (trailingZeros != 0) {
					if (fraction > INT64_MAX / 10)
						goto skip_remaining_decimal;
					trailingZeros--;
					fractionFactor++;
					fraction *= 10;
				}

				digit = str->m_str[i] - '0';
				if (UNLIKELY(fraction > (INT64_MAX - digit) / 10))
					goto skip_remaining_decimal;
				fractionFactor++;
				fraction = fraction * 10 + digit;
				break;
			default:
				PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR3", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]),
				            "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
				goto not_a_number;
		}
	}
	if (UNLIKELY(!validDecimal)) {
		PJ_LOG_WARN("PBNJSON_UNXPCTD_EOS", 0, "Unexpected end of string at %zu in '%.*s'", i, (int)str->m_len, str->m_str);
		goto not_a_number;
	}
	goto finish_parse;

skip_remaining_decimal:
	// the fraction no longer fits in 64 bits: validate the rest but drop its digits
	assert(str->m_str[i] >= '0');
	assert(str->m_str[i] <= '9');

	result |= CONV_PRECISION_LOSS;

	for (; i < str->m_len; i++) {
		if (str->m_str[i] >= '0' && str->m_str[i] <= '9')
			continue;
		if (str->m_str[i] == 'e' || str->m_str[i] == 'E')
			goto parse_exponent_portion;

		PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR4", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]),
		            "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
		goto not_a_number;
	}
	assert(i == str->m_len);
	goto finish_parse;

parse_exponent_portion:
	assert(exponent >= 0);
	if (UNLIKELY(str->m_str[i] != 'e' && str->m_str[i] != 'E')) {
		// problem with the state machine
		assert(false);
		PJ_LOG_ERR("PBNJSON_NO_EXPONENT", 1, PMLOGKS("STRING", str->m_str), "Expecting an exponent but didn't get one at %zu in '%.*s'", i, (int)str->m_len, str->m_str);
		return CONV_GENERIC_ERROR;
	}
	i++;

	// `exponent` may already hold the count of dropped integer digits; parse the
	// explicit exponent from zero and add the two together afterwards
	implicitExponent = exponent;
	exponent = 0;

	// the buffer is length-delimited, so "1e" must not read past the end
	if (UNLIKELY(i >= str->m_len)) {
		PJ_LOG_WARN("PBNJSON_UNXPCTD_EOS", 0, "Unexpected end of string at %zu in '%.*s'", i, (int)str->m_len, str->m_str);
		goto not_a_number;
	}

	switch (str->m_str[i]) {
	case '-':
		i++;
		exponentMultiplier = -1;
		break;
	case '+':
		i++;
		/* fallthrough */
	case '0'...'9':
		exponentMultiplier = 1;
		break;
	default:
		PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR5", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]),
		            "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
		goto not_a_number;
	}
	assert(exponentMultiplier == 1 || exponentMultiplier == -1);

	// a sign must be followed by at least one digit ("1e+" is malformed)
	if (UNLIKELY(i >= str->m_len)) {
		PJ_LOG_WARN("PBNJSON_UNXPCTD_EOS", 0, "Unexpected end of string at %zu in '%.*s'", i, (int)str->m_len, str->m_str);
		goto not_a_number;
	}

	for (; i < str->m_len; i++) {
		switch (str->m_str[i]) {
			case '0'...'9':
				digit = str->m_str[i] - '0';
				if (exponentMultiplier == 1) {
					if (UNLIKELY(exponent > (INT64_MAX - digit) / 10))
						goto exponent_overflow;
				} else {
					if (UNLIKELY(exponent < (INT64_MIN + digit) / 10))
						goto exponent_overflow;
				}
				exponent = exponent * 10 + exponentMultiplier * digit;
				break;
			default:
				PJ_LOG_WARN("PBNJSON_PNUM_UNXPCTD_CHAR6", 1, PMLOGKFV("CHAR", "%c", str->m_str[i]),
				            "Unexpected character %d('%c') in '%.*s' at %zu", (int)str->m_str[i], str->m_str[i], (int)str->m_len, str->m_str, i);
				goto not_a_number;
		}
	}
	assert(i == str->m_len);

	// implicitExponent >= 0, so only a positive explicit exponent can overflow here
	if (UNLIKELY(exponent > INT64_MAX - implicitExponent))
		goto exponent_overflow;
	exponent += implicitExponent;
	goto finish_parse;

exponent_overflow:
	// overflow of a 64-bit exponent - +/- infinity or 0 it is.
	if (exponentMultiplier == 1) {
		if (*integerPortion == 0 && fraction == 0) {
			// zero scaled by any power of ten is still zero
			exponent = 0;
		} else {
			exponent = INT64_MAX;
			if (integerMultiplier == 1) {
				*integerPortion = INT64_MAX;
				result |= CONV_POSITIVE_INFINITY;
			} else {
				*integerPortion = INT64_MIN;
				result |= CONV_NEGATIVE_INFINITY;
			}
		}
	} else {
		result |= CONV_PRECISION_LOSS;
		exponent = INT64_MIN;
		*integerPortion = 0;
	}
	goto finish_parse;

finish_parse:
	if (trailingZeros) {
		PJ_LOG_DBG("%"PRId64 " unnecessary 0s in fraction portion of '%.*s'", trailingZeros, (int)str->m_len, str->m_str);
	}

	if (fraction == 0) {
		assert(fractionFactor == 0);
	}

	if (*integerPortion == 0 && (decimalPortion == NULL || fraction == 0)) {
		// shortcut - exponent is redundant if the number is 0.something but we're
		// ignoring the decimal (or there's no fractional portion)
		exponent = 0;
		if (fraction != 0) {
			result |= CONV_PRECISION_LOSS;
		}
	}

	// can't really do this anyways - it would require us shifting values into or out
	// of the fractional component when we adjust the integerPortion by the exponent.
	// internally, we would never use this case anyways because if we care what the
	// fraction is (i.e. we're converting to a floating point), we'll provide the exponent
	// pointer anyways
	if (exponentPortion == NULL && exponent != 0 && fraction != 0) {
		result |= CONV_PRECISION_LOSS;
		fraction = 0;
		fractionFactor = 0;
	}

	if (!exponentPortion) {
		// caller does not want the exponent: fold it into the integer value
		if (*integerPortion != 0) {
			if (exponent > 0) {
				while (exponent) {
					if (*integerPortion > INT64_MAX / 10) {
						assert(integerMultiplier == 1);
						result |= CONV_POSITIVE_OVERFLOW;
						*integerPortion = INT64_MAX;
						break;
					} else if (*integerPortion < INT64_MIN / 10) {
						assert(integerMultiplier == -1);
						result |= CONV_NEGATIVE_OVERFLOW;
						*integerPortion = INT64_MIN;
						break;
					}
					// both bounds were checked above, so this cannot overflow
					*integerPortion *= 10;
					exponent--;
				}
			} else if (exponent < 0) {
				if (fraction) {
					result |= CONV_PRECISION_LOSS;
					goto lost_precision;
				}
				while (exponent) {
					if (*integerPortion % 10 != 0) {
						result |= CONV_PRECISION_LOSS;
						goto lost_precision;
					}
					*integerPortion /= 10;
					exponent++;
				}
lost_precision:
				// truncate toward zero for both signs; stop once nothing is left
				while (exponent++ && *integerPortion != 0)
					*integerPortion /= 10;
			}
		}
	} else {
		*exponentPortion = exponent;
	}

	if (!decimalPortion) {
		if (fraction != 0) {
			result |= CONV_PRECISION_LOSS;
		}
	} else {
		*decimalPortion = fraction;
		if (decimalLeadingZeros)
			*decimalLeadingZeros = fractionFactor;
	}

	return result;

not_a_number:
	return CONV_NOT_A_NUM;

fast_stop:
	if (exponentPortion) *exponentPortion = exponent;
	if (decimalPortion) *decimalPortion = fraction;
	if (decimalLeadingZeros) *decimalLeadingZeros = fractionFactor;
	return result;
}
예제 #2
0
/**
 * Runs the yajl parser over @p input, routing events through my_bounce with a
 * PJSAXContext that carries the caller's callbacks, error handlers and (unless
 * BYPASS_SCHEMA is set) the schema validation state.
 *
 * @param parser     SAX callbacks; NULL selects no_callbacks.
 * @param input      Text to parse (m_str / m_len).
 * @param schemaInfo Must be non-NULL (dereferenced unconditionally).  If its
 *                   m_errHandler is NULL it is replaced with &null_err_handler.
 * @param ctxt       Optional in/out user context: *ctxt seeds the internal context
 *                   and is overwritten with jsax_getContext() after parsing.
 * @param logError   When true, yajl's error text is logged on failure (forced to
 *                   true when _DEBUG is defined).
 * @param comments   Stored as the first field of yajl_parser_config.
 *                   NOTE(review): presumably yajl's allowComments switch - confirm
 *                   against the yajl version in use.
 *
 * @return true if yajl reported success or the matching error handler did not
 *         report failure (per ERR_HANDLER_FAILED); false otherwise, including when
 *         the schema matches nothing or initialisation fails.
 */
static bool jsax_parse_internal(PJSAXCallbacks *parser, raw_buffer input, JSchemaInfoRef schemaInfo, void **ctxt, bool logError, bool comments)
{
	yajl_status parseResult;
	bool parsedOk = true;

	PJ_LOG_TRACE("Parsing '%.*s'", RB_PRINTF(input));

	if (parser == NULL)
		parser = &no_callbacks;

	if (jis_null_schema(schemaInfo->m_schema)) {
		PJ_LOG_WARN("Cannot match against schema that matches nothing: Schema pointer = %p", schemaInfo->m_schema);
		return false;
	}

	if (schemaInfo->m_schema == jschema_all()) {
		PJ_LOG_DBG("Using default empty schema for matching");
	} else {
		if (schemaInfo->m_resolver == NULL) {
			PJ_LOG_DBG("No resolver specified for the schema.  Make sure %p doesn't contain any external references", schemaInfo->m_schema);
		}
	}

	if (schemaInfo->m_errHandler == NULL)
		schemaInfo->m_errHandler = &null_err_handler;

#ifdef _DEBUG
	logError = true;
#endif

	yajl_callbacks yajl_cb = {
		(pj_yajl_null)parser->m_null, // yajl_null
		(pj_yajl_boolean)parser->m_boolean, // yajl_boolean
		NULL, // yajl_integer
		NULL, // yajl_double
		(pj_yajl_number)parser->m_number, // yajl_number
		(pj_yajl_string)parser->m_string, // yajl_string
		(pj_yajl_start_map)parser->m_objStart, // yajl_start_map
		(pj_yajl_map_key)parser->m_objKey, // yajl_map_key
		(pj_yajl_end_map)parser->m_objEnd, // yajl_end_map
		(pj_yajl_start_array)parser->m_arrStart, // yajl_start_array
		(pj_yajl_end_array)parser->m_arrEnd, // yajl_end_array
	};

	yajl_parser_config yajl_opts = {
		comments, // taken from the caller's `comments` flag
		0, // currently only UTF-8 will be supported for input.
	};

	PJSAXContext internalCtxt = {
		.ctxt = (ctxt != NULL ? *ctxt : NULL),
		.m_handlers = &yajl_cb,
		.m_errors = schemaInfo->m_errHandler,
	};

#if !BYPASS_SCHEMA
	internalCtxt.m_validation = jschema_init(schemaInfo);
	if (internalCtxt.m_validation == NULL) {
		PJ_LOG_WARN("Failed to initialize validation state machine");
		return false;
	}
#endif

	yajl_handle handle = yajl_alloc(&my_bounce, &yajl_opts, NULL, &internalCtxt);
	if (handle == NULL) {
		// never hand a NULL handle to yajl, and don't leak the validation state
		PJ_LOG_WARN("Failed to allocate yajl parser");
#if !BYPASS_SCHEMA
		jschema_state_release(&internalCtxt.m_validation);
#endif
		return false;
	}

	parseResult = yajl_parse(handle, (unsigned char *)input.m_str, input.m_len);
	if (ctxt != NULL) *ctxt = jsax_getContext(&internalCtxt);

	switch (parseResult) {
		case yajl_status_ok:
			break;
		case yajl_status_insufficient_data:
			if (ERR_HANDLER_FAILED(schemaInfo->m_errHandler, m_parser, &internalCtxt))
				goto parse_failure;
			PJ_LOG_WARN("Client claims they handled incomplete JSON input provided '%.*s'", (int)input.m_len, input.m_str);
			break;
		case yajl_status_client_canceled:
		case yajl_status_error:
		default:
			// every remaining status is reported through the m_unknown handler
			if (ERR_HANDLER_FAILED(schemaInfo->m_errHandler, m_unknown, &internalCtxt))
				goto parse_failure;
			PJ_LOG_WARN("Client claims they handled an unknown error in '%.*s'", (int)input.m_len, input.m_str);
			break;
	}
	goto cleanup;

parse_failure:
	parsedOk = false;
	if (UNLIKELY(logError)) {
		unsigned char *errMsg = yajl_get_error(handle, 1, (unsigned char *)input.m_str, input.m_len);
		PJ_LOG_WARN("Parser reason for failure:\n'%s'", errMsg);
		yajl_free_error(handle, errMsg);
	}

cleanup:
	// single exit path: both outcomes release the same resources in the same order
#if !BYPASS_SCHEMA
	jschema_state_release(&internalCtxt.m_validation);
#endif

#ifndef NDEBUG
	if (parsedOk)
		assert(yajl_get_error(handle, 0, NULL, 0) == NULL);
#endif

	yajl_free(handle);
	return parsedOk;
}

/**
 * Parses @p input via jsax_parse_internal(), forwarding every argument unchanged
 * and passing false for the internal `comments` flag.
 *
 * @return whatever jsax_parse_internal() returns.
 */
bool jsax_parse_ex(PJSAXCallbacks *parser, raw_buffer input, JSchemaInfoRef schemaInfo, void **ctxt, bool logError)
{
	const bool comments = false;
	bool parsed;

	parsed = jsax_parse_internal(parser, input, schemaInfo, ctxt, logError, comments);
	return parsed;
}

/**
 * Convenience entry point: parses @p input with jsax_parse_ex(), supplying no
 * user context pointer and leaving error logging off.
 *
 * @param schema must not be NULL (asserted).
 * @return whatever jsax_parse_ex() returns.
 */
bool jsax_parse(PJSAXCallbacks *parser, raw_buffer input, JSchemaInfoRef schema)
{
	void **noContext = NULL;
	const bool logError = false;

	assert(schema != NULL);

	return jsax_parse_ex(parser, input, schema, noContext, logError);
}