コード例 #1
0
// Start-of-file hook for the stdio-JSON lrec-reader.
//
// This reader is non-streaming: the entire buffer from phandle->sof up to phandle->eof is
// parsed here, and the resulting lrecs are queued on pstate->precords. The process method
// then hands them out one at a time until the list is exhausted. This is in contrast to
// other Miller lrec-readers.
//
// The streaming framework has no end-of-file hook, so leftovers from the *previous* file
// (if any) are released at the top of this function before fresh lists are allocated.
//
// Parameters:
//   pvstate  -- the reader state (lrec_reader_stdio_json_state_t*).
//   pvhandle -- the ingested file (file_ingestor_stdio_state_t*); only sof and eof are read.
//
// Error handling: if json_parse returns NULL, the parser's message is written to stderr and
// the process exits with status 1. An empty buffer is not an error: it yields zero records.
static void lrec_reader_stdio_json_sof(void* pvstate, void* pvhandle) {
	lrec_reader_stdio_json_state_t* pstate = pvstate;
	file_ingestor_stdio_state_t* phandle = pvhandle;
	json_char* json_input = (json_char*)phandle->sof;
	json_value_t* parsed_top_level_json;
	json_char error_buf[JSON_ERROR_MAX];

	// Release whatever the previous file left behind: first each element, then the list itself.
	if (pstate->ptop_level_json_objects != NULL) {
		for (sllve_t* pe = pstate->ptop_level_json_objects->phead; pe != NULL; pe = pe->pnext) {
			json_value_t* top_level_json_object = pe->pvvalue;
			json_value_free(top_level_json_object);
		}
		sllv_free(pstate->ptop_level_json_objects);
	}
	if (pstate->precords != NULL) {
		for (sllve_t* pf = pstate->precords->phead; pf != NULL; pf = pf->pnext) {
			lrec_t* prec = pf->pvvalue;
			lrec_free(prec);
		}
		sllv_free(pstate->precords);
	}

	pstate->ptop_level_json_objects = sllv_alloc();
	pstate->precords = sllv_alloc();

	// This enables us to handle input of the form
	//
	//   { "a" : 1 }
	//   { "b" : 2 }
	//   { "c" : 3 }
	//
	// in addition to
	//
	// [
	//   { "a" : 1 }
	//   { "b" : 2 }
	//   { "c" : 3 }
	// ]
	//
	// This is in line with what jq can handle. In this case, json_parse will return
	// once for each top-level item and will give us back a pointer to the start of
	// the rest of the input stream, so we can call json_parse on the rest until it is
	// all exhausted.

	json_char* item_start = json_input;
	int length = phandle->eof - phandle->sof;

	// An empty file contains no records. Do not hand a zero-length buffer to the parser:
	// leave the freshly allocated (empty) lists in place and return.
	if (length <= 0) {
		return;
	}

	while (TRUE) {
		// json_parse consumes one top-level item and advances item_start past it.
		parsed_top_level_json = json_parse(item_start, length, error_buf, &item_start);

		if (parsed_top_level_json == NULL) {
			fprintf(stderr, "Unable to parse JSON data: %s\n", error_buf);
			exit(1);
		}

		// The lrecs have their string pointers pointing into the parsed-JSON objects (for
		// efficiency) so it's important we not free the latter until our free method.
		//
		// NOTE(review): nothing in this function appends parsed_top_level_json to
		// pstate->ptop_level_json_objects, so the cleanup loop above has nothing to free
		// unless the value is recorded elsewhere -- confirm ownership.
		reference_json_objects_as_lrecs(pstate->precords, parsed_top_level_json, pstate->json_flatten_separator);

		// Stop when the parser reports no remainder, or the remainder starts with a NUL.
		if (item_start == NULL)
			break;
		if (*item_start == 0)
			break;

		// Shrink the remaining length by the number of bytes just consumed.
		length -= (item_start - json_input);
		json_input = item_start;

	}

}
コード例 #2
0
// The stdio-JSON lrec-reader is non-streaming: we ingest all records here in the start-of-file hook.
// Then in the process method we pop one lrec off the list at a time, until they are all exhausted.
// This is in contrast to other Miller lrec-readers.
//
// It would be possible to extend the streaming framework to also have an end-of-file hook
// which we could use here to free parsed-JSON data. However, we simply leverage the start-of-file
// hook for the *next* file (if any) or the free method (if not): these free parsed-JSON structures
// from the previous file (if any).
static void lrec_reader_stdio_json_sof(void* pvstate, void* pvhandle) {
	lrec_reader_stdio_json_state_t* pstate = pvstate;
	file_ingestor_stdio_state_t* phandle = pvhandle;
	json_char* json_input = (json_char*)phandle->sof;
	json_value_t* parsed_top_level_json;
	json_char error_buf[JSON_ERROR_MAX];

	// This enables us to handle input of the form
	//
	//   { "a" : 1 }
	//   { "b" : 2 }
	//   { "c" : 3 }
	//
	// in addition to
	//
	// [
	//   { "a" : 1 }
	//   { "b" : 2 }
	//   { "c" : 3 }
	// ]
	//
	// This is in line with what jq can handle. In this case, json_parse will return
	// once for each top-level item and will give us back a pointer to the start of
	// the rest of the input stream, so we can call json_parse on the rest until it is
	// all exhausted.

	json_char* item_start = json_input;
	int length = phandle->eof - phandle->sof;
	char* detected_line_term = NULL;

	if (pstate->do_auto_line_term) {
		// Find the first line-ending sequence (if any): LF or CRLF.
		for (char* p = phandle->sof; p < phandle->eof; p++) {
			if (p[0] == '\n') {
				if (p > phandle->sof && p[-1] == '\r') {
					detected_line_term = "\r\n";
				} else {
					detected_line_term = "\n";
				}
				break;
			}
		}
	}

	// Skip comments. For JSON, we ingest the entire blob, this is a matter of finding and iterating over lines.
	// Miller data comments must be at start of line.
	if (pstate->comment_handling != COMMENTS_ARE_DATA) {
		char* line_term = pstate->specified_line_term;
		if (pstate->do_auto_line_term && detected_line_term != NULL)
			line_term = detected_line_term;
		mlr_json_strip_comments(phandle->sof, phandle->eof,
			pstate->comment_handling, pstate->comment_string, line_term);
	}
	mlr_json_end_strip(phandle->sof, &phandle->eof);
	length = phandle->eof - phandle->sof;

	if (length > 0) {
		while (TRUE) {

			parsed_top_level_json = json_parse(item_start, length, error_buf, &item_start);

			if (parsed_top_level_json == NULL) {
				fprintf(stderr, "%s: Unable to parse JSON data: %s\n", MLR_GLOBALS.bargv0, error_buf);
				exit(1);
			}

			// The lrecs have their string pointers pointing into the parsed-JSON objects (for
			// efficiency) so it's important we not free the latter until our free method.
			if (!reference_json_objects_as_lrecs(pstate->precords, parsed_top_level_json,
				pstate->input_json_flatten_separator, pstate->json_array_ingest))
			{
				fprintf(stderr, "%s: Unable to parse JSON data.\n", MLR_GLOBALS.bargv0);
				exit(1);
			}

			if (item_start == NULL)
				break;
			if (*item_start == 0)
				break;
			length -= (item_start - json_input);
			json_input = item_start;
			// json_parse goes up to the '\r' or '\n' (whichever is found first) on the first
			// parse, then keeps going from there on the next. E.g. in the CRLF case it
			// consumes the CR at the end of the first read and consumes the LF at the start
			// of the second, and so on. After the very last parse, we need to here consume
			// the final '\n' which is (by itself) a parse error.
			if (length == 1 && *(char*)json_input == '\n') {
				break;
			}
		}
	}
	if (detected_line_term != NULL) {
		pstate->detected_line_term = detected_line_term;
	}
}