// ---------------------------------------------------------------- static char* test_lrec_csv_api() { char* hdr_line = strdup("w,x,y,z"); slls_t* hdr_fields = split_csvlite_header_line(hdr_line, ',', FALSE); header_keeper_t* pheader_keeper = header_keeper_alloc(hdr_line, hdr_fields); char* data_line_1 = strdup("2,3,4,5"); lrec_t* prec_1 = lrec_parse_stdio_csvlite_data_line(pheader_keeper, data_line_1, ',', FALSE); char* data_line_2 = strdup("6,7,8,9"); lrec_t* prec_2 = lrec_parse_stdio_csvlite_data_line(pheader_keeper, data_line_2, ',', FALSE); mu_assert_lf(prec_1->field_count == 4); mu_assert_lf(prec_2->field_count == 4); mu_assert_lf(streq(lrec_get(prec_1, "w"), "2")); mu_assert_lf(streq(lrec_get(prec_1, "x"), "3")); mu_assert_lf(streq(lrec_get(prec_1, "y"), "4")); mu_assert_lf(streq(lrec_get(prec_1, "z"), "5")); mu_assert_lf(streq(lrec_get(prec_2, "w"), "6")); mu_assert_lf(streq(lrec_get(prec_2, "x"), "7")); mu_assert_lf(streq(lrec_get(prec_2, "y"), "8")); mu_assert_lf(streq(lrec_get(prec_2, "z"), "9")); lrec_remove(prec_1, "w"); mu_assert_lf(prec_1->field_count == 3); mu_assert_lf(prec_2->field_count == 4); mu_assert_lf(lrec_get(prec_1, "w") == NULL); mu_assert_lf(streq(lrec_get(prec_2, "w"), "6")); // Non-replacing-rename case //lrec_dump_titled("Before rename", prec_1); lrec_rename(prec_1, "x", "u"); //lrec_dump_titled("After rename", prec_1); mu_assert_lf(prec_1->field_count == 3); mu_assert_lf(lrec_get(prec_1, "x") == NULL); mu_assert_lf(streq(lrec_get(prec_1, "u"), "3")); // Replacing-rename case //lrec_dump_titled("Before rename", prec_2); lrec_rename(prec_2, "y", "z"); //lrec_dump_titled("After rename", prec_2); mu_assert_lf(prec_2->field_count == 3); mu_assert_lf(streq(lrec_get(prec_2, "w"), "6")); mu_assert_lf(streq(lrec_get(prec_2, "x"), "7")); mu_assert_lf(lrec_get(prec_2, "y") == NULL); mu_assert_lf(streq(lrec_get(prec_2, "z"), "8")); lrec_free(prec_1); lrec_free(prec_2); // xxx need a test case for alloc1,free1,alloc2,free2 w/ same hdr. 
return NULL; }
// ---------------------------------------------------------------- static char* test_lrec_unbacked_api() { lrec_t* prec = lrec_unbacked_alloc(); mu_assert_lf(prec->field_count == 0); lrec_put_no_free(prec, "x", "3"); mu_assert_lf(prec->field_count == 1); mu_assert_lf(streq(lrec_get(prec, "x"), "3")); lrec_put_no_free(prec, "y", "4"); mu_assert_lf(prec->field_count == 2); mu_assert_lf(streq(lrec_get(prec, "x"), "3")); mu_assert_lf(streq(lrec_get(prec, "y"), "4")); lrec_put_no_free(prec, "x", "5"); mu_assert_lf(prec->field_count == 2); mu_assert_lf(streq(lrec_get(prec, "x"), "5")); mu_assert_lf(streq(lrec_get(prec, "y"), "4")); lrec_remove(prec, "x"); mu_assert_lf(prec->field_count == 1); mu_assert_lf(lrec_get(prec, "x") == NULL); // Non-replacing-rename case //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "y", "z"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 1); mu_assert_lf(lrec_get(prec, "x") == NULL); mu_assert_lf(lrec_get(prec, "y") == NULL); mu_assert_lf(streq(lrec_get(prec, "z"), "4")); lrec_free(prec); // Replacing-rename case prec = lrec_unbacked_alloc(); lrec_put_no_free(prec, "x", "3"); lrec_put_no_free(prec, "y", "4"); lrec_put_no_free(prec, "z", "5"); mu_assert_lf(prec->field_count == 3); //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "y", "z"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 2); mu_assert_lf(streq(lrec_get(prec, "x"), "3")); mu_assert_lf(lrec_get(prec, "y") == NULL); mu_assert_lf(streq(lrec_get(prec, "z"), "4")); lrec_free(prec); return NULL; }
// ---------------------------------------------------------------- static sllv_t* mapper_tail_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { mapper_tail_state_t* pstate = pvstate; if (pinrec != NULL) { slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names); sllv_t* precord_list_for_group = lhmslv_get(pstate->precord_lists_by_group, pgroup_by_field_values); if (precord_list_for_group == NULL) { precord_list_for_group = sllv_alloc(); lhmslv_put(pstate->precord_lists_by_group, slls_copy(pgroup_by_field_values), precord_list_for_group); } if (precord_list_for_group->length >= pstate->tail_count) { lrec_t* porec = sllv_pop(precord_list_for_group); if (porec != NULL) lrec_free(porec); } sllv_add(precord_list_for_group, pinrec); return NULL; } else { sllv_t* poutrecs = sllv_alloc(); for (lhmslve_t* pa = pstate->precord_lists_by_group->phead; pa != NULL; pa = pa->pnext) { sllv_t* precord_list_for_group = pa->pvvalue; for (sllve_t* pb = precord_list_for_group->phead; pb != NULL; pb = pb->pnext) { sllv_add(poutrecs, pb->pvdata); } } sllv_add(poutrecs, NULL); return poutrecs; } }
// Writes one record in vertical key/value form, one field per output "line"
// (terminated by OFS), with keys padded using OPS so the values line up.
// Records after the first are preceded by one extra OFS. A NULL record is
// ignored. The record is freed here after being written.
static void lrec_writer_xtab_process_aligned(FILE* output_stream, lrec_t* prec, void* pvstate) {
	if (prec == NULL)
		return;

	lrec_writer_xtab_state_t* pstate = pvstate;

	// Separator between consecutive records
	if (pstate->record_count > 0LL)
		fputs(pstate->ofs, output_stream);
	pstate->record_count++;

	// First pass: find the widest key, measured in display characters.
	int widest_key = 1;
	for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
		int this_width = strlen_for_utf8_display(pfield->key);
		widest_key = (this_width > widest_key) ? this_width : widest_key;
	}

	// Second pass: key, padding, one more OPS, value, OFS.
	for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
		// "%-*s" fprintf format isn't correct for non-ASCII UTF-8, so the
		// padding is emitted by hand.
		fputs(pfield->key, output_stream);
		int npad = widest_key - strlen_for_utf8_display(pfield->key);
		while (npad-- > 0)
			fputs(pstate->ops, output_stream);
		fprintf(output_stream, "%s%s%s", pstate->ops, pfield->value, pstate->ofs);
	}

	lrec_free(prec); // xxx cmt mem-mgmt
}
// ---------------------------------------------------------------- static sllv_t* mapper_nothing_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { if (pinrec != NULL) { lrec_free(pinrec); return NULL; } else { return sllv_single(NULL); } }
// Feeds each input record to the accumulators and frees it, emitting nothing
// until end of stream (pinrec == NULL), at which point the result of
// mapper_stats1_emit is returned. pctx is unused.
static sllv_t* mapper_stats1_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	mapper_stats1_state_t* pstate = pvstate;

	if (pinrec == NULL)
		return mapper_stats1_emit(pstate);

	mapper_stats1_ingest(pinrec, pstate);
	lrec_free(pinrec);
	return NULL;
}
// ---------------------------------------------------------------- static void handle_full_srec_assignment( mlr_dsl_cst_statement_t* pstatement, variables_t* pvars, cst_outputs_t* pcst_outputs) { full_srec_assignment_state_t* pstate = pstatement->pvstate; lrec_t* poutrec = lrec_unbacked_alloc(); // pinrec might be part of the RHS. lhmsmv_t* pout_typed_overlay = lhmsmv_alloc(); rxval_evaluator_t* prhs_xevaluator = pstate->prhs_xevaluator; boxed_xval_t boxed_xval = prhs_xevaluator->pprocess_func(prhs_xevaluator->pvstate, pvars); if (!boxed_xval.xval.is_terminal) { for (mlhmmv_level_entry_t* pe = boxed_xval.xval.pnext_level->phead; pe != NULL; pe = pe->pnext) { mv_t* pkey = &pe->level_key; mlhmmv_xvalue_t* pval = &pe->level_xvalue; if (pval->is_terminal) { // xxx else collapse-down using json separator? char* skey = mv_alloc_format_val(pkey); // xxx if we're going to transfer here *and* free below, this needs a nullptr poke // at the copy-from site //mv_t val = boxed_xval.is_ephemeral ? pval->terminal_mlrval : mv_copy(&pval->terminal_mlrval); mv_t val = mv_copy(&pval->terminal_mlrval); // Write typed mlrval output to the typed overlay rather than into the lrec // (which holds only string values). // // The rval_evaluator reads the overlay in preference to the lrec. E.g. if the // input had "x"=>"abc","y"=>"def" but a previous statement had set "y"=>7.4 and // "z"=>"ghi", then an expression right-hand side referring to $y would get the // floating-point value 7.4. So we don't need to lrec_put the value here, and // moreover should not for two reasons: (1) there is a performance hit of doing // throwaway number-to-string formatting -- it's better to do it once at the // end; (2) having the string values doubly owned by the typed overlay and the // lrec would result in double frees, or awkward bookkeeping. However, the NR // variable evaluator reads prec->field_count, so we need to put something here. 
// And putting something statically allocated minimizes copying/freeing. lhmsmv_put(pout_typed_overlay, mlr_strdup_or_die(skey), &val, FREE_ENTRY_KEY | FREE_ENTRY_VALUE); lrec_put(poutrec, skey, "bug", FREE_ENTRY_KEY); } } if (boxed_xval.is_ephemeral) { mlhmmv_xvalue_free(&boxed_xval.xval); } } else { mlhmmv_xvalue_free(&boxed_xval.xval); } lrec_free(pvars->pinrec); lhmsmv_free(pvars->ptyped_overlay); pvars->pinrec = poutrec; pvars->ptyped_overlay = pout_typed_overlay; }
// ---------------------------------------------------------------- lrec_t* lrec_parse_mmap_xtab_single_ifs(file_reader_mmap_state_t* phandle, char ifs, char ips, int allow_repeat_ips) { while (phandle->sol < phandle->eof && *phandle->sol == ifs) phandle->sol++; if (phandle->sol >= phandle->eof) return NULL; lrec_t* prec = lrec_unbacked_alloc(); // Loop over fields, one per line while (TRUE) { char* line = phandle->sol; char* key = line; char* value = ""; char* p; // Construct one field for (p = line; p < phandle->eof && *p; ) { if (*p == ifs) { *p = 0; phandle->sol = p+1; break; } else if (*p == ips) { key = line; *p = 0; p++; if (allow_repeat_ips) { while (*p == ips) p++; } value = p; } else { p++; } } if (p >= phandle->eof) phandle->sol = p+1; lrec_put_no_free(prec, key, value); if (phandle->sol >= phandle->eof || *phandle->sol == ifs) break; } if (prec->field_count == 0) { lrec_free(prec); return NULL; } else { return prec; } }
// Writes one record in vertical key/value form without any alignment
// padding: each field is emitted as key, OPS, value, OFS. Records after the
// first are preceded by one extra OFS. A NULL record is ignored. The record
// is freed here after being written.
static void lrec_writer_xtab_process_unaligned(FILE* output_stream, lrec_t* prec, void* pvstate) {
	if (prec == NULL)
		return;

	lrec_writer_xtab_state_t* pstate = pvstate;

	// Separator between consecutive records
	if (pstate->record_count > 0LL)
		fputs(pstate->ofs, output_stream);
	pstate->record_count++;

	lrece_t* pfield = prec->phead;
	while (pfield != NULL) {
		fprintf(output_stream, "%s%s%s%s", pfield->key, pstate->ops, pfield->value, pstate->ofs);
		pfield = pfield->pnext;
	}

	lrec_free(prec); // xxx cmt mem-mgmt
}
// ---------------------------------------------------------------- static void lrec_writer_csv_process(FILE* output_stream, lrec_t* prec, void* pvstate) { if (prec == NULL) return; lrec_writer_csv_state_t* pstate = pvstate; char *ors = pstate->ors; char *ofs = pstate->ofs; if (pstate->plast_header_output != NULL) { if (!lrec_keys_equal_list(prec, pstate->plast_header_output)) { slls_free(pstate->plast_header_output); pstate->plast_header_output = NULL; if (pstate->num_header_lines_output > 0LL) fputs(ors, output_stream); } } if (pstate->plast_header_output == NULL) { int nf = 0; if (!pstate->headerless_csv_output) { for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) fputs(ofs, output_stream); pstate->pquoted_output_func(output_stream, pe->key, pstate->ors, pstate->ofs, pstate->orslen, pstate->ofslen); nf++; } fputs(ors, output_stream); } pstate->plast_header_output = mlr_copy_keys_from_record(prec); pstate->num_header_lines_output++; } int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) fputs(ofs, output_stream); pstate->pquoted_output_func(output_stream, pe->value, pstate->ors, pstate->ofs, pstate->orslen, pstate->ofslen); nf++; } fputs(ors, output_stream); pstate->onr++; // See ../README.md for memory-management conventions lrec_free(prec); }
// ---------------------------------------------------------------- static char* test_lrec_xtab_api() { char* line_1 = strdup("w 2"); char* line_2 = strdup("x 3"); char* line_3 = strdup("y 4"); char* line_4 = strdup("z 5"); slls_t* pxtab_lines = slls_alloc(); slls_add_with_free(pxtab_lines, line_1); slls_add_with_free(pxtab_lines, line_2); slls_add_with_free(pxtab_lines, line_3); slls_add_with_free(pxtab_lines, line_4); lrec_t* prec = lrec_parse_stdio_xtab(pxtab_lines, ' ', TRUE); mu_assert_lf(prec->field_count == 4); mu_assert_lf(streq(lrec_get(prec, "w"), "2")); mu_assert_lf(streq(lrec_get(prec, "x"), "3")); mu_assert_lf(streq(lrec_get(prec, "y"), "4")); mu_assert_lf(streq(lrec_get(prec, "z"), "5")); lrec_remove(prec, "w"); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "w") == NULL); // Non-replacing-rename case //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "x", "u"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "x") == NULL); mu_assert_lf(streq(lrec_get(prec, "u"), "3")); // Replacing-rename case //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "y", "z"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 2); mu_assert_lf(streq(lrec_get(prec, "u"), "3")); mu_assert_lf(lrec_get(prec, "y") == NULL); mu_assert_lf(streq(lrec_get(prec, "z"), "4")); lrec_free(prec); return NULL; }
// Destroys a stdio-JSON record reader: frees every retained top-level parsed
// JSON value and its list, every retained record and its list, then the
// reader state and the reader itself.
//
// Either list pointer is tolerated as NULL, so that a reader which was
// allocated but never ingested any input can still be freed.
// NOTE(review): the guards assume the allocator leaves these pointers NULL
// until the first file is ingested -- confirm against the allocator.
static void lrec_reader_stdio_json_free(lrec_reader_t* preader) {
	lrec_reader_stdio_json_state_t* pstate = preader->pvstate;

	if (pstate->ptop_level_json_objects != NULL) {
		for (sllve_t* pe = pstate->ptop_level_json_objects->phead; pe != NULL; pe = pe->pnext) {
			json_value_t* top_level_json_object = pe->pvvalue;
			json_free_value(top_level_json_object);
		}
		sllv_free(pstate->ptop_level_json_objects);
		pstate->ptop_level_json_objects = NULL;
	}

	if (pstate->precords != NULL) {
		for (sllve_t* pf = pstate->precords->phead; pf != NULL; pf = pf->pnext) {
			lrec_t* prec = pf->pvvalue;
			lrec_free(prec);
		}
		sllv_free(pstate->precords);
		pstate->precords = NULL;
	}

	free(pstate);
	free(preader);
}
// Writes one record as a CSV data line using single-character separators,
// preceded by a header line whenever the record's keys differ from those of
// the most recently written header (or no header has been written yet).
// Keys and values are written verbatim. A NULL record is ignored. The record
// is freed here after being written.
static void lrec_writer_csv_process(FILE* output_stream, lrec_t* prec, void* pvstate) {
	if (prec == NULL)
		return;

	lrec_writer_csv_state_t* pstate = pvstate;
	char ors = pstate->ors;
	char ofs = pstate->ofs;

	// xxx make a fcn to compare these w/o copy: put it in mixutil.
	// Key set changed since the last header: forget the old header and put
	// an empty line between the two blocks.
	if (pstate->plast_header_output != NULL
		&& !lrec_keys_equal_list(prec, pstate->plast_header_output))
	{
		slls_free(pstate->plast_header_output);
		pstate->plast_header_output = NULL;
		if (pstate->num_header_lines_output > 0LL)
			fputc(ors, output_stream);
	}

	// New header needed
	if (pstate->plast_header_output == NULL) {
		for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
			if (pfield != prec->phead)
				fputc(ofs, output_stream);
			fputs(pfield->key, output_stream);
		}
		fputc(ors, output_stream);
		pstate->plast_header_output = mlr_copy_keys_from_record(prec);
		pstate->num_header_lines_output++;
	}

	// Data line
	for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
		if (pfield != prec->phead)
			fputc(ofs, output_stream);
		fputs(pfield->value, output_stream);
	}
	fputc(ors, output_stream);
	pstate->onr++;

	lrec_free(prec); // xxx cmt mem-mgmt
}
// ---------------------------------------------------------------- static void lrec_writer_dkvp_process(FILE* output_stream, lrec_t* prec, void* pvstate) { if (prec == NULL) return; lrec_writer_dkvp_state_t* pstate = pvstate; char* ors = pstate->ors; char* ofs = pstate->ofs; char* ops = pstate->ops; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) fputs(ofs, output_stream); fputs(pe->key, output_stream); fputs(ops, output_stream); fputs(pe->value, output_stream); nf++; } fputs(ors, output_stream); lrec_free(prec); // end of baton-pass }
// ---------------------------------------------------------------- // xxx if empty key then make a way to communicate back to the reader that it // can stop reading further records -- ? static sllv_t* mapper_head_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { mapper_head_state_t* pstate = pvstate; if (pinrec != NULL) { slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names); unsigned long long* pcount_for_group = lhmslv_get(pstate->precord_lists_by_group, pgroup_by_field_values); if (pcount_for_group == NULL) { pcount_for_group = mlr_malloc_or_die(sizeof(unsigned long long)); *pcount_for_group = 0LL; lhmslv_put(pstate->precord_lists_by_group, slls_copy(pgroup_by_field_values), pcount_for_group); } (*pcount_for_group)++; if (*pcount_for_group <= pstate->head_count) { return sllv_single(pinrec); } else { lrec_free(pinrec); return NULL; } } else { return sllv_single(NULL); } }
// ---------------------------------------------------------------- static sllv_t* mapper_regularize_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { if (pinrec != NULL) { mapper_regularize_state_t* pstate = (mapper_regularize_state_t*)pvstate; slls_t* current_sorted_field_names = mlr_reference_keys_from_record(pinrec); slls_sort(current_sorted_field_names); slls_t* previous_sorted_field_names = lhmslv_get(pstate->psorted_to_original, current_sorted_field_names); if (previous_sorted_field_names == NULL) { previous_sorted_field_names = slls_copy(current_sorted_field_names); lhmslv_put(pstate->psorted_to_original, previous_sorted_field_names, mlr_copy_keys_from_record(pinrec)); return sllv_single(pinrec); } else { lrec_t* poutrec = lrec_unbacked_alloc(); for (sllse_t* pe = previous_sorted_field_names->phead; pe != NULL; pe = pe->pnext) { lrec_put(poutrec, pe->value, strdup(lrec_get(pinrec, pe->value)), LREC_FREE_ENTRY_VALUE); } lrec_free(pinrec); return sllv_single(poutrec); } } else { return sllv_single(NULL); } }
// ---------------------------------------------------------------- static char* test_lrec_nidx_api() { char* line = strdup("a,b,c,d"); lrec_t* prec = lrec_parse_stdio_nidx(line, ',', FALSE); mu_assert_lf(prec->field_count == 4); mu_assert_lf(streq(lrec_get(prec, "1"), "a")); mu_assert_lf(streq(lrec_get(prec, "2"), "b")); mu_assert_lf(streq(lrec_get(prec, "3"), "c")); mu_assert_lf(streq(lrec_get(prec, "4"), "d")); lrec_remove(prec, "1"); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "1") == NULL); // Non-replacing-rename case lrec_dump_titled("Before rename", prec); lrec_rename(prec, "2", "u"); lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "2") == NULL); mu_assert_lf(streq(lrec_get(prec, "u"), "b")); // Replacing-rename case lrec_dump_titled("Before rename", prec); lrec_rename(prec, "3", "4"); lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 2); mu_assert_lf(streq(lrec_get(prec, "u"), "b")); mu_assert_lf(lrec_get(prec, "3") == NULL); mu_assert_lf(streq(lrec_get(prec, "4"), "c")); lrec_free(prec); return NULL; }
// ---------------------------------------------------------------- static char* test_lrec_dkvp_api() { char* line = strdup("w=2,x=3,y=4,z=5"); lrec_t* prec = lrec_parse_stdio_dkvp(line, ',', '=', FALSE); mu_assert_lf(prec->field_count == 4); mu_assert_lf(streq(lrec_get(prec, "w"), "2")); mu_assert_lf(streq(lrec_get(prec, "x"), "3")); mu_assert_lf(streq(lrec_get(prec, "y"), "4")); mu_assert_lf(streq(lrec_get(prec, "z"), "5")); lrec_remove(prec, "w"); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "w") == NULL); // Non-replacing-rename case //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "x", "u"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "x") == NULL); mu_assert_lf(streq(lrec_get(prec, "u"), "3")); // Replacing-rename case //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "y", "z"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 2); mu_assert_lf(streq(lrec_get(prec, "u"), "3")); mu_assert_lf(lrec_get(prec, "y") == NULL); mu_assert_lf(streq(lrec_get(prec, "z"), "4")); lrec_free(prec); return NULL; }
// The stdio-JSON lrec-reader is non-streaming: we ingest all records here in the start-of-file hook.
// Then in the process method we pop one lrec off the list at a time, until they are all exhausted.
// This is in contrast to other Miller lrec-readers.
//
// It would be possible to extend the streaming framework to also have an end-of-file hook
// which we could use here to free parsed-JSON data. However, we simply leverage the start-of-file
// hook for the *next* file (if any) or the free method (if not): these free parsed-JSON structures
// from the previous file (if any).
//
// pvstate:  the lrec_reader_stdio_json_state_t for this reader.
// pvhandle: the file_ingestor_stdio_state_t holding the whole file's contents in [sof, eof).
//
// On a parse failure, an error message is written to stderr and the process exits with status 1.
static void lrec_reader_stdio_json_sof(void* pvstate, void* pvhandle) {
	lrec_reader_stdio_json_state_t* pstate = pvstate;
	file_ingestor_stdio_state_t* phandle = pvhandle;
	json_char* json_input = (json_char*)phandle->sof;
	json_value_t* parsed_top_level_json;
	json_char error_buf[JSON_ERROR_MAX];

	// Release whatever is left over from the previous file, if there was one.
	if (pstate->ptop_level_json_objects != NULL) {
		for (sllve_t* pe = pstate->ptop_level_json_objects->phead; pe != NULL; pe = pe->pnext) {
			json_value_t* top_level_json_object = pe->pvvalue;
			json_value_free(top_level_json_object);
		}
		sllv_free(pstate->ptop_level_json_objects);
	}
	if (pstate->precords != NULL) {
		for (sllve_t* pf = pstate->precords->phead; pf != NULL; pf = pf->pnext) {
			lrec_t* prec = pf->pvvalue;
			lrec_free(prec);
		}
		sllv_free(pstate->precords);
	}

	// Start this file with empty lists.
	pstate->ptop_level_json_objects = sllv_alloc();
	pstate->precords = sllv_alloc();

	// This enables us to handle input of the form
	//
	//   { "a" : 1 }
	//   { "b" : 2 }
	//   { "c" : 3 }
	//
	// in addition to
	//
	// [
	//   { "a" : 1 },
	//   { "b" : 2 },
	//   { "c" : 3 }
	// ]
	//
	// This is in line with what jq can handle. In this case, json_parse will return
	// once for each top-level item and will give us back a pointer to the start of
	// the rest of the input stream, so we can call json_parse on the rest until it is
	// all exhausted.

	json_char* item_start = json_input;
	int length = phandle->eof - phandle->sof;

	while (TRUE) {
		// json_parse updates item_start to point at the unparsed remainder.
		parsed_top_level_json = json_parse(item_start, length, error_buf, &item_start);
		if (parsed_top_level_json == NULL) {
			fprintf(stderr, "Unable to parse JSON data: %s\n", error_buf);
			exit(1);
		}

		// The lrecs have their string pointers pointing into the parsed-JSON objects (for
		// efficiency) so it's important we not free the latter until our free method.
		// NOTE(review): nothing visible in this function adds parsed_top_level_json to
		// pstate->ptop_level_json_objects, so unless the call below retains it somewhere,
		// the cleanup loops over that list have nothing to free -- confirm.
		reference_json_objects_as_lrecs(pstate->precords, parsed_top_level_json, pstate->json_flatten_separator);

		// Stop when the parser reports no remainder, or the remainder is empty.
		if (item_start == NULL)
			break;
		if (*item_start == 0)
			break;
		// Shrink the remaining length by the amount just consumed.
		length -= (item_start - json_input);
		json_input = item_start;
	}
}
// ---------------------------------------------------------------- static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char ofs, int left_align) { if (precords->length == 0) { sllv_free(precords); return; } lrec_t* prec1 = precords->phead->pvvalue; int* max_widths = mlr_malloc_or_die(sizeof(int) * prec1->field_count); int j = 0; for (lrece_t* pe = prec1->phead; pe != NULL; pe = pe->pnext, j++) { max_widths[j] = strlen_for_utf8_display(pe->key); } for (sllve_t* pnode = precords->phead; pnode != NULL; pnode = pnode->pnext) { lrec_t* prec = pnode->pvvalue; j = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext, j++) { int width = strlen_for_utf8_display(pe->value); if (width > max_widths[j]) max_widths[j] = width; } } int onr = 0; for (sllve_t* pnode = precords->phead; pnode != NULL; pnode = pnode->pnext, onr++) { lrec_t* prec = pnode->pvvalue; if (onr == 0) { j = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext, j++) { if (j > 0) { fputc(ofs, output_stream); } if (left_align) { if (pe->pnext == NULL) { fprintf(output_stream, "%s", pe->key); } else { // "%-*s" fprintf format isn't correct for non-ASCII UTF-8 fprintf(output_stream, "%s", pe->key); int d = max_widths[j] - strlen_for_utf8_display(pe->key); for (int i = 0; i < d; i++) fputc(ofs, output_stream); } } else { int d = max_widths[j] - strlen_for_utf8_display(pe->key); for (int i = 0; i < d; i++) fputc(ofs, output_stream); fprintf(output_stream, "%s", pe->key); } } fputs(ors, output_stream); } j = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext, j++) { if (j > 0) { fputc(ofs, output_stream); } char* value = pe->value; if (*value == 0) // empty string value = "-"; if (left_align) { if (pe->pnext == NULL) { fprintf(output_stream, "%s", value); } else { fprintf(output_stream, "%s", value); int d = max_widths[j] - strlen_for_utf8_display(value); for (int i = 0; i < d; i++) fputc(ofs, output_stream); } } else { int d = max_widths[j] - 
strlen_for_utf8_display(value); for (int i = 0; i < d; i++) fputc(ofs, output_stream); fprintf(output_stream, "%s", value); } } fputs(ors, output_stream); lrec_free(prec); // end of baton-pass } free(max_widths); sllv_free(precords); }
// Parses one record from the in-memory buffer [phandle->sol, phandle->eof),
// one field per line, with a single-character line separator (ifs) and a
// multi-character key/value separator (pstate->ips, length pstate->ipslen).
//
// Leading empty lines are skipped. With pstate->do_auto_line_term, LF and
// CRLF are both accepted as line endings and whichever is seen is reported
// through pctx; a CR immediately before the line separator is then stripped.
// The record ends at an empty line or at end of buffer.
//
// Separators are overwritten with NUL in place. Keys and values of lines
// that end with a separator are pointers into the buffer, so the buffer must
// outlive the returned record; text on a final line with no terminator is
// copied to the heap instead. phandle->sol is advanced past what was
// consumed.
//
// Returns NULL when there is no more input or when no fields were found.
static lrec_t* lrec_parse_mmap_xtab_single_ifs_multi_ips(file_reader_mmap_state_t* phandle, char ifs,
	lrec_reader_mmap_xtab_state_t* pstate, context_t* pctx)
{
	if (pstate->do_auto_line_term) {
		// Skip over otherwise empty LF-only or CRLF-only lines.
		while (phandle->sol < phandle->eof) {
			if (*phandle->sol == '\n') {
				context_set_autodetected_lf(pctx);
				phandle->sol += 1;
			} else if (*phandle->sol == '\r') {
				char* q = phandle->sol + 1;
				if (q < phandle->eof && *q == '\n') {
					context_set_autodetected_crlf(pctx);
					phandle->sol += 2;
				} else {
					phandle->sol += 1;
				}
			} else {
				break;
			}
		}
	} else {
		// Skip over otherwise empty IFS-only lines.
		while (phandle->sol < phandle->eof && *phandle->sol == ifs)
			phandle->sol++;
	}

	if (phandle->sol >= phandle->eof)
		return NULL;

	char* ips = pstate->ips;
	int ipslen = pstate->ipslen;

	lrec_t* prec = lrec_unbacked_alloc();

	// Loop over fields, one per line
	while (TRUE) {
		if (phandle->sol >= phandle->eof)
			break;

		char* line = phandle->sol;
		char* key = line;
		char* value = "";
		char* p;

		// Construct one field
		int saw_eol = FALSE;
		int saw_ips = FALSE; // TRUE once value points into the buffer rather than at ""
		for (p = line; p < phandle->eof && *p; ) {
			if (*p == ifs) {
				*p = 0;

				if (pstate->do_auto_line_term) {
					if (p > line && p[-1] == '\r') {
						p[-1] = 0;
						context_set_autodetected_crlf(pctx);
					} else {
						context_set_autodetected_lf(pctx);
					}
				}

				phandle->sol = p+1;
				saw_eol = TRUE;
				break;
			} else if (phandle->eof - p >= ipslen && streqn(p, ips, ipslen)) {
				// The length check keeps the multi-character comparison from
				// examining bytes beyond the end of the buffer.
				key = line;
				*p = 0;

				p += ipslen;
				if (pstate->allow_repeat_ips) {
					while (phandle->eof - p >= ipslen && streqn(p, ips, ipslen))
						p += ipslen;
				}
				value = p;
				saw_ips = TRUE;
			} else {
				p++;
			}
		}
		if (p >= phandle->eof)
			phandle->sol = p+1;

		if (saw_eol) {
			// Easy and simple case: we read until end of line.  We zero-poked the irs to a null character to terminate
			// the C string so it's OK to retain a pointer to that.
			lrec_put(prec, key, value, NO_FREE);
		} else if (saw_ips) {
			// Messier case: we read to end of file without seeing end of line.  We can't always zero-poke a null
			// character to terminate the C string: if the file size is not a multiple of the OS page size it'll work
			// (it's our copy-on-write memory). But if the file size is a multiple of the page size, then zero-poking at
			// EOF is one byte past the page and that will segv us.
			char* copy = mlr_alloc_string_from_char_range(value, phandle->eof - value);
			lrec_put(prec, key, copy, FREE_ENTRY_VALUE);
		} else {
			// Same situation, but no separator was found on the line: value
			// still points at the "" literal (so it cannot be measured
			// against eof), and it is the key that runs unterminated to the
			// end of the buffer. Copy the key; the value is empty.
			char* key_copy = mlr_alloc_string_from_char_range(key, phandle->eof - key);
			lrec_put(prec, key_copy, "", FREE_ENTRY_KEY);
		}

		if (phandle->sol >= phandle->eof || *phandle->sol == ifs)
			break;
	}
	if (prec->field_count == 0) {
		lrec_free(prec);
		return NULL;
	} else {
		return prec;
	}
}
// Applies each configured stepper to each configured value field of the
// record, keeping separate stepper state per group, per value field and per
// stepper name.
//
// State is held in nested maps: group-by values -> value-field name ->
// stepper name -> step_t. Steppers are created on first use via make_step.
// Each stepper may expose a string-valued and/or a double-valued process
// function; the double conversion of the field value is done at most once
// per field and only if some stepper needs it.
//
// pinrec: the next input record, or NULL at end of stream.
// pctx:   unused here.
//
// Returns a single-element list holding the record after the steppers have
// run; NULL (after freeing the record) if the record does not have all the
// group-by fields; and a single-element list holding NULL at end of stream.
// Exits the process with status 1 if a stepper name is not recognized.
static sllv_t* mapper_step_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	mapper_step_state_t* pstate = pvstate;
	if (pinrec == NULL)
		return sllv_single(NULL);

	// ["s", "t"]
	// Both lists are temporaries needed only within this call; they are
	// released on every return path below.
	slls_t* pvalue_field_values = mlr_selected_values_from_record(pinrec, pstate->pvalue_field_names);
	slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names);

	if (pgroup_by_field_values->length != pstate->pgroup_by_field_names->length) {
		slls_free(pvalue_field_values);
		slls_free(pgroup_by_field_values);
		lrec_free(pinrec);
		return NULL;
	}

	lhmsv_t* group_to_acc_field = lhmslv_get(pstate->groups, pgroup_by_field_values);
	if (group_to_acc_field == NULL) {
		group_to_acc_field = lhmsv_alloc();
		// The map keeps its own copy of the key.
		lhmslv_put(pstate->groups, slls_copy(pgroup_by_field_values), group_to_acc_field);
	}

	sllse_t* pa = pstate->pvalue_field_names->phead;
	sllse_t* pb = pvalue_field_values->phead;
	// for x=1 and y=2
	for ( ; pa != NULL && pb != NULL; pa = pa->pnext, pb = pb->pnext) {
		char* value_field_name = pa->value;
		char* value_field_sval = pb->value;
		int have_dval = FALSE;
		double value_field_dval = -999.0;

		lhmsv_t* acc_field_to_acc_state = lhmsv_get(group_to_acc_field, value_field_name);
		if (acc_field_to_acc_state == NULL) {
			acc_field_to_acc_state = lhmsv_alloc();
			lhmsv_put(group_to_acc_field, value_field_name, acc_field_to_acc_state);
		}

		// for "delta", "rsum"
		sllse_t* pc = pstate->pstepper_names->phead;
		for ( ; pc != NULL; pc = pc->pnext) {
			char* step_name = pc->value;
			step_t* pstep = lhmsv_get(acc_field_to_acc_state, step_name);
			if (pstep == NULL) {
				pstep = make_step(step_name, value_field_name);
				if (pstep == NULL) {
					fprintf(stderr, "mlr step: stepper \"%s\" not found.\n", step_name);
					exit(1);
				}
				lhmsv_put(acc_field_to_acc_state, step_name, pstep);
			}

			if (pstep->psprocess_func != NULL) {
				pstep->psprocess_func(pstep->pvstate, value_field_sval, pinrec);
			}
			if (pstep->pdprocess_func != NULL) {
				// Parse the number lazily, and only once per field.
				if (!have_dval) {
					value_field_dval = mlr_double_from_string_or_die(value_field_sval);
					have_dval = TRUE;
				}
				pstep->pdprocess_func(pstep->pvstate, value_field_dval, pinrec);
			}
		}
	}

	slls_free(pvalue_field_values);
	slls_free(pgroup_by_field_values);
	return sllv_single(pinrec);
}
// Parses one record from the in-memory buffer [phandle->sol, phandle->eof),
// one field per line, where both the line separator (pstate->ifs, length
// pstate->ifslen) and the key/value separator (pstate->ips, length
// pstate->ipslen) may be multi-character.
//
// Leading empty lines are skipped. The record ends at an empty line or at
// end of buffer.
//
// The first byte of each separator is overwritten with NUL in place. Keys
// and values of lines that end with a separator are pointers into the
// buffer, so the buffer must outlive the returned record; text on a final
// line with no terminator is copied to the heap instead. phandle->sol is
// advanced past what was consumed.
//
// Every multi-character comparison is preceded by a remaining-length check
// so that it cannot examine bytes beyond the end of the buffer.
//
// Returns NULL when there is no more input or when no fields were found.
static lrec_t* lrec_parse_mmap_xtab_multi_ifs_multi_ips(file_reader_mmap_state_t* phandle,
	lrec_reader_mmap_xtab_state_t* pstate)
{
	char* ips = pstate->ips;
	int ipslen = pstate->ipslen;
	char* ifs = pstate->ifs;
	int ifslen = pstate->ifslen;

	// Skip blank lines
	while (phandle->eof - phandle->sol >= ifslen && streqn(phandle->sol, ifs, ifslen)) {
		phandle->sol += ifslen;
	}

	if (phandle->sol >= phandle->eof)
		return NULL;

	lrec_t* prec = lrec_unbacked_alloc();

	// Loop over fields, one per line
	while (TRUE) {
		if (phandle->sol >= phandle->eof)
			break;

		char* line = phandle->sol;
		char* key = line;
		char* value = "";
		char* p;

		// Construct one field
		int saw_eol = FALSE;
		int saw_ips = FALSE; // TRUE once value points into the buffer rather than at ""
		for (p = line; p < phandle->eof && *p; ) {
			if (phandle->eof - p >= ifslen && streqn(p, ifs, ifslen)) {
				*p = 0;
				phandle->sol = p + ifslen;
				saw_eol = TRUE;
				break;
			} else if (phandle->eof - p >= ipslen && streqn(p, ips, ipslen)) {
				key = line;
				*p = 0;

				p += ipslen;
				if (pstate->allow_repeat_ips) {
					while (phandle->eof - p >= ipslen && streqn(p, ips, ipslen))
						p += ipslen;
				}
				value = p;
				saw_ips = TRUE;
			} else {
				p++;
			}
		}
		if (p >= phandle->eof)
			phandle->sol = p+1;

		if (saw_eol) {
			// Easy and simple case: we read until end of line.  We zero-poked the irs to a null character to terminate
			// the C string so it's OK to retain a pointer to that.
			lrec_put(prec, key, value, NO_FREE);
		} else if (saw_ips) {
			// Messier case: we read to end of file without seeing end of line.  We can't always zero-poke a null
			// character to terminate the C string: if the file size is not a multiple of the OS page size it'll work
			// (it's our copy-on-write memory). But if the file size is a multiple of the page size, then zero-poking at
			// EOF is one byte past the page and that will segv us.
			char* copy = mlr_alloc_string_from_char_range(value, phandle->eof - value);
			lrec_put(prec, key, copy, FREE_ENTRY_VALUE);
		} else {
			// Same situation, but no separator was found on the line: value
			// still points at the "" literal (so it cannot be measured
			// against eof), and it is the key that runs unterminated to the
			// end of the buffer. Copy the key; the value is empty.
			char* key_copy = mlr_alloc_string_from_char_range(key, phandle->eof - key);
			lrec_put(prec, key_copy, "", FREE_ENTRY_KEY);
		}

		if (phandle->sol >= phandle->eof)
			break;
		if (phandle->eof - phandle->sol >= ifslen && streqn(phandle->sol, ifs, ifslen))
			break;
	}
	if (prec->field_count == 0) {
		lrec_free(prec);
		return NULL;
	} else {
		return prec;
	}
}