Пример #1
0
// ----------------------------------------------------------------
// Unit test: parses two CSV-lite data lines against one shared header, then
// checks lrec_get, lrec_remove and lrec_rename (both the non-replacing and the
// replacing flavor) on the resulting records. Every check goes through
// mu_assert_lf; NULL is returned once the end of the function is reached.
static char* test_lrec_csv_api() {
	char* header_text = strdup("w,x,y,z");
	slls_t* header_names = split_csvlite_header_line(header_text, ',', FALSE);
	header_keeper_t* pkeeper = header_keeper_alloc(header_text, header_names);

	char* row_a_text = strdup("2,3,4,5");
	lrec_t* prow_a = lrec_parse_stdio_csvlite_data_line(pkeeper, row_a_text, ',', FALSE);

	char* row_b_text = strdup("6,7,8,9");
	lrec_t* prow_b = lrec_parse_stdio_csvlite_data_line(pkeeper, row_b_text, ',', FALSE);

	mu_assert_lf(prow_a->field_count == 4);
	mu_assert_lf(prow_b->field_count == 4);

	// Each header name must map to the matching column of each data line.
	char* names[]    = { "w", "x", "y", "z" };
	char* a_values[] = { "2", "3", "4", "5" };
	char* b_values[] = { "6", "7", "8", "9" };
	for (int i = 0; i < 4; i++) {
		mu_assert_lf(streq(lrec_get(prow_a, names[i]), a_values[i]));
	}
	for (int i = 0; i < 4; i++) {
		mu_assert_lf(streq(lrec_get(prow_b, names[i]), b_values[i]));
	}

	// Removing a field from one record must leave the other record untouched.
	lrec_remove(prow_a, "w");
	mu_assert_lf(prow_a->field_count == 3);
	mu_assert_lf(prow_b->field_count == 4);
	mu_assert_lf(lrec_get(prow_a, "w") == NULL);
	mu_assert_lf(streq(lrec_get(prow_b, "w"), "6"));

	// Rename onto a name that is not yet present: the field count is unchanged.
	lrec_rename(prow_a, "x", "u");
	mu_assert_lf(prow_a->field_count == 3);
	mu_assert_lf(lrec_get(prow_a, "x") == NULL);
	mu_assert_lf(streq(lrec_get(prow_a, "u"), "3"));

	// Rename onto a name that already exists: the count drops by one and the
	// surviving "z" carries the value that used to belong to "y".
	lrec_rename(prow_b, "y", "z");

	mu_assert_lf(prow_b->field_count == 3);
	mu_assert_lf(streq(lrec_get(prow_b, "w"), "6"));
	mu_assert_lf(streq(lrec_get(prow_b, "x"), "7"));
	mu_assert_lf(lrec_get(prow_b, "y") == NULL);
	mu_assert_lf(streq(lrec_get(prow_b, "z"), "8"));

	lrec_free(prow_a);
	lrec_free(prow_b);

	// xxx need a test case for alloc1,free1,alloc2,free2 w/ same hdr.
	return NULL;
}
Пример #2
0
// ----------------------------------------------------------------
// Unit test for records created with lrec_unbacked_alloc: covers insertion,
// overwrite of an existing key, removal, and both rename flavors. Every check
// goes through mu_assert_lf; NULL is returned once the end is reached.
static char* test_lrec_unbacked_api() {
	lrec_t* prow = lrec_unbacked_alloc();
	mu_assert_lf(prow->field_count == 0);

	// First insertion.
	lrec_put_no_free(prow, "x", "3");
	mu_assert_lf(prow->field_count == 1);
	mu_assert_lf(streq(lrec_get(prow, "x"), "3"));

	// Second, distinct key.
	lrec_put_no_free(prow, "y", "4");
	mu_assert_lf(prow->field_count == 2);
	mu_assert_lf(streq(lrec_get(prow, "x"), "3"));
	mu_assert_lf(streq(lrec_get(prow, "y"), "4"));

	// Putting an existing key replaces its value without growing the record.
	lrec_put_no_free(prow, "x", "5");
	mu_assert_lf(prow->field_count == 2);
	mu_assert_lf(streq(lrec_get(prow, "x"), "5"));
	mu_assert_lf(streq(lrec_get(prow, "y"), "4"));

	lrec_remove(prow, "x");
	mu_assert_lf(prow->field_count == 1);
	mu_assert_lf(lrec_get(prow, "x") == NULL);

	// Rename onto a name that is not present.
	lrec_rename(prow, "y", "z");
	mu_assert_lf(prow->field_count == 1);
	mu_assert_lf(lrec_get(prow, "x") == NULL);
	mu_assert_lf(lrec_get(prow, "y") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "z"), "4"));

	lrec_free(prow);

	// Rename onto a name that is already present, using a fresh record.
	prow = lrec_unbacked_alloc();

	lrec_put_no_free(prow, "x", "3");
	lrec_put_no_free(prow, "y", "4");
	lrec_put_no_free(prow, "z", "5");
	mu_assert_lf(prow->field_count == 3);

	lrec_rename(prow, "y", "z");

	mu_assert_lf(prow->field_count == 2);
	mu_assert_lf(streq(lrec_get(prow, "x"), "3"));
	mu_assert_lf(lrec_get(prow, "y") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "z"), "4"));

	lrec_free(prow);

	return NULL;
}
Пример #3
0
// ----------------------------------------------------------------
// Per-record callback that retains, for each distinct combination of group-by
// field values, a bounded list of records.
//
// pinrec != NULL: the record is appended to its group's list; if that list
//   already holds at least pstate->tail_count records, one is popped (and
//   freed) first. Returns NULL.
// pinrec == NULL: returns a new list containing every retained record, group
//   after group in the map's iteration order, followed by a NULL entry.
//
// pctx is not used here.
static sllv_t* mapper_tail_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
    mapper_tail_state_t* pstate = pvstate;

    if (pinrec == NULL) {
        sllv_t* pemitted = sllv_alloc();

        lhmslve_t* pgroup = pstate->precord_lists_by_group->phead;
        while (pgroup != NULL) {
            sllv_t* pretained = pgroup->pvvalue;
            sllve_t* pnode = pretained->phead;
            while (pnode != NULL) {
                sllv_add(pemitted, pnode->pvdata);
                pnode = pnode->pnext;
            }
            pgroup = pgroup->pnext;
        }

        sllv_add(pemitted, NULL);
        return pemitted;
    }

    // NOTE(review): the list returned by mlr_selected_values_from_record is not
    // released in this function; whether that is a leak depends on the helper's
    // ownership contract -- confirm.
    slls_t* pgroup_key = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names);

    sllv_t* pretained = lhmslv_get(pstate->precord_lists_by_group, pgroup_key);
    if (pretained == NULL) {
        // First record seen for this group: the map gets its own copy of the key.
        pretained = sllv_alloc();
        lhmslv_put(pstate->precord_lists_by_group, slls_copy(pgroup_key), pretained);
    }

    if (pretained->length >= pstate->tail_count) {
        lrec_t* pevicted = sllv_pop(pretained);
        if (pevicted != NULL)
            lrec_free(pevicted);
    }
    sllv_add(pretained, pinrec);

    return NULL;
}
Пример #4
0
// Writes one record, one field per output unit, with values aligned: each key
// is followed by enough copies of pstate->ops to reach the widest key's display
// width, then one more ops, the value, and pstate->ofs. From the second record
// on, an extra pstate->ofs is written before the record. A NULL record is
// ignored. The record is freed before returning.
static void lrec_writer_xtab_process_aligned(FILE* output_stream, lrec_t* prec, void* pvstate) {
	if (prec == NULL)
		return;

	lrec_writer_xtab_state_t* pstate = pvstate;
	if (pstate->record_count > 0LL)
		fputs(pstate->ofs, output_stream);
	pstate->record_count++;

	// Widest key, measured in display columns (never less than 1).
	int widest = 1;
	lrece_t* pfield = prec->phead;
	while (pfield != NULL) {
		int w = strlen_for_utf8_display(pfield->key);
		if (widest < w)
			widest = w;
		pfield = pfield->pnext;
	}

	for (pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
		// Padding is emitted by hand: the "%-*s" fprintf format isn't correct
		// for non-ASCII UTF-8.
		fputs(pfield->key, output_stream);
		int pad = widest - strlen_for_utf8_display(pfield->key);
		while (pad-- > 0)
			fputs(pstate->ops, output_stream);
		fputs(pstate->ops, output_stream);
		fputs(pfield->value, output_stream);
		fputs(pstate->ofs, output_stream);
	}

	lrec_free(prec); // xxx cmt mem-mgmt
}
Пример #5
0
// ----------------------------------------------------------------
// Per-record callback that emits no records: every non-NULL input record is
// freed and NULL is returned; for a NULL input a single-element list holding
// NULL is returned. pctx and pvstate are not used.
static sllv_t* mapper_nothing_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	if (pinrec == NULL)
		return sllv_single(NULL);

	lrec_free(pinrec);
	return NULL;
}
Пример #6
0
// Per-record callback: a non-NULL record is handed to mapper_stats1_ingest and
// then freed, and NULL is returned; for a NULL input the result of
// mapper_stats1_emit is returned. pctx is not used.
static sllv_t* mapper_stats1_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	mapper_stats1_state_t* pstate = pvstate;

	if (pinrec == NULL)
		return mapper_stats1_emit(pstate);

	mapper_stats1_ingest(pinrec, pstate);
	lrec_free(pinrec);
	return NULL;
}
// ----------------------------------------------------------------
// Handles a full-record assignment statement. The statement's right-hand side is
// evaluated; if the result is a map (non-terminal xvalue), a replacement record and
// typed overlay are built from its terminal-valued entries, and nested (non-terminal)
// entries are skipped. If the result is terminal, the replacement record and overlay
// stay empty. In every case the previous pvars->pinrec and pvars->ptyped_overlay are
// freed and replaced by the newly built ones.
//
// pstatement:   statement whose pvstate holds the right-hand-side evaluator.
// pvars:        evaluation context; its pinrec and ptyped_overlay are replaced.
// pcst_outputs: not used by this function.
static void handle_full_srec_assignment(
	mlr_dsl_cst_statement_t* pstatement,
	variables_t*             pvars,
	cst_outputs_t*           pcst_outputs)
{
	full_srec_assignment_state_t* pstate = pstatement->pvstate;

	// The outputs are built in fresh containers and swapped in only at the end, after
	// the right-hand side has been evaluated against the current pvars.
	lrec_t* poutrec = lrec_unbacked_alloc(); // pinrec might be part of the RHS.
	lhmsmv_t* pout_typed_overlay = lhmsmv_alloc();

	rxval_evaluator_t* prhs_xevaluator = pstate->prhs_xevaluator;
	boxed_xval_t boxed_xval = prhs_xevaluator->pprocess_func(prhs_xevaluator->pvstate, pvars);

	if (!boxed_xval.xval.is_terminal) {
		// Walk the top level of the map result.
		for (mlhmmv_level_entry_t* pe = boxed_xval.xval.pnext_level->phead; pe != NULL; pe = pe->pnext) {
			mv_t* pkey = &pe->level_key;
			mlhmmv_xvalue_t* pval = &pe->level_xvalue;

			if (pval->is_terminal) { // xxx else collapse-down using json separator?
				char* skey = mv_alloc_format_val(pkey);
				// xxx if we're going to transfer here *and* free below, this needs a nullptr poke
				// at the copy-from site
				//mv_t val = boxed_xval.is_ephemeral ? pval->terminal_mlrval : mv_copy(&pval->terminal_mlrval);
				// The value is always copied, so the conditional free of boxed_xval
				// below is independent of what was stored in the overlay.
				mv_t val = mv_copy(&pval->terminal_mlrval);

				// Write typed mlrval output to the typed overlay rather than into the lrec
				// (which holds only string values).
				//
				// The rval_evaluator reads the overlay in preference to the lrec. E.g. if the
				// input had "x"=>"abc","y"=>"def" but a previous statement had set "y"=>7.4 and
				// "z"=>"ghi", then an expression right-hand side referring to $y would get the
				// floating-point value 7.4. So we don't need to lrec_put the value here, and
				// moreover should not for two reasons: (1) there is a performance hit of doing
				// throwaway number-to-string formatting -- it's better to do it once at the
				// end; (2) having the string values doubly owned by the typed overlay and the
				// lrec would result in double frees, or awkward bookkeeping. However, the NR
				// variable evaluator reads prec->field_count, so we need to put something here.
				// And putting something statically allocated minimizes copying/freeing.
				// The overlay gets its own duplicate of the key; skey itself is handed to the
				// record together with the FREE_ENTRY_KEY flag.
				lhmsmv_put(pout_typed_overlay, mlr_strdup_or_die(skey), &val,
					FREE_ENTRY_KEY | FREE_ENTRY_VALUE);
				lrec_put(poutrec, skey, "bug", FREE_ENTRY_KEY);
			}
		}
		// A map result is released here only when the evaluator flagged it as ephemeral.
		if (boxed_xval.is_ephemeral) {
			mlhmmv_xvalue_free(&boxed_xval.xval);
		}
	} else {
		// NOTE(review): a terminal result is freed regardless of is_ephemeral, unlike the
		// map case above -- confirm this asymmetry is intended.
		mlhmmv_xvalue_free(&boxed_xval.xval);
	}
	// Swap the newly built record and overlay in for the old ones.
	lrec_free(pvars->pinrec);
	lhmsmv_free(pvars->ptyped_overlay);
	pvars->pinrec = poutrec;
	pvars->ptyped_overlay = pout_typed_overlay;
}
Пример #8
0
// ----------------------------------------------------------------
// Parses one xtab record out of the in-memory region [phandle->sol, phandle->eof).
// Each field occupies one line terminated by the character ifs; within a line the
// key is separated from the value by the character ips. Separator bytes are
// overwritten with NUL in place and the record stores pointers into the region
// (lrec_put_no_free), so the region must outlive the returned record.
//
// phandle:          sol is advanced past everything consumed.
// ifs:              line (field) terminator; a line starting with ifs ends the record.
// ips:              key/value separator.
// allow_repeat_ips: when nonzero, a run of consecutive ips characters after the key
//                   is treated as a single separator.
//
// Returns NULL when only ifs characters (or nothing) remain, or when no field was
// collected; otherwise a newly allocated record.
lrec_t* lrec_parse_mmap_xtab_single_ifs(file_reader_mmap_state_t* phandle, char ifs, char ips, int allow_repeat_ips) {

	// Skip leading blank lines.
	while (phandle->sol < phandle->eof && *phandle->sol == ifs)
		phandle->sol++;

	if (phandle->sol >= phandle->eof)
		return NULL;

	lrec_t* prec = lrec_unbacked_alloc();

	// Loop over fields, one per line
	while (TRUE) {
		char* line  = phandle->sol;
		char* key   = line;
		char* value = "";
		char* p;

		// Construct one field
		for (p = line; p < phandle->eof && *p; ) {
			if (*p == ifs) {
				*p = 0;
				phandle->sol = p+1;
				break;
			} else if (*p == ips) {
				key = line;
				*p = 0;

				p++;
				if (allow_repeat_ips) {
					// Bounded by eof: a line ending in ips at the very end of the
					// region must not make this scan run past the region.
					while (p < phandle->eof && *p == ips)
						p++;
				}
				value = p;
			} else {
				p++;
			}
		}
		// Ran off the end of the region without seeing ifs: mark input as exhausted.
		if (p >= phandle->eof)
			phandle->sol = p+1;

		lrec_put_no_free(prec, key, value);

		// The record ends at end of input or at a blank line.
		if (phandle->sol >= phandle->eof || *phandle->sol == ifs)
			break;
	}
	if (prec->field_count == 0) {
		lrec_free(prec);
		return NULL;
	} else {
		return prec;
	}
}
Пример #9
0
// Writes one record without alignment: for every field, the key, pstate->ops,
// the value and pstate->ofs, in that order. From the second record on, an extra
// pstate->ofs is written before the record. A NULL record is ignored. The record
// is freed before returning.
static void lrec_writer_xtab_process_unaligned(FILE* output_stream, lrec_t* prec, void* pvstate) {
	if (prec == NULL)
		return;

	lrec_writer_xtab_state_t* pstate = pvstate;
	if (pstate->record_count > 0LL)
		fputs(pstate->ofs, output_stream);
	pstate->record_count++;

	lrece_t* pfield = prec->phead;
	while (pfield != NULL) {
		fputs(pfield->key, output_stream);
		fputs(pstate->ops, output_stream);
		fputs(pfield->value, output_stream);
		fputs(pstate->ofs, output_stream);
		pfield = pfield->pnext;
	}

	lrec_free(prec); // xxx cmt mem-mgmt
}
Пример #10
0
// ----------------------------------------------------------------
// Writes one record as a CSV data line, preceded by a header line whenever the
// record's keys differ from the most recently remembered header (or no header has
// been remembered yet). Keys and values are emitted through
// pstate->pquoted_output_func; fields are separated by pstate->ofs and lines are
// ended with pstate->ors. When the keys change after at least one header has been
// counted, an extra ors is written first. With pstate->headerless_csv_output set,
// header text is suppressed but the key list is still remembered and counted.
// A NULL record is ignored. The record is freed before returning.
static void lrec_writer_csv_process(FILE* output_stream, lrec_t* prec, void* pvstate) {
	if (prec == NULL)
		return;

	lrec_writer_csv_state_t* pstate = pvstate;
	char* ors = pstate->ors;
	char* ofs = pstate->ofs;

	// Forget the remembered header when this record's keys do not match it.
	if (pstate->plast_header_output != NULL
		&& !lrec_keys_equal_list(prec, pstate->plast_header_output))
	{
		slls_free(pstate->plast_header_output);
		pstate->plast_header_output = NULL;
		if (pstate->num_header_lines_output > 0LL)
			fputs(ors, output_stream);
	}

	// No remembered header: emit one (unless headerless) and remember the keys.
	if (pstate->plast_header_output == NULL) {
		if (!pstate->headerless_csv_output) {
			for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
				if (pfield != prec->phead)
					fputs(ofs, output_stream);
				pstate->pquoted_output_func(output_stream, pfield->key, ors, ofs,
					pstate->orslen, pstate->ofslen);
			}
			fputs(ors, output_stream);
		}
		pstate->plast_header_output = mlr_copy_keys_from_record(prec);
		pstate->num_header_lines_output++;
	}

	// Data line.
	for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
		if (pfield != prec->phead)
			fputs(ofs, output_stream);
		pstate->pquoted_output_func(output_stream, pfield->value, ors, ofs,
			pstate->orslen, pstate->ofslen);
	}
	fputs(ors, output_stream);
	pstate->onr++;

	// See ../README.md for memory-management conventions
	lrec_free(prec);
}
Пример #11
0
// ----------------------------------------------------------------
// Unit test: builds a record from four xtab lines (space-separated, repeated
// separators allowed) and checks lrec_get, lrec_remove and both lrec_rename
// flavors on it. Every check goes through mu_assert_lf; NULL is returned once
// the end is reached.
static char* test_lrec_xtab_api() {
	// One "key value" pair per line; some lines use several spaces as separator.
	slls_t* plines = slls_alloc();
	slls_add_with_free(plines, strdup("w 2"));
	slls_add_with_free(plines, strdup("x    3"));
	slls_add_with_free(plines, strdup("y 4"));
	slls_add_with_free(plines, strdup("z  5"));

	lrec_t* prow = lrec_parse_stdio_xtab(plines, ' ', TRUE);
	mu_assert_lf(prow->field_count == 4);

	char* names[]  = { "w", "x", "y", "z" };
	char* values[] = { "2", "3", "4", "5" };
	for (int i = 0; i < 4; i++) {
		mu_assert_lf(streq(lrec_get(prow, names[i]), values[i]));
	}

	lrec_remove(prow, "w");
	mu_assert_lf(prow->field_count == 3);
	mu_assert_lf(lrec_get(prow, "w") == NULL);

	// Rename onto a name that is not present: the field count is unchanged.
	lrec_rename(prow, "x", "u");
	mu_assert_lf(prow->field_count == 3);
	mu_assert_lf(lrec_get(prow, "x") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "u"), "3"));

	// Rename onto a name that already exists: one field fewer afterwards.
	lrec_rename(prow, "y", "z");

	mu_assert_lf(prow->field_count == 2);
	mu_assert_lf(streq(lrec_get(prow, "u"), "3"));
	mu_assert_lf(lrec_get(prow, "y") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "z"), "4"));

	lrec_free(prow);

	return NULL;
}
Пример #12
0
// Releases everything owned by a stdio-JSON record reader: each parsed top-level
// JSON value and the list holding them, each record still held in the record list
// and that list itself, then the reader state and the reader object.
//
// Either list may be NULL (for example when no file was ever opened for this
// reader), in which case it is skipped rather than dereferenced.
static void lrec_reader_stdio_json_free(lrec_reader_t* preader) {
	lrec_reader_stdio_json_state_t* pstate = preader->pvstate;

	if (pstate->ptop_level_json_objects != NULL) {
		for (sllve_t* pe = pstate->ptop_level_json_objects->phead; pe != NULL; pe = pe->pnext) {
			json_value_t* top_level_json_object = pe->pvvalue;
			json_free_value(top_level_json_object);
		}
		sllv_free(pstate->ptop_level_json_objects);
		pstate->ptop_level_json_objects = NULL;
	}

	if (pstate->precords != NULL) {
		for (sllve_t* pf = pstate->precords->phead; pf != NULL; pf = pf->pnext) {
			lrec_t* prec = pf->pvvalue;
			lrec_free(prec);
		}
		sllv_free(pstate->precords);
		pstate->precords = NULL;
	}

	free(pstate);
	free(preader);
}
Пример #13
0
// Writes one record as a CSV data line, preceded by a header line whenever the
// record's keys differ from the most recently written header (or no header has
// been written yet). Fields are separated by the character pstate->ofs and lines
// are ended with the character pstate->ors; keys and values are written verbatim.
// When the keys change after at least one header was written, an extra ors is
// written first. A NULL record is ignored. The record is freed before returning.
static void lrec_writer_csv_process(FILE* output_stream, lrec_t* prec, void* pvstate) {
    if (prec == NULL)
        return;

    lrec_writer_csv_state_t* pstate = pvstate;
    char ors = pstate->ors;
    char ofs = pstate->ofs;

    // Forget the remembered header when this record's keys do not match it.
    // xxx make a fcn to compare these w/o copy: put it in mixutil.
    if (pstate->plast_header_output != NULL
        && !lrec_keys_equal_list(prec, pstate->plast_header_output))
    {
        slls_free(pstate->plast_header_output);
        pstate->plast_header_output = NULL;
        if (pstate->num_header_lines_output > 0LL)
            fputc(ors, output_stream);
    }

    // No remembered header: write one and remember the keys.
    if (pstate->plast_header_output == NULL) {
        for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
            if (pfield != prec->phead)
                fputc(ofs, output_stream);
            fputs(pfield->key, output_stream);
        }
        fputc(ors, output_stream);
        pstate->plast_header_output = mlr_copy_keys_from_record(prec);
        pstate->num_header_lines_output++;
    }

    // Data line.
    for (lrece_t* pfield = prec->phead; pfield != NULL; pfield = pfield->pnext) {
        if (pfield != prec->phead)
            fputc(ofs, output_stream);
        fputs(pfield->value, output_stream);
    }
    fputc(ors, output_stream);
    pstate->onr++;

    lrec_free(prec); // xxx cmt mem-mgmt
}
Пример #14
0
// ----------------------------------------------------------------
// Writes one record as a single line of key/value pairs: each field is written
// as key, pstate->ops, value; fields are separated by pstate->ofs and the line
// is ended with pstate->ors. A NULL record is ignored. The record is freed
// before returning.
static void lrec_writer_dkvp_process(FILE* output_stream, lrec_t* prec, void* pvstate) {
	if (prec == NULL)
		return;

	lrec_writer_dkvp_state_t* pstate = pvstate;
	char* line_end  = pstate->ors;
	char* field_sep = pstate->ofs;
	char* pair_sep  = pstate->ops;

	lrece_t* pfield = prec->phead;
	while (pfield != NULL) {
		// Separator goes between fields, not before the first one.
		if (pfield != prec->phead)
			fputs(field_sep, output_stream);
		fputs(pfield->key, output_stream);
		fputs(pair_sep, output_stream);
		fputs(pfield->value, output_stream);
		pfield = pfield->pnext;
	}
	fputs(line_end, output_stream);

	lrec_free(prec); // end of baton-pass
}
Пример #15
0
// ----------------------------------------------------------------
// xxx if empty key then make a way to communicate back to the reader that it
// can stop reading further records -- ?
// Per-record callback that passes through at most pstate->head_count records
// for each distinct combination of group-by field values.
//
// pinrec != NULL: the group's counter is created on first sight and incremented;
//   while the counter does not exceed head_count the record is returned in a
//   single-element list, otherwise it is freed and NULL is returned.
// pinrec == NULL: returns a single-element list holding NULL.
//
// pctx is not used here.
static sllv_t* mapper_head_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	mapper_head_state_t* pstate = pvstate;

	if (pinrec == NULL)
		return sllv_single(NULL);

	// NOTE(review): the list returned by mlr_selected_values_from_record is not
	// released in this function; whether that is a leak depends on the helper's
	// ownership contract -- confirm.
	slls_t* pgroup_key = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names);

	unsigned long long* pseen = lhmslv_get(pstate->precord_lists_by_group, pgroup_key);
	if (pseen == NULL) {
		// First record of this group: start a heap-allocated counter at zero and
		// store it under a copy of the key.
		pseen = mlr_malloc_or_die(sizeof(unsigned long long));
		*pseen = 0ULL;
		lhmslv_put(pstate->precord_lists_by_group, slls_copy(pgroup_key), pseen);
	}

	*pseen += 1;
	if (*pseen > pstate->head_count) {
		lrec_free(pinrec);
		return NULL;
	}
	return sllv_single(pinrec);
}
Пример #16
0
// ----------------------------------------------------------------
// Per-record callback that gives records with the same set of field names the
// same field order. The record's keys are sorted and looked up in
// pstate->psorted_to_original:
//   - not found: a copy of the sorted keys is stored as the map key, a copy of the
//     record's keys (in their current order) as the map value, and the record is
//     returned unchanged in a single-element list;
//   - found: a new record is built whose fields follow the stored key order, with
//     values duplicated from the input record; the input record is freed and the
//     new record is returned in a single-element list.
// For a NULL input a single-element list holding NULL is returned.
// pctx is not used here.
static sllv_t* mapper_regularize_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	if (pinrec == NULL)
		return sllv_single(NULL);

	mapper_regularize_state_t* pstate = (mapper_regularize_state_t*)pvstate;

	// NOTE(review): this list is not released on either path below; whether that
	// is a leak depends on mlr_reference_keys_from_record's contract -- confirm.
	slls_t* psorted_names = mlr_reference_keys_from_record(pinrec);
	slls_sort(psorted_names);

	// The map value is the key order of the first record seen with this key set.
	slls_t* pfirst_seen_order = lhmslv_get(pstate->psorted_to_original, psorted_names);
	if (pfirst_seen_order == NULL) {
		slls_t* pmap_key = slls_copy(psorted_names);
		lhmslv_put(pstate->psorted_to_original, pmap_key, mlr_copy_keys_from_record(pinrec));
		return sllv_single(pinrec);
	}

	lrec_t* poutrec = lrec_unbacked_alloc();
	sllse_t* pname = pfirst_seen_order->phead;
	while (pname != NULL) {
		// Keys point into the stored list; values are duplicated because the
		// input record is freed below.
		lrec_put(poutrec, pname->value, strdup(lrec_get(pinrec, pname->value)), LREC_FREE_ENTRY_VALUE);
		pname = pname->pnext;
	}
	lrec_free(pinrec);
	return sllv_single(poutrec);
}
Пример #17
0
// ----------------------------------------------------------------
// Unit test: parses one comma-separated line into a record keyed by 1-up field
// position, then checks lrec_get, lrec_remove and both lrec_rename flavors. The
// record is dumped before and after each rename. Every check goes through
// mu_assert_lf; NULL is returned once the end is reached.
static char* test_lrec_nidx_api() {
	char* text = strdup("a,b,c,d");
	lrec_t* prow = lrec_parse_stdio_nidx(text, ',', FALSE);
	mu_assert_lf(prow->field_count == 4);

	// Keys are the field positions, as strings.
	char* positions[] = { "1", "2", "3", "4" };
	char* values[]    = { "a", "b", "c", "d" };
	for (int i = 0; i < 4; i++) {
		mu_assert_lf(streq(lrec_get(prow, positions[i]), values[i]));
	}

	lrec_remove(prow, "1");
	mu_assert_lf(prow->field_count == 3);
	mu_assert_lf(lrec_get(prow, "1") == NULL);

	// Rename onto a name that is not present: the field count is unchanged.
	lrec_dump_titled("Before rename", prow);
	lrec_rename(prow, "2", "u");
	lrec_dump_titled("After rename", prow);
	mu_assert_lf(prow->field_count == 3);
	mu_assert_lf(lrec_get(prow, "2") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "u"), "b"));

	// Rename onto a name that already exists: one field fewer afterwards.
	lrec_dump_titled("Before rename", prow);
	lrec_rename(prow, "3", "4");
	lrec_dump_titled("After rename", prow);

	mu_assert_lf(prow->field_count == 2);
	mu_assert_lf(streq(lrec_get(prow, "u"), "b"));
	mu_assert_lf(lrec_get(prow, "3") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "4"), "c"));

	lrec_free(prow);

	return NULL;
}
Пример #18
0
// ----------------------------------------------------------------
// Unit test: parses one line of comma-separated key=value pairs into a record,
// then checks lrec_get, lrec_remove and both lrec_rename flavors. Every check
// goes through mu_assert_lf; NULL is returned once the end is reached.
static char* test_lrec_dkvp_api() {
	char* text = strdup("w=2,x=3,y=4,z=5");
	lrec_t* prow = lrec_parse_stdio_dkvp(text, ',', '=', FALSE);
	mu_assert_lf(prow->field_count == 4);

	char* names[]  = { "w", "x", "y", "z" };
	char* values[] = { "2", "3", "4", "5" };
	for (int i = 0; i < 4; i++) {
		mu_assert_lf(streq(lrec_get(prow, names[i]), values[i]));
	}

	lrec_remove(prow, "w");
	mu_assert_lf(prow->field_count == 3);
	mu_assert_lf(lrec_get(prow, "w") == NULL);

	// Rename onto a name that is not present: the field count is unchanged.
	lrec_rename(prow, "x", "u");
	mu_assert_lf(prow->field_count == 3);
	mu_assert_lf(lrec_get(prow, "x") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "u"), "3"));

	// Rename onto a name that already exists: one field fewer afterwards.
	lrec_rename(prow, "y", "z");

	mu_assert_lf(prow->field_count == 2);
	mu_assert_lf(streq(lrec_get(prow, "u"), "3"));
	mu_assert_lf(lrec_get(prow, "y") == NULL);
	mu_assert_lf(streq(lrec_get(prow, "z"), "4"));

	lrec_free(prow);

	return NULL;
}
Пример #19
0
// The stdio-JSON lrec-reader is non-streaming: we ingest all records here in the start-of-file hook.
// Then in the process method we pop one lrec off the list at a time, until they are all exhausted.
// This is in contrast to other Miller lrec-readers.
//
// It would be possible to extend the streaming framework to also have an end-of-file hook
// which we could use here to free parsed-JSON data. However, we simply leverage the start-of-file
// hook for the *next* file (if any) or the free method (if not): these free parsed-JSON structures
// from the previous file (if any).
static void lrec_reader_stdio_json_sof(void* pvstate, void* pvhandle) {
	lrec_reader_stdio_json_state_t* pstate = pvstate;
	file_ingestor_stdio_state_t* phandle = pvhandle;
	json_char* json_input = (json_char*)phandle->sof;
	json_value_t* parsed_top_level_json;
	json_char error_buf[JSON_ERROR_MAX];

	if (pstate->ptop_level_json_objects != NULL) {
		for (sllve_t* pe = pstate->ptop_level_json_objects->phead; pe != NULL; pe = pe->pnext) {
			json_value_t* top_level_json_object = pe->pvvalue;
			json_value_free(top_level_json_object);
		}
		sllv_free(pstate->ptop_level_json_objects);
	}
	if (pstate->precords != NULL) {
		for (sllve_t* pf = pstate->precords->phead; pf != NULL; pf = pf->pnext) {
			lrec_t* prec = pf->pvvalue;
			lrec_free(prec);
		}
		sllv_free(pstate->precords);
	}

	pstate->ptop_level_json_objects = sllv_alloc();
	pstate->precords = sllv_alloc();

	// This enables us to handle input of the form
	//
	//   { "a" : 1 }
	//   { "b" : 2 }
	//   { "c" : 3 }
	//
	// in addition to
	//
	// [
	//   { "a" : 1 }
	//   { "b" : 2 }
	//   { "c" : 3 }
	// ]
	//
	// This is in line with what jq can handle. In this case, json_parse will return
	// once for each top-level item and will give us back a pointer to the start of
	// the rest of the input stream, so we can call json_parse on the rest until it is
	// all exhausted.

	json_char* item_start = json_input;
	int length = phandle->eof - phandle->sof;

	while (TRUE) {
		parsed_top_level_json = json_parse(item_start, length, error_buf, &item_start);

		if (parsed_top_level_json == NULL) {
			fprintf(stderr, "Unable to parse JSON data: %s\n", error_buf);
			exit(1);
		}

		// The lrecs have their string pointers pointing into the parsed-JSON objects (for
		// efficiency) so it's important we not free the latter until our free method.
		reference_json_objects_as_lrecs(pstate->precords, parsed_top_level_json, pstate->json_flatten_separator);

		if (item_start == NULL)
			break;
		if (*item_start == 0)
			break;
		length -= (item_start - json_input);
		json_input = item_start;

	}

}
Пример #20
0
// ----------------------------------------------------------------
// Prints a list of records as an aligned table and frees both the records and the
// list. Column widths are the maximum display width over the first record's keys
// and every record's values. The first record's keys form the header line. Cells
// are padded with the character ofs, which also separates columns; each line ends
// with the string ors. Empty values are printed as "-".
//
// left_align nonzero: text first, padding after (no padding after the last column).
// left_align zero:    padding first, text after.
//
// An empty list is freed and nothing is printed.
static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char ofs, int left_align) {
	if (precords->length == 0) {
		sllv_free(precords);
		return;
	}

	// The width table is sized from the first record's field count.
	lrec_t* pfirst = precords->phead->pvvalue;
	int* col_widths = mlr_malloc_or_die(sizeof(int) * pfirst->field_count);

	// Seed the widths with the header (key) widths ...
	int col = 0;
	lrece_t* pfield = pfirst->phead;
	while (pfield != NULL) {
		col_widths[col] = strlen_for_utf8_display(pfield->key);
		col++;
		pfield = pfield->pnext;
	}
	// ... then widen each column to fit every value in it.
	for (sllve_t* pn = precords->phead; pn != NULL; pn = pn->pnext) {
		lrec_t* prow = pn->pvvalue;
		col = 0;
		for (pfield = prow->phead; pfield != NULL; pfield = pfield->pnext, col++) {
			int w = strlen_for_utf8_display(pfield->value);
			if (col_widths[col] < w)
				col_widths[col] = w;
		}
	}

	for (sllve_t* pn = precords->phead; pn != NULL; pn = pn->pnext) {
		lrec_t* prow = pn->pvvalue;

		// Header line, once, taken from the first record.
		if (pn == precords->phead) {
			col = 0;
			for (pfield = prow->phead; pfield != NULL; pfield = pfield->pnext, col++) {
				if (col > 0)
					fputc(ofs, output_stream);
				// Padding is emitted by hand: the "%-*s" fprintf format isn't
				// correct for non-ASCII UTF-8.
				if (left_align) {
					fputs(pfield->key, output_stream);
					if (pfield->pnext != NULL) {
						int pad = col_widths[col] - strlen_for_utf8_display(pfield->key);
						while (pad-- > 0)
							fputc(ofs, output_stream);
					}
				} else {
					int pad = col_widths[col] - strlen_for_utf8_display(pfield->key);
					while (pad-- > 0)
						fputc(ofs, output_stream);
					fputs(pfield->key, output_stream);
				}
			}
			fputs(ors, output_stream);
		}

		// Data line.
		col = 0;
		for (pfield = prow->phead; pfield != NULL; pfield = pfield->pnext, col++) {
			if (col > 0)
				fputc(ofs, output_stream);

			// An empty string is shown as a dash.
			char* cell = (*pfield->value == 0) ? "-" : pfield->value;

			if (left_align) {
				fputs(cell, output_stream);
				if (pfield->pnext != NULL) {
					int pad = col_widths[col] - strlen_for_utf8_display(cell);
					while (pad-- > 0)
						fputc(ofs, output_stream);
				}
			} else {
				int pad = col_widths[col] - strlen_for_utf8_display(cell);
				while (pad-- > 0)
					fputc(ofs, output_stream);
				fputs(cell, output_stream);
			}
		}
		fputs(ors, output_stream);

		lrec_free(prow); // end of baton-pass
	}

	free(col_widths);
	sllv_free(precords);
}
Пример #21
0
// Parses one xtab record out of the in-memory region [phandle->sol, phandle->eof).
// Each field occupies one line terminated by the single character ifs; within a
// line the key is separated from the value by the string pstate->ips (length
// pstate->ipslen). Separators are overwritten with NUL in place and, for lines
// that end with ifs, the record stores pointers into the region.
//
// phandle: sol is advanced past everything consumed.
// ifs:     line terminator; a line starting with ifs ends the record.
// pstate:  supplies ips/ipslen, allow_repeat_ips (a run of consecutive ips after
//          the key counts as one separator) and do_auto_line_term (LF vs. CRLF is
//          detected per line and reported through pctx; a CR before ifs is
//          stripped).
// pctx:    receives the autodetected line-ending notifications.
//
// Returns NULL when only blank lines (or nothing) remain, or when no field was
// collected; otherwise a newly allocated record.
static lrec_t* lrec_parse_mmap_xtab_single_ifs_multi_ips(file_reader_mmap_state_t* phandle, char ifs,
	lrec_reader_mmap_xtab_state_t* pstate, context_t* pctx)
{
	if (pstate->do_auto_line_term) {
		// Skip over otherwise empty LF-only or CRLF-only lines.
		while (phandle->sol < phandle->eof) {
			if (*phandle->sol == '\n') {
				context_set_autodetected_lf(pctx);
				phandle->sol += 1;
			} else if (*phandle->sol == '\r') {
				char* q = phandle->sol + 1;
				if (q < phandle->eof && *q == '\n') {
					context_set_autodetected_crlf(pctx);
					phandle->sol += 2;
				} else {
					phandle->sol += 1;
				}
			} else {
				break;
			}
		}
	} else {
		// Skip over otherwise empty IFS-only lines.
		while (phandle->sol < phandle->eof && *phandle->sol == ifs)
			phandle->sol++;
	}

	if (phandle->sol >= phandle->eof)
		return NULL;

	char* ips = pstate->ips;
	int ipslen = pstate->ipslen;

	lrec_t* prec = lrec_unbacked_alloc();

	// Loop over fields, one per line
	while (TRUE) {
		if (phandle->sol >= phandle->eof)
			break;

		char* line  = phandle->sol;
		char* key   = line;
		char* value = "";
		char* p;

		// Construct one field
		int saw_eol = FALSE;
		// Set once value points into the input region rather than at the "" literal.
		int saw_ips = FALSE;
		for (p = line; p < phandle->eof && *p; ) {
			if (*p == ifs) {
				*p = 0;

				if (pstate->do_auto_line_term) {
					if (p > line && p[-1] == '\r') {
						p[-1] = 0;
						context_set_autodetected_crlf(pctx);
					} else {
						context_set_autodetected_lf(pctx);
					}
				}

				phandle->sol = p+1;
				saw_eol = TRUE;
				break;
			} else if (streqn(p, ips, ipslen)) {
				key = line;
				*p = 0;

				p += ipslen;
				if (pstate->allow_repeat_ips) {
					// Bounded by eof so that a line ending in ips at the very end of
					// the region does not make this scan run past the region.
					while (p < phandle->eof && streqn(p, ips, ipslen))
						p += ipslen;
				}
				value = p;
				saw_ips = TRUE;
			} else {
				p++;
			}
		}
		// Ran off the end of the region without seeing ifs: mark input as exhausted.
		if (p >= phandle->eof)
			phandle->sol = p+1;

		if (saw_eol) {
			// Easy and simple case: we read until end of line.  We zero-poked the irs to a null character to terminate
			// the C string so it's OK to retain a pointer to that.
			lrec_put(prec, key, value, NO_FREE);
		} else {
			// Messier case: we read to end of file without seeing end of line.  We can't always zero-poke a null
			// character to terminate the C string: if the file size is not a multiple of the OS page size it'll work
			// (it's our copy-on-write memory). But if the file size is a multiple of the page size, then zero-poking at
			// EOF is one byte past the page and that will segv us.
			//
			// The length is taken from the region only when value actually points into it. Without an ips on this
			// line, value is still the "" literal, and subtracting it from eof would yield a meaningless length.
			int value_len = 0;
			if (saw_ips && value < phandle->eof)
				value_len = phandle->eof - value;
			else
				value = "";
			// NOTE(review): key is not copied here, so on this path it relies on a NUL being present at or after
			// eof -- confirm whether it needs the same treatment as value.
			char* copy = mlr_alloc_string_from_char_range(value, value_len);
			lrec_put(prec, key, copy, FREE_ENTRY_VALUE);
		}

		// The record ends at end of input or at a blank line.
		if (phandle->sol >= phandle->eof || *phandle->sol == ifs)
			break;
	}
	if (prec->field_count == 0) {
		lrec_free(prec);
		return NULL;
	} else {
		return prec;
	}
}
Пример #22
0
// Per-record callback that runs every configured stepper over every configured
// value field, with stepper state kept separately per group and per value field.
// State is organized as three nested maps:
//   group-by values -> value-field name -> stepper name -> step_t.
//
// pinrec == NULL: returns a single-element list holding NULL.
// Record with fewer group-by values than group-by field names: the record is freed
//   and NULL is returned.
// Otherwise: each stepper's string and/or double callback is invoked with the field
//   value and the record, and the record is returned in a single-element list.
//
// A stepper name that make_step cannot resolve is fatal: a message goes to stderr
// and the process exits with status 1. pctx is not used here.
static sllv_t* mapper_step_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	mapper_step_state_t* pstate = pvstate;

	if (pinrec == NULL)
		return sllv_single(NULL);

	// e.g. ["s", "t"]
	// NOTE(review): neither of these two lists is released in this function; whether
	// that is a leak depends on mlr_selected_values_from_record's contract -- confirm.
	slls_t* pvalues    = mlr_selected_values_from_record(pinrec, pstate->pvalue_field_names);
	slls_t* pgroup_key = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names);

	if (pgroup_key->length != pstate->pgroup_by_field_names->length) {
		lrec_free(pinrec);
		return NULL;
	}

	// Level 1: per-group map, created on first sight of the group.
	lhmsv_t* pfields_for_group = lhmslv_get(pstate->groups, pgroup_key);
	if (pfields_for_group == NULL) {
		pfields_for_group = lhmsv_alloc();
		lhmslv_put(pstate->groups, slls_copy(pgroup_key), pfields_for_group);
	}

	// Names and values are walked in lockstep, e.g. x=1 then y=2.
	// NOTE(review): pairing is positional, so it presumes pvalues has one entry per
	// configured name, in the same order -- confirm against the helper.
	sllse_t* pname_node  = pstate->pvalue_field_names->phead;
	sllse_t* pvalue_node = pvalues->phead;
	while (pname_node != NULL && pvalue_node != NULL) {
		char*  field_name    = pname_node->value;
		char*  field_text    = pvalue_node->value;
		double field_number  = -999.0;
		int    number_parsed = FALSE;

		// Level 2: per-field map, created on first sight of the field in this group.
		lhmsv_t* psteppers = lhmsv_get(pfields_for_group, field_name);
		if (psteppers == NULL) {
			psteppers = lhmsv_alloc();
			lhmsv_put(pfields_for_group, field_name, psteppers);
		}

		// Level 3: one step_t per stepper name, e.g. "delta", "rsum".
		for (sllse_t* pstepper_node = pstate->pstepper_names->phead;
			pstepper_node != NULL;
			pstepper_node = pstepper_node->pnext)
		{
			char* stepper_name = pstepper_node->value;
			step_t* pstepper = lhmsv_get(psteppers, stepper_name);
			if (pstepper == NULL) {
				pstepper = make_step(stepper_name, field_name);
				if (pstepper == NULL) {
					fprintf(stderr, "mlr step: stepper \"%s\" not found.\n",
						stepper_name);
					exit(1);
				}
				lhmsv_put(psteppers, stepper_name, pstepper);
			}

			if (pstepper->psprocess_func != NULL) {
				pstepper->psprocess_func(pstepper->pvstate, field_text, pinrec);
			}
			if (pstepper->pdprocess_func != NULL) {
				// Parse the text once per field, and only when a stepper needs a double.
				if (!number_parsed) {
					field_number = mlr_double_from_string_or_die(field_text);
					number_parsed = TRUE;
				}
				pstepper->pdprocess_func(pstepper->pvstate, field_number, pinrec);
			}
		}

		pname_node  = pname_node->pnext;
		pvalue_node = pvalue_node->pnext;
	}

	return sllv_single(pinrec);
}
Пример #23
0
// Parses one xtab record out of the in-memory region [phandle->sol, phandle->eof).
// Each field occupies one line terminated by the string pstate->ifs (length
// pstate->ifslen); within a line the key is separated from the value by the string
// pstate->ips (length pstate->ipslen). The first byte of each separator is
// overwritten with NUL in place and, for lines that end with ifs, the record stores
// pointers into the region.
//
// phandle: sol is advanced past everything consumed.
// pstate:  supplies ifs/ifslen, ips/ipslen and allow_repeat_ips (a run of
//          consecutive ips after the key counts as one separator).
//
// Returns NULL when only blank lines (or nothing) remain, or when no field was
// collected; otherwise a newly allocated record.
static lrec_t* lrec_parse_mmap_xtab_multi_ifs_multi_ips(file_reader_mmap_state_t* phandle,
	lrec_reader_mmap_xtab_state_t* pstate)
{
	char* ips = pstate->ips;
	int ipslen = pstate->ipslen;
	char* ifs = pstate->ifs;
	int ifslen = pstate->ifslen;

	// Skip blank lines
	while (phandle->eof - phandle->sol >= ifslen && streqn(phandle->sol, ifs, ifslen)) {
		phandle->sol += ifslen;
	}

	if (phandle->sol >= phandle->eof)
		return NULL;

	lrec_t* prec = lrec_unbacked_alloc();

	// Loop over fields, one per line
	while (TRUE) {
		if (phandle->sol >= phandle->eof)
			break;

		char* line  = phandle->sol;
		char* key   = line;
		char* value = "";
		char* p;

		// Construct one field
		int saw_eol = FALSE;
		// Set once value points into the input region rather than at the "" literal.
		int saw_ips = FALSE;
		for (p = line; p < phandle->eof && *p; ) {
			if (streqn(p, ifs, ifslen)) {
				*p = 0;
				phandle->sol = p + ifslen;
				saw_eol = TRUE;
				break;
			} else if (streqn(p, ips, ipslen)) {
				key = line;
				*p = 0;

				p += ipslen;
				if (pstate->allow_repeat_ips) {
					// Bounded by eof so that a line ending in ips at the very end of
					// the region does not make this scan run past the region.
					while (p < phandle->eof && streqn(p, ips, ipslen))
						p += ipslen;
				}
				value = p;
				saw_ips = TRUE;
			} else {
				p++;
			}
		}
		// Ran off the end of the region without seeing ifs: mark input as exhausted.
		if (p >= phandle->eof)
			phandle->sol = p+1;

		if (saw_eol) {
			// Easy and simple case: we read until end of line.  We zero-poked the irs to a null character to terminate
			// the C string so it's OK to retain a pointer to that.
			lrec_put(prec, key, value, NO_FREE);
		} else {
			// Messier case: we read to end of file without seeing end of line.  We can't always zero-poke a null
			// character to terminate the C string: if the file size is not a multiple of the OS page size it'll work
			// (it's our copy-on-write memory). But if the file size is a multiple of the page size, then zero-poking at
			// EOF is one byte past the page and that will segv us.
			//
			// The length is taken from the region only when value actually points into it. Without an ips on this
			// line, value is still the "" literal, and subtracting it from eof would yield a meaningless length.
			int value_len = 0;
			if (saw_ips && value < phandle->eof)
				value_len = phandle->eof - value;
			else
				value = "";
			// NOTE(review): key is not copied here, so on this path it relies on a NUL being present at or after
			// eof -- confirm whether it needs the same treatment as value.
			char* copy = mlr_alloc_string_from_char_range(value, value_len);
			lrec_put(prec, key, copy, FREE_ENTRY_VALUE);
		}

		// The record ends at end of input or at a blank line.
		if (phandle->sol >= phandle->eof || streqn(phandle->sol, ifs, ifslen))
			break;
	}
	if (prec->field_count == 0) {
		lrec_free(prec);
		return NULL;
	} else {
		return prec;
	}
}