// ---------------------------------------------------------------- static void make_accs( slls_t* paccumulator_names, // Input lhmsv_t* acc_field_to_acc_state) // Output { acc_t* ppercentile_acc = NULL; for (sllse_t* pc = paccumulator_names->phead; pc != NULL; pc = pc->pnext) { // for "sum", "count" char* acc_name = pc->value; if (is_percentile_acc_name(acc_name)) { if (ppercentile_acc == NULL) { ppercentile_acc = acc_percentile_alloc(); } lhmsv_put(acc_field_to_acc_state, acc_name, ppercentile_acc); } else { acc_t* pacc = make_acc(acc_name); if (pacc == NULL) { fprintf(stderr, "%s stats1: accumulator \"%s\" not found.\n", MLR_GLOBALS.argv0, acc_name); exit(1); } lhmsv_put(acc_field_to_acc_state, acc_name, pacc); } } }
// ---------------------------------------------------------------- void make_stats1_accs( char* value_field_name, // input slls_t* paccumulator_names, // input int allow_int_float, // input lhmsv_t* acc_field_to_acc_state) // output { stats1_acc_t* ppercentile_acc = NULL; for (sllse_t* pc = paccumulator_names->phead; pc != NULL; pc = pc->pnext) { // for "sum", "count" char* stats1_acc_name = pc->value; if (is_percentile_acc_name(stats1_acc_name)) { if (ppercentile_acc == NULL) { ppercentile_acc = stats1_percentile_alloc(value_field_name, stats1_acc_name, allow_int_float); } else { stats1_percentile_reuse(ppercentile_acc); } lhmsv_put(acc_field_to_acc_state, stats1_acc_name, ppercentile_acc, NO_FREE); } else { stats1_acc_t* pstats1_acc = make_stats1_acc(value_field_name, stats1_acc_name, allow_int_float); if (pstats1_acc == NULL) { fprintf(stderr, "%s stats1: accumulator \"%s\" not found.\n", MLR_GLOBALS.argv0, stats1_acc_name); exit(1); } lhmsv_put(acc_field_to_acc_state, stats1_acc_name, pstats1_acc, NO_FREE); } } }
// ---------------------------------------------------------------- static char* test_lhmsv() { lhmsv_t *pmap = lhmsv_alloc(); mu_assert_lf(pmap->num_occupied == 0); mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); mu_assert_lf(!lhmsv_has_key(pmap, "x")); mu_assert_lf(lhmsv_get(pmap, "x") == NULL); mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); mu_assert_lf(lhmsv_check_counts(pmap)); lhmsv_put(pmap, "x", "3"); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "3")); mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); mu_assert_lf(lhmsv_check_counts(pmap)); lhmsv_put(pmap, "y", "5"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "3")); mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); mu_assert_lf(lhmsv_check_counts(pmap)); lhmsv_put(pmap, "x", "4"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); mu_assert_lf(lhmsv_check_counts(pmap)); lhmsv_put(pmap, "z", "7"); mu_assert_lf(pmap->num_occupied == 3); 
mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); mu_assert_lf( lhmsv_has_key(pmap, "z")); mu_assert_lf(streq(lhmsv_get(pmap, "z"), "7")); mu_assert_lf(lhmsv_check_counts(pmap)); lhmsv_remove(pmap, "y"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); mu_assert_lf( lhmsv_has_key(pmap, "z")); mu_assert_lf(streq(lhmsv_get(pmap, "z"), "7")); mu_assert_lf(lhmsv_check_counts(pmap)); lhmsv_free(pmap); return NULL; }
// ---------------------------------------------------------------- void multi_lrec_writer_output_srec(multi_lrec_writer_t* pmlw, lrec_t* poutrec, char* filename_or_command, file_output_mode_t file_output_mode, int flush_every_record, context_t* pctx) { lrec_writer_and_fp_t* pstate = lhmsv_get(pmlw->pnames_to_lrec_writers_and_fps, filename_or_command); if (pstate == NULL) { pstate = mlr_malloc_or_die(sizeof(lrec_writer_and_fp_t)); pstate->plrec_writer = lrec_writer_alloc(pmlw->pwriter_opts); MLR_INTERNAL_CODING_ERROR_IF(pstate->plrec_writer == NULL); pstate->filename_or_command = mlr_strdup_or_die(filename_or_command); char* mode_string = get_mode_string(file_output_mode); char* mode_desc = get_mode_desc(file_output_mode); if (file_output_mode == MODE_PIPE) { pstate->is_popen = TRUE; pstate->output_stream = popen(filename_or_command, mode_string); if (pstate->output_stream == NULL) { perror("popen"); fprintf(stderr, "%s: failed popen for %s on \"%s\".\n", MLR_GLOBALS.bargv0, mode_desc, filename_or_command); exit(1); } } else { pstate->is_popen = FALSE; pstate->output_stream = fopen(filename_or_command, mode_string); if (pstate->output_stream == NULL) { perror("fopen"); fprintf(stderr, "%s: failed fopen for %s on \"%s\".\n", MLR_GLOBALS.bargv0, mode_desc, filename_or_command); exit(1); } } lhmsv_put(pmlw->pnames_to_lrec_writers_and_fps, mlr_strdup_or_die(filename_or_command), pstate, FREE_ENTRY_KEY); } pstate->plrec_writer->pprocess_func(pstate->plrec_writer->pvstate, pstate->output_stream, poutrec, pctx); if (poutrec != NULL) { if (flush_every_record) fflush(pstate->output_stream); } else { if (pstate->is_popen) { // Sadly, pclose returns an error even on well-formed commands. For example, if the popened // command was "grep nonesuch" and the string "nonesuch" was not encountered, grep returns // non-zero and popen flags it as an error. We cannot differentiate these from genuine // failure cases so the best choice is to simply call pclose and ignore error codes. 
// If a piped-to command does fail then it should have some output to stderr which the // user can take advantage of. (void)pclose(pstate->output_stream); } else { if (fclose(pstate->output_stream) != 0) { perror("fclose"); fprintf(stderr, "%s: fclose error on \"%s\".\n", MLR_GLOBALS.bargv0, filename_or_command); exit(1); } } pstate->output_stream = NULL; } }
// Per-record handler for the step mapper.
//
// pinrec:  the input record, or NULL at end of stream.
// pctx:    unused here.
// pvstate: the mapper_step_state_t for this mapper.
//
// Returns:
// * sllv_single(NULL) at end of stream;
// * NULL, after freeing the record, when the record does not supply every
//   configured value field and every configured group-by field;
// * sllv_single(pinrec) otherwise, after each configured stepper has been
//   run on each value field. Steppers receive pinrec and so can modify it.
//
// Stepper state is kept per group-by values, then per value-field name, then
// per stepper name, and is created lazily on first use. An unknown stepper
// name prints a diagnostic and exits with status 1.
static sllv_t* mapper_step_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
	mapper_step_state_t* pstate = pvstate;
	if (pinrec == NULL)
		return sllv_single(NULL);

	// ["s", "t"]
	// NOTE(review): neither list is released in this function; ownership of
	// the lists returned by mlr_selected_values_from_record is not visible
	// here -- confirm whether they need to be freed.
	slls_t* pvalue_field_values = mlr_selected_values_from_record(pinrec, pstate->pvalue_field_names);
	slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names);

	// The value list is walked below in lock-step with the value field names,
	// so it must be fully populated: a shorter list would pair a value with
	// the wrong field name. The group-by list must likewise be complete to
	// form the grouping key.
	if (pvalue_field_values->length != pstate->pvalue_field_names->length
		|| pgroup_by_field_values->length != pstate->pgroup_by_field_names->length)
	{
		lrec_free(pinrec);
		return NULL;
	}

	// Level 1: group-by values -> map of value-field name.
	lhmsv_t* group_to_acc_field = lhmslv_get(pstate->groups, pgroup_by_field_values);
	if (group_to_acc_field == NULL) {
		group_to_acc_field = lhmsv_alloc();
		lhmslv_put(pstate->groups, slls_copy(pgroup_by_field_values), group_to_acc_field);
	}

	sllse_t* pa = pstate->pvalue_field_names->phead;
	sllse_t* pb = pvalue_field_values->phead;
	// for x=1 and y=2
	for ( ; pa != NULL && pb != NULL; pa = pa->pnext, pb = pb->pnext) {
		char* value_field_name = pa->value;
		char* value_field_sval = pb->value;
		// The numeric form is parsed at most once per value field, and only
		// if some stepper actually has a double-valued process function.
		int have_dval = FALSE;
		double value_field_dval = -999.0; // placeholder until parsed

		// Level 2: value-field name -> map of stepper name.
		lhmsv_t* acc_field_to_acc_state = lhmsv_get(group_to_acc_field, value_field_name);
		if (acc_field_to_acc_state == NULL) {
			acc_field_to_acc_state = lhmsv_alloc();
			lhmsv_put(group_to_acc_field, value_field_name, acc_field_to_acc_state);
		}

		// for "delta", "rsum"
		sllse_t* pc = pstate->pstepper_names->phead;
		for ( ; pc != NULL; pc = pc->pnext) {
			char* step_name = pc->value;

			// Level 3: stepper name -> stepper, created on first use.
			step_t* pstep = lhmsv_get(acc_field_to_acc_state, step_name);
			if (pstep == NULL) {
				pstep = make_step(step_name, value_field_name);
				if (pstep == NULL) {
					fprintf(stderr, "mlr step: stepper \"%s\" not found.\n", step_name);
					exit(1);
				}
				lhmsv_put(acc_field_to_acc_state, step_name, pstep);
			}

			if (pstep->psprocess_func != NULL) {
				pstep->psprocess_func(pstep->pvstate, value_field_sval, pinrec);
			}
			if (pstep->pdprocess_func != NULL) {
				if (!have_dval) {
					value_field_dval = mlr_double_from_string_or_die(value_field_sval);
					have_dval = TRUE;
				}
				pstep->pdprocess_func(pstep->pvstate, value_field_dval, pinrec);
			}
		}
	}
	return sllv_single(pinrec);
}
// ---------------------------------------------------------------- static void mapper_stats1_ingest(lrec_t* pinrec, mapper_stats1_state_t* pstate) { // E.g. ["s", "t"] // To do: make value_field_values into a hashmap. Then accept partial // population on that, but retain full-population requirement on group-by. // E.g. if accumulating stats of x,y on a,b then skip record with x,y,a but // process record with x,a,b. slls_t* pvalue_field_values = mlr_selected_values_from_record(pinrec, pstate->pvalue_field_names); slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names); if (pvalue_field_values->length != pstate->pvalue_field_names->length) return; if (pgroup_by_field_values->length != pstate->pgroup_by_field_names->length) return; lhmsv_t* group_to_acc_field = lhmslv_get(pstate->groups, pgroup_by_field_values); if (group_to_acc_field == NULL) { group_to_acc_field = lhmsv_alloc(); lhmslv_put(pstate->groups, slls_copy(pgroup_by_field_values), group_to_acc_field); } sllse_t* pa = pstate->pvalue_field_names->phead; sllse_t* pb = pvalue_field_values->phead; // for x=1 and y=2 for ( ; pa != NULL && pb != NULL; pa = pa->pnext, pb = pb->pnext) { char* value_field_name = pa->value; char* value_field_sval = pb->value; int have_dval = FALSE; double value_field_dval = -999.0; lhmsv_t* acc_field_to_acc_state = lhmsv_get(group_to_acc_field, value_field_name); if (acc_field_to_acc_state == NULL) { acc_field_to_acc_state = lhmsv_alloc(); lhmsv_put(group_to_acc_field, value_field_name, acc_field_to_acc_state); } // Look up presence of all accumulators at this level's hashmap. char* presence = lhmsv_get(acc_field_to_acc_state, fake_acc_name_for_setups); if (presence == NULL) { make_accs(pstate->paccumulator_names, acc_field_to_acc_state); lhmsv_put(acc_field_to_acc_state, fake_acc_name_for_setups, fake_acc_name_for_setups); } // There isn't a one-to-one mapping between user-specified acc_names // and internal acc_t's. 
Here in the ingestor we feed each datum into // an acc_t. In the emitter, we loop over the acc_names in // user-specified order. Example: they ask for p10,mean,p90. Then there // is only one percentiles accumulator to be told about each point. In // the emitter it will be asked to produce output twice: once for the // 10th percentile & once for the 90th. for (lhmsve_t* pc = acc_field_to_acc_state->phead; pc != NULL; pc = pc->pnext) { char* acc_name = pc->key; if (streq(acc_name, fake_acc_name_for_setups)) continue; acc_t* pacc = pc->pvvalue; if (pacc->psingest_func != NULL) { pacc->psingest_func(pacc->pvstate, value_field_sval); } if (pacc->pdingest_func != NULL) { if (!have_dval) { value_field_dval = mlr_double_from_string_or_die(value_field_sval); have_dval = TRUE; } pacc->pdingest_func(pacc->pvstate, value_field_dval); } } } }