// ---------------------------------------------------------------- static sllv_t* mapper_tail_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { mapper_tail_state_t* pstate = pvstate; if (pinrec != NULL) { slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names); sllv_t* precord_list_for_group = lhmslv_get(pstate->precord_lists_by_group, pgroup_by_field_values); if (precord_list_for_group == NULL) { precord_list_for_group = sllv_alloc(); lhmslv_put(pstate->precord_lists_by_group, slls_copy(pgroup_by_field_values), precord_list_for_group); } if (precord_list_for_group->length >= pstate->tail_count) { lrec_t* porec = sllv_pop(precord_list_for_group); if (porec != NULL) lrec_free(porec); } sllv_add(precord_list_for_group, pinrec); return NULL; } else { sllv_t* poutrecs = sllv_alloc(); for (lhmslve_t* pa = pstate->precord_lists_by_group->phead; pa != NULL; pa = pa->pnext) { sllv_t* precord_list_for_group = pa->pvvalue; for (sllve_t* pb = precord_list_for_group->phead; pb != NULL; pb = pb->pnext) { sllv_add(poutrecs, pb->pvdata); } } sllv_add(poutrecs, NULL); return poutrecs; } }
// ---------------------------------------------------------------- static sllv_t* mapper_group_like_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { mapper_group_like_state_t* pstate = pvstate; if (pinrec != NULL) { slls_t* pkey_field_names = mlr_reference_keys_from_record(pinrec); sllv_t* plist = lhmslv_get(pstate->precords_by_key_field_names, pkey_field_names); if (plist == NULL) { plist = sllv_alloc(); sllv_add(plist, pinrec); lhmslv_put(pstate->precords_by_key_field_names, slls_copy(pkey_field_names), plist); } else { sllv_add(plist, pinrec); } return NULL; } else { sllv_t* poutput = sllv_alloc(); for (lhmslve_t* pe = pstate->precords_by_key_field_names->phead; pe != NULL; pe = pe->pnext) { sllv_t* plist = pe->pvvalue; for (sllve_t* pf = plist->phead; pf != NULL; pf = pf->pnext) { sllv_add(poutput, pf->pvdata); } } sllv_add(poutput, NULL); return poutput; } }
// ---------------------------------------------------------------- // xxx if empty key then make a way to communicate back to the reader that it // can stop reading further records -- ? static sllv_t* mapper_head_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { mapper_head_state_t* pstate = pvstate; if (pinrec != NULL) { slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names); unsigned long long* pcount_for_group = lhmslv_get(pstate->precord_lists_by_group, pgroup_by_field_values); if (pcount_for_group == NULL) { pcount_for_group = mlr_malloc_or_die(sizeof(unsigned long long)); *pcount_for_group = 0LL; lhmslv_put(pstate->precord_lists_by_group, slls_copy(pgroup_by_field_values), pcount_for_group); } (*pcount_for_group)++; if (*pcount_for_group <= pstate->head_count) { return sllv_single(pinrec); } else { lrec_free(pinrec); return NULL; } } else { return sllv_single(NULL); } }
// ---------------------------------------------------------------- static sllv_t* mapper_regularize_process(lrec_t* pinrec, context_t* pctx, void* pvstate) { if (pinrec != NULL) { mapper_regularize_state_t* pstate = (mapper_regularize_state_t*)pvstate; slls_t* current_sorted_field_names = mlr_reference_keys_from_record(pinrec); slls_sort(current_sorted_field_names); slls_t* previous_sorted_field_names = lhmslv_get(pstate->psorted_to_original, current_sorted_field_names); if (previous_sorted_field_names == NULL) { previous_sorted_field_names = slls_copy(current_sorted_field_names); lhmslv_put(pstate->psorted_to_original, previous_sorted_field_names, mlr_copy_keys_from_record(pinrec)); return sllv_single(pinrec); } else { lrec_t* poutrec = lrec_unbacked_alloc(); for (sllse_t* pe = previous_sorted_field_names->phead; pe != NULL; pe = pe->pnext) { lrec_put(poutrec, pe->value, strdup(lrec_get(pinrec, pe->value)), LREC_FREE_ENTRY_VALUE); } lrec_free(pinrec); return sllv_single(poutrec); } } else { return sllv_single(NULL); } }
// Feeds each configured value field of a record through that field's
// per-group steppers and passes the record on.
//
// pinrec:  the next input record, or NULL to signal end of stream.
// pctx:    not referenced in this function.
// pvstate: the mapper_step_state_t for this mapper.
//
// Returns a single-element list holding NULL at end of stream; NULL (after
// freeing pinrec) when the record lacks any group-by field; otherwise a
// single-element list holding pinrec. Steppers receive pinrec and presumably
// write their outputs into it. A record lacking any of the value fields is
// passed through without being stepped.
//
// Exits the process with status 1 if a configured stepper name is unknown.
static sllv_t* mapper_step_process(lrec_t* pinrec, context_t* pctx, void* pvstate) {
    mapper_step_state_t* pstate = pvstate;
    if (pinrec == NULL)
        return sllv_single(NULL);

    // ["s", "t"]
    slls_t* pvalue_field_values = mlr_selected_values_from_record(pinrec,
        pstate->pvalue_field_names);
    slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec,
        pstate->pgroup_by_field_names);

    // Records without the full group-by key are dropped.
    if (pgroup_by_field_values->length != pstate->pgroup_by_field_names->length) {
        lrec_free(pinrec);
        return NULL;
    }

    // The loop below walks names and values in lock-step, so it is only correct
    // when every value field is present. With a field missing, the values list
    // is shorter and a name would be paired with another field's value. Such
    // records are passed through unstepped; this is the same full-population
    // requirement that mapper_stats1_ingest applies.
    if (pvalue_field_values->length != pstate->pvalue_field_names->length)
        return sllv_single(pinrec);

    // Level 1: group-by values -> per-value-field map.
    lhmsv_t* group_to_acc_field = lhmslv_get(pstate->groups, pgroup_by_field_values);
    if (group_to_acc_field == NULL) {
        group_to_acc_field = lhmsv_alloc();
        lhmslv_put(pstate->groups, slls_copy(pgroup_by_field_values), group_to_acc_field);
    }

    sllse_t* pa = pstate->pvalue_field_names->phead;
    sllse_t* pb = pvalue_field_values->phead;
    // for x=1 and y=2
    for ( ; pa != NULL && pb != NULL; pa = pa->pnext, pb = pb->pnext) {
        char* value_field_name = pa->value;
        char* value_field_sval = pb->value;
        // The numeric value is parsed at most once per field, and only if some
        // stepper asks for it; -999.0 is a placeholder never read before then.
        int have_dval = FALSE;
        double value_field_dval = -999.0;

        // Level 2: value-field name -> per-stepper map.
        lhmsv_t* acc_field_to_acc_state = lhmsv_get(group_to_acc_field, value_field_name);
        if (acc_field_to_acc_state == NULL) {
            acc_field_to_acc_state = lhmsv_alloc();
            lhmsv_put(group_to_acc_field, value_field_name, acc_field_to_acc_state);
        }

        // for "delta", "rsum"
        for (sllse_t* pc = pstate->pstepper_names->phead; pc != NULL; pc = pc->pnext) {
            char* step_name = pc->value;

            // Level 3: stepper name -> stepper, created on first use.
            step_t* pstep = lhmsv_get(acc_field_to_acc_state, step_name);
            if (pstep == NULL) {
                pstep = make_step(step_name, value_field_name);
                if (pstep == NULL) {
                    fprintf(stderr, "mlr step: stepper \"%s\" not found.\n", step_name);
                    exit(1);
                }
                lhmsv_put(acc_field_to_acc_state, step_name, pstep);
            }

            // A stepper may take the string value, the numeric value, or both.
            if (pstep->psprocess_func != NULL) {
                pstep->psprocess_func(pstep->pvstate, value_field_sval, pinrec);
            }
            if (pstep->pdprocess_func != NULL) {
                if (!have_dval) {
                    value_field_dval = mlr_double_from_string_or_die(value_field_sval);
                    have_dval = TRUE;
                }
                pstep->pdprocess_func(pstep->pvstate, value_field_dval, pinrec);
            }
        }
    }

    return sllv_single(pinrec);
}
// ---------------------------------------------------------------- static void mapper_stats1_ingest(lrec_t* pinrec, mapper_stats1_state_t* pstate) { // E.g. ["s", "t"] // To do: make value_field_values into a hashmap. Then accept partial // population on that, but retain full-population requirement on group-by. // E.g. if accumulating stats of x,y on a,b then skip record with x,y,a but // process record with x,a,b. slls_t* pvalue_field_values = mlr_selected_values_from_record(pinrec, pstate->pvalue_field_names); slls_t* pgroup_by_field_values = mlr_selected_values_from_record(pinrec, pstate->pgroup_by_field_names); if (pvalue_field_values->length != pstate->pvalue_field_names->length) return; if (pgroup_by_field_values->length != pstate->pgroup_by_field_names->length) return; lhmsv_t* group_to_acc_field = lhmslv_get(pstate->groups, pgroup_by_field_values); if (group_to_acc_field == NULL) { group_to_acc_field = lhmsv_alloc(); lhmslv_put(pstate->groups, slls_copy(pgroup_by_field_values), group_to_acc_field); } sllse_t* pa = pstate->pvalue_field_names->phead; sllse_t* pb = pvalue_field_values->phead; // for x=1 and y=2 for ( ; pa != NULL && pb != NULL; pa = pa->pnext, pb = pb->pnext) { char* value_field_name = pa->value; char* value_field_sval = pb->value; int have_dval = FALSE; double value_field_dval = -999.0; lhmsv_t* acc_field_to_acc_state = lhmsv_get(group_to_acc_field, value_field_name); if (acc_field_to_acc_state == NULL) { acc_field_to_acc_state = lhmsv_alloc(); lhmsv_put(group_to_acc_field, value_field_name, acc_field_to_acc_state); } // Look up presence of all accumulators at this level's hashmap. char* presence = lhmsv_get(acc_field_to_acc_state, fake_acc_name_for_setups); if (presence == NULL) { make_accs(pstate->paccumulator_names, acc_field_to_acc_state); lhmsv_put(acc_field_to_acc_state, fake_acc_name_for_setups, fake_acc_name_for_setups); } // There isn't a one-to-one mapping between user-specified acc_names // and internal acc_t's. 
Here in the ingestor we feed each datum into // an acc_t. In the emitter, we loop over the acc_names in // user-specified order. Example: they ask for p10,mean,p90. Then there // is only one percentiles accumulator to be told about each point. In // the emitter it will be asked to produce output twice: once for the // 10th percentile & once for the 90th. for (lhmsve_t* pc = acc_field_to_acc_state->phead; pc != NULL; pc = pc->pnext) { char* acc_name = pc->key; if (streq(acc_name, fake_acc_name_for_setups)) continue; acc_t* pacc = pc->pvvalue; if (pacc->psingest_func != NULL) { pacc->psingest_func(pacc->pvstate, value_field_sval); } if (pacc->pdingest_func != NULL) { if (!have_dval) { value_field_dval = mlr_double_from_string_or_die(value_field_sval); have_dval = TRUE; } pacc->pdingest_func(pacc->pvstate, value_field_dval); } } } }