// Returns a newly allocated list holding a strdup'ed copy of every key in
// prec, in the record's own field order. Each copy is handed to the list
// through slls_add_with_free.
slls_t* mlr_copy_keys_from_record(lrec_t* prec) {
	slls_t* pkeys = slls_alloc();
	lrece_t* pentry = prec->phead;
	while (pentry != NULL) {
		char* key_copy = strdup(pentry->key);
		slls_add_with_free(pkeys, key_copy);
		pentry = pentry->pnext;
	}
	return pkeys;
}
// ---------------------------------------------------------------- slls_t* split_csvlite_header_line_single_ifs(char* line, char ifs, int allow_repeat_ifs) { slls_t* plist = slls_alloc(); if (*line == 0) // empty string splits to empty list return plist; char* p = line; if (allow_repeat_ifs) { while (*p == ifs) p++; } char* start = p; for ( ; *p; p++) { if (*p == ifs) { *p = 0; p++; if (allow_repeat_ifs) { while (*p == ifs) p++; } slls_append_no_free(plist, start); start = p; } } if (allow_repeat_ifs && *start == 0) { ; // OK } else { slls_append_no_free(plist, start); } return plist; }
// Returns a newly allocated list of the keys of prec, in the record's own
// field order. The key pointers themselves are added (slls_add_no_free);
// no string is copied.
slls_t* mlr_reference_keys_from_record(lrec_t* prec) {
	slls_t* pkeys = slls_alloc();
	lrece_t* pentry = prec->phead;
	while (pentry != NULL) {
		slls_add_no_free(pkeys, pentry->key);
		pentry = pentry->pnext;
	}
	return pkeys;
}
// Splits a CSV-lite header line on a multi-character field separator.
//
// The line is modified in place: the first byte of each separator that ends a
// field is overwritten with a NUL byte, and the returned list holds pointers
// into the caller's buffer (added via slls_add_no_free), not copies.
//
// * line: NUL-terminated, writable header text.
// * ifs, ifslen: field-separator string and its length.
// * allow_repeat_ifs: when true, runs of separators count as one, leading
//   separators are skipped, and an empty trailing field is dropped.
//
// An empty line yields an empty list.
slls_t* split_csvlite_header_line_multi_ifs(char* line, char* ifs, int ifslen, int allow_repeat_ifs) {
	slls_t* plist = slls_alloc();
	if (*line == 0) // empty string splits to empty list
		return plist;

	char* p = line;
	if (allow_repeat_ifs) {
		while (streqn(p, ifs, ifslen))
			p += ifslen;
	}

	char* start = p;
	// The cursor is advanced in exactly one place per iteration. Advancing it
	// again after a separator would skip the first byte of the next field
	// (missing an adjacent separator) and could step past the terminating NUL
	// when the line ends with a separator.
	while (*p) {
		if (streqn(p, ifs, ifslen)) {
			*p = 0;
			p += ifslen;
			if (allow_repeat_ifs) {
				while (streqn(p, ifs, ifslen))
					p += ifslen;
			}
			slls_add_no_free(plist, start);
			start = p;
		} else {
			p++;
		}
	}

	// With repeated separators allowed, a trailing separator does not
	// introduce an empty final field.
	if (!(allow_repeat_ifs && *start == 0)) {
		slls_add_no_free(plist, start);
	}
	return plist;
}
// Reads one header line from the buffer described by phandle, using
// multi-character record and field separators taken from pstate.
//
// Leading record separators at phandle->sol are skipped first, each one
// bumping pstate->ilno. The header text is then split in place: the first byte
// of every field separator, and of the terminating record separator, is
// overwritten with NUL, and the returned list holds pointers into the buffer
// (slls_add_no_free). When a record separator is found, phandle->sol is moved
// just past it and pstate->ilno is bumped; if the scan stops for any other
// reason, phandle->sol is left where the header began.
static slls_t* lrec_reader_mmap_csvlite_get_header_multi_seps(file_reader_mmap_state_t* phandle, lrec_reader_mmap_csvlite_state_t* pstate) {
	char* irs = pstate->irs;
	char* ifs = pstate->ifs;
	int irslen = pstate->irslen;
	int ifslen = pstate->ifslen;
	int allow_repeat_ifs = pstate->allow_repeat_ifs;

	slls_t* pnames = slls_alloc();

	// Skip blank lines ahead of the header.
	while ((phandle->eof - phandle->sol) >= irslen && streqn(phandle->sol, irs, irslen)) {
		phandle->sol += irslen;
		pstate->ilno++;
	}

	char* cursor = phandle->sol;
	if (allow_repeat_ifs) {
		while (streqn(cursor, ifs, ifslen))
			cursor += ifslen;
	}

	char* field_start = cursor;
	while (cursor < phandle->eof && *cursor) {
		if (streqn(cursor, irs, irslen)) {
			// End of the header line.
			*cursor = 0;
			phandle->sol = cursor + irslen;
			pstate->ilno++;
			break;
		}
		if (!streqn(cursor, ifs, ifslen)) {
			// Ordinary byte within a field name.
			cursor++;
			continue;
		}
		// End of one field name.
		*cursor = 0;
		slls_add_no_free(pnames, field_start);
		cursor += ifslen;
		if (allow_repeat_ifs) {
			while (streqn(cursor, ifs, ifslen))
				cursor += ifslen;
		}
		field_start = cursor;
	}

	// The final field has no field separator after it.
	slls_add_no_free(pnames, field_start);
	return pnames;
}
// ---------------------------------------------------------------- // xxx freeing contract. // xxx behavior on missing. doc, or make a second boolean flag. slls_t* mlr_selected_values_from_record(lrec_t* prec, slls_t* pselected_field_names) { slls_t* pvalue_list = slls_alloc(); for (sllse_t* pe = pselected_field_names->phead; pe != NULL; pe = pe->pnext) { char* selected_field_name = pe->value; char* value = lrec_get(prec, selected_field_name); if (value == NULL) { // xxx have stashed argv0 for error message. // xxx better to have filename + linenumber somehow. //fprintf(stderr, "Couldn't find field named \"%s\"\n", selected_field_name); //exit(1); } else { slls_add_no_free(pvalue_list, value); } } return pvalue_list; }
// ---------------------------------------------------------------- static void set_up( sllv_t* precords, slls_t** ppleft_field_names, lrec_reader_t** ppreader) { slls_t* pleft_field_names = slls_alloc(); slls_append_no_free(pleft_field_names, "l"); lrec_reader_t* preader = lrec_reader_in_memory_alloc(precords); printf("left records:\n"); lrec_print_list_with_prefix(precords, " "); printf("\n"); *ppleft_field_names = pleft_field_names; *ppreader = preader; }
// ---------------------------------------------------------------- static lrec_t* lrec_reader_mmap_csv_process(void* pvstate, void* pvhandle, context_t* pctx) { lrec_reader_mmap_csv_state_t* pstate = pvstate; file_reader_mmap_state_t* phandle = pvhandle; if (pstate->expect_header_line_next) { if (!lrec_reader_mmap_csv_get_fields(pstate, pstate->pfields, phandle)) return NULL; pstate->ilno++; slls_t* pheader_fields = slls_alloc(); int i = 0; for (rsllse_t* pe = pstate->pfields->phead; i < pstate->pfields->length && pe != NULL; pe = pe->pnext, i++) { if (*pe->value == 0) { fprintf(stderr, "%s: unacceptable empty CSV key at file \"%s\" line %lld.\n", MLR_GLOBALS.argv0, pctx->filename, pstate->ilno); exit(1); } // Transfer pointer-free responsibility from the rslls to the // header fields in the header keeper slls_append(pheader_fields, pe->value, pe->free_flag); pe->free_flag = 0; } rslls_reset(pstate->pfields); pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pheader_fields); if (pstate->pheader_keeper == NULL) { pstate->pheader_keeper = header_keeper_alloc(NULL, pheader_fields); lhmslv_put(pstate->pheader_keepers, pheader_fields, pstate->pheader_keeper, NO_FREE); // freed by header-keeper } else { // Re-use the header-keeper in the header cache slls_free(pheader_fields); } pstate->expect_header_line_next = FALSE; } int rc = lrec_reader_mmap_csv_get_fields(pstate, pstate->pfields, phandle); pstate->ilno++; if (rc == FALSE) // EOF return NULL; else { lrec_t* prec = pstate->use_implicit_header ? paste_indices_and_data(pstate, pstate->pfields, pctx) : paste_header_and_data(pstate, pstate->pfields, pctx); rslls_reset(pstate->pfields); return prec; } }
// Parses the command-line arguments of the head verb and builds its mapper.
// Recognized flags: -n (integer count, default 10) and -g (string list of
// group-by field names, default empty). *pargi is advanced past the verb name
// before flag parsing. On a parse failure the usage message is printed and
// NULL is returned.
static mapper_t* mapper_head_parse_cli(int* pargi, int argc, char** argv) {
	int count = 10;
	slls_t* pgroup_fields = slls_alloc();
	char* verb_name = argv[(*pargi)++];

	ap_state_t* pparser = ap_alloc();
	ap_define_int_flag(pparser, "-n", &count);
	ap_define_string_list_flag(pparser, "-g", &pgroup_fields);

	if (ap_parse(pparser, verb_name, pargi, argc, argv))
		return mapper_head_alloc(pgroup_fields, count);

	mapper_head_usage(argv[0], verb_name);
	return NULL;
}
// Reads one header line from the buffer described by phandle, using the first
// character of pstate->irs and pstate->ifs as record and field separators.
//
// Leading record separators at phandle->sol are skipped first, each one
// bumping pstate->ilno. The header text is then split in place: every field
// separator, and the terminating record separator, is overwritten with NUL,
// and the returned list holds pointers into the buffer (slls_add_no_free).
// When a record separator is found, phandle->sol is moved just past it and
// pstate->ilno is bumped; if the scan stops for any other reason,
// phandle->sol is left where the header began.
static slls_t* lrec_reader_mmap_csvlite_get_header_single_seps(file_reader_mmap_state_t* phandle, lrec_reader_mmap_csvlite_state_t* pstate) {
	char irs = pstate->irs[0];
	char ifs = pstate->ifs[0];
	int allow_repeat_ifs = pstate->allow_repeat_ifs;

	slls_t* pnames = slls_alloc();

	// Skip blank lines ahead of the header.
	while (phandle->sol < phandle->eof && *phandle->sol == irs) {
		phandle->sol++;
		pstate->ilno++;
	}

	char* cursor = phandle->sol;
	if (allow_repeat_ifs) {
		while (*cursor == ifs)
			cursor++;
	}

	char* field_start = cursor;
	while (cursor < phandle->eof && *cursor) {
		if (*cursor == irs) {
			// End of the header line.
			*cursor = 0;
			phandle->sol = cursor + 1;
			pstate->ilno++;
			break;
		}
		if (*cursor != ifs) {
			// Ordinary byte within a field name.
			cursor++;
			continue;
		}
		// End of one field name.
		*cursor = 0;
		slls_add_no_free(pnames, field_start);
		cursor++;
		if (allow_repeat_ifs) {
			while (*cursor == ifs)
				cursor++;
		}
		field_start = cursor;
	}

	// The final field has no field separator after it.
	slls_add_no_free(pnames, field_start);
	return pnames;
}
// ---------------------------------------------------------------- static char* test_lrec_xtab_api() { char* line_1 = strdup("w 2"); char* line_2 = strdup("x 3"); char* line_3 = strdup("y 4"); char* line_4 = strdup("z 5"); slls_t* pxtab_lines = slls_alloc(); slls_add_with_free(pxtab_lines, line_1); slls_add_with_free(pxtab_lines, line_2); slls_add_with_free(pxtab_lines, line_3); slls_add_with_free(pxtab_lines, line_4); lrec_t* prec = lrec_parse_stdio_xtab(pxtab_lines, ' ', TRUE); mu_assert_lf(prec->field_count == 4); mu_assert_lf(streq(lrec_get(prec, "w"), "2")); mu_assert_lf(streq(lrec_get(prec, "x"), "3")); mu_assert_lf(streq(lrec_get(prec, "y"), "4")); mu_assert_lf(streq(lrec_get(prec, "z"), "5")); lrec_remove(prec, "w"); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "w") == NULL); // Non-replacing-rename case //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "x", "u"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 3); mu_assert_lf(lrec_get(prec, "x") == NULL); mu_assert_lf(streq(lrec_get(prec, "u"), "3")); // Replacing-rename case //lrec_dump_titled("Before rename", prec); lrec_rename(prec, "y", "z"); //lrec_dump_titled("After rename", prec); mu_assert_lf(prec->field_count == 2); mu_assert_lf(streq(lrec_get(prec, "u"), "3")); mu_assert_lf(lrec_get(prec, "y") == NULL); mu_assert_lf(streq(lrec_get(prec, "z"), "4")); lrec_free(prec); return NULL; }
// ---------------------------------------------------------------- slls_t* slls_from_line(char* line, char ifs, int allow_repeat_ifs) { slls_t* plist = slls_alloc(); if (*line == 0) // empty string splits to empty list return plist; char* start = line; for (char* p = line; *p; p++) { if (*p == ifs) { *p = 0; p++; if (allow_repeat_ifs) { while (*p == ifs) p++; } slls_add_no_free(plist, start); start = p; } } slls_add_no_free(plist, start); return plist; }
// ---------------------------------------------------------------- // xxx cmt mem-mgt slls_t* split_csv_header_line(char* line, char ifs, int allow_repeat_ifs) { slls_t* plist = slls_alloc(); if (*line == 0) // empty string splits to empty list return plist; char* start = line; for (char* p = line; *p; p++) { if (*p == ifs) { *p = 0; p++; // xxx hoist loop invariant at the cost of some code duplication if (allow_repeat_ifs) { while (*p == ifs) p++; } slls_add_no_free(plist, start); start = p; } } slls_add_no_free(plist, start); return plist; }
// ---------------------------------------------------------------- static lrec_t* lrec_reader_stdio_xtab_process(void* pvstate, void* pvhandle, context_t* pctx) { FILE* input_stream = pvhandle; lrec_reader_stdio_xtab_state_t* pstate = pvstate; if (pstate->at_eof) return NULL; slls_t* pxtab_lines = slls_alloc(); while (TRUE) { char* line = (pstate->ifslen == 1) ? mlr_get_cline(input_stream, pstate->ifs[0]) : mlr_get_sline(input_stream, pstate->ifs, pstate->ifslen); if (line == NULL) { // EOF // EOF or blank line terminates the stanza. pstate->at_eof = TRUE; if (pxtab_lines->length == 0) { slls_free(pxtab_lines); return NULL; } else { return (pstate->ipslen == 1) ? lrec_parse_stdio_xtab_single_ips(pxtab_lines, pstate->ips[0], pstate->allow_repeat_ips) : lrec_parse_stdio_xtab_multi_ips(pxtab_lines, pstate->ips, pstate->ipslen, pstate->allow_repeat_ips); } } else if (*line == '\0') { free(line); if (pxtab_lines->length > 0) { return (pstate->ipslen == 1) ? lrec_parse_stdio_xtab_single_ips(pxtab_lines, pstate->ips[0], pstate->allow_repeat_ips) : lrec_parse_stdio_xtab_multi_ips(pxtab_lines, pstate->ips, pstate->ipslen, pstate->allow_repeat_ips); } } else { slls_add_with_free(pxtab_lines, line); } } }
// Parses the command-line arguments of the step verb and builds its mapper.
// Recognized flags, all string lists: -a (stepper names), -f (value field
// names), -g (group-by field names, default empty). *pargi is advanced past
// the verb name before flag parsing. The usage message is printed and NULL is
// returned if parsing fails or if either -a or -f was left unset.
static mapper_t* mapper_step_parse_cli(int* pargi, int argc, char** argv) {
	slls_t* psteppers = NULL;
	slls_t* pvalue_fields = NULL;
	slls_t* pgroup_fields = slls_alloc();
	char* verb_name = argv[(*pargi)++];

	ap_state_t* pparser = ap_alloc();
	ap_define_string_list_flag(pparser, "-a", &psteppers);
	ap_define_string_list_flag(pparser, "-f", &pvalue_fields);
	ap_define_string_list_flag(pparser, "-g", &pgroup_fields);

	// Short-circuit evaluation: the required-flag checks run only after a
	// successful parse.
	if (!ap_parse(pparser, verb_name, pargi, argc, argv)
		|| psteppers == NULL
		|| pvalue_fields == NULL)
	{
		mapper_step_usage(argv[0], verb_name);
		return NULL;
	}

	return mapper_step_alloc(psteppers, pvalue_fields, pgroup_fields);
}
// ---------------------------------------------------------------- slls_t* slls_copy(slls_t* pold) { slls_t* pnew = slls_alloc(); for (sllse_t* pe = pold->phead; pe != NULL; pe = pe->pnext) slls_add_with_free(pnew, strdup(pe->value)); return pnew; }
// ---------------------------------------------------------------- slls_t* slls_single_no_free(char* value) { slls_t* pslls = slls_alloc(); slls_add_no_free(pslls, value); return pslls; }
// ---------------------------------------------------------------- static lrec_t* lrec_reader_stdio_csv_process(void* pvstate, void* pvhandle, context_t* pctx) { lrec_reader_stdio_csv_state_t* pstate = pvstate; // Ingest the next header line, if expected if (pstate->expect_header_line_next) { while (TRUE) { if (!lrec_reader_stdio_csv_get_fields(pstate, pstate->pfields, pctx, TRUE)) return NULL; pstate->ilno++; // We check for comments here rather than within the parser since it's important // for users to be able to comment out lines containing double-quoted newlines. if (pstate->comment_string != NULL && pstate->pfields->phead != NULL) { if (streqn(pstate->pfields->phead->value, pstate->comment_string, pstate->comment_string_length)) { if (pstate->comment_handling == PASS_COMMENTS) { int i = 0; for ( rsllse_t* pe = pstate->pfields->phead; i < pstate->pfields->length && pe != NULL; pe = pe->pnext, i++) { if (i > 0) fputs(pstate->ifs, stdout); fputs(pe->value, stdout); } if (pstate->do_auto_line_term) { fputs(pctx->auto_line_term, stdout); } else { fputs(pstate->irs, stdout); } } rslls_reset(pstate->pfields); continue; } } slls_t* pheader_fields = slls_alloc(); int i = 0; for (rsllse_t* pe = pstate->pfields->phead; i < pstate->pfields->length && pe != NULL; pe = pe->pnext) { if (*pe->value == 0) { fprintf(stderr, "%s: unacceptable empty CSV key at file \"%s\" line %lld.\n", MLR_GLOBALS.bargv0, pctx->filename, pstate->ilno); exit(1); } // Transfer pointer-free responsibility from the rslls to the // header fields in the header keeper slls_append(pheader_fields, pe->value, pe->free_flag); pe->free_flag = 0; } rslls_reset(pstate->pfields); pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pheader_fields); if (pstate->pheader_keeper == NULL) { pstate->pheader_keeper = header_keeper_alloc(NULL, pheader_fields); lhmslv_put(pstate->pheader_keepers, pheader_fields, pstate->pheader_keeper, NO_FREE); // freed by header-keeper } else { // Re-use the header-keeper 
in the header cache slls_free(pheader_fields); } pstate->expect_header_line_next = FALSE; break; } } // Ingest the next data line, if expected while (TRUE) { int rc = lrec_reader_stdio_csv_get_fields(pstate, pstate->pfields, pctx, FALSE); pstate->ilno++; if (rc == FALSE) // EOF return NULL; // We check for comments here rather than within the parser since it's important // for users to be able to comment out lines containing double-quoted newlines. if (pstate->comment_string != NULL && pstate->pfields->phead != NULL) { if (streqn(pstate->pfields->phead->value, pstate->comment_string, pstate->comment_string_length)) { if (pstate->comment_handling == PASS_COMMENTS) { int i = 0; for ( rsllse_t* pe = pstate->pfields->phead; i < pstate->pfields->length && pe != NULL; pe = pe->pnext, i++) { if (i > 0) fputs(pstate->ifs, stdout); fputs(pe->value, stdout); } if (pstate->do_auto_line_term) { fputs(pctx->auto_line_term, stdout); } else { fputs(pstate->irs, stdout); } } rslls_reset(pstate->pfields); continue; } } lrec_t* prec = pstate->use_implicit_header ? paste_indices_and_data(pstate, pstate->pfields, pctx) : paste_header_and_data(pstate, pstate->pfields, pctx); rslls_reset(pstate->pfields); return prec; } }
// ---------------------------------------------------------------- static char* test_lhmslv() { slls_t* aw = slls_alloc(); slls_add_no_free(aw, "a"); slls_add_no_free(aw, "w"); slls_t* ax = slls_alloc(); slls_add_no_free(ax, "a"); slls_add_no_free(ax, "x"); slls_t* ay = slls_alloc(); slls_add_no_free(ay, "a"); slls_add_no_free(ay, "y"); slls_t* bz = slls_alloc(); slls_add_no_free(bz, "b"); slls_add_no_free(bz, "z"); lhmslv_t *pmap = lhmslv_alloc(); mu_assert_lf(pmap->num_occupied == 0); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ax, "3"); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ay, "5"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ax, "4"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); 
mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, bz, "7"); mu_assert_lf(pmap->num_occupied == 3); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_remove(pmap, ay); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_clear(pmap); mu_assert_lf(pmap->num_occupied == 0); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_free(pmap); return NULL; }