// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_mmap_csvlite_alloc(char* irs, char* ifs, int allow_repeat_ifs) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_mmap_csvlite_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_mmap_csvlite_state_t)); pstate->ifnr = 0LL; pstate->irs = irs; pstate->ifs = ifs; pstate->irslen = strlen(irs); pstate->ifslen = strlen(ifs); pstate->allow_repeat_ifs = allow_repeat_ifs; pstate->expect_header_line_next = TRUE; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); // xxx get rid of func-ptr ampersands throughout the tree plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = file_reader_mmap_vopen; plrec_reader->pclose_func = file_reader_mmap_vclose; plrec_reader->pprocess_func = (pstate->irslen == 1 && pstate->ifslen == 1) ? lrec_reader_mmap_csvlite_process_single_seps : lrec_reader_mmap_csvlite_process_multi_seps; plrec_reader->psof_func = lrec_reader_mmap_csvlite_sof; plrec_reader->pfree_func = NULL; return plrec_reader; }
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_csvex_alloc(byte_reader_t* pbr, char irs, char ifs) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_csvex_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_csvex_state_t)); pstate->ilno = 0LL; pstate->irs = "\r\n"; // xxx multi-byte the cli irs/ifs/etc, and integrate here pstate->ifs = ","; // xxx multi-byte the cli irs/ifs/etc, and integrate here pstate->dquote_irs = mlr_paste_2_strings("\"", pstate->irs); pstate->dquote_ifs = mlr_paste_2_strings("\"", pstate->ifs); pstate->dquote_eof = "\"\xff"; pstate->dquote = "\""; pstate->dquote_dquote = "\"\""; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->irs_len = strlen(pstate->irs); pstate->ifs_len = strlen(pstate->ifs); pstate->dquote_irs_len = strlen(pstate->dquote_irs); pstate->dquote_ifs_len = strlen(pstate->dquote_ifs); pstate->dquote_eof_len = strlen(pstate->dquote_eof); pstate->dquote_len = strlen(pstate->dquote); pstate->dquote_dquote_len = strlen(pstate->dquote_dquote); pstate->ifs_eof_len = strlen(pstate->ifs_eof); pstate->peek_buf_len = pstate->irs_len; pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->ifs_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_irs_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_ifs_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_eof_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_dquote_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->ifs_eof_len); pstate->peek_buf_len += 2; sb_init(&pstate->sb, STRING_BUILDER_INIT_SIZE); pstate->psb = &pstate->sb; pstate->pbr = pbr; pstate->pfr = NULL; // xxx allocate the parse-tries here -- one for dquote only, // the second for non-dquote-after-that, the third for 
dquoted-after-that. pstate->expect_header_line_next = TRUE; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = &file_reader_stdio_vopen; plrec_reader->pclose_func = &file_reader_stdio_vclose; plrec_reader->pprocess_func = &lrec_reader_csvex_process; plrec_reader->psof_func = &lrec_reader_csvex_sof; plrec_reader->pfree_func = &lrec_reader_csvex_free; return plrec_reader; }
static mapper_t* mapper_regularize_alloc() { mapper_t* pmapper = mlr_malloc_or_die(sizeof(mapper_t)); mapper_regularize_state_t* pstate = mlr_malloc_or_die(sizeof(mapper_regularize_state_t)); pstate->psorted_to_original = lhmslv_alloc(); pmapper->pvstate = (void*)pstate; pmapper->pprocess_func = mapper_regularize_process; pmapper->pfree_func = mapper_regularize_free; return pmapper; }
static mapper_t* mapper_group_like_alloc() { mapper_t* pmapper = mlr_malloc_or_die(sizeof(mapper_t)); mapper_group_like_state_t* pstate = mlr_malloc_or_die(sizeof(mapper_group_like_state_t)); pstate->precords_by_key_field_names = lhmslv_alloc(); pmapper->pvstate = pstate; pmapper->pprocess_func = mapper_group_like_process; pmapper->pfree_func = mapper_group_like_free; return pmapper; }
// Allocates the "head" mapper.
//
// pgroup_by_field_names:  list of group-by field names; the pointer is stored
//                         in the mapper state, not copied.
// head_count:             stored in the mapper state for the process function.
//
// The state also gets an initially empty lhmslv map (precord_lists_by_group).
// Returns the new mapper with its process and free functions installed.
static mapper_t* mapper_head_alloc(slls_t* pgroup_by_field_names, unsigned long long head_count) {
	mapper_head_state_t* pst = NULL;
	mapper_t* pm = mlr_malloc_or_die(sizeof(mapper_t));

	pst = mlr_malloc_or_die(sizeof(mapper_head_state_t));
	pst->pgroup_by_field_names  = pgroup_by_field_names;
	pst->head_count             = head_count;
	pst->precord_lists_by_group = lhmslv_alloc();

	pm->pvstate       = (void*)pst;
	pm->pprocess_func = mapper_head_process;
	pm->pfree_func    = mapper_head_free;
	return pm;
}
// Allocates the "step" mapper.
//
// pstepper_names, pvalue_field_names, pgroup_by_field_names:
//   string lists whose pointers are stored in the mapper state, not copied.
//
// The state also gets an initially empty lhmslv map (groups).
// Returns the new mapper with its process and free functions installed.
static mapper_t* mapper_step_alloc(slls_t* pstepper_names, slls_t* pvalue_field_names, slls_t* pgroup_by_field_names) {
	mapper_step_state_t* pst = NULL;
	mapper_t* pm = mlr_malloc_or_die(sizeof(mapper_t));

	pst = mlr_malloc_or_die(sizeof(mapper_step_state_t));
	pst->pstepper_names        = pstepper_names;
	pst->pvalue_field_names    = pvalue_field_names;
	pst->pgroup_by_field_names = pgroup_by_field_names;
	pst->groups                = lhmslv_alloc();

	pm->pvstate       = (void*)pst;
	pm->pprocess_func = mapper_step_process;
	pm->pfree_func    = mapper_step_free;
	return pm;
}
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_mmap_csv_alloc(char* irs, char* ifs, int use_implicit_header) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_mmap_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_mmap_csv_state_t)); pstate->ilno = 0LL; pstate->eof = "\xff"; pstate->irs = irs; pstate->ifs = ifs; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->dquote = "\""; pstate->dquote_irs = mlr_paste_2_strings("\"", pstate->irs); pstate->dquote_ifs = mlr_paste_2_strings("\"", pstate->ifs); pstate->dquote_eof = "\"\xff"; pstate->dquote_dquote = "\"\""; pstate->dquotelen = strlen(pstate->dquote); pstate->pno_dquote_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs, IRS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs, IFS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote, DQUOTE_STRIDX); pstate->pdquote_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs, DQUOTE_IFS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX); pstate->pfields = rslls_alloc(); pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE); pstate->expect_header_line_next = use_implicit_header ? FALSE : TRUE; pstate->use_implicit_header = use_implicit_header; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = file_reader_mmap_vopen; plrec_reader->pclose_func = file_reader_mmap_vclose; plrec_reader->pprocess_func = lrec_reader_mmap_csv_process; plrec_reader->psof_func = lrec_reader_mmap_csv_sof; plrec_reader->pfree_func = lrec_reader_mmap_csv_free; return plrec_reader; }
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_stdio_csv_alloc(char irs, char ifs, int allow_repeat_ifs) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t)); pstate->ifnr = 0LL; pstate->irs = irs; pstate->ifs = ifs; pstate->allow_repeat_ifs = allow_repeat_ifs; pstate->expect_header_line_next = TRUE; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = &file_reader_stdio_vopen; plrec_reader->pclose_func = &file_reader_stdio_vclose; plrec_reader->pprocess_func = &lrec_reader_stdio_csv_process; plrec_reader->psof_func = &lrec_reader_stdio_sof; plrec_reader->pfree_func = &lrec_reader_stdio_csv_free; return plrec_reader; }
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_stdio_csvlite_alloc(char* irs, char* ifs, int allow_repeat_ifs, int use_implicit_header) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_stdio_csvlite_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csvlite_state_t)); pstate->ifnr = 0LL; pstate->irs = irs; pstate->ifs = ifs; pstate->irslen = strlen(irs); pstate->ifslen = strlen(ifs); pstate->allow_repeat_ifs = allow_repeat_ifs; pstate->use_implicit_header = use_implicit_header; pstate->expect_header_line_next = use_implicit_header ? FALSE : TRUE; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = file_reader_stdio_vopen; plrec_reader->pclose_func = file_reader_stdio_vclose; plrec_reader->pprocess_func = lrec_reader_stdio_csvlite_process; plrec_reader->psof_func = lrec_reader_stdio_sof; plrec_reader->pfree_func = lrec_reader_stdio_csvlite_free; return plrec_reader; }
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_stdio_csv_alloc(char* irs, char* ifs, int use_implicit_header, comment_handling_t comment_handling, char* comment_string) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t)); pstate->ilno = 0LL; pstate->do_auto_line_term = FALSE; if (streq(irs, "auto")) { irs = "\n"; pstate->do_auto_line_term = TRUE; } pstate->comment_handling = comment_handling; pstate->comment_string = comment_string; pstate->comment_string_length = comment_string == NULL ? 0 : strlen(comment_string); pstate->eof = "\xff"; pstate->irs = irs; pstate->ifs = ifs; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->dquote = "\""; pstate->dquote_ifs = mlr_paste_2_strings("\"", pstate->ifs); pstate->dquote_eof = "\"\xff"; pstate->dquote_dquote = "\"\""; pstate->dquotelen = strlen(pstate->dquote); // Parse trie for UTF-8 BOM pstate->putf8_bom_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->putf8_bom_parse_trie, UTF8_BOM, UTF8_BOM_STRIDX); // Parse trie for non-double-quoted fields pstate->pno_dquote_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->eof, EOF_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs, IRS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs_eof, IFS_EOF_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs, IFS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote, DQUOTE_STRIDX); // Parse trie for double-quoted fields pstate->pdquote_parse_trie = parse_trie_alloc(); if (pstate->do_auto_line_term) { pstate->dquote_irs = mlr_paste_2_strings("\"", "\n"); pstate->dquote_irs2 = mlr_paste_2_strings("\"", "\r\n"); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); 
parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs2, DQUOTE_IRS2_STRIDX); } else { pstate->dquote_irs = mlr_paste_2_strings("\"", pstate->irs); pstate->dquote_irs2 = NULL; parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); } parse_trie_add_string(pstate->pdquote_parse_trie, pstate->eof, EOF_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs, DQUOTE_IFS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_eof, DQUOTE_EOF_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX); pstate->pfields = rslls_alloc(); pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE); pstate->pbr = stdio_byte_reader_alloc(); pstate->pfr = pfr_alloc(pstate->pbr, mlr_imax3( pstate->putf8_bom_parse_trie->maxlen, pstate->pno_dquote_parse_trie->maxlen, pstate->pdquote_parse_trie->maxlen)); pstate->expect_header_line_next = use_implicit_header ? FALSE : TRUE; pstate->use_implicit_header = use_implicit_header; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = lrec_reader_stdio_csv_open; plrec_reader->pclose_func = lrec_reader_stdio_csv_close; plrec_reader->pprocess_func = lrec_reader_stdio_csv_process; plrec_reader->psof_func = lrec_reader_stdio_csv_sof; plrec_reader->pfree_func = lrec_reader_stdio_csv_free; return plrec_reader; }
// ---------------------------------------------------------------- static char* test_lhmslv() { slls_t* aw = slls_alloc(); slls_add_no_free(aw, "a"); slls_add_no_free(aw, "w"); slls_t* ax = slls_alloc(); slls_add_no_free(ax, "a"); slls_add_no_free(ax, "x"); slls_t* ay = slls_alloc(); slls_add_no_free(ay, "a"); slls_add_no_free(ay, "y"); slls_t* bz = slls_alloc(); slls_add_no_free(bz, "b"); slls_add_no_free(bz, "z"); lhmslv_t *pmap = lhmslv_alloc(); mu_assert_lf(pmap->num_occupied == 0); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ax, "3"); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ay, "5"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ax, "4"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); 
mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, bz, "7"); mu_assert_lf(pmap->num_occupied == 3); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_remove(pmap, ay); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_clear(pmap); mu_assert_lf(pmap->num_occupied == 0); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_free(pmap); return NULL; }