// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_mmap_csvlite_alloc(char* irs, char* ifs, int allow_repeat_ifs) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_mmap_csvlite_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_mmap_csvlite_state_t));
	pstate->ifnr                     = 0LL;
	pstate->irs                      = irs;
	pstate->ifs                      = ifs;
	pstate->irslen                   = strlen(irs);
	pstate->ifslen                   = strlen(ifs);
	pstate->allow_repeat_ifs         = allow_repeat_ifs;
	pstate->expect_header_line_next  = TRUE;
	pstate->pheader_keeper           = NULL;
	pstate->pheader_keepers          = lhmslv_alloc();

	// xxx get rid of func-ptr ampersands throughout the tree
	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = file_reader_mmap_vopen;
	plrec_reader->pclose_func   = file_reader_mmap_vclose;

	plrec_reader->pprocess_func = (pstate->irslen == 1 && pstate->ifslen == 1)
		? lrec_reader_mmap_csvlite_process_single_seps
		: lrec_reader_mmap_csvlite_process_multi_seps;

	plrec_reader->psof_func     = lrec_reader_mmap_csvlite_sof;
	plrec_reader->pfree_func    = NULL;

	return plrec_reader;
}
Example #2
0
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_csvex_alloc(byte_reader_t* pbr, char irs, char ifs) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_csvex_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_csvex_state_t));
	pstate->ilno                      = 0LL;
	pstate->irs                       = "\r\n"; // xxx multi-byte the cli irs/ifs/etc, and integrate here
	pstate->ifs                       = ",";    // xxx multi-byte the cli irs/ifs/etc, and integrate here

	pstate->dquote_irs                = mlr_paste_2_strings("\"", pstate->irs);
	pstate->dquote_ifs                = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof                = "\"\xff";
	pstate->dquote                    = "\"";
	pstate->dquote_dquote             = "\"\"";
	pstate->ifs_eof                   = mlr_paste_2_strings(pstate->ifs, "\xff");

	pstate->irs_len                   = strlen(pstate->irs);
	pstate->ifs_len                   = strlen(pstate->ifs);
	pstate->dquote_irs_len            = strlen(pstate->dquote_irs);
	pstate->dquote_ifs_len            = strlen(pstate->dquote_ifs);
	pstate->dquote_eof_len            = strlen(pstate->dquote_eof);
	pstate->dquote_len                = strlen(pstate->dquote);
	pstate->dquote_dquote_len         = strlen(pstate->dquote_dquote);
	pstate->ifs_eof_len               = strlen(pstate->ifs_eof);

	pstate->peek_buf_len              = pstate->irs_len;
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->ifs_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_irs_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_ifs_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_eof_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_dquote_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->ifs_eof_len);
	pstate->peek_buf_len             += 2;

	sb_init(&pstate->sb, STRING_BUILDER_INIT_SIZE);
	pstate->psb                       = &pstate->sb;
	pstate->pbr                       = pbr;
	pstate->pfr                       = NULL;

	// xxx allocate the parse-tries here -- one for dquote only,
	// the second for non-dquote-after-that, the third for dquoted-after-that.

	pstate->expect_header_line_next   = TRUE;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = &file_reader_stdio_vopen;
	plrec_reader->pclose_func   = &file_reader_stdio_vclose;
	plrec_reader->pprocess_func = &lrec_reader_csvex_process;
	plrec_reader->psof_func     = &lrec_reader_csvex_sof;
	plrec_reader->pfree_func    = &lrec_reader_csvex_free;

	return plrec_reader;
}
Example #3
0
static mapper_t* mapper_regularize_alloc() {
	mapper_t* pmapper = mlr_malloc_or_die(sizeof(mapper_t));

	mapper_regularize_state_t* pstate = mlr_malloc_or_die(sizeof(mapper_regularize_state_t));
	pstate->psorted_to_original = lhmslv_alloc();

	pmapper->pvstate       = (void*)pstate;
	pmapper->pprocess_func = mapper_regularize_process;
	pmapper->pfree_func    = mapper_regularize_free;

	return pmapper;
}
Example #4
0
static mapper_t* mapper_group_like_alloc() {
	mapper_t* pmapper = mlr_malloc_or_die(sizeof(mapper_t));

	mapper_group_like_state_t* pstate = mlr_malloc_or_die(sizeof(mapper_group_like_state_t));
	pstate->precords_by_key_field_names = lhmslv_alloc();

	pmapper->pvstate       = pstate;
	pmapper->pprocess_func = mapper_group_like_process;
	pmapper->pfree_func    = mapper_group_like_free;

	return pmapper;
}
Example #5
0
static mapper_t* mapper_head_alloc(slls_t* pgroup_by_field_names, unsigned long long head_count) {
	mapper_t* pmapper = mlr_malloc_or_die(sizeof(mapper_t));

	mapper_head_state_t* pstate = mlr_malloc_or_die(sizeof(mapper_head_state_t));

	pstate->pgroup_by_field_names  = pgroup_by_field_names;
	pstate->head_count             = head_count;
	pstate->precord_lists_by_group = lhmslv_alloc();

	pmapper->pvstate        = pstate;
	pmapper->pprocess_func  = mapper_head_process;
	pmapper->pfree_func     = mapper_head_free;

	return pmapper;
}
Example #6
0
static mapper_t* mapper_step_alloc(slls_t* pstepper_names, slls_t* pvalue_field_names, slls_t* pgroup_by_field_names) {
	mapper_t* pmapper = mlr_malloc_or_die(sizeof(mapper_t));

	mapper_step_state_t* pstate = mlr_malloc_or_die(sizeof(mapper_step_state_t));

	pstate->pstepper_names        = pstepper_names;
	pstate->pvalue_field_names    = pvalue_field_names;
	pstate->pgroup_by_field_names = pgroup_by_field_names;
	pstate->groups                = lhmslv_alloc();

	pmapper->pvstate       = pstate;
	pmapper->pprocess_func = mapper_step_process;
	pmapper->pfree_func    = mapper_step_free;

	return pmapper;
}
Example #7
0
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_mmap_csv_alloc(char* irs, char* ifs, int use_implicit_header) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_mmap_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_mmap_csv_state_t));
	pstate->ilno          = 0LL;

	pstate->eof           = "\xff";
	pstate->irs           = irs;
	pstate->ifs           = ifs;
	pstate->ifs_eof       = mlr_paste_2_strings(pstate->ifs, "\xff");
	pstate->dquote        = "\"";

	pstate->dquote_irs    = mlr_paste_2_strings("\"", pstate->irs);
	pstate->dquote_ifs    = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof    = "\"\xff";
	pstate->dquote_dquote = "\"\"";

	pstate->dquotelen     = strlen(pstate->dquote);

	pstate->pno_dquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs,     IRS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs,     IFS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote,  DQUOTE_STRIDX);

	pstate->pdquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,    DQUOTE_IRS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs,    DQUOTE_IFS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX);

	pstate->pfields = rslls_alloc();
	pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE);

	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = file_reader_mmap_vopen;
	plrec_reader->pclose_func   = file_reader_mmap_vclose;
	plrec_reader->pprocess_func = lrec_reader_mmap_csv_process;
	plrec_reader->psof_func     = lrec_reader_mmap_csv_sof;
	plrec_reader->pfree_func    = lrec_reader_mmap_csv_free;

	return plrec_reader;
}
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_stdio_csv_alloc(char irs, char ifs, int allow_repeat_ifs) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t));
	pstate->ifnr                      = 0LL;
	pstate->irs                       = irs;
	pstate->ifs                       = ifs;
	pstate->allow_repeat_ifs          = allow_repeat_ifs;
	pstate->expect_header_line_next   = TRUE;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = &file_reader_stdio_vopen;
	plrec_reader->pclose_func   = &file_reader_stdio_vclose;
	plrec_reader->pprocess_func = &lrec_reader_stdio_csv_process;
	plrec_reader->psof_func     = &lrec_reader_stdio_sof;
	plrec_reader->pfree_func    = &lrec_reader_stdio_csv_free;

	return plrec_reader;
}
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_stdio_csvlite_alloc(char* irs, char* ifs, int allow_repeat_ifs, int use_implicit_header) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_stdio_csvlite_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csvlite_state_t));
	pstate->ifnr                      = 0LL;
	pstate->irs                       = irs;
	pstate->ifs                       = ifs;
	pstate->irslen                    = strlen(irs);
	pstate->ifslen                    = strlen(ifs);
	pstate->allow_repeat_ifs          = allow_repeat_ifs;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = file_reader_stdio_vopen;
	plrec_reader->pclose_func   = file_reader_stdio_vclose;
	plrec_reader->pprocess_func = lrec_reader_stdio_csvlite_process;
	plrec_reader->psof_func     = lrec_reader_stdio_sof;
	plrec_reader->pfree_func    = lrec_reader_stdio_csvlite_free;

	return plrec_reader;
}
Example #10
0
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_stdio_csv_alloc(char* irs, char* ifs, int use_implicit_header,
	comment_handling_t comment_handling, char* comment_string)
{
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t));
	pstate->ilno          = 0LL;

	pstate->do_auto_line_term = FALSE;
	if (streq(irs, "auto")) {
		irs = "\n";
		pstate->do_auto_line_term = TRUE;
	}

	pstate->comment_handling = comment_handling;
	pstate->comment_string   = comment_string;
	pstate->comment_string_length = comment_string == NULL ? 0 : strlen(comment_string);

	pstate->eof           = "\xff";
	pstate->irs           = irs;
	pstate->ifs           = ifs;
	pstate->ifs_eof       = mlr_paste_2_strings(pstate->ifs, "\xff");
	pstate->dquote        = "\"";

	pstate->dquote_ifs    = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof    = "\"\xff";
	pstate->dquote_dquote = "\"\"";

	pstate->dquotelen     = strlen(pstate->dquote);


	// Parse trie for UTF-8 BOM
	pstate->putf8_bom_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->putf8_bom_parse_trie, UTF8_BOM, UTF8_BOM_STRIDX);

	// Parse trie for non-double-quoted fields
	pstate->pno_dquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->eof,     EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs,     IRS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs_eof, IFS_EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs,     IFS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote,  DQUOTE_STRIDX);

	// Parse trie for double-quoted fields
	pstate->pdquote_parse_trie = parse_trie_alloc();
	if (pstate->do_auto_line_term) {
		pstate->dquote_irs  = mlr_paste_2_strings("\"", "\n");
		pstate->dquote_irs2 = mlr_paste_2_strings("\"", "\r\n");
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,  DQUOTE_IRS_STRIDX);
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs2, DQUOTE_IRS2_STRIDX);
	} else {
		pstate->dquote_irs  = mlr_paste_2_strings("\"", pstate->irs);
		pstate->dquote_irs2 = NULL;
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX);
	}
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->eof,           EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,    DQUOTE_IRS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs,    DQUOTE_IFS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_eof,    DQUOTE_EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX);


	pstate->pfields = rslls_alloc();
	pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE);
	pstate->pbr = stdio_byte_reader_alloc();
	pstate->pfr = pfr_alloc(pstate->pbr, mlr_imax3(
		pstate->putf8_bom_parse_trie->maxlen,
		pstate->pno_dquote_parse_trie->maxlen,
		pstate->pdquote_parse_trie->maxlen));

	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = lrec_reader_stdio_csv_open;
	plrec_reader->pclose_func   = lrec_reader_stdio_csv_close;
	plrec_reader->pprocess_func = lrec_reader_stdio_csv_process;
	plrec_reader->psof_func     = lrec_reader_stdio_csv_sof;
	plrec_reader->pfree_func    = lrec_reader_stdio_csv_free;

	return plrec_reader;
}
Example #11
0
// ----------------------------------------------------------------
static char* test_lhmslv() {

	slls_t* aw = slls_alloc(); slls_add_no_free(aw, "a"); slls_add_no_free(aw, "w");
	slls_t* ax = slls_alloc(); slls_add_no_free(ax, "a"); slls_add_no_free(ax, "x");
	slls_t* ay = slls_alloc(); slls_add_no_free(ay, "a"); slls_add_no_free(ay, "y");
	slls_t* bz = slls_alloc(); slls_add_no_free(bz, "b"); slls_add_no_free(bz, "z");

	lhmslv_t *pmap = lhmslv_alloc();
	mu_assert_lf(pmap->num_occupied == 0);
	mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL);
	mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL);
	mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL);
	mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pmap));

	lhmslv_put(pmap, ax, "3");
	mu_assert_lf(pmap->num_occupied == 1);
	mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3"));
	mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL);
	mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pmap));

	lhmslv_put(pmap, ay, "5");
	mu_assert_lf(pmap->num_occupied == 2);
	mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3"));
	mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5"));
	mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pmap));

	lhmslv_put(pmap, ax, "4");
	mu_assert_lf(pmap->num_occupied == 2);
	mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4"));
	mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5"));
	mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pmap));

	lhmslv_put(pmap, bz, "7");
	mu_assert_lf(pmap->num_occupied == 3);
	mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4"));
	mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5"));
	mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7"));
	mu_assert_lf(lhmslv_check_counts(pmap));

	lhmslv_remove(pmap, ay);
	mu_assert_lf(pmap->num_occupied == 2);
	mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4"));
	mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL);
	mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7"));
	mu_assert_lf(lhmslv_check_counts(pmap));

	lhmslv_clear(pmap);
	mu_assert_lf(pmap->num_occupied == 0);
	mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL);
	mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL);
	mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL);
	mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pmap));

	lhmslv_free(pmap);

	return NULL;
}