Esempio n. 1
0
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_csvex_alloc(byte_reader_t* pbr, char irs, char ifs) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_csvex_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_csvex_state_t));
	pstate->ilno                      = 0LL;
	pstate->irs                       = "\r\n"; // xxx multi-byte the cli irs/ifs/etc, and integrate here
	pstate->ifs                       = ",";    // xxx multi-byte the cli irs/ifs/etc, and integrate here

	pstate->dquote_irs                = mlr_paste_2_strings("\"", pstate->irs);
	pstate->dquote_ifs                = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof                = "\"\xff";
	pstate->dquote                    = "\"";
	pstate->dquote_dquote             = "\"\"";
	pstate->ifs_eof                   = mlr_paste_2_strings(pstate->ifs, "\xff");

	pstate->irs_len                   = strlen(pstate->irs);
	pstate->ifs_len                   = strlen(pstate->ifs);
	pstate->dquote_irs_len            = strlen(pstate->dquote_irs);
	pstate->dquote_ifs_len            = strlen(pstate->dquote_ifs);
	pstate->dquote_eof_len            = strlen(pstate->dquote_eof);
	pstate->dquote_len                = strlen(pstate->dquote);
	pstate->dquote_dquote_len         = strlen(pstate->dquote_dquote);
	pstate->ifs_eof_len               = strlen(pstate->ifs_eof);

	pstate->peek_buf_len              = pstate->irs_len;
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->ifs_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_irs_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_ifs_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_eof_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->dquote_dquote_len);
	pstate->peek_buf_len              = mlr_imax2(pstate->peek_buf_len, pstate->ifs_eof_len);
	pstate->peek_buf_len             += 2;

	sb_init(&pstate->sb, STRING_BUILDER_INIT_SIZE);
	pstate->psb                       = &pstate->sb;
	pstate->pbr                       = pbr;
	pstate->pfr                       = NULL;

	// xxx allocate the parse-tries here -- one for dquote only,
	// the second for non-dquote-after-that, the third for dquoted-after-that.

	pstate->expect_header_line_next   = TRUE;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = &file_reader_stdio_vopen;
	plrec_reader->pclose_func   = &file_reader_stdio_vclose;
	plrec_reader->pprocess_func = &lrec_reader_csvex_process;
	plrec_reader->psof_func     = &lrec_reader_csvex_sof;
	plrec_reader->pfree_func    = &lrec_reader_csvex_free;

	return plrec_reader;
}
Esempio n. 2
0
// ----------------------------------------------------------------
static char * test_paste() {
	mu_assert("error: paste 2", streq(mlr_paste_2_strings("ab", "cd"), "abcd"));
	mu_assert("error: paste 3", streq(mlr_paste_3_strings("ab", "cd", "ef"), "abcdef"));
	mu_assert("error: paste 4", streq(mlr_paste_4_strings("ab", "cd", "ef", "gh"), "abcdefgh"));
	mu_assert("error: paste 5", streq(mlr_paste_5_strings("ab", "cd", "ef", "gh", "ij"), "abcdefghij"));
	return 0;
}
Esempio n. 3
0
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_mmap_csv_alloc(char* irs, char* ifs, int use_implicit_header) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_mmap_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_mmap_csv_state_t));
	pstate->ilno          = 0LL;

	pstate->eof           = "\xff";
	pstate->irs           = irs;
	pstate->ifs           = ifs;
	pstate->ifs_eof       = mlr_paste_2_strings(pstate->ifs, "\xff");
	pstate->dquote        = "\"";

	pstate->dquote_irs    = mlr_paste_2_strings("\"", pstate->irs);
	pstate->dquote_ifs    = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof    = "\"\xff";
	pstate->dquote_dquote = "\"\"";

	pstate->dquotelen     = strlen(pstate->dquote);

	pstate->pno_dquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs,     IRS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs,     IFS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote,  DQUOTE_STRIDX);

	pstate->pdquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,    DQUOTE_IRS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs,    DQUOTE_IFS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX);

	pstate->pfields = rslls_alloc();
	pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE);

	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = file_reader_mmap_vopen;
	plrec_reader->pclose_func   = file_reader_mmap_vclose;
	plrec_reader->pprocess_func = lrec_reader_mmap_csv_process;
	plrec_reader->psof_func     = lrec_reader_mmap_csv_sof;
	plrec_reader->pfree_func    = lrec_reader_mmap_csv_free;

	return plrec_reader;
}
Esempio n. 4
0
static step_t* step_counter_alloc(char* input_field_name) {
	step_t* pstep = mlr_malloc_or_die(sizeof(step_t));
	step_counter_state_t* pstate = mlr_malloc_or_die(sizeof(step_counter_state_t));
	pstate->counter       = 0LL;
	pstate->output_field_name = mlr_paste_2_strings(input_field_name, "_counter");

	pstep->pvstate        = (void*)pstate;
	pstep->psprocess_func = &step_counter_sprocess;
	pstep->pdprocess_func = NULL;
	return pstep;
}
Esempio n. 5
0
static step_t* step_rsum_alloc(char* input_field_name) {
	step_t* pstep = mlr_malloc_or_die(sizeof(step_t));
	step_rsum_state_t* pstate = mlr_malloc_or_die(sizeof(step_rsum_state_t));
	pstate->rsum          = 0.0;
	pstate->output_field_name = mlr_paste_2_strings(input_field_name, "_rsum");

	pstep->pvstate        = (void*)pstate;
	pstep->psprocess_func = NULL;
	pstep->pdprocess_func = &step_rsum_dprocess;
	return pstep;
}
Esempio n. 6
0
static step_t* step_delta_alloc(char* input_field_name) {
	step_t* pstep = mlr_malloc_or_die(sizeof(step_t));
	step_delta_state_t* pstate = mlr_malloc_or_die(sizeof(step_delta_state_t));
	pstate->prev          = -999.0;
	pstate->have_prev     = FALSE;
	pstate->output_field_name = mlr_paste_2_strings(input_field_name, "_delta");

	pstep->pvstate        = (void*)pstate;
	pstep->psprocess_func = NULL;
	pstep->pdprocess_func = &step_delta_dprocess;
	return pstep;
}
Esempio n. 7
0
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_stdio_csv_alloc(char* irs, char* ifs, int use_implicit_header,
	comment_handling_t comment_handling, char* comment_string)
{
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t));
	pstate->ilno          = 0LL;

	pstate->do_auto_line_term = FALSE;
	if (streq(irs, "auto")) {
		irs = "\n";
		pstate->do_auto_line_term = TRUE;
	}

	pstate->comment_handling = comment_handling;
	pstate->comment_string   = comment_string;
	pstate->comment_string_length = comment_string == NULL ? 0 : strlen(comment_string);

	pstate->eof           = "\xff";
	pstate->irs           = irs;
	pstate->ifs           = ifs;
	pstate->ifs_eof       = mlr_paste_2_strings(pstate->ifs, "\xff");
	pstate->dquote        = "\"";

	pstate->dquote_ifs    = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof    = "\"\xff";
	pstate->dquote_dquote = "\"\"";

	pstate->dquotelen     = strlen(pstate->dquote);


	// Parse trie for UTF-8 BOM
	pstate->putf8_bom_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->putf8_bom_parse_trie, UTF8_BOM, UTF8_BOM_STRIDX);

	// Parse trie for non-double-quoted fields
	pstate->pno_dquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->eof,     EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs,     IRS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs_eof, IFS_EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs,     IFS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote,  DQUOTE_STRIDX);

	// Parse trie for double-quoted fields
	pstate->pdquote_parse_trie = parse_trie_alloc();
	if (pstate->do_auto_line_term) {
		pstate->dquote_irs  = mlr_paste_2_strings("\"", "\n");
		pstate->dquote_irs2 = mlr_paste_2_strings("\"", "\r\n");
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,  DQUOTE_IRS_STRIDX);
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs2, DQUOTE_IRS2_STRIDX);
	} else {
		pstate->dquote_irs  = mlr_paste_2_strings("\"", pstate->irs);
		pstate->dquote_irs2 = NULL;
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX);
	}
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->eof,           EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,    DQUOTE_IRS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs,    DQUOTE_IFS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_eof,    DQUOTE_EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX);


	pstate->pfields = rslls_alloc();
	pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE);
	pstate->pbr = stdio_byte_reader_alloc();
	pstate->pfr = pfr_alloc(pstate->pbr, mlr_imax3(
		pstate->putf8_bom_parse_trie->maxlen,
		pstate->pno_dquote_parse_trie->maxlen,
		pstate->pdquote_parse_trie->maxlen));

	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = lrec_reader_stdio_csv_open;
	plrec_reader->pclose_func   = lrec_reader_stdio_csv_close;
	plrec_reader->pprocess_func = lrec_reader_stdio_csv_process;
	plrec_reader->psof_func     = lrec_reader_stdio_csv_sof;
	plrec_reader->pfree_func    = lrec_reader_stdio_csv_free;

	return plrec_reader;
}