Exemplo n.º 1
0
// Reads the named file line by line through a peek-file reader and returns
// the sum of strlen() over all lines handed back by read_line_pfr_psb().
//
// filename: passed unchanged to the byte reader's open function.
// do_write: when non-zero, each line is echoed to stdout followed by '\n'.
//
// Each returned line is released with free() once it has been counted.
static int read_file_pfr_psb(char* filename, int do_write) {
	byte_reader_t* pbr = stdio_byte_reader_alloc();
	string_builder_t* psb = sb_alloc(STRING_BUILDER_INIT_SIZE);
	pbr->popen_func(pbr, NULL, filename);

	peek_file_reader_t* pfr = pfr_alloc(pbr, PEEK_BUF_LEN);

	// Delimiters the line reader should recognize: a bare newline, the 0xff
	// byte registered under EOF_STRIDX, and a newline immediately followed by
	// that byte.
	parse_trie_t* ptrie = parse_trie_alloc();
	parse_trie_add_string(ptrie, "\n", IRS_STRIDX);
	parse_trie_add_string(ptrie, "\xff", EOF_STRIDX);
	parse_trie_add_string(ptrie, "\n\xff", IRSEOF_STRIDX);

	int bc = 0;

	while (TRUE) {
		char* line = read_line_pfr_psb(pfr, psb, ptrie);
		if (line == NULL)
			break;
		if (do_write) {
			fputs(line, stdout);
			fputc('\n', stdout);
		}
		bc += strlen(line);
		free(line);
	}

	sb_free(psb);
	pbr->pclose_func(pbr, NULL);
	// Release the peek-file reader after closing the byte reader, in the same
	// order the unit tests for the reader use.
	pfr_free(pfr);
	// NOTE(review): ptrie and pbr are still not released here; add the
	// matching free calls once their destructors are confirmed.
	return bc;
}
Exemplo n.º 2
0
// ----------------------------------------------------------------
// Opening a string-backed byte reader on "" must make the peek-file reader
// report EOF on the very first peek and on the very first read.
// Returns NULL when control reaches the end of the test.
static char* test_empty() {
	const int peekbuf_len = 7;

	byte_reader_t* preader = string_byte_reader_alloc();
	int ok = preader->popen_func(preader, NULL, "");
	mu_assert_lf(ok == TRUE);

	peek_file_reader_t* pfr = pfr_alloc(preader, peekbuf_len);

	// EOF is narrowed to char before comparing because plain char is
	// unsigned on some platforms.
	mu_assert_lf(pfr_peek_char(pfr) == (char)EOF);
	mu_assert_lf(pfr_read_char(pfr) == (char)EOF);

	preader->pclose_func(preader, NULL);
	pfr_free(pfr);

	return NULL;
}
Exemplo n.º 3
0
// Reads the named file line by line ("\n"-terminated) through a peek-file
// reader and returns the sum of strlen() over all lines handed back by
// read_line_pfr_psb().
//
// filename: opened with fopen_or_die() and closed before returning.
static int read_file_pfr_psb(char* filename) {
	FILE* fp = fopen_or_die(filename);
	char* irs = "\n";
	int irs_len = strlen(irs);

	peek_file_reader_t* pfr = pfr_alloc(fp, PEEK_BUF_LEN);
	string_builder_t  sb;
	string_builder_t* psb = &sb;
	sb_init(&sb, STRING_BUILDER_INIT_SIZE);

	int bc = 0;

	while (TRUE) {
		char* line = read_line_pfr_psb(pfr, psb, irs, irs_len);
		if (line == NULL)
			break;
		bc += strlen(line);
		// Each line is only counted, so release it right away; otherwise
		// every line read would be leaked.
		// NOTE(review): assumes read_line_pfr_psb() returns a heap-allocated
		// string owned by the caller -- confirm against the helper.
		free(line);
	}
	// NOTE(review): pfr and the buffer set up by sb_init() are not released
	// here; confirm the matching cleanup calls for this API version.
	fclose(fp);
	return bc;
}
Exemplo n.º 4
0
// ----------------------------------------------------------------
// Walks a peek-file reader over the two-line input "ab,cde\n123,4567\n" and
// checks that peek, read, buffer-by and advance-by see the expected bytes.
// pfr_print() is called before every step to dump the reader state.
// Returns NULL when control reaches the end of the test.
static char* test_non_empty() {
	byte_reader_t* pbr = string_byte_reader_alloc();
	int ok = pbr->popen_func(pbr,

		NULL,

		"ab,cde\n"
		"123,4567\n"
	);
	mu_assert_lf(ok == TRUE);

	// 7 is the second argument to pfr_alloc, as in test_empty
	// (presumably the peek-buffer length -- confirm against pfr_alloc).
	peek_file_reader_t* pfr = pfr_alloc(pbr, 7);

	// A peek must show the same char that the following read then returns.
	pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a');
	pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a');
	pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b');
	pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b');

	// Peeking twice in a row must return the same char both times.
	pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ',');
	pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ',');
	pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == ',');
	// After "ab," has been read, buffering and advancing by 5 is expected to
	// skip "cde\n1", so the next read yields '2'.
	pfr_print(pfr); pfr_buffer_by(pfr, 5);
	pfr_print(pfr); pfr_advance_by(pfr, 5);
	pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '2');

	pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3');
	pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3');
	pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '3');
	// Advancing by 5 from here is expected to skip ",4567", leaving the
	// final '\n' as the next char read.
	pfr_print(pfr); pfr_buffer_by(pfr, 5);
	pfr_print(pfr); pfr_advance_by(pfr, 5);
	pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '\n');

	pbr->pclose_func(pbr, NULL);
	pfr_free(pfr);

	return NULL;
}
Exemplo n.º 5
0
// ----------------------------------------------------------------
// Allocates and initializes a stdio-backed CSV record reader.
//
// irs:                 record separator; the literal "auto" selects "\n" and
//                      additionally enables do_auto_line_term.
// ifs:                 field separator.
// use_implicit_header: when non-zero, the reader does not expect a header
//                      line next.
// comment_handling:    stored as-is in the reader state.
// comment_string:      stored as-is; may be NULL, in which case its recorded
//                      length is 0.
//
// The irs, ifs and comment_string pointers are stored, not copied.
// Returns the new lrec_reader_t with its function pointers wired to the
// lrec_reader_stdio_csv_* handlers.
lrec_reader_t* lrec_reader_stdio_csv_alloc(char* irs, char* ifs, int use_implicit_header,
	comment_handling_t comment_handling, char* comment_string)
{
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t));
	pstate->ilno          = 0LL;

	// "auto" is replaced by "\n" for the tries below; the flag records that
	// the caller asked for automatic line-terminator handling.
	pstate->do_auto_line_term = FALSE;
	if (streq(irs, "auto")) {
		irs = "\n";
		pstate->do_auto_line_term = TRUE;
	}

	pstate->comment_handling = comment_handling;
	pstate->comment_string   = comment_string;
	pstate->comment_string_length = comment_string == NULL ? 0 : strlen(comment_string);

	// Delimiter strings; "\xff" is the string registered under the *EOF*
	// indices in the tries below.
	pstate->eof           = "\xff";
	pstate->irs           = irs;
	pstate->ifs           = ifs;
	pstate->ifs_eof       = mlr_paste_2_strings(pstate->ifs, "\xff");
	pstate->dquote        = "\"";

	pstate->dquote_ifs    = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof    = "\"\xff";
	pstate->dquote_dquote = "\"\"";

	pstate->dquotelen     = strlen(pstate->dquote);


	// Parse trie for UTF-8 BOM
	pstate->putf8_bom_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->putf8_bom_parse_trie, UTF8_BOM, UTF8_BOM_STRIDX);

	// Parse trie for non-double-quoted fields
	pstate->pno_dquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->eof,     EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs,     IRS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs_eof, IFS_EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs,     IFS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote,  DQUOTE_STRIDX);

	// Parse trie for double-quoted fields
	pstate->pdquote_parse_trie = parse_trie_alloc();
	if (pstate->do_auto_line_term) {
		// Auto mode: a closing quote may be followed by either LF or CRLF.
		pstate->dquote_irs  = mlr_paste_2_strings("\"", "\n");
		pstate->dquote_irs2 = mlr_paste_2_strings("\"", "\r\n");
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,  DQUOTE_IRS_STRIDX);
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs2, DQUOTE_IRS2_STRIDX);
	} else {
		pstate->dquote_irs  = mlr_paste_2_strings("\"", pstate->irs);
		pstate->dquote_irs2 = NULL;
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX);
	}
	// NOTE(review): dquote_irs was already added in both branches above, so
	// the second registration below looks redundant -- confirm that
	// parse_trie_add_string() tolerates duplicates before removing it.
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->eof,           EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,    DQUOTE_IRS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs,    DQUOTE_IFS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_eof,    DQUOTE_EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX);


	pstate->pfields = rslls_alloc();
	pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE);
	pstate->pbr = stdio_byte_reader_alloc();
	// The value passed as pfr_alloc's second argument is the largest maxlen
	// among the three tries.
	pstate->pfr = pfr_alloc(pstate->pbr, mlr_imax3(
		pstate->putf8_bom_parse_trie->maxlen,
		pstate->pno_dquote_parse_trie->maxlen,
		pstate->pdquote_parse_trie->maxlen));

	// With an implicit header there is no header line to consume first.
	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = lrec_reader_stdio_csv_open;
	plrec_reader->pclose_func   = lrec_reader_stdio_csv_close;
	plrec_reader->pprocess_func = lrec_reader_stdio_csv_process;
	plrec_reader->psof_func     = lrec_reader_stdio_csv_sof;
	plrec_reader->pfree_func    = lrec_reader_stdio_csv_free;

	return plrec_reader;
}