Example #1
0
// Reads the named file one line at a time through a peek-file reader and a
// string builder, optionally echoing every line to stdout.
//
// filename: passed to the byte reader's open function.
// do_write: when non-zero, each line is written to stdout followed by '\n'.
//
// Returns the sum of strlen() over all lines returned by read_line_pfr_psb.
static int read_file_pfr_psb(char* filename, int do_write) {
	byte_reader_t* pbr = stdio_byte_reader_alloc();
	string_builder_t* psb = sb_alloc(STRING_BUILDER_INIT_SIZE);
	pbr->popen_func(pbr, NULL, filename);

	peek_file_reader_t* pfr = pfr_alloc(pbr, PEEK_BUF_LEN);

	// Delimiters the line reader looks for: newline, the 0xff end-of-file
	// marker byte, and newline immediately followed by that marker.
	parse_trie_t* ptrie = parse_trie_alloc();
	parse_trie_add_string(ptrie, "\n", IRS_STRIDX);
	parse_trie_add_string(ptrie, "\xff", EOF_STRIDX);
	parse_trie_add_string(ptrie, "\n\xff", IRSEOF_STRIDX);

	int bc = 0;

	while (TRUE) {
		// A NULL line signals that there is nothing more to read.
		char* line = read_line_pfr_psb(pfr, psb, ptrie);
		if (line == NULL)
			break;
		if (do_write) {
			fputs(line, stdout);
			fputc('\n', stdout);
		}
		bc += strlen(line);
		// Each returned line is released here by the caller.
		free(line);
	}

	sb_free(psb);
	// The trie is only handed to read_line_pfr_psb per call, so it can be
	// released as soon as the loop is done.
	parse_trie_free(ptrie);
	pbr->pclose_func(pbr, NULL);
	// NOTE(review): pfr and pbr themselves are not released; their free
	// routines are not visible from here -- confirm and add if available.
	return bc;
}
Example #2
0
// ----------------------------------------------------------------
// Builds a parse trie from the given strings, runs a single match against
// the start of buf, prints the trie and the result, and reports the result
// through the output pointers.
//
// test_name:   label printed in the banner line.
// strings:     strings to add to the trie; string i is added with index i.
// num_strings: number of entries in strings.
// buf:         NUL-terminated text to match against.
// prc:         receives the return code of parse_trie_match.
// pstridx:     receives the matched string index (-2 if the match call left
//              it untouched).
// pmatchlen:   receives the match length (-2 if the match call left it
//              untouched).
static void test_case(
	char*  test_name,
	char** strings,
	int    num_strings,
	char*  buf,
	int*   prc,
	int*   pstridx,
	int*   pmatchlen)
{
	int stridx, matchlen, rc;

	parse_trie_t* ptrie = parse_trie_alloc();
	printf("%s %s\n", sep, test_name);
	parse_trie_print(ptrie);
	for (stridx = 0; stridx < num_strings; stridx++) {
		printf("Adding string[%d] = [%s]\n", stridx, strings[stridx]);
		parse_trie_add_string(ptrie, strings[stridx], stridx);
		parse_trie_print(ptrie);
	}

	// Sentinel values so that an output the matcher does not write is
	// recognisable in the printed result.
	stridx = -2;
	matchlen = -2;
	rc = parse_trie_match(ptrie, buf, 0, strlen(buf), 0xff, &stridx, &matchlen);

	parse_trie_free(ptrie);

	printf("buf      = %s\n", buf);
	printf("rc       = %d\n", rc);
	// Only look the string up when the index is valid: on a failed match
	// stridx may still hold the -2 sentinel, and strings[-2] is out of bounds.
	if (stridx >= 0 && stridx < num_strings) {
		printf("stridx   = %d (%s)\n", stridx, strings[stridx]);
	} else {
		printf("stridx   = %d\n", stridx);
	}
	printf("matchlen = %d\n", matchlen);

	*prc       = rc;
	*pstridx   = stridx;
	*pmatchlen = matchlen;
}
Example #3
0
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_mmap_csv_alloc(char* irs, char* ifs, int use_implicit_header) {
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_mmap_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_mmap_csv_state_t));
	pstate->ilno          = 0LL;

	pstate->eof           = "\xff";
	pstate->irs           = irs;
	pstate->ifs           = ifs;
	pstate->ifs_eof       = mlr_paste_2_strings(pstate->ifs, "\xff");
	pstate->dquote        = "\"";

	pstate->dquote_irs    = mlr_paste_2_strings("\"", pstate->irs);
	pstate->dquote_ifs    = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof    = "\"\xff";
	pstate->dquote_dquote = "\"\"";

	pstate->dquotelen     = strlen(pstate->dquote);

	pstate->pno_dquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs,     IRS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs,     IFS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote,  DQUOTE_STRIDX);

	pstate->pdquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,    DQUOTE_IRS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs,    DQUOTE_IFS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX);

	pstate->pfields = rslls_alloc();
	pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE);

	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = file_reader_mmap_vopen;
	plrec_reader->pclose_func   = file_reader_mmap_vclose;
	plrec_reader->pprocess_func = lrec_reader_mmap_csv_process;
	plrec_reader->psof_func     = lrec_reader_mmap_csv_sof;
	plrec_reader->pfree_func    = lrec_reader_mmap_csv_free;

	return plrec_reader;
}
Example #4
0
// ----------------------------------------------------------------
// Demonstration test: tokenizes a small DKVP-style buffer with a parse trie
// holding "=", ",", "\r\n" and the 0xff end-of-file marker, printing every
// matched token and every unmatched byte along the way. Scanning stops once
// the end-of-file token is matched; the test then asserts that the scan
// position is at the buffer's terminating NUL.
//
// Returns 0.
static char* show_it() {
	char* test_name = "show_it";
	char* strings[] = { "=" , ",", "\r\n", "\xff" };
	const int EOF_TOKEN = 3;
	int num_strings = sizeof(strings) / sizeof(strings[0]);
	char* buf =
		"abc=123,def=456\r\n"
		"ghi=789\xff";
	char* p = buf;

	printf("%s %s\n", sep, test_name);
	int stridx, matchlen, rc;

	parse_trie_t* ptrie = parse_trie_alloc();
	parse_trie_print(ptrie);
	for (stridx = 0; stridx < num_strings; stridx++) {
		printf("Adding string[%d] = [%s]\n", stridx, strings[stridx]);
		parse_trie_add_string(ptrie, strings[stridx], stridx);
	}
	parse_trie_print(ptrie);

	while (TRUE) {
		rc = parse_trie_match(ptrie, p, 0, strlen(p), 0xff, &stridx, &matchlen);
		if (rc) {
			printf("match token %d (%s)\n", stridx, strings[stridx]);
			p += matchlen;
			if (stridx == EOF_TOKEN) {
				break;
			}
		} else {
			// Go through unsigned char so a byte >= 0x80 prints as two hex
			// digits instead of a sign-extended value.
			unsigned char c = (unsigned char)*p;
			printf("c %c[%02x]\n", isprint(c) ? c : '?', (unsigned)c);
			p++;
		}
	}

	// Release the trie before the final assertion so it is not leaked
	// regardless of how the assertion macro reacts.
	parse_trie_free(ptrie);

	mu_assert_lf(*p == 0);

	return 0;
}
Example #5
0
// ----------------------------------------------------------------
// Allocates a CSV record reader backed by a stdio byte reader, together with
// its private parsing state (delimiter strings, parse tries, buffers).
//
// irs: record-separator string. The literal "auto" selects "\n" as the
//      separator and turns on do_auto_line_term, in which case the
//      double-quote trie also recognizes a closing quote followed by "\r\n".
// ifs: field-separator string.
// use_implicit_header: when non-zero, no header line is expected next.
// comment_handling: stored in the state as given.
// comment_string:   stored in the state as given; may be NULL, in which case
//                   the stored length is 0.
//
// Returns the newly allocated reader with its state and function pointers set.
lrec_reader_t* lrec_reader_stdio_csv_alloc(char* irs, char* ifs, int use_implicit_header,
	comment_handling_t comment_handling, char* comment_string)
{
	lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));

	lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t));
	pstate->ilno          = 0LL;

	pstate->do_auto_line_term = FALSE;
	if (streq(irs, "auto")) {
		irs = "\n";
		pstate->do_auto_line_term = TRUE;
	}

	pstate->comment_handling = comment_handling;
	pstate->comment_string   = comment_string;
	pstate->comment_string_length = comment_string == NULL ? 0 : strlen(comment_string);

	// Plain delimiters. The 0xff byte is used as the end-of-file marker.
	pstate->eof           = "\xff";
	pstate->irs           = irs;
	pstate->ifs           = ifs;
	pstate->ifs_eof       = mlr_paste_2_strings(pstate->ifs, "\xff");
	pstate->dquote        = "\"";

	// Delimiters as they appear right after a closing double quote.
	pstate->dquote_ifs    = mlr_paste_2_strings("\"", pstate->ifs);
	pstate->dquote_eof    = "\"\xff";
	pstate->dquote_dquote = "\"\"";

	pstate->dquotelen     = strlen(pstate->dquote);

	// Parse trie for UTF-8 BOM
	pstate->putf8_bom_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->putf8_bom_parse_trie, UTF8_BOM, UTF8_BOM_STRIDX);

	// Parse trie for non-double-quoted fields
	pstate->pno_dquote_parse_trie = parse_trie_alloc();
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->eof,     EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs,     IRS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs_eof, IFS_EOF_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs,     IFS_STRIDX);
	parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote,  DQUOTE_STRIDX);

	// Parse trie for double-quoted fields. The quote-then-record-separator
	// pattern(s) depend on the line-termination mode and are registered
	// exactly once, inside the branch that builds them.
	pstate->pdquote_parse_trie = parse_trie_alloc();
	if (pstate->do_auto_line_term) {
		pstate->dquote_irs  = mlr_paste_2_strings("\"", "\n");
		pstate->dquote_irs2 = mlr_paste_2_strings("\"", "\r\n");
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs,  DQUOTE_IRS_STRIDX);
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs2, DQUOTE_IRS2_STRIDX);
	} else {
		pstate->dquote_irs  = mlr_paste_2_strings("\"", pstate->irs);
		pstate->dquote_irs2 = NULL;
		parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX);
	}
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->eof,           EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs,    DQUOTE_IFS_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_eof,    DQUOTE_EOF_STRIDX);
	parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX);

	pstate->pfields = rslls_alloc();
	pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE);
	pstate->pbr = stdio_byte_reader_alloc();
	// The peek reader is sized from the largest maxlen among the three tries.
	pstate->pfr = pfr_alloc(pstate->pbr, mlr_imax3(
		pstate->putf8_bom_parse_trie->maxlen,
		pstate->pno_dquote_parse_trie->maxlen,
		pstate->pdquote_parse_trie->maxlen));

	// With an implicit header there is no header line to consume first.
	pstate->expect_header_line_next   = use_implicit_header ? FALSE : TRUE;
	pstate->use_implicit_header       = use_implicit_header;
	pstate->pheader_keeper            = NULL;
	pstate->pheader_keepers           = lhmslv_alloc();

	plrec_reader->pvstate       = (void*)pstate;
	plrec_reader->popen_func    = lrec_reader_stdio_csv_open;
	plrec_reader->pclose_func   = lrec_reader_stdio_csv_close;
	plrec_reader->pprocess_func = lrec_reader_stdio_csv_process;
	plrec_reader->psof_func     = lrec_reader_stdio_csv_sof;
	plrec_reader->pfree_func    = lrec_reader_stdio_csv_free;

	return plrec_reader;
}
Example #6
0
// ----------------------------------------------------------------
// Unit test: walks a small DKVP-style buffer with a parse trie holding the
// pair separator "=", field separator ",", record separator "\r\n" and the
// 0xff end-of-file marker. It asserts that the three bytes before every
// delimiter do not match, and that each delimiter matches with the expected
// token index and length.
//
// Returns 0.
static char* test_dkvp() {
	char* test_name = "dkvp";
	char* strings[] = { "=" , ",", "\r\n", "\xff" };
	const int PS_TOKEN  = 0;
	const int FS_TOKEN  = 1;
	const int RS_TOKEN  = 2;
	const int EOF_TOKEN = 3;
	int num_strings = sizeof(strings) / sizeof(strings[0]);
	char* buf =
		"abc=123,def=456\r\n"
		"ghi=789\xff";
	char* p = buf;

	// Delimiters in the order they occur in buf. Each one is preceded by
	// exactly three non-delimiter bytes: abc, 123, def, 456, ghi, 789.
	const int expected_tokens[] = {
		PS_TOKEN, FS_TOKEN, PS_TOKEN, RS_TOKEN, PS_TOKEN, EOF_TOKEN
	};
	const int num_expected = sizeof(expected_tokens) / sizeof(expected_tokens[0]);
	const int chars_per_field = 3;

	printf("%s %s\n", sep, test_name);
	int stridx, matchlen, rc;

	parse_trie_t* ptrie = parse_trie_alloc();
	parse_trie_print(ptrie);
	for (stridx = 0; stridx < num_strings; stridx++) {
		printf("Adding string[%d] = [%s]\n", stridx, strings[stridx]);
		parse_trie_add_string(ptrie, strings[stridx], stridx);
	}
	parse_trie_print(ptrie);

	for (int i = 0; i < num_expected; i++) {
		// Field bytes: no delimiter starts here, so step one byte at a time.
		for (int j = 0; j < chars_per_field; j++) {
			rc = parse_trie_match(ptrie, p, 0, strlen(p), 0xff, &stridx, &matchlen);
			mu_assert_lf(rc == FALSE);
			p++;
		}

		// Delimiter: must match with the expected index and full length.
		int expected = expected_tokens[i];
		rc = parse_trie_match(ptrie, p, 0, strlen(p), 0xff, &stridx, &matchlen);
		mu_assert_lf(rc == TRUE);
		mu_assert_lf(stridx == expected);
		mu_assert_lf(matchlen == (int)strlen(strings[expected]));
		p += matchlen;
	}

	parse_trie_free(ptrie);

	return 0;
}