static int read_file_pfr_psb(char* filename, int do_write) { byte_reader_t* pbr = stdio_byte_reader_alloc(); string_builder_t* psb = sb_alloc(STRING_BUILDER_INIT_SIZE); pbr->popen_func(pbr, NULL, filename); peek_file_reader_t* pfr = pfr_alloc(pbr, PEEK_BUF_LEN); parse_trie_t* ptrie = parse_trie_alloc(); parse_trie_add_string(ptrie, "\n", IRS_STRIDX); parse_trie_add_string(ptrie, "\xff", EOF_STRIDX); parse_trie_add_string(ptrie, "\n\xff", IRSEOF_STRIDX); int bc = 0; while (TRUE) { char* line = read_line_pfr_psb(pfr, psb, ptrie); if (line == NULL) break; if (do_write) { fputs(line, stdout); fputc('\n', stdout); } bc += strlen(line); free(line); } sb_free(psb); pbr->pclose_func(pbr, NULL); return bc; }
// ---------------------------------------------------------------- static char* test_empty() { byte_reader_t* pbr = string_byte_reader_alloc(); int ok = pbr->popen_func(pbr, NULL, ""); mu_assert_lf(ok == TRUE); peek_file_reader_t* pfr = pfr_alloc(pbr, 7); mu_assert_lf(pfr_peek_char(pfr) == (char)EOF); // char defaults to unsigned on some platforms mu_assert_lf(pfr_read_char(pfr) == (char)EOF); pbr->pclose_func(pbr, NULL); pfr_free(pfr); return NULL; }
// Reads the named file line by line, with the record separator fixed at "\n",
// via a peek-file reader over a stdio FILE* and a stack-resident string
// builder.
//
// filename: path passed to fopen_or_die.
//
// Returns the sum of strlen(line) over all lines returned by
// read_line_pfr_psb.
static int read_file_pfr_psb(char* filename) {
	FILE* fp = fopen_or_die(filename);
	char* irs = "\n";
	int irs_len = strlen(irs);

	peek_file_reader_t* pfr = pfr_alloc(fp, PEEK_BUF_LEN);
	string_builder_t sb;
	string_builder_t* psb = &sb;
	sb_init(&sb, STRING_BUILDER_INIT_SIZE);

	int bc = 0;
	while (TRUE) {
		char* line = read_line_pfr_psb(pfr, psb, irs, irs_len);
		if (line == NULL)
			break;
		bc += strlen(line);
		// The returned line is treated as caller-owned and released once
		// counted, so the loop does not accumulate one allocation per line.
		// NOTE(review): assumes read_line_pfr_psb hands back a heap-allocated
		// string -- confirm against its definition.
		free(line);
	}

	fclose(fp);
	// Release the peek-file reader once the stream it wrapped is closed.
	pfr_free(pfr);
	// NOTE(review): sb's internal buffer is not released here; no teardown
	// routine for an sb_init'ed builder is visible from this function.
	return bc;
}
// ---------------------------------------------------------------- static char* test_non_empty() { byte_reader_t* pbr = string_byte_reader_alloc(); int ok = pbr->popen_func(pbr, NULL, "ab,cde\n" "123,4567\n" ); mu_assert_lf(ok == TRUE); peek_file_reader_t* pfr = pfr_alloc(pbr, 7); pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a'); pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a'); pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b'); pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b'); pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == ','); pfr_print(pfr); pfr_buffer_by(pfr, 5); pfr_print(pfr); pfr_advance_by(pfr, 5); pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '2'); pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '3'); pfr_print(pfr); pfr_buffer_by(pfr, 5); pfr_print(pfr); pfr_advance_by(pfr, 5); pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '\n'); pbr->pclose_func(pbr, NULL); pfr_free(pfr); return NULL; }
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_stdio_csv_alloc(char* irs, char* ifs, int use_implicit_header, comment_handling_t comment_handling, char* comment_string) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t)); pstate->ilno = 0LL; pstate->do_auto_line_term = FALSE; if (streq(irs, "auto")) { irs = "\n"; pstate->do_auto_line_term = TRUE; } pstate->comment_handling = comment_handling; pstate->comment_string = comment_string; pstate->comment_string_length = comment_string == NULL ? 0 : strlen(comment_string); pstate->eof = "\xff"; pstate->irs = irs; pstate->ifs = ifs; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->dquote = "\""; pstate->dquote_ifs = mlr_paste_2_strings("\"", pstate->ifs); pstate->dquote_eof = "\"\xff"; pstate->dquote_dquote = "\"\""; pstate->dquotelen = strlen(pstate->dquote); // Parse trie for UTF-8 BOM pstate->putf8_bom_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->putf8_bom_parse_trie, UTF8_BOM, UTF8_BOM_STRIDX); // Parse trie for non-double-quoted fields pstate->pno_dquote_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->eof, EOF_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs, IRS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs_eof, IFS_EOF_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs, IFS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote, DQUOTE_STRIDX); // Parse trie for double-quoted fields pstate->pdquote_parse_trie = parse_trie_alloc(); if (pstate->do_auto_line_term) { pstate->dquote_irs = mlr_paste_2_strings("\"", "\n"); pstate->dquote_irs2 = mlr_paste_2_strings("\"", "\r\n"); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); 
parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs2, DQUOTE_IRS2_STRIDX); } else { pstate->dquote_irs = mlr_paste_2_strings("\"", pstate->irs); pstate->dquote_irs2 = NULL; parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); } parse_trie_add_string(pstate->pdquote_parse_trie, pstate->eof, EOF_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs, DQUOTE_IFS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_eof, DQUOTE_EOF_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX); pstate->pfields = rslls_alloc(); pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE); pstate->pbr = stdio_byte_reader_alloc(); pstate->pfr = pfr_alloc(pstate->pbr, mlr_imax3( pstate->putf8_bom_parse_trie->maxlen, pstate->pno_dquote_parse_trie->maxlen, pstate->pdquote_parse_trie->maxlen)); pstate->expect_header_line_next = use_implicit_header ? FALSE : TRUE; pstate->use_implicit_header = use_implicit_header; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = lrec_reader_stdio_csv_open; plrec_reader->pclose_func = lrec_reader_stdio_csv_close; plrec_reader->pprocess_func = lrec_reader_stdio_csv_process; plrec_reader->psof_func = lrec_reader_stdio_csv_sof; plrec_reader->pfree_func = lrec_reader_stdio_csv_free; return plrec_reader; }