// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_csvex_alloc(byte_reader_t* pbr, char irs, char ifs) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_csvex_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_csvex_state_t)); pstate->ilno = 0LL; pstate->irs = "\r\n"; // xxx multi-byte the cli irs/ifs/etc, and integrate here pstate->ifs = ","; // xxx multi-byte the cli irs/ifs/etc, and integrate here pstate->dquote_irs = mlr_paste_2_strings("\"", pstate->irs); pstate->dquote_ifs = mlr_paste_2_strings("\"", pstate->ifs); pstate->dquote_eof = "\"\xff"; pstate->dquote = "\""; pstate->dquote_dquote = "\"\""; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->irs_len = strlen(pstate->irs); pstate->ifs_len = strlen(pstate->ifs); pstate->dquote_irs_len = strlen(pstate->dquote_irs); pstate->dquote_ifs_len = strlen(pstate->dquote_ifs); pstate->dquote_eof_len = strlen(pstate->dquote_eof); pstate->dquote_len = strlen(pstate->dquote); pstate->dquote_dquote_len = strlen(pstate->dquote_dquote); pstate->ifs_eof_len = strlen(pstate->ifs_eof); pstate->peek_buf_len = pstate->irs_len; pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->ifs_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_irs_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_ifs_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_eof_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->dquote_dquote_len); pstate->peek_buf_len = mlr_imax2(pstate->peek_buf_len, pstate->ifs_eof_len); pstate->peek_buf_len += 2; sb_init(&pstate->sb, STRING_BUILDER_INIT_SIZE); pstate->psb = &pstate->sb; pstate->pbr = pbr; pstate->pfr = NULL; // xxx allocate the parse-tries here -- one for dquote only, // the second for non-dquote-after-that, the third for 
dquoted-after-that. pstate->expect_header_line_next = TRUE; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = &file_reader_stdio_vopen; plrec_reader->pclose_func = &file_reader_stdio_vclose; plrec_reader->pprocess_func = &lrec_reader_csvex_process; plrec_reader->psof_func = &lrec_reader_csvex_sof; plrec_reader->pfree_func = &lrec_reader_csvex_free; return plrec_reader; }
// ----------------------------------------------------------------
// Checks that each mlr_paste_N_strings variant concatenates its arguments in
// order. Returns 0 once every assertion has passed.
// NOTE(review): mu_assert presumably returns its message on failure -- confirm
// against the unit-test header.
static char * test_paste() {
	char* pasted2 = mlr_paste_2_strings("ab", "cd");
	mu_assert("error: paste 2", streq(pasted2, "abcd"));

	char* pasted3 = mlr_paste_3_strings("ab", "cd", "ef");
	mu_assert("error: paste 3", streq(pasted3, "abcdef"));

	char* pasted4 = mlr_paste_4_strings("ab", "cd", "ef", "gh");
	mu_assert("error: paste 4", streq(pasted4, "abcdefgh"));

	char* pasted5 = mlr_paste_5_strings("ab", "cd", "ef", "gh", "ij");
	mu_assert("error: paste 5", streq(pasted5, "abcdefghij"));

	return 0;
}
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_mmap_csv_alloc(char* irs, char* ifs, int use_implicit_header) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_mmap_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_mmap_csv_state_t)); pstate->ilno = 0LL; pstate->eof = "\xff"; pstate->irs = irs; pstate->ifs = ifs; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->dquote = "\""; pstate->dquote_irs = mlr_paste_2_strings("\"", pstate->irs); pstate->dquote_ifs = mlr_paste_2_strings("\"", pstate->ifs); pstate->dquote_eof = "\"\xff"; pstate->dquote_dquote = "\"\""; pstate->dquotelen = strlen(pstate->dquote); pstate->pno_dquote_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs, IRS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs, IFS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote, DQUOTE_STRIDX); pstate->pdquote_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs, DQUOTE_IFS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX); pstate->pfields = rslls_alloc(); pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE); pstate->expect_header_line_next = use_implicit_header ? FALSE : TRUE; pstate->use_implicit_header = use_implicit_header; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = file_reader_mmap_vopen; plrec_reader->pclose_func = file_reader_mmap_vclose; plrec_reader->pprocess_func = lrec_reader_mmap_csv_process; plrec_reader->psof_func = lrec_reader_mmap_csv_sof; plrec_reader->pfree_func = lrec_reader_mmap_csv_free; return plrec_reader; }
static step_t* step_counter_alloc(char* input_field_name) { step_t* pstep = mlr_malloc_or_die(sizeof(step_t)); step_counter_state_t* pstate = mlr_malloc_or_die(sizeof(step_counter_state_t)); pstate->counter = 0LL; pstate->output_field_name = mlr_paste_2_strings(input_field_name, "_counter"); pstep->pvstate = (void*)pstate; pstep->psprocess_func = &step_counter_sprocess; pstep->pdprocess_func = NULL; return pstep; }
static step_t* step_rsum_alloc(char* input_field_name) { step_t* pstep = mlr_malloc_or_die(sizeof(step_t)); step_rsum_state_t* pstate = mlr_malloc_or_die(sizeof(step_rsum_state_t)); pstate->rsum = 0.0; pstate->output_field_name = mlr_paste_2_strings(input_field_name, "_rsum"); pstep->pvstate = (void*)pstate; pstep->psprocess_func = NULL; pstep->pdprocess_func = &step_rsum_dprocess; return pstep; }
static step_t* step_delta_alloc(char* input_field_name) { step_t* pstep = mlr_malloc_or_die(sizeof(step_t)); step_delta_state_t* pstate = mlr_malloc_or_die(sizeof(step_delta_state_t)); pstate->prev = -999.0; pstate->have_prev = FALSE; pstate->output_field_name = mlr_paste_2_strings(input_field_name, "_delta"); pstep->pvstate = (void*)pstate; pstep->psprocess_func = NULL; pstep->pdprocess_func = &step_delta_dprocess; return pstep; }
// ---------------------------------------------------------------- lrec_reader_t* lrec_reader_stdio_csv_alloc(char* irs, char* ifs, int use_implicit_header, comment_handling_t comment_handling, char* comment_string) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_stdio_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_stdio_csv_state_t)); pstate->ilno = 0LL; pstate->do_auto_line_term = FALSE; if (streq(irs, "auto")) { irs = "\n"; pstate->do_auto_line_term = TRUE; } pstate->comment_handling = comment_handling; pstate->comment_string = comment_string; pstate->comment_string_length = comment_string == NULL ? 0 : strlen(comment_string); pstate->eof = "\xff"; pstate->irs = irs; pstate->ifs = ifs; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->dquote = "\""; pstate->dquote_ifs = mlr_paste_2_strings("\"", pstate->ifs); pstate->dquote_eof = "\"\xff"; pstate->dquote_dquote = "\"\""; pstate->dquotelen = strlen(pstate->dquote); // Parse trie for UTF-8 BOM pstate->putf8_bom_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->putf8_bom_parse_trie, UTF8_BOM, UTF8_BOM_STRIDX); // Parse trie for non-double-quoted fields pstate->pno_dquote_parse_trie = parse_trie_alloc(); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->eof, EOF_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->irs, IRS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs_eof, IFS_EOF_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->ifs, IFS_STRIDX); parse_trie_add_string(pstate->pno_dquote_parse_trie, pstate->dquote, DQUOTE_STRIDX); // Parse trie for double-quoted fields pstate->pdquote_parse_trie = parse_trie_alloc(); if (pstate->do_auto_line_term) { pstate->dquote_irs = mlr_paste_2_strings("\"", "\n"); pstate->dquote_irs2 = mlr_paste_2_strings("\"", "\r\n"); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); 
parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs2, DQUOTE_IRS2_STRIDX); } else { pstate->dquote_irs = mlr_paste_2_strings("\"", pstate->irs); pstate->dquote_irs2 = NULL; parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); } parse_trie_add_string(pstate->pdquote_parse_trie, pstate->eof, EOF_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_irs, DQUOTE_IRS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_ifs, DQUOTE_IFS_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_eof, DQUOTE_EOF_STRIDX); parse_trie_add_string(pstate->pdquote_parse_trie, pstate->dquote_dquote, DQUOTE_DQUOTE_STRIDX); pstate->pfields = rslls_alloc(); pstate->psb = sb_alloc(STRING_BUILDER_INIT_SIZE); pstate->pbr = stdio_byte_reader_alloc(); pstate->pfr = pfr_alloc(pstate->pbr, mlr_imax3( pstate->putf8_bom_parse_trie->maxlen, pstate->pno_dquote_parse_trie->maxlen, pstate->pdquote_parse_trie->maxlen)); pstate->expect_header_line_next = use_implicit_header ? FALSE : TRUE; pstate->use_implicit_header = use_implicit_header; pstate->pheader_keeper = NULL; pstate->pheader_keepers = lhmslv_alloc(); plrec_reader->pvstate = (void*)pstate; plrec_reader->popen_func = lrec_reader_stdio_csv_open; plrec_reader->pclose_func = lrec_reader_stdio_csv_close; plrec_reader->pprocess_func = lrec_reader_stdio_csv_process; plrec_reader->psof_func = lrec_reader_stdio_csv_sof; plrec_reader->pfree_func = lrec_reader_stdio_csv_free; return plrec_reader; }