// Produces the next record from the reader, or NULL at end of input.
//
// When a header is expected, the header fields are read first and mapped to
// a header keeper: either one already present in pstate->pheader_keepers, or
// a newly allocated one which is then added to that cache. When the record
// getter reports end of stanza, the loop goes around again expecting a new
// header. pctx is not used here.
static lrec_t* lrec_reader_mmap_csvlite_process_multi_seps(void* pvstate, void* pvhandle, context_t* pctx) {
	lrec_reader_mmap_csvlite_state_t* pstate = pvstate;
	file_reader_mmap_state_t* phandle = pvhandle;

	for (;;) {
		if (pstate->expect_header_line_next) {
			slls_t* pnames = lrec_reader_mmap_csvlite_get_header_multi_seps(phandle, pstate);
			if (pnames == NULL) {
				// EOF
				return NULL;
			}
			pstate->expect_header_line_next = FALSE;

			pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pnames);
			if (pstate->pheader_keeper != NULL) {
				// Cache hit: the freshly read names are redundant.
				slls_free(pnames);
			} else {
				// Cache miss: build a keeper around the names and remember it.
				pstate->pheader_keeper = header_keeper_alloc(NULL, pnames);
				lhmslv_put(pstate->pheader_keepers, pnames, pstate->pheader_keeper);
			}
		}

		int stanza_done = FALSE;
		lrec_t* pnext_rec = lrec_reader_mmap_csvlite_get_record_multi_seps(
			phandle, pstate, pstate->pheader_keeper, &stanza_done);

		if (!stanza_done) {
			// Either a record, or NULL signalling EOF.
			return pnext_rec;
		}

		// Stanza finished: the next thing to read is a header.
		pstate->expect_header_line_next = TRUE;
	}
}
Beispiel #2
0
// Produces the next record from the reader, or NULL at end of input.
//
// If a header is expected, one set of fields is read as the header and
// mapped to a header keeper (re-used from pstate->pheader_keepers when the
// same header was seen before). Then one set of fields is read as data and
// pasted together with the current header.
//
// pvhandle and pctx are not used by the visible code.
static lrec_t* lrec_reader_csvex_process(void* pvhandle, void* pvstate, context_t* pctx) {
	lrec_reader_csvex_state_t* pstate = pvstate;

//	xxx byte-reader open ...
//	if (pstate->pfr == NULL) {
//		pstate->pfr = pfr_alloc((FILE*)pvhandle, pstate->peek_buf_len);
//	}

	if (pstate->expect_header_line_next) {
		slls_t* pheader_fields = lrec_reader_csvex_get_fields(pstate);
		if (pheader_fields == NULL) // EOF
			return NULL;
		pstate->ilno++;
		pstate->expect_header_line_next = FALSE;

		pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pheader_fields);
		if (pstate->pheader_keeper == NULL) {
			pstate->pheader_keeper = header_keeper_alloc(NULL, pheader_fields);
			lhmslv_put(pstate->pheader_keepers, pheader_fields, pstate->pheader_keeper);
		} else { // Re-use the header-keeper in the header cache
			slls_free(pheader_fields);
		}
	}
	pstate->ilno++;

	slls_t* pdata_fields = lrec_reader_csvex_get_fields(pstate);
	// The field getter returns NULL at EOF (as handled for the header above);
	// do not hand a NULL field list on to be pasted.
	if (pdata_fields == NULL) // EOF
		return NULL;
	return paste_header_and_data(pstate, pdata_fields);
}
Beispiel #3
0
// ----------------------------------------------------------------
// Unit test: two records parsed against one shared CSV-lite header are
// checked for field lookup, removal, and both flavours of rename.
// Returns NULL when every assertion holds.
static char* test_lrec_csv_api() {
	// Input text: one header line and two data lines.
	char* header_text = strdup("w,x,y,z");
	char* first_text = strdup("2,3,4,5");
	char* second_text = strdup("6,7,8,9");

	// One header keeper shared by both records.
	slls_t* header_names = split_csvlite_header_line(header_text, ',', FALSE);
	header_keeper_t* pkeeper = header_keeper_alloc(header_text, header_names);

	lrec_t* pfirst = lrec_parse_stdio_csvlite_data_line(pkeeper, first_text, ',', FALSE);
	lrec_t* psecond = lrec_parse_stdio_csvlite_data_line(pkeeper, second_text, ',', FALSE);

	// Freshly parsed: four fields each, values keyed by the header names.
	mu_assert_lf(pfirst->field_count == 4);
	mu_assert_lf(psecond->field_count == 4);

	mu_assert_lf(streq(lrec_get(pfirst, "w"), "2"));
	mu_assert_lf(streq(lrec_get(pfirst, "x"), "3"));
	mu_assert_lf(streq(lrec_get(pfirst, "y"), "4"));
	mu_assert_lf(streq(lrec_get(pfirst, "z"), "5"));

	mu_assert_lf(streq(lrec_get(psecond, "w"), "6"));
	mu_assert_lf(streq(lrec_get(psecond, "x"), "7"));
	mu_assert_lf(streq(lrec_get(psecond, "y"), "8"));
	mu_assert_lf(streq(lrec_get(psecond, "z"), "9"));

	// Removing a field from one record must leave the other untouched.
	lrec_remove(pfirst, "w");
	mu_assert_lf(pfirst->field_count == 3);
	mu_assert_lf(psecond->field_count == 4);
	mu_assert_lf(lrec_get(pfirst, "w") == NULL);
	mu_assert_lf(streq(lrec_get(psecond, "w"), "6"));

	// Rename onto a key which is not already present: count is unchanged.
	lrec_rename(pfirst, "x", "u");
	mu_assert_lf(pfirst->field_count == 3);
	mu_assert_lf(lrec_get(pfirst, "x") == NULL);
	mu_assert_lf(streq(lrec_get(pfirst, "u"), "3"));

	// Rename onto a key which is already present: the old "z" is replaced
	// by the value formerly under "y", so the count drops by one.
	lrec_rename(psecond, "y", "z");
	mu_assert_lf(psecond->field_count == 3);
	mu_assert_lf(streq(lrec_get(psecond, "w"), "6"));
	mu_assert_lf(streq(lrec_get(psecond, "x"), "7"));
	mu_assert_lf(lrec_get(psecond, "y") == NULL);
	mu_assert_lf(streq(lrec_get(psecond, "z"), "8"));

	lrec_free(pfirst);
	lrec_free(psecond);

	// xxx need a test case for alloc1,free1,alloc2,free2 w/ same hdr.
	return NULL;
}
// Produces the next record from a stdio stream, or NULL at end of input.
//
// pvhandle is the FILE* to read from; pvstate is the reader state. pctx is
// not used here.
//
// When a header is expected, lines are read until one splits into at least
// one field; that line becomes the current header (re-using a cached header
// keeper when the same header was seen before). After that, one data line is
// read: an empty line ends the current stanza so that a new header is
// expected; a non-empty line is parsed against the current header and
// returned.
static lrec_t* lrec_reader_stdio_csv_process(void* pvhandle, void* pvstate, context_t* pctx) {
	FILE* input_stream = pvhandle;
	lrec_reader_stdio_csv_state_t* pstate = pvstate;

	while (TRUE) {
		if (pstate->expect_header_line_next) {
			// xxx cmt
			while (TRUE) {
				char* hline = mlr_get_line(input_stream, pstate->irs);
				if (hline == NULL) // EOF
					return NULL;
				pstate->ilno++;

				slls_t* pheader_fields = split_csv_header_line(hline, pstate->ifs, pstate->allow_repeat_ifs);
				if (pheader_fields->length == 0) {
					// No fields on this line: keep looking for a header.
					pstate->expect_header_line_next = TRUE;
					pstate->pheader_keeper = NULL;
					// Nothing keeps the empty list or the line, so release
					// them here rather than leaking one pair per blank line.
					slls_free(pheader_fields);
					free(hline);
				} else {
					pstate->expect_header_line_next = FALSE;

					pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pheader_fields);
					if (pstate->pheader_keeper == NULL) {
						// The new header keeper is handed both the line and its fields.
						pstate->pheader_keeper = header_keeper_alloc(hline, pheader_fields);
						lhmslv_put(pstate->pheader_keepers, pheader_fields, pstate->pheader_keeper);
					} else { // Re-use the header-keeper in the header cache
						slls_free(pheader_fields);
						free(hline);
					}
					break;
				}
			}
		}

		char* line = mlr_get_line(input_stream, pstate->irs);
		if (line == NULL) // EOF
			return NULL;

		// xxx empty-line check ... make a lib func is_empty_modulo_whitespace().
		if (!*line) {
			// An empty line is never handed on, so free it on every path.
			free(line);
			if (pstate->pheader_keeper != NULL) {
				// End of stanza: a new header is expected next.
				pstate->pheader_keeper = NULL;
				pstate->expect_header_line_next = TRUE;
			}
			continue;
		}

		pstate->ifnr++;
		return lrec_parse_stdio_csv_data_line(pstate->pheader_keeper, line, pstate->ifs, pstate->allow_repeat_ifs);
	}
}
Beispiel #5
0
// ----------------------------------------------------------------
// Produces the next record from the reader, or NULL at end of input.
//
// If a header is expected, one set of fields is read into pstate->pfields
// and copied into a fresh string list which becomes (or is matched against)
// a header keeper in pstate->pheader_keepers. An empty header field is a
// fatal error. Then one set of fields is read as data and pasted either
// with positional indices (implicit header) or with the current header.
static lrec_t* lrec_reader_mmap_csv_process(void* pvstate, void* pvhandle, context_t* pctx) {
	file_reader_mmap_state_t* phandle = pvhandle;
	lrec_reader_mmap_csv_state_t* pstate = pvstate;

	if (pstate->expect_header_line_next) {
		if (!lrec_reader_mmap_csv_get_fields(pstate, pstate->pfields, phandle)) {
			// EOF before any header
			return NULL;
		}
		pstate->ilno++;

		slls_t* pnames = slls_alloc();
		rsllse_t* pnode = pstate->pfields->phead;
		int nseen = 0;
		while (nseen < pstate->pfields->length && pnode != NULL) {
			if (*pnode->value == 0) {
				fprintf(stderr, "%s: unacceptable empty CSV key at file \"%s\" line %lld.\n",
					MLR_GLOBALS.argv0, pctx->filename, pstate->ilno);
				exit(1);
			}
			// The string moves to the header list along with its free-flag;
			// clearing the flag on the source node hands over the
			// responsibility for freeing it.
			slls_append(pnames, pnode->value, pnode->free_flag);
			pnode->free_flag = 0;

			pnode = pnode->pnext;
			nseen++;
		}
		rslls_reset(pstate->pfields);

		pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pnames);
		if (pstate->pheader_keeper != NULL) {
			// Same header as seen before: keep the cached keeper.
			slls_free(pnames);
		} else {
			pstate->pheader_keeper = header_keeper_alloc(NULL, pnames);
			lhmslv_put(pstate->pheader_keepers, pnames, pstate->pheader_keeper,
				NO_FREE); // freed by header-keeper
		}

		pstate->expect_header_line_next = FALSE;
	}

	int got_fields = lrec_reader_mmap_csv_get_fields(pstate, pstate->pfields, phandle);
	pstate->ilno++; // counted whether or not the read succeeded
	if (got_fields == FALSE) {
		// EOF
		return NULL;
	}

	lrec_t* pout;
	if (pstate->use_implicit_header) {
		pout = paste_indices_and_data(pstate, pstate->pfields, pctx);
	} else {
		pout = paste_header_and_data(pstate, pstate->pfields, pctx);
	}
	rslls_reset(pstate->pfields);
	return pout;
}
// ----------------------------------------------------------------
// Produces the next record from a stdio stream, or NULL at end of input.
//
// pvhandle is the FILE* to read from; pvstate is the reader state; pctx
// supplies the filename used in error messages and passed to the parsers.
//
// When a header is expected, lines are read until one splits into at least
// one field. An empty field name in the header is a fatal error. The header
// is mapped to a header keeper (re-used from pstate->pheader_keepers when the
// same header was seen before). After that, one data line is read: an empty
// line ends the current stanza so that a new header is expected; a non-empty
// line is parsed and returned.
//
// Single-character separators use the *_cline / *_single_ifs variants;
// multi-character separators use the *_sline / *_multi_ifs variants.
static lrec_t* lrec_reader_stdio_csvlite_process(void* pvstate, void* pvhandle, context_t* pctx) {
	FILE* input_stream = pvhandle;
	lrec_reader_stdio_csvlite_state_t* pstate = pvstate;

	while (TRUE) {
		if (pstate->expect_header_line_next) {
			while (TRUE) {
				char* hline = (pstate->irslen == 1)
					? mlr_get_cline(input_stream, pstate->irs[0])
					: mlr_get_sline(input_stream, pstate->irs, pstate->irslen);
				if (hline == NULL) // EOF
					return NULL;
				pstate->ilno++;

				slls_t* pheader_fields = (pstate->ifslen == 1)
					? split_csvlite_header_line_single_ifs(hline, pstate->ifs[0], pstate->allow_repeat_ifs)
					: split_csvlite_header_line_multi_ifs(hline, pstate->ifs, pstate->ifslen, pstate->allow_repeat_ifs);
				if (pheader_fields->length == 0) {
					// No fields on this line: keep looking for a header.
					pstate->expect_header_line_next = TRUE;
					pstate->pheader_keeper = NULL;
					// Nothing keeps the empty list or the line, so release
					// them here rather than leaking one pair per blank line.
					slls_free(pheader_fields);
					free(hline);
				} else {
					for (sllse_t* pe = pheader_fields->phead; pe != NULL; pe = pe->pnext) {
						if (*pe->value == 0) {
							fprintf(stderr, "%s: unacceptable empty CSV key at file \"%s\" line %lld.\n",
								MLR_GLOBALS.argv0, pctx->filename, pstate->ilno);
							exit(1);
						}
					}

					pstate->expect_header_line_next = FALSE;

					pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pheader_fields);
					if (pstate->pheader_keeper == NULL) {
						// The new header keeper is handed both the line and its fields.
						pstate->pheader_keeper = header_keeper_alloc(hline, pheader_fields);
						lhmslv_put(pstate->pheader_keepers, pheader_fields, pstate->pheader_keeper,
							NO_FREE); // freed by header-keeper
					} else { // Re-use the header-keeper in the header cache
						slls_free(pheader_fields);
						free(hline);
					}
					break;
				}
			}
		}

		char* line = (pstate->irslen == 1)
			? mlr_get_cline(input_stream, pstate->irs[0])
			: mlr_get_sline(input_stream, pstate->irs, pstate->irslen);
		if (line == NULL) // EOF
			return NULL;
		pstate->ilno++;

		if (!*line) {
			// An empty line is never handed on, so free it on every path.
			free(line);
			if (pstate->pheader_keeper != NULL) {
				// End of stanza: a new header is expected next.
				pstate->pheader_keeper = NULL;
				pstate->expect_header_line_next = TRUE;
			}
			continue;
		}

		pstate->ifnr++;
		if (pstate->ifslen == 1) {
			return pstate->use_implicit_header
				? lrec_parse_stdio_csvlite_data_line_single_ifs_implicit_header(
					pstate->pheader_keeper, pctx->filename, pstate->ilno, line,
					pstate->ifs[0], pstate->allow_repeat_ifs)
				: lrec_parse_stdio_csvlite_data_line_single_ifs(pstate->pheader_keeper, pctx->filename, pstate->ilno, line,
					pstate->ifs[0], pstate->allow_repeat_ifs);
		} else {
			return pstate->use_implicit_header
				? lrec_parse_stdio_csvlite_data_line_multi_ifs_implicit_header(
					pstate->pheader_keeper, pctx->filename, pstate->ilno, line,
					pstate->ifs, pstate->ifslen, pstate->allow_repeat_ifs)
				: lrec_parse_stdio_csvlite_data_line_multi_ifs(pstate->pheader_keeper, pctx->filename, pstate->ilno, line,
					pstate->ifs, pstate->ifslen, pstate->allow_repeat_ifs);
		}
	}
}
// ----------------------------------------------------------------
// Produces the next record from the reader, or NULL at end of input.
//
// pvstate is the reader state; pctx supplies the filename for error messages
// and the auto-detected line terminator. pvhandle is not used here.
//
// If a header is expected, sets of fields are read until one is found that
// is not a comment; it is copied into a string list which becomes (or is
// matched against) a header keeper in pstate->pheader_keepers. An empty
// header field is a fatal error. Then sets of fields are read until one is
// found that is not a comment; it is pasted either with positional indices
// (implicit header) or with the current header, and returned.
//
// A set of fields counts as a comment when a comment string is configured
// and the first field starts with it. Comments are echoed to stdout when
// comment handling is PASS_COMMENTS, and are otherwise dropped.
static lrec_t* lrec_reader_stdio_csv_process(void* pvstate, void* pvhandle, context_t* pctx) {
	lrec_reader_stdio_csv_state_t* pstate = pvstate;

	// Ingest the next header line, if expected
	if (pstate->expect_header_line_next) {
		while (TRUE) {
			if (!lrec_reader_stdio_csv_get_fields(pstate, pstate->pfields, pctx, TRUE))
				return NULL;
			pstate->ilno++;

			// We check for comments here rather than within the parser since it's important
			// for users to be able to comment out lines containing double-quoted newlines.
			if (pstate->comment_string != NULL && pstate->pfields->phead != NULL) {
				if (streqn(pstate->pfields->phead->value, pstate->comment_string, pstate->comment_string_length)) {
					if (pstate->comment_handling == PASS_COMMENTS) {
						int i = 0;
						for (
							rsllse_t* pe = pstate->pfields->phead;
							i < pstate->pfields->length && pe != NULL;
							pe = pe->pnext, i++)
						{
							if (i > 0)
								fputs(pstate->ifs, stdout);
							fputs(pe->value, stdout);
						}
						if (pstate->do_auto_line_term) {
							fputs(pctx->auto_line_term, stdout);
						} else {
							fputs(pstate->irs, stdout);
						}
					}
					rslls_reset(pstate->pfields);
					continue;
				}
			}

			slls_t* pheader_fields = slls_alloc();
			int i = 0;
			// Walk at most pfields->length nodes. The counter must advance
			// together with the node pointer, as in the comment-passing
			// loops in this function; otherwise the length bound is never
			// applied and the walk only stops at a NULL node.
			for (rsllse_t* pe = pstate->pfields->phead; i < pstate->pfields->length && pe != NULL; pe = pe->pnext, i++) {
				if (*pe->value == 0) {
					fprintf(stderr, "%s: unacceptable empty CSV key at file \"%s\" line %lld.\n",
						MLR_GLOBALS.bargv0, pctx->filename, pstate->ilno);
					exit(1);
				}
				// Transfer pointer-free responsibility from the rslls to the
				// header fields in the header keeper
				slls_append(pheader_fields, pe->value, pe->free_flag);
				pe->free_flag = 0;
			}
			rslls_reset(pstate->pfields);

			pstate->pheader_keeper = lhmslv_get(pstate->pheader_keepers, pheader_fields);
			if (pstate->pheader_keeper == NULL) {
				pstate->pheader_keeper = header_keeper_alloc(NULL, pheader_fields);
				lhmslv_put(pstate->pheader_keepers, pheader_fields, pstate->pheader_keeper,
					NO_FREE); // freed by header-keeper
			} else { // Re-use the header-keeper in the header cache
				slls_free(pheader_fields);
			}

			pstate->expect_header_line_next = FALSE;
			break;
		}
	}

	// Ingest the next data line, if expected
	while (TRUE) {
		int rc = lrec_reader_stdio_csv_get_fields(pstate, pstate->pfields, pctx, FALSE);
		pstate->ilno++; // counted whether or not the read succeeded
		if (rc == FALSE) // EOF
			return NULL;

		// We check for comments here rather than within the parser since it's important
		// for users to be able to comment out lines containing double-quoted newlines.
		if (pstate->comment_string != NULL && pstate->pfields->phead != NULL) {
			if (streqn(pstate->pfields->phead->value, pstate->comment_string, pstate->comment_string_length)) {
				if (pstate->comment_handling == PASS_COMMENTS) {
					int i = 0;
					for (
						rsllse_t* pe = pstate->pfields->phead;
						i < pstate->pfields->length && pe != NULL;
						pe = pe->pnext, i++)
					{
						if (i > 0)
							fputs(pstate->ifs, stdout);
						fputs(pe->value, stdout);
					}
					if (pstate->do_auto_line_term) {
						fputs(pctx->auto_line_term, stdout);
					} else {
						fputs(pstate->irs, stdout);
					}
				}
				rslls_reset(pstate->pfields);
				continue;
			}
		}

		lrec_t* prec =  pstate->use_implicit_header
			? paste_indices_and_data(pstate, pstate->pfields, pctx)
			: paste_header_and_data(pstate, pstate->pfields, pctx);
		rslls_reset(pstate->pfields);
		return prec;
	}
}