/**
 * Copy constructor for a loader parameter set.
 *
 * The plain settings (Factory, BasePath, InsertOnly,
 * ModifiableMutableVerticalTable, ReturnsMutableVerticalTable, Compressed)
 * are copied through the getters of @p other. Input and Header are duplicated
 * via their clone() methods when @p other has them set; ReferenceTable is
 * copied directly.
 *
 * @param other parameter set to copy from
 */
Loader::params::params(const Loader::params &other)
    : Factory(other.getFactory()),
      BasePath(other.getBasePath()),
      InsertOnly(other.getInsertOnly()),
      ModifiableMutableVerticalTable(other.getModifiableMutableVerticalTable()),
      ReturnsMutableVerticalTable(other.getReturnsMutableVerticalTable()),
      Compressed(other.getCompressed()) {
  // Input and Header are not part of the initializer list, so they must be
  // assigned on every path; otherwise a null source would leave this object's
  // members indeterminate (assuming they are plain pointers without in-class
  // initializers -- the declaration is not visible here).
  if (other.Input != nullptr) {
    Input = other.Input->clone();
  } else {
    Input = nullptr;
  }

  if (other.Header != nullptr) {
    Header = other.Header->clone();
  } else {
    Header = nullptr;
  }

  // Copying a null reference table is equivalent to leaving it unset, so no
  // null check is needed here.
  ReferenceTable = other.ReferenceTable;
}
std::shared_ptr<AbstractTable> CSVInput::load(std::shared_ptr<AbstractTable> intable, const compound_metadata_list *meta, const Loader::params &args) { cb_data data(intable->columnCount(), _parameters.getUnsafe()); data.table = intable; csv::params params(_parameters.getCSVParams()); if (detectHeader(args.getBasePath() + _filename)) params.setLineStart(5); // Resize the table based on the file size data.table->resize(countLines(args.getBasePath() + _filename) - params.getLineStart() + 1); try { csv::genericParseFile(args.getBasePath() + _filename, (field_cb_t) cb_per_field, (line_cb_t) cb_per_line, &data, params); } catch (const csv::ParserError &e) { throw Loader::Error(e.what()); } data.table->resize(data.row); return intable; }
std::shared_ptr<storage::AbstractTable> RawTableLoader::load(std::shared_ptr<storage::AbstractTable> in, const storage::compound_metadata_list *ml, const Loader::params &args) { csv::params params; if (detectHeader(args.getBasePath() + _filename)) params.setLineStart(5); // Create the result table storage::metadata_vec_t v(in->columnCount()); for(size_t i=0; i < in->columnCount(); ++i) { v[i] = in->metadataAt(i); } auto result = std::make_shared<storage::RawTable>(v); // CSV Parsing std::ifstream file(args.getBasePath() + _filename, std::ios::binary); if (!file || file.bad()) { throw csv::ParserError("CSV file '" + _filename + "' does not exist"); } struct csv_parser parser; if (!csv_init(&parser, 0)) { csv_set_opts(&parser, CSV_APPEND_NULL); csv_set_delim(&parser, params.getDelimiter()); // If there is a header in the file, we will ignore it std::string line; int line_start = params.getLineStart(); if (line_start != 1) { while (line_start > 1) { std::getline(file, line); --line_start; } } // Prepare cb data handler struct raw_table_cb_data data(v); data.table = result; const size_t block_size = 16 * 1024; char rdbuf [block_size]; while (file.read(rdbuf, block_size).good()) { auto extracted = file.gcount(); if (extracted == 0) break; if (csv_parse(&parser, rdbuf, extracted, (field_cb_t) raw_table_cb_per_field, (line_cb_t) raw_table_cb_per_line, (void*) &data) != (size_t) extracted) { throw csv::ParserError(csv_strerror(csv_error(&parser))); } } // Parse the rest if (csv_parse(&parser, rdbuf, file.gcount(), (field_cb_t) raw_table_cb_per_field, (line_cb_t) raw_table_cb_per_line, (void*) &data) != (size_t) file.gcount()) { throw csv::ParserError(csv_strerror(csv_error(&parser))); } csv_fini(&parser, (field_cb_t) raw_table_cb_per_field, (line_cb_t) raw_table_cb_per_line, (void*) &data); } csv_free(&parser); return result; }