示例#1
0
std::shared_ptr<AbstractTable> CSVInput::load(std::shared_ptr<AbstractTable> intable, const compound_metadata_list *meta, const Loader::params &args) {
  cb_data data(intable->columnCount(), _parameters.getUnsafe());
  data.table = intable;
  csv::params params(_parameters.getCSVParams());

  if (detectHeader(args.getBasePath() + _filename)) params.setLineStart(5);

  // Resize the table based on the file size
  data.table->resize(countLines(args.getBasePath() + _filename) - params.getLineStart() + 1);

  try {
    csv::genericParseFile(args.getBasePath() + _filename,
                          (field_cb_t) cb_per_field,
                          (line_cb_t) cb_per_line,
                          &data,
                          params);
  } catch (const csv::ParserError &e) {
    throw Loader::Error(e.what());
  }

  data.table->resize(data.row);
  return intable;
}
示例#2
0
std::shared_ptr<storage::AbstractTable> RawTableLoader::load(std::shared_ptr<storage::AbstractTable> in,
        const storage::compound_metadata_list *ml,
        const Loader::params &args) {



    csv::params params;
    if (detectHeader(args.getBasePath() + _filename)) params.setLineStart(5);

    // Create the result table
    storage::metadata_vec_t v(in->columnCount());
    for(size_t i=0; i < in->columnCount(); ++i) {
        v[i] = in->metadataAt(i);
    }
    auto result = std::make_shared<storage::RawTable>(v);

    // CSV Parsing
    std::ifstream file(args.getBasePath() + _filename, std::ios::binary);
    if (!file || file.bad()) {
        throw csv::ParserError("CSV file '" + _filename + "' does not exist");
    }

    struct csv_parser parser;

    if (!csv_init(&parser, 0)) {
        csv_set_opts(&parser, CSV_APPEND_NULL);
        csv_set_delim(&parser, params.getDelimiter());

        // If there is a header in the file, we will ignore it
        std::string line;
        int line_start = params.getLineStart();

        if (line_start != 1) {
            while (line_start > 1) {
                std::getline(file, line);
                --line_start;
            }
        }

        // Prepare cb data handler
        struct raw_table_cb_data data(v);
        data.table = result;

        const size_t block_size = 16 * 1024;
        char rdbuf [block_size];

        while (file.read(rdbuf, block_size).good()) {
            auto extracted = file.gcount();
            if (extracted == 0)
                break;

            if (csv_parse(&parser,
                          rdbuf,
                          extracted,
                          (field_cb_t) raw_table_cb_per_field,
                          (line_cb_t) raw_table_cb_per_line,
                          (void*) &data) != (size_t) extracted) {
                throw csv::ParserError(csv_strerror(csv_error(&parser)));
            }
        }

        // Parse the rest
        if (csv_parse(&parser,
                      rdbuf,
                      file.gcount(),
                      (field_cb_t) raw_table_cb_per_field,
                      (line_cb_t) raw_table_cb_per_line,
                      (void*) &data) != (size_t) file.gcount()) {
            throw csv::ParserError(csv_strerror(csv_error(&parser)));
        }

        csv_fini(&parser,
                 (field_cb_t) raw_table_cb_per_field,
                 (line_cb_t) raw_table_cb_per_line,
                 (void*) &data);

    }
    csv_free(&parser);
    return result;
}