Example 1
void genericLineBasedParsing(
    std::istream &file,
    field_cb_t cb_per_field,
    line_cb_t cb_per_line,
    void *data,
    const csv::params &params) {
  struct csv_parser parser;

  if (!csv_init(&parser, 0)) {
    csv_set_opts(&parser, CSV_APPEND_NULL);
    csv_set_delim(&parser, params.getDelimiter());

    std::string line;
    int line_start = params.getLineStart();

    if (line_start != 1) {
      while (line_start > 1) {
        std::getline(file, line);
        --line_start;
      }
    }

    int lineCount = 0;
    while (std::getline(file, line)) {
      ++lineCount;
      line.append("\n");
      if (csv_parse(&parser,
                    line.c_str(),
                    line.size(),
                    cb_per_field,
                    cb_per_line,
                    data) != line.size()) {
        throw ParserError(csv_strerror(csv_error(&parser)));
      }

      if (params.getLineCount() != -1 && lineCount >= params.getLineCount())
        break;

      if (file.bad())
        break;
    }

    csv_fini(&parser,
             cb_per_field,
             cb_per_line,
             data);
  }
  csv_free(&parser);

}
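All of the examples in this section hand field and row callbacks to csv_parse() without showing their definitions. Below is a minimal sketch of what such callbacks can look like; the names counts, count_field and count_row are hypothetical, only the signatures are dictated by libcsv.

#include <csv.h>
#include <stddef.h>

struct counts { size_t fields; size_t rows; };

/* Called once per parsed field; with CSV_APPEND_NULL the field data is
   NUL-terminated. */
static void count_field(void *field, size_t len, void *data) {
  struct counts *c = (struct counts *)data;
  (void)field;
  (void)len;
  c->fields++;
}

/* Called once per completed row; terminator is the character that ended
   the record, or -1 when invoked from csv_fini(). */
static void count_row(int terminator, void *data) {
  struct counts *c = (struct counts *)data;
  (void)terminator;
  c->rows++;
}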
Example 2
/* ==================================================================
 * Parser for comma-separated argument list
 * ================================================================== */
void ParseVarList(int nLn, FILE *flp, char *dataName, char *leftPart, char *argString)	
{

  struct csv_parser p;
  unsigned char options = 0;
  LIST_DATA ldata;

  // fill in data for the callback
  memset(&ldata, '\x0', sizeof(LIST_DATA));
  ldata.nLn = nLn;
  ldata.flp = flp;
  ldata.cnt = 0;
  strcpy(ldata.dataName, dataName);
  strcpy(ldata.lp, leftPart);


  // Initialize csv parser
  if (csv_init(&p, options) != 0)
  {
  	fprintf(stderr, "Failed to initialize csv parser\n");
  	return;
  }
  
  // set white space, eol and delimiter
  csv_set_space_func(&p, is_space_list);
  csv_set_term_func(&p, is_term_list);
  
  csv_set_delim(&p, ',');
  
  size_t argLen = strlen(argString);

  fprintf(stderr, "ParseVarList: argString = %s argLen = %zu\n", argString, argLen);

  memset(inputsLst, '\x0', sizeof(inputsLst));
  InpCnt = 0;

  if (csv_parse(&p, argString, argLen, cbProcessListElement, NULL, &ldata) != argLen)
  {
    fprintf(stderr, "ParseVarList: %s\n", csv_strerror(csv_error(&p)));
    csv_free(&p);
    return;
  }
  
  csv_fini(&p, cbProcessListElement, NULL, &ldata);
  csv_free(&p);
}
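Example 2 installs custom whitespace and terminator predicates with csv_set_space_func() and csv_set_term_func() but does not show them. A plausible sketch, assuming blanks and tabs are the only padding characters and CR/LF the only record terminators (the project's real is_space_list/is_term_list may differ):

/* libcsv only requires the signature int f(unsigned char); a nonzero
   return marks the character as space / as a record terminator. */
static int is_space_list(unsigned char c) {
  return c == ' ' || c == '\t';
}

static int is_term_list(unsigned char c) {
  return c == '\n' || c == '\r';
}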
Example 3
int main (int argc, char ** argv){
	if (argc != 4){
		printf("Usage: %s nodes.csv ways.csv direct.csv\n",argv[0]);
		return 1;
	}
	char * nodescsvname = argv[1];
	char * wayscsvname = argv[2];
	char * directcsvname = argv[3];
	
	struct csv_parser parser;
	csv_init(&parser,CSV_APPEND_NULL);
	csv_set_delim(&parser,';');

	struct parse_t * p_struct;
	p_struct = malloc(sizeof(struct parse_t));
	p_struct->state=0;
	p_struct->count=0;
	p_struct->ok=1;
	GARY_INIT(p_struct->vertices,0);
	GARY_INIT(p_struct->edges,0);

	parseFile(nodescsvname,&parser,p_struct,node_item_cb,node_line_cb);

	nodesIdx_refresh(GARY_SIZE(p_struct->vertices),p_struct->vertices);
	
	parseFile(wayscsvname,&parser,p_struct,way_item_cb,way_line_cb);
	parseFile(directcsvname,&parser,p_struct,direct_item_cb,direct_line_cb);

	Graph__Graph * graph;
	graph = malloc(sizeof(Graph__Graph));
	graph__graph__init(graph);
	graph->n_edges=GARY_SIZE(p_struct->edges);
	graph->edges=p_struct->edges;
	graph->n_vertices=GARY_SIZE(p_struct->vertices);
	graph->vertices=p_struct->vertices;

	printf("Created graph with %d edges and %d vertices\n",graph->n_edges,graph->n_vertices);

	struct vertexedges_t * vedges;
	vedges = makeVertexEdges(graph);
	largestComponent(graph,vedges);
	saveSearchGraph(graph,"../data/postgis-graph.pbf");

	csv_free(&parser);
	return 0;
}
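The parseFile() helper called three times in Example 3 is not part of the excerpt. A minimal sketch, under the assumption that it simply streams one file through the already-initialized parser (buffer size and error handling here are guesses, not the project's actual code):

#include <csv.h>
#include <stdio.h>

static void parseFile(const char *name, struct csv_parser *parser, struct parse_t *p,
                      void (*field_cb)(void *, size_t, void *),
                      void (*line_cb)(int, void *)) {
	char buf[4096];
	size_t n;
	FILE *f = fopen(name, "rb");
	if (!f) {
		perror(name);
		return;
	}
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0) {
		if (csv_parse(parser, buf, n, field_cb, line_cb, p) != n) {
			fprintf(stderr, "%s: %s\n", name, csv_strerror(csv_error(parser)));
			break;
		}
	}
	/* csv_fini() flushes the final record and resets the parser for the next file */
	csv_fini(parser, field_cb, line_cb, p);
	fclose(f);
}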
Example 4
File: csv.cpp Project: Zuko/csv2xls
void csv_init_parser(csv_file_t &csvin)
{
#if CSV_MAJOR >= 3
#define PARSER_OPTIONS CSV_APPEND_NULL
#else
#define PARSER_OPTIONS 0
#endif
	unsigned char parser_options = PARSER_OPTIONS;

	if (csv_init(&csvin.csv_file_parser, parser_options) != 0)
	{
		fprintf(stderr, "Failed to initialize csv parser\n");
		exit(EXIT_FAILURE);
	}

	csv_set_space_func(&csvin.csv_file_parser, csv_is_space);

	csv_set_term_func(&csvin.csv_file_parser, csv_is_term);

	csv_set_delim(&csvin.csv_file_parser, csvin.tab_delimter);
}/* ----- end of function csv_init_parser ----- */
Example 5
void genericParse(
    /*std::istream &file,*/
    std::string filename,
    field_cb_t cb_per_field,
    line_cb_t cb_per_line,
    void *data,
    const csv::params &params
                  ) {
  // Open the file
  typedef std::unique_ptr<std::FILE, int (*)(std::FILE *)> unique_file_ptr;
  unique_file_ptr file(fopen(filename.c_str(), "rb"), fclose);
  if (!file) {
    throw ParserError(std::string("File Opening Failed") +  std::strerror(errno));
  }

  struct csv_parser parser;

  if (!csv_init(&parser, 0)) {
    csv_set_opts(&parser, CSV_APPEND_NULL);
    csv_set_delim(&parser, params.getDelimiter());

    int line_start = params.getLineStart();
    if (line_start > 1) {
      int c;
      do {
        c = fgetc(file.get());
        if (c == '\n') --line_start;
      } while (c != EOF && line_start > 1);
    }

    // Block size for reading: defaults to 1 MB, can be overridden via HYRISE_LOAD_BLOCK_SIZE
    size_t block_size;
    if (getenv("HYRISE_LOAD_BLOCK_SIZE"))
      block_size = strtoul(getenv("HYRISE_LOAD_BLOCK_SIZE"), nullptr, 0);
    else
      block_size = 1024 * 1024;

    // Bytes returned by the last fread()
    size_t readBytes = 0;
    char rdbuf[block_size];

    // Read the file until we cannot extract more bytes
    do {
      readBytes = fread(rdbuf, 1, block_size, file.get());
      if (csv_parse(&parser,
                    rdbuf,
                    readBytes,
                    cb_per_field,
                    cb_per_line,
                    data) != (size_t) readBytes) {
        throw ParserError(csv_strerror(csv_error(&parser)));
      }
    } while (readBytes == block_size);

    if (ferror(file.get())) {
      throw ParserError("Could not read file");
    }

    csv_fini(&parser,
             cb_per_field,
             cb_per_line,
             data);
  }
  csv_free(&parser);
}
Example 6
File: rcsv.c Project: fiksu/rcsv
/* An rb_rescue()-compatible Ruby pseudo-method that handles the actual parsing */
VALUE rcsv_raw_parse(VALUE ensure_container) {
  /* Unpacking multiple variables from a single Ruby VALUE */
  VALUE options = rb_ary_entry(ensure_container, 0);
  VALUE csvio   = rb_ary_entry(ensure_container, 1);
  struct rcsv_metadata * meta = (struct rcsv_metadata *)NUM2LONG(rb_ary_entry(ensure_container, 2));
  struct csv_parser * cp = (struct csv_parser *)NUM2LONG(rb_ary_entry(ensure_container, 3));

  /* Helper temporary variables */
  VALUE option, csvstr, buffer_size;

  /* libcsv-related temporary variables */
  char * csv_string;
  size_t csv_string_len;
  int error;

  /* Generic iterator */
  size_t i = 0;

  /* IO buffer size can be controlled via an option */
  buffer_size = rb_hash_aref(options, ID2SYM(rb_intern("buffer_size")));

  /* By default, parse as Array of Arrays */
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_as_hash")));
  if (option && (option != Qnil)) {
    meta->row_as_hash = true;
  }

  /* :col_sep sets the column separator, default is comma (,) */
  option = rb_hash_aref(options, ID2SYM(rb_intern("col_sep")));
  if (option != Qnil) {
    csv_set_delim(cp, (unsigned char)*StringValuePtr(option));
  }

  /* :quote_char sets the character used for quoting data; default is double-quote (") */
  option = rb_hash_aref(options, ID2SYM(rb_intern("quote_char")));
  if (option != Qnil) {
    csv_set_quote(cp, (unsigned char)*StringValuePtr(option));
  }

  /* Specify how many rows to skip from the beginning of CSV */
  option = rb_hash_aref(options, ID2SYM(rb_intern("offset_rows")));
  if (option != Qnil) {
    meta->offset_rows = (size_t)NUM2INT(option);
  }

  /* Specify the character encoding used for parsed output strings */
  option = rb_hash_aref(options, ID2SYM(rb_intern("output_encoding")));
  if (option && (option != Qnil)) {
    meta->encoding_index = RB_ENC_FIND_INDEX(StringValueCStr(option));
  }

  /* :only_rows is a list of values where row is only parsed
     if its fields match those in the passed array.
     [nil, nil, ["ABC", nil, 1]] skips all rows where 3rd column isn't equal to "ABC", nil or 1 */
  option = rb_hash_aref(options, ID2SYM(rb_intern("only_rows")));
  if (option != Qnil) {
    meta->num_only_rows = (size_t)RARRAY_LEN(option);
    meta->only_rows = (VALUE *)malloc(meta->num_only_rows * sizeof(VALUE));

    for (i = 0; i < meta->num_only_rows; i++) {
      VALUE only_row = rb_ary_entry(option, i);
      meta->only_rows[i] = validate_filter_row("only_rows", only_row);
    }
  }

  /* :except_rows is a list of values where row is only parsed
     if its fields don't match those in the passed array.
     [nil, nil, ["ABC", nil, 1]] skips all rows where 3rd column is equal to "ABC", nil or 1 */
  option = rb_hash_aref(options, ID2SYM(rb_intern("except_rows")));
  if (option != Qnil) {
    meta->num_except_rows = (size_t)RARRAY_LEN(option);
    meta->except_rows = (VALUE *)malloc(meta->num_except_rows * sizeof(VALUE));

    for (i = 0; i < meta->num_except_rows; i++) {
      VALUE except_row = rb_ary_entry(option, i);
      meta->except_rows[i] = validate_filter_row("except_rows", except_row);
    }
  }

  /* :row_defaults is an array of default values that are assigned to fields containing empty strings
     according to matching field positions */
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_defaults")));
  if (option != Qnil) {
    meta->num_row_defaults = RARRAY_LEN(option);
    meta->row_defaults = (VALUE*)malloc(meta->num_row_defaults * sizeof(VALUE*));

    for (i = 0; i < meta->num_row_defaults; i++) {
      VALUE row_default = rb_ary_entry(option, i);
      meta->row_defaults[i] = row_default;
    }
  }

  /* :row_conversions specifies Ruby types that CSV field values should be converted into.
     Each char of row_conversions string represents Ruby type for CSV field with matching position. */
  option = rb_hash_aref(options, ID2SYM(rb_intern("row_conversions")));
  if (option != Qnil) {
    meta->num_row_conversions = RSTRING_LEN(option);
    meta->row_conversions = StringValuePtr(option);
  }

 /* Column names should be declared explicitly when parsing fields as Hashes */
  if (meta->row_as_hash) { /* Only matters for hash results */
    option = rb_hash_aref(options, ID2SYM(rb_intern("column_names")));
    if (option == Qnil) {
      rb_raise(rcsv_parse_error, ":row_as_hash requires :column_names to be set.");
    } else {
      meta->last_entry = rb_hash_new();

      meta->num_columns = (size_t)RARRAY_LEN(option);
      meta->column_names = (VALUE*)malloc(meta->num_columns * sizeof(VALUE*));

      for (i = 0; i < meta->num_columns; i++) {
        meta->column_names[i] = rb_ary_entry(option, i);
      }
    }
  } else {
    meta->last_entry = rb_ary_new();
  }

  while(true) {
    csvstr = rb_funcall(csvio, rb_intern("read"), 1, buffer_size);
    if ((csvstr == Qnil) || (RSTRING_LEN(csvstr) == 0)) { break; }

    csv_string = StringValuePtr(csvstr);
    csv_string_len = strlen(csv_string);

    /* Actual parsing and error handling */
    if (csv_string_len != csv_parse(cp, csv_string, csv_string_len,
                                    &end_of_field_callback, &end_of_line_callback, meta)) {
      error = csv_error(cp);
      switch(error) {
        case CSV_EPARSE:
          rb_raise(rcsv_parse_error, "Error when parsing malformed data");
          break;
        case CSV_ENOMEM:
          rb_raise(rcsv_parse_error, "No memory");
          break;
        case CSV_ETOOBIG:
          rb_raise(rcsv_parse_error, "Field data is too large");
          break;
        case CSV_EINVALID:
          rb_raise(rcsv_parse_error, "%s", (const char *)csv_strerror(error));
          break;
        default:
          rb_raise(rcsv_parse_error, "Failed due to unknown reason");
      }
    }
  }

  /* Flushing libcsv's buffer */
  csv_fini(cp, &end_of_field_callback, &end_of_line_callback, meta);

  return Qnil;
}
Example 7
std::shared_ptr<storage::AbstractTable> RawTableLoader::load(std::shared_ptr<storage::AbstractTable> in,
        const storage::compound_metadata_list *ml,
        const Loader::params &args) {

    csv::params params;
    if (detectHeader(args.getBasePath() + _filename)) params.setLineStart(5);

    // Create the result table
    storage::metadata_vec_t v(in->columnCount());
    for(size_t i=0; i < in->columnCount(); ++i) {
        v[i] = in->metadataAt(i);
    }
    auto result = std::make_shared<storage::RawTable>(v);

    // CSV Parsing
    std::ifstream file(args.getBasePath() + _filename, std::ios::binary);
    if (!file || file.bad()) {
        throw csv::ParserError("CSV file '" + _filename + "' does not exist");
    }

    struct csv_parser parser;

    if (!csv_init(&parser, 0)) {
        csv_set_opts(&parser, CSV_APPEND_NULL);
        csv_set_delim(&parser, params.getDelimiter());

        // If there is a header in the file, we will ignore it
        std::string line;
        int line_start = params.getLineStart();

        if (line_start != 1) {
            while (line_start > 1) {
                std::getline(file, line);
                --line_start;
            }
        }

        // Prepare cb data handler
        struct raw_table_cb_data data(v);
        data.table = result;

        const size_t block_size = 16 * 1024;
        char rdbuf [block_size];

        while (file.read(rdbuf, block_size).good()) {
            auto extracted = file.gcount();
            if (extracted == 0)
                break;

            if (csv_parse(&parser,
                          rdbuf,
                          extracted,
                          (field_cb_t) raw_table_cb_per_field,
                          (line_cb_t) raw_table_cb_per_line,
                          (void*) &data) != (size_t) extracted) {
                throw csv::ParserError(csv_strerror(csv_error(&parser)));
            }
        }

        // Parse the rest
        if (csv_parse(&parser,
                      rdbuf,
                      file.gcount(),
                      (field_cb_t) raw_table_cb_per_field,
                      (line_cb_t) raw_table_cb_per_line,
                      (void*) &data) != (size_t) file.gcount()) {
            throw csv::ParserError(csv_strerror(csv_error(&parser)));
        }

        csv_fini(&parser,
                 (field_cb_t) raw_table_cb_per_field,
                 (line_cb_t) raw_table_cb_per_line,
                 (void*) &data);

    }
    csv_free(&parser);
    return result;
}
Example 8
readstat_error_t readstat_parse_csv(readstat_parser_t *parser, const char *path, const char *jsonpath, struct csv_metadata* md, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    readstat_off_t file_size = 0;
    ssize_t bytes_read;
    struct csv_parser csvparser = {0}; /* zero-init so csv_free() in cleanup is safe if csv_init() is never reached */
    struct csv_parser *p = &csvparser;
    char buf[BUFSIZ];
    size_t* column_width = md->column_width;
    md->pass = column_width ? 2 : 1;
    md->open_row = 0;
    md->columns = 0;
    md->_rows = md->rows;
    md->rows = 0;
    md->parser = parser;
    md->user_ctx = user_ctx;
    md->json_md = NULL;

    if ((md->json_md = get_json_metadata(jsonpath)) == NULL) {
        fprintf(stderr, "Could not get JSON metadata\n");
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (io->open(path, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }

    file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx);
    if (file_size == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (csv_init(p, CSV_APPEND_NULL) != 0)
    {
        retval = READSTAT_ERROR_OPEN;
        goto cleanup;
    }
    unsigned char sep = get_separator(md->json_md);
    csv_set_delim(p, sep);
    
    while ((bytes_read = io->read(buf, sizeof(buf), io->io_ctx)) > 0)
    {
        if (csv_parse(p, buf, bytes_read, csv_metadata_cell, csv_metadata_row, md) != bytes_read)
        {
            fprintf(stderr, "Error while parsing file: %s\n", csv_strerror(csv_error(p)));
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
    }
    csv_fini(p, csv_metadata_cell, csv_metadata_row, md);
    if (!md->open_row) {
        md->rows--;
    }
    if (parser->info_handler && md->pass == 1) {
        parser->info_handler(md->rows, md->_columns, user_ctx);
    }

cleanup:
    if (md->variables) {
        free(md->variables);
        md->variables = NULL;
    }
    if (md->is_date) {
        free(md->is_date);
        md->is_date = NULL;
    }
    if (md->json_md) {
        free_json_metadata(md->json_md);
        md->json_md = NULL;
    }
    csv_free(p);
    io->close(io->io_ctx);
    return retval;
}