int loadSameNameUnoverlapped() { FILE *fp; int len=0; struct csv_parser *p; char buf[1024]; size_t bytes_read; const char *file_name="data/NewSameNameUnoverlapped.csv"; //const char *file_name="data/SameNameUnoverlapped.csv"; if((p=(struct csv_parser *)malloc(sizeof(struct csv_parser))) == 0) return -1; if ((fp = fopen(file_name,"r"))== NULL) fprintf(stderr, "Failed to open %s\n",file_name); csv_init(p, (unsigned char)0); while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) if (csv_parse(p, buf, bytes_read, col, row,&len) != bytes_read) { fprintf(stderr, "Error while parsing file: %s\n", csv_strerror(csv_error(p))); } qsort(aids,len,sizeof(int),intcmp); aidlen = len; csv_free(p); fclose(fp); for(int i=0;i<100;i++) printf("%d | ",aids[i]); printf("\n%d",aidlen); return 1; }
/*
 * Run SOMA2 with an external fitness function over data loaded from a
 * CSV file.
 *
 * ssetup/ffunc/nparams/params: optimizer configuration.
 * file: CSV data source; sr: receives results and elapsed wall time.
 *
 * Returns OK on success, ERR if the parser can't be initialized,
 * ERR_FILE if the file can't be read.
 */
int SOMAexternal (SOMAsetup *ssetup, FitnessFunction ffunc, int nparams,
                  Parameter *params, const char *file, SOMAreturn *sr)
{
    Array2D array;
    char *buf;
    long len;
    struct csv_parser csvp;
    clock_t begin, end;

    array_init (&array);
    array_append_column (&array);  /* blank column for external fitness function */
    array.currentcolumn++;

    /* Parse CSV file */
    if (csv_init (&csvp, CSV_APPEND_NULL) != 0) {
        array_delete (&array);     /* BUG FIX: array leaked on this path */
        return ERR;
    }
    if ((buf = read_file_into_buffer (file, &len)) == NULL) {
        csv_free (&csvp);          /* BUG FIX: parser leaked on this path */
        array_delete (&array);
        return ERR_FILE;
    }
    csv_parse (&csvp, buf, len, csvcallback1, csvcallback2e, (void *) &array);
    csv_fini (&csvp, csvcallback1, csvcallback2e, (void *) &array);
    csv_free (&csvp);
    free (buf);  /* BUG FIX: file buffer leaked — assumes
                  * read_file_into_buffer mallocs; TODO confirm */

    /* Initialize output struct */
    SOMAreturn_init (sr, ssetup->Migrations, nparams);

    begin = clock ();
    SOMA2 (ssetup, nparams, params, sr, &array, ffunc);  /* SOMA call */
    end = clock ();
    sr->time = (double) (end - begin) / CLOCKS_PER_SEC;

    array_delete (&array);
    return OK;
}
/** * Initialize context pointers */ void context_init(struct csv_context *ctx) { // Init our passthrough buffer ctx->csv_buf = cbuf_init(BUFFER_SIZE); // Initialize our blocking queue fq_init(&ctx->io_queue, BG_QUEUE_MAX); // Initialize our CSV parser if(csv_init(&ctx->parser, 0) != 0) { fprintf(stderr, "Couldn't initialize CSV parser!\n"); exit(EXIT_FAILURE); } // Set our csv block realloc size csv_set_blk_size(&ctx->parser, CSV_BLK_SIZE); // Initialize our thread count ctx->thread_count = IO_THREADS_DEFAULT; // Default to no group column ctx->gcol = -1; // Default to not gzipping our output files ctx->gzip = 0; // Header injection flags ctx->use_header = 0; ctx->count_header = 0; ctx->header_len = 0; }
// CSV to Qucs conversion. int csv2qucs (struct actionset_t * action, char * infile, char * outfile) { int ret = 0; csv_init (); if ((csv_in = open_file (infile, "r")) == NULL) { ret = -1; } else if (csv_parse () != 0) { ret = -1; } else if (csv_check () != 0) { ret = -1; } csv_lex_destroy (); if (csv_in) fclose (csv_in); if (ret) { csv_destroy (); return -1; } if (!strcmp (action->out, "qucsdata")) { csv_result->setFile (outfile); qucsdata_producer (csv_result); } csv_destroy (); return 0; }
/*
 * Run SOMA with a muParser expression as the fitness function over data
 * loaded from a CSV file.
 *
 * Returns OK, or ERR (parser init), ERR_FILE (read), ERR_PARAMS
 * (parameter definition), ERR_COLUMNS (expression/data mismatch),
 * ERR_EXPR (expression evaluation) on failure.
 */
int SOMAascii (SOMAsetup *ssetup, const char *expr, int nparams,
               Parameter *params, const char *file, SOMAreturn *sr)
{
    Array2D array;
    int i;
    char *buf;
    long len;
    struct csv_parser csvp;
    clock_t begin, end;
    MuExpr me;

    array_init (&array);

    /* Parse CSV file */
    if (csv_init (&csvp, CSV_APPEND_NULL) != 0) {
        array_delete (&array);     /* BUG FIX: array leaked on this path */
        return ERR;
    }
    if ((buf = read_file_into_buffer (file, &len)) == NULL) {
        csv_free (&csvp);          /* BUG FIX: parser leaked on this path */
        array_delete (&array);
        return ERR_FILE;
    }
    csv_parse (&csvp, buf, len, csvcallback1, csvcallback2, (void *) &array);
    csv_fini (&csvp, csvcallback1, csvcallback2, (void *) &array);
    csv_free (&csvp);
    free (buf);  /* BUG FIX: file buffer leaked — assumes
                  * read_file_into_buffer mallocs; TODO confirm */

    muexpr_init (&me, expr);
    for (i = 0; i < nparams; i++)
        if (defpar (&me, params + i, i) != OK) {
            /* BUG FIX: the original returned BEFORE these two cleanup
             * calls, leaving them unreachable and leaking array and me. */
            array_delete (&array);
            muexpr_free (&me);
            return ERR_PARAMS;
        }
    findvars (&me, VAR_NAME_PREFIX);

    /* CSV file and expression don't match */
    if (me.nvars != array.ncolumns - 1) {
        array_delete (&array);
        muexpr_free (&me);
        return ERR_COLUMNS;
    }
    muexpr_eval (&me);
    if (mupError (me.hparser)) {
        array_delete (&array);
        muexpr_free (&me);
        return ERR_EXPR;
    }

    /* Initialize output struct */
    SOMAreturn_init (sr, ssetup->Migrations, nparams);

    begin = clock ();
    SOMA (ssetup, nparams, params, sr, &array, &me);  /* SOMA call */
    end = clock ();
    sr->time = (double) (end - begin) / CLOCKS_PER_SEC;

    array_delete (&array);
    muexpr_free (&me);
    return OK;
}
int main (int argc, char *argv[]) { char buf[1024]; size_t i; struct csv_parser p; FILE *infile, *outfile; csv_init(&p, 0); if (argc != 3) { fprintf(stderr, "Usage: csv_fix infile outfile\n"); return EXIT_FAILURE; } if (!strcmp(argv[1], argv[2])) { fprintf(stderr, "Input file and output file must not be the same!\n"); exit(EXIT_FAILURE); } infile = fopen(argv[1], "rb"); if (infile == NULL) { fprintf(stderr, "Failed to open file %s: %s\n", argv[1], strerror(errno)); exit(EXIT_FAILURE); } outfile = fopen(argv[2], "wb"); if (outfile == NULL) { fprintf(stderr, "Failed to open file %s: %s\n", argv[2], strerror(errno)); fclose(infile); exit(EXIT_FAILURE); } while ((i=fread(buf, 1, 1024, infile)) > 0) { if (csv_parse(&p, buf, i, cb1, cb2, outfile) != i) { fprintf(stderr, "Error parsing file: %s\n", csv_strerror(csv_error(&p))); fclose(infile); fclose(outfile); remove(argv[2]); exit(EXIT_FAILURE); } } csv_fini(&p, cb1, cb2, outfile); csv_free(&p); if (ferror(infile)) { fprintf(stderr, "Error reading from input file"); fclose(infile); fclose(outfile); remove(argv[2]); exit(EXIT_FAILURE); } fclose(infile); fclose(outfile); return EXIT_SUCCESS; }
/*
 * Allocate a new PTM message context with an encoded header record.
 *
 * cmd_id/type: stamped into the header and recorded on the context.
 * in_ctxt:     optional existing context; when given, its header and
 *              first data record are cloned into the new csv.
 * out_ctxt:    receives the newly allocated ptm_lib_msg_ctxt_t
 *              (ownership transfers to the caller).
 *
 * Returns 0 on success, -1 on allocation or encoding failure (all
 * partially built state is released before returning).
 */
int ptm_lib_init_msg(ptm_lib_handle_t *hdl, int cmd_id, int type,
                     void *in_ctxt, void **out_ctxt)
{
    ptm_lib_msg_ctxt_t *p_ctxt;
    ptm_lib_msg_ctxt_t *p_in_ctxt = in_ctxt;
    csv_t *csv;
    csv_record_t *rec, *d_rec;

    /* Initialize csv for using discrete record buffers */
    csv = csv_init(NULL, NULL, PTMLIB_MSG_SZ);
    if (!csv) {
        ERRLOG("%s: Could not allocate csv \n", __FUNCTION__);
        return -1;
    }

    rec = _ptm_lib_encode_header(csv, NULL, 0, PTMLIB_MSG_VERSION, type,
                                 cmd_id, hdl->client_name);
    if (!rec) {
        ERRLOG("%s: Could not allocate record \n", __FUNCTION__);
        csv_clean(csv);
        csv_free(csv);
        return -1;
    }

    p_ctxt = calloc(1, sizeof(*p_ctxt));
    if (!p_ctxt) {
        ERRLOG("%s: Could not allocate context \n", __FUNCTION__);
        csv_clean(csv);
        csv_free(csv);
        return -1;
    }

    /* context takes ownership of the csv */
    p_ctxt->csv = csv;
    p_ctxt->cmd_id = cmd_id;
    p_ctxt->type = type;
    *(ptm_lib_msg_ctxt_t **)out_ctxt = p_ctxt;

    /* caller supplied a context to initialize with? */
    if (p_in_ctxt) {
        /* insert the hdr rec */
        rec = csv_record_iter(p_in_ctxt->csv);
        csv_clone_record(p_in_ctxt->csv, rec, &d_rec);
        csv_insert_record(csv, d_rec);
        /* insert the data rec */
        rec = csv_record_iter_next(rec);
        csv_clone_record(p_in_ctxt->csv, rec, &d_rec);
        csv_insert_record(csv, d_rec);
    }
    return 0;
}
void genericLineBasedParsing( std::istream &file, field_cb_t cb_per_field, line_cb_t cb_per_line, void *data, const csv::params ¶ms) { struct csv_parser parser; if (!csv_init(&parser, 0)) { csv_set_opts(&parser, CSV_APPEND_NULL); csv_set_delim(&parser, params.getDelimiter()); std::string line; int line_start = params.getLineStart(); if (line_start != 1) { while (line_start > 1) { std::getline(file, line); --line_start; } } int lineCount = 0; while (std::getline(file, line)) { ++lineCount; line.append("\n"); if (csv_parse(&parser, line.c_str(), line.size(), cb_per_field, cb_per_line, data) != line.size()) { throw ParserError(csv_strerror(csv_error(&parser))); } if (params.getLineCount() != -1 && lineCount >= params.getLineCount()) break; if (file.bad()) break; } csv_fini(&parser, cb_per_field, cb_per_line, data); } csv_free(&parser); }
/*
 * Count CSV fields arriving on stdin (via the field_count callback) and
 * print the total.
 */
int main (int argc, char* argv[])
{
  struct csv_parser parser = {0};
  csv_init (&parser, CSV_APPEND_NULL);

  char *buf = malloc (READ_SZ);
  if (!buf) {                       /* BUG FIX: malloc was unchecked */
    csv_free (&parser);
    return EXIT_FAILURE;
  }

  /* BUG FIX: read(2) returns ssize_t; the original stored it into a
   * size_t, so a read error (-1) wrapped to SIZE_MAX and passed the
   * `> 0` test, feeding garbage to the parser. */
  ssize_t nread;
  int count = 0;
  while ((nread = read (0, buf, READ_SZ)) > 0) {
    csv_parse (&parser, buf, (size_t) nread, field_count, 0, &count);
  }

  /* BUG FIX: flush the final (possibly newline-less) record so its
   * fields are counted too. */
  csv_fini (&parser, field_count, 0, &count);

  printf ("%d\n", count);
  free (buf);
  csv_free (&parser);
  return EXIT_SUCCESS;
}
/*
 * Load a CSV file into a linked list of CLUSTER_ITEMs for k-means.
 *
 * labelColumn:  column index holding each item's label.
 * startColumn:  first feature column.
 * featureCount: number of feature columns to read per row.
 *
 * Exits the process on parser-init, open, or parse failure.
 * Returns the head of the item list built by the two callbacks.
 */
CLUSTER_ITEM* KMeansLoadCSV(char *filename, int labelColumn, int startColumn, int featureCount) {
    FILE *fp;
    char buf[1024];
    size_t bytes_read;
    struct _KMeansStructCSV c;
    struct csv_parser p;
    CLUSTER_ITEM *result;

    /* Setup csvlib to read the CSV file */
    if (csv_init(&p, CSV_APPEND_NULL) != 0)
        exit(EXIT_FAILURE);

    fp = fopen(filename, "rb");
    if (!fp) {
        printf("Could not open: %s\n", filename);
        exit(EXIT_FAILURE);
    }

    /* Parse state shared with the two callbacks */
    c.row = 0;
    c.col = 0;
    c.startCol = startColumn;
    c.featureCount = featureCount;
    c.labelCol = labelColumn;
    c.item = c.prevItem = c.firstItem = NULL;
    /* scratch row of features — NOTE(review): calloc result unchecked */
    c.features = (double*)calloc(featureCount,sizeof(double));

    /* Loop over the contents. It is important to note that we are not
       reading line by line, at this level. Rather, we are passing blocks
       off to csvlib. Then csvlib calls our two callbacks as needed. */
    while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) {
        if (csv_parse(&p, buf, bytes_read, _KMeansCallbackColumn, _KMeansCallbackRow, &c) != bytes_read) {
            fprintf(stderr, "Error while parsing file: %s\n", csv_strerror(csv_error(&p)) );
            exit(EXIT_FAILURE);
        }
    }

    result = c.firstItem;

    /* Handle any final data. May call the callbacks once more */
    csv_fini(&p, _KMeansCallbackColumn, _KMeansCallbackRow, &c);

    /* Cleanup */
    free(c.features);
    fclose(fp);
    csv_free(&p);
    return result;
}
/*
 * Allocate and initialize a csvfile datasource.
 *
 * filter_mgr:   filter manager recorded on the datasource (not owned).
 * csvfile_file: path to the csv file; duplicated into the datasource.
 *
 * Returns the new datasource, or NULL on any failure.  Partial state is
 * released via bgpstream_csvfile_datasource_destroy, which must
 * therefore tolerate a NULL / partially-built argument.
 */
bgpstream_csvfile_datasource_t *
bgpstream_csvfile_datasource_create(bgpstream_filter_mgr_t *filter_mgr,
                                    char *csvfile_file) {
  bgpstream_debug("\t\tBSDS_CSVFILE: create csvfile_ds start");
  bgpstream_csvfile_datasource_t *csvfile_ds = (bgpstream_csvfile_datasource_t*)
    malloc_zero(sizeof(bgpstream_csvfile_datasource_t));
  if(csvfile_ds == NULL) {
    bgpstream_log_err("\t\tBSDS_CSVFILE: create csvfile_ds can't allocate memory");
    goto err;
  }
  if(csvfile_file == NULL) {
    bgpstream_log_err("\t\tBSDS_CSVFILE: create csvfile_ds no file provided");
    goto err;
  }
  if((csvfile_ds->csvfile_file = strdup(csvfile_file)) == NULL) {
    bgpstream_log_err("\t\tBSDS_CSVFILE: can't allocate memory for filename");
    goto err;
  }
  /* cvs file parser options */
  unsigned char options = CSV_STRICT | CSV_REPALL_NL | CSV_STRICT_FINI |
    CSV_APPEND_NULL | CSV_EMPTY_IS_NULL;
  if(csv_init(&(csvfile_ds->parser), options) !=0) {
    bgpstream_log_err("\t\tBSDS_CSVFILE: can't initialize csv parser");
    goto err;
  }
  /* bookkeeping defaults: start at the first CSV column, no results,
   * no timestamps processed yet */
  csvfile_ds->current_field = CSVFILE_PATH;
  csvfile_ds->filter_mgr = filter_mgr;
  csvfile_ds->input_mgr = NULL;
  csvfile_ds->num_results = 0;
  csvfile_ds->max_ts_infile = 0;
  csvfile_ds->last_processed_ts = 0;
  csvfile_ds->max_accepted_ts = 0;
  bgpstream_debug("\t\tBSDS_CSVFILE: create csvfile_ds end");
  return csvfile_ds;
err:
  bgpstream_csvfile_datasource_destroy(csvfile_ds);
  return NULL;
}
/* ================================================================== * Parser for comma-separated argument list * ================================================================== */ void ParseVarList(int nLn, FILE *flp, char *dataName, char *leftPart, char *argString) { struct csv_parser p; unsigned char options = 0; LIST_DATA ldata; // fill in data for the callback memset(&ldata, '\x0', sizeof(LIST_DATA)); ldata.nLn = nLn; ldata.flp = flp; ldata.cnt = 0; strcpy(ldata.dataName, dataName); strcpy(ldata.lp, leftPart); // Initialize csv parser if (csv_init(&p, options) != 0) { fprintf(stderr, "Failed to initialize csv parser\n"); return; } // set white space, eol and delimiter csv_set_space_func(&p, is_space_list); csv_set_term_func(&p, is_term_list); csv_set_delim(&p, ','); unsigned int agrLen = strlen(argString); fprintf(stderr, "ParseVarList: argString = %s argLen - %d\n", argString, agrLen); memset(inputsLst, '\x0', sizeof(inputsLst)); InpCnt = 0; if (csv_parse(&p, argString, strlen(argString), cbProcessListElement, NULL, &ldata) != agrLen) { fprintf(stderr, "ParseVarList: %s\n", csv_strerror(csv_error(&p))); return; } csv_fini(&p, cbProcessListElement, NULL, &ldata); csv_free(&p); return; }
/* This example shows how to simply read a CSV file. This example reads the iris data set. The output is shown here. Reading CSV file: ./datasets/iris.csv Field: "sepal_length" Field: "sepal_width" Field: "petal_length" Field: "petal_width" Field: "class" Row done Field: "5.1" Field: "3.5" Field: "1.4" Field: "0.2" Field: "Iris-setosa" Row done Field: "4.9" Field: "3.0" Field: "1.4" Field: "0.2" Field: "Iris-setosa" Row done Field: "4.7" Field: "3.2" Field: "1.3" Field: "0.2" Field: "Iris-setosa" ... Row done Field: "6.5" Field: "3.0" Field: "5.2" Field: "2.0" Field: "Iris-virginica" Row done Field: "6.2" Field: "3.4" Field: "5.4" Field: "2.3" Field: "Iris-virginica" Row done Field: "5.9" Field: "3.0" Field: "5.1" Field: "1.8" Field: "Iris-virginica" Row done 755 fields, 151 rows */ void ExampleReadCSV(int argIndex, int argc, char **argv) { char filename[FILENAME_MAX]; FILE *fp; struct csv_parser p; char buf[1024]; size_t bytes_read; struct counts c = {0, 0}; if( argIndex>=argc ) { LocateFile("iris.csv",filename,FILENAME_MAX); } else { strncpy(filename,argv[argIndex],FILENAME_MAX); } printf("Reading CSV file: %s\n", filename); /* Setup csvlib to read the CSV file */ if (csv_init(&p, CSV_APPEND_NULL) != 0) exit(EXIT_FAILURE); fp = fopen(filename, "rb"); if (!fp) { printf("Could not open: %s\n", filename); exit(EXIT_FAILURE); } /* Loop over the contents. It is important to note that we are not reading line by line, at this level. Rather, we are passing blocks off to csvlib. Then csvlib calls our two callbacks as needed. */ while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) if (csv_parse(&p, buf, bytes_read, CallbackColumn, CallbackRow, &c) != bytes_read) { fprintf(stderr, "Error while parsing file: %s\n", csv_strerror(csv_error(&p)) ); exit(EXIT_FAILURE); } /* Handle any final data. 
May call the callbacks once more */ csv_fini(&p, CallbackColumn, CallbackRow, &c); /* Print final stats on CSV file */ printf("%lu fields, %lu rows\n", c.fields, c.rows); /* Cleanup */ fclose(fp); csv_free(&p); }
int main (int argc, char ** argv){ if (argc != 4){ printf("Usage: %s nodes.csv ways.csv direct.csv\n",argv[0]); return 1; } char * nodescsvname = argv[1]; char * wayscsvname = argv[2]; char * directcsvname = argv[3]; struct csv_parser parser; csv_init(&parser,CSV_APPEND_NULL); csv_set_delim(&parser,';'); struct parse_t * p_struct; p_struct = malloc(sizeof(struct parse_t)); p_struct->state=0; p_struct->count=0; p_struct->ok=1; GARY_INIT(p_struct->vertices,0); GARY_INIT(p_struct->edges,0); parseFile(nodescsvname,&parser,p_struct,node_item_cb,node_line_cb); nodesIdx_refresh(GARY_SIZE(p_struct->vertices),p_struct->vertices); parseFile(wayscsvname,&parser,p_struct,way_item_cb,way_line_cb); parseFile(directcsvname,&parser,p_struct,direct_item_cb,direct_line_cb); Graph__Graph * graph; graph = malloc(sizeof(Graph__Graph)); graph__graph__init(graph); graph->n_edges=GARY_SIZE(p_struct->edges); graph->edges=p_struct->edges; graph->n_vertices=GARY_SIZE(p_struct->vertices); graph->vertices=p_struct->vertices; printf("Created graph with %d edges and %d vertices\n",graph->n_edges,graph->n_vertices); struct vertexedges_t * vedges; vedges = makeVertexEdges(graph); largestComponent(graph,vedges); saveSearchGraph(graph,"../data/postgis-graph.pbf"); }
/*
 * Feed the contents of `f` through the streaming csv parser in small
 * (6-byte) chunks.  Returns 0 on success, -1 on read or parse error.
 */
static int test_csv_file(FILE *f)
{
	char buf[6];
	size_t len;
	struct csv csv;

	csv_init(&csv, NULL, on_data, NULL);
	do {
		len = fread(buf, 1, sizeof(buf), f);
		/* BUG FIX: on a stream error fread keeps returning 0 while
		 * feof() stays false, so the original looped forever. */
		if (ferror(f))
			return -1;
		if (csv_push(&csv, len, buf))
			return -1;
	} while (!feof(f));
	if (csv_eol(&csv))
		return -1;
	return 0;
}
/*
 * Initialize the combined csv+redis output module: delegates to the csv
 * and redismodule initializers, then records which field indexes hold
 * the "success" and "repeat" columns for later per-result dispatch.
 *
 * Aborts via log_fatal if either column is missing from the fieldset.
 * NOTE(review): assumes success_index/repeat_index are file-scope ints
 * initialized to a negative value — confirm at their definition.
 */
int csvredis_init(struct state_conf *conf, char **fields, int fieldlens)
{
	csv_init(conf, fields, fieldlens);
	redismodule_init(conf, fields, fieldlens);
	for (int i=0; i < fieldlens; i++) {
		if (!strcmp("success", fields[i])) {
			success_index = i;
		} else if (!strcmp("repeat", fields[i])) {
			repeat_index = i;
		}
	}
	if (success_index < 0 || repeat_index < 0) {
		log_fatal("csvredis", "success or repeat not included in fieldset");
	}
	return EXIT_SUCCESS;
}
/*
 * Parse `filename` with the buffer-based csv engine, invoking `field`
 * for each cell and `row_end` at each end of row; both receive the
 * opaque context `p`.  fieldbuf bounds the largest single field (4 KiB).
 *
 * Returns 0 on success, -1 on open/init/read/parse error (the file is
 * closed on every path via the goto cleanup).
 */
static int csv_load(const char *filename, void *p,
	int (*field)(void *p, unsigned row, unsigned col, size_t len, const char *str),
	int (*row_end)(void *p, unsigned row))
{
	char inbuf[1024];
	char fieldbuf[4096];
	int len;
	struct csv_parser cp;
	FILE *f;

	f = fopen(filename, "r");
	if (!f) {
		perror(filename);
		return -1;
	}
	if (csv_init(&cp, fieldbuf, sizeof(fieldbuf), p, field, row_end))
		goto close_file;
	do {
		len = fread(inbuf, 1, sizeof(inbuf), f);
		if (ferror(f)) {
			perror(filename);
			goto close_file;
		}
		if (csv(&cp, inbuf, len)) {
			fprintf(stderr, "%s:parse error (row=%d col=%d)\n",
				filename, cp.row, cp.col);
			goto close_file;
		}
	} while (!feof(f));
	/* NOTE(review): csv_eof appears to return nonzero when the stream
	 * ended mid-record — confirm against the parser's header. */
	if (csv_eof(&cp)) {
		fprintf(stderr, "%s:parse error:unexpected end of file\n", filename);
		goto close_file;
	}
	fclose(f);
	return 0;
close_file:
	fclose(f);
	return -1;
}
int main (void) { struct csv_parser p; int i; char c; csv_init(&p, 0); while ((i=getc(stdin)) != EOF) { c = i; if (csv_parse(&p, &c, 1, cb1, cb2, NULL) != 1) { fprintf(stderr, "Error: %s\n", csv_strerror(csv_error(&p))); exit(EXIT_FAILURE); } } csv_fini(&p, cb1, cb2, NULL); csv_free(&p); return EXIT_SUCCESS; }
/*
 * Push `data` through the streaming csv parser in randomly-sized
 * chunks (exercising the parser's resume-across-chunks logic).
 * Returns 0 on success, -1 on parse error.
 */
static int test_csv_buffer(size_t len, const char *data)
{
	struct csv csv;
	const char *cursor = data;
	size_t remaining = len;

	csv_init(&csv, NULL, on_data, NULL);
	while (remaining > 0) {
		size_t step = rand() % remaining;
		if (step == 0)
			step = remaining;   /* never stall on a zero-length chunk */
		if (csv_push(&csv, step, cursor))
			return -1;
		cursor += step;
		remaining -= step;
	}
	return csv_eol(&csv) ? -1 : 0;
}
/*
 * Configure csvin's libcsv parser: custom space/terminator classifiers
 * and the delimiter taken from csvin.tab_delimter.  CSV_APPEND_NULL is
 * requested only when building against libcsv major version >= 3, where
 * the flag is available.  Exits the process if csv_init fails.
 */
void csv_init_parser(csv_file_t &csvin)
{
#if CSV_MAJOR >= 3
#define PARSER_OPTIONS CSV_APPEND_NULL
#else
#define PARSER_OPTIONS 0
#endif
	unsigned char parser_options = PARSER_OPTIONS;
	if (csv_init(&csvin.csv_file_parser, parser_options) != 0) {
		fprintf(stderr, "Failed to initialize csv parser\n");
		exit(EXIT_FAILURE);
	}
	csv_set_space_func(&csvin.csv_file_parser, csv_is_space);
	csv_set_term_func(&csvin.csv_file_parser, csv_is_term);
	csv_set_delim(&csvin.csv_file_parser, csvin.tab_delimter);
}/* ----- end of function csv_init_parser ----- */
/*
 * Load and parse the control's config file, feeding fields and rows to
 * the csv_data/csv_line callbacks.  Returns 0 on success, -1 if the
 * file can't be opened or the parser can't be initialized; exits the
 * process on a parse error.
 */
static int config_load(control_t *control)
{
	struct csv_parser p;
	FILE *fp;
	char buf[MAX_BUF];
	size_t bytes_read;

	assert(control);
	assert(control->configfile);
	assert(control->entries == NULL);
	assert(control->start == 0 && control->end == 0);

	/* open the file. */
	fp = fopen(control->configfile, "rb");
	if (fp == NULL)
		return -1;

	/* BUG FIX: the original returned 0 (success) even when csv_init
	 * failed and nothing at all was parsed. */
	if (csv_init(&p, 0) != 0) {
		fclose(fp);
		return -1;
	}

	while ((bytes_read = fread(buf, 1, MAX_BUF, fp)) > 0) {
		if (csv_parse(&p, buf, bytes_read, csv_data, csv_line,
			      control) != bytes_read) {
			fprintf(stderr, "Error while parsing file: %s\n",
				csv_strerror(csv_error(&p)));
			exit(EXIT_FAILURE);
		}
	}
	csv_fini(&p, csv_data, csv_line, control);
	csv_free(&p);
	fclose(fp);
	return 0;
}
int main (int argc, char** argv) { paillier_pubkey_t* pkey; paillier_prvkey_t* skey; paillier_keygen(128,&pkey,&skey,&paillier_get_rand_devrandom); void *context = zmq_ctx_new (); struct opts options; parse_options(argc,argv, &options); if(options.size <= 0 || options.scale <= 0 || !options.fileset){ fprintf(stderr,"Size and scale must be greater than 0 and file must be set\n"); exit(EXIT_FAILURE); } struct classify_data data; data.pub = pkey; data.prv = skey; data.maxcol = options.size; data.scale_factor = options.scale; data.texts = (paillier_plaintext_t**)malloc(options.size*sizeof(paillier_plaintext_t*)); data.col = 0; data.correct = 0; data.total = 0; init_rand(data.rand,&paillier_get_rand_devurandom,pkey->bits / 8 + 1); // Socket to talk to server gmp_printf("n: %Zd, lambda: %Zd\n",pkey->n,skey->lambda); void *requester = zmq_socket (context, ZMQ_REQ); zmq_connect (requester, "ipc:///tmp/karma"); char* pubkeyhex = paillier_pubkey_to_hex(pkey); s_send(requester,pubkeyhex); char* recv = s_recv(requester); free(recv); free(pubkeyhex); data.socket = requester; char* file = options.file; FILE* fp; struct csv_parser p; char buf[1024]; size_t bytes_read; if(csv_init(&p,0)) { fprintf(stderr, "Failed to initialize parser\n"); exit(EXIT_FAILURE); } fp = fopen(file,"rb"); if(!fp){ fprintf(stderr,"Failed to open classify file %s\n",strerror(errno)); exit(EXIT_FAILURE); } while ((bytes_read=fread(buf,1,1024,fp)) > 0){ if(!csv_parse(&p,buf,bytes_read,field_parsed,row_parsed,&data)){ fprintf(stderr, "Failed to parse file: %s\n",csv_strerror(csv_error(&p))); } } csv_fini(&p,field_parsed,row_parsed,&data); //fini took care of freeing the plaintexts csv_free(&p); free(data.texts); gmp_randclear(data.rand); printf("Correct(%i)/Total(%i) = %f\n",data.correct,data.total,data.correct/(data.total+0.0)); sleep (2); zmq_close (requester); zmq_ctx_destroy (context); return 0; }
void genericParse( /*std::istream &file,*/ std::string filename, field_cb_t cb_per_field, line_cb_t cb_per_line, void *data, const csv::params ¶ms ) { // Open the file typedef std::unique_ptr<std::FILE, int (*)(std::FILE *)> unique_file_ptr; unique_file_ptr file(fopen(filename.c_str(), "rb"), fclose); if (!file) { throw ParserError(std::string("File Opening Failed") + std::strerror(errno)); } struct csv_parser parser; if (!csv_init(&parser, 0)) { csv_set_opts(&parser, CSV_APPEND_NULL); csv_set_delim(&parser, params.getDelimiter()); int line_start = params.getLineStart(); if (line_start > 1) { int c; do { c = fgetc(file.get()); if ( c== '\n') --line_start; } while (c!= EOF && line_start > 1); } // 1GB Buffer size_t block_size; if (getenv("HYRISE_LOAD_BLOCK_SIZE")) block_size = strtoul(getenv("HYRISE_LOAD_BLOCK_SIZE"), nullptr, 0); else block_size = 1024 * 1024; // Read from the buffer size_t readBytes = 0; char rdbuf[block_size]; // Read the file until we cannot extract more bytes do { readBytes = fread(rdbuf, 1, block_size, file.get()); if (csv_parse(&parser, rdbuf, readBytes, cb_per_field, cb_per_line, data) != (size_t) readBytes) { throw ParserError(csv_strerror(csv_error(&parser))); } } while (readBytes == block_size); if (ferror(file.get())) { throw ParserError("Could not read file"); } csv_fini(&parser, cb_per_field, cb_per_line, data); } csv_free(&parser); }
/*
 * Read one PTM message from socket `fd` into `inbuf` (capacity `inlen`)
 * and dispatch it to the handler registered on `hdl` for its type.
 *
 * Flow: read the fixed-size header; if it decodes as CSV, read the
 * declared remainder and build a context around it; otherwise fall back
 * to the single supported legacy command (get-status) and synthesize a
 * csv-style command context.
 *
 * Returns the number of bytes consumed, <= 0 on socket error, -1 on
 * decode/allocation failure or unknown message type.
 */
int ptm_lib_process_msg(ptm_lib_handle_t *hdl, int fd, char *inbuf,
			int inlen, void *arg)
{
	int rc, len;
	char client_name[32];
	int cmd_id = 0, type = 0, ver = 0, msglen = 0;
	csv_t *csv;
	ptm_lib_msg_ctxt_t *p_ctxt = NULL;

	/* read just the fixed-size header first */
	len = _ptm_lib_read_ptm_socket(fd, inbuf, PTMLIB_MSG_HDR_LEN);
	if (len <= 0)
		return (len);

	/* wrap the header bytes in a csv for decoding */
	csv = csv_init(NULL, inbuf, PTMLIB_MSG_HDR_LEN);
	if (!csv) {
		DLOG("Cannot allocate csv for hdr\n");
		return (-1);
	}

	rc = _ptm_lib_decode_header(csv, &msglen, &ver, &type, &cmd_id,
				    client_name);
	csv_clean(csv);
	csv_free(csv);

	if (rc < 0) {
		/* could not decode the CSV - maybe its legacy cmd?
		 * get the entire cmd from the socket and see if we can process
		 * it */
		if (len == PTMLIB_MSG_HDR_LEN) {
			len += _ptm_lib_read_ptm_socket(
				fd, (inbuf + PTMLIB_MSG_HDR_LEN),
				inlen - PTMLIB_MSG_HDR_LEN);
			if (len <= 0)
				return (len);
		}
		inbuf[len] = '\0';
		/* we only support the get-status cmd */
		if (strcmp(inbuf, PTMLIB_CMD_GET_STATUS)) {
			DLOG("unsupported legacy cmd %s\n", inbuf);
			return (-1);
		}
		/* internally create a csv-style cmd */
		ptm_lib_init_msg(hdl, 0, PTMLIB_MSG_TYPE_CMD, NULL,
				 (void *)&p_ctxt);
		if (!p_ctxt) {
			DLOG("couldnt allocate context\n");
			return (-1);
		}
		ptm_lib_append_msg(hdl, p_ctxt, "cmd", PTMLIB_CMD_GET_STATUS);
	} else {
		/* header was valid CSV; body must fit the caller's buffer */
		if (msglen > inlen) {
			DLOG("msglen [%d] > inlen [%d]\n", msglen, inlen);
			return -1;
		}
		/* read the rest of the msg */
		len = _ptm_lib_read_ptm_socket(fd, inbuf, msglen);
		if (len <= 0) {
			return (len);
		}
		inbuf[len] = '\0';

		csv = csv_init(NULL, NULL, PTMLIB_MSG_SZ);
		if (!csv) {
			ERRLOG("Cannot allocate csv for msg\n");
			return -1;
		}
		csv_decode(csv, inbuf);

		p_ctxt = calloc(1, sizeof(*p_ctxt));
		if (!p_ctxt) {
			ERRLOG("%s: Could not allocate context \n",
			       __FUNCTION__);
			csv_clean(csv);
			csv_free(csv);
			return -1;
		}

		p_ctxt->csv = csv;
		p_ctxt->cmd_id = cmd_id;
		p_ctxt->type = type;
	}

	/* dispatch to the callback registered for this message type */
	switch (p_ctxt->type) {
	case PTMLIB_MSG_TYPE_NOTIFICATION:
		if (hdl->notify_cb)
			hdl->notify_cb(arg, p_ctxt);
		break;
	case PTMLIB_MSG_TYPE_CMD:
		if (hdl->cmd_cb)
			hdl->cmd_cb(arg, p_ctxt);
		break;
	case PTMLIB_MSG_TYPE_RESPONSE:
		if (hdl->response_cb)
			hdl->response_cb(arg, p_ctxt);
		break;
	default:
		/* NOTE(review): p_ctxt and its csv leak on this path */
		return -1;
	}

	csv_clean(p_ctxt->csv);
	csv_free(p_ctxt->csv);
	free(p_ctxt);
	return len;
}
/*
 * Build a storage::RawTable from the CSV file `_filename`, copying the
 * column metadata from `in`.  If detectHeader finds a header, parsing
 * starts at line 5 (the header lines are skipped).  Throws
 * csv::ParserError on a missing file or parse failure.
 */
std::shared_ptr<storage::AbstractTable> RawTableLoader::load(std::shared_ptr<storage::AbstractTable> in,
                                                             const storage::compound_metadata_list *ml,
                                                             const Loader::params &args) {
  csv::params params;
  if (detectHeader(args.getBasePath() + _filename)) params.setLineStart(5);

  // Create the result table
  storage::metadata_vec_t v(in->columnCount());
  for(size_t i=0; i < in->columnCount(); ++i) {
    v[i] = in->metadataAt(i);
  }
  auto result = std::make_shared<storage::RawTable>(v);

  // CSV Parsing
  std::ifstream file(args.getBasePath() + _filename, std::ios::binary);
  if (!file || file.bad()) {
    throw csv::ParserError("CSV file '" + _filename + "' does not exist");
  }

  struct csv_parser parser;
  // NOTE(review): if csv_init fails, control falls through to
  // csv_free below on an UNINITIALIZED parser and an empty table is
  // returned silently — confirm and consider throwing instead.
  if (!csv_init(&parser, 0)) {
    csv_set_opts(&parser, CSV_APPEND_NULL);
    csv_set_delim(&parser, params.getDelimiter());

    // If there is a header in the file, we will ignore it
    std::string line;
    int line_start = params.getLineStart();
    if (line_start != 1) {
      while (line_start > 1) {
        std::getline(file, line);
        --line_start;
      }
    }

    // Prepare cb data handler
    struct raw_table_cb_data data(v);
    data.table = result;

    const size_t block_size = 16 * 1024;
    char rdbuf [block_size];
    // read() only reports .good() for FULL blocks; the final partial
    // block falls out of the loop and is handled just below
    while (file.read(rdbuf, block_size).good()) {
      auto extracted = file.gcount();
      if (extracted == 0) break;
      if (csv_parse(&parser, rdbuf, extracted, (field_cb_t) raw_table_cb_per_field, (line_cb_t) raw_table_cb_per_line, (void*) &data) != (size_t) extracted) {
        throw csv::ParserError(csv_strerror(csv_error(&parser)));
      }
    }
    // Parse the rest (the last partial read left in rdbuf/gcount)
    if (csv_parse(&parser, rdbuf, file.gcount(), (field_cb_t) raw_table_cb_per_field, (line_cb_t) raw_table_cb_per_line, (void*) &data) != (size_t) file.gcount()) {
      throw csv::ParserError(csv_strerror(csv_error(&parser)));
    }
    csv_fini(&parser, (field_cb_t) raw_table_cb_per_field, (line_cb_t) raw_table_cb_per_line, (void*) &data);
  }
  csv_free(&parser);
  return result;
}
void UPCdata::readCSVfile(char* filename){ /* UPC dataset is of an un-orthodox format each row, starting from second, contain 1020 data columns and 6 description columns data columns correspond to 60 ORNs per 17 OR types. description columns denote compound, set (TrainingSet, TestSet, ValidationSet), conc1, conc2, conc3, sample (minute) I use c-style parameter handling in that vector parameters are handed over by pointers (c++ references to be exact). */ cout << "reading file " << filename << endl; // empty output vectors compounds.resize(0); samples.resize(0); /* std::ifstream fh(filename, ifstream::in); if (fh.is_open()){ if(fh.good()){ string line; getline(fh,line); // ignore first line } */ char buf[10000]; FILE *fp = fopen( filename, "rb" ); fgets(buf, 10000, fp); // ignore first line while(fgets(buf, 10000, fp)){ // while(fh.good() ){ // initalizations for file reading string line(buf); Sample sample; struct counts c = {vector<float>(0),0, 0}; if (csv_init(&m_csvParser, CSV_STRICT) != 0){ fprintf(stderr, "Failed to initialize csv parser\n"); return; //exit(EXIT_FAILURE); } //getline(fh,line); int bytes_read=line.size(); if (csv_parse(&m_csvParser, buf, bytes_read, cbColumn, cbRow, &c) != bytes_read){ fprintf(stderr, "Error while parsing file: %s\n", csv_strerror(csv_error(&m_csvParser))); } csv_fini(&m_csvParser, cbColumn, cbRow, &c); if(c.fData.size()>0){ for(int i=0;i<c.fData.size()-6;i++) sample.data.push_back(c.fData[i]); sample.samplenr=c.fData[c.fData.size()-1]; sample.concentrations[0]=c.fData[c.fData.size()-4]; sample.concentrations[1]=c.fData[c.fData.size()-3]; sample.concentrations[2]=c.fData[c.fData.size()-2]; // now get the compound and the set without using regular expressions // in order to avoid problems in code compatibility if neither boost nor tr1 are available string str=line; string setstr=""; for(int i=0;i<5;i++){ int found=str.find_last_of(","); str=str.substr(0,found); if(i==3){ setstr=str.substr(str.find_last_of(",")+2,str.size()-1); } 
} string compound=str.substr(str.find_last_of(",")+2,str.size()-1); if(setstr.compare("TrainingSet\"")==0) sample.set=Sample::Training; else if(setstr.compare("TestSet\"")==0) sample.set=Sample::Test; else if(setstr.compare("ValidationSet\"")==0) sample.set=Sample::Validation; else if(setstr.compare("InterimSet\"")==0) sample.set=Sample::Interim; if(sample.set==Sample::Undefined) fprintf(stderr, "Storage::ReadUPCfile(): Could not match set\n"); for(int i=0;i<compounds.size();i++){ if(compounds[i]==compound){ sample.compoundnr=i; break; } } if(sample.compoundnr<0){ compounds.push_back(compound); sample.compoundnr=compounds.size()-1; } samples.push_back(sample); } } // line treatment //fh.close(); fclose(fp); //} else // cerr<<"Error: Could not open file. ("<<filename<<")\n"; cout<<"finished reading " << filename << endl; return; }
int main(int argc, char *argv[]) { //check arguments if(argc < 2 || argc > 3) { printf("evoscan_logworks_conv <version %s>\n", version); printf("[email protected], @640774n6\n\n"); printf("usage: evoscan_logworks_conv <input csv path> <output dif path (optional)>\n"); return 1; } char *input_path = argv[1]; char *generated_output_path = create_output_path(input_path); char *output_path = (argc == 3) ? argv[2] : generated_output_path; if(!strcmp(input_path, output_path)) { printf("error: input and output path must be different\n"); free(generated_output_path); return 1; } //open file pointers FILE *input_file = fopen(input_path, "rb"); if(!input_file) { printf("error: failed to open input @ %s\n", input_path); free(generated_output_path); return 1; } FILE *output_file = fopen(output_path, "wb"); if(!output_path) { printf("error: failed to open output @ %s\n", output_path); free(generated_output_path); fclose(input_file); return 1; } FILE *tmp_file = tmpfile(); if(!tmp_file) { printf("error: failed to open tmp file\n"); free(generated_output_path); fclose(input_file); fclose(output_file); return 1; } //initialize variables fields = NULL; field_count = 0; used_fields = NULL; used_fields_min = NULL; used_fields_max = NULL; used_field_count = 0; total_sample_time = 0.0; total_sample_count = 0; row_sample_count = 0; csv_col = 0; csv_row = 0; //create csv parser struct csv_parser parser; unsigned char options = (CSV_APPEND_NULL | CSV_EMPTY_IS_NULL); csv_init(&parser, options); //main parse loop size_t length = 0; char buffer[1024]; while((length = fread(buffer, 1, 1024, input_file)) > 0) { //parse csv and handle with callbacks if(csv_parse(&parser, buffer, length, csv_process_col, csv_process_row, tmp_file) != length) { printf("error: failed to read from input @ %s\n", input_path); free(generated_output_path); fclose(input_file); fclose(output_file); fclose(tmp_file); csv_free(&parser); remove(output_path); return 1; } } //write output header fprintf(output_file, 
"TABLE\r\n0,1\r\n\"EXCEL\"\r\n"); fprintf(output_file, "VECTORS\r\n0,%d\r\n\"\"\r\n", (total_sample_count + 13)); fprintf(output_file, "TUPLES\r\n0,%d\r\n\"\"\r\n", (used_field_count + 1)); fprintf(output_file, "DATA\r\n0,0\r\n\"\"\r\n"); fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"Input Description\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"\"\r\n"); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"Stochiometric:\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"\"\r\n"); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"From Device:\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"(EVOSCAN%d)\"\r\n", (i + 1)); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"Name:\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"%s\"\r\n", used_fields[i]); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"Unit:\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"%.3s\"\r\n", used_fields[i]); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"Range:\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "0,%d\r\nV\r\n", used_fields_min[i]); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"equiv(Sample):\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "0,0\r\nV\r\n"); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"to:\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "0,%d\r\nV\r\n", used_fields_max[i] + 1); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"equiv(Sample):\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "0,4096\r\nV\r\n"); } fprintf(output_file, "-1,0\r\nBOT\r\n"); 
fprintf(output_file, "1,0\r\n\"Color:\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "0,%d\r\nV\r\n", color_for_index(i)); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"-End-\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"\"\r\n"); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"Session 1\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"\"\r\n"); } fprintf(output_file, "-1,0\r\nBOT\r\n"); fprintf(output_file, "1,0\r\n\"Time(sec)\"\r\n"); for(int i = 0; i < used_field_count; i++) { fprintf(output_file, "1,0\r\n\"%s (%.3s)\"\r\n", used_fields[i], used_fields[i]); } //append tmp to the output fseek(tmp_file, 0, SEEK_SET); while((length = fread(buffer, 1, 1024, tmp_file)) > 0) { fwrite(buffer, sizeof(char), length, output_file); } //write footer fprintf(output_file, "-1,0\r\nEOD\r\n"); //free generated output path free(generated_output_path); //free fields for(int i = 0; i < field_count; i++) { free(fields[i]); } free(fields); free(used_fields); free(used_fields_min); free(used_fields_max); //close file pointers fclose(input_file); fclose(output_file); fclose(tmp_file); //free parser csv_free(&parser); return 0; }
/*
 * Load up to nrItems rows of floats from a CSV file, one vector<float>
 * per row.  If keepOpen is true the file handle (m_fp) and parser state
 * are left open so a subsequent call can continue reading; otherwise the
 * parse is finalized and the file closed.  Returns the rows read so far
 * (empty on initialization or open failure).
 */
vector<vector<float> > Storage::LoadDataFloatCSV(char* filename, int nrItems, bool keepOpen)
{
    vector<vector<float> > out;
    size_t bytes_read;
    char buf[1024];
    struct counts c = {vector<float>(0),0, 0};

    if (csv_init(&m_csvParser, CSV_STRICT) != 0) {
        fprintf(stderr, "Failed to initialize csv parser\n");
        return out;
    }

    m_fp = fopen(filename, "r");
    if(m_fp==NULL) {
        cerr<<"Error: Could not open file. ("<<filename<<")\n";
        // FIX: the original fell through and called fread() on a NULL
        // FILE* (undefined behavior / crash); also release the parser
        // we just initialized so nothing leaks.
        csv_free(&m_csvParser);
        return out;
    }
    cout.flush();

    long oldRows = 0;
    // Reading ONE byte per fread() is deliberate: the row-completion check
    // below must run after every byte, so that each finished row's floats
    // are moved into their own vector before the next row's columns start
    // accumulating in c.fData.  A 1024-byte chunk could complete several
    // rows in one csv_parse() call and merge them into a single vector.
    while ((bytes_read=fread(buf, 1, 1, m_fp)) > 0) {
        if (csv_parse(&m_csvParser, buf, bytes_read, cbColumn, cbRow, &c) != bytes_read) {
            fprintf(stderr, "Error while parsing file: %s\n",
                    csv_strerror(csv_error(&m_csvParser)));
        }
        // a row just completed: capture its values and reset the accumulator
        if(c.rows>oldRows){
            out.push_back(c.fData);
            c.fData.clear();
            oldRows = c.rows;
        }
        if(c.rows>=nrItems)
            break;
    }

    if(keepOpen == false) {
        // finalize any trailing record and close the file
        csv_fini(&m_csvParser, cbColumn, cbRow, &c);
        fclose(m_fp);
    }

    if(m_mpiRank == 0) {
        if(out.size()>0)
            cout<<"Loaded "<<out.size()<<" nr items of dimension "<<out[0].size()<<".\n";
        else
            cout<<"Loaded no items.\n";
        cout.flush();
    }
    return out;
}
/* The main method that handles parsing.
 *
 * Ruby entry point: Rcsv.raw_parse(csvio, options = {}).  Sets up an
 * rcsv_metadata struct and a libcsv parser from the options hash, then runs
 * the actual parse inside rb_ensure() so parser memory is released even if a
 * Ruby exception is raised mid-parse.  Returns the accumulated rows array,
 * or Qnil in streaming (block-given) mode. */
static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
  struct rcsv_metadata meta;
  VALUE csvio, options, option;
  VALUE ensure_container = rb_ary_new(); /* [] — also keeps pushed VALUEs visible to the GC */
  struct csv_parser cp;
  unsigned char csv_options = CSV_STRICT_FINI | CSV_APPEND_NULL;

  /* Setting up some sane defaults */
  meta.row_as_hash = false;
  meta.empty_field_is_nil = false;
  meta.skip_current_row = false;
  meta.encoding_index = -1;
  meta.num_columns = 0;
  meta.current_col = 0;
  meta.current_row = 0;
  meta.offset_rows = 0;
  meta.num_only_rows = 0;
  meta.num_except_rows = 0;
  meta.num_row_defaults = 0;
  meta.num_row_conversions = 0;
  meta.only_rows = NULL;
  meta.except_rows = NULL;
  meta.row_defaults = NULL;
  meta.row_conversions = NULL;
  meta.column_names = NULL;
  /* NOTE(review): compound literal — a one-element VALUE array that lives for
   * the rest of this function; meta.result points into it so callbacks can
   * swap the accumulator array in place. */
  meta.result = (VALUE[]){rb_ary_new()}; /* [] */

  /* csvio is required, options is optional (pun intended) */
  rb_scan_args(argc, argv, "11", &csvio, &options);

  /* options ||= {} */
  if (NIL_P(options)) {
    options = rb_hash_new();
  }

  /* First of all, we parse libcsv-related params so that it fails early
     if something is wrong with them */

  /* By default, parsing is strict; :nostrict turns CSV_STRICT off.
     (!option catches Qfalse, which is 0 in MRI.) */
  option = rb_hash_aref(options, ID2SYM(rb_intern("nostrict")));
  if (!option || (option == Qnil)) {
    csv_options |= CSV_STRICT;
  }

  /* By default, empty strings are treated as Nils and quoted empty strings
     are treated as empty Ruby strings */
  option = rb_hash_aref(options, ID2SYM(rb_intern("parse_empty_fields_as")));
  if ((option == Qnil) || (option == ID2SYM(rb_intern("nil_or_string")))) {
    csv_options |= CSV_EMPTY_IS_NULL;
  } else if (option == ID2SYM(rb_intern("nil"))) {
    meta.empty_field_is_nil = true;
  } else if (option == ID2SYM(rb_intern("string"))) {
    meta.empty_field_is_nil = false;
  } else {
    rb_raise(rcsv_parse_error, "The only valid options for :parse_empty_fields_as are :nil, :string and :nil_or_string, but %s was supplied.", RSTRING_PTR(rb_inspect(option)));
  }

  /* rb_ensure() only expects callback functions to accept and return VALUEs */
  /* This ugly hack converts C pointers into Ruby Fixnums in order to pass
     them in Array (unwrapped again inside the ensure callbacks) */
  rb_ary_push(ensure_container, options); /* [options] */
  rb_ary_push(ensure_container, csvio); /* [options, csvio] */
  rb_ary_push(ensure_container, LONG2NUM((long)&meta)); /* [options, csvio, &meta] */
  rb_ary_push(ensure_container, LONG2NUM((long)&cp)); /* [options, csvio, &meta, &cp] */

  /* Try to initialize libcsv */
  if (csv_init(&cp, csv_options) == -1) {
    rb_raise(rcsv_parse_error, "Couldn't initialize libcsv");
  }

  /* From now on, cp handles allocated data and should be free'd on exit or
     exception.  NOTE(review): the body callback passed to rb_ensure here has
     the same name as this function — presumably a separate worker that does
     the actual parsing; verify against the rest of the file. */
  rb_ensure(rcsv_raw_parse, ensure_container, rcsv_free_memory, ensure_container);

  /* Remove the last row if it's empty. That happens if CSV file ends with
     a newline. */
  if (RARRAY_LEN(*(meta.result)) && /* meta.result.size != 0 */
      RARRAY_LEN(rb_ary_entry(*(meta.result), -1)) == 0) {
    rb_ary_pop(*(meta.result));
  }

  if (rb_block_given_p()) {
    return Qnil; /* STREAMING: rows were yielded to the block as they parsed */
  } else {
    return *(meta.result); /* Return accumulated result */
  }
}
/**
 * Initialise the embedded libcsv parser with the supplied option flags.
 * Always returns this object so the call can be chained; the csv_init()
 * result is intentionally ignored, matching the established contract.
 */
cxCSV *cxCSV::Init(cxUInt8 opt)
{
    (void)csv_init(&parser, opt);
    return this;
}