//virtual std::istream &csv_selector::input(std::istream &iss) { // reuse the input from csv_tool if (!iss) { return iss; } csv_data->buffer.clear(); while (iss) { std::string tmp = read_block(iss); // std::cerr << "tmp:" << std::endl << tmp << std::endl; if (tmp.size() < 1) { break; } std::string block_type = get_block_type(tmp); if (block_type == "csv_analyzer_header") { csv_data->mcolumns = parse_csv_header(tmp); this->update_selector(); } else if (block_type == "line_t" ) { line_t line = parse_csv_line(tmp); csv_data->buffer.push_back(line); } else { throw std::runtime_error("unknown block type in csv_selector::input: \"" + block_type + "\". block = \"" + tmp + "\"."); } } return iss; }
/*
 * Convert a CSV file into a binary file of fixed-size records.
 *
 * Usage: <prog> <csv-path> <binary-path>
 *
 * Every line read from the CSV file is handed to parse_csv_line() and the
 * returned `struct info` is written as-is to the binary file, which is
 * created (mode 0640) or truncated.
 *
 * Exits with status 1 when the argument count is wrong or when either file
 * (or the input stream) cannot be opened.  A failed or short write only
 * emits a warning on stderr; processing continues and 0 is returned.
 */
int main(int argc, char** argv)
{
    int ifd;
    int ofd;
    size_t size = 0;      /* getline() reads this value, so give it a defined one */
    struct info info_ins;
    char* line = NULL;

    if (argc != 3) {
        fprintf(stderr, "Need 2 argument: csv file path and binary file path.\n");
        exit(1);
    }

    if ((ifd = open(argv[1], O_RDONLY)) == -1) {
        perror("Open csv file fail");
        exit(1);
    }

    if ((ofd = open(argv[2], O_WRONLY|O_CREAT|O_TRUNC, 0640)) == -1) {
        perror("Open binary file fail");
        exit(1);
    }

    FILE* stream = fdopen(ifd, "r");
    if (stream == NULL) {
        /* Without a stream getline() would be called on NULL. */
        perror("Open csv stream fail");
        close(ifd);
        close(ofd);
        exit(1);
    }

    /* getline() grows and reuses the same buffer across iterations. */
    while (getline(&line, &size, stream) != -1) {
        info_ins = parse_csv_line(line);
        if (write(ofd, &info_ins, sizeof(struct info)) != (ssize_t)sizeof(struct info)) {
            fprintf(stderr, "Warning: write to binary fail. The binary may corrupt!\n");
        }
    }

    /* The buffer must be released even when the last getline() call failed. */
    free(line);

    /* fclose() releases the FILE and closes the underlying ifd as well. */
    fclose(stream);
    close(ofd);

    return 0;
}
/* parse_csv_file()
 *
 * inputs       - file: FBFILE to read lines from (via fbgets)
 *              - conf_type: type of conf to parse
 * output       - none
 * side effects - Each line that is neither empty nor starts with '#' is
 *                split with parse_csv_line() and turned into a conf item of
 *                the requested type.  Depending on the type and on whether
 *                a duration field is present, the item is passed to
 *                add_conf_by_address(), conf_add_d_conf() or
 *                add_temp_line().  Types listed at the end of the switch
 *                are ignored.
 */
void
parse_csv_file(FBFILE *file, ConfType conf_type)
{
  struct ConfItem *conf;
  struct AccessItem *aconf;
  struct MatchItem *match_item;
  struct MatchItem *nresv;
  struct ResvChannel *cresv;
  char line[IRCD_BUFSIZE];

  while (fbgets(line, sizeof(line), file) != NULL)
  {
    /*
     * All field pointers start out NULL for every line.  The branches
     * below test fields against NULL to detect missing columns; if the
     * pointers kept their values from the previous line those tests could
     * never fire after the first record.
     */
    char *name_field = NULL;
    char *user_field = NULL;
    char *reason_field = NULL;
    char *oper_reason = NULL;
    char *host_field = NULL;
    char *duration_field = NULL;
    char *temp = NULL;
    char *p;

    /* Strip the trailing newline, if any. */
    if ((p = strchr(line, '\n')) != NULL)
      *p = '\0';

    /* Skip blank lines and comments. */
    if ((line[0] == '\0') || (line[0] == '#'))
      continue;

    switch (conf_type)
    {
      case KLINE_TYPE:
        parse_csv_line(line, &user_field, &host_field, &reason_field,
                       &oper_reason, &temp, &temp, &temp, &duration_field,
                       NULL);

        conf = make_conf_item(KLINE_TYPE);
        aconf = map_to_conf(conf);

        if (host_field != NULL)
        {
          DupString(aconf->host, host_field);
        }
        if (reason_field != NULL)
        {
          DupString(aconf->reason, reason_field);
        }
        if (oper_reason != NULL)
        {
          DupString(aconf->oper_reason, oper_reason);
        }
        if (user_field != NULL)
        {
          DupString(aconf->user, user_field);
        }
        if (duration_field != NULL)
          aconf->hold = atoi(duration_field);

        /* NOTE(review): when no host was set the item is not registered
         * here - confirm it is not leaked. */
        if (aconf->host != NULL)
        {
          if (duration_field == NULL)
            add_conf_by_address(CONF_KILL, aconf);
          else
            add_temp_line(conf);
        }
        break;

      case RKLINE_TYPE:
      {
        const char *errptr = NULL;
        pcre *exp_user = NULL, *exp_host = NULL;

        parse_csv_line(line, &user_field, &host_field, &reason_field,
                       &oper_reason, &temp, &temp, &temp, &duration_field,
                       NULL);

        if (host_field == NULL || user_field == NULL)
          break;

        /* NOTE(review): if the user pattern compiles but the host pattern
         * does not, exp_user is not released here - confirm. */
        if (!(exp_user = ircd_pcre_compile(user_field, &errptr)) ||
            !(exp_host = ircd_pcre_compile(host_field, &errptr)))
        {
          sendto_realops_flags(UMODE_ALL, L_ALL,
                               "Failed to add regular expression based K-Line: %s",
                               errptr);
          break;
        }

        conf = make_conf_item(RKLINE_TYPE);
        aconf = map_to_conf(conf);

        aconf->regexuser = exp_user;
        aconf->regexhost = exp_host;

        DupString(aconf->user, user_field);
        DupString(aconf->host, host_field);

        if (reason_field != NULL)
        {
          DupString(aconf->reason, reason_field);
        }
        else
        {
          DupString(aconf->reason, "No reason");
        }

        if (oper_reason != NULL)
        {
          DupString(aconf->oper_reason, oper_reason);
        }

        if (duration_field != NULL)
        {
          aconf->hold = atoi(duration_field);
          add_temp_line(conf);
        }
      }
        break;

      case DLINE_TYPE:
        parse_csv_line(line, &host_field, &reason_field, &temp, &temp,
                       &temp, &temp, &duration_field, NULL);

        conf = make_conf_item(DLINE_TYPE);
        aconf = (struct AccessItem *)map_to_conf(conf);

        if (host_field != NULL)
        {
          DupString(aconf->host, host_field);
        }
        if (reason_field != NULL)
        {
          DupString(aconf->reason, reason_field);
        }

        if (duration_field != NULL)
        {
          aconf->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        else
          conf_add_d_conf(aconf);
        break;

      case XLINE_TYPE:
        parse_csv_line(line, &name_field, &reason_field, &oper_reason,
                       &temp, &temp, &temp, &temp, &duration_field, NULL);

        conf = make_conf_item(XLINE_TYPE);
        match_item = (struct MatchItem *)map_to_conf(conf);

        if (name_field != NULL)
        {
          DupString(conf->name, name_field);
        }
        if (reason_field != NULL)
        {
          DupString(match_item->reason, reason_field);
        }

        if (duration_field != NULL)
        {
          match_item->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        break;

      case RXLINE_TYPE:
      {
        const char *errptr = NULL;
        pcre *exp_p = NULL;

        parse_csv_line(line, &name_field, &reason_field, &oper_reason,
                       &temp, &temp, &temp, &temp, &duration_field, NULL);

        if (name_field == NULL)
          break;

        if (!(exp_p = ircd_pcre_compile(name_field, &errptr)))
        {
          sendto_realops_flags(UMODE_ALL, L_ALL,
                               "Failed to add regular expression based X-Line: %s",
                               errptr);
          break;
        }

        conf = make_conf_item(RXLINE_TYPE);
        conf->regexpname = exp_p;
        match_item = map_to_conf(conf);

        DupString(conf->name, name_field);

        if (reason_field != NULL)
        {
          DupString(match_item->reason, reason_field);
        }
        else
        {
          DupString(match_item->reason, "No reason");
        }

        if (duration_field != NULL)
        {
          match_item->hold = atoi(duration_field);
          add_temp_line(conf);
        }
      }
        break;

      case CRESV_TYPE:
        parse_csv_line(line, &name_field, &reason_field, &duration_field,
                       NULL);

        conf = create_channel_resv(name_field, reason_field, 0);

        /* create_channel_resv() is not visible here; guard in case it can
         * decline to create an item and return NULL. */
        if (conf != NULL && duration_field != NULL)
        {
          cresv = map_to_conf(conf);
          cresv->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        break;

      case NRESV_TYPE:
        parse_csv_line(line, &name_field, &reason_field, &duration_field,
                       NULL);

        conf = create_nick_resv(name_field, reason_field, 0);

        /* Same defensive guard as for channel resvs. */
        if (conf != NULL && duration_field != NULL)
        {
          nresv = map_to_conf(conf);
          nresv->hold = atoi(duration_field);
          add_temp_line(conf);
        }
        break;

      /* Types that are not read from CSV files: nothing to do. */
      case GLINE_TYPE:
      case GDENY_TYPE:
      case CONF_TYPE:
      case OPER_TYPE:
      case CLIENT_TYPE:
      case SERVER_TYPE:
      case CLUSTER_TYPE:
      case HUB_TYPE:
      case LEAF_TYPE:
      case ULINE_TYPE:
      case EXEMPTDLINE_TYPE:
      case CLASS_TYPE:
        break;
    }
  }
}
int main_dist(int argc, char **argv) { timer_init(); int n_max_reading_set = 0; const char *quantiles_string = "0.005,0.05,0.5,0.95,0.995"; const char *dist_file = NULL; const char *comp_file = NULL; const char *indel_dist_file = NULL; const char *summary_stats_file = NULL; const char *query_range_file = NULL; const char *readgroup_file = NULL; #define SQRT2 1.41421356237309504880 char c; while ((c = getopt(argc, argv, "d:c:i:l:x:y:X:Z:P:S:p:C:Q:q:f:F:R:t:r:m:g")) >= 0) { switch(c) { /* files */ case 'd': dist_file = optarg; break; case 'c': comp_file = optarg; break; case 'i': indel_dist_file = optarg; break; case 'l': query_range_file = optarg; break; case 'x': summary_stats_file = optarg; break; /* statistical parameters */ case 'y': opts.be_par.min_dirichlet_dist = SQRT2 * strtod_errmsg(optarg, "-y (min_dirichlet_dist)"); break; case 'X': opts.be_par.post_confidence = strtod_errmsg(optarg, "-X (post_confidence)"); break; case 'Z': opts.be_par.beta_confidence = strtod_errmsg(optarg, "-Z (beta_confidence)"); break; case 'P': opts.be_par.max_sample_points = (unsigned)strtod_errmsg(optarg, "-f (max_sample_points)"); break; case 'p': opts.dc_par.prior_alpha = strtod_errmsg(optarg, "-p (prior_alpha)"); break; case 'C': quantiles_string = optarg; break; case 'S': opts.dc_par.n_max_survey_loci = strtod_errmsg(optarg, "-S (n_max_survey_loci)"); break; /* read-level filtering */ case 'Q': opts.bf_par.min_base_quality = strtol_errmsg(optarg, "-Q (min_base_quality)"); break; case 'q': opts.bf_par.min_map_quality = strtol_errmsg(optarg, "-q (min_map_quality)"); break; case 'f': opts.bf_par.rflag_require = strtol_errmsg(optarg, "-f (rflag_require)"); break; case 'F': opts.bf_par.rflag_filter = strtol_errmsg(optarg, "-F (rflag_filter)"); break; case 'R': readgroup_file = optarg; break; /* general */ case 't': opts.n_threads = strtol_errmsg(optarg, "-t (n_threads)"); break; case 'r': opts.n_max_reading = strtol_errmsg(optarg, "-r (n_max_reading)"); n_max_reading_set = 1; 
break; case 'm': opts.max_mem = (size_t)strtod_errmsg(optarg, "-m (max_mem)"); break; case 'g': opts.ld_par.do_print_pileup = 1; break; default: return dist_usage(); break; } } if (argc - optind != 3) return dist_usage(); if (! n_max_reading_set) opts.n_max_reading = opts.n_threads; /* This adjustment makes max_sample_points a multiple of GEN_POINTS_BATCH */ opts.be_par.max_sample_points += GEN_POINTS_BATCH - (opts.be_par.max_sample_points % GEN_POINTS_BATCH); const char *samples_file = argv[optind]; const char *sample_pairs_file = argv[optind + 1]; const char *fasta_file = argv[optind + 2]; setvbuf(stdout, NULL, _IONBF, 0); printf("\n"); /* So progress messages don't interfere with shell prompt. */ if (! dist_file && ! comp_file && ! indel_dist_file) { fprintf(stderr, "Error: You must provide at least one of -d or -c or -i. " "Otherwise, there is nothing to calculate\n"); exit(5); } /* parse readgroups file */ FILE *readgroup_fh = open_if_present(readgroup_file, "r"); if (readgroup_fh) { opts.bf_par.readgroup_include_hash = init_readgroup_file(readgroup_fh); fclose(readgroup_fh); } gsl_set_error_handler_off(); FILE *dist_fh = open_if_present(dist_file, "w"); FILE *comp_fh = open_if_present(comp_file, "w"); FILE *indel_fh = open_if_present(indel_dist_file, "w"); opts.ld_par.do_dist = (dist_fh != NULL); opts.ld_par.do_comp = (comp_fh != NULL); opts.ld_par.do_indel = (indel_fh != NULL); /* resolve overlap between parameter sets */ opts.ld_par.max_sample_points = opts.be_par.max_sample_points; opts.ld_par.post_confidence = opts.be_par.post_confidence; opts.ld_par.min_dirichlet_dist = opts.be_par.min_dirichlet_dist; opts.dc_par.fasta_file = fasta_file; opts.dc_par.max_sample_points = opts.be_par.max_sample_points; /* allot fractions of main memory to points and input buffers. 
bounds and output buffers will be negligible */ #define FRAC_MEM_POINTSETS 0.9 size_t max_point_sets = opts.max_mem / (sizeof(POINT) * opts.be_par.max_sample_points); opts.dc_par.n_point_sets = max_point_sets * FRAC_MEM_POINTSETS; #define INPUT_MEM_FRACTION 0.05 /* allot a fraction of total memory to input buffers. */ unsigned long max_input_mem = opts.max_mem * INPUT_MEM_FRACTION; parse_csv_line(quantiles_string, opts.ld_par.quantiles, &opts.ld_par.n_quantiles, MAX_NUM_QUANTILES); struct thread_queue *tqueue = locus_diff_init(samples_file, sample_pairs_file, query_range_file, fasta_file, opts.n_threads, opts.n_max_reading, max_input_mem, opts.ld_par, opts.be_par, opts.dc_par, opts.bf_par, opts.bp_par, dist_fh, comp_fh, indel_fh); printf("Starting input processing.\n"); thread_queue_run(tqueue); if (summary_stats_file) print_pair_stats(summary_stats_file); if (dist_fh) fclose(dist_fh); if (comp_fh) fclose(comp_fh); if (indel_fh) fclose(indel_fh); locus_diff_free(tqueue); if (opts.bf_par.readgroup_include_hash) free_readgroup_hash(opts.bf_par.readgroup_include_hash); printf("Finished.\n"); return 0; }