// Returns number of types. int load_hla_csv(const char *path, char ***bools_ptr, int num_rows) { assert(num_rows > 0); StrBuf line; strbuf_alloc(&line, 1024); FILE *fh = fopen(path, "r"); if(fh == NULL) die("Cannot open file: %s.", path); if(strbuf_readline(&line, fh) == 0) die("Empty CSV file: %s.", path); int num_types = count_char(line.b, ','); char **bools = my_malloc(sizeof(char*) * num_rows, __FILE__, __LINE__); char *data = my_malloc(sizeof(char) * num_rows * (num_types+1), __FILE__, __LINE__); printf("Number of rows: %i.\n",num_rows); int i; for(i = 0; i < num_rows && strbuf_reset_readline(&line, fh); i++) { strbuf_chomp(&line); bools[i] = data + i * (num_types+1); load_comma_bool_line(line.b, bools[i], num_types); bools[i][num_types] = '\0'; } if(i < num_rows) die("Not enough rows in CSV file: %s.", path); fclose(fh); strbuf_dealloc(&line); *bools_ptr = bools; return num_types; }
void error_correct_list_of_files(StrBuf* list_fastq,char quality_cutoff, char ascii_qual_offset, dBGraphEc *db_graph, HandleLowQualUncorrectable policy, int max_read_len, int min_read_len, StrBuf* suffix, char* outdir, boolean add_greedy_bases_for_better_bwt_compression, int num_greedy_bases, boolean rev_comp_read_if_on_reverse_strand) { printf("error correct list of files\n"); fflush(stdout); int len = max_read_len+2; uint64_t* distrib_num_bases_corrected =(uint64_t*) malloc(sizeof(uint64_t)*len); uint64_t* distrib_position_bases_corrected=(uint64_t*) malloc(sizeof(uint64_t)*len); if ( (distrib_num_bases_corrected==NULL)|| (distrib_position_bases_corrected==NULL)) { die("Unable to alloc arrays for keeping stats. Your machine must have hardly any spare memory\n"); } set_uint64_t_array(distrib_num_bases_corrected, len, (uint64_t) 0); set_uint64_t_array(distrib_position_bases_corrected, len, (uint64_t) 0); FILE* list_fastq_fp = fopen(list_fastq->buff, "r"); if (list_fastq_fp==NULL) { printf("Cannot open file %s\n", list_fastq->buff); } StrBuf *next_fastq = strbuf_new(); StrBuf* corrected_file = strbuf_new(); StrBuf* corrected_file_newpath = strbuf_new(); while(strbuf_reset_readline(next_fastq, list_fastq_fp)) { strbuf_chomp(next_fastq); if(strbuf_len(next_fastq) > 0) { strbuf_reset(corrected_file); strbuf_reset(corrected_file_newpath); strbuf_copy(corrected_file, 0,//dest next_fastq,0,strbuf_len(next_fastq)); strbuf_append_str(corrected_file, suffix->buff); char* corrected_file_basename = basename(corrected_file->buff); strbuf_append_str(corrected_file_newpath, outdir); strbuf_append_str(corrected_file_newpath, corrected_file_basename); error_correct_file_against_graph(next_fastq->buff, quality_cutoff, ascii_qual_offset, db_graph, corrected_file_newpath->buff, distrib_num_bases_corrected, distrib_position_bases_corrected, len, min_read_len, policy, add_greedy_bases_for_better_bwt_compression, num_greedy_bases, rev_comp_read_if_on_reverse_strand); } } fclose(list_fastq_fp); strbuf_free(next_fastq); strbuf_free(corrected_file); strbuf_free(corrected_file_newpath); free(distrib_num_bases_corrected); free(distrib_position_bases_corrected); }