// Returns true if one or more files passed loads data into colour bool graph_file_is_colour_loaded(size_t colour, const GraphFileReader *files, size_t num_files) { size_t i; for(i = 0; i < num_files; i++) { if(file_filter_iscolloaded(&files[i].fltr, colour)) return true; } return false; }
int ctx_correct(int argc, char **argv) { size_t i, j; struct ReadThreadCmdArgs args = READ_THREAD_CMD_ARGS_INIT; read_thread_args_alloc(&args); read_thread_args_parse(&args, argc, argv, longopts, true); GraphFileReader *gfile = &args.gfile; PathFileBuffer *pfiles = &args.pfiles; CorrectAlnInputBuffer *inputs = &args.inputs; size_t ctx_total_cols = gfile->hdr.num_of_cols; size_t ctx_num_kmers = gfile->num_of_kmers; if(args.colour > ctx_total_cols) cmd_print_usage("-c %zu is too big [> %zu]", args.colour, ctx_total_cols); size_t ctp_usedcols = 0; for(i = 0; i < pfiles->len; i++) { if(!file_filter_iscolloaded(&pfiles->data[i].fltr, args.colour)) { cmd_print_usage("Path file doesn't load into colour %zu: %s", args.colour, pfiles->data[i].fltr.orig_path.buff); } ctp_usedcols = MAX2(ctp_usedcols, path_file_usedcols(&pfiles->data[i])); } // // Decide on memory // size_t bits_per_kmer, kmers_in_hash, graph_mem, path_mem, total_mem; // 1 bit needed per kmer if we need to keep track of noreseed bits_per_kmer = sizeof(Edges)*8 + ctx_num_kmers + sizeof(uint64_t)*8; kmers_in_hash = cmd_get_kmers_in_hash2(args.memargs.mem_to_use, args.memargs.mem_to_use_set, args.memargs.num_kmers, args.memargs.num_kmers_set, bits_per_kmer, ctx_num_kmers, ctx_num_kmers, false, &graph_mem); // Paths memory path_mem = path_files_mem_required(pfiles->data, pfiles->len, false, false, ctp_usedcols, 0); cmd_print_mem(path_mem, "paths"); // Total memory total_mem = graph_mem + path_mem; cmd_check_mem_limit(args.memargs.mem_to_use, total_mem); // // Check we can read all output files // // Open output files SeqOutput *outputs = ctx_calloc(inputs->len, sizeof(SeqOutput)); bool output_files_exist = false; for(i = 0; i < inputs->len; i++) { CorrectAlnInput *input = &inputs->data[i]; input->crt_params.ctxcol = input->crt_params.ctpcol = args.colour; SeqOutput *output = &outputs[i]; seq_output_alloc(output); seq_output_set_paths(output, input->out_base, async_task_pe_output(&input->files)); input->output = output; // output check prints warnings and returns true if errors output_files_exist |= seq_output_files_exist_check(output); } // Abandon if some of the output files already exist if(output_files_exist) die("Output files already exist"); // Attempt to open all files for(i = 0; i < inputs->len && seq_output_open(&outputs[i]); i++) {} // Check if something went wrong - if so remove all output files if(i < inputs->len) { for(j = 0; j < i; j++) seq_output_delete(&outputs[i]); die("Couldn't open output files"); } // // Allocate memory // dBGraph db_graph; db_graph_alloc(&db_graph, gfile->hdr.kmer_size, ctx_total_cols, 1, kmers_in_hash); size_t bytes_per_col = roundup_bits2bytes(db_graph.ht.capacity); db_graph.col_edges = ctx_calloc(db_graph.ht.capacity, sizeof(Edges)); db_graph.node_in_cols = ctx_calloc(bytes_per_col * ctx_total_cols, 1); // Paths path_store_alloc(&db_graph.pstore, path_mem, false, db_graph.ht.capacity, ctp_usedcols); // // Load Graph and Path files // LoadingStats gstats = LOAD_STATS_INIT_MACRO; GraphLoadingPrefs gprefs = {.db_graph = &db_graph, .boolean_covgs = false, .must_exist_in_graph = false, .must_exist_in_edges = NULL, .empty_colours = true}; // Load graph, print stats, close file graph_load(gfile, gprefs, &gstats); hash_table_print_stats_brief(&db_graph.ht); graph_file_close(gfile); // Load path files (does nothing if num_fpiles == 0) paths_format_merge(pfiles->data, pfiles->len, false, false, args.num_of_threads, &db_graph); // // Run alignment // correct_reads(args.num_of_threads, MAX_IO_THREADS, inputs->data, inputs->len, &db_graph); // Close and free output files for(i = 0; i < inputs->len; i++) seq_output_dealloc(&outputs[i]); ctx_free(outputs); read_thread_args_dealloc(&args); db_graph_dealloc(&db_graph); return EXIT_SUCCESS; }