static void inputs_attempt_open() { bool err_occurred = false; size_t i; for(i = 0; i < inputs.len && !err_occurred; i++) { AlignReadsData *input = &inputs.b[i]; err_occurred = !seqout_open(&input->seqout, input->out_base, input->fmt, // input->use_fq ? SEQ_FMT_FASTQ : SEQ_FMT_FASTQ, asyncio_task_is_pe(&files.b[i])); } if(err_occurred) { for(i = 0; i < inputs.len; i++) seqout_close(&inputs.b[i].seqout, true); die("Error creating output files"); } }
int ctx_correct(int argc, char **argv) { size_t i; struct ReadThreadCmdArgs args; read_thread_args_alloc(&args); read_thread_args_parse(&args, argc, argv, longopts, true); GraphFileReader *gfile = &args.gfile; GPathFileBuffer *gpfiles = &args.gpfiles; CorrectAlnInputBuffer *inputs = &args.inputs; // Update colours in graph file - sample in 0, all others in 1 size_t ncols = gpath_load_sample_pop(gfile, 1, gpfiles->b, gpfiles->len, args.colour); // Check for compatibility between graph files and link files graphs_gpaths_compatible(gfile, 1, gpfiles->b, gpfiles->len, 1); int64_t ctx_num_kmers = gfile->num_of_kmers; // // Decide on memory // size_t bits_per_kmer, kmers_in_hash, graph_mem, path_mem, total_mem; // 1 bit needed per kmer if we need to keep track of noreseed bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Edges)*8 + (gpfiles->len > 0 ? sizeof(GPath*)*8 : 0) + ncols; // in colour kmers_in_hash = cmd_get_kmers_in_hash(args.memargs.mem_to_use, args.memargs.mem_to_use_set, args.memargs.num_kmers, args.memargs.num_kmers_set, bits_per_kmer, ctx_num_kmers, ctx_num_kmers, false, &graph_mem); // Paths memory size_t rem_mem = args.memargs.mem_to_use - MIN2(args.memargs.mem_to_use, graph_mem); path_mem = gpath_reader_mem_req(gpfiles->b, gpfiles->len, ncols, rem_mem, false, kmers_in_hash, false); cmd_print_mem(path_mem, "paths"); // Shift path store memory from graphs->paths graph_mem -= sizeof(GPath*)*kmers_in_hash; path_mem += sizeof(GPath*)*kmers_in_hash; // Total memory total_mem = graph_mem + path_mem; cmd_check_mem_limit(args.memargs.mem_to_use, total_mem); // // Check we can write all output files // // Open output files SeqOutput *outputs = ctx_calloc(inputs->len, sizeof(SeqOutput)); bool err_occurred = false; for(i = 0; i < inputs->len && !err_occurred; i++) { CorrectAlnInput *input = &inputs->b[i]; // We loaded target colour into colour zero input->crt_params.ctxcol = input->crt_params.ctpcol = 0; bool is_pe = asyncio_task_is_pe(&input->files); err_occurred = !seqout_open(&outputs[i], input->out_base, args.fmt, is_pe); input->output = &outputs[i]; } // Abandon if some of the output files already exist if(err_occurred) { for(i = 0; i < inputs->len; i++) seqout_close(&outputs[i], true); die("Error creating output files"); } // // Allocate memory // dBGraph db_graph; db_graph_alloc(&db_graph, gfile->hdr.kmer_size, ncols, 1, kmers_in_hash, DBG_ALLOC_EDGES | DBG_ALLOC_NODE_IN_COL); // Create a path store that does not tracks path counts gpath_reader_alloc_gpstore(gpfiles->b, gpfiles->len, path_mem, false, &db_graph); // // Load Graph and link files // GraphLoadingPrefs gprefs = graph_loading_prefs(&db_graph); gprefs.empty_colours = true; // Load graph, print stats, close file graph_load(gfile, gprefs, NULL); hash_table_print_stats_brief(&db_graph.ht); graph_file_close(gfile); // Load link files for(i = 0; i < gpfiles->len; i++) { gpath_reader_load(&gpfiles->b[i], GPATH_DIE_MISSING_KMERS, &db_graph); gpath_reader_close(&gpfiles->b[i]); } // // Run alignment // correct_reads(inputs->b, inputs->len, args.dump_seq_sizes, args.dump_frag_sizes, args.fq_zero, args.append_orig_seq, args.nthreads, &db_graph); // Close and free output files for(i = 0; i < inputs->len; i++) seqout_close(&outputs[i], false); ctx_free(outputs); // Closes input files read_thread_args_dealloc(&args); db_graph_dealloc(&db_graph); return EXIT_SUCCESS; }
int ctx_reads(int argc, char **argv) { parse_args(argc, argv); // // Open input graphs // GraphFileReader *gfiles = ctx_calloc(num_gfiles, sizeof(GraphFileReader)); size_t i, ctx_max_kmers = 0, ctx_sum_kmers = 0; graph_files_open(gfile_paths, gfiles, num_gfiles, &ctx_max_kmers, &ctx_sum_kmers); // Will exit and remove output files on error inputs_attempt_open(); // // Calculate memory use // size_t kmers_in_hash, graph_mem, bits_per_kmer = sizeof(BinaryKmer)*8; kmers_in_hash = cmd_get_kmers_in_hash(memargs.mem_to_use, memargs.mem_to_use_set, memargs.num_kmers, memargs.num_kmers_set, bits_per_kmer, ctx_max_kmers, ctx_sum_kmers, true, &graph_mem); cmd_check_mem_limit(memargs.mem_to_use, graph_mem); // // Set up graph // dBGraph db_graph; db_graph_alloc(&db_graph, gfiles[0].hdr.kmer_size, 1, 0, kmers_in_hash, 0); // Load graphs LoadingStats gstats = LOAD_STATS_INIT_MACRO; GraphLoadingPrefs gprefs = {.db_graph = &db_graph, .must_exist_in_graph = false, .empty_colours = true, .boolean_covgs = false}; for(i = 0; i < num_gfiles; i++) { file_filter_flatten(&gfiles[i].fltr, 0); graph_load(&gfiles[i], gprefs, &gstats); graph_file_close(&gfiles[i]); gprefs.empty_colours = false; } ctx_free(gfiles); status("Printing reads that do %stouch the graph\n", inputs.b[0].invert ? "not " : ""); // // Filter reads using async io // LoadingStats seq_stats = LOAD_STATS_INIT_MACRO; for(i = 0; i < inputs.len; i++) { inputs.b[i].stats = &seq_stats; inputs.b[i].db_graph = &db_graph; } // Deal with a set of files at once size_t start, end; for(start = 0; start < inputs.len; start += MAX_IO_THREADS) { // Can have different numbers of inputs vs threads end = MIN2(inputs.len, start+MAX_IO_THREADS); asyncio_run_pool(files.b+start, end-start, filter_reads, NULL, nthreads, 0); } size_t total_reads_printed = 0; size_t total_reads = seq_stats.num_se_reads + seq_stats.num_pe_reads; for(i = 0; i < inputs.len; i++) total_reads_printed += inputs.b[i].num_of_reads_printed; for(i = 0; i < inputs.len; i++) { seqout_close(&inputs.b[i].seqout, false); asyncio_task_close(&files.b[i]); } aln_reads_buf_dealloc(&inputs); asyncio_buf_dealloc(&files); status("Total printed %zu / %zu (%.2f%%) reads\n", total_reads_printed, total_reads, total_reads ? (100.0 * total_reads_printed) / total_reads : 0.0); db_graph_dealloc(&db_graph); return EXIT_SUCCESS; }