// Allocate all internal state for a GraphCrawler walking over `db_graph`.
// Requires per-colour node presence bitsets (node_in_cols) to be loaded.
// Pair with graph_crawler_dealloc().
void graph_crawler_alloc(GraphCrawler *crawler, const dBGraph *db_graph)
{
  ctx_assert(db_graph->node_in_cols != NULL);

  size_t ncols = db_graph->num_of_cols;

  // One slot per colour in each of these arrays
  int *cpaths = ctx_calloc(ncols, sizeof(int));
  GCMultiColPath *mpaths = ctx_calloc(ncols, sizeof(GCMultiColPath));
  GCUniColPath *upaths = ctx_calloc(ncols, sizeof(GCUniColPath));
  uint32_t *clist = ctx_calloc(ncols, sizeof(uint32_t));

  GraphCrawler init = {.num_paths = 0,
                       .col_paths = cpaths,
                       .multicol_paths = mpaths,
                       .unicol_paths = upaths,
                       .col_list = clist};

  // memcpy rather than struct assignment: the struct may hold const members.
  // Fields not named above (cache/wlk/rptwlk) are zeroed by the initializer,
  // leaving them in a clean state for the alloc calls below.
  memcpy(crawler, &init, sizeof(GraphCrawler));

  graph_cache_alloc(&crawler->cache, db_graph);
  graph_walker_alloc(&crawler->wlk, db_graph);
  rpt_walker_alloc(&crawler->rptwlk, db_graph->ht.capacity, 22); // 4MB
}

// Release everything allocated by graph_crawler_alloc() and zero the struct
void graph_crawler_dealloc(GraphCrawler *crawler)
{
  ctx_free(crawler->col_paths);
  ctx_free(crawler->multicol_paths);
  ctx_free(crawler->unicol_paths);
  ctx_free(crawler->col_list);
  graph_cache_dealloc(&crawler->cache);
  graph_walker_dealloc(&crawler->wlk);
  rpt_walker_dealloc(&crawler->rptwlk);
  memset(crawler, 0, sizeof(GraphCrawler)); // reset
}
static void run_exp_abc(const dBGraph *db_graph, bool prime_AB, size_t nthreads, size_t num_repeats, size_t max_AB_dist, bool print_failed_contigs) { ExpABCWorker *wrkrs = ctx_calloc(nthreads, sizeof(ExpABCWorker)); size_t i, j; if(max_AB_dist == 0) max_AB_dist = SIZE_MAX; for(i = 0; i < nthreads; i++) { wrkrs[i].colour = 0; wrkrs[i].nthreads = nthreads; wrkrs[i].db_graph = db_graph; wrkrs[i].prime_AB = prime_AB; wrkrs[i].num_limit = num_repeats / nthreads; wrkrs[i].max_AB_dist = max_AB_dist; wrkrs[i].print_failed_contigs = print_failed_contigs; db_node_buf_alloc(&wrkrs[i].nbuf, 1024); graph_walker_alloc(&wrkrs[i].gwlk, db_graph); rpt_walker_alloc(&wrkrs[i].rptwlk, db_graph->ht.capacity, 22); // 4MB } util_run_threads(wrkrs, nthreads, sizeof(ExpABCWorker), nthreads, run_exp_abc_thread); // Merge results size_t num_tests = 0, results[NUM_RESULT_VALUES] = {0}; size_t ab_fail_state[GRPHWLK_NUM_STATES] = {0}; size_t bc_fail_state[GRPHWLK_NUM_STATES] = {0}; for(i = 0; i < nthreads; i++) { num_tests += wrkrs[i].num_tests; for(j = 0; j < NUM_RESULT_VALUES; j++) results[j] += wrkrs[i].results[j]; for(j = 0; j < GRPHWLK_NUM_STATES; j++) ab_fail_state[j] += wrkrs[i].ab_fail_state[j]; for(j = 0; j < GRPHWLK_NUM_STATES; j++) bc_fail_state[j] += wrkrs[i].bc_fail_state[j]; db_node_buf_dealloc(&wrkrs[i].nbuf); graph_walker_dealloc(&wrkrs[i].gwlk); rpt_walker_dealloc(&wrkrs[i].rptwlk); } // Print results char nrunstr[50]; ulong_to_str(num_tests, nrunstr); status("Ran %s tests with %zu threads", nrunstr, nthreads); const char *titles[] = {"RES_ABC_SUCCESS", "RES_AB_WRONG", "RES_AB_FAILED", "RES_BC_WRONG", "RES_BC_FAILED", "RES_BC_OVERSHOT", "RES_LOST_IN_RPT", "RES_NO_TRAVERSAL"}; util_print_nums(titles, results, NUM_RESULT_VALUES, 30); status("AB_FAILED:"); graph_step_print_state_hist(ab_fail_state); status("BC_FAILED:"); graph_step_print_state_hist(bc_fail_state); ctx_free(wrkrs); }
// Build a 1-colour k=11 graph containing a 12bp unit repeated many times and
// check the graph walker resolves the repeat only as far as the loaded path
// information allows.
static void test_repeat_loop()
{
  // NOTE(review): assumes FollowPath is packed to exactly 20 bytes — the test
  // aborts early if the struct layout changes
  TASSERT(sizeof(FollowPath) == 20);

  // Construct 1 colour graph with kmer-size=11
  dBGraph graph;
  size_t kmer_size = 11, ncols = 1;

  // Set up alignment correction params
  CorrectAlnParam params = {.ctpcol = 0, .ctxcol = 0,
                            .ins_gap_min = 0, .ins_gap_max = 0,
                            .one_way_gap_traverse = true, .use_end_check = true,
                            .max_context = 10,
                            .gap_variance = 0.1, .gap_wiggle = 5};

  // Sequence with repeat: 15bp flank + 11 copies of a 12bp unit + 15bp flank
  char seq[] =
    "ATTTGGAACTCCGGA"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "GATAGGGCCAGT"
    "CGTCAGGAGCTAACT";

  // Short paths: 5' flank + two repeat units, and two repeat units + 3' flank
  char p0[] = "ATTTGGAACTCCGGA""GATAGGGCCAGT""GATAGGGCCAGT";
  char p1[] = "GATAGGGCCAGT""GATAGGGCCAGT""CGTCAGGAGCTAACT";

  // Allocate graph, but don't add any sequence
  _construct_graph_with_paths(&graph, kmer_size, ncols, NULL, 0, params);
  GenPathWorker *gen_path_wrkr = gen_paths_workers_alloc(1, &graph, NULL);

  GraphWalker gwlk;
  RepeatWalker rptwlk;
  graph_walker_alloc(&gwlk);
  rpt_walker_alloc(&rptwlk, graph.ht.capacity, 12);

  dBNodeBuffer nbuf;
  db_node_buf_alloc(&nbuf, 1024);

  // Construct graph but no paths
  build_graph_from_str_mt(&graph, 0, seq, strlen(seq));
  // Repeat copies collapse to shared kmers: 15 per flank + 12 for the unit
  TASSERT2(graph.ht.num_kmers == 15+12+15, "%zu", (size_t)graph.ht.num_kmers);

  // Find first node in sequence
  dBNode node0 = db_graph_find_str(&graph, seq);
  TASSERT(node0.key != HASH_NOT_FOUND);

  // 1) With no paths: walker stops once it re-enters the repeat
  char ans0[] = "ATTTGGAACTCCGGA""GATAGGGCCAGT";
  test_walk(&gwlk, &rptwlk, node0, &nbuf, &graph, 15+2, ans0);

  // 2) Add small paths - produces collapsed down seq with two copy repeat
  gen_paths_from_str_mt(gen_path_wrkr, p0, params);
  gen_paths_from_str_mt(gen_path_wrkr, p1, params);
  char ans1[] = "ATTTGGAACTCCGGA""GATAGGGCCAGT""GATAGGGCCAGT""CGTCAGGAGCTAACT";
  test_walk(&gwlk, &rptwlk, node0, &nbuf, &graph, 15+12+12+5, ans1);

  // 3) Add long paths: full-length path lets the walker recover all of seq
  gen_paths_from_str_mt(gen_path_wrkr, seq, params);
  test_walk(&gwlk, &rptwlk, node0, &nbuf, &graph, strlen(seq)+1-kmer_size, seq);

  graph_walker_dealloc(&gwlk);
  rpt_walker_dealloc(&rptwlk);
  db_node_buf_dealloc(&nbuf);
  gen_paths_workers_dealloc(gen_path_wrkr, 1);
  db_graph_dealloc(&graph);
}

// Entry point for this test module
void test_repeat_walker()
{
  test_status("Testing repeat_walker.h");
  test_repeat_loop();
}
// Allocate `num_callers` bubble-calling workers sharing one output stream.
// All callers share a single output mutex and bubble counter; caller 0 owns
// them for freeing purposes (see bubble_callers_destroy).
BubbleCaller* bubble_callers_new(size_t num_callers,
                                 BubbleCallingPrefs prefs,
                                 gzFile gzout,
                                 const dBGraph *db_graph)
{
  ctx_assert(num_callers > 0);

  // Max usage is 4 * max_allele_len * cols
  size_t longest_path = MAX2(prefs.max_flank_len, prefs.max_allele_len);

  BubbleCaller *callers = ctx_malloc(num_callers * sizeof(BubbleCaller));

  // Shared across all callers
  pthread_mutex_t *out_lock = ctx_malloc(sizeof(pthread_mutex_t));
  if(pthread_mutex_init(out_lock, NULL) != 0) die("mutex init failed");

  size_t *num_bubbles_ptr = ctx_calloc(1, sizeof(size_t));

  size_t idx;
  for(idx = 0; idx < num_callers; idx++)
  {
    BubbleCaller init = {.threadid = idx, .nthreads = num_callers,
                         .haploid_seen = ctx_calloc(1+prefs.num_haploid,
                                                    sizeof(bool)),
                         .num_bubbles_ptr = num_bubbles_ptr,
                         .prefs = prefs, .db_graph = db_graph,
                         .gzout = gzout, .out_lock = out_lock};

    // memcpy rather than struct assignment: the struct may hold const members
    memcpy(&callers[idx], &init, sizeof(BubbleCaller));

    // First two buffers don't actually need to grow
    db_node_buf_alloc(&callers[idx].flank5p, prefs.max_flank_len);
    db_node_buf_alloc(&callers[idx].pathbuf, longest_path);
    graph_walker_alloc(&callers[idx].wlk, db_graph);
    rpt_walker_alloc(&callers[idx].rptwlk, db_graph->ht.capacity, 22); // 4MB
    graph_cache_alloc(&callers[idx].cache, db_graph);
    cache_stepptr_buf_alloc(&callers[idx].spp_forward, 1024);
    cache_stepptr_buf_alloc(&callers[idx].spp_reverse, 1024);
    strbuf_alloc(&callers[idx].output_buf, 2048);
  }

  return callers;
}

// Free all workers made by bubble_callers_new(), including the shared
// mutex and bubble counter (owned by callers[0])
void bubble_callers_destroy(BubbleCaller *callers, size_t num_callers)
{
  ctx_assert(num_callers > 0);

  size_t idx;
  for(idx = 0; idx < num_callers; idx++)
  {
    BubbleCaller *c = &callers[idx];
    ctx_free(c->haploid_seen);
    db_node_buf_dealloc(&c->flank5p);
    db_node_buf_dealloc(&c->pathbuf);
    rpt_walker_dealloc(&c->rptwlk);
    graph_walker_dealloc(&c->wlk);
    graph_cache_dealloc(&c->cache);
    cache_stepptr_buf_dealloc(&c->spp_forward);
    cache_stepptr_buf_dealloc(&c->spp_reverse);
    strbuf_dealloc(&c->output_buf);
  }

  // Shared resources were allocated once — release through the first caller
  pthread_mutex_destroy(callers[0].out_lock);
  ctx_free(callers[0].out_lock);
  ctx_free(callers[0].num_bubbles_ptr);
  ctx_free(callers);
}