// Fetch the kmer-occurrence runs recorded for path `pathid` and rewrite each
// run's `qoffset` from "index of the step in the path" to "number of kmers
// (supernode nodes) that precede that step in the path".
//
// `ref_runs[pathid]` gives the slice of `runbuf->data` belonging to the path
// (`first_runid`, `num_runs`). The slice is sorted in place by qoffset and
// modified in place.
//
// Returns a pointer to the first run of the slice (inside `runbuf->data`).
static inline KOccurRun* fetch_ref_contact(const GraphCache *cache,
                                           uint32_t pathid,
                                           const PathRefRun *ref_runs,
                                           KOccurRunBuffer *runbuf)
{
  // Steps making up the path
  const GCachePath *path = graph_cache_path(cache, pathid);
  const GCacheStep *path_steps = graph_cache_step(cache, path->first_step);
  const size_t step_count = path->num_steps;

  // Runs belonging to this path, ordered by the step they refer to
  const PathRefRun *slice = &ref_runs[pathid];
  const size_t run_count = slice->num_runs;
  KOccurRun *runs = runbuf->data + slice->first_runid;
  koruns_sort_by_qoffset(runs, run_count);

  // Walk steps and runs in lock-step, accumulating the kmer offset
  size_t run_idx = 0, step_idx = 0, kmer_offset = 0;

  while(step_idx < step_count)
  {
    // All runs attached to this step get the offset of the step's first kmer
    while(run_idx < run_count && runs[run_idx].qoffset == step_idx)
    {
      runs[run_idx].qoffset = kmer_offset;
      run_idx++;
    }

    // Nothing left to translate: no need to visit the remaining steps
    if(run_idx == run_count)
      break;

    const GCacheSnode *snode
      = graph_cache_snode(cache, path_steps[step_idx].supernode);
    kmer_offset += snode->num_nodes;
    step_idx++;
  }

  return runs;
}
// If `pickup_new_runs` is true we pick up runs starting at this supernode static inline bool gcrawler_stop_at_ref_covg(const GraphCache *cache, const GCacheStep *step, BreakpointCaller *caller, KOccurRunBuffer *koruns, KOccurRunBuffer *koruns_ended, bool pickup_new_runs) { GCacheSnode *snode = graph_cache_snode(cache, step->supernode); GCachePath *path = graph_cache_path(cache, step->pathid); const dBNode *nodes = graph_cache_first_node(cache, snode); bool forward = (step->orient == FORWARD); // Use index of last step as qoffset size_t qoffset = path->num_steps-1; // Kmer occurance runs are added to koruns_3p_ended only if they end and are // longer than the mininum length in kmers (caller->min_ref_nkmers) kograph_filter_extend(caller->kograph, nodes, snode->num_nodes, forward, caller->min_ref_nkmers, qoffset, koruns, koruns_ended, pickup_new_runs); size_t i, min_run_qoffset = SIZE_MAX, min_ended_run_qoffset = SIZE_MAX; for(i = 0; i < koruns->len; i++) min_run_qoffset = MIN2(min_run_qoffset, koruns->data[i].qoffset); // Stop if all our earliest runs have finished for(i = 0; i < koruns_ended->len; i++) { min_ended_run_qoffset = MIN2(min_ended_run_qoffset, koruns_ended->data[i].qoffset); } // Continue if... return min_run_qoffset <= min_ended_run_qoffset; }
// Constructs a path of supernodes (SupernodePath) // `wlk` GraphWalker should be set to go at `node` // `rptwlk` RepeatWalker should be clear // `jmpfunc` is called with each supernode traversed and if it returns true // we continue crawling, otherwise we stop. If NULL assume always true // returns pathid in GraphCache uint32_t graph_crawler_load_path(GraphCache *cache, dBNode node, GraphWalker *wlk, RepeatWalker *rptwlk, bool (*jmpfunc)(GraphCache *_cache, GCacheStep *_step, void *_arg), void *arg) { size_t i; uint32_t stepid, pathid = graph_cache_new_path(cache); ctx_assert(db_nodes_are_equal(wlk->node, node)); for(i = 0; ; i++) { stepid = graph_cache_new_step(cache, node); GCacheStep *step = graph_cache_step(cache, stepid); GCacheSnode *snode = graph_cache_snode(cache, step->supernode); // Traverse to the end of the supernode walk_supernode_end(cache, snode, step->orient, wlk); if(jmpfunc != NULL && !jmpfunc(cache, step, arg)) break; // Find next node uint8_t num_edges; const dBNode *next_nodes; Nucleotide next_bases[4]; if(step->orient == FORWARD) { num_edges = snode->num_next; next_nodes = snode->next_nodes; binary_seq_unpack_byte(next_bases, snode->next_bases); } else { num_edges = snode->num_prev; next_nodes = snode->prev_nodes; binary_seq_unpack_byte(next_bases, snode->prev_bases); } // Traverse to next supernode if(!graph_walker_next_nodes(wlk, num_edges, next_nodes, next_bases) || !rpt_walker_attempt_traverse(rptwlk, wlk)) break; node = wlk->node; } return pathid; }
// Visit every supernode in the caller's graph cache, treat it as a candidate
// 3p flank, and print a bubble for each orientation (forward / reverse) in
// which find_bubbles_ending_with() left more than one path in
// `caller->spp_forward` / `caller->spp_reverse`.
static void write_bubbles_to_file(BubbleCaller *caller)
{
  // The supernode count is read once, before any bubble search is run
  const size_t total_snodes = graph_cache_num_snodes(&caller->cache);
  size_t idx = 0;

  while(idx < total_snodes)
  {
    GCacheSnode *candidate = graph_cache_snode(&caller->cache, idx);
    find_bubbles_ending_with(caller, candidate);

    // A bubble needs at least two paths ending at the flank
    if(caller->spp_forward.len > 1)
    {
      print_bubble(caller, caller->spp_forward.b, caller->spp_forward.len);
    }

    if(caller->spp_reverse.len > 1)
    {
      print_bubble(caller, caller->spp_reverse.b, caller->spp_reverse.len);
    }

    idx++;
  }
}
// Reset `rptwlk` after it was used to crawl path `pathid`: call
// rpt_walker_fast_clear() with an empty node list, then clear the first and
// last node of every supernode on the path individually.
void graph_crawler_reset_rpt_walker(RepeatWalker *rptwlk,
                                    const GraphCache *cache, uint32_t pathid)
{
  rpt_walker_fast_clear(rptwlk, NULL, 0);

  const GCachePath *path = graph_cache_path(cache, pathid);
  const GCacheStep *path_steps = graph_cache_step(cache, path->first_step);
  const size_t step_count = path->num_steps;

  // Orientation of each step is irrelevant: both end nodes are cleared
  for(size_t k = 0; k < step_count; k++)
  {
    const GCacheSnode *snode = graph_cache_snode(cache, path_steps[k].supernode);
    const dBNode *head = graph_cache_first_node(cache, snode);
    const dBNode *tail = graph_cache_last_node(cache, snode);

    rpt_walker_fast_clear_single_node(rptwlk, *head);
    rpt_walker_fast_clear_single_node(rptwlk, *tail);
  }
}
// Potential bubble - filter ref and duplicate alleles static void print_bubble(BubbleCaller *caller, GCacheStep **steps, size_t num_paths) { const BubbleCallingPrefs prefs = caller->prefs; const dBGraph *db_graph = caller->db_graph; GCacheSnode *snode; size_t i; dBNodeBuffer *flank5p = &caller->flank5p; if(flank5p->len == 0) { // Haven't fetched 5p flank yet // flank5p[0] already contains the first node flank5p->len = 1; supernode_extend(flank5p, prefs.max_flank_len, db_graph); db_nodes_reverse_complement(flank5p->b, flank5p->len); } // // Print Bubble // // write to string buffer then flush to gzFile StrBuf *sbuf = &caller->output_buf; strbuf_reset(sbuf); // Temporary node buffer to use dBNodeBuffer *pathbuf = &caller->pathbuf; db_node_buf_reset(pathbuf); // Get bubble number (threadsafe num_bubbles_ptr++) size_t id = __sync_fetch_and_add((volatile size_t*)caller->num_bubbles_ptr, 1); // This can be set to anything without a '.' in it const char prefix[] = "call"; // 5p flank // strbuf_sprintf(sbuf, ">bubble.%s%zu.5pflank kmers=%zu\n", prefix, id, flank5p->len); strbuf_append_str(sbuf, ">bubble."); strbuf_append_str(sbuf, prefix); strbuf_append_ulong(sbuf, id); strbuf_append_str(sbuf, ".5pflank kmers="); strbuf_append_ulong(sbuf, flank5p->len); strbuf_append_char(sbuf, '\n'); branch_to_str(flank5p->b, flank5p->len, true, sbuf, db_graph); // 3p flank db_node_buf_reset(pathbuf); snode = graph_cache_snode(&caller->cache, steps[0]->supernode); graph_cache_snode_fetch_nodes(&caller->cache, snode, steps[0]->orient, pathbuf); // strbuf_sprintf(sbuf, ">bubble.%s%zu.3pflank kmers=%zu\n", prefix, id, pathbuf->len); strbuf_append_str(sbuf, ">bubble."); strbuf_append_str(sbuf, prefix); strbuf_append_ulong(sbuf, id); strbuf_append_str(sbuf, ".3pflank kmers="); strbuf_append_ulong(sbuf, pathbuf->len); strbuf_append_char(sbuf, '\n'); branch_to_str(pathbuf->b, pathbuf->len, false, sbuf, db_graph); // Print alleles for(i = 0; i < num_paths; i++) { db_node_buf_reset(pathbuf); 
graph_cache_step_fetch_nodes(&caller->cache, steps[i], pathbuf); // strbuf_sprintf(sbuf, ">bubble.%s%zu.branch.%zu kmers=%zu\n", // prefix, id, i, pathbuf->len); strbuf_append_str(sbuf, ">bubble."); strbuf_append_str(sbuf, prefix); strbuf_append_ulong(sbuf, id); strbuf_append_str(sbuf, ".branch."); strbuf_append_ulong(sbuf, i); strbuf_append_str(sbuf, " kmers="); strbuf_append_ulong(sbuf, pathbuf->len); strbuf_append_char(sbuf, '\n'); branch_to_str(pathbuf->b, pathbuf->len, false, sbuf, db_graph); } strbuf_append_char(sbuf, '\n'); ctx_assert(strlen(sbuf->b) == sbuf->end); // lock, print, unlock pthread_mutex_lock(caller->out_lock); gzwrite(caller->gzout, sbuf->b, sbuf->end); pthread_mutex_unlock(caller->out_lock); }