// `nbuf` and `sbuf` are temporary variables used by this function static void _call_bubble(BubbleCaller *caller, const char *flank5p, const char *flank3p, const char **alleles, size_t num_alleles, dBNodeBuffer *nbuf, StrBuf *sbuf) { const dBGraph *graph = caller->db_graph; const size_t kmer_size = graph->kmer_size; dBNode node5p = db_graph_find_str(graph, flank5p+strlen(flank5p)-kmer_size); dBNode node3p = db_graph_find_str(graph, flank3p); TASSERT(node5p.key != HASH_NOT_FOUND); TASSERT(node3p.key != HASH_NOT_FOUND); Edges edges5p = db_node_get_edges_union(graph, node5p.key); Edges edges3p = db_node_get_edges_union(graph, node3p.key); TASSERT(edges_get_outdegree(edges5p, node5p.orient) > 1); TASSERT(edges_get_indegree(edges3p, node3p.orient) > 1); find_bubbles(caller, node5p); GCacheUnitig *snode3p; Orientation snorient3p; GCacheStepPtrBuf *stepbuf; // Get 3p flank and orientation snode3p = graph_cache_find_unitig(&caller->cache, node3p); TASSERT(snode3p != NULL); snorient3p = gc_unitig_get_orient(&caller->cache, snode3p, node3p); find_bubbles_ending_with(caller, snode3p); stepbuf = (snorient3p == FORWARD ? &caller->spp_forward : &caller->spp_reverse); _check_alleles(&caller->cache, stepbuf, alleles, num_alleles, nbuf, sbuf); }
// Returns true if the run of nodes in `nbuf` has at most one edge in total
// leading into its first node and out of its last node (edges taken as the
// union over the graph). `nbuf` must hold at least one node.
static inline bool nodes_are_tip(dBNodeBuffer nbuf, const dBGraph *db_graph)
{
  Edges head_edges = db_node_get_edges_union(db_graph, nbuf.b[0].key);
  Edges tail_edges = db_node_get_edges_union(db_graph, nbuf.b[nbuf.len-1].key);

  // Edges entering the run at its start and leaving it at its end
  int num_in = edges_get_indegree(head_edges, nbuf.b[0].orient);
  int num_out = edges_get_outdegree(tail_edges, nbuf.b[nbuf.len-1].orient);

  return num_in + num_out <= 1;
}
/* * Get position after current segment. A segement is a stretch of kmers aligned * to the graph with no gaps. * @param aln alignment of read to the graph * @param start offset in the alignment that starts this segments * @param missing_edge set to 1 if the segment was ended due to a missing edge * @return index of node after next gap or aln->nodes.len if no more gaps */ size_t db_alignment_next_gap(const dBAlignment *aln, size_t start, bool *missing_edge, const dBGraph *db_graph) { size_t i, end = aln->rpos.len; const int32_t *rpos = aln->rpos.b; const dBNode *nodes = aln->nodes.b; int colour = aln->colour; Edges edges; Nucleotide nuc; *missing_edge = false; if(end == 0) return 0; // Set upper bound on position of the next gap as within this read if(aln->used_r1 && aln->used_r2 && start < aln->r2strtidx) end = aln->r2strtidx; for(i = start+1; i < end && rpos[i-1]+1 == rpos[i]; i++) { // Check for a missing edge edges = colour < 0 ? db_node_get_edges_union(db_graph, nodes[i-1].key) : db_node_get_edges(db_graph, nodes[i-1].key, colour); nuc = db_node_get_last_nuc(nodes[i], db_graph); if(!edges_has_edge(edges, nuc, nodes[i-1].orient)) { *missing_edge = true; break; } } // Return position after gap return i; }
// Print in/outdegree - For debugging mostly // indegree/outdegree (2 means >=2) // 00: ! 01: + 02: { // 10: - 11: = 12: < // 20: } 21: > 22: * void db_nodes_print_edges(const dBNode *nodes, size_t num, const dBGraph *db_graph, FILE *out) { size_t i, indegree, outdegree; Edges edges; const char symbols[3][3] = {"!+{","-=<","}>*"}; for(i = 0; i < num; i++) { edges = db_node_get_edges_union(db_graph, nodes[i].key); indegree = MIN2(edges_get_indegree(edges, nodes[i].orient), 2); outdegree = MIN2(edges_get_outdegree(edges, nodes[i].orient), 2); fputc(symbols[indegree][outdegree], out); } }
// Test whether the node run nlist[0..len-1] closes on itself.
// Returns true only if the first node has indegree 1, the last node has
// outdegree 1 (edges taken as the union over the graph), and shifting the last
// nucleotide of `bkmer0` onto `bkmer1` gives `bkmer0` or its reverse
// complement.
// NOTE(review): presumably bkmer0/bkmer1 are the kmers of the first and last
// nodes - confirm against the caller.
static bool supernode_is_closed_cycle(const dBNode *nlist, size_t len,
                                      BinaryKmer bkmer0, BinaryKmer bkmer1,
                                      const dBGraph *db_graph)
{
  const size_t kmer_size = db_graph->kmer_size;

  // Exactly one way in at the start...
  Edges first_edges = db_node_get_edges_union(db_graph, nlist[0].key);
  if(edges_get_indegree(first_edges, nlist[0].orient) != 1) {
    return false;
  }

  // ...and exactly one way out at the end
  Edges last_edges = db_node_get_edges_union(db_graph, nlist[len-1].key);
  if(edges_get_outdegree(last_edges, nlist[len-1].orient) != 1) {
    return false;
  }

  // Step from the last kmer using the final base of the first kmer
  Nucleotide closing_nuc = bkmer_get_last_nuc(bkmer0, nlist[0].orient, kmer_size);
  BinaryKmer stepped = bkmer_shift_add_last_nuc(bkmer1, nlist[len-1].orient,
                                                kmer_size, closing_nuc);

  // Match in either orientation
  return binary_kmers_are_equal(bkmer0, stepped) ||
         binary_kmers_are_equal(bkmer0,
                                binary_kmer_reverse_complement(stepped,
                                                               kmer_size));
}
// Extend a supernode, nlist[offset] must already be set // Walk along nodes starting from node/or, storing the supernode in nlist // Returns the number of nodes added, adds no more than `limit` // return false if out of space and limit > 0 bool supernode_extend(dBNodeBuffer *nbuf, size_t limit, const dBGraph *db_graph) { ctx_assert(nbuf->len > 0); const size_t kmer_size = db_graph->kmer_size; dBNode node0 = nbuf->data[0], node1 = nbuf->data[nbuf->len-1], node = node1; BinaryKmer bkmer = db_node_oriented_bkmer(db_graph, node); Edges edges = db_node_get_edges_union(db_graph, node.key); Nucleotide nuc; while(edges_has_precisely_one_edge(edges, node.orient, &nuc)) { bkmer = binary_kmer_left_shift_add(bkmer, kmer_size, nuc); node = db_graph_find(db_graph, bkmer); edges = db_node_get_edges_union(db_graph, node.key); ctx_assert(node.key != HASH_NOT_FOUND); if(edges_has_precisely_one_edge(edges, rev_orient(node.orient), &nuc)) { if(node.key == node0.key || node.key == nbuf->data[nbuf->len-1].key) { // don't create a loop A->B->A or a->b->B->A break; } if(limit && nbuf->len >= limit) return false; db_node_buf_add(nbuf, node); } else break; } return true; }