// Return 1 if changed; 0 otherwise bool infer_pop_edges(const BinaryKmer node_bkey, Edges *edges, const Covg *covgs, const dBGraph *db_graph) { Edges uedges = 0, iedges = 0xf, add_edges, edge; size_t orient, nuc, col, kmer_size = db_graph->kmer_size; const size_t ncols = db_graph->num_of_cols; BinaryKmer bkey, bkmer; hkey_t next; Edges newedges[ncols]; // char tmp[MAX_KMER_SIZE+1]; // binary_kmer_to_str(node_bkey, db_graph->kmer_size, tmp); // status("Inferring %s", tmp); for(col = 0; col < ncols; col++) { uedges |= edges[col]; // union of edges iedges &= edges[col]; // intersection of edges newedges[col] = edges[col]; } add_edges = uedges & ~iedges; if(!add_edges) return 0; for(orient = 0; orient < 2; orient++) { bkmer = (orient == FORWARD ? binary_kmer_left_shift_one_base(node_bkey, kmer_size) : binary_kmer_right_shift_one_base(node_bkey)); for(nuc = 0; nuc < 4; nuc++) { edge = nuc_orient_to_edge(nuc, orient); if(add_edges & edge) { // get next bkmer, look up in graph if(orient == FORWARD) binary_kmer_set_last_nuc(&bkmer, nuc); else binary_kmer_set_first_nuc(&bkmer, dna_nuc_complement(nuc), kmer_size); bkey = bkmer_get_key(bkmer, kmer_size); next = hash_table_find(&db_graph->ht, bkey); ctx_assert(next != HASH_NOT_FOUND); for(col = 0; col < ncols; col++) if(covgs[col] > 0 && db_node_has_col(db_graph, next, col)) newedges[col] |= edge; } } } int cmp = memcmp(edges, newedges, sizeof(Edges)*ncols); memcpy(edges, newedges, sizeof(Edges)*ncols); return (cmp != 0); }
// Return 1 if changed; 0 otherwise bool infer_all_edges(const BinaryKmer node_bkey, Edges *edges, const Covg *covgs, const dBGraph *db_graph) { Edges iedges = 0xff, edge; size_t orient, nuc, col, kmer_size = db_graph->kmer_size; const size_t ncols = db_graph->num_of_cols; BinaryKmer bkey, bkmer; hkey_t next; Edges newedges[ncols]; memcpy(newedges, edges, ncols * sizeof(Edges)); // intersection of edges for(col = 0; col < ncols; col++) iedges &= edges[col]; for(orient = 0; orient < 2; orient++) { bkmer = (orient == FORWARD ? binary_kmer_left_shift_one_base(node_bkey, kmer_size) : binary_kmer_right_shift_one_base(node_bkey)); for(nuc = 0; nuc < 4; nuc++) { edge = nuc_orient_to_edge(nuc, orient); if(!(iedges & edge)) { // edges are missing from some samples if(orient == FORWARD) binary_kmer_set_last_nuc(&bkmer, nuc); else binary_kmer_set_first_nuc(&bkmer, dna_nuc_complement(nuc), kmer_size); bkey = bkmer_get_key(bkmer, kmer_size); next = hash_table_find(&db_graph->ht, bkey); if(next != HASH_NOT_FOUND) { for(col = 0; col < ncols; col++) { if(covgs[col] > 0 && db_node_has_col(db_graph, next, col)) { newedges[col] |= edge; } } } } } } // Check if we changed the edges int cmp = memcmp(edges, newedges, sizeof(Edges)*ncols); memcpy(edges, newedges, sizeof(Edges)*ncols); return (cmp != 0); }
// Decode a binary kmer into a NUL-terminated string.
//
// bkmer:            num_of_bitfields words holding the kmer; not modified
// seq:              output buffer; positions 0..kmer_size are written, so it
//                   needs room for at least kmer_size+1 chars
// kmer_size:        number of bases to decode
// num_of_bitfields: number of uint64_t words in bkmer
//
// Returns seq.
static char* binary_kmer_to_seq(uint64_t* bkmer, char * seq,
                                int kmer_size, int num_of_bitfields)
{
  // Work on a private copy so the caller's kmer is left untouched
  uint64_t scratch[num_of_bitfields];
  int word, pos;

  for(word = 0; word < num_of_bitfields; word++)
    scratch[word] = bkmer[word];

  // Fill seq from the last base to the first: each base is read from the two
  // lowest bits of the final word, then the copy is shifted right by one base
  pos = kmer_size;
  while(pos > 0)
  {
    pos--;
    uint64_t low_bits = scratch[num_of_bitfields-1] & 0x3;
    seq[pos] = binary_nucleotide_to_char(low_bits);
    binary_kmer_right_shift_one_base(scratch, num_of_bitfields);
  }

  seq[kmer_size] = '\0';
  return seq;
}
// if colour is -1 aligns to all colours, otherwise aligns to given colour only // Returns number of kmers lost from the end static size_t db_alignment_from_read(dBAlignment *aln, const read_t *r, uint8_t qcutoff, uint8_t hp_cutoff, const dBGraph *db_graph, int colour) { size_t contig_start, contig_end = 0, search_start = 0; const size_t kmer_size = db_graph->kmer_size; BinaryKmer bkmer, tmp_key; Nucleotide nuc; hkey_t node; size_t i, offset, nxtbse; dBNodeBuffer *nodes = &aln->nodes; Int32Buffer *rpos = &aln->rpos; ctx_assert(nodes->len == rpos->len); size_t n = nodes->len, init_len = n; db_node_buf_capacity(nodes, n + r->seq.end); int32_buf_capacity(rpos, n + r->seq.end); while((contig_start = seq_contig_start(r, search_start, kmer_size, qcutoff, hp_cutoff)) < r->seq.end) { contig_end = seq_contig_end(r, contig_start, kmer_size, qcutoff, hp_cutoff, &search_start); const char *contig = r->seq.b + contig_start; size_t contig_len = contig_end - contig_start; bkmer = binary_kmer_from_str(contig, kmer_size); bkmer = binary_kmer_right_shift_one_base(bkmer); for(offset=contig_start, nxtbse=kmer_size-1; nxtbse < contig_len; nxtbse++,offset++) { nuc = dna_char_to_nuc(contig[nxtbse]); bkmer = binary_kmer_left_shift_add(bkmer, kmer_size, nuc); tmp_key = binary_kmer_get_key(bkmer, kmer_size); node = hash_table_find(&db_graph->ht, tmp_key); if(node != HASH_NOT_FOUND && (colour == -1 || db_node_has_col(db_graph, node, colour))) { nodes->b[n].key = node; nodes->b[n].orient = bkmer_get_orientation(bkmer, tmp_key); rpos->b[n] = offset; n++; } } } // Return number of bases from the last kmer found until read end size_t ret = (n == init_len ? r->seq.end /* No kmers found */ : r->seq.end - (rpos->b[n-1] + kmer_size)); nodes->len = rpos->len = n; // Check for sequence gaps for(i = init_len; i+1 < nodes->len; i++) { if(rpos->b[i]+1 < rpos->b[i+1]) { aln->seq_gaps = true; break; } } return ret; }