bool CheckCanRemap(const Sequence& old_s, const Sequence& new_s) const { if(!CheckAllDifferent(old_s, new_s)) return false; size_t old_length = old_s.size() - k_ + 1; size_t new_length = new_s.size() - k_ + 1; UniformPositionAligner aligner(old_s.size() - k_ + 1, new_s.size() - k_ + 1); Kmer old_kmer = old_s.start<Kmer>(k_); old_kmer >>= 0; for (size_t i = k_ - 1; i < old_s.size(); ++i) { old_kmer <<= old_s[i]; size_t old_kmer_offset = i - k_ + 1; size_t new_kmer_offest = aligner.GetPosition(old_kmer_offset); if(old_kmer_offset * 2 + 1 == old_length && new_length % 2 == 0) { Kmer middle(k_ - 1, new_s, new_length / 2); if (typename Kmer::less2()(middle, !middle)) { new_kmer_offest = new_length - 1 - new_kmer_offest; } } Kmer new_kmer(k_, new_s, new_kmer_offest); auto it = mapping_.find(new_kmer); if (it != mapping_.end()) { if (Substitute(new_kmer) != old_kmer) { return false; } } } return true; }
void RemapKmers(const Sequence& old_s, const Sequence& new_s) { VERIFY(this->IsAttached()); size_t old_length = old_s.size() - k_ + 1; size_t new_length = new_s.size() - k_ + 1; UniformPositionAligner aligner(old_s.size() - k_ + 1, new_s.size() - k_ + 1); Kmer old_kmer = old_s.start<Kmer>(k_); for (size_t i = k_ - 1; i < old_s.size(); ++i) { // Instead of shifting right if (i != k_ - 1) { old_kmer <<= old_s[i]; } size_t old_kmer_offset = i - k_ + 1; size_t new_kmer_offest = aligner.GetPosition(old_kmer_offset); if(old_kmer_offset * 2 + 1 == old_length && new_length % 2 == 0) { Kmer middle(unsigned(k_ - 1), new_s, new_length / 2); if(typename Kmer::less2()(middle, !middle)) { new_kmer_offest = new_length - 1 - new_kmer_offest; } } Kmer new_kmer(unsigned(k_), new_s, new_kmer_offest); auto it = mapping_.find(new_kmer); if (it != mapping_.end()) { if(verification_on_) VERIFY(Substitute(new_kmer) == old_kmer); mapping_.erase(it); } if(old_kmer != new_kmer) mapping_[old_kmer] = new_kmer; } }
void checkKmers(DnaString const & kmer, TVertexDescriptor const & starting_vertex, TVertexDescriptor const & source_vertex, TGraph const & graph, std::vector<VertexLabels> & vertex_vector, boost::unordered_set<TVertexDescriptor> const & free_nodes, boost::unordered_map< std::pair<TVertexDescriptor, TVertexDescriptor>, boost::dynamic_bitset<> > & edge_ids, boost::dynamic_bitset<> const & id_bits, TKmerMap & kmer_map, std::size_t const & kmer_size ) { if (id_bits.none()) return; if (length(kmer) == kmer_size) { KmerLabels new_kmer_label = { starting_vertex, source_vertex, id_bits }; if (kmer_map.count(kmer) == 0) { std::vector<KmerLabels> new_vector(1, new_kmer_label); kmer_map[kmer] = new_vector; } else { kmer_map[kmer].push_back(new_kmer_label); } return; } for (Iterator<TGraph, OutEdgeIterator>::Type out_edge_iterator (graph, source_vertex) ; !atEnd(out_edge_iterator) ; ++out_edge_iterator) { DnaString new_kmer(kmer); TVertexDescriptor const & target_vertex = targetVertex(out_edge_iterator); boost::dynamic_bitset<> new_id_bits(id_bits); if (free_nodes.count(target_vertex) == 0) { seqan::appendValue(new_kmer, vertex_vector[target_vertex].dna); std::pair<TVertexDescriptor, TVertexDescriptor> edge_pair(source_vertex, target_vertex); if (edge_ids.count(edge_pair) == 1) { new_id_bits = id_bits & edge_ids[edge_pair]; } } checkKmers(new_kmer, starting_vertex, target_vertex, graph, vertex_vector, free_nodes, edge_ids, new_id_bits, kmer_map, kmer_size); } }
void check_kmers_simple(DnaString const & kmer, TGraph const & graph, TVertexDescriptor const & source_vertex, std::vector<VertexLabels> & vertex_vector, boost::unordered_set<TVertexDescriptor> const & free_nodes, boost::unordered_map< std::pair<TVertexDescriptor, TVertexDescriptor>, boost::dynamic_bitset<> > & edge_ids, boost::dynamic_bitset<> const & id_bits, TKmerMapSimple & kmer_map, std::size_t const & kmer_size ) { if (id_bits.none()) return; if (length(kmer) == kmer_size) { if (id_bits.all()) return; if (kmer_map.count(kmer) == 0) { kmer_map[kmer] = id_bits; } else { kmer_map[kmer] |= id_bits; } return; } for (Iterator<TGraph, OutEdgeIterator>::Type out_edge_iterator (graph, source_vertex) ; !atEnd(out_edge_iterator) ; ++out_edge_iterator) { DnaString new_kmer(kmer); TVertexDescriptor const & target_vertex = targetVertex(out_edge_iterator); // std::cout << source_vertex << " -> " << target_vertex << std::endl; boost::dynamic_bitset<> new_id_bits(id_bits); if (free_nodes.count(target_vertex) == 0) { seqan::appendValue(new_kmer, vertex_vector[target_vertex].dna); std::pair<TVertexDescriptor, TVertexDescriptor> edge_pair(source_vertex, target_vertex); if (edge_ids.count(edge_pair) == 1) { new_id_bits = id_bits & edge_ids[edge_pair]; } } check_kmers_simple(new_kmer, graph, target_vertex, vertex_vector, free_nodes, edge_ids, new_id_bits, kmer_map, kmer_size); } }