// Read-only check of whether the k-mers of old_s may be remapped onto new_s.
//
// Returns false when CheckAllDifferent(old_s, new_s) fails, or when a k-mer of
// new_s (taken at the position aligned to an old k-mer) is already a key of
// mapping_ and Substitute() of it is not the corresponding old k-mer.
// Returns true otherwise. mapping_ is not modified.
bool CheckCanRemap(const Sequence& old_s, const Sequence& new_s) const {
    if (!CheckAllDifferent(old_s, new_s)) {
      return false;
    }

    // Number of k-mers in each sequence.
    const size_t old_length = old_s.size() - k_ + 1;
    const size_t new_length = new_s.size() - k_ + 1;
    UniformPositionAligner aligner(old_length, new_length);

    // Pre-shift the first k-mer so the unconditional `<<=` at the top of the
    // loop can be used for every iteration, including the first.
    // NOTE(review): presumably `>>= 0` followed by `<<= old_s[k_ - 1]` restores
    // the initial k-mer -- confirm against Kmer's shift operators.
    Kmer old_kmer = old_s.start<Kmer>(k_);
    old_kmer >>= 0;

    for (size_t last = k_ - 1; last < old_s.size(); ++last) {
      old_kmer <<= old_s[last];

      const size_t old_offset = last - k_ + 1;
      size_t new_offset = aligner.GetPosition(old_offset);

      // Old k-mer is the exact centre of an odd-length run while the new run
      // has even length: choose between the two central new positions by
      // comparing the central (k_-1)-mer of new_s with `!middle`
      // (presumably its reverse complement -- TODO confirm).
      const bool old_is_centre = (old_offset * 2 + 1 == old_length);
      const bool new_is_even = (new_length % 2 == 0);
      if (old_is_centre && new_is_even) {
        Kmer middle(k_ - 1, new_s, new_length / 2);
        if (typename Kmer::less2()(middle, !middle)) {
          new_offset = new_length - 1 - new_offset;
        }
      }

      Kmer new_kmer(k_, new_s, new_offset);
      // An existing entry for new_kmer is only acceptable if it already
      // resolves to this very old k-mer.
      if (mapping_.find(new_kmer) != mapping_.end() &&
          Substitute(new_kmer) != old_kmer) {
        return false;
      }
    }

    return true;
  }
  void RemapKmers(const Sequence& old_s, const Sequence& new_s) {
    VERIFY(this->IsAttached());
    size_t old_length = old_s.size() - k_ + 1;
    size_t new_length = new_s.size() - k_ + 1;
    UniformPositionAligner aligner(old_s.size() - k_ + 1,
                                   new_s.size() - k_ + 1);
    Kmer old_kmer = old_s.start<Kmer>(k_);

    for (size_t i = k_ - 1; i < old_s.size(); ++i) {
      // Instead of shifting right
      if (i != k_ - 1) {
        old_kmer <<= old_s[i];
      }

      size_t old_kmer_offset = i - k_ + 1;
      size_t new_kmer_offest = aligner.GetPosition(old_kmer_offset);
      if(old_kmer_offset * 2 + 1 == old_length && new_length % 2 == 0) {
        Kmer middle(unsigned(k_ - 1), new_s, new_length / 2);
        if(typename Kmer::less2()(middle, !middle)) {
          new_kmer_offest = new_length - 1 - new_kmer_offest;
        }
      }
      Kmer new_kmer(unsigned(k_), new_s, new_kmer_offest);
      auto it = mapping_.find(new_kmer);
      if (it != mapping_.end()) {
    	if(verification_on_)
    		VERIFY(Substitute(new_kmer) == old_kmer);
        mapping_.erase(it);
      }
      if(old_kmer != new_kmer)
            mapping_[old_kmer] = new_kmer;
    }
  }
Пример #3
0
/**
 * Recursively extends `kmer` along the out-edges of `source_vertex` and, once it
 * reaches `kmer_size` characters, appends a label for it to `kmer_map`.
 *
 * @param kmer             Sequence accumulated so far on this path.
 * @param starting_vertex  Stored as the first field of every recorded label.
 * @param source_vertex    Vertex whose out-edges are followed next; stored as the
 *                         second field of a recorded label.
 * @param graph            Graph being traversed.
 * @param vertex_vector    Per-vertex labels; `.dna` of a target vertex is appended
 *                         to the k-mer.
 * @param free_nodes       Vertices that are traversed without appending anything
 *                         and without restricting the id bits.
 * @param edge_ids         Optional per-edge bitsets; when an entry exists for
 *                         (source, target) the id bits are AND-ed with it.
 * @param id_bits          Ids still valid on this path. If none are set the call
 *                         returns immediately.
 * @param kmer_map         Output: k-mer -> list of labels.
 * @param kmer_size        Length at which a k-mer is recorded and recursion stops.
 */
void
checkKmers(DnaString const & kmer,
           TVertexDescriptor const & starting_vertex,
           TVertexDescriptor const & source_vertex,
           TGraph const & graph,
           std::vector<VertexLabels> & vertex_vector,
           boost::unordered_set<TVertexDescriptor> const & free_nodes,
           boost::unordered_map< std::pair<TVertexDescriptor, TVertexDescriptor>, boost::dynamic_bitset<> > & edge_ids,
           boost::dynamic_bitset<> const & id_bits,
           TKmerMap & kmer_map,
           std::size_t const & kmer_size
          )
{
  typedef boost::unordered_map< std::pair<TVertexDescriptor, TVertexDescriptor>, boost::dynamic_bitset<> > TEdgeIds;

  // No id can follow this path any more.
  if (id_bits.none())
    return;

  if (length(kmer) == kmer_size)
  {
    KmerLabels new_kmer_label =
    {
      starting_vertex,
      source_vertex,
      id_bits
    };

    // operator[] creates an empty entry for an unseen k-mer, so one lookup
    // covers both the "first label" and the "additional label" case.
    kmer_map[kmer].push_back(new_kmer_label);
    return;
  }

  for (Iterator<TGraph, OutEdgeIterator>::Type out_edge_iterator (graph, source_vertex) ; !atEnd(out_edge_iterator) ; ++out_edge_iterator)
  {
    DnaString new_kmer(kmer);
    TVertexDescriptor const & target_vertex = targetVertex(out_edge_iterator);
    boost::dynamic_bitset<> new_id_bits(id_bits);

    // Free nodes contribute neither a character nor an id restriction.
    if (free_nodes.count(target_vertex) == 0)
    {
      seqan::appendValue(new_kmer, vertex_vector[target_vertex].dna);

      // Single lookup; an edge without an entry leaves the ids unrestricted.
      TEdgeIds::iterator edge_it = edge_ids.find(std::pair<TVertexDescriptor, TVertexDescriptor>(source_vertex, target_vertex));

      if (edge_it != edge_ids.end())
      {
        new_id_bits &= edge_it->second;
      }
    }

    checkKmers(new_kmer, starting_vertex, target_vertex, graph, vertex_vector, free_nodes, edge_ids, new_id_bits, kmer_map, kmer_size);
  }
}
Пример #4
0
/**
 * Recursively extends `kmer` along the out-edges of `source_vertex` and, once it
 * reaches `kmer_size` characters, merges the surviving id bits into `kmer_map`.
 *
 * @param kmer           Sequence accumulated so far on this path.
 * @param graph          Graph being traversed.
 * @param source_vertex  Vertex whose out-edges are followed next.
 * @param vertex_vector  Per-vertex labels; `.dna` of a target vertex is appended
 *                       to the k-mer.
 * @param free_nodes     Vertices that are traversed without appending anything
 *                       and without restricting the id bits.
 * @param edge_ids       Optional per-edge bitsets; when an entry exists for
 *                       (source, target) the id bits are AND-ed with it.
 * @param id_bits        Ids still valid on this path. If none are set the call
 *                       returns immediately.
 * @param kmer_map       Output: k-mer -> OR of the id bits of all paths spelling it.
 *                       A full-length k-mer whose id bits are all set is not stored.
 * @param kmer_size      Length at which a k-mer is recorded and recursion stops.
 */
void
check_kmers_simple(DnaString const & kmer,
                   TGraph const & graph,
                   TVertexDescriptor const & source_vertex,
                   std::vector<VertexLabels> & vertex_vector,
                   boost::unordered_set<TVertexDescriptor> const & free_nodes,
                   boost::unordered_map< std::pair<TVertexDescriptor, TVertexDescriptor>, boost::dynamic_bitset<> > & edge_ids,
                   boost::dynamic_bitset<> const & id_bits,
                   TKmerMapSimple & kmer_map,
                   std::size_t const & kmer_size
                  )
{
  typedef boost::unordered_map< std::pair<TVertexDescriptor, TVertexDescriptor>, boost::dynamic_bitset<> > TEdgeIds;

  // No id can follow this path any more.
  if (id_bits.none())
    return;

  if (length(kmer) == kmer_size)
  {
    // A k-mer carried by every id is skipped.
    if (id_bits.all())
      return;

    TKmerMapSimple::iterator found = kmer_map.find(kmer);

    if (found == kmer_map.end())
    {
      // Assign rather than OR: a freshly created entry would be an empty
      // bitset whose size does not match id_bits.
      kmer_map[kmer] = id_bits;
    }
    else
    {
      found->second |= id_bits;
    }

    return;
  }

  for (Iterator<TGraph, OutEdgeIterator>::Type out_edge_iterator (graph, source_vertex) ; !atEnd(out_edge_iterator) ; ++out_edge_iterator)
  {
    DnaString new_kmer(kmer);
    TVertexDescriptor const & target_vertex = targetVertex(out_edge_iterator);
    boost::dynamic_bitset<> new_id_bits(id_bits);

    // Free nodes contribute neither a character nor an id restriction.
    if (free_nodes.count(target_vertex) == 0)
    {
      seqan::appendValue(new_kmer, vertex_vector[target_vertex].dna);

      // Single lookup; an edge without an entry leaves the ids unrestricted.
      TEdgeIds::iterator edge_it = edge_ids.find(std::pair<TVertexDescriptor, TVertexDescriptor>(source_vertex, target_vertex));

      if (edge_it != edge_ids.end())
      {
        new_id_bits &= edge_it->second;
      }
    }

    check_kmers_simple(new_kmer, graph, target_vertex, vertex_vector, free_nodes, edge_ids, new_id_bits, kmer_map, kmer_size);
  }
}