Ejemplo n.º 1
0
/*
 * Legacy consistency check for a single k-mer graph node.
 *
 * For each of the four base indices, asserts that the node's left/right
 * neighbour pointer and edge count are mirrored by the corresponding entry
 * of the neighbour:
 *   - no neighbour      => the edge count must be zero;
 *   - positive count    => the neighbour's k-mer is this k-mer extended by
 *                          base i, and the back edge is on the opposite side;
 *   - negative count    => the neighbour's k-mer is built from the reverse
 *                          complement, and the back edge is on the same side.
 *
 * In assert-enabled builds the function aborts on its first statement; it is
 * kept only until the k-mer endianness issue named in that assert is fixed.
 */
void
verify_node_orig(kg_node_t * node, unsigned kmer_length) {
    assert( false && "TODO FIX! REVERSED KMER ENDIANNESS" );
    int double_kmer_length = kmer_length << 1;   // two bits per base
#ifdef LARGE_KMERS
    Kmer mask;
    mask.createMask(double_kmer_length);
#else
    Kmer mask = (Kmer(1) << double_kmer_length) - 1;
#endif
    Kmer kmer = node->kmer;
    Kmer rc_kmer = reverseComplement(kmer, kmer_length);
    char leftmost_base = (kmer >> (double_kmer_length - 2)) & 0x3;
    char rightmost_base = kmer & 0x3;

    // Array indices derived from the end bases, and their complements.
    const int head_idx    = leftmost_base;
    const int head_rc_idx = leftmost_base ^ 0x3;
    const int tail_idx    = rightmost_base;
    const int tail_rc_idx = rightmost_base ^ 0x3;

    for (int i = 0 ; i < 4 ; ++ i) {
        // ---- left-hand edge for base i ----
        kg_node_t * left_nbr = node->left[i];
        int left_cnt = node->left_count[i];

        if (!left_nbr) {
            assert (left_cnt == 0);
        } else {
            assert (left_cnt != 0);
            if (left_cnt > 0) {
                Kmer expected = KMER_PREPEND(kmer, i, double_kmer_length, mask);
                assert(expected == left_nbr->kmer);
                assert(left_nbr->right[tail_idx] == node);
                assert(left_nbr->right_count[tail_idx] == left_cnt);
            } else {
                Kmer expected = KMER_APPEND(rc_kmer, i ^ 0x3, double_kmer_length, mask);
                assert(expected == left_nbr->kmer);
                assert(left_nbr->left[tail_rc_idx] == node);
                assert(left_nbr->left_count[tail_rc_idx] == left_cnt);
            }
        }

        // ---- right-hand edge for base i ----
        kg_node_t * right_nbr = node->right[i];
        int right_cnt = node->right_count[i];

        if (!right_nbr) {
            assert (right_cnt == 0);
        } else {
            assert (right_cnt != 0);
            if (right_cnt > 0) {
                Kmer expected = KMER_APPEND(kmer, i, double_kmer_length, mask);
                assert(expected == right_nbr->kmer);
                assert(right_nbr->left[head_idx] == node);
                assert(right_nbr->left_count[head_idx] == right_cnt);
            } else {
                Kmer expected = KMER_PREPEND(rc_kmer, i ^ 0x3, double_kmer_length, mask);
                assert(expected == right_nbr->kmer);
                assert(right_nbr->right[head_rc_idx] == node);
                assert(right_nbr->right_count[head_rc_idx] == right_cnt);
            }
        }
    }
}
Ejemplo n.º 2
0
/**
 * Appends a ';'-separated list of "(target_name,position,FW|RC)" records to
 * the stream, one record per transcript id in `u`.
 *
 * @param index  k-mer index providing findPosition() and target_names_.
 * @param o      output stream the records are appended to.
 * @param s      read sequence; the k-mer is taken at the offset returned by
 *               findFirstMappingKmer().
 * @param v      k-mer hits for the read; when empty no k-mer lookup is done.
 * @param u      transcript ids to report.
 *
 * When `v` is empty the k-mer and entry stay default-constructed and the
 * offset is 0, so findPosition() never receives an indeterminate value.
 */
void printTranscripts(const KmerIndex& index, std::stringstream& o, const std::string s,
  const std::vector<std::pair<KmerEntry,int>>& v, const std::vector<int> u) {

  Kmer km;
  KmerEntry val;
  int p = 0;  // well-defined even when there is no mapping k-mer

  // find first mapping k-mer
  if (!v.empty()) {
    p = findFirstMappingKmer(v,val);
    km = Kmer((s.c_str()+p));
  }

  // std::size_t index avoids the signed/unsigned comparison against u.size()
  for (std::size_t i = 0; i < u.size(); ++i) {
    const int tr = u[i];
    if (i > 0) {
      o << ";";
    }
    const std::pair<int, bool> xp = index.findPosition(tr, km, val, p);
    o << "(" << index.target_names_[tr] << "," << xp.first << ",";
    // second == true is reported as forward strand, false as reverse complement
    if (xp.second) {
      o << "FW)";
    } else {
      o << "RC)";
    }
  }
}
 void DeleteKMers(const Sequence &nucls, EdgeId e) {
     VERIFY(nucls.size() >= index_.k());
     KeyWithHash kwh = index_.ConstructKWH(Kmer(index_.k(), nucls));
     DeleteIfEqual(kwh, e);
     for (size_t i = index_.k(), n = nucls.size(); i < n; ++i) {
         kwh <<= nucls[i];
         DeleteIfEqual(kwh, e);
     }
 }
Ejemplo n.º 4
0
/*
 * Asserts that the uncoloured edges of `node` are mirrored in the graph.
 *
 * For each base index i and each side (left, right):
 *   - a non-zero colour requires a non-zero count;
 *   - if the colour is zero and the count is positive, the neighbouring k-mer
 *     (this k-mer extended by base i) must exist in `hashtable`, and its edge
 *     on the opposite side must have the same normalised count and colour 0;
 *   - if the colour is zero and the count is negative, the neighbour is built
 *     from the reverse complement, and the mirrored edge is on the same side
 *     under the complemented end base.
 * Neighbours are looked up by their canonical k-mer.
 */
void verify_node(KmerNode * node, KmerGraph *hashtable, unsigned kmer_length)
{
    int double_kmer_length = kmer_length << 1;   // two bits per base
#ifdef LARGE_KMERS
    Kmer mask;
    mask.createMask(double_kmer_length);
#else
    Kmer mask = (Kmer(1) << double_kmer_length) - 1;
#endif
    Kmer kmer = node->kmer;
    Kmer rc_kmer = reverseComplement(kmer, kmer_length);
    char rightmost_base = KMER_GET_TAIL_BASE(kmer, kmer_length);
    char leftmost_base = KMER_GET_HEAD_BASE(kmer, kmer_length);
    KmerNode *neighbor;

    for (int i = 0 ; i < 4 ; ++ i) {
        // ---- left-hand edge for base i ----
        int left_cnt = node->left_count[i];
        int left_col = node->left_color[i];
        assert( left_col == 0 || left_cnt != 0 );  // a coloured edge must carry a count
        if (left_col == 0 && left_cnt > 0) {
            Kmer neighbor_kmer = KMER_PREPEND(kmer, i, double_kmer_length, mask);
            neighbor = hashtable->findNode(canonicalKmer(neighbor_kmer, kmer_length));
            assert( neighbor != NULL );
            assert(cnorm(neighbor->right_count[static_cast<int>(rightmost_base)]) == cnorm(left_cnt));
            assert(neighbor->right_color[static_cast<int>(rightmost_base)] == 0);
        } else if (left_col == 0 && left_cnt < 0) {
            Kmer neighbor_kmer = KMER_APPEND(rc_kmer, i ^ 0x3, double_kmer_length);
            neighbor = hashtable->findNode(canonicalKmer(neighbor_kmer, kmer_length));
            assert( neighbor != NULL );
            assert(cnorm(neighbor->left_count[static_cast<int>( COMPLEMENT(rightmost_base) )]) == cnorm(left_cnt));
            assert(neighbor->left_color[static_cast<int>( COMPLEMENT(rightmost_base) )] == 0);
        }

        // ---- right-hand edge for base i ----
        int right_cnt = node->right_count[i];
        int right_col = node->right_color[i];
        assert( right_col == 0 || right_cnt != 0 );  // a coloured edge must carry a count
        if (right_col == 0 && right_cnt > 0) {
            Kmer neighbor_kmer = KMER_APPEND(kmer, i, double_kmer_length);
            neighbor = hashtable->findNode(canonicalKmer(neighbor_kmer, kmer_length));
            assert( neighbor != NULL );
            assert(cnorm(neighbor->left_count[static_cast<int>(leftmost_base)]) == cnorm(right_cnt));
            assert(neighbor->left_color[static_cast<int>(leftmost_base)] == 0);
        } else if (right_col == 0 && right_cnt < 0) {
            Kmer neighbor_kmer = KMER_PREPEND(rc_kmer, i ^ 0x3, double_kmer_length, mask);
            neighbor = hashtable->findNode(canonicalKmer(neighbor_kmer, kmer_length));
            assert( neighbor != NULL );
            assert(cnorm(neighbor->right_count[static_cast<int>( COMPLEMENT(leftmost_base) )]) == cnorm(right_cnt));
            assert(neighbor->right_color[static_cast<int>( COMPLEMENT(leftmost_base) )] == 0);
        }
    }
}
Ejemplo n.º 5
0
void testSimpleInsertACAutomaton() {
  PointerACAutomaton<Kmer> aho;

  Kmer count_acag = Kmer("ACAG");
  count_acag.count = 5;
  Kmer count_caga = Kmer("CAGA");
  count_caga.count = 2;
  Kmer count_caca = Kmer("CACA");
  count_caca.count = 3;
  Kmer count_gca = Kmer("GCA");
  count_gca.count = 1;

  aho.insert("ACAG", count_acag);
  aho.insert("CAGA", count_caga);
  aho.insert("CACA", count_caca);
  aho.insert("GCA", count_gca);

  aho.build_failure_functions();

  pointer_state<Kmer> *state_ac = aho.goto_state("ac");
  pointer_state<Kmer> *state_aca = aho.goto_state("aca");
  pointer_state<Kmer> *state_c = aho.goto_state("c");
  pointer_state<Kmer> *state_g = aho.goto_state("g");
  pointer_state<Kmer> *state_cag = aho.goto_state("cag");

  TAP_TEST(aho.getInitialState()->transitions[T] == aho.getInitialState()
           && aho.getInitialState()->transitions[N] == aho.getInitialState(),
           TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_ac->transitions[A] == state_aca, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_ac->transitions[C] == state_c, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_aca->transitions[A] == state_g->transitions[A], TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_c->transitions[C] == state_c, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_g->transitions[G] == state_g, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_g->transitions[C]->transitions[C] == state_c, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_cag->transitions[G] == state_g, TEST_AC_TRANSITIONS, "");
  TAP_TEST(state_cag->transitions[A]->is_final, TEST_AC_FINAL, "");

  string caga = "caga";
  string caca = "caca";
  string acag = "acag";
  TAP_TEST(aho.get(caga).count == 2, TEST_AC_GET, "");
  TAP_TEST(aho.get(caca).count == 3, TEST_AC_GET, "");
  TAP_TEST(aho.get(acag).count == 5, TEST_AC_GET, "");
}
Ejemplo n.º 6
0
/**
 * Dumps the k-mer occurrence map into two binary files named
 * "<prefix><suffix>.hash" and "<prefix><suffix>.index".
 *
 * Index file layout (native byte order):
 *   unsigned int            number of k-mers
 *   repeated per k-mer:     NUL-terminated k-mer string,
 *                           unsigned int number of occurrences
 * Hash file layout, in the same (sorted) k-mer order:
 *   repeated per occurrence: unsigned int read id, unsigned int position
 *
 * K-mers are ordered with index_less before writing.
 *
 * If either file cannot be opened, the reason is reported with perror() and
 * nothing further is written. Write or close failures are reported on stderr.
 *
 * @param kmers_map  map from k-mer to its occurrence list.
 * @param prefix     leading part of both output file names.
 * @param suffix     part inserted between the prefix and the extension.
 */
void dump_hash(const kmers_map_type& kmers_map, const char* prefix, const char* suffix) {
  std::ostringstream fname;
  fname << prefix << suffix << ".hash";
  FILE* fout = fopen(fname.str().c_str(), "wb");
  if (fout == NULL) {
    perror(fname.str().c_str());
    return;
  }

  std::ostringstream findexname;
  findexname << prefix << suffix << ".index";
  FILE* findexout = fopen(findexname.str().c_str(), "wb");
  if (findexout == NULL) {
    perror(findexname.str().c_str());
    fclose(fout);  // do not leak the handle that did open
    return;
  }

  // compute index
  // NOTE(review): the placeholder tuple is pushed and then erased before any
  // use; it looks redundant but is kept because Kmer("Z") is opaque here.
  std::vector<std::tr1::tuple<Kmer, const occ_list_type* > > index;
  index.push_back(std::tr1::make_tuple(Kmer("Z"), (occ_list_type*) 0));

  for (kmers_map_type::const_iterator it = kmers_map.begin(); it != kmers_map.end(); ++it) {
    index.push_back(std::tr1::make_tuple(it->first, &(it->second)));
  }

  // Remove the (Kmer("Z"),0) tuple.
  index.erase(index.begin());

  std::sort(index.begin(), index.end(), index_less());

  // Write out index.
  // Format: kmer string, number of reads with kmer
  // Writes out:
  // num kmers in block, (kmer, num reads with kmer)+
  const unsigned int nKmer = index.size();
  fwrite(&nKmer, sizeof(unsigned int), 1, findexout); // number of kmers
  for (unsigned int kid = 0; kid < nKmer; kid++) { // index of kmer
    // Bind the string to a const reference first: if getKmer() returns by
    // value this keeps the temporary alive while its buffer is written.
    const std::string& kmer_str = std::tr1::get<0>(index[kid]).getKmer();
    const char* kmer = kmer_str.c_str();
    unsigned int nread = std::tr1::get<1>(index[kid])->size();
    fwrite(kmer, sizeof(char), strlen(kmer) + 1, findexout);
    fwrite(&nread, sizeof(unsigned int), 1, findexout);
  }

  // Write out reads.
  // Format: read ID, hit position
  // Writes out:
  // read id, pos within read
  for (unsigned int kid = 0; kid < nKmer; kid++) {
    const occ_list_type& occ_list = *std::tr1::get<1>(index[kid]);
    for (occ_list_type::const_iterator it = occ_list.begin(); it != occ_list.end(); ++it) {
      const unsigned int& readid = it->readID;
      const unsigned int& pos = it->pos;
      fwrite(&readid, sizeof(unsigned int), 1, fout);
      fwrite(&pos, sizeof(unsigned int), 1, fout);
    }
  }

  // Surface write errors and flush failures instead of dropping them silently.
  bool ok = !ferror(fout) && !ferror(findexout);
  if (fclose(fout) != 0) {
    ok = false;
  }
  if (fclose(findexout) != 0) {
    ok = false;
  }
  if (!ok) {
    fprintf(stderr, "dump_hash: error while writing %s or %s\n",
            fname.str().c_str(), findexname.str().c_str());
  }
}
Ejemplo n.º 7
0
   /// Loads a CountDB from the binary file `fname`.
   ///
   /// Layout read, in native byte order and native type widths:
   ///   uint32_t     mer size
   ///   size_t       number of entries (numCounts)
   ///   Kmer[]       numCounts k-mers
   ///   AtomicCount[] numCounts counts
   ///
   /// @param fname  path of the file to read.
   /// @return a CountDB built from the k-mers and mer size, with the counts
   ///         moved into it.
   /// @throws std::ios_base::failure if the file cannot be opened or ends
   ///         before the header or the payload has been read completely.
   static CountDB fromFile( const std::string& fname ) {
    std::ifstream in(fname, std::ios::in | std::ios::binary );
    if ( !in ) {
      throw std::ios_base::failure("CountDB::fromFile: cannot open " + fname);
    }

    // Zero-initialised so a failed read can never leave indeterminate values.
    uint32_t merSize = 0;
    in.read( reinterpret_cast<char*>(&merSize), sizeof(merSize) );
    size_t numCounts = 0;
    in.read( reinterpret_cast<char*>(&numCounts), sizeof(numCounts) );
    if ( !in ) {
      throw std::ios_base::failure("CountDB::fromFile: truncated header in " + fname);
    }

    auto kmers = std::make_shared<std::vector<Kmer>>(numCounts, Kmer(0));
    std::vector<AtomicCount> counts(numCounts);
    // data() is valid for empty vectors, unlike &v[0], so numCounts == 0 is safe.
    in.read( reinterpret_cast<char*>(kmers->data()),
             static_cast<std::streamsize>(sizeof(Kmer) * numCounts) );
    in.read( reinterpret_cast<char*>(counts.data()),
             static_cast<std::streamsize>(sizeof(AtomicCount) * numCounts) );
    if ( !in ) {
      throw std::ios_base::failure("CountDB::fromFile: truncated data in " + fname);
    }
    // The stream is closed by its destructor.

    CountDB index(kmers, merSize);
    index._counts = std::move(counts);
    return index;
   }
Ejemplo n.º 8
0
/**
 * Filters the candidate transcripts in `u`, keeping those against which the
 * read `s` aligns with at most `maxMismatch` mismatching bases.
 *
 * The position of each transcript is obtained from index.findPosition() using
 * the first mapping k-mer of the read. The position is treated as 1-based
 * (the code indexes the target with tpos-1) — TODO confirm against
 * KmerIndex::findPosition.
 *   - Forward hits compare s[i] with target[tpos-1+i], skipping the last
 *     `maxSoftclip` bases of the read.
 *   - Reverse hits compare revcomp(s)[i] with target[tpos-sz+i], skipping the
 *     first `maxSoftclip` bases of the reverse complement.
 * A transcript is dropped when the read would extend past either end of the
 * target sequence.
 *
 * @param index  k-mer index providing findPosition() and target_seqs_.
 * @param s      read sequence.
 * @param v      k-mer hits of the read; an empty list yields false.
 * @param u      in/out: candidate transcript ids. It is replaced by the
 *               surviving subset only when at least one, but not all,
 *               candidates survive.
 * @return true if at least one candidate survives, false otherwise (in which
 *         case `u` is left untouched).
 */
bool checkMapability(const KmerIndex& index, const std::string &s, const std::vector<std::pair<KmerEntry,int>>& v, std::vector<int> &u) {
  const int maxMismatch = 2;
  const int maxSoftclip = 5;

  if (v.empty()) {
    return false;
  }

  KmerEntry val;
  int p = findFirstMappingKmer(v,val);
  Kmer km = Kmer(s.c_str()+p);

  const int sz = static_cast<int>(s.size());

  // The reverse complement does not depend on the transcript: build it at
  // most once, and only if a reverse-strand hit actually needs it.
  std::string rs;
  bool haveRs = false;

  std::vector<int> vtmp; vtmp.reserve(u.size());

  for (auto tr : u) {
    auto trpos = index.findPosition(tr, km, val, p);
    const int tpos = static_cast<int>(trpos.first);
    const auto& target = index.target_seqs_[tr];
    // Signed length keeps the bounds checks free of signed/unsigned mixing.
    const int tlen = static_cast<int>(target.size());

    bool add = true;
    if (trpos.second) {
      // forward strand: read covers target[tpos-1 .. tpos+sz-2]
      if (tpos < 1 || tpos + sz - 1 > tlen) {
        add = false;
      } else {
        int mis = 0;
        for (int i = 0; i < sz - maxSoftclip; i++) {
          if (target[tpos-1 + i] != s[i]) {
            ++mis;
            if (mis > maxMismatch) {
              break;
            }
          }
        }
        add = (mis <= maxMismatch);
      }
    } else {
      // reverse strand: reverse complement covers target[tpos-sz .. tpos-1]
      if (tpos > tlen || tpos - sz < 1) {
        add = false;
      } else {
        if (!haveRs) {
          rs = revcomp(s);
          haveRs = true;
        }
        int mis = 0;
        for (int i = sz-1; i >= maxSoftclip; i--) {
          // Compare base i of the reverse complement. Indexing with sz would
          // always read the terminating NUL and count every base as a mismatch.
          if (target[tpos-sz+i] != rs[i]) {
            ++mis;
            if (mis > maxMismatch) {
              break;
            }
          }
        }
        add = (mis <= maxMismatch);
      }
    }

    if (add) {
      vtmp.push_back(tr);
    }
  }

  if (vtmp.empty()) {
    return false;
  }

  if (vtmp.size() < u.size()) {
    u.swap(vtmp);  // hand over the filtered list without copying
  }

  return true;
}