/**
 * Extend a path away from a seed kmer in one direction, round by round.
 *
 * Every round un-marks the current seed in `visitor`, asks inchworm_step()
 * for the best path leaving that seed, and appends the returned kmers to the
 * accumulated path. The last kmer appended becomes the seed for the next
 * round. Extension stops on the first round whose returned score is not > 0.
 *
 * @param kcounter          kmer catalog; queried for size, kmer length, counts
 *                          and descriptive strings.
 * @param direction         direction flag forwarded to inchworm_step()
 *                          (also echoed in the monitor output).
 * @param kmer              seed kmer to start extending from.
 * @param visitor           visited-kmer set; the seed is erased from it each
 *                          round and every appended kmer is added to it.
 * @param min_connectivity  threshold forwarded unchanged to inchworm_step().
 * @return pair of (appended kmers in extension order, sum of the counts of
 *         those kmers as reported by kcounter).
 * @throws string if the number of rounds exceeds kcounter.size().
 */
Path_n_count_pair IRKE::inchworm(KmerCounter& kcounter,
                                 char direction,
                                 kmer_int_type_t kmer,
                                 Kmer_visitor& visitor,
                                 float min_connectivity) {

    Path_n_count_pair entire_path;

    const unsigned long num_total_kmers = kcounter.size();

    // Scratch visitor handed to inchworm_step(); wiped at the start of every round.
    Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);

    for (unsigned int inchworm_round = 1; ; ++inchworm_round) {

        eliminator.clear();

        // Safety valve: there cannot be more rounds than kmers available as seeds.
        if (inchworm_round > num_total_kmers) {
            throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
        }

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << endl
                 << "Inchworm round(" << string(1, direction) << "): " << inchworm_round
                 << " searching kmer: " << kmer << endl;
            string kmer_str = kcounter.get_kmer_string(kmer);
            cerr << kcounter.describe_kmer(kmer_str) << endl;
        }

        // The seed must not be flagged as visited when the step search starts.
        visitor.erase(kmer);

        Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));

        Path_n_count_pair best_path = inchworm_step(kcounter, direction, kmer_pair,
                                                    visitor, eliminator, inchworm_round, 0,
                                                    min_connectivity, MAX_RECURSION);

        if (!(best_path.second > 0)) {
            // no extension possible
            break;
        }

        // The step result is consumed from its last element downwards so the
        // appended kmers begin right after the seed (presumably inchworm_step
        // stores the farthest kmer first -- confirm against its definition).
        vector<kmer_int_type_t>& step_kmers = best_path.first;
        unsigned int step_size = step_kmers.size();

        int from_idx = step_size - 1;
        int down_to_idx = 0;

        if (CRAWL) {
            // Crawling keeps at most CRAWL_LENGTH kmers from this round.
            down_to_idx = from_idx - CRAWL_LENGTH + 1;
            if (down_to_idx < 0) {
                down_to_idx = 0;
            }
        }

        for (int idx = from_idx; idx >= down_to_idx; --idx) {
            kmer_int_type_t accepted = step_kmers[idx];

            entire_path.first.push_back(accepted);
            visitor.add(accepted);
            entire_path.second += kcounter.get_kmer_count(accepted);
        }

        // Continue from the tip of what has been assembled so far.
        kmer = entire_path.first.back();
    }

    if (IRKE_COMMON::MONITOR >= 3) {
        cerr << endl;
    }

    return entire_path;
}
/**
 * Extend a path away from a seed kmer in one direction, round by round.
 *
 * Every round un-marks the current seed in `visitor`, asks inchworm_step()
 * for the best path leaving that seed, and appends the returned kmers to the
 * accumulated path. The last kmer appended becomes the seed for the next
 * round. The score returned by each accepted step is added to the running
 * path score.
 *
 * Two developer switches in IRKE_COMMON alter the walk:
 *  - __DEVEL_rand_fracture: before each round, draw rand()/RAND_MAX and return
 *    the path built so far when the draw is <= __DEVEL_rand_fracture_prob.
 *  - __DEVEL_zero_kmer_on_use: accept a step that returned at least one kmer
 *    even if its score is not > 0, and call kcounter.clear_kmer() on every
 *    kmer appended to the path.
 *
 * @param kcounter          kmer catalog; queried for size, kmer length, counts
 *                          and descriptive strings (and cleared per kmer when
 *                          __DEVEL_zero_kmer_on_use is set).
 * @param direction         direction flag forwarded to inchworm_step()
 *                          (also echoed in the monitor output).
 * @param kmer              seed kmer to start extending from.
 * @param visitor           visited-kmer set; the seed is erased from it each
 *                          round and every appended kmer is added to it.
 * @param min_connectivity  threshold forwarded unchanged to inchworm_step().
 * @return pair of (appended kmers in extension order, sum of the scores
 *         returned by the accepted inchworm_step() calls).
 * @throws string if the number of rounds exceeds kcounter.size().
 */
Path_n_count_pair IRKE::inchworm(KmerCounter &kcounter,
                                 char direction,
                                 kmer_int_type_t kmer,
                                 Kmer_visitor &visitor,
                                 float min_connectivity) {

    Path_n_count_pair entire_path;
    entire_path.second = 0; // init cumulative path coverage

    unsigned int inchworm_round = 0;
    const unsigned long num_total_kmers = kcounter.size();

    // Scratch visitor handed to inchworm_step(); wiped at the start of every round.
    Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);

    for (;;) {

        if (IRKE_COMMON::__DEVEL_rand_fracture) {
            // terminate extension with probability of __DEVEL_rand_fracture_prob
            float fracture_draw = rand() / static_cast<float>(RAND_MAX);
            if (fracture_draw <= IRKE_COMMON::__DEVEL_rand_fracture_prob) {
                return entire_path;
            }
        }

        ++inchworm_round;
        eliminator.clear();

        // Safety valve: there cannot be more rounds than kmers available as seeds.
        if (inchworm_round > num_total_kmers) {
            throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
        }

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << endl
                 << "Inchworm round(" << string(1, direction) << "): " << inchworm_round
                 << " searching kmer: " << kmer << endl;
            string kmer_str = kcounter.get_kmer_string(kmer);
            cerr << kcounter.describe_kmer(kmer_str) << endl;
        }

        // The seed must not be flagged as visited when the step search starts.
        visitor.erase(kmer);

        Kmer_Occurence_Pair kmer_pair(kmer, kcounter.get_kmer_count(kmer));

        Path_n_count_pair best_path = inchworm_step(kcounter, direction, kmer_pair,
                                                    visitor, eliminator, inchworm_round, 0,
                                                    min_connectivity, MAX_RECURSION);

        vector<kmer_int_type_t> &step_kmers = best_path.first;
        unsigned int step_size = step_kmers.size();

        // A step is usable when it scored above zero, or -- in zero-on-use
        // mode -- whenever it returned any kmer at all.
        const bool can_extend =
                (IRKE_COMMON::__DEVEL_zero_kmer_on_use && step_size >= 1)
                || best_path.second > 0;

        if (!can_extend) {
            // no extension possible
            break;
        }

        // The step result is consumed from its last element downwards so the
        // appended kmers begin right after the seed (presumably inchworm_step
        // stores the farthest kmer first -- confirm against its definition).
        int from_idx = step_size - 1;
        int down_to_idx = 0;

        if (CRAWL) {
            // Crawling keeps at most CRAWL_LENGTH kmers from this round.
            down_to_idx = from_idx - CRAWL_LENGTH + 1;
            if (down_to_idx < 0) {
                down_to_idx = 0;
            }
        }

        for (int idx = from_idx; idx >= down_to_idx; --idx) {
            kmer_int_type_t accepted = step_kmers[idx];

            entire_path.first.push_back(accepted);
            visitor.add(accepted);

            // selected here, zero out:
            if (IRKE_COMMON::__DEVEL_zero_kmer_on_use) {
                kcounter.clear_kmer(accepted);
            }
        }

        // Continue from the tip of what has been assembled so far.
        kmer = entire_path.first.back();

        // The whole step score is credited, even if crawling kept only part of the step.
        entire_path.second += best_path.second;
    }

    if (IRKE_COMMON::MONITOR >= 3) {
        cerr << "No extension possible." << endl << endl;
    }

    return entire_path;
}