Beispiel #1
0
// Repeatedly extends a path from the seed `kmer` in the given `direction`.
//
// Each round asks inchworm_step() for the best path leaving the current seed.
// The kmers of that path are appended to the result walking the returned list
// from its last element towards its first (the original author notes that this
// ordering starts just after the seed kmer). Every appended kmer is marked in
// `visitor` and its count from `kcounter` is added to the running total.
// When CRAWL is set, only the CRAWL_LENGTH highest-indexed entries of the
// returned list are taken. The last appended kmer becomes the next seed.
//
// The loop stops as soon as inchworm_step() reports a path whose score is not
// greater than zero.
//
// Returns: pair of (kmers appended in order, sum of their counts).
// Throws:  a string if the number of rounds exceeds kcounter.size().
Path_n_count_pair IRKE::inchworm (KmerCounter& kcounter, char direction, kmer_int_type_t kmer, Kmer_visitor& visitor, float min_connectivity) {
	
	Path_n_count_pair entire_path;
	
	// Upper bound on rounds: used below as a runaway-loop safety check.
	unsigned long round_limit = kcounter.size();
	
	// Scratch visitor handed to inchworm_step(); reset at the start of every round.
	Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);
	
	for (unsigned int round_num = 1; ; round_num++) {
		
		eliminator.clear();
		
		if (round_num > round_limit) {
			throw(string ("Error, inchworm rounds have exceeded the number of possible seed kmers"));
		}
		
		if (IRKE_COMMON::MONITOR >= 3) {
			cerr << endl << "Inchworm round(" << string(1,direction) << "): " << round_num << " searching kmer: " << kmer << endl;
			string seed_str = kcounter.get_kmer_string(kmer);
			cerr << kcounter.describe_kmer(seed_str) << endl;
		}
		
		// The seed itself must not be flagged as visited when the step begins.
		visitor.erase(kmer);
		
		Kmer_Occurence_Pair seed_pair(kmer, kcounter.get_kmer_count(kmer));
		Path_n_count_pair step_result = inchworm_step(kcounter, direction, seed_pair, visitor, eliminator, round_num, 0, min_connectivity, MAX_RECURSION);
		
		if (! (step_result.second > 0)) {
			// no extension possible
			break;
		}
		
		vector<kmer_int_type_t>& step_kmers = step_result.first;
		
		unsigned int step_len = step_kmers.size();
		int start_idx = step_len - 1;
		int stop_idx = 0;
		if (CRAWL) {
			// Restrict this round to the trailing CRAWL_LENGTH entries of the list.
			stop_idx = start_idx - CRAWL_LENGTH + 1;
			if (stop_idx < 0) {
				stop_idx = 0;
			}
		}
		
		// Walk the list back to front, recording each kmer as part of the path.
		int idx = start_idx;
		while (idx >= stop_idx) {
			kmer_int_type_t next_kmer = step_kmers[idx];
			entire_path.first.push_back(next_kmer);
			visitor.add(next_kmer);
			entire_path.second += kcounter.get_kmer_count(next_kmer);
			idx--;
		}
		
		// Continue the next round from the most recently appended kmer.
		kmer = entire_path.first.back();
	}
	
	if (IRKE_COMMON::MONITOR >= 3) {
		cerr << endl;
	}
	
	return entire_path;
}
Beispiel #2
0
// Repeatedly extends a path from the seed `kmer` in the given `direction`.
//
// Each round asks inchworm_step() for the best path leaving the current seed.
// The kmers of that path are appended to the result walking the returned list
// from its last element towards its first (the original author notes that this
// ordering starts just after the seed kmer), and each one is marked in
// `visitor`. When CRAWL is set, only the CRAWL_LENGTH highest-indexed entries
// of the returned list are taken. The score reported by inchworm_step() is
// added to the cumulative total, and the last appended kmer becomes the next
// seed.
//
// Development switches read from IRKE_COMMON:
//   __DEVEL_rand_fracture    - before each round, draw rand()/RAND_MAX and
//                              return the path built so far if the draw is
//                              <= __DEVEL_rand_fracture_prob.
//   __DEVEL_zero_kmer_on_use - call kcounter.clear_kmer() on every appended
//                              kmer; with this switch on, a non-empty step
//                              result is accepted even if its score is not
//                              positive.
//
// Returns: pair of (kmers appended in order, sum of accepted step scores).
// Throws:  a string if the number of rounds exceeds kcounter.size().
Path_n_count_pair IRKE::inchworm(KmerCounter &kcounter,
                                 char direction,
                                 kmer_int_type_t kmer,
                                 Kmer_visitor &visitor,
                                 float min_connectivity)
{
    Path_n_count_pair entire_path;
    entire_path.second = 0; // cumulative path coverage starts at zero

    unsigned int round_num = 0;

    // Upper bound on rounds: used below as a runaway-loop safety check.
    unsigned long round_limit = kcounter.size();

    // Scratch visitor handed to inchworm_step(); reset at the start of every round.
    Kmer_visitor eliminator(kcounter.get_kmer_length(), DOUBLE_STRANDED_MODE);

    for (;;) {

        if (IRKE_COMMON::__DEVEL_rand_fracture) {
            // Randomly terminate the extension with probability __DEVEL_rand_fracture_prob.
            float fracture_draw = rand() / static_cast<float>(RAND_MAX);
            if (fracture_draw <= IRKE_COMMON::__DEVEL_rand_fracture_prob) {
                return entire_path;
            }
        }

        ++round_num;
        eliminator.clear();

        if (round_num > round_limit) {
            throw (string("Error, inchworm rounds have exceeded the number of possible seed kmers"));
        }

        if (IRKE_COMMON::MONITOR >= 3) {
            cerr << endl << "Inchworm round(" << string(1, direction) << "): " << round_num << " searching kmer: "
                << kmer << endl;
            string seed_str = kcounter.get_kmer_string(kmer);
            cerr << kcounter.describe_kmer(seed_str) << endl;
        }

        // The seed itself must not be flagged as visited when the step begins.
        visitor.erase(kmer);

        Kmer_Occurence_Pair seed_pair(kmer, kcounter.get_kmer_count(kmer));
        Path_n_count_pair step_result = inchworm_step(kcounter,
                                                      direction,
                                                      seed_pair,
                                                      visitor,
                                                      eliminator,
                                                      round_num,
                                                      0,
                                                      min_connectivity,
                                                      MAX_RECURSION);

        vector<kmer_int_type_t> &step_kmers = step_result.first;
        unsigned int step_len = step_kmers.size();

        // A step is usable if it scored above zero, or (in zero-on-use mode)
        // if it returned at least one kmer.
        bool can_extend = (IRKE_COMMON::__DEVEL_zero_kmer_on_use && step_len >= 1) || step_result.second > 0;
        if (!can_extend) {
            // no extension possible
            break;
        }

        int start_idx = step_len - 1;
        int stop_idx = 0;
        if (CRAWL) {
            // Restrict this round to the trailing CRAWL_LENGTH entries of the list.
            stop_idx = start_idx - CRAWL_LENGTH + 1;
            if (stop_idx < 0) {
                stop_idx = 0;
            }
        }

        // Walk the list back to front, recording each kmer as part of the path.
        int idx = start_idx;
        while (idx >= stop_idx) {
            kmer_int_type_t next_kmer = step_kmers[idx];
            entire_path.first.push_back(next_kmer);
            visitor.add(next_kmer);

            if (IRKE_COMMON::__DEVEL_zero_kmer_on_use) {
                // This kmer has been selected: zero it out in the counter.
                kcounter.clear_kmer(next_kmer);
            }
            idx--;
        }

        // Continue the next round from the most recently appended kmer.
        kmer = entire_path.first.back();

        entire_path.second += step_result.second;
    }

    if (IRKE_COMMON::MONITOR >= 3) {
        cerr << "No extension possible." << endl << endl;
    }

    return entire_path;
}