Example #1
/* Fill the hash table with random tags, printing the bucket
 * distribution after the first half of the inserts and again
 * after the rest. */
static void fill_hash_table(void)
{
	int i;
	for (i = 0; i < TOTAL_SIZE / 2; i++) {
		u32 tag = random();
		u32 t = tag % HASH_SIZE;

		/* Take an empty slot, or ask the replacement policy. */
		if (hash_table[t].offset == 0 ||
		    replace(hash_table[t].offset, i)) {
			hash_table[t].offset = i;
			hash_table[t].tag = tag;
		}
	}

	print_distribution(i);	/* after the first half of the inserts */
	printf("\n");

	for (; i < TOTAL_SIZE; i++) {
		u32 tag = random();
		u32 t = tag % HASH_SIZE;

		if (hash_table[t].offset == 0 ||
		    replace(hash_table[t].offset, i)) {
			hash_table[t].offset = i;
			hash_table[t].tag = tag;
		}
	}

	print_distribution(i);	/* after all inserts */
}
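The example leaves u32, TOTAL_SIZE, HASH_SIZE, hash_table, replace() and this overload of print_distribution() to the surrounding file. A minimal sketch of those pieces, assuming a random eviction policy and a simple occupancy printer; everything here beyond the names used above is a guess:

/* Minimal sketch of the declarations Example #1 relies on; the table
 * sizes, the eviction policy and the printer are assumptions. */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned int u32;

#define TOTAL_SIZE 4096
#define HASH_SIZE  1024

static struct {
	u32 offset;	/* entry index; 0 doubles as "empty slot" */
	u32 tag;
} hash_table[HASH_SIZE];

/* Hypothetical policy: evict the current occupant half of the time. */
static int replace(u32 old_offset, int new_offset)
{
	(void)old_offset;
	(void)new_offset;
	return random() & 1;
}

/* Report how many buckets are occupied after n insert attempts. */
static void print_distribution(int n)
{
	int used = 0, t;

	for (t = 0; t < HASH_SIZE; t++)
		if (hash_table[t].offset != 0)
			used++;
	printf("%d inserts: %d/%d buckets used", n, used, HASH_SIZE);
}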
Example #2
/* Interactively sample the magnetometer until a key is pressed, then
 * derive the hard-iron bias and per-axis scale factors. */
int calibrate(int file, int width, Triplet *bias, FTriplet *scale)
{
  Triplet data = {0};
  MagDistribution m_dist = {{0},{0},{0}};
  Triplet diameters;
  int avg_diameter;

  while (1) {
    /* Redraw the previous sample with blanks to erase it... */
    print_distribution (m_dist.min, m_dist.max, data, width, ' ', ' ');

    read_triplet (file, XM_ADDRESS, OUT_X_L_M, &data);
    update_mag_bias (data, &m_dist);

    /* ...then draw the freshly read sample. */
    print_distribution (m_dist.min, m_dist.max, data, width, '|', '*');

    move (6, 0);
    refresh ();
    usleep (20000);             /* ~50 samples per second */
    if (getch () != ERR)        /* any keypress ends calibration */
      break;
  }

  bias->x = m_dist.bias.x;
  bias->y = m_dist.bias.y;
  bias->z = m_dist.bias.z;

  /* Try to scale the axes (crude transform from ellipse to sphere) */
  diameters.x = abs (m_dist.max.x - m_dist.min.x);
  diameters.y = abs (m_dist.max.y - m_dist.min.y);
  diameters.z = abs (m_dist.max.z - m_dist.min.z);
  avg_diameter = (diameters.x + diameters.y + diameters.z) / 3;
  /* Guard against an axis that never moved (zero diameter). */
  scale->x = diameters.x ? avg_diameter / (float)diameters.x : 1.0f;
  scale->y = diameters.y ? avg_diameter / (float)diameters.y : 1.0f;
  scale->z = diameters.z ? avg_diameter / (float)diameters.z : 1.0f;

  return 1;
}
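Triplet, FTriplet, MagDistribution, read_triplet() and update_mag_bias() come from the surrounding driver. A plausible reconstruction of the data types and the bias update, assuming integer raw samples; only the fields calibrate() actually touches are certain:

/* Assumed shapes of the helper types; the struct layouts and the
 * update step are reconstructions, not the original code. */
typedef struct { int x, y, z; } Triplet;
typedef struct { float x, y, z; } FTriplet;

typedef struct
{
  Triplet min;                  /* smallest reading seen per axis */
  Triplet max;                  /* largest reading seen per axis */
  Triplet bias;                 /* hard-iron offset (midpoint) */
} MagDistribution;

/* Hypothetical update: widen the per-axis bounds and keep the bias at
 * the centre of the observed ellipsoid. */
static void
update_mag_bias (Triplet data, MagDistribution *d)
{
  if (data.x < d->min.x) d->min.x = data.x;
  if (data.x > d->max.x) d->max.x = data.x;
  if (data.y < d->min.y) d->min.y = data.y;
  if (data.y > d->max.y) d->max.y = data.y;
  if (data.z < d->min.z) d->min.z = data.z;
  if (data.z > d->max.z) d->max.z = data.z;
  d->bias.x = (d->min.x + d->max.x) / 2;
  d->bias.y = (d->min.y + d->max.y) / 2;
  d->bias.z = (d->min.z + d->max.z) / 2;
}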
Example #3
/* Convert per-sequence kmer distributions into read classifications
 * and send each read's result to print_distribution(). */
void convert_distribution(string o_file, int s_count,
        const map<int, string> *id2seqid, const map<int, string> *id2kmers,
        const map<int, int> *id2taxid, const map<int, string> *id2tandl,
        const taxonomy *my_taxonomy, const map<int, taxonomy *> *taxid2node,
        const int kmer_len, const int read_len) {
    /*Initialize variables for getting read mappings instead of kmer mappings.
      A read of read_len bases spans read_len - kmer_len + 1 kmers; size_t
      avoids a signed/unsigned mismatch at the window-size check below*/
    const size_t n_kmers = read_len - kmer_len + 1;
    int seqs_read = 0;
    /*Iterate over taxid2kmers in parallel*/
    printf("\t>>STEP 4: CONVERTING KMER MAPPINGS INTO READ CLASSIFICATIONS:\n");
    printf("\t\t%imers, with a database built using %imers\n",read_len, kmer_len);
    cerr << "\t\t" << seqs_read << " sequences converted...";
    #pragma omp parallel for
    for (int i = 1; i <= s_count; i++) {
        //Get values to parse here
        string curr_ks = id2kmers->find(i)->second;
        //Saving values 
        vector<int> all_kmers;
        int count_kmers = 0;
        //Iterate through all of the taxid:count pairs
        string buf;
        std::stringstream ss(curr_ks);
        int pair_taxid, pair_count;
        string pair_tstr;
        while(ss >> buf) {
            //Split this pair into the taxid and the kmer count
            //(operator>> already strips whitespace, so there is no
            //trailing "\n" to trim off)
            size_t mid = buf.find(':');
            pair_tstr = buf.substr(0, mid);
            pair_count = atoi(buf.substr(mid + 1).c_str());
            //"A" marks an ambiguous kmer; store it as taxid 0
            if (pair_tstr == "A")
                pair_taxid = 0;
            else
                pair_taxid = atoi(pair_tstr.c_str());
            //Add kmers to queue
            for (int j = 0; j < pair_count; j++) {
                all_kmers.push_back(pair_taxid);
                count_kmers += 1;
            }
        }
        //Process all mappings with a sliding window of n_kmers kmers
        vector<int> curr_kmers;
        map<int,int> taxids_mapped;
        int mapped_taxid = 0;
        int next_kmer;
        int prev_kmer = -1;     //-1: no window classified yet (taxids are >= 0)
        int prev_taxid = 0;
        for (int k = 0; k < count_kmers; k++) {
            next_kmer = all_kmers[k];
            curr_kmers.push_back(next_kmer);
            if (curr_kmers.size() == n_kmers) {
                if (prev_kmer == next_kmer) {
                    mapped_taxid = prev_taxid;
                } else {
                    mapped_taxid = get_classification(&curr_kmers, my_taxonomy, taxid2node);
                } 
                //Save to the per-read map of taxid -> window count
                auto t_it = taxids_mapped.find(mapped_taxid);
                if (t_it == taxids_mapped.end()){
                    taxids_mapped[mapped_taxid] = 1;
                } else {
                    t_it->second += 1;
                }
                prev_taxid = mapped_taxid;
                //Slide the window: drop its oldest kmer
                prev_kmer = curr_kmers[0];
                curr_kmers.erase(curr_kmers.begin());
            } 
        }
        //Update user and print this read's distribution; one critical
        //section keeps the counter and the output consistent
        #pragma omp critical
        {
            seqs_read += 1;
            cerr << "\r\t\t" << seqs_read << " sequences converted...";
            print_distribution(o_file, id2seqid->find(i)->second, id2taxid->find(i)->second, id2tandl->find(i)->second, taxids_mapped);
        }
    }
    cerr << "\r\t\t" << seqs_read << " sequences converted...\n";
}
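The print_distribution overload this example calls is defined elsewhere. A hypothetical sketch matching the call site, assuming one tab-separated record per read; the output format is a guess, not the original:

// Hypothetical writer matching the call above; the record layout
// (seqid, true taxid, taxa-and-lengths string, taxid:count pairs)
// is an assumption.
#include <fstream>
#include <map>
#include <string>
using namespace std;

void print_distribution(string o_file, string seqid, int true_taxid,
        string tandl, const map<int,int> &taxids_mapped) {
    ofstream out(o_file.c_str(), ios::app);   //append one line per read
    out << seqid << '\t' << true_taxid << '\t' << tandl << '\t';
    for (map<int,int>::const_iterator it = taxids_mapped.begin();
            it != taxids_mapped.end(); ++it)
        out << it->first << ':' << it->second << ' ';
    out << '\n';
}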