static void fill_hash_table(void)
{
    int i;

    /* Insert the first half of the entries, then report how the buckets
     * are populated at the halfway point. */
    for (i = 0; i < TOTAL_SIZE / 2; i++) {
        u32 tag = random();
        u32 t = tag % HASH_SIZE;

        /* Claim the slot if it is empty, or if the replacement policy
         * prefers the new entry over the incumbent. Note that offset 0
         * doubles as the empty marker, so entry 0 is indistinguishable
         * from a free slot. */
        if (hash_table[t].offset == 0 || replace(hash_table[t].offset, i)) {
            hash_table[t].offset = i;
            hash_table[t].tag = tag;
        }
    }
    print_distribution(i);
    printf("\n");

    /* Insert the second half and report the final distribution. */
    for (; i < TOTAL_SIZE; i++) {
        u32 tag = random();
        u32 t = tag % HASH_SIZE;

        if (hash_table[t].offset == 0 || replace(hash_table[t].offset, i)) {
            hash_table[t].offset = i;
            hash_table[t].tag = tag;
        }
    }
    print_distribution(i);
}
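/*
 * fill_hash_table() above only defines the call site of replace(); the body
 * below is a sketch of one plausible policy, not the original code. It keeps
 * the slot's current entry unless it has aged past half the insertion window,
 * so recent offsets win collisions without thrashing on every insert.
 * (u32 and TOTAL_SIZE are taken from the surrounding file.)
 */
static int replace(u32 old_offset, u32 new_offset)
{
    /* Hypothetical eviction rule: replace once the stored entry is stale. */
    return new_offset - old_offset > TOTAL_SIZE / 2;
}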
int calibrate(int file, int width, Triplet *bias, FTriplet *scale)
{
    Triplet data = {0};
    MagDistribution m_dist = {{0}, {0}, {0}};
    Triplet diameters;
    int avg_diameter;

    /* Sample the magnetometer until a key is pressed, tracking the
     * per-axis min/max and running bias as we go. */
    while (1) {
        /* Erase the previous marks, then draw the updated ones */
        print_distribution(m_dist.min, m_dist.max, data, width, ' ', ' ');
        read_triplet(file, XM_ADDRESS, OUT_X_L_M, &data);
        update_mag_bias(data, &m_dist);
        print_distribution(m_dist.min, m_dist.max, data, width, '|', '*');
        move(6, 0);
        refresh();
        usleep(20000);
        if (getch() != ERR)
            break;
    }

    bias->x = m_dist.bias.x;
    bias->y = m_dist.bias.y;
    bias->z = m_dist.bias.z;

    /* Try to scale the axes (crude transform from ellipsoid to sphere).
     * Assumes the device was rotated enough that every axis saw a nonzero
     * spread; otherwise the divisions below divide by zero. */
    diameters.x = abs(m_dist.max.x - m_dist.min.x);
    diameters.y = abs(m_dist.max.y - m_dist.min.y);
    diameters.z = abs(m_dist.max.z - m_dist.min.z);
    avg_diameter = (diameters.x + diameters.y + diameters.z) / 3;
    scale->x = avg_diameter / (float)diameters.x;
    scale->y = avg_diameter / (float)diameters.y;
    scale->z = avg_diameter / (float)diameters.z;

    return 1;
}
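/*
 * Usage sketch for calibrate(): the loop above expects an ncurses screen with
 * non-blocking input, so that getch() returns ERR until a key is pressed.
 * Everything below (run_calibration, the i2c device path, the width of 40)
 * is illustrative only and not part of the original driver.
 */
#include <curses.h>
#include <fcntl.h>
#include <unistd.h>

void run_calibration(void)
{
    Triplet bias, raw;
    FTriplet scale;
    int file = open("/dev/i2c-1", O_RDWR);  /* assumed bus path */

    initscr();
    cbreak();
    noecho();
    nodelay(stdscr, TRUE);  /* non-blocking getch(), as the loop assumes */
    calibrate(file, 40, &bias, &scale);
    endwin();

    /* Apply the result to a raw sample: remove the bias, then rescale each
     * axis so the ellipsoid of readings becomes roughly a sphere. */
    read_triplet(file, XM_ADDRESS, OUT_X_L_M, &raw);
    float mx = (raw.x - bias.x) * scale.x;
    float my = (raw.y - bias.y) * scale.y;
    float mz = (raw.z - bias.z) * scale.z;
    (void)mx; (void)my; (void)mz;

    close(file);
}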
/* METHOD: CONVERT DISTRIBUTIONS INTO READ MAPPINGS - SEND TO PRINT */
void convert_distribution(string o_file, int s_count,
    const map<int, string> *id2seqid, const map<int, string> *id2kmers,
    const map<int, int> *id2taxid, const map<int, string> *id2tandl,
    const taxonomy *my_taxonomy, const map<int, taxonomy *> *taxid2node,
    const int kmer_len, const int read_len)
{
    /* Number of kmers in one read-length window */
    const size_t n_kmers = read_len - kmer_len + 1;
    int seqs_read = 0;

    /* Iterate over the per-sequence kmer distributions in parallel */
    printf("\t>>STEP 4: CONVERTING KMER MAPPINGS INTO READ CLASSIFICATIONS:\n");
    printf("\t\t%imers, with a database built using %imers\n", read_len, kmer_len);
    cerr << "\t\t" << seqs_read << " sequences converted...";

    int i;
    #pragma omp parallel for
    for (i = 1; i <= s_count; i++) {
        // Kmer-pair string for this sequence, e.g. "562:5 A:2 561:3"
        string curr_ks = id2kmers->find(i)->second;
        // Expanded per-kmer taxid list
        vector<int> all_kmers;
        int count_kmers = 0;

        // Split the string into "taxid:count" pairs and expand each pair
        string buf;
        std::stringstream ss(curr_ks);
        while (ss >> buf) {
            int mid = buf.find(":");
            string pair_tstr = buf.substr(0, mid);
            int pair_count = atoi(buf.substr(mid + 1).c_str());
            // "A" marks ambiguous kmers; map them to taxid 0
            int pair_taxid = (pair_tstr == "A") ? 0 : atoi(pair_tstr.c_str());
            for (int j = 0; j < pair_count; j++) {
                all_kmers.push_back(pair_taxid);
                count_kmers += 1;
            }
        }

        // Slide a window of n_kmers over the taxids and classify each window
        vector<int> curr_kmers;
        map<int, int> taxids_mapped;
        int mapped_taxid = 0;
        int prev_kmer = -1;   // taxid that left the window last iteration
        int prev_taxid = 0;   // classification of the previous window
        for (int k = 0; k < count_kmers; k++) {
            int next_kmer = all_kmers[k];
            curr_kmers.push_back(next_kmer);
            if (curr_kmers.size() == n_kmers) {
                if (prev_kmer == next_kmer) {
                    // The kmer entering the window equals the one that just
                    // left it, so the window is unchanged: reuse the result
                    mapped_taxid = prev_taxid;
                } else {
                    mapped_taxid = get_classification(&curr_kmers,
                        my_taxonomy, taxid2node);
                }
                // Count this window's classification
                auto t_it = taxids_mapped.find(mapped_taxid);
                if (t_it == taxids_mapped.end())
                    taxids_mapped[mapped_taxid] = 1;
                else
                    t_it->second += 1;
                prev_taxid = mapped_taxid;
                // Drop the oldest kmer from the window
                prev_kmer = curr_kmers[0];
                curr_kmers.erase(curr_kmers.begin());
            }
        }

        // Update the user and print this read's mapping
        #pragma omp atomic
        seqs_read += 1;
        #pragma omp critical
        {
            cerr << "\r\t\t" << seqs_read << " sequences converted...";
            print_distribution(o_file, id2seqid->find(i)->second,
                id2taxid->find(i)->second, id2tandl->find(i)->second,
                taxids_mapped);
        }
    }
    cerr << "\r\t\t" << seqs_read << " sequences converted...\n";
}
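/*
 * Standalone sketch of the "taxid:count" expansion step used above, runnable
 * on its own for testing. The sample string and main() are illustrative only;
 * the format (space-separated pairs, "A" for ambiguous kmers) comes from the
 * function above.
 */
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    // 5 kmers mapped to taxid 562, 2 ambiguous kmers, 3 mapped to 561
    std::string curr_ks = "562:5 A:2 561:3";
    std::vector<int> all_kmers;
    std::stringstream ss(curr_ks);
    std::string buf;
    while (ss >> buf) {
        size_t mid = buf.find(':');
        std::string tstr = buf.substr(0, mid);
        int count = std::atoi(buf.substr(mid + 1).c_str());
        int taxid = (tstr == "A") ? 0 : std::atoi(tstr.c_str());
        for (int j = 0; j < count; j++)
            all_kmers.push_back(taxid);
    }
    // Prints: 562 562 562 562 562 0 0 561 561 561
    for (int t : all_kmers)
        std::cout << t << ' ';
    std::cout << '\n';
    return 0;
}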