/** Function accuracy()
 *
 * Compare the sign of every decision value with the expected label,
 * print the percentage of agreeing entries to stdout and return the
 * fraction of agreeing entries.
 *
 * dec_values: decision values; an entry >= 0 is read as label +1,
 *             anything else as label -1. It is indexed for every
 *             i < ty.size(), so it must hold at least that many entries.
 * ty:         expected labels, compared against +1 / -1.
 *
 * Returns correct / total. When ty is empty the result is 0 (and
 * "Accuracy = 0% (0/0)" is printed) instead of the NaN that a 0/0
 * division would produce.
 */
double accuracy(const dvec_t& dec_values, const ivec_t& ty)
{
    const int total = static_cast<int>(ty.size());
    int correct = 0;

    for (size_t i = 0; i < ty.size(); ++i) {
        const int predicted = (dec_values[i] >= 0) ? 1 : -1;
        if (ty[i] == predicted) ++correct;
    }

    // Guard the empty case: dividing 0.0 by 0 yields NaN, which would be
    // both printed and handed back to the caller.
    const double ratio =
        (total > 0) ? static_cast<double>(correct) / total : 0.0;

    printf("Accuracy = %g%% (%d/%d)\n", ratio * 100, correct, total);
    return ratio;
}
/** Function update_uf()
 * Given fragment IDs of the same cluster, update the union find
 * structure [uf_clst] to reflect this information.
 *
 * uf_clst:  union find array, modified in place; the root of every
 *           listed fragment is re-pointed at the root of the first
 *           listed fragment.
 * clusters: fragment IDs that belong to one cluster. Fewer than two
 *           entries leave uf_clst untouched.
 *
 * All listed fragments end up in a single set, so it is enough to link
 * each one to the first; comparing every pair (as done previously) adds
 * a quadratic number of uf_find() calls without changing the resulting
 * partition. NOTE(review): this relies on uf_find() (defined elsewhere)
 * returning the index of the set's root -- confirm against its
 * definition.
 */
void update_uf (ivec_t& uf_clst, const ivec_t& clusters) {
    int sz = clusters.size ();
    if (sz < 2) return; // nothing to merge

    // Root of the first fragment; it stays the root for the whole loop
    // because other roots are only ever attached beneath it.
    int anchor_frag = clusters[0];
    int anchor_root = uf_find (anchor_frag, uf_clst);

    for (int j = 1; j < sz; ++ j) {
        int fragID_j = clusters[j];
        int root_j = uf_find (fragID_j, uf_clst);
        uf_clst[root_j] = anchor_root;
    }
} // update_uf
/** Function debug_print_fragments () * Given fragment IDs then print concatenated fragments from input fastq files */ void debug_print_fragments (const ivec_t& fragIDs, const std::string& fq, const std::string& fq2) { std::cout << "\nnum fragments: " << fragIDs.size() << "\n"; iset_t ids (fragIDs.begin(), fragIDs.end()); std::ifstream ifhfq, ifhfq2; xny::openfile<std::ifstream>(ifhfq, fq); xny::openfile<std::ifstream>(ifhfq2, fq2); bio::fastq_input_iterator<> iter_fq (ifhfq), end, iter_fq2(ifhfq2); int fragID = 0; for (; iter_fq != end, iter_fq2 != end; ++ iter_fq, ++ iter_fq2) { if (ids.count(fragID)) { std::string frag = std::get<1>(*iter_fq) + std::get<1> (*iter_fq2); std::cout << frag << "\n"; } ++ fragID; } xny::closefile(ifhfq); xny::closefile(ifhfq2); } //debug_print_fragments
/** Function make_cluster() * * Given a list of fragments denoted by seeds, make pairwise comparison * and clustering conforming max_mismatch criteria * * Output: clusters in 2d vector format, where each row of the vector * stores the clustered fragment IDs. */ void make_cluster (iivec_t& clusters, const ii64vec_t& list_seeds, const ivec_t& init_cluster, int max_mismatch, const ivec_t& uf_clst) { if (list_seeds.size() == 0) { abording ("DuplRm.cpp -- make_cluster(): SC failed"); } //--------- union find: (1) initialize the cluster --------- int sz = init_cluster.size(); bvec_t visited (sz, false); ivec_t clst (sz); for (int i = 0; i < sz; ++ i) clst[i] = i; //--------- pairwise comparison --------- for (int i = 0; i < sz - 1; ++ i) { if (visited[i]) continue; // to speed up int idx_i = init_cluster[i]; for (int j = i + 1; j < sz; ++ j) { if (visited[j]) continue; // to speed up, avoid of comparison // if this is already clustered int idx_j = init_cluster[j]; // check global uf structure according to fragID int root_uf_i = uf_clsfind ((int) list_seeds[idx_i].back(), uf_clst), root_uf_j = uf_clsfind ((int) list_seeds[idx_j].back(), uf_clst); if (root_uf_i != root_uf_j) { int root_i = uf_find (i, clst), root_j = uf_find (j, clst); if (root_i != root_j) { if (is_similar (list_seeds[idx_i], list_seeds[idx_j], max_mismatch)) { clst[root_j] = root_i; visited[j] = true; } } } // if } // for (int j = i + 1 } // for (int i = 0 //----- generate final cluster { clusterID --> fragment IDs } ------ std::map<int, ivec_t> clstID_fragIDs; std::map<int, ivec_t>::iterator it; for (int i = 0; i < sz; ++ i) { int idx_i = init_cluster[i]; int fragID = list_seeds[idx_i].back(); int root_i = uf_clsfind (i, clst); it = clstID_fragIDs.find (root_i); if (it != clstID_fragIDs.end()) it->second.push_back(fragID); else clstID_fragIDs[root_i] = ivec_t (1, fragID); } // for (int i = 0 // go through the map and produce clusters in sorted vector format for (it = clstID_fragIDs.begin(); it != 
clstID_fragIDs.end(); ++ it) { if (it->second.size() > 1) { std::sort (it->second.begin(), it->second.end()); clusters.push_back(it->second); } } // for (it } // make_cluster