Ejemplo n.º 1
0
double accuracy(const dvec_t& dec_values, const ivec_t& ty){
	int    correct = 0;
	int    total   = (int) ty.size();
	size_t i;

	for(i = 0; i < ty.size(); ++i)
		if(ty[i] == (dec_values[i] >= 0? 1: -1)) ++correct;

	printf("Accuracy = %g%% (%d/%d)\n",
		(double)correct/total*100,correct,total);

	return (double) correct / total;
}
Ejemplo n.º 2
0
/**	Function update_uf()
 * 	Given fragment IDs of the same cluster, update the union find
 * 	structure [uf_clst] so that every listed fragment ends up under the
 * 	root of the first listed fragment.
 *
 * 	@param uf_clst   union-find parent array, modified in place
 * 	@param clusters  fragment IDs that belong together; fewer than two
 * 	                 entries leaves uf_clst untouched
 *
 * 	One pass is enough: linking each member's root to the first member's
 * 	root yields the same sets as unioning every pair.
 * 	NOTE(review): assumes uf_find() returns a representative r with
 * 	uf_clst[r] == r -- confirm against its definition.
 */
void update_uf (ivec_t& uf_clst, const ivec_t& clusters) {
	const int sz = static_cast<int> (clusters.size ());
	if (sz < 2) return;

	// Writes below only touch other roots' entries (or rewrite this root
	// onto itself), so this value stays the representative throughout.
	const int anchor_root = uf_find (clusters[0], uf_clst);

	for (int j = 1; j < sz; ++ j) {
		const int root_j = uf_find (clusters[j], uf_clst);
		uf_clst[root_j] = anchor_root;
	}
} // update_uf
Ejemplo n.º 3
0
/** Function debug_print_fragments ()
 *
 * Given fragment IDs, print the corresponding concatenated fragments read
 * from a pair of input fastq files.
 *
 * @param fragIDs  IDs to print; an ID is the 0-based position of a record
 *                 pair as the two files are read in lockstep
 * @param fq       path of the first fastq file
 * @param fq2      path of the second fastq file
 *
 * Output goes to std::cout: a header line with the number of requested
 * IDs, then one line per selected record pair holding element 1 of the
 * first record followed by element 1 of the second (presumably the
 * sequence string -- confirm against bio::fastq_input_iterator).
 *
 * Reading stops as soon as EITHER file runs out of records.
 */
void debug_print_fragments (const ivec_t& fragIDs, const std::string& fq,
		const std::string& fq2) {

	std::cout << "\nnum fragments: " << fragIDs.size() << "\n";

	// set gives cheap membership tests while streaming through the files
	iset_t ids (fragIDs.begin(), fragIDs.end());

	std::ifstream ifhfq, ifhfq2;
	xny::openfile<std::ifstream>(ifhfq, fq);
	xny::openfile<std::ifstream>(ifhfq2, fq2);

	bio::fastq_input_iterator<> iter_fq (ifhfq), end, iter_fq2(ifhfq2);
	int fragID = 0;
	// Both iterators must be valid before dereferencing; '&&' (not the
	// comma operator) makes sure the first comparison is not discarded.
	for (; iter_fq != end && iter_fq2 != end; ++ iter_fq, ++ iter_fq2) {
		if (ids.count(fragID)) {
			std::string frag = std::get<1>(*iter_fq) + std::get<1> (*iter_fq2);
			std::cout << frag << "\n";
		}
		++ fragID;
	}

	xny::closefile(ifhfq);
	xny::closefile(ifhfq2);
} //debug_print_fragments
Ejemplo n.º 4
0
/** Function make_cluster()
 *
 *  Pairwise-compare the seeds selected by init_cluster and group those
 *  that is_similar() accepts under max_mismatch, using a local union-find
 *  over positions in init_cluster.
 *
 *  @param clusters      output; one sorted row of fragment IDs is appended
 *                       for every group with more than one member
 *  @param list_seeds    seed rows; the last element of a row is used as
 *                       its fragment ID
 *  @param init_cluster  indices into list_seeds to be compared
 *  @param max_mismatch  threshold forwarded to is_similar()
 *  @param uf_clst       global union-find, read only; pairs whose
 *                       fragment IDs already share a root there are
 *                       skipped without comparison
 *
 *  Aborts via abording() when list_seeds is empty.
 */
void make_cluster (iivec_t& clusters, const ii64vec_t& list_seeds,
		const ivec_t& init_cluster, int max_mismatch, const ivec_t& uf_clst) {

	if (0 == list_seeds.size()) {
		abording ("DuplRm.cpp -- make_cluster(): SC failed");
	}

	//--------- local union find: every position starts as its own root ----
	const int n_members = static_cast<int> (init_cluster.size());
	bvec_t merged (n_members, false);
	ivec_t parent (n_members);
	for (int k = 0; k < n_members; ++ k) parent[k] = k;

	//--------- pairwise comparison ---------
	for (int a = 0; a + 1 < n_members; ++ a) {
		// a position already absorbed into an earlier group is not used
		// as a comparison anchor again
		if (merged[a]) continue;

		const int seed_a = init_cluster[a];
		for (int b = a + 1; b < n_members; ++ b) {
			if (merged[b]) continue;

			const int seed_b = init_cluster[b];

			// consult the global structure by fragment ID first
			const int global_a = uf_clsfind (
					static_cast<int> (list_seeds[seed_a].back()), uf_clst);
			const int global_b = uf_clsfind (
					static_cast<int> (list_seeds[seed_b].back()), uf_clst);
			if (global_a == global_b) continue;

			// then the local structure by position
			const int local_a = uf_find (a, parent);
			const int local_b = uf_find (b, parent);
			if (local_a == local_b) continue;

			if (is_similar (list_seeds[seed_a], list_seeds[seed_b],
					max_mismatch)) {
				parent[local_b] = local_a;
				merged[b] = true;
			}
		}
	}

	//----- collect { local root --> fragment IDs } ------
	std::map<int, ivec_t> members_by_root;
	for (int k = 0; k < n_members; ++ k) {
		const int fragID = list_seeds[init_cluster[k]].back();
		const int root = uf_clsfind (k, parent);
		members_by_root[root].push_back (fragID);
	}

	//----- emit every non-singleton group as a sorted row ------
	for (auto& entry : members_by_root) {
		ivec_t& frag_ids = entry.second;
		if (frag_ids.size() <= 1) continue;
		std::sort (frag_ids.begin(), frag_ids.end());
		clusters.push_back (frag_ids);
	}

} // make_cluster