Beispiel #1
0
/** Estimate the distance between two contigs and report the result.
 * Nothing is done when fewer than opt::npairs pairs are supplied.
 * When enough pairs fit the distribution, the estimate is written to
 * out, either as a DOT edge or appended as " <name>,<estimate>".
 * Otherwise, a warning is printed to cerr when opt::verbose > 1.
 * @param out the stream that receives the estimate
 * @param id0 the first contig
 * @param id1 the second contig
 * @param len0 the length of the first contig
 * @param len1 the length of the second contig
 * @param pairs the read pairs joining the two contigs
 * @param pmf the expected fragment-size distribution
 */
static void writeEstimate(ostream& out,
		const ContigNode& id0, const ContigNode& id1,
		unsigned len0, unsigned len1,
		const Pairs& pairs, const PMF& pmf)
{
	// Not enough evidence to attempt an estimate at all.
	if (pairs.size() < opt::npairs)
		return;

	// estimateDistance fills in est.numPairs, which the standard
	// deviation below depends on, so the order matters.
	DistanceEst est;
	est.distance = estimateDistance(len0, len1,
			pairs, pmf, est.numPairs);
	est.stdDev = pmf.getSampleStdDev(est.numPairs);

	std::pair<ContigNode, ContigNode> edge(id0, id1 ^ id0.sense());

	if (est.numPairs < opt::npairs) {
		// Too few pairs survived the estimate: report it only when
		// running very verbosely.
		if (opt::verbose > 1) {
#pragma omp critical(cerr)
			cerr << "warning: " << get(g_contigNames, edge)
				<< " [d=" << est.distance << "] "
				<< est.numPairs << " of " << pairs.size()
				<< " pairs fit the expected distribution\n";
		}
		return;
	}

	if (opt::format != DOT) {
		// NOTE(review): unlike the DOT branch, this write is not inside
		// the `out` critical section; presumably `out` is not shared
		// between threads in this mode -- confirm against the caller.
		out << ' ' << get(g_contigNames, id1) << ',' << est;
		return;
	}

	// One complete edge per line, serialized across threads.
#pragma omp critical(out)
	out << get(g_contigNames, edge) << " [" << est << "]\n";
}
Beispiel #2
0
    /** Performs a single alignment of the measurement to the model.
     * The model needs to be added using addToModel before this call.
     *
     * Every iteration looks up pairs for the measurement, trims them
     * according to the overlap, applies the transform derived from the
     * remaining pairs to the measurement and updates the error figures.
     * If fewer than Pairs::MIN_PAIRS pairs remain after trimming, the
     * result gathered so far is returned immediately and
     * result.pairs_distance is not filled in.
     *
     * @param measurement - the mesh that needs to be matched
     * @param max_iter - maximum number of iterations
     * @param min_mse - iteration stops once the average square distance
     *                  between points is no longer above this value
     * @param min_mse_diff - iteration stops once the decrease in average
     *                       square distance between two iterations is no
     *                       longer above this value
     * @param overlap - fraction of overlap, range between [0..1]. A value of
     *                  0.95 will discard 5% of the pairs with the worst matches
     * @return the accumulated transform, the iteration and pair counts, the
     *         error figures and the distances of the final set of pairs
     */
    Result _align( _Adapter measurement, size_t max_iter, double min_mse, double min_mse_diff, double overlap )
    {
	const double unbounded = std::numeric_limits<double>::infinity();

	Result result;
	result.C_global2globalnew = Eigen::Affine3d::Identity();
	result.iter = 0;
	result.overlap = overlap;
	// Start without any limit so that the first pass of the loop always runs
	// (given max_iter > 0) and the first pair search is unrestricted.
	result.d_box = unbounded;
	result.mse = unbounded;
	result.mse_diff = result.mse;

	Pairs pairs;
	while( result.iter < max_iter && result.mse > min_mse && result.mse_diff > min_mse_diff )
	{
	    const double previous_mse = result.mse;

	    // Rebuild the pairs from scratch for the current pose.
	    pairs.clear();
	    findPairs.findPairs( measurement, pairs, result.d_box );

	    // Trim the pairs to the requested overlap; twice the value returned
	    // by trim is handed to the next pair search as d_box.
	    const size_t keep = measurement.size() * result.overlap;
	    result.d_box = pairs.trim( keep ) * 2.0;
	    result.pairs = pairs.size();
	    if( result.pairs < Pairs::MIN_PAIRS )
		return result;

	    // Stack this iteration's step on top of the transform found so far
	    // and move the measurement accordingly.
	    const Eigen::Affine3d step = pairs.getTransform();
	    result.C_global2globalnew = step * result.C_global2globalnew;
	    measurement.setOffsetTransform( result.C_global2globalnew );

	    result.mse = pairs.getMeanSquareError();
	    result.mse_diff = previous_mse - result.mse;

	    ++result.iter;

	    // Debug trace, disabled:
	    // std::cout << "points: " << measurement.size() << "\titer: " << result.iter
	    //     << "\tpairs: " << pairs.size() << "\tmse: " << result.mse
	    //     << "\tmse_diff: " << result.mse_diff << "\td_box: " << result.d_box
	    //     << "\toverlap: " << result.overlap << std::endl;
	}

	// Hand the distances of the pairs from the last iteration to the caller.
	std::vector<double> distances;
	distances.reserve( pairs.size() );
	for( size_t idx = 0; idx < pairs.size(); ++idx )
	    distances.push_back( pairs.pairs[idx].distance );
	result.pairs_distance = distances;

	return result;
    }
Beispiel #3
0
/** Estimate the distance between two contigs.
 * @param numPairs [out] the number of pairs that agree with the
 * expected distribution
 * @return the estimated distance
 */
static int estimateDistance(unsigned len0, unsigned len1,
		const Pairs& pairs, const PMF& pmf,
		unsigned& numPairs)
{
	// The provisional fragment sizes are calculated as if the contigs
	// were perfectly adjacent with no overlap or gap.
	typedef vector<pair<int, int> > Fragments;
	Fragments fragments;
	fragments.reserve(pairs.size());
	for (Pairs::const_iterator it = pairs.begin();
			it != pairs.end(); ++it) {
		int a0 = it->targetAtQueryStart();
		int a1 = it->mateTargetAtQueryStart();
		if (it->isReverse())
			a0 = len0 - a0;
		if (!it->isMateReverse())
			a1 = len1 - a1;
		fragments.push_back(opt::rf
				? make_pair(a1, len1 + a0)
				: make_pair(a0, len0 + a1));
	}

	// Remove duplicate fragments.
	unsigned orig = fragments.size();
	sort(fragments.begin(), fragments.end());
	fragments.erase(unique(fragments.begin(), fragments.end()),
			fragments.end());
	numPairs = fragments.size();
	assert((int)orig - (int)numPairs >= 0);
	stats.total_frags += orig;
	stats.dup_frags += orig - numPairs;

	if (numPairs < opt::npairs)
		return INT_MIN;

	vector<int> fragmentSizes;
	fragmentSizes.reserve(fragments.size());
	unsigned ma = opt::minAlign;
	for (Fragments::const_iterator it = fragments.begin();
			it != fragments.end(); ++it) {
		int x = it->second - it->first;
		if (!opt::rf && opt::method == MLE 
				&& x <= 2 * int(ma - 1)) {
			unsigned align = x / 2;
			if (opt::verbose > 0)
#pragma omp critical(cerr)
				cerr << PROGRAM ": warning: The observed fragment of "
					"size " << x << " bp is shorter than 2*l "
					"(l=" << opt::minAlign << ").\n";
			ma = min(ma, align);
		}
		fragmentSizes.push_back(x);
	}

#pragma omp critical(g_recMA)
	g_recMA = min(g_recMA, ma);
	switch (opt::method) {
	  case MLE:
		// Use the maximum likelihood estimator.
		return maximumLikelihoodEstimate(ma,
				opt::minDist, opt::maxDist,
				fragmentSizes, pmf, len0, len1, opt::rf, numPairs);
	  case MEAN:
		// Use the difference of the population mean
		// and the sample mean.
		return estimateDistanceUsingMean(
				fragmentSizes, pmf, numPairs);
	  default:
		assert(false);
		abort();
	}
}