static void writeEstimate(ostream& out, const ContigNode& id0, const ContigNode& id1, unsigned len0, unsigned len1, const Pairs& pairs, const PMF& pmf) { if (pairs.size() < opt::npairs) return; DistanceEst est; est.distance = estimateDistance(len0, len1, pairs, pmf, est.numPairs); est.stdDev = pmf.getSampleStdDev(est.numPairs); std::pair<ContigNode, ContigNode> e(id0, id1 ^ id0.sense()); if (est.numPairs >= opt::npairs) { if (opt::format == DOT) { #pragma omp critical(out) out << get(g_contigNames, e) << " [" << est << "]\n"; } else out << ' ' << get(g_contigNames, id1) << ',' << est; } else if (opt::verbose > 1) { #pragma omp critical(cerr) cerr << "warning: " << get(g_contigNames, e) << " [d=" << est.distance << "] " << est.numPairs << " of " << pairs.size() << " pairs fit the expected distribution\n"; } }
/**
 * Iteratively align a measurement to the model.
 * The model has to be populated through addToModel before calling this.
 *
 * Each iteration looks up pairs for the measurement, trims them
 * according to the overlap, derives a transform from the remaining
 * pairs, and accumulates it into the result. Iteration stops as soon as
 * any of the limits below is reached, or immediately (returning the
 * result as it stands) when fewer than Pairs::MIN_PAIRS pairs remain.
 *
 * @param measurement - the mesh that needs to be matched
 * @param max_iter - upper bound on the number of iterations
 * @param min_mse - stop once the mean square error is no longer above this
 * @param min_mse_diff - stop once the per-iteration decrease of the mean
 *        square error is no longer above this
 * @param overlap - expected overlap as a fraction in [0..1]; it scales
 *        the measurement size to get the pair count handed to
 *        Pairs::trim (e.g. 0.95 discards the 5% worst matching pairs)
 * @return the accumulated transform together with iteration count,
 *         error figures and the distances of the final pairs
 */
Result _align( _Adapter measurement, size_t max_iter, double min_mse, double min_mse_diff, double overlap )
{
    const double unbounded = std::numeric_limits<double>::infinity();

    Result result;
    Pairs pairs;

    // Start from the identity transform with all error figures unbounded,
    // so the first iteration is never cut short by the thresholds.
    result.C_global2globalnew = Eigen::Affine3d::Identity();
    result.iter = 0;
    result.overlap = overlap;
    result.d_box = unbounded;
    result.mse = unbounded;
    result.mse_diff = unbounded;

    for( ;; )
    {
        const bool iterations_left = result.iter < max_iter;
        const bool error_above_target = result.mse > min_mse;
        const bool still_improving = result.mse_diff > min_mse_diff;
        if( !iterations_left || !error_above_target || !still_improving )
            break;

        const double previous_mse = result.mse;

        // Collect fresh pairs, limited by the current search box.
        pairs.clear();
        findPairs.findPairs( measurement, pairs, result.d_box );

        // Trim to the expected number of overlapping points; the value
        // returned by trim, doubled, is the search box for the next round.
        const size_t n_po = measurement.size() * result.overlap;
        result.d_box = pairs.trim( n_po ) * 2.0;
        result.pairs = pairs.size();

        // Too few pairs left: give up with what has been accumulated so far.
        if( result.pairs < Pairs::MIN_PAIRS )
            return result;

        // Chain this iteration's incremental transform onto the total and
        // apply the total to the measurement.
        const Eigen::Affine3d C_globalprev2globalnew = pairs.getTransform();
        result.C_global2globalnew = C_globalprev2globalnew * result.C_global2globalnew;
        measurement.setOffsetTransform( result.C_global2globalnew );

        result.mse = pairs.getMeanSquareError();
        result.mse_diff = previous_mse - result.mse;
        ++result.iter;

        // Uncomment to trace convergence:
        // std::cout
        //     << "points: " << measurement.size()
        //     << "\titer: " << result.iter
        //     << "\tpairs: " << pairs.size()
        //     << "\tmse: " << result.mse
        //     << "\tmse_diff: " << result.mse_diff
        //     << "\td_box: " << result.d_box
        //     << "\toverlap: " << result.overlap
        //     << std::endl;
    }

    // Export the distance of every pair from the last iteration.
    const size_t n_pairs = pairs.size();
    std::vector<double> pairs_distance;
    pairs_distance.reserve( n_pairs );
    for( size_t idx = 0; idx != n_pairs; ++idx )
        pairs_distance.push_back( pairs.pairs[idx].distance );
    result.pairs_distance = pairs_distance;

    return result;
}
/** Estimate the distance between two contigs. * @param numPairs [out] the number of pairs that agree with the * expected distribution * @return the estimated distance */ static int estimateDistance(unsigned len0, unsigned len1, const Pairs& pairs, const PMF& pmf, unsigned& numPairs) { // The provisional fragment sizes are calculated as if the contigs // were perfectly adjacent with no overlap or gap. typedef vector<pair<int, int> > Fragments; Fragments fragments; fragments.reserve(pairs.size()); for (Pairs::const_iterator it = pairs.begin(); it != pairs.end(); ++it) { int a0 = it->targetAtQueryStart(); int a1 = it->mateTargetAtQueryStart(); if (it->isReverse()) a0 = len0 - a0; if (!it->isMateReverse()) a1 = len1 - a1; fragments.push_back(opt::rf ? make_pair(a1, len1 + a0) : make_pair(a0, len0 + a1)); } // Remove duplicate fragments. unsigned orig = fragments.size(); sort(fragments.begin(), fragments.end()); fragments.erase(unique(fragments.begin(), fragments.end()), fragments.end()); numPairs = fragments.size(); assert((int)orig - (int)numPairs >= 0); stats.total_frags += orig; stats.dup_frags += orig - numPairs; if (numPairs < opt::npairs) return INT_MIN; vector<int> fragmentSizes; fragmentSizes.reserve(fragments.size()); unsigned ma = opt::minAlign; for (Fragments::const_iterator it = fragments.begin(); it != fragments.end(); ++it) { int x = it->second - it->first; if (!opt::rf && opt::method == MLE && x <= 2 * int(ma - 1)) { unsigned align = x / 2; if (opt::verbose > 0) #pragma omp critical(cerr) cerr << PROGRAM ": warning: The observed fragment of " "size " << x << " bp is shorter than 2*l " "(l=" << opt::minAlign << ").\n"; ma = min(ma, align); } fragmentSizes.push_back(x); } #pragma omp critical(g_recMA) g_recMA = min(g_recMA, ma); switch (opt::method) { case MLE: // Use the maximum likelihood estimator. 
return maximumLikelihoodEstimate(ma, opt::minDist, opt::maxDist, fragmentSizes, pmf, len0, len1, opt::rf, numPairs); case MEAN: // Use the difference of the population mean // and the sample mean. return estimateDistanceUsingMean( fragmentSizes, pmf, numPairs); default: assert(false); abort(); } }