/**
 * Build a histogram of the distances between pairs of sample points on the shape. The bins uniformly subdivide the range
 * of distances from zero to \a max_distance; a negative \a max_distance selects the shape scale given to the constructor.
 *
 * The work is done in two stages: a random subset of samples is chosen as query points, and for each query point a local
 * histogram is computed via \c ldh and merged into the output. The square root of the reduction ratio is applied at each
 * stage (selection of query points, and the per-query computation).
 *
 * @param histogram The histogram to fill. It is zeroed first; its range is taken from the first per-query histogram.
 * @param dist_type The type of distance metric.
 * @param max_distance The largest separation between points to consider. A negative value means the whole shape is
 *   considered (in which case the shape scale specified in the constructor is used). The histogram range is set
 *   accordingly.
 * @param pair_reduction_ratio The fraction (between 0 and 1) of the available point pairs that will be randomly selected
 *   to build the histogram. The final set is a subsampling of the set of all possible pairs, not the set of all pairs of
 *   a subsampled set of points, which can give a more evenly sampled set of pairwise distances when starting from an
 *   extra-large set of points. A value of 1 uses all ordered pairs, which counts every distance twice, so about 0.5 may
 *   be more appropriate. A negative value selects the smaller of 0.5 and the ratio that yields at most ~1M ordered
 *   pairs.
 */
void compute(Histogram & histogram, DistanceType dist_type, Real max_distance = -1, Real pair_reduction_ratio = -1) const
{
  long sample_count = ldh.numSamples();

  // Number of ordered pairs of distinct samples: (x, x) is not counted
  long ordered_pair_count = (sample_count - 1) * sample_count;

  if (pair_reduction_ratio < 0)
  {
    // Default: at most half of the pairs, and no more than ~1M of them
    pair_reduction_ratio = (Real)std::min(0.5, 1000000.0 / ordered_pair_count);
  }

  // Clear the bins. The range is assigned below, from the first per-query histogram.
  histogram.setZero();

  // The same per-stage ratio is used to pick query points and is handed to each per-query computation
  Real stage_ratio = std::sqrt(pair_reduction_ratio);

  long query_count = Math::clamp((long)std::ceil(stage_ratio * sample_count), 0, sample_count - 1);
  if (query_count <= 0)
    return;

  // Draw the indices of the samples that act as query points
  TheaArray<int32> chosen((array_size_t)query_count);
  Random::common().sortedIntegers(0, (int32)sample_count - 1, (int32)query_count, &chosen[0]);

  Histogram per_query(histogram.numBins());
  for (array_size_t q = 0; q < chosen.size(); ++q)
  {
    Vector3 query_point = ldh.getSamplePosition(chosen[q]);
    ldh.compute(query_point, per_query, dist_type, max_distance, stage_ratio);

    // Drop the zero distance from the query point to itself
    per_query.remove(0.0);

    // Adopt the range of the first per-query histogram before accumulating anything
    if (q == 0)
      histogram.setRange(per_query.minValue(), per_query.maxValue());

    histogram.insert(per_query);
  }
}
/**
 * Callback that tallies how often each call number is seen.
 *
 * Every invocation increments the total counter `ncalls` and the entry of `calls` keyed by `args.callno`
 * (the entry is created with a count of 1 the first time a call number appears).
 *
 * @param enabled  Flag supplied by the caller; it is returned unchanged.
 * @param args     Callback arguments; only `args.callno` is read here.
 * @return The value of `enabled`.
 */
virtual bool operator()(bool enabled, const Args &args) {
    ncalls++;

    // Let std::make_pair deduce its template arguments. Spelling them out (make_pair<int, size_t>) turns the
    // parameters into rvalue references in C++11 and later, and the lvalue args.callno cannot bind to int&&.
    // The casts keep the resulting pair type exactly std::pair<int, size_t>.
    std::pair<Histogram::iterator, bool> inserted =
        calls.insert(std::make_pair(static_cast<int>(args.callno), static_cast<size_t>(1)));

    // The key was already present: bump the existing count instead.
    if (!inserted.second)
        inserted.first->second++;

    return enabled;
}
/**
 * Callback that tallies how often each kind of x86 instruction is seen.
 *
 * Every invocation increments the total counter `ninsns` and the entry of `insns` keyed by the stringified
 * instruction kind (the entry is created with a count of 1 the first time a kind appears).
 *
 * @param enabled  Flag supplied by the caller; it is returned unchanged.
 * @param args     Callback arguments; `args.insn` must be an x86 instruction (checked with an assertion).
 * @return The value of `enabled`.
 */
virtual bool operator()(bool enabled, const Args &args) {
    SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.insn);
    assert(insn);
    ninsns++;

    // Name of the instruction kind. NOTE(review): the "x86_" argument is presumably a prefix to strip from
    // the enum name -- confirm against the stringifier.
    std::string kind = rose::stringifyX86InstructionKind(insn->get_kind(), "x86_");

    // Let std::make_pair deduce its template arguments. Spelling them out (make_pair<std::string, size_t>)
    // turns the parameters into rvalue references in C++11 and later, and the lvalue `kind` cannot bind to
    // std::string&&. The cast keeps the resulting pair type exactly std::pair<std::string, size_t>.
    std::pair<Histogram::iterator, bool> inserted =
        insns.insert(std::make_pair(kind, static_cast<size_t>(1)));

    // The key was already present: bump the existing count instead.
    if (!inserted.second)
        inserted.first->second++;

    return enabled;
}
/** Return the most likely distance between two contigs and the number * of pairs that support that distance estimate. * @param len0 the length of the first contig in bp * @param len1 the length of the second contig in bp * @param rf whether the fragment library is oriented reverse-forward * @param[out] n the number of samples with a non-zero probability */ int maximumLikelihoodEstimate(unsigned l, int first, int last, const vector<int>& samples, const PMF& pmf, unsigned len0, unsigned len1, bool rf, unsigned& n) { assert(first < last); assert(!samples.empty()); // The aligner is unable to map reads to the ends of the sequence. // Correct for this lack of sensitivity by subtracting l-1 bp from // the length of each sequence, where the aligner requires a match // of at least l bp. When the fragment library is oriented // forward-reverse, subtract 2*(l-1) from each sample. assert(l > 0); assert(len0 >= l); assert(len1 >= l); len0 -= l - 1; len1 -= l - 1; if (len0 > len1) swap(len0, len1); if (rf) { // This library is oriented reverse-forward. Histogram h(samples.begin(), samples.end()); int d; tie(d, n) = maximumLikelihoodEstimate( first, last, h, pmf, len0, len1); return d; } else { // This library is oriented forward-reverse. // Subtract 2*(l-1) from each sample. Histogram h; typedef vector<int> Samples; for (Samples::const_iterator it = samples.begin(); it != samples.end(); ++it) { assert(*it > 2 * (int)(l - 1)); h.insert(*it - 2 * (l - 1)); } int d; tie(d, n) = maximumLikelihoodEstimate( first, last, h, pmf, len0, len1); return max(first, d - 2 * (int)(l - 1)); } }
/** Process the alignments of a read and of its mate.
 *
 * The pair is first classified; pairs that are unaligned, aligned to
 * multiple locations, or split over too many contigs only update the
 * corresponding counter in stats. For the remaining pairs, every
 * combination of alignments is visited:
 *  - when both reads have exactly one alignment and both hit the same
 *    contig, the pair either contributes its fragment size to the
 *    histogram (and to fragFile when opt::fragPath is set), or is
 *    counted in stats.numFF when both have the same orientation;
 *  - SAM records for both reads are written to cout when the targets
 *    differ, or when neither opt::fragPath nor opt::histPath is set.
 * A pair for which no fragment was counted increments
 * stats.numDifferent.
 *
 * @param curr the read ID and its alignments
 * @param pair the mate's read ID and its alignments
 */
static void handleAlignmentPair(const ReadAlignMap::value_type& curr,
        const ReadAlignMap::value_type& pair)
{
    const string& currID = curr.first;
    const string& pairID = pair.first;

    // Both reads must align to a unique location.
    // A read may span more than one contig, but at least one of the
    // two reads must span no more than two contigs.
    const unsigned MAX_SPAN = 2;

    const bool currUnaligned = curr.second.empty();
    const bool pairUnaligned = pair.second.empty();
    if (currUnaligned || pairUnaligned) {
        if (currUnaligned && pairUnaligned)
            stats.bothUnaligned++;
        else
            stats.oneUnaligned++;
        return;
    }

    if (!checkUniqueAlignments(curr.second)
            || !checkUniqueAlignments(pair.second)) {
        stats.numMulti++;
        return;
    }

    if (curr.second.size() > MAX_SPAN
            && pair.second.size() > MAX_SPAN) {
        stats.numSplit++;
        return;
    }

    // These do not change while the alignments are being visited.
    const bool singleAlignments
        = curr.second.size() == 1 && pair.second.size() == 1;
    const bool outputSameTarget
        = opt::fragPath.empty() && opt::histPath.empty();

    // Visit every combination of alignments, outputting the records.
    bool counted = false;
    for (AlignmentVector::const_iterator currIt = curr.second.begin();
            currIt != curr.second.end(); ++currIt) {
        for (AlignmentVector::const_iterator mateIt
                = pair.second.begin();
                mateIt != pair.second.end(); ++mateIt) {
            const Alignment& a0 = flipAlignment(*currIt, currID);
            const Alignment& a1 = flipAlignment(*mateIt, pairID);

            const bool sameTarget = a0.contig == a1.contig;
            if (sameTarget && singleAlignments) {
                // Same target, and the only alignment of each read.
                if (a0.isRC == a1.isRC) {
                    stats.numFF++;
                } else {
                    // Correctly oriented: record the fragment size.
                    int size = fragmentSize(a0, a1);
                    histogram.insert(size);
                    if (!opt::fragPath.empty()) {
                        fragFile << size << '\n';
                        assert(fragFile.good());
                    }
                }
                counted = true;
            }

            if (outputSameTarget || !sameTarget) {
                cout << SAMRecord(a0, a1) << '\n'
                    << SAMRecord(a1, a0) << '\n';
                assert(cout.good());
            }
        }
    }

    if (!counted)
        stats.numDifferent++;
}