Ejemplo n.º 1
0
    /**
     * Build the histogram of pairwise distances between sample points on the shape. The bins uniformly partition the range
     * of distances starting at zero and ending at \a max_distance. A negative \a max_distance selects the shape scale
     * specified in the constructor instead.
     *
     * The histogram is accumulated from per-point histograms: a subset of the samples is picked as query points, a local
     * histogram of distances from each query point is computed, and the local histograms are summed.
     *
     * @param histogram The histogram to be filled. It is zeroed first; its range is copied from the first local histogram.
     * @param dist_type The type of distance metric.
     * @param max_distance The largest separation between points that contributes to the histogram. A negative value means
     *   the whole shape is considered (the shape scale specified in the constructor is used as the limit).
     * @param pair_reduction_ratio The fraction (between 0 and 1) of the available point pairs that is randomly selected to
     *   build the histogram. The selection is a subsampling of the set of all pairs, not the set of all pairs of a
     *   subsampled point set, which can give a more even sampling of pairwise distances when starting from an extra-large
     *   point set. A value of 1 requests all ordered pairs, which counts every distance twice, so something around 0.5 may
     *   be more suitable. A negative value selects the default: 0.5, or the ratio yielding at most ~1M ordered pairs,
     *   whichever is smaller.
     */
    void compute(Histogram & histogram, DistanceType dist_type, Real max_distance = -1, Real pair_reduction_ratio = -1) const
    {
      long sample_count = ldh.numSamples();

      if (pair_reduction_ratio < 0)
      {
        // Number of ordered pairs of distinct samples, i.e. (x, x) is not counted
        long ordered_pair_count = (sample_count - 1) * sample_count;
        pair_reduction_ratio = (Real)std::min(0.5, 1000000.0 / ordered_pair_count);
      }

      // Only the counts are reset here; the range is assigned inside the loop below
      histogram.setZero();

      // The square root of the overall ratio is applied twice: once to choose how many samples act as query points, and
      // once as the ratio passed to every per-query computation
      Real per_query_ratio = std::sqrt(pair_reduction_ratio);
      long query_count = Math::clamp((long)std::ceil(per_query_ratio * sample_count), 0, sample_count - 1);
      if (query_count <= 0)
        return;

      // Draw the indices of the query samples
      TheaArray<int32> queries((array_size_t)query_count);
      Random::common().sortedIntegers(0, (int32)sample_count - 1, (int32)query_count, &queries[0]);

      Histogram per_query_histogram(histogram.numBins());
      bool range_pending = true;
      for (array_size_t q = 0; q < queries.size(); ++q)
      {
        Vector3 query_point = ldh.getSamplePosition(queries[q]);
        ldh.compute(query_point, per_query_histogram, dist_type, max_distance, per_query_ratio);

        // Drop the zero distance between the query point and itself
        per_query_histogram.remove(0.0);

        // The output adopts the range of the first local histogram
        if (range_pending)
        {
          histogram.setRange(per_query_histogram.minValue(), per_query_histogram.maxValue());
          range_pending = false;
        }

        histogram.insert(per_query_histogram);
      }
    }
Ejemplo n.º 2
0
    /**
     * Callback that tallies one invocation, both in total and per call number.
     *
     * Increments the running total ncalls, then increments the entry of calls keyed by args.callno, creating that entry
     * with a count of one the first time the call number is seen.
     *
     * @param enabled Value passed in by the caller; it is returned unchanged.
     * @param args    Callback arguments; only args.callno is read.
     * @return The incoming value of enabled.
     */
    virtual bool operator()(bool enabled, const Args &args) {
        ncalls++;

        // Construct the pair directly instead of calling std::make_pair<int, size_t>(...). Since C++11 make_pair takes
        // forwarding references, so spelling out its template arguments turns the parameters into rvalue references
        // and the lvalue args.callno no longer binds (compile error).
        std::pair<Histogram::iterator, bool> inserted = calls.insert(std::pair<int, size_t>(args.callno, 1));
        if (!inserted.second)
            inserted.first->second++;   // key already present: insert() left it alone, so count this call here
        return enabled;
    }
Ejemplo n.º 3
0
    /**
     * Callback that tallies one x86 instruction, both in total and per instruction kind.
     *
     * Increments the running total ninsns, then increments the entry of insns keyed by the stringified instruction kind,
     * creating that entry with a count of one the first time the kind is seen.
     *
     * @param enabled Value passed in by the caller; it is returned unchanged.
     * @param args    Callback arguments; args.insn must be an x86 instruction (asserted).
     * @return The incoming value of enabled.
     */
    virtual bool operator()(bool enabled, const Args &args) {
        SgAsmx86Instruction *insn = isSgAsmx86Instruction(args.insn);
        assert(insn);
        ninsns++;

        std::string kind = rose::stringifyX86InstructionKind(insn->get_kind(), "x86_");

        // Construct the pair directly instead of calling std::make_pair<std::string, size_t>(...). Since C++11
        // make_pair takes forwarding references, so spelling out its template arguments turns the parameters into
        // rvalue references and the lvalue kind no longer binds (compile error).
        std::pair<Histogram::iterator, bool> inserted = insns.insert(std::pair<std::string, size_t>(kind, 1));
        if (!inserted.second)
            inserted.first->second++;   // key already present: insert() left it alone, so count this one here

        return enabled;
    }
Ejemplo n.º 4
0
/** Return the most likely distance between two contigs and the number
 * of pairs that support that distance estimate.
 * @param len0 the length of the first contig in bp
 * @param len1 the length of the second contig in bp
 * @param rf whether the fragment library is oriented reverse-forward
 * @param[out] n the number of samples with a non-zero probability
 */
int maximumLikelihoodEstimate(unsigned l,
		int first, int last,
		const vector<int>& samples, const PMF& pmf,
		unsigned len0, unsigned len1, bool rf,
		unsigned& n)
{
	assert(first < last);
	assert(!samples.empty());

	// The aligner is unable to map reads to the ends of the sequence.
	// Correct for this lack of sensitivity by subtracting l-1 bp from
	// the length of each sequence, where the aligner requires a match
	// of at least l bp. When the fragment library is oriented
	// forward-reverse, subtract 2*(l-1) from each sample.
	assert(l > 0);
	assert(len0 >= l);
	assert(len1 >= l);
	len0 -= l - 1;
	len1 -= l - 1;

	if (len0 > len1)
		swap(len0, len1);

	if (rf) {
		// This library is oriented reverse-forward.
		Histogram h(samples.begin(), samples.end());
		int d;
		tie(d, n) = maximumLikelihoodEstimate(
				first, last, h,
				pmf, len0, len1);
		return d;
	} else {
		// This library is oriented forward-reverse.
		// Subtract 2*(l-1) from each sample.
		Histogram h;
		typedef vector<int> Samples;
		for (Samples::const_iterator it = samples.begin();
				it != samples.end(); ++it) {
			assert(*it > 2 * (int)(l - 1));
			h.insert(*it - 2 * (l - 1));
		}
		int d;
		tie(d, n) = maximumLikelihoodEstimate(
				first, last, h,
				pmf, len0, len1);
		return max(first, d - 2 * (int)(l - 1));
	}
}
Ejemplo n.º 5
0
/**
 * Process the alignments of one read and of its mate.
 *
 * Pairs that cannot be used only bump the matching counter in stats:
 * both reads unaligned, one read unaligned, a read without a unique
 * alignment, or both reads spanning more than MAX_SPAN contigs.
 * Otherwise every combination of alignments is visited: a pair whose
 * single alignments hit the same contig contributes its fragment size
 * to the histogram (or is counted as same-orientation), and SAM
 * records are printed for pairs on different contigs, or for every
 * pair when neither a fragment file nor a histogram file was
 * requested.
 *
 * @param curr the read ID and alignments of the current read
 * @param pair the read ID and alignments of its mate
 */
static void handleAlignmentPair(const ReadAlignMap::value_type& curr,
                                const ReadAlignMap::value_type& pair)
{
    const string& currID = curr.first;
    const string& pairID = pair.first;
    const AlignmentVector& currAligns = curr.second;
    const AlignmentVector& pairAligns = pair.second;

    if (currAligns.empty() && pairAligns.empty()) {
        stats.bothUnaligned++;
        return;
    }
    if (currAligns.empty() || pairAligns.empty()) {
        stats.oneUnaligned++;
        return;
    }

    // Each read has to align to a unique location.
    if (!(checkUniqueAlignments(currAligns)
            && checkUniqueAlignments(pairAligns))) {
        stats.numMulti++;
        return;
    }

    // A read may span several contigs, but at least one read of the
    // pair has to span no more than two.
    const unsigned MAX_SPAN = 2;
    if (currAligns.size() > MAX_SPAN && pairAligns.size() > MAX_SPAN) {
        stats.numSplit++;
        return;
    }

    // These do not change while the alignments are visited.
    const bool soleAlignments = currAligns.size() == 1
                                && pairAligns.size() == 1;
    const bool outputSameTarget = opt::fragPath.empty()
                                  && opt::histPath.empty();

    // Visit every combination of alignments and output them.
    bool counted = false;
    for (AlignmentVector::const_iterator currIt = currAligns.begin();
            currIt != currAligns.end(); ++currIt) {
        for (AlignmentVector::const_iterator pairIt = pairAligns.begin();
                pairIt != pairAligns.end(); ++pairIt) {
            const Alignment& a0 = flipAlignment(*currIt, currID);
            const Alignment& a1 = flipAlignment(*pairIt, pairID);

            const bool sameTarget = a0.contig == a1.contig;
            if (sameTarget && soleAlignments) {
                // Same target and the only alignment of each read.
                if (a0.isRC == a1.isRC) {
                    // Both reads have the same orientation.
                    stats.numFF++;
                } else {
                    // Correctly oriented: record the fragment size.
                    const int fragSize = fragmentSize(a0, a1);
                    histogram.insert(fragSize);
                    if (!opt::fragPath.empty()) {
                        fragFile << fragSize << '\n';
                        assert(fragFile.good());
                    }
                }
                counted = true;
            }

            if (outputSameTarget || !sameTarget) {
                cout << SAMRecord(a0, a1) << '\n'
                     << SAMRecord(a1, a0) << '\n';
                assert(cout.good());
            }
        }
    }

    if (!counted)
        stats.numDifferent++;
}