void
SVScorePairRefProcessor::
processClearedRecord(
    const bam_record& bamRead)
{
    using namespace illumina::common;

    assert(bamParams.isSet);

    const pos_t refPos(bamRead.pos()-1);
    if (! bamParams.interval.range.is_pos_intersect(refPos)) return;

    const bool isLargeInsert(isLargeInsertSV(sv));

#ifdef DEBUG_MEGAPAIR
    log_os << __FUNCTION__ << ": read: " << bamRead << "\n";
#endif

    /// check if fragment is too big or too small:
    const int templateSize(std::abs(bamRead.template_size()));
    if (templateSize < bamParams.minFrag) return;
    if (templateSize > bamParams.maxFrag) return;

    // count only from the down stream reads
    const bool isFirstBamRead(isFirstRead(bamRead));

    // get fragment range:
    pos_t fragBeginRefPos(refPos);
    if (! isFirstBamRead)
    {
        fragBeginRefPos=bamRead.mate_pos()-1;
    }

    const pos_t fragEndRefPos(fragBeginRefPos+templateSize);

    if (fragBeginRefPos > fragEndRefPos)
    {
        std::ostringstream oss;
        oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fragBeginRefPos << " " << fragEndRefPos << " bamRecord: " << bamRead << "\n";
        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
    }

    {
        const pos_t fragOverlap(std::min((1+svParams.centerPos-fragBeginRefPos), (fragEndRefPos-svParams.centerPos)));
#ifdef DEBUG_MEGAPAIR
        log_os << __FUNCTION__ << ": frag begin/end/overlap: " << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n";
#endif
        if (fragOverlap < pairOpt.minFragSupport) return;
    }

    SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]);

    static const bool isShadow(false);

    SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first()));
    setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadow, evRead);

    setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1),isLargeInsert);
}
Exemple #2
0
/// Classify the fragment size category of a read pair.
///
/// Returns DISTANT when the read and its mate report different target ids;
/// otherwise delegates to classifySize() using the cached stats of the given
/// read group and the absolute template size of the record.
FragmentSizeType::index_t
SVLocusScanner::
_getFragmentSizeType(
    const bam_record& bamRead,
    const unsigned defaultReadGroupIndex) const
{
    using namespace FragmentSizeType;

    const bool isSameTarget(bamRead.target_id() == bamRead.mate_target_id());
    if (isSameTarget)
    {
        // template size is signed in the record; only its magnitude is classified
        const int32_t absTemplateSize(std::abs(bamRead.template_size()));
        return classifySize(_stats[defaultReadGroupIndex], absTemplateSize);
    }

    // mates on different targets: no size statistics are consulted
    return DISTANT;
}
Exemple #3
0
bool
SVLocusScanner::
isProperPair(
    const bam_record& bamRead,
    const unsigned defaultReadGroupIndex) const
{
    if (! is_innie_pair(bamRead)) return false;

    const Range& ppr(_stats[defaultReadGroupIndex].properPair);
    const int32_t fragmentSize(std::abs(bamRead.template_size()));

    // we're seeing way to much large fragment garbage in cancers to use
    // vanilla proper pair criteria, push the max fragment size out a bit for now:
    static const float maxAnomFactor(1.5);
    if (fragmentSize > static_cast<int32_t>(maxAnomFactor*ppr.max)) return false;
    if (fragmentSize < ppr.min) return false;

    return true;
}
Exemple #4
0
/// Build one SVLocus per SV candidate suggested by a single BAM record.
///
/// `loci` is cleared first and then receives one locus for every candidate
/// returned by getReadBreakendsImpl(). In practice the result is expected to
/// hold one locus (or none, depending on upstream filtration); several loci
/// from a single read are possible in principle but not anticipated.
///
/// Side effects on `eCounts`: the local breakend's low-resolution evidence
/// counts are added to eCounts.eType for every candidate, and
/// eCounts.closeCount is incremented for each candidate weighted as a close
/// read pair.
///
/// Throws LogicException when a candidate has an empty local breakend
/// interval, or an empty remote breakend interval on a non-complex candidate.
///
static
void
getSVLociImpl(
    const ReadScannerOptions& opt,
    const ReadScannerDerivOptions& dopt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& bamRead,
    const bam_header_info& bamHeader,
    const reference_contig_segment& refSeq,
    std::vector<SVLocus>& loci,
    SampleEvidenceCounts& eCounts)
{
    using namespace illumina::common;

    loci.clear();

    std::vector<SVObservation> observations;
    known_pos_range2 readEvidenceRange;
    getReadBreakendsImpl(opt, dopt, rstats, bamRead, nullptr, bamHeader,
                         refSeq, nullptr, observations, readEvidenceRange);

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << ": candidate_size: " << observations.size() << "\n";
#endif

    // reduce each candidate to the simpler node/edge form used by the
    // SV locus graph:
    for (const SVCandidate& obs : observations)
    {
        const bool isComplexCand(isComplexSV(obs));

        const SVBreakend& nearBreakend(obs.bp1);
        const SVBreakend& farBreakend(obs.bp2);

        // the local interval must never be empty; the remote interval may
        // only be empty for complex candidates
        const bool isNearValid(0 != nearBreakend.interval.range.size());
        const bool isFarValid(isComplexCand || (0 != farBreakend.interval.range.size()));
        if (! (isNearValid && isFarValid))
        {
            std::ostringstream oss;
            oss << "Unexpected breakend pattern proposed from bam record.\n"
                << "\tlocal_breakend: " << nearBreakend << "\n"
                << "\tremote_breakend: " << farBreakend << "\n"
                << "\tbam_record: " << bamRead << "\n";
            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
        }

        // accumulate per-type evidence counts from the local breakend:
        for (int evidenceIndex(0); evidenceIndex < SVEvidenceType::SIZE; ++evidenceIndex)
        {
            eCounts.eType[evidenceIndex] += nearBreakend.lowresEvidence.getVal(evidenceIndex);
        }

        // choose edge weights: non-pair evidence takes precedence over
        // read-pair evidence; with neither, both weights stay zero
        unsigned nearWeight(0);
        unsigned farWeight(0);

        if (0 != nearBreakend.getAnyNonPairCount())
        {
            nearWeight = SVObservationWeights::internalReadEvent;
            if (0 != farBreakend.getAnyNonPairCount())
            {
                farWeight = SVObservationWeights::internalReadEvent;
            }
        }
        else if (0 != nearBreakend.getLocalPairCount())
        {
            // an innie pair below the distant-fragment size threshold is
            // down-weighted and tallied as "close"
            const bool isClosePair(is_innie_pair(bamRead) &&
                                   (std::abs(bamRead.template_size()) < rstats.minDistantFragmentSize));

            unsigned pairWeight(SVObservationWeights::readPair);
            if (isClosePair)
            {
                pairWeight = SVObservationWeights::closeReadPair;
                eCounts.closeCount += 1;
            }

            nearWeight = pairWeight;
            if (0 != farBreakend.getLocalPairCount())
            {
                farWeight = pairWeight;
            }
        }

        // assemble the graph locus, starting with the local breakend node:
        SVLocus locus;
        const NodeIndexType nearNode(locus.addNode(nearBreakend.interval));
        locus.setNodeEvidence(nearNode,readEvidenceRange);

        if (! isComplexCand)
        {
            // add the remote breakend node and connect the two:
            const NodeIndexType farNode(locus.addNode(farBreakend.interval));
            locus.linkNodes(nearNode,farNode,nearWeight,farWeight);

            locus.mergeSelfOverlap();
        }
        else
        {
            // complex candidates are represented as a self-edge on the local node
            locus.linkNodes(nearNode,nearNode,nearWeight);
        }

#ifdef DEBUG_SCANNER
        log_os << __FUNCTION__ << ": adding Locus: " << locus << "\n";
#endif
        loci.push_back(locus);
    }
}