void SVScorePairRefProcessor:: processClearedRecord( const bam_record& bamRead) { using namespace illumina::common; assert(bamParams.isSet); const pos_t refPos(bamRead.pos()-1); if (! bamParams.interval.range.is_pos_intersect(refPos)) return; const bool isLargeInsert(isLargeInsertSV(sv)); #ifdef DEBUG_MEGAPAIR log_os << __FUNCTION__ << ": read: " << bamRead << "\n"; #endif /// check if fragment is too big or too small: const int templateSize(std::abs(bamRead.template_size())); if (templateSize < bamParams.minFrag) return; if (templateSize > bamParams.maxFrag) return; // count only from the down stream reads const bool isFirstBamRead(isFirstRead(bamRead)); // get fragment range: pos_t fragBeginRefPos(refPos); if (! isFirstBamRead) { fragBeginRefPos=bamRead.mate_pos()-1; } const pos_t fragEndRefPos(fragBeginRefPos+templateSize); if (fragBeginRefPos > fragEndRefPos) { std::ostringstream oss; oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fragBeginRefPos << " " << fragEndRefPos << " bamRecord: " << bamRead << "\n"; BOOST_THROW_EXCEPTION(LogicException(oss.str())); } { const pos_t fragOverlap(std::min((1+svParams.centerPos-fragBeginRefPos), (fragEndRefPos-svParams.centerPos))); #ifdef DEBUG_MEGAPAIR log_os << __FUNCTION__ << ": frag begin/end/overlap: " << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n"; #endif if (fragOverlap < pairOpt.minFragSupport) return; } SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]); static const bool isShadow(false); SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first())); setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadow, evRead); setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1),isLargeInsert); }
/// Classify the fragment size of a BAM record.
///
/// \param bamRead the BAM record to classify
/// \param defaultReadGroupIndex index into _stats selecting the statistics used for classification
///
/// \return DISTANT when the record and its mate have different target ids; otherwise the
///         result of classifySize() applied to the absolute template size
FragmentSizeType::index_t
SVLocusScanner::
_getFragmentSizeType(
    const bam_record& bamRead,
    const unsigned defaultReadGroupIndex) const
{
    using namespace FragmentSizeType;

    const bool isMateOnSameTarget(bamRead.target_id() == bamRead.mate_target_id());
    if (! isMateOnSameTarget)
    {
        return DISTANT;
    }

    const int32_t absTemplateSize(std::abs(bamRead.template_size()));
    return classifySize(_stats[defaultReadGroupIndex], absTemplateSize);
}
bool SVLocusScanner:: isProperPair( const bam_record& bamRead, const unsigned defaultReadGroupIndex) const { if (! is_innie_pair(bamRead)) return false; const Range& ppr(_stats[defaultReadGroupIndex].properPair); const int32_t fragmentSize(std::abs(bamRead.template_size())); // we're seeing way to much large fragment garbage in cancers to use // vanilla proper pair criteria, push the max fragment size out a bit for now: static const float maxAnomFactor(1.5); if (fragmentSize > static_cast<int32_t>(maxAnomFactor*ppr.max)) return false; if (fragmentSize < ppr.min) return false; return true; }
/// Create an SVLocus for each potential SV event supported by the BAM record /// /// the loci count should almost always be one (or, depending on input filtration, zero). /// multiple suggested loci from one read is more of a theoretical possibility than an /// expectation. /// static void getSVLociImpl( const ReadScannerOptions& opt, const ReadScannerDerivOptions& dopt, const SVLocusScanner::CachedReadGroupStats& rstats, const bam_record& bamRead, const bam_header_info& bamHeader, const reference_contig_segment& refSeq, std::vector<SVLocus>& loci, SampleEvidenceCounts& eCounts) { using namespace illumina::common; loci.clear(); std::vector<SVObservation> candidates; known_pos_range2 localEvidenceRange; getReadBreakendsImpl(opt, dopt, rstats, bamRead, nullptr, bamHeader, refSeq, nullptr, candidates, localEvidenceRange); #ifdef DEBUG_SCANNER log_os << __FUNCTION__ << ": candidate_size: " << candidates.size() << "\n"; #endif // translate SVCandidate to a simpler form for use // in the SV locus graph: for (const SVCandidate& cand : candidates) { const bool isCandComplex(isComplexSV(cand)); const SVBreakend& localBreakend(cand.bp1); const SVBreakend& remoteBreakend(cand.bp2); if ((0==localBreakend.interval.range.size()) || ((! 
isCandComplex) && (0==remoteBreakend.interval.range.size()))) { std::ostringstream oss; oss << "Unexpected breakend pattern proposed from bam record.\n" << "\tlocal_breakend: " << localBreakend << "\n" << "\tremote_breakend: " << remoteBreakend << "\n" << "\tbam_record: " << bamRead << "\n"; BOOST_THROW_EXCEPTION(LogicException(oss.str())); } // update evidence stats: for (int i(0); i< SVEvidenceType::SIZE; ++i) { eCounts.eType[i] += localBreakend.lowresEvidence.getVal(i); } // determine the evidence weight of this candidate: unsigned localEvidenceWeight(0); unsigned remoteEvidenceWeight(0); if (localBreakend.getAnyNonPairCount() != 0) { localEvidenceWeight = SVObservationWeights::internalReadEvent; if (remoteBreakend.getAnyNonPairCount() != 0) { remoteEvidenceWeight = SVObservationWeights::internalReadEvent; } } else if (localBreakend.getLocalPairCount() != 0) { bool isClose(false); if (is_innie_pair(bamRead)) { isClose = (std::abs(bamRead.template_size()) < rstats.minDistantFragmentSize); } unsigned thisWeight(SVObservationWeights::readPair); if (isClose) { thisWeight = SVObservationWeights::closeReadPair; eCounts.closeCount += 1; } localEvidenceWeight = thisWeight; if (remoteBreakend.getLocalPairCount() != 0) { remoteEvidenceWeight = thisWeight; } } // finally, create the graph locus: SVLocus locus; // set local breakend estimate: const NodeIndexType localBreakendNode(locus.addNode(localBreakend.interval)); locus.setNodeEvidence(localBreakendNode,localEvidenceRange); if (isCandComplex) { locus.linkNodes(localBreakendNode,localBreakendNode,localEvidenceWeight); } else { // set remote breakend estimate: const NodeIndexType remoteBreakendNode(locus.addNode(remoteBreakend.interval)); locus.linkNodes(localBreakendNode,remoteBreakendNode,localEvidenceWeight,remoteEvidenceWeight); locus.mergeSelfOverlap(); } #ifdef DEBUG_SCANNER log_os << __FUNCTION__ << ": adding Locus: " << locus << "\n"; #endif loci.push_back(locus); } }