bool isMateInsertionEvidenceCandidate( const bam_record& bamRead, const unsigned minMapq) { if (! bamRead.is_paired()) return false; if (bamRead.isNonStrictSupplement()) return false; if (bamRead.is_unmapped() || bamRead.is_mate_unmapped()) return false; if (bamRead.map_qual() < minMapq) return false; if (bamRead.target_id() < 0) return false; if (bamRead.mate_target_id() < 0) return false; if (bamRead.target_id() != bamRead.mate_target_id()) return true; /// TODO: better candidate definition based on fragment size distro: static const int minSize(10000); return (std::abs(bamRead.pos()-bamRead.mate_pos()) >= minSize); }
/// \brief Classify the fragment implied by a read and its mate.
///
/// Reads whose mate has a different target id are reported as DISTANT.
/// Otherwise the absolute template size of the record is handed to
/// classifySize() together with the cached stats stored at
/// \p defaultReadGroupIndex.
///
/// \param bamRead                record whose fragment is classified
/// \param defaultReadGroupIndex  index into the cached read group stats
/// \return the fragment size category
FragmentSizeType::index_t
SVLocusScanner::
_getFragmentSizeType(
    const bam_record& bamRead,
    const unsigned defaultReadGroupIndex) const
{
    using namespace FragmentSizeType;

    const bool isMateOnSameTarget(bamRead.target_id() == bamRead.mate_target_id());
    if (! isMateOnSameTarget)
    {
        return DISTANT;
    }

    // template size is signed in the record; only its magnitude matters here
    const int32_t absTemplateSize(std::abs(bamRead.template_size()));
    return classifySize(_stats[defaultReadGroupIndex], absTemplateSize);
}
/// extract poorly aligned read ends (semi-aligned and/or soft-clipped) /// to internal candidate format static void getSVCandidatesFromSemiAligned( const ReadScannerOptions& opt, const ReadScannerDerivOptions& dopt, const bam_record& bamRead, const SimpleAlignment& bamAlign, const FRAGSOURCE::index_t fragSource, const reference_contig_segment& refSeq, std::vector<SVObservation>& candidates) { unsigned leadingMismatchLen(0); unsigned trailingMismatchLen(0); pos_t leadingRefPos(0), trailingRefPos(0); getSVBreakendCandidateSemiAligned(bamRead, bamAlign, refSeq, dopt.isUseOverlappingPairs, leadingMismatchLen, leadingRefPos, trailingMismatchLen, trailingRefPos); if ((leadingMismatchLen + trailingMismatchLen) >= bamRead.read_size()) return; using namespace SVEvidenceType; static const index_t svSource(SEMIALIGN); // semi-aligned reads don't define a full hypothesis, so they're always evidence for a 'complex' ie. undefined, event // in a fashion analogous to clipped reads static const bool isComplex(true); if (leadingMismatchLen >= opt.minSemiAlignedMismatchLen) { const pos_t pos(leadingRefPos); candidates.push_back(GetSplitSVCandidate(dopt,bamRead.target_id(),pos,pos,svSource, fragSource,isComplex)); } if (trailingMismatchLen >= opt.minSemiAlignedMismatchLen) { const pos_t pos(trailingRefPos); candidates.push_back(GetSplitSVCandidate(dopt,bamRead.target_id(),pos,pos,svSource, fragSource,isComplex)); } }
/// get SV candidates from shadow/singleton pairs /// look for singletons, create candidateSV around conf. interval of shadow position /// cache singletons? might be needed to remove poor quality shadows. /// should be able to re-use code, follow soft-clipping example. static void getSVCandidatesFromShadow( const ReadScannerOptions& opt, const SVLocusScanner::CachedReadGroupStats& rstats, const bam_record& localRead, const SimpleAlignment& localAlign, const bam_record* remoteReadPtr, TrackedCandidates& candidates) { using namespace SVEvidenceType; static const index_t svSource(SHADOW); static const bool isComplex(true); pos_t singletonGenomePos(0); int targetId(0); if (NULL == remoteReadPtr) { if (!localRead.is_unmapped()) return; // need to take care of this case // need to rely on cached mapq and qname return; if (!isGoodShadow(localRead,lastMapq,lastQname,opt.minSingletonMapqGraph)) { return; } singletonGenomePos = localAlign.pos; targetId = localRead.target_id(); } else { // have both reads, straightforward from here const bam_record& remoteRead(*remoteReadPtr); const SimpleAlignment remoteAlign(remoteRead); if (localRead.is_mate_unmapped()) { // remote read is shadow candidate if (!isGoodShadow(remoteRead,localRead.map_qual(),localRead.qname(),opt.minSingletonMapqGraph)) { return; } singletonGenomePos = localAlign.pos; targetId = remoteRead.target_id(); } else if (localRead.is_unmapped()) { // local is shadow candidate if (!isGoodShadow(localRead,remoteRead.map_qual(),remoteRead.qname(),opt.minSingletonMapqGraph)) { return; } singletonGenomePos = remoteAlign.pos; targetId = localRead.target_id(); } else { // none unmapped, skip this one return; } } const pos_t properPairRangeOffset = static_cast<pos_t>(rstats.properPair.min + (rstats.properPair.max-rstats.properPair.min)/2); const pos_t shadowGenomePos = singletonGenomePos + properPairRangeOffset; candidates.push_back(GetSplitSVCandidate(opt,targetId,shadowGenomePos,shadowGenomePos, svSource, isComplex)); }