void SVScorer:: getBreakendMaxMappedDepthAndMQ0( const bool isMaxDepth, const double cutoffDepth, const SVBreakend& bp, unsigned& maxDepth, float& MQ0Frac) { /// define a new interval -/+ 50 bases around the center pos /// of the breakpoint static const pos_t regionSize(50); maxDepth=0; MQ0Frac=0; unsigned totalReads(0); unsigned totalMQ0Reads(0); const pos_t centerPos(bp.interval.range.center_pos()); const known_pos_range2 searchRange(std::max((centerPos-regionSize),0), (centerPos+regionSize)); if (searchRange.size() == 0) return; std::vector<unsigned> depth(searchRange.size(),0); bool isCutoff(false); bool isNormalFound(false); const unsigned bamCount(_bamStreams.size()); for (unsigned bamIndex(0); bamIndex < bamCount; ++bamIndex) { if (_isAlignmentTumor[bamIndex]) continue; isNormalFound=true; bam_streamer& bamStream(*_bamStreams[bamIndex]); // set bam stream to new search interval: bamStream.set_new_region(bp.interval.tid, searchRange.begin_pos(), searchRange.end_pos()); while (bamStream.next()) { const bam_record& bamRead(*(bamStream.get_record_ptr())); // turn filtration down to mapped only to match depth estimate method: if (bamRead.is_unmapped()) continue; const pos_t refPos(bamRead.pos()-1); if (refPos >= searchRange.end_pos()) break; addReadToDepthEst(bamRead,searchRange.begin_pos(),depth); totalReads++; if (0 == bamRead.map_qual()) totalMQ0Reads++; if (isMaxDepth) { const pos_t depthOffset(refPos-searchRange.begin_pos()); if (depthOffset>=0) { if (depth[depthOffset] > cutoffDepth) { isCutoff=true; break; } } } } if (isCutoff) break; } assert(isNormalFound); maxDepth = *(std::max_element(depth.begin(),depth.end())); if (totalReads>=10) { MQ0Frac = static_cast<float>(totalMQ0Reads)/static_cast<float>(totalReads); } }
/// scan read record (and optionally its mate record) for SV evidence. // /// note that estimation is improved by the mate record (because we have the mate cigar string in this case) /// static void getReadBreakendsImpl( const ReadScannerOptions& opt, const ReadScannerDerivOptions& dopt, const SVLocusScanner::CachedReadGroupStats& rstats, const bam_record& localRead, const bam_record* remoteReadPtr, const bam_header_info& bamHeader, const reference_contig_segment& localRefSeq, const reference_contig_segment* remoteRefSeqPtr, std::vector<SVObservation>& candidates, known_pos_range2& localEvidenceRange) { using namespace illumina::common; #ifdef DEBUG_SCANNER log_os << __FUNCTION__ << ": Starting read: " << localRead.qname() << "\n"; #endif const chromMap_t& chromToIndex(bamHeader.chrom_to_index); candidates.clear(); /// get some basic derived information from the bam_record: const SimpleAlignment localAlign(getAlignment(localRead)); try { getSingleReadSVCandidates(opt, dopt, localRead, localAlign, chromToIndex, localRefSeq, candidates); // run the same check on the read's mate if we have access to it if (nullptr != remoteReadPtr) { const bam_record& remoteRead(*remoteReadPtr); const SimpleAlignment remoteAlign(getAlignment(remoteRead)); if (nullptr == remoteRefSeqPtr) { static const char msg[] = "ERROR: remoteRefSeqPtr cannot be null"; BOOST_THROW_EXCEPTION(LogicException(msg)); } getSingleReadSVCandidates(opt, dopt, remoteRead, remoteAlign, chromToIndex, (*remoteRefSeqPtr), candidates); } // process shadows: //getSVCandidatesFromShadow(opt, rstats, localRead, localAlign,remoteReadPtr,candidates); // - process anomalous read pairs: getSVCandidatesFromPair(opt, dopt, rstats, localRead, localAlign, remoteReadPtr, candidates); } catch (...) { std::cerr << "ERROR: Exception caught while processing "; if (nullptr == remoteReadPtr) { std::cerr << "single read record:\n" << '\t' << localRead << "\n"; } else { std::cerr << " read pair records:\n" << '\t' << localRead << "\n" << '\t' << (*remoteReadPtr) << "\n"; } throw; } #ifdef DEBUG_SCANNER log_os << __FUNCTION__ << ": post-pair candidate_size: " << candidates.size() << "\n"; #endif // update localEvidence range: // note this is only used if candidates were added, so there's no harm in setting it every time: const unsigned localRefLength(apath_ref_length(localAlign.path)); const pos_t startRefPos(localRead.pos()-1); const pos_t endRefPos(startRefPos+localRefLength); localEvidenceRange.set_range(startRefPos,endRefPos); const int maxTid(chromToIndex.size()); /// final chance to QC candidate set: /// for (const SVCandidate& sv : candidates) { bool isInvalidTid(false); if ((sv.bp1.interval.tid < 0) || (sv.bp1.interval.tid >= maxTid)) { isInvalidTid=true; } else if (sv.bp2.state != SVBreakendState::UNKNOWN) { if ((sv.bp2.interval.tid < 0) || (sv.bp2.interval.tid >= maxTid)) { isInvalidTid=true; } } bool isInvalidPos(false); if (! isInvalidTid) { // note in the 'off-chromosome edge' test below we check for cases which are obviously way off // the edge, but allow for a bit of over-edge mistakes to occur for the circular chromosomes // static const int offEdgePad(500); const pos_t tid1Length(bamHeader.chrom_data[sv.bp1.interval.tid].length); if ((sv.bp1.interval.range.end_pos() <= -offEdgePad) || (sv.bp1.interval.range.begin_pos() >= (tid1Length+offEdgePad))) { isInvalidPos=true; } else if (sv.bp2.state != SVBreakendState::UNKNOWN) { const pos_t tid2Length(bamHeader.chrom_data[sv.bp2.interval.tid].length); if ((sv.bp2.interval.range.end_pos() <= -offEdgePad) || (sv.bp2.interval.range.begin_pos() >= (tid2Length+offEdgePad))) { isInvalidPos=true; } } } if (isInvalidTid || isInvalidPos) { std::ostringstream oss; if (isInvalidTid) { oss << "SVbreakend has unknown or invalid chromosome id in candidate sv.\n"; } else { oss << "Cannot interpret BAM record: candidate SV breakend from BAM record is off chromosome edge.\n"; } oss << "\tlocal_bam_record: " << localRead << "\n" << "\tremote_bam record: "; if (NULL==remoteReadPtr) { oss << "NONE"; } else { oss << (*remoteReadPtr); } oss << "\n" << "\tSVCandidate: " << sv << "\n"; BOOST_THROW_EXCEPTION(LogicException(oss.str())); } } }