static
void
addReadToDepthEst(
    const bam_record& bamRead,
    const pos_t beginPos,
    std::vector<unsigned>& depth)
{
    using namespace ALIGNPATH;

    const pos_t endPos(beginPos+depth.size());

    // get cigar:
    path_t apath;
    bam_cigar_to_apath(bamRead.raw_cigar(), bamRead.n_cigar(), apath);

    pos_t refPos(bamRead.pos()-1);
    BOOST_FOREACH(const path_segment& ps, apath)
    {
        if (refPos>=endPos) return;

        if (MATCH == ps.type)
        {
            for (pos_t pos(refPos); pos < (refPos+static_cast<pos_t>(ps.length)); ++pos)
            {
                if (pos>=beginPos)
                {
                    if (pos>=endPos) return;
                    depth[pos-beginPos]++;
                }
            }
        }
        if (is_segment_type_ref_length(ps.type)) refPos += ps.length;
    }
}
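// A minimal standalone sketch (not part of the production code above) of the
// same windowed depth-accumulation technique: walk (type,length) alignment
// segments, advance the reference cursor only on reference-consuming segment
// types, and count only match bases landing in [beginPos, beginPos+depth.size()).
// The toy Segment type here stands in for ALIGNPATH::path_segment; names are
// hypothetical and for illustration only.
namespace depth_sketch
{
enum SegType { SEG_MATCH, SEG_INSERT, SEG_DELETE };

struct Segment
{
    SegType type;
    int length;
};

inline
void
addPathToDepth(
    const std::vector<Segment>& path,
    int refPos,
    const int beginPos,
    std::vector<unsigned>& depth)
{
    const int endPos(beginPos + static_cast<int>(depth.size()));
    for (const Segment& seg : path)
    {
        if (refPos >= endPos) return;
        if (SEG_MATCH == seg.type)
        {
            for (int pos(refPos); pos < (refPos + seg.length); ++pos)
            {
                if (pos < beginPos) continue;
                if (pos >= endPos) return;
                depth[pos - beginPos]++;
            }
        }
        // insertions consume read bases only, so they don't advance refPos:
        if (SEG_INSERT != seg.type) refPos += seg.length;
    }
}
}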
void
SVScorePairRefProcessor::
processClearedRecord(
    const bam_record& bamRead)
{
    using namespace illumina::common;

    assert(bamParams.isSet);

    const pos_t refPos(bamRead.pos()-1);
    if (! bamParams.interval.range.is_pos_intersect(refPos)) return;

    const bool isLargeInsert(isLargeInsertSV(sv));

#ifdef DEBUG_MEGAPAIR
    log_os << __FUNCTION__ << ": read: " << bamRead << "\n";
#endif

    // check if fragment is too big or too small:
    const int templateSize(std::abs(bamRead.template_size()));
    if (templateSize < bamParams.minFrag) return;
    if (templateSize > bamParams.maxFrag) return;

    // count only from the downstream reads
    const bool isFirstBamRead(isFirstRead(bamRead));

    // get fragment range:
    pos_t fragBeginRefPos(refPos);
    if (! isFirstBamRead)
    {
        fragBeginRefPos=bamRead.mate_pos()-1;
    }

    const pos_t fragEndRefPos(fragBeginRefPos+templateSize);

    if (fragBeginRefPos > fragEndRefPos)
    {
        std::ostringstream oss;
        oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: "
            << fragBeginRefPos << " " << fragEndRefPos
            << " bamRecord: " << bamRead << "\n";
        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
    }

    {
        const pos_t fragOverlap(std::min((1+svParams.centerPos-fragBeginRefPos), (fragEndRefPos-svParams.centerPos)));

#ifdef DEBUG_MEGAPAIR
        log_os << __FUNCTION__ << ": frag begin/end/overlap: "
               << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n";
#endif

        if (fragOverlap < pairOpt.minFragSupport) return;
    }

    SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]);

    static const bool isShadow(false);

    SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first()));
    setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadow, evRead);

    setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1), isLargeInsert);
}
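// A standalone sketch (illustration only, not the class API above) of the
// fragment support test used in processClearedRecord: a fragment spanning
// [fragBegin,fragEnd) supports the breakend only if it extends at least
// minFragSupport bases on both sides of centerPos, i.e. the smaller of the
// two flanking extents must reach the threshold. Assumes <algorithm> is
// already included, as the code above does for std::min.
inline
bool
isFragSupportSketch(
    const int fragBegin,
    const int fragEnd,
    const int centerPos,
    const int minFragSupport)
{
    const int fragOverlap(std::min((1 + centerPos - fragBegin), (fragEnd - centerPos)));
    return (fragOverlap >= minFragSupport);
}

// example: a fragment spanning [100,350) with centerPos=120 has overlap
// min(21,230)=21, so it passes a minFragSupport of 15:
//     assert(isFragSupportSketch(100, 350, 120, 15));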
void
addRead(
    const bam_record& bamRead)
{
    if (_isRegionInit)
    {
        // if the new read starts well past the current region end, close
        // out the current region and start a new one:
        if (bamRead.pos() > _endPos + 1000)
        {
            _maxPos=_endPos;
            setNewRegion();
        }
    }

    if (! _isRegionInit)
    {
        _minPos=bamRead.pos();
        _maxPos=bamRead.pos();
        _endPos=bamRead.pos() + bamRead.read_size();
        _isRegionInit=true;
    }
    else
    {
        if (bamRead.pos() > _maxPos)
        {
            _maxPos = bamRead.pos();
            _endPos=bamRead.pos() + bamRead.read_size();
        }
    }

    _count++;
    _totalReadLength += bamRead.read_size();
}
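// A toy sketch (hypothetical names, not the class above) of the same
// region-tracking policy: reads arrive in sorted order, and a gap of more
// than maxGap bases between the region end and the next read start closes
// the current region and opens a new one. How a closed region is reported
// is elided here.
struct RegionTrackerSketch
{
    void
    addReadStart(const int pos, const int readSize)
    {
        static const int maxGap(1000);
        if (isInit && (pos > (endPos + maxGap)))
        {
            isInit = false; // close out the current region, start fresh
        }
        if (! isInit)
        {
            minPos = maxPos = pos;
            endPos = pos + readSize;
            isInit = true;
        }
        else if (pos > maxPos)
        {
            maxPos = pos;
            endPos = pos + readSize;
        }
        count++;
    }

    bool isInit = false;
    int minPos = 0, maxPos = 0, endPos = 0;
    unsigned count = 0;
};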
void
addRead(
    const bam_record& bamRead)
{
    const pos_t pos(bamRead.pos()-1);
    const unsigned rsize(bamRead.read_size());

    if (! _isRegionInit)
    {
        _maxPos=pos;
        _isRegionInit=true;
    }

    // input is position-sorted, so every position before this read's start
    // is final and can be flushed:
    for (; _maxPos<pos; ++_maxPos) flushPos(_maxPos);

    _depth.inc(pos,rsize);
    _count++;
}
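// A self-contained sketch (hypothetical, not the _depth structure above) of
// this streaming pattern: with position-sorted input, any position before
// the newest read's start is final, so it can be flushed and its storage
// reclaimed. Here a std::deque models the active depth window, keeping
// memory bounded by the maximum read span rather than the chromosome length.
#include <deque>

struct StreamingDepthSketch
{
    // add a read covering [pos, pos+readSize); pos must be non-decreasing
    template <typename Flusher>
    void
    addRead(const int pos, const int readSize, Flusher&& flush)
    {
        if (! isInit)
        {
            basePos = pos;
            isInit = true;
        }
        // finalize and drop all positions before this read's start:
        while (basePos < pos)
        {
            flush(basePos, (window.empty() ? 0u : window.front()));
            if (! window.empty()) window.pop_front();
            basePos++;
        }
        // accumulate depth over the read span:
        if (static_cast<int>(window.size()) < readSize) window.resize(readSize, 0u);
        for (int i(0); i < readSize; ++i) window[i]++;
    }

    bool isInit = false;
    int basePos = 0;
    std::deque<unsigned> window;
};

// usage: StreamingDepthSketch s;
//        s.addRead(100, 50, [](int pos, unsigned d) { /* consume (pos,depth) */ });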
bool
isMateInsertionEvidenceCandidate(
    const bam_record& bamRead,
    const unsigned minMapq)
{
    if (! bamRead.is_paired()) return false;
    if (bamRead.isNonStrictSupplement()) return false;
    if (bamRead.is_unmapped() || bamRead.is_mate_unmapped()) return false;

    if (bamRead.map_qual() < minMapq) return false;

    if (bamRead.target_id() < 0) return false;
    if (bamRead.mate_target_id() < 0) return false;

    if (bamRead.target_id() != bamRead.mate_target_id()) return true;

    // TODO: better candidate definition based on fragment size distro:
    static const int minSize(10000);
    return (std::abs(bamRead.pos()-bamRead.mate_pos()) >= minSize);
}
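// Usage sketch (hypothetical call site, shown as a comment): a read passing
// this predicate has a mapped mate that is either on another chromosome or
// at least 10kb away on the same chromosome, so the mate may anchor an
// insertion breakend worth retrieving:
//
//     if (isMateInsertionEvidenceCandidate(bamRead, opt.minMapq))
//     {
//         // queue bamRead's mate location for remote evidence retrieval...
//     }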
/// scan read record (and optionally its mate record) for SV evidence.
///
/// note that estimation is improved by the mate record (because we have the
/// mate cigar string in this case)
///
static
void
getReadBreakendsImpl(
    const ReadScannerOptions& opt,
    const ReadScannerDerivOptions& dopt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& localRead,
    const bam_record* remoteReadPtr,
    const bam_header_info& bamHeader,
    const reference_contig_segment& localRefSeq,
    const reference_contig_segment* remoteRefSeqPtr,
    std::vector<SVObservation>& candidates,
    known_pos_range2& localEvidenceRange)
{
    using namespace illumina::common;

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << ": Starting read: " << localRead.qname() << "\n";
#endif

    const chromMap_t& chromToIndex(bamHeader.chrom_to_index);

    candidates.clear();

    // get some basic derived information from the bam_record:
    const SimpleAlignment localAlign(getAlignment(localRead));

    try
    {
        getSingleReadSVCandidates(opt, dopt, localRead, localAlign, chromToIndex,
                                  localRefSeq, candidates);

        // run the same check on the read's mate if we have access to it
        if (nullptr != remoteReadPtr)
        {
            const bam_record& remoteRead(*remoteReadPtr);
            const SimpleAlignment remoteAlign(getAlignment(remoteRead));

            if (nullptr == remoteRefSeqPtr)
            {
                static const char msg[] = "ERROR: remoteRefSeqPtr cannot be null";
                BOOST_THROW_EXCEPTION(LogicException(msg));
            }

            getSingleReadSVCandidates(opt, dopt, remoteRead, remoteAlign,
                                      chromToIndex, (*remoteRefSeqPtr), candidates);
        }

        // process shadows:
        //getSVCandidatesFromShadow(opt, rstats, localRead, localAlign, remoteReadPtr, candidates);

        // - process anomalous read pairs:
        getSVCandidatesFromPair(opt, dopt, rstats, localRead, localAlign,
                                remoteReadPtr, candidates);
    }
    catch (...)
    {
        std::cerr << "ERROR: Exception caught while processing ";
        if (nullptr == remoteReadPtr)
        {
            std::cerr << "single read record:\n"
                      << '\t' << localRead << "\n";
        }
        else
        {
            std::cerr << "read pair records:\n"
                      << '\t' << localRead << "\n"
                      << '\t' << (*remoteReadPtr) << "\n";
        }
        throw;
    }

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << ": post-pair candidate_size: " << candidates.size() << "\n";
#endif

    // update localEvidence range:
    // note this is only used if candidates were added, so there's no harm in setting it every time:
    const unsigned localRefLength(apath_ref_length(localAlign.path));
    const pos_t startRefPos(localRead.pos()-1);
    const pos_t endRefPos(startRefPos+localRefLength);
    localEvidenceRange.set_range(startRefPos, endRefPos);

    const int maxTid(chromToIndex.size());

    // final chance to QC candidate set:
    //
    for (const SVCandidate& sv : candidates)
    {
        bool isInvalidTid(false);
        if ((sv.bp1.interval.tid < 0) || (sv.bp1.interval.tid >= maxTid))
        {
            isInvalidTid=true;
        }
        else if (sv.bp2.state != SVBreakendState::UNKNOWN)
        {
            if ((sv.bp2.interval.tid < 0) || (sv.bp2.interval.tid >= maxTid))
            {
                isInvalidTid=true;
            }
        }

        bool isInvalidPos(false);
        if (! isInvalidTid)
        {
            // note in the 'off-chromosome edge' test below we check for cases which are obviously way off
            // the edge, but allow for a bit of over-edge mistakes to occur for the circular chromosomes
            //
            static const int offEdgePad(500);
            const pos_t tid1Length(bamHeader.chrom_data[sv.bp1.interval.tid].length);
            if ((sv.bp1.interval.range.end_pos() <= -offEdgePad) ||
                (sv.bp1.interval.range.begin_pos() >= (tid1Length+offEdgePad)))
            {
                isInvalidPos=true;
            }
            else if (sv.bp2.state != SVBreakendState::UNKNOWN)
            {
                const pos_t tid2Length(bamHeader.chrom_data[sv.bp2.interval.tid].length);
                if ((sv.bp2.interval.range.end_pos() <= -offEdgePad) ||
                    (sv.bp2.interval.range.begin_pos() >= (tid2Length+offEdgePad)))
                {
                    isInvalidPos=true;
                }
            }
        }

        if (isInvalidTid || isInvalidPos)
        {
            std::ostringstream oss;
            if (isInvalidTid)
            {
                oss << "SVbreakend has unknown or invalid chromosome id in candidate sv.\n";
            }
            else
            {
                oss << "Cannot interpret BAM record: candidate SV breakend from BAM record is off chromosome edge.\n";
            }

            oss << "\tlocal_bam_record: " << localRead << "\n"
                << "\tremote_bam record: ";
            if (nullptr == remoteReadPtr)
            {
                oss << "NONE";
            }
            else
            {
                oss << (*remoteReadPtr);
            }
            oss << "\n"
                << "\tSVCandidate: " << sv << "\n";
            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
        }
    }
}
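// A minimal standalone sketch (hypothetical, not the QC code's API) of the
// padded off-edge test above: a breakend range [beginPos,endPos) is rejected
// only when it lies entirely more than offEdgePad bases off either chromosome
// end, tolerating small overhangs from circular chromosomes (e.g. chrM).
inline
bool
isOffChromEdgeSketch(
    const int beginPos,
    const int endPos,
    const int chromLength)
{
    static const int offEdgePad(500);
    return ((endPos <= -offEdgePad) || (beginPos >= (chromLength + offEdgePad)));
}

// example: against chrM (length 16569), a range ending at -100 is tolerated,
// but one ending at -600 is rejected:
//     assert(! isOffChromEdgeSketch(-200, -100, 16569));
//     assert(isOffChromEdgeSketch(-700, -600, 16569));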