void SVScorePairRefProcessor:: processClearedRecord( const bam_record& bamRead) { using namespace illumina::common; assert(bamParams.isSet); const pos_t refPos(bamRead.pos()-1); if (! bamParams.interval.range.is_pos_intersect(refPos)) return; const bool isLargeInsert(isLargeInsertSV(sv)); #ifdef DEBUG_MEGAPAIR log_os << __FUNCTION__ << ": read: " << bamRead << "\n"; #endif /// check if fragment is too big or too small: const int templateSize(std::abs(bamRead.template_size())); if (templateSize < bamParams.minFrag) return; if (templateSize > bamParams.maxFrag) return; // count only from the down stream reads const bool isFirstBamRead(isFirstRead(bamRead)); // get fragment range: pos_t fragBeginRefPos(refPos); if (! isFirstBamRead) { fragBeginRefPos=bamRead.mate_pos()-1; } const pos_t fragEndRefPos(fragBeginRefPos+templateSize); if (fragBeginRefPos > fragEndRefPos) { std::ostringstream oss; oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fragBeginRefPos << " " << fragEndRefPos << " bamRecord: " << bamRead << "\n"; BOOST_THROW_EXCEPTION(LogicException(oss.str())); } { const pos_t fragOverlap(std::min((1+svParams.centerPos-fragBeginRefPos), (fragEndRefPos-svParams.centerPos))); #ifdef DEBUG_MEGAPAIR log_os << __FUNCTION__ << ": frag begin/end/overlap: " << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n"; #endif if (fragOverlap < pairOpt.minFragSupport) return; } SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]); static const bool isShadow(false); SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first())); setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadow, evRead); setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1),isLargeInsert); }
static bool isGoodShadow( const bam_record& bamRead, const std::string& lastQname) { #ifdef DEBUG_IS_SHADOW static const std::string logtag("isGoodShadow"); #endif if (! bamRead.is_paired()) return false; if (bamRead.isNonStrictSupplement()) return false; // sanity check that this is a shadow read: if (!bamRead.is_unmapped()) return false; if (bamRead.is_mate_unmapped()) return false; static const unsigned minAvgQualShadow = 25; if (get_avg_quality(bamRead) < minAvgQualShadow) { return false; } if (strcmp(bamRead.qname(),lastQname.c_str()) != 0) { // something went wrong here, shadows should have their singleton partner // preceding them in the BAM file. #ifdef DEBUG_IS_SHADOW log_os << logtag << " ERROR: Shadow without matching singleton : " << bamRead.qname() << " vs " << lastQname << std::endl; #endif return false; } #ifdef DEBUG_IS_SHADOW log_os << logtag << " Found shadow!\n"; << logtag << " this mapq = " << ((unsigned int)bamRead.map_qual()) << std::endl;
void SVCandidateSetSequenceFragmentSampleGroup:: add( const bam_header_info& bamHeader, const bam_record& bamRead, const bool isExpectRepeat, const bool isSourcedFromGraphEdgeNode1, const bool isSubMapped) { using namespace illumina::common; #ifdef DEBUG_SVDATA log_os << "SVDataGroup adding: " << bamRead << "\n"; #endif SVCandidateSetSequenceFragment* fragPtr(getSequenceFragment(bamRead.qname())); if (nullptr == fragPtr) return; SVCandidateSetSequenceFragment& fragment(*fragPtr); SVCandidateSetRead* targetReadPtr(nullptr); if (2 == bamRead.read_no()) { if (bamRead.isNonStrictSupplement()) { fragment.read2Supplemental.emplace_back(); targetReadPtr = (&(fragment.read2Supplemental.back())); } else { targetReadPtr = (&(fragment.read2)); } } else { if (bamRead.isNonStrictSupplement()) { fragment.read1Supplemental.emplace_back(); targetReadPtr = (&(fragment.read1Supplemental.back())); } else { targetReadPtr = (&(fragment.read1)); } } SVCandidateSetRead& targetRead(*targetReadPtr); if (targetRead.isSet()) { if (isExpectRepeat) return; std::ostringstream oss; oss << "Unexpected alignment name collision. Source: '" << dataSourceName << "'\n" << "\tExisting read: "; summarizeAlignmentRecord(bamHeader, targetRead.bamrec, oss); oss << "\n" << "\tNew read: "; summarizeAlignmentRecord(bamHeader, bamRead, oss); oss << "\n"; BOOST_THROW_EXCEPTION(GeneralException(oss.str())); } targetRead.bamrec = bamRead; targetRead.isSourcedFromGraphEdgeNode1 = isSourcedFromGraphEdgeNode1; targetRead.isSubMapped = isSubMapped; targetRead.readIndex = (isSubMapped ? _subMappedReadIndex : _mappedReadIndex); }
/// scan read record (and optionally its mate record) for SV evidence. // /// note that estimation is improved by the mate record (because we have the mate cigar string in this case) /// static void getReadBreakendsImpl( const ReadScannerOptions& opt, const ReadScannerDerivOptions& dopt, const SVLocusScanner::CachedReadGroupStats& rstats, const bam_record& localRead, const bam_record* remoteReadPtr, const bam_header_info& bamHeader, const reference_contig_segment& localRefSeq, const reference_contig_segment* remoteRefSeqPtr, std::vector<SVObservation>& candidates, known_pos_range2& localEvidenceRange) { using namespace illumina::common; #ifdef DEBUG_SCANNER log_os << __FUNCTION__ << ": Starting read: " << localRead.qname() << "\n"; #endif const chromMap_t& chromToIndex(bamHeader.chrom_to_index); candidates.clear(); /// get some basic derived information from the bam_record: const SimpleAlignment localAlign(getAlignment(localRead)); try { getSingleReadSVCandidates(opt, dopt, localRead, localAlign, chromToIndex, localRefSeq, candidates); // run the same check on the read's mate if we have access to it if (nullptr != remoteReadPtr) { const bam_record& remoteRead(*remoteReadPtr); const SimpleAlignment remoteAlign(getAlignment(remoteRead)); if (nullptr == remoteRefSeqPtr) { static const char msg[] = "ERROR: remoteRefSeqPtr cannot be null"; BOOST_THROW_EXCEPTION(LogicException(msg)); } getSingleReadSVCandidates(opt, dopt, remoteRead, remoteAlign, chromToIndex, (*remoteRefSeqPtr), candidates); } // process shadows: //getSVCandidatesFromShadow(opt, rstats, localRead, localAlign,remoteReadPtr,candidates); // - process anomalous read pairs: getSVCandidatesFromPair(opt, dopt, rstats, localRead, localAlign, remoteReadPtr, candidates); } catch (...) { std::cerr << "ERROR: Exception caught while processing "; if (nullptr == remoteReadPtr) { std::cerr << "single read record:\n" << '\t' << localRead << "\n"; } else { std::cerr << " read pair records:\n" << '\t' << localRead << "\n" << '\t' << (*remoteReadPtr) << "\n"; } throw; } #ifdef DEBUG_SCANNER log_os << __FUNCTION__ << ": post-pair candidate_size: " << candidates.size() << "\n"; #endif // update localEvidence range: // note this is only used if candidates were added, so there's no harm in setting it every time: const unsigned localRefLength(apath_ref_length(localAlign.path)); const pos_t startRefPos(localRead.pos()-1); const pos_t endRefPos(startRefPos+localRefLength); localEvidenceRange.set_range(startRefPos,endRefPos); const int maxTid(chromToIndex.size()); /// final chance to QC candidate set: /// for (const SVCandidate& sv : candidates) { bool isInvalidTid(false); if ((sv.bp1.interval.tid < 0) || (sv.bp1.interval.tid >= maxTid)) { isInvalidTid=true; } else if (sv.bp2.state != SVBreakendState::UNKNOWN) { if ((sv.bp2.interval.tid < 0) || (sv.bp2.interval.tid >= maxTid)) { isInvalidTid=true; } } bool isInvalidPos(false); if (! isInvalidTid) { // note in the 'off-chromosome edge' test below we check for cases which are obviously way off // the edge, but allow for a bit of over-edge mistakes to occur for the circular chromosomes // static const int offEdgePad(500); const pos_t tid1Length(bamHeader.chrom_data[sv.bp1.interval.tid].length); if ((sv.bp1.interval.range.end_pos() <= -offEdgePad) || (sv.bp1.interval.range.begin_pos() >= (tid1Length+offEdgePad))) { isInvalidPos=true; } else if (sv.bp2.state != SVBreakendState::UNKNOWN) { const pos_t tid2Length(bamHeader.chrom_data[sv.bp2.interval.tid].length); if ((sv.bp2.interval.range.end_pos() <= -offEdgePad) || (sv.bp2.interval.range.begin_pos() >= (tid2Length+offEdgePad))) { isInvalidPos=true; } } } if (isInvalidTid || isInvalidPos) { std::ostringstream oss; if (isInvalidTid) { oss << "SVbreakend has unknown or invalid chromosome id in candidate sv.\n"; } else { oss << "Cannot interpret BAM record: candidate SV breakend from BAM record is off chromosome edge.\n"; } oss << "\tlocal_bam_record: " << localRead << "\n" << "\tremote_bam record: "; if (NULL==remoteReadPtr) { oss << "NONE"; } else { oss << (*remoteReadPtr); } oss << "\n" << "\tSVCandidate: " << sv << "\n"; BOOST_THROW_EXCEPTION(LogicException(oss.str())); } } }
/// get SV candidates from shadow/singleton pairs /// look for singletons, create candidateSV around conf. interval of shadow position /// cache singletons? might be needed to remove poor quality shadows. /// should be able to re-use code, follow soft-clipping example. static void getSVCandidatesFromShadow( const ReadScannerOptions& opt, const SVLocusScanner::CachedReadGroupStats& rstats, const bam_record& localRead, const SimpleAlignment& localAlign, const bam_record* remoteReadPtr, TrackedCandidates& candidates) { using namespace SVEvidenceType; static const index_t svSource(SHADOW); static const bool isComplex(true); pos_t singletonGenomePos(0); int targetId(0); if (NULL == remoteReadPtr) { if (!localRead.is_unmapped()) return; // need to take care of this case // need to rely on cached mapq and qname return; if (!isGoodShadow(localRead,lastMapq,lastQname,opt.minSingletonMapqGraph)) { return; } singletonGenomePos = localAlign.pos; targetId = localRead.target_id(); } else { // have both reads, straightforward from here const bam_record& remoteRead(*remoteReadPtr); const SimpleAlignment remoteAlign(remoteRead); if (localRead.is_mate_unmapped()) { // remote read is shadow candidate if (!isGoodShadow(remoteRead,localRead.map_qual(),localRead.qname(),opt.minSingletonMapqGraph)) { return; } singletonGenomePos = localAlign.pos; targetId = remoteRead.target_id(); } else if (localRead.is_unmapped()) { // local is shadow candidate if (!isGoodShadow(localRead,remoteRead.map_qual(),remoteRead.qname(),opt.minSingletonMapqGraph)) { return; } singletonGenomePos = remoteAlign.pos; targetId = localRead.target_id(); } else { // none unmapped, skip this one return; } } const pos_t properPairRangeOffset = static_cast<pos_t>(rstats.properPair.min + (rstats.properPair.max-rstats.properPair.min)/2); const pos_t shadowGenomePos = singletonGenomePos + properPairRangeOffset; candidates.push_back(GetSplitSVCandidate(opt,targetId,shadowGenomePos,shadowGenomePos, svSource, isComplex)); }