void
SVScorePairRefProcessor::
processClearedRecord(
    const bam_record& bamRead)
{
    using namespace illumina::common;

    assert(bamParams.isSet);

    const pos_t refPos(bamRead.pos()-1);
    if (! bamParams.interval.range.is_pos_intersect(refPos)) return;

    const bool isLargeInsert(isLargeInsertSV(sv));

#ifdef DEBUG_MEGAPAIR
    log_os << __FUNCTION__ << ": read: " << bamRead << "\n";
#endif

    /// check if fragment is too big or too small:
    const int templateSize(std::abs(bamRead.template_size()));
    if (templateSize < bamParams.minFrag) return;
    if (templateSize > bamParams.maxFrag) return;

    // count only from the down stream reads
    const bool isFirstBamRead(isFirstRead(bamRead));

    // get fragment range:
    pos_t fragBeginRefPos(refPos);
    if (! isFirstBamRead)
    {
        fragBeginRefPos=bamRead.mate_pos()-1;
    }

    const pos_t fragEndRefPos(fragBeginRefPos+templateSize);

    if (fragBeginRefPos > fragEndRefPos)
    {
        std::ostringstream oss;
        oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fragBeginRefPos << " " << fragEndRefPos << " bamRecord: " << bamRead << "\n";
        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
    }

    {
        const pos_t fragOverlap(std::min((1+svParams.centerPos-fragBeginRefPos), (fragEndRefPos-svParams.centerPos)));
#ifdef DEBUG_MEGAPAIR
        log_os << __FUNCTION__ << ": frag begin/end/overlap: " << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n";
#endif
        if (fragOverlap < pairOpt.minFragSupport) return;
    }

    SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]);

    static const bool isShadow(false);

    SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first()));
    setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadow, evRead);

    setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1),isLargeInsert);
}
Exemple #2
0
static
bool
isGoodShadow(
    const bam_record& bamRead,
    const std::string& lastQname)
{
#ifdef DEBUG_IS_SHADOW
    static const std::string logtag("isGoodShadow");
#endif

    if (! bamRead.is_paired()) return false;

    if (bamRead.isNonStrictSupplement()) return false;

    // sanity check that this is a shadow read:
    if (!bamRead.is_unmapped()) return false;
    if (bamRead.is_mate_unmapped()) return false;

    static const unsigned minAvgQualShadow = 25;
    if (get_avg_quality(bamRead) < minAvgQualShadow)
    {
        return false;
    }

    if (strcmp(bamRead.qname(),lastQname.c_str()) != 0)
    {
        // something went wrong here, shadows should have their singleton partner
        // preceding them in the BAM file.
#ifdef DEBUG_IS_SHADOW
        log_os << logtag << " ERROR: Shadow without matching singleton : " << bamRead.qname() << " vs " << lastQname << std::endl;
#endif
        return false;
    }

#ifdef DEBUG_IS_SHADOW
    log_os << logtag << " Found shadow!\n";
            << logtag << " this mapq  = " << ((unsigned int)bamRead.map_qual()) << std::endl;
void
SVCandidateSetSequenceFragmentSampleGroup::
add(
    const bam_header_info& bamHeader,
    const bam_record& bamRead,
    const bool isExpectRepeat,
    const bool isSourcedFromGraphEdgeNode1,
    const bool isSubMapped)
{
    using namespace illumina::common;

#ifdef DEBUG_SVDATA
    log_os << "SVDataGroup adding: " << bamRead << "\n";
#endif

    SVCandidateSetSequenceFragment* fragPtr(getSequenceFragment(bamRead.qname()));
    if (nullptr == fragPtr) return;

    SVCandidateSetSequenceFragment& fragment(*fragPtr);

    SVCandidateSetRead* targetReadPtr(nullptr);
    if (2 == bamRead.read_no())
    {
        if (bamRead.isNonStrictSupplement())
        {
            fragment.read2Supplemental.emplace_back();
            targetReadPtr = (&(fragment.read2Supplemental.back()));
        }
        else
        {
            targetReadPtr = (&(fragment.read2));
        }
    }
    else
    {
        if (bamRead.isNonStrictSupplement())
        {
            fragment.read1Supplemental.emplace_back();
            targetReadPtr = (&(fragment.read1Supplemental.back()));
        }
        else
        {
            targetReadPtr = (&(fragment.read1));
        }
    }

    SVCandidateSetRead& targetRead(*targetReadPtr);
    if (targetRead.isSet())
    {
        if (isExpectRepeat) return;

        std::ostringstream oss;
        oss << "Unexpected alignment name collision. Source: '" << dataSourceName << "'\n"
            << "\tExisting read: ";
        summarizeAlignmentRecord(bamHeader, targetRead.bamrec, oss);
        oss << "\n"
            << "\tNew read: ";
        summarizeAlignmentRecord(bamHeader, bamRead, oss);
        oss << "\n";
        BOOST_THROW_EXCEPTION(GeneralException(oss.str()));
    }

    targetRead.bamrec = bamRead;
    targetRead.isSourcedFromGraphEdgeNode1 = isSourcedFromGraphEdgeNode1;
    targetRead.isSubMapped = isSubMapped;
    targetRead.readIndex = (isSubMapped ? _subMappedReadIndex : _mappedReadIndex);
}
Exemple #4
0
/// scan read record (and optionally its mate record) for SV evidence.
//
/// note that estimation is improved by the mate record (because we have the mate cigar string in this case)
///
static
void
getReadBreakendsImpl(
    const ReadScannerOptions& opt,
    const ReadScannerDerivOptions& dopt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& localRead,
    const bam_record* remoteReadPtr,
    const bam_header_info& bamHeader,
    const reference_contig_segment& localRefSeq,
    const reference_contig_segment* remoteRefSeqPtr,
    std::vector<SVObservation>& candidates,
    known_pos_range2& localEvidenceRange)
{
    using namespace illumina::common;

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << ": Starting read: " << localRead.qname() << "\n";
#endif

    const chromMap_t& chromToIndex(bamHeader.chrom_to_index);

    candidates.clear();

    /// get some basic derived information from the bam_record:
    const SimpleAlignment localAlign(getAlignment(localRead));

    try
    {
        getSingleReadSVCandidates(opt, dopt, localRead, localAlign, chromToIndex,
                                  localRefSeq, candidates);

        // run the same check on the read's mate if we have access to it
        if (nullptr != remoteReadPtr)
        {
            const bam_record& remoteRead(*remoteReadPtr);
            const SimpleAlignment remoteAlign(getAlignment(remoteRead));

            if (nullptr == remoteRefSeqPtr)
            {
                static const char msg[] = "ERROR: remoteRefSeqPtr cannot be null";
                BOOST_THROW_EXCEPTION(LogicException(msg));
            }
            getSingleReadSVCandidates(opt, dopt, remoteRead, remoteAlign,
                                      chromToIndex, (*remoteRefSeqPtr),
                                      candidates);
        }

        // process shadows:
        //getSVCandidatesFromShadow(opt, rstats, localRead, localAlign,remoteReadPtr,candidates);

        // - process anomalous read pairs:
        getSVCandidatesFromPair(opt, dopt, rstats, localRead, localAlign, remoteReadPtr,
                                candidates);
    }
    catch (...)
    {
        std::cerr << "ERROR: Exception caught while processing ";
        if (nullptr == remoteReadPtr)
        {
            std::cerr << "single read record:\n"
                      << '\t' << localRead << "\n";
        }
        else
        {
            std::cerr << " read pair records:\n"
                      << '\t'  << localRead << "\n"
                      << '\t' << (*remoteReadPtr) << "\n";
        }
        throw;
    }

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << ": post-pair candidate_size: " << candidates.size() << "\n";
#endif

    // update localEvidence range:
    // note this is only used if candidates were added, so there's no harm in setting it every time:
    const unsigned localRefLength(apath_ref_length(localAlign.path));
    const pos_t startRefPos(localRead.pos()-1);
    const pos_t endRefPos(startRefPos+localRefLength);

    localEvidenceRange.set_range(startRefPos,endRefPos);

    const int maxTid(chromToIndex.size());

    /// final chance to QC candidate set:
    ///
    for (const SVCandidate& sv : candidates)
    {
        bool isInvalidTid(false);
        if ((sv.bp1.interval.tid < 0) || (sv.bp1.interval.tid >= maxTid))
        {
            isInvalidTid=true;
        }
        else if (sv.bp2.state != SVBreakendState::UNKNOWN)
        {
            if ((sv.bp2.interval.tid < 0) || (sv.bp2.interval.tid >= maxTid))
            {
                isInvalidTid=true;
            }
        }

        bool isInvalidPos(false);
        if (! isInvalidTid)
        {
            // note in the 'off-chromosome edge' test below we check for cases which are obviously way off
            // the edge, but allow for a bit of over-edge mistakes to occur for the circular chromosomes
            //
            static const int offEdgePad(500);
            const pos_t tid1Length(bamHeader.chrom_data[sv.bp1.interval.tid].length);
            if ((sv.bp1.interval.range.end_pos() <= -offEdgePad) || (sv.bp1.interval.range.begin_pos() >= (tid1Length+offEdgePad)))
            {
                isInvalidPos=true;
            }
            else if (sv.bp2.state != SVBreakendState::UNKNOWN)
            {
                const pos_t tid2Length(bamHeader.chrom_data[sv.bp2.interval.tid].length);
                if ((sv.bp2.interval.range.end_pos() <= -offEdgePad) || (sv.bp2.interval.range.begin_pos() >= (tid2Length+offEdgePad)))
                {
                    isInvalidPos=true;
                }
            }
        }

        if (isInvalidTid || isInvalidPos)
        {
            std::ostringstream oss;
            if (isInvalidTid)
            {
                oss << "SVbreakend has unknown or invalid chromosome id in candidate sv.\n";
            }
            else
            {
                oss << "Cannot interpret BAM record: candidate SV breakend from BAM record is off chromosome edge.\n";
            }

            oss << "\tlocal_bam_record: " <<  localRead << "\n"
                << "\tremote_bam record: ";
            if (NULL==remoteReadPtr)
            {
                oss << "NONE";
            }
            else
            {
                oss << (*remoteReadPtr);
            }
            oss << "\n"
                << "\tSVCandidate: " << sv << "\n";
            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
        }
    }
}
Exemple #5
0
/// get SV candidates from shadow/singleton pairs
/// look for singletons, create candidateSV around conf. interval of shadow position
/// cache singletons? might be needed to remove poor quality shadows.
/// should be able to re-use code, follow soft-clipping example.
static
void
getSVCandidatesFromShadow(
    const ReadScannerOptions& opt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& localRead,
    const SimpleAlignment& localAlign,
    const bam_record* remoteReadPtr,
    TrackedCandidates& candidates)
{
    using namespace SVEvidenceType;
    static const index_t svSource(SHADOW);

    static const bool isComplex(true);
    pos_t singletonGenomePos(0);
    int targetId(0);
    if (NULL == remoteReadPtr)
    {
        if (!localRead.is_unmapped()) return;
        // need to take care of this case
        // need to rely on cached mapq and qname
        return;
        if (!isGoodShadow(localRead,lastMapq,lastQname,opt.minSingletonMapqGraph))
        {
            return;
        }
        singletonGenomePos = localAlign.pos;
        targetId           = localRead.target_id();
    }
    else
    {
        // have both reads, straightforward from here
        const bam_record& remoteRead(*remoteReadPtr);
        const SimpleAlignment remoteAlign(remoteRead);

        if (localRead.is_mate_unmapped())
        {
            // remote read is shadow candidate
            if (!isGoodShadow(remoteRead,localRead.map_qual(),localRead.qname(),opt.minSingletonMapqGraph))
            {
                return;
            }
            singletonGenomePos = localAlign.pos;
            targetId = remoteRead.target_id();
        }
        else if (localRead.is_unmapped())
        {
            // local is shadow candidate
            if (!isGoodShadow(localRead,remoteRead.map_qual(),remoteRead.qname(),opt.minSingletonMapqGraph))
            {
                return;
            }
            singletonGenomePos = remoteAlign.pos;
            targetId = localRead.target_id();
        }
        else
        {
            // none unmapped, skip this one
            return;
        }
    }
    const pos_t properPairRangeOffset = static_cast<pos_t>(rstats.properPair.min + (rstats.properPair.max-rstats.properPair.min)/2);
    const pos_t shadowGenomePos = singletonGenomePos + properPairRangeOffset;
    candidates.push_back(GetSplitSVCandidate(opt,targetId,shadowGenomePos,shadowGenomePos, svSource, isComplex));
}