Beispiel #1
0
static
void
addReadToDepthEst(
    const bam_record& bamRead,
    const pos_t beginPos,
    std::vector<unsigned>& depth)
{
    using namespace ALIGNPATH;

    const pos_t endPos(beginPos+depth.size());

    // get cigar:
    path_t apath;
    bam_cigar_to_apath(bamRead.raw_cigar(), bamRead.n_cigar(), apath);

    pos_t refPos(bamRead.pos()-1);
    BOOST_FOREACH(const path_segment& ps, apath)
    {
        if (refPos>=endPos) return;

        if (MATCH == ps.type)
        {
            for (pos_t pos(refPos); pos < (refPos+static_cast<pos_t>(ps.length)); ++pos)
            {
                if (pos>=beginPos)
                {
                    if (pos>=endPos) return;
                    depth[pos-beginPos]++;
                }
            }
        }
        if (is_segment_type_ref_length(ps.type)) refPos += ps.length;
    }
}
void
SVScorePairRefProcessor::
processClearedRecord(
    const bam_record& bamRead)
{
    using namespace illumina::common;

    assert(bamParams.isSet);

    const pos_t refPos(bamRead.pos()-1);
    if (! bamParams.interval.range.is_pos_intersect(refPos)) return;

    const bool isLargeInsert(isLargeInsertSV(sv));

#ifdef DEBUG_MEGAPAIR
    log_os << __FUNCTION__ << ": read: " << bamRead << "\n";
#endif

    /// check if fragment is too big or too small:
    const int templateSize(std::abs(bamRead.template_size()));
    if (templateSize < bamParams.minFrag) return;
    if (templateSize > bamParams.maxFrag) return;

    // count only from the down stream reads
    const bool isFirstBamRead(isFirstRead(bamRead));

    // get fragment range:
    pos_t fragBeginRefPos(refPos);
    if (! isFirstBamRead)
    {
        fragBeginRefPos=bamRead.mate_pos()-1;
    }

    const pos_t fragEndRefPos(fragBeginRefPos+templateSize);

    if (fragBeginRefPos > fragEndRefPos)
    {
        std::ostringstream oss;
        oss << "ERROR: Failed to parse fragment range from bam record. Frag begin,end: " << fragBeginRefPos << " " << fragEndRefPos << " bamRecord: " << bamRead << "\n";
        BOOST_THROW_EXCEPTION(LogicException(oss.str()));
    }

    {
        const pos_t fragOverlap(std::min((1+svParams.centerPos-fragBeginRefPos), (fragEndRefPos-svParams.centerPos)));
#ifdef DEBUG_MEGAPAIR
        log_os << __FUNCTION__ << ": frag begin/end/overlap: " << fragBeginRefPos << " " << fragEndRefPos << " " << fragOverlap << "\n";
#endif
        if (fragOverlap < pairOpt.minFragSupport) return;
    }

    SVFragmentEvidence& fragment(evidence.getSampleEvidence(bamParams.bamIndex)[bamRead.qname()]);

    static const bool isShadow(false);

    SVFragmentEvidenceRead& evRead(fragment.getRead(bamRead.is_first()));
    setReadEvidence(svParams.minMapQ, svParams.minTier2MapQ, bamRead, isShadow, evRead);

    setAlleleFrag(*bamParams.fragDistroPtr, templateSize, fragment.ref.getBp(isBp1),isLargeInsert);
}
    void
    addRead(
        const bam_record& bamRead)
    {
        if (_isRegionInit)
        {
            if (bamRead.pos() > _endPos + 1000)
            {
                _maxPos=_endPos;
                setNewRegion();
            }
        }

        if (! _isRegionInit)
        {
            _minPos=bamRead.pos();
            _maxPos=bamRead.pos();
            _endPos=bamRead.pos() + bamRead.read_size();
            _isRegionInit=true;
        }
        else
        {
            if (bamRead.pos() > _maxPos)
            {
                _maxPos = bamRead.pos();
                _endPos=bamRead.pos() + bamRead.read_size();
            }
        }

        _count++;
        _totalReadLength += bamRead.read_size();
    }
    void
    addRead(
        const bam_record& bamRead)
    {
        const pos_t pos(bamRead.pos()-1);
        const unsigned rsize(bamRead.read_size());
        if (! _isRegionInit)
        {
            _maxPos=pos;
            _isRegionInit=true;
        }

        for (; _maxPos<pos; ++_maxPos) flushPos(_maxPos);
        _depth.inc(pos,rsize);
        _count++;
    }
Beispiel #5
0
bool
isMateInsertionEvidenceCandidate(
    const bam_record& bamRead,
    const unsigned minMapq)
{
    if (! bamRead.is_paired()) return false;
    if (bamRead.isNonStrictSupplement()) return false;
    if (bamRead.is_unmapped() || bamRead.is_mate_unmapped()) return false;

    if (bamRead.map_qual() < minMapq) return false;

    if (bamRead.target_id() < 0) return false;
    if (bamRead.mate_target_id() < 0) return false;

    if (bamRead.target_id() != bamRead.mate_target_id()) return true;

    /// TODO: better candidate definition based on fragment size distro:
    static const int minSize(10000);
    return (std::abs(bamRead.pos()-bamRead.mate_pos()) >= minSize);
}
Beispiel #6
0
/// scan read record (and optionally its mate record) for SV evidence.
//
/// note that estimation is improved by the mate record (because we have the mate cigar string in this case)
///
static
void
getReadBreakendsImpl(
    const ReadScannerOptions& opt,
    const ReadScannerDerivOptions& dopt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& localRead,
    const bam_record* remoteReadPtr,
    const bam_header_info& bamHeader,
    const reference_contig_segment& localRefSeq,
    const reference_contig_segment* remoteRefSeqPtr,
    std::vector<SVObservation>& candidates,
    known_pos_range2& localEvidenceRange)
{
    using namespace illumina::common;

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << ": Starting read: " << localRead.qname() << "\n";
#endif

    const chromMap_t& chromToIndex(bamHeader.chrom_to_index);

    candidates.clear();

    /// get some basic derived information from the bam_record:
    const SimpleAlignment localAlign(getAlignment(localRead));

    try
    {
        getSingleReadSVCandidates(opt, dopt, localRead, localAlign, chromToIndex,
                                  localRefSeq, candidates);

        // run the same check on the read's mate if we have access to it
        if (nullptr != remoteReadPtr)
        {
            const bam_record& remoteRead(*remoteReadPtr);
            const SimpleAlignment remoteAlign(getAlignment(remoteRead));

            if (nullptr == remoteRefSeqPtr)
            {
                static const char msg[] = "ERROR: remoteRefSeqPtr cannot be null";
                BOOST_THROW_EXCEPTION(LogicException(msg));
            }
            getSingleReadSVCandidates(opt, dopt, remoteRead, remoteAlign,
                                      chromToIndex, (*remoteRefSeqPtr),
                                      candidates);
        }

        // process shadows:
        //getSVCandidatesFromShadow(opt, rstats, localRead, localAlign,remoteReadPtr,candidates);

        // - process anomalous read pairs:
        getSVCandidatesFromPair(opt, dopt, rstats, localRead, localAlign, remoteReadPtr,
                                candidates);
    }
    catch (...)
    {
        std::cerr << "ERROR: Exception caught while processing ";
        if (nullptr == remoteReadPtr)
        {
            std::cerr << "single read record:\n"
                      << '\t' << localRead << "\n";
        }
        else
        {
            std::cerr << " read pair records:\n"
                      << '\t'  << localRead << "\n"
                      << '\t' << (*remoteReadPtr) << "\n";
        }
        throw;
    }

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << ": post-pair candidate_size: " << candidates.size() << "\n";
#endif

    // update localEvidence range:
    // note this is only used if candidates were added, so there's no harm in setting it every time:
    const unsigned localRefLength(apath_ref_length(localAlign.path));
    const pos_t startRefPos(localRead.pos()-1);
    const pos_t endRefPos(startRefPos+localRefLength);

    localEvidenceRange.set_range(startRefPos,endRefPos);

    const int maxTid(chromToIndex.size());

    /// final chance to QC candidate set:
    ///
    for (const SVCandidate& sv : candidates)
    {
        bool isInvalidTid(false);
        if ((sv.bp1.interval.tid < 0) || (sv.bp1.interval.tid >= maxTid))
        {
            isInvalidTid=true;
        }
        else if (sv.bp2.state != SVBreakendState::UNKNOWN)
        {
            if ((sv.bp2.interval.tid < 0) || (sv.bp2.interval.tid >= maxTid))
            {
                isInvalidTid=true;
            }
        }

        bool isInvalidPos(false);
        if (! isInvalidTid)
        {
            // note in the 'off-chromosome edge' test below we check for cases which are obviously way off
            // the edge, but allow for a bit of over-edge mistakes to occur for the circular chromosomes
            //
            static const int offEdgePad(500);
            const pos_t tid1Length(bamHeader.chrom_data[sv.bp1.interval.tid].length);
            if ((sv.bp1.interval.range.end_pos() <= -offEdgePad) || (sv.bp1.interval.range.begin_pos() >= (tid1Length+offEdgePad)))
            {
                isInvalidPos=true;
            }
            else if (sv.bp2.state != SVBreakendState::UNKNOWN)
            {
                const pos_t tid2Length(bamHeader.chrom_data[sv.bp2.interval.tid].length);
                if ((sv.bp2.interval.range.end_pos() <= -offEdgePad) || (sv.bp2.interval.range.begin_pos() >= (tid2Length+offEdgePad)))
                {
                    isInvalidPos=true;
                }
            }
        }

        if (isInvalidTid || isInvalidPos)
        {
            std::ostringstream oss;
            if (isInvalidTid)
            {
                oss << "SVbreakend has unknown or invalid chromosome id in candidate sv.\n";
            }
            else
            {
                oss << "Cannot interpret BAM record: candidate SV breakend from BAM record is off chromosome edge.\n";
            }

            oss << "\tlocal_bam_record: " <<  localRead << "\n"
                << "\tremote_bam record: ";
            if (NULL==remoteReadPtr)
            {
                oss << "NONE";
            }
            else
            {
                oss << (*remoteReadPtr);
            }
            oss << "\n"
                << "\tSVCandidate: " << sv << "\n";
            BOOST_THROW_EXCEPTION(LogicException(oss.str()));
        }
    }
}