Пример #1
0
/// Test whether a read is a candidate for mate-insertion evidence
///
/// A read is rejected unless it is paired, is not flagged by
/// isNonStrictSupplement(), has both itself and its mate mapped, has a
/// mapping quality of at least \p minMapq, and reports non-negative target
/// ids for both itself and its mate.
///
/// A read passing these filters is accepted if its mate is on a different
/// target, or if the read and mate positions are at least 10000 apart.
///
/// \param bamRead read to evaluate
/// \param minMapq minimum mapping quality required of the read
/// \return true if the read qualifies as a candidate
bool
isMateInsertionEvidenceCandidate(
    const bam_record& bamRead,
    const unsigned minMapq)
{
    // the read must be one end of a fully mapped, non-supplementary pair
    const bool isMappedPair(
        bamRead.is_paired() &&
        (! bamRead.isNonStrictSupplement()) &&
        (! bamRead.is_unmapped()) &&
        (! bamRead.is_mate_unmapped()));
    if (! isMappedPair) return false;

    if (bamRead.map_qual() < minMapq) return false;

    // both ends need a non-negative target id before they are compared
    const bool isBothTargetsSet(
        (bamRead.target_id() >= 0) &&
        (bamRead.mate_target_id() >= 0));
    if (! isBothTargetsSet) return false;

    // mates on different targets always qualify
    if (bamRead.target_id() != bamRead.mate_target_id()) return true;

    /// TODO: better candidate definition based on fragment size distro:
    static const int minMateDistance(10000);
    if (std::abs(bamRead.pos()-bamRead.mate_pos()) < minMateDistance) return false;
    return true;
}
Пример #2
0
/// Classify the fragment size of a read pair
///
/// Pairs whose read and mate target ids differ are reported as DISTANT.
/// Otherwise the absolute value of the read's template size is passed to
/// classifySize() together with the stats entry at \p defaultReadGroupIndex.
///
/// \param bamRead read whose pair is classified
/// \param defaultReadGroupIndex index into _stats selecting the stats entry to classify against
/// \return the fragment size category
FragmentSizeType::index_t
SVLocusScanner::
_getFragmentSizeType(
    const bam_record& bamRead,
    const unsigned defaultReadGroupIndex) const
{
    using namespace FragmentSizeType;

    const bool isSameTarget(bamRead.target_id() == bamRead.mate_target_id());
    if (! isSameTarget) return DISTANT;

    // template size is signed, only its magnitude is classified
    const int32_t absTemplateSize(std::abs(bamRead.template_size()));
    return classifySize(_stats[defaultReadGroupIndex], absTemplateSize);
}
Пример #3
0
/// extract poorly aligned read ends (semi-aligned and/or soft-clipped)
/// to internal candidate format
///
/// Leading and trailing mismatch lengths and reference positions are
/// obtained from getSVBreakendCandidateSemiAligned(). Nothing is added when
/// the two lengths sum to at least the read size. Otherwise one candidate is
/// appended to \p candidates for each read end whose mismatch length is at
/// least opt.minSemiAlignedMismatchLen, leading end first.
static
void
getSVCandidatesFromSemiAligned(
    const ReadScannerOptions& opt,
    const ReadScannerDerivOptions& dopt,
    const bam_record& bamRead,
    const SimpleAlignment& bamAlign,
    const FRAGSOURCE::index_t fragSource,
    const reference_contig_segment& refSeq,
    std::vector<SVObservation>& candidates)
{
    unsigned leadLen(0);
    pos_t leadPos(0);
    unsigned trailLen(0);
    pos_t trailPos(0);
    getSVBreakendCandidateSemiAligned(bamRead, bamAlign, refSeq,
                                      dopt.isUseOverlappingPairs,
                                      leadLen, leadPos,
                                      trailLen, trailPos);

    // skip reads where the mismatched ends account for the whole read
    const unsigned totalMismatchLen(leadLen + trailLen);
    if (totalMismatchLen >= bamRead.read_size()) return;

    using namespace SVEvidenceType;
    static const index_t svSource(SEMIALIGN);

    // a semi-aligned read does not define a full hypothesis, so -- like a
    // clipped read -- it is always recorded as evidence for a 'complex'
    // (ie. undefined) event
    static const bool isComplex(true);

    // evaluate the leading end first, then the trailing end
    const unsigned endMismatchLen[] = { leadLen, trailLen };
    const pos_t endRefPos[] = { leadPos, trailPos };
    for (unsigned endIndex(0); endIndex < 2; ++endIndex)
    {
        if (endMismatchLen[endIndex] >= opt.minSemiAlignedMismatchLen)
        {
            const pos_t pos(endRefPos[endIndex]);
            candidates.push_back(GetSplitSVCandidate(dopt,bamRead.target_id(),pos,pos,svSource, fragSource,isComplex));
        }
    }
}
Пример #4
0
/// get SV candidates from shadow/singleton pairs
///
/// A candidate is only produced when the remote read is available:
/// - if the local read reports its mate as unmapped, the remote read is the
///   shadow candidate and the local alignment position is the singleton position
/// - otherwise, if the local read is unmapped, the local read is the shadow
///   candidate and the remote alignment position is the singleton position
/// - otherwise no candidate is produced
///
/// The shadow candidate must pass isGoodShadow() using the other read's
/// mapping quality and qname together with opt.minSingletonMapqGraph.
/// The candidate position is the singleton position shifted by the midpoint
/// of the proper pair range in \p rstats, and the target id is taken from the
/// shadow read.
///
/// TODO: cache singletons? might be needed to remove poor quality shadows.
/// TODO: should be able to re-use code, follow soft-clipping example.
///
/// \param opt read scanner options
/// \param rstats read group stats supplying the proper pair range
/// \param localRead read currently being processed
/// \param localAlign alignment of \p localRead
/// \param remoteReadPtr mate of \p localRead, or NULL if it is not available
/// \param candidates container the new candidate is appended to
static
void
getSVCandidatesFromShadow(
    const ReadScannerOptions& opt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& localRead,
    const SimpleAlignment& localAlign,
    const bam_record* remoteReadPtr,
    TrackedCandidates& candidates)
{
    using namespace SVEvidenceType;
    static const index_t svSource(SHADOW);

    static const bool isComplex(true);

    if (NULL == remoteReadPtr)
    {
        // TODO: the single-read case is not handled yet. Evaluating an
        // unmapped local read here would need the mapq and qname of its
        // mate to be cached by the caller, which is not available in this
        // function.
        return;
    }

    // have both reads, straightforward from here
    const bam_record& remoteRead(*remoteReadPtr);
    const SimpleAlignment remoteAlign(remoteRead);

    pos_t singletonGenomePos(0);
    int targetId(0);
    if (localRead.is_mate_unmapped())
    {
        // remote read is shadow candidate
        if (!isGoodShadow(remoteRead,localRead.map_qual(),localRead.qname(),opt.minSingletonMapqGraph))
        {
            return;
        }
        singletonGenomePos = localAlign.pos;
        targetId = remoteRead.target_id();
    }
    else if (localRead.is_unmapped())
    {
        // local is shadow candidate
        if (!isGoodShadow(localRead,remoteRead.map_qual(),remoteRead.qname(),opt.minSingletonMapqGraph))
        {
            return;
        }
        singletonGenomePos = remoteAlign.pos;
        targetId = localRead.target_id();
    }
    else
    {
        // none unmapped, skip this one
        return;
    }

    // place the shadow at the midpoint of the proper pair range past the singleton
    const pos_t properPairRangeOffset = static_cast<pos_t>(rstats.properPair.min + (rstats.properPair.max-rstats.properPair.min)/2);
    const pos_t shadowGenomePos = singletonGenomePos + properPairRangeOffset;
    candidates.push_back(GetSplitSVCandidate(opt,targetId,shadowGenomePos,shadowGenomePos, svSource, isComplex));
}