Exemplo n.º 1
0
/// Decide whether a read's mate should be treated as candidate evidence
/// for an insertion, based on how far away the mate is placed.
///
/// \param bamRead  read to evaluate
/// \param minMapq  reads with a mapping quality below this value are rejected
/// \return true if the read passes all filters and its mate is placed either
///         on a different target or at least 10000 positions away on the
///         same target
bool
isMateInsertionEvidenceCandidate(
    const bam_record& bamRead,
    const unsigned minMapq)
{
    // only non-supplementary reads from pairs with both ends mapped are considered:
    const bool isUsablePair(
        bamRead.is_paired() &&
        (! bamRead.isNonStrictSupplement()) &&
        (! (bamRead.is_unmapped() || bamRead.is_mate_unmapped())));
    if (! isUsablePair) return false;

    if (bamRead.map_qual() < minMapq) return false;

    // both ends need a valid (non-negative) target id:
    if ((bamRead.target_id() < 0) || (bamRead.mate_target_id() < 0)) return false;

    // mates on different targets always qualify:
    const bool isSameTarget(bamRead.target_id() == bamRead.mate_target_id());
    if (! isSameTarget) return true;

    /// TODO: better candidate definition based on fragment size distro:
    static const int minMateDistance(10000);
    if (std::abs(bamRead.pos()-bamRead.mate_pos()) < minMateDistance) return false;
    return true;
}
Exemplo n.º 2
0
/// Test whether \p bamRead is an acceptable shadow read, i.e. an unmapped
/// read from a pair whose mate is mapped.
///
/// \param bamRead    candidate shadow read
/// \param lastQname  qname the candidate's own qname must match; per the
///                   comment in the body this is expected to be the singleton
///                   partner preceding the shadow in the BAM file
/// \return false as soon as any of the filters below rejects the read
static
bool
isGoodShadow(
    const bam_record& bamRead,
    const std::string& lastQname)
{
#ifdef DEBUG_IS_SHADOW
    // prefix for the debug log lines emitted by this function
    static const std::string logtag("isGoodShadow");
#endif

    // unpaired reads cannot be shadows
    if (! bamRead.is_paired()) return false;

    // skip reads flagged by isNonStrictSupplement()
    if (bamRead.isNonStrictSupplement()) return false;

    // sanity check that this is a shadow read:
    // the read itself must be unmapped while its mate must be mapped
    if (!bamRead.is_unmapped()) return false;
    if (bamRead.is_mate_unmapped()) return false;

    // reject reads whose get_avg_quality() value is below this threshold
    static const unsigned minAvgQualShadow = 25;
    if (get_avg_quality(bamRead) < minAvgQualShadow)
    {
        return false;
    }

    // the read name has to be identical to the name passed in by the caller
    if (strcmp(bamRead.qname(),lastQname.c_str()) != 0)
    {
        // something went wrong here, shadows should have their singleton partner
        // preceding them in the BAM file.
#ifdef DEBUG_IS_SHADOW
        log_os << logtag << " ERROR: Shadow without matching singleton : " << bamRead.qname() << " vs " << lastQname << std::endl;
#endif
        return false;
    }

#ifdef DEBUG_IS_SHADOW
    // NOTE(review): the ';' that ends the next line terminates the statement,
    // so the continuation line starting with '<<' will not compile when
    // DEBUG_IS_SHADOW is defined -- the ';' looks unintended.
    log_os << logtag << " Found shadow!\n";
            << logtag << " this mapq  = " << ((unsigned int)bamRead.map_qual()) << std::endl;
Exemplo n.º 3
0
/// Append an SV observation derived from an anomalous read pair.
///
/// Nothing is appended when the local read is not usable as paired-end
/// evidence or when the pair analysis does not report a large event.
///
/// \param opt            scanner options, including isIgnoreAnomProperPair
/// \param dopt           derived scanner options, forwarded to the pair analyzer
/// \param rstats         cached read group stats, forwarded to the pair analyzer
/// \param localRead      read being scanned
/// \param localAlign     alignment of \p localRead
/// \param remoteReadPtr  mate of \p localRead, or nullptr if it is not available
/// \param candidates     receives at most one new observation at its end
static
void
getSVCandidatesFromPair(
    const ReadScannerOptions& opt,
    const ReadScannerDerivOptions& dopt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& localRead,
    const SimpleAlignment& localAlign,
    const bam_record* remoteReadPtr,
    std::vector<SVObservation>& candidates)
{
    // Paired-end evidence requires a paired read with both ends mapped.
    // Supplementary records are excluded so that evidence from SA-split
    // reads is not counted twice.
    const bool isPairEvidenceSource(
        localRead.is_paired() &&
        (! localRead.isNonStrictSupplement()) &&
        (! (localRead.is_unmapped() || localRead.is_mate_unmapped())));
    if (! isPairEvidenceSource) return;

    // special case typically used for RNA-Seq analysis:
    if (opt.isIgnoreAnomProperPair)
    {
        if (localRead.is_proper_pair()) return;
    }

    // Represent the mate as a SimpleAlignment, falling back to a fake mate
    // alignment built from the local read when the mate record is missing:
    const bool hasRemoteRead(remoteReadPtr != nullptr);
    const SimpleAlignment remoteAlign(hasRemoteRead ? getAlignment(*remoteReadPtr) : getFakeMateAlignment(localRead));

    AlignmentPairAnalyzer analyzer(opt, dopt, rstats);
    analyzer.reset(localAlign, remoteAlign, hasRemoteRead, localRead.is_first());

    const bool isLargeEvent(analyzer.computeLargeEventRegionScale());
    if (! isLargeEvent) return;

    // construct the observation in place, then let the analyzer fill it in:
    candidates.emplace_back();
    SVObservation& observation(candidates.back());
    analyzer.getSVObservation(observation);

#ifdef DEBUG_SCANNER
    log_os << __FUNCTION__ << " evaluating pair sv for inclusion: " << observation << "\n";
#endif
}
Exemplo n.º 4
0
/// Get SV candidates from shadow/singleton pairs.
///
/// A pair qualifies when exactly one of its reads is unmapped (the shadow)
/// and that read passes isGoodShadow(). A single complex candidate is then
/// appended at the singleton's position shifted by the midpoint of the
/// proper-pair range.
///
/// Open questions carried over from the original notes:
/// - cache singletons? might be needed to remove poor quality shadows.
/// - should be able to re-use code, follow soft-clipping example.
///
/// \param opt            scanner options; minSingletonMapqGraph is forwarded to isGoodShadow()
/// \param rstats         read group stats supplying the proper-pair range
/// \param localRead      read being scanned
/// \param localAlign     alignment of \p localRead
/// \param remoteReadPtr  mate of \p localRead; no candidate is produced when this is null
/// \param candidates     receives at most one new candidate
static
void
getSVCandidatesFromShadow(
    const ReadScannerOptions& opt,
    const SVLocusScanner::CachedReadGroupStats& rstats,
    const bam_record& localRead,
    const SimpleAlignment& localAlign,
    const bam_record* remoteReadPtr,
    TrackedCandidates& candidates)
{
    using namespace SVEvidenceType;
    static const index_t svSource(SHADOW);
    static const bool isComplex(true);

    // TODO: handle the case where the mate record is not available. This
    // would need to rely on a cached mapq and qname of the preceding
    // singleton to validate an unmapped local read, and would then use
    // localAlign.pos and localRead.target_id() for the candidate. Until
    // that is implemented no candidate is generated without the mate.
    if (nullptr == remoteReadPtr) return;

    // have both reads, straightforward from here
    const bam_record& remoteRead(*remoteReadPtr);
    const SimpleAlignment remoteAlign(remoteRead);

    pos_t singletonGenomePos(0);
    int targetId(0);
    if (localRead.is_mate_unmapped())
    {
        // remote read is shadow candidate
        if (!isGoodShadow(remoteRead,localRead.map_qual(),localRead.qname(),opt.minSingletonMapqGraph))
        {
            return;
        }
        singletonGenomePos = localAlign.pos;
        targetId = remoteRead.target_id();
    }
    else if (localRead.is_unmapped())
    {
        // local is shadow candidate
        if (!isGoodShadow(localRead,remoteRead.map_qual(),remoteRead.qname(),opt.minSingletonMapqGraph))
        {
            return;
        }
        singletonGenomePos = remoteAlign.pos;
        targetId = localRead.target_id();
    }
    else
    {
        // none unmapped, skip this one
        return;
    }

    // place the candidate at the singleton position offset by the midpoint
    // of the proper-pair range:
    const pos_t properPairRangeOffset = static_cast<pos_t>(rstats.properPair.min + (rstats.properPair.max-rstats.properPair.min)/2);
    const pos_t shadowGenomePos = singletonGenomePos + properPairRangeOffset;
    candidates.push_back(GetSplitSVCandidate(opt,targetId,shadowGenomePos,shadowGenomePos, svSource, isComplex));
}
/// Store alignment \p al of BAM record \p br in the read buffer.
///
/// Unless opt.is_ignore_read_names is set, the read is looked up by its
/// read key so that a further alignment of an already buffered read is
/// attached to the existing starling_read entry. Otherwise every call
/// creates a new entry.
///
/// The process is terminated with exit(EXIT_FAILURE) when a contig
/// realignment is combined with a splice junction read.
///
/// \param opt        options; is_ignore_read_names is read here and \p opt is
///                   forwarded to the compatibility check
/// \param br         BAM record of the read, asserted to be mapped
/// \param al         alignment to store
/// \param maplev     mapping level recorded for genomic alignments
/// \param rat        alignment type; READ_ALIGN::GENOME selects the genomic
///                   path, any other value is handled as a contig alignment
/// \param contig_id  id under which contig alignments are stored
/// \return (true, read id) on success; (false, 0) when \p al is incompatible
///         with the alignments already stored for the read
std::pair<bool,align_id_t>
starling_read_buffer::
add_read_alignment(const starling_options& opt,
                   const bam_record& br,
                   const alignment& al,
                   const MAPLEVEL::index_t maplev,
                   const READ_ALIGN::index_t rat,
                   const align_id_t contig_id) {

    assert(! br.is_unmapped());

    const bool is_genomic(rat == READ_ALIGN::GENOME);
    bool is_known_read(false);
    align_id_t read_id;

    if(! opt.is_ignore_read_names) {
        const read_key lookup_key(br);
        const read_key_lup_t::const_iterator hit(_read_key.find(lookup_key));
        is_known_read=(hit!=_read_key.end());

        if(is_known_read) {
            read_id=hit->second;
        } else {
            read_id=next_id();
            _read_data[read_id] = new starling_read(br,is_genomic);
        }

        starling_read& stored_read(*(_read_data[read_id]));

        if(is_known_read) {
            assert(stored_read.key() == lookup_key);
        } else {
            // index the new read under the key obtained from the stored read:
            _read_key[stored_read.key()]=read_id;
        }
    } else {
        // read names are ignored, so every alignment becomes a new read:
        read_id=next_id();
        _read_data[read_id] = new starling_read(br,is_genomic);
    }

    starling_read& sread(*(_read_data[read_id]));

    if(is_known_read) {
        // no GROUPER input accepted for reads crossing splice junctions:
        bool is_spliced_contig_read(false);
        if(! is_genomic) {
            if(sread.is_segmented()) is_spliced_contig_read=true;
        } else if(! sread.contig_align().empty()) {
            if(apath_exon_count(al.path)>1) is_spliced_contig_read=true;
        }

        if(is_spliced_contig_read) {
            log_os << "ERROR: assembled contig realignments are not allowed for splice junction reads. Read: " << sread.key() << "\n";
            exit(EXIT_FAILURE);
        }

        const bool is_compatible(sread.is_compatible_alignment(al,rat,contig_id,opt));
        if(! is_compatible) {
            log_os << "WARNING: skipping new alignment: " << al
                   << " which is incompatible with alignments in read: " << sread;
            return std::make_pair(false,0);
        }

        // contig BAM records are incomplete, so the full record is filled
        // in whenever a mapped genomic alignment is available:
        if(is_genomic) sread.set_genomic_bam_record(br);
    } else {
        sread.id() = read_id;
    }

    if(! is_genomic) {
        // contig alignments:
        sread.contig_align()[contig_id] = al;
        (_contig_group[contig_id]).insert(read_id);
    } else {
        sread.set_genome_align(al);
        sread.genome_align_maplev = maplev;

        // segmented reads are registered once per segment, using 1-based
        // segment numbers:
        if(sread.is_segmented()) {
            const uint8_t segment_total(sread.segment_count());
            for(unsigned seg_index(1); seg_index<=segment_total; ++seg_index) {
                const uint8_t seg_no(seg_index);
                const pos_t seg_pos(get_alignment_buffer_pos(sread.get_segment(seg_no).genome_align()));
                sread.get_segment(seg_no).buffer_pos = seg_pos;
                (_pos_group[seg_pos]).insert(std::make_pair(read_id,seg_no));
            }
        }
    }

    // a newly created, unsegmented read is registered under segment id 0:
    if(! (is_known_read || sread.is_segmented())) {
        const seg_id_t full_seg_id(0);
        const pos_t full_pos(get_alignment_buffer_pos(al));
        sread.get_full_segment().buffer_pos = full_pos;
        (_pos_group[full_pos]).insert(std::make_pair(read_id,full_seg_id));
    }

    return std::make_pair(true,read_id);
}