예제 #1
0
// Points feat_ci at a freshly created CFeat_CI over the requested location.
//
// Visible behaviour (only the branch where m_BioseqHandle is set is shown):
//   * If circular_length is valid and range.GetFrom() > range.GetTo(), the
//     range is rebuilt as two intervals (0..to and from..kMax_Int) that take
//     their strand and id from loc, and the iterator is created over that.
//   * Otherwise, any pieces of loc whose Seq-id is not a synonym of
//     m_BioseqHandle are dropped before the iterator is created; if every
//     piece already matches, loc is used unchanged.
//
// ignored_bioseq_handle and strand are not referenced in the visible part.
//
// NOTE(review): the body is cut off right after the final "} else {" in this
// file, so the branch taken when m_BioseqHandle is not set cannot be
// described from here.
void CGeneFinder::CGeneSearchPlugin::setUpFeatureIterator ( 
    CBioseq_Handle &ignored_bioseq_handle,
    auto_ptr<CFeat_CI> &feat_ci,
    TSeqPos circular_length,
    CRange<TSeqPos> &range,
    const CSeq_loc& loc,
    SAnnotSelector &sel,
    CScope &scope,
    ENa_strand &strand )
{
    if ( m_BioseqHandle ) {
        // if we're circular, we may need to split our range into two pieces
        if( ( circular_length != kInvalidSeqPos ) &&
            ( range.GetFrom() > range.GetTo() )) 
        {
            // For circular locations, the "from" is greater than the "to", which
            // would not work properly if given to CFeat_CI.
            // So, as a work around, we transform the range
            // into a mix location of the form "join(0..to, from..MAXINT)"

            // first piece: 0 .. to
            CRef<CSeq_loc> new_loc( new CSeq_loc );
            new_loc->SetInt().SetFrom( 0 );
            new_loc->SetInt().SetTo( range.GetTo() );

            // second piece: from .. kMax_Int
            CRef<CSeq_loc> otherHalfOfRange( new CSeq_loc );
            otherHalfOfRange->SetInt().SetFrom( range.GetFrom() );
            otherHalfOfRange->SetInt().SetTo( kMax_Int );

            new_loc->Add( *otherHalfOfRange );

            // the combined location inherits strand and id from the original loc
            new_loc->SetStrand( loc.GetStrand() );
            new_loc->SetId( *loc.GetId() );

            feat_ci.reset( new CFeat_CI(scope, *new_loc, sel) );
        } else {            
            // remove far parts, if necessary
            // first pass: only detect whether any piece refers to another sequence
            bool loc_change_needed = false;
            ITERATE( CSeq_loc, loc_iter, loc ) {
                if( ! m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) {
                    loc_change_needed = true;
                    break;
                }
            }
            if( loc_change_needed ) {
                // second pass: rebuild the location from the matching pieces only
                CRef<CSeq_loc> new_loc( new CSeq_loc );
                ITERATE( CSeq_loc, loc_iter, loc ) {
                    if( m_BioseqHandle.IsSynonym( loc_iter.GetSeq_id() ) ) {
                        new_loc->Add( *loc_iter.GetRangeAsSeq_loc() );
                    }
                }
                feat_ci.reset( new CFeat_CI(scope, *new_loc, sel) );
            } else {
                // every piece already refers to m_BioseqHandle: use loc as given
                feat_ci.reset( new CFeat_CI(scope, loc, sel) );
            }
        }
    } else {
// Accumulates, over the alignments in [begin, end), the query ranges that
// correspond to subject ranges not already covered by earlier alignments.
//
// Returns make_pair(0, false) when the query Bioseq is unavailable or its
// length is unset/zero.
//
// Row 0 of each alignment is treated as the query and row 1 as the subject
// (per the variable names used below).
//
// NOTE(review): *begin is dereferenced without checking begin != end, so this
// assumes the caller passes a non-empty range -- confirm.
// NOTE(review): the function is cut off in this file after the main loop; the
// final score computation and return statement are not visible here.
pair<double, bool> CScoreUniqSeqCoverage::MakeScore(CBioseq_Handle const& query_handle, vector<CSeq_align const*>::const_iterator begin, vector<CSeq_align const*>::const_iterator end)
{
    CConstRef<CBioseq> bioseq = query_handle.GetCompleteBioseq();

    unsigned int qlen = 0;
    if ( !bioseq.Empty() && bioseq->IsSetLength()) {
        qlen = bioseq->GetLength();
    }

    if ( !qlen ) {
        return make_pair(0, false);
    }

    // The segment type is sampled from the first alignment only and reused for
    // every later alignment.
    bool isDenDiag = ( (*begin)->GetSegs().Which() == CSeq_align::C_Segs::e_Dendiag) ?
                              true : false;

    // Seed both collections with the ranges of the first alignment.
    CRangeCollection<TSeqPos> subj_rng_coll((*begin)->GetSeqRange(1));
    CRange<TSeqPos> q_rng((*begin)->GetSeqRange(0));
    
    CRangeCollection<TSeqPos> query_rng_coll(s_FixMinusStrandRange(q_rng));
    
    for( ++begin; begin != end; ++begin ) {
        const CRange<TSeqPos> align_subj_rng((*begin)->GetSeqRange(1));
        // subject range should always be on the positive strand
        // NOTE(review): the strict '>' also rejects a one-base range
        // (To == From) -- confirm that is intended.
        assert(align_subj_rng.GetTo() > align_subj_rng.GetFrom());
        // coll = the part of this alignment's subject range not yet covered
        CRangeCollection<TSeqPos> coll(align_subj_rng);
        coll.Subtract(subj_rng_coll);

        if ( coll.empty() ) {
            // nothing new on the subject: this alignment contributes nothing
            continue;
        }

        if(coll[0] == align_subj_rng) {
            // the whole subject range is new, so take the whole query range
            CRange<TSeqPos> query_rng ((*begin)->GetSeqRange(0));
            query_rng_coll += s_FixMinusStrandRange(query_rng);
            subj_rng_coll += align_subj_rng;
        }
        else {
            // only parts of the subject range are new: map each uncovered
            // subject piece to query coordinates through the alignment
            ITERATE (CRangeCollection<TSeqPos>, uItr, coll) {
                CRange<TSeqPos> query_rng;
                const CRange<TSeqPos> & subj_rng = (*uItr);
                CRef<CSeq_align> densegAln;
                if ( isDenDiag) {
                    densegAln = CreateDensegFromDendiag(**begin);
                }

                // NOTE(review): the converted alignment and the CAlnMap are
                // rebuilt for every uncovered piece of the same alignment.
                CAlnMap map( (isDenDiag) ? densegAln->GetSegs().GetDenseg() : (*begin)->GetSegs().GetDenseg());
                // subject sequence position -> alignment position -> query sequence position
                TSignedSeqPos subj_aln_start =  map.GetAlnPosFromSeqPos(1,subj_rng.GetFrom());
                TSignedSeqPos subj_aln_end =  map.GetAlnPosFromSeqPos(1,subj_rng.GetTo());
                query_rng.SetFrom(map.GetSeqPosFromAlnPos(0,subj_aln_start));
                query_rng.SetTo(map.GetSeqPosFromAlnPos(0,subj_aln_end));

                query_rng_coll += s_FixMinusStrandRange(query_rng);
                subj_rng_coll += subj_rng;
            }
        }
    }
예제 #3
0
/// Build a trimmed copy of a location.
///
/// The input location is copied, and the copy is handed to x_TrimLocation
/// together with the from/to positions of @p range and a request to set
/// partial flags.
///
/// @param loc    Location to copy; the original is left as is.
/// @param range  Range supplying the from/to positions used for trimming.
/// @return The trimmed copy of @p loc.
CRef<CSeq_loc> CFeatTrim::Apply(const CSeq_loc& loc, 
    const CRange<TSeqPos>& range)
{
    // Work on a private copy so the caller's location is never touched.
    CRef<CSeq_loc> result(new CSeq_loc());
    result->Assign(loc);

    const TSeqPos range_start = range.GetFrom();
    const TSeqPos range_stop = range.GetTo();
    const bool mark_partial = true;

    x_TrimLocation(range_start, range_stop, mark_partial, result);
    return result;
}
예제 #4
0
/// Build a trimmed copy of a feature.
///
/// A copy of the feature's location is trimmed with x_TrimLocation using the
/// from/to positions of @p range (partial flags requested). The rest of the
/// feature is then adjusted to match:
///   - the partial flag is set when the trimmed location is partial at either
///     biological end;
///   - for a Cdregion, the frame is updated and code-breaks are removed or
///     trimmed;
///   - for a tRNA, the tRNA extension is trimmed.
///
/// @param feat   Feature to trim; it is only read and copied.
/// @param range  Range supplying the from/to positions used for trimming.
/// @return A new feature with the trimmed location, or a default-constructed
///         CSeq_feat when the trimmed location is null.
CRef<CSeq_feat> CFeatTrim::Apply(const CSeq_feat& feat,
    const CRange<TSeqPos>& range)
{
    CRef<CSeq_loc> loc = Ref(new CSeq_loc());
    loc->Assign(feat.GetLocation());

    const TSeqPos from = range.GetFrom();
    const TSeqPos to = range.GetTo();

    const bool set_partial = true;

    x_TrimLocation(from, to, set_partial, loc);
    if (loc->IsNull()) {
        // The trimmed location is null: return an empty feature.
        return Ref(new CSeq_feat());
    }

    // Create a new seq-feat with the trimmed location
    CRef<CSeq_feat> new_sf(new CSeq_feat());
    new_sf->Assign(feat);
    new_sf->SetLocation(*loc);
    // loc cannot be null here (see the early return above), so only the
    // partial-ness of the trimmed location needs checking.
    if (loc->IsPartialStart(eExtreme_Biological) ||
        loc->IsPartialStop(eExtreme_Biological)) {
        new_sf->SetPartial(true);
    }

    // If Cdregion need to consider changes in frameshift
    if (new_sf->GetData().IsCdregion()) {
        const TSeqPos offset = x_GetStartOffset(feat, from, to);
        auto& cdregion = new_sf->SetData().SetCdregion();
        x_UpdateFrame(offset, cdregion);

        if (cdregion.IsSetCode_break()) {
            // Remove the code breaks that SOutsideRange(from, to) rejects
            list<CRef<CCode_break>>& code_breaks = cdregion.SetCode_break();
            code_breaks.remove_if(SOutsideRange(from,to));
            if (code_breaks.empty()) {
                cdregion.ResetCode_break();
            }
            else {
                // Trim the 3' end: on the minus strand that is the "from"
                // side, on any other strand it is the "to" side.
                const bool on_minus_strand =
                    (loc->GetStrand() == eNa_strand_minus);
                // Iterate by reference: copying a CRef costs a reference-count
                // increment/decrement per element.
                for (auto& code_break : code_breaks) {
                    const auto cb_range = code_break->GetLoc().GetTotalRange();
                    const bool overhangs = on_minus_strand
                        ? (cb_range.GetFrom() < from)
                        : (cb_range.GetTo() > to);
                    if (overhangs) {
                        x_TrimCodeBreak(from, to, *code_break);
                    }
                }
            }
        }
    }
    else 
    if (new_sf->GetData().GetSubtype() == CSeqFeatData::eSubtype_tRNA) {
        auto& rna = new_sf->SetData().SetRna();
        if (rna.IsSetExt() && rna.GetExt().IsTRNA()) {
            x_TrimTrnaExt(from, to, rna.SetExt().SetTRNA());
        }
    }
    return new_sf;
}