Esempio n. 1
0
// Trims the right end of the segment so that its transcript ends on a
// match ('M'). If the transcript has no 'M' at all, the whole segment is
// turned into a gap instead.
void CSplignTrim::CutToMatchRight(TSeg& s)
{
    const size_t last_match = s.m_details.rfind('M');

    if(last_match != string::npos) {
        // number of transcript characters located after the last 'M'
        const size_t tail = s.m_details.length() - last_match - 1;
        if(tail != 0) {
            CutFromRight(tail, s);
        }
        return;
    }

    // not a single match in the transcript
    s.SetToGap();
}
Esempio n. 2
0
// Scores the transcript left to right (+1 for 'M', -1 for anything else),
// finds the right-most position where the running score reaches its
// maximum, and cuts everything to the right of that position.
void CSplignTrim::Cut50FromRight(TSeg& s)
{
    const string& transcript = s.m_details;
    const int total = (int)transcript.size();

    int running = 0;
    int best = -2;
    int best_idx = 0; // stays at the first position if the score never reaches 'best'

    for(int idx = 0; idx < total; ++idx) {
        running += (transcript[idx] == 'M') ? 1 : -1;
        // '>=' so that ties resolve to the right-most maximum
        if(running >= best) {
            best = running;
            best_idx = idx;
        }
    }

    // number of characters located to the right of the best position
    const int tail = total - best_idx - 1;
    if(tail > 0) {
        CutFromRight(tail, s);
    }
}
Esempio n. 3
0
// Trims the exons on both sides of an internal alignment gap so that the
// exon ends fall on codon boundaries of the CDS annotated on the mRNA.
// Nothing is done if the bioseq handle is not set or no CDS feature is found.
//
// segments        - alignment segments; modified in place
// mrna_bio_handle - handle used to look up Cdregion features
// mrna_strand     - true: CDS ranges are used as is; false: CDS ranges are
//                   mirrored using mrna_len
// mrna_len        - mRNA length, used only to mirror coordinates
//
// Throws CAlgoAlignException (eInternal) when an exon adjacent to a gap
// contains no suitable cut point.
void CSplignTrim::TrimHolesToCodons(TSegs& segments, CBioseq_Handle& mrna_bio_handle, bool mrna_strand, size_t mrna_len)
{

    if( mrna_bio_handle ) {
        //collect CDS intervals (could be more than one in a case of ribosomal slippage)
        vector<TSeqRange> tr;
        for(CFeat_CI ci(mrna_bio_handle, SAnnotSelector(CSeqFeatData::e_Cdregion)); ci; ++ci) {
            for(CSeq_loc_CI slit(ci->GetLocation()); slit; ++slit) {
                TSeqRange r, ori;
                ori = slit.GetRange();
                if( mrna_strand ) {
                    r = ori;
                } else {//reverse: mirror the interval, [from, to] -> [len-to-1, len-from-1]
                    r.SetFrom(mrna_len - ori.GetTo() - 1);
                    r.SetTo(mrna_len - ori.GetFrom() - 1);
                }
                tr.push_back(r);
            }
        }

        if(tr.empty()) return;// CDS not found

        //trim
        AdjustGaps(segments);//make sure there is no adjacent gaps
        // slide a window of three consecutive segments [pos1, pos1+1, pos2]
        size_t pos1 = 0, pos2 = 2;
        for(; pos2 < segments.size(); ++pos1, ++pos2) {
            // exon / non-exon / exon pattern
            if( segments[pos1].m_exon && !segments[pos1+1].m_exon && segments[pos2].m_exon ) {//candidate for trimming
                
                //trim left exon
                // NOTE(review): m_box[1] is presumably the last mRNA position of the exon - confirm
                TSeqPos p1 = segments[pos1].m_box[1];
                ITERATE(vector<TSeqRange>, it, tr) {
                    // only the first CDS interval containing p1 is considered
                    if( p1 >= it->GetFrom() && p1 <= it->GetTo() ) {
                        // remainder of (positions from CDS interval start through p1) modulo 3
                        TSeqPos cut_mrna_len = (p1 + 1 - it->GetFrom()) % 3, cnt = 0;
                        // copy of the transcript: CutFromRight below modifies the segment
                        string transcript = segments[pos1].m_details;
                        int i = (int)transcript.size() - 1;
                        // walk right to left; cnt counts the characters other than 'I' passed so far
                        // NOTE(review): 'I' presumably does not consume an mRNA position - confirm
                        for(; i>=0; --i) {
                            if( cnt%3 == cut_mrna_len &&  transcript[i] == 'M' ) { //cut point
                                // drop everything to the right of index i
                                CutFromRight(transcript.size() - i - 1, segments[pos1]);
                                break;
                            }
                            if( transcript[i] != 'I' ) ++cnt;
                        }
                        if( i < 0 ) {// no cut point found; exon should not be so bad
                            NCBI_THROW(CAlgoAlignException, eInternal, g_msg_InvalidRange);
                        }
                        break;
                    }
                }
                
                //trim right exon
                // NOTE(review): m_box[0] is presumably the first mRNA position of the exon - confirm
                TSeqPos p2 =  segments[pos2].m_box[0];
                ITERATE(vector<TSeqRange>, it, tr) {
                    // only the first CDS interval containing p2 is considered
                    if( p2 >= it->GetFrom() && p2 <= it->GetTo() ) {
                        // (3 - offset of p2 from the CDS interval start modulo 3) modulo 3
                        TSeqPos cut_mrna_len = ( 3 - ( p2 - it->GetFrom()) % 3  ) %3, cnt = 0;
                        // copy of the transcript: CutFromLeft below modifies the segment
                        string transcript = segments[pos2].m_details;
                        int i = 0;
                        // walk left to right; cnt counts the characters other than 'I' passed so far
                        for( ; i < (int)transcript.size(); ++i) {
                            if( cnt%3 == cut_mrna_len && transcript[i] == 'M' ) { //cut point
                                // drop the i characters to the left of index i
                                CutFromLeft(i, segments[pos2]);
                                break;
                            }
                            if( transcript[i] != 'I' ) ++cnt;
                        }
                        if( i == (int)transcript.size() ) {// no cut point found; exon should not be so bad
                            NCBI_THROW(CAlgoAlignException, eInternal, g_msg_InvalidRange);
                        }
                        break;
                    }
                }
            }
        }
        // called again after the exons have been trimmed
        AdjustGaps(segments);
    }
    // NOTE(review): the closing brace of this function is not visible in this excerpt.
Esempio n. 4
0
// Tries to improve the segment by trimming its right end.
// First the segment is cut back to a match and to the best-scoring prefix;
// then the right part is scanned for a point where its identity is lower
// than the identity of the rest of the segment by more than
// m_MaxPartExonIdentDrop. Everything to the right of the last such point
// (the point included) is cut off.
void CSplignTrim::ImproveFromRight(TSeg& s)
{
    CutToMatchRight(s);
    Cut50FromRight(s);
    if(ThrowAwayShortExon(s)) {
        return;
    }

    const string& transcript = s.m_details;
    const size_t size = transcript.size();

    int remaining_len = (int)size;
    if(remaining_len <= 20) {
        return; // too short
    }

    // total number of matches in the transcript
    int remaining_matches = 0;
    for(size_t k = 0; k < size; ++k) {
        if(transcript[k] == 'M') {
            ++remaining_matches;
        }
    }

    // left boundary of the scan: starting at index max(20, len/5) - 1, take
    // the first 'M' and then the first non-'M' character after it
    size_t left_bound = max(20, remaining_len/5) - 1;
    left_bound = transcript.find('M', left_bound);
    if(left_bound == string::npos) {
        return; // no M found
    }
    left_bound = transcript.find_first_not_of('M', left_bound);
    if(left_bound == string::npos) {
        return; // only matches up to the right end, nothing to trim
    }

    // transcript[left_bound - 1] is 'M' and transcript[left_bound] is not;
    // the scan below covers indices size-1 down to left_bound inclusive

    const double epsilon = 1e-10;
    size_t cut_len = 0; // characters to cut from the right; 0 - no trimming point
    int window_matches = 0;
    int window_len = 0;

    for(size_t scanned = 1; scanned <= size - left_bound; ++scanned) {
        if(transcript[size - scanned] == 'M') {
            ++window_matches;
        }
        ++window_len;

        // identity of the right window and of what remains to the left of it
        const double rid = window_matches / (double)window_len;
        const double lid = (remaining_matches - window_matches)
                           / (double)(remaining_len - window_len);

        // dropoff check
        if( lid - rid - m_MaxPartExonIdentDrop > epsilon ) {
            cut_len = scanned;
            // the trimmed part is excluded from further statistics
            remaining_matches -= window_matches;
            remaining_len -= window_len;
            window_matches = 0;
            window_len = 0;
        }
    }

    if(cut_len == 0) {
        return; // no trimming point found
    }

    // actual trimming
    CutFromRight(cut_len, s);
    ThrowAwayShortExon(s);
}