Example #1
0
// Collapses every run of consecutive non-exon segments into a single gap
// segment, so that the resulting list never holds two adjacent gaps.
//
// For each run, the first non-exon segment of the run is reused as the gap:
//   - its start (m_box[0], m_box[2]) is moved to just past the preceding
//     segment's end, unless the run begins at index 0;
//   - its end (m_box[1], m_box[3]) is moved to just before the following
//     exon's start, or, for a trailing run, to the end of the last segment;
//   - m_len is recomputed from m_box[0]..m_box[1] and m_details is emptied.
// Exons are copied through unchanged. `segments` is replaced by the result.
void CSplignTrim::AdjustGaps(TSegs& segments)
{
    const size_t seg_count = segments.size();
    TSegs merged;

    // Index of the segment that opened the gap run currently being
    // collapsed, or -1 when no run is open.
    int open_gap = -1;
    if (seg_count > 0 && !segments[0].m_exon) {
        open_gap = 0;
    }

    for (size_t idx = 0; idx < seg_count; ++idx) {
        TSeg& cur = segments[idx];

        if (!cur.m_exon) {
            // Only the first segment of a run matters; the rest are absorbed.
            if (open_gap == -1) {
                open_gap = int(idx);
                if (idx > 0) {
                    const TSeg& prev = segments[idx - 1];
                    cur.m_box[0] = prev.m_box[1] + 1;
                    cur.m_box[2] = prev.m_box[3] + 1;
                }
            }
            continue;
        }

        // An exon terminates any open gap run: finish the gap and emit it
        // ahead of the exon.
        if (open_gap >= 0) {
            TSeg& gap = segments[open_gap];
            gap.m_box[1] = cur.m_box[0] - 1;
            gap.m_box[3] = cur.m_box[2] - 1;
            gap.m_len = gap.m_box[1] - gap.m_box[0] + 1;
            gap.m_details.clear();
            merged.push_back(gap);
            open_gap = -1;
        }
        merged.push_back(cur);
    }

    // A run still open here reaches the end of the list; stretch it to the
    // end coordinates of the very last segment.
    if (open_gap >= 0) {
        const size_t last = seg_count - 1;
        TSeg& gap = segments[open_gap];
        gap.m_box[1] = segments[last].m_box[1];
        gap.m_box[3] = segments[last].m_box[3];
        gap.m_len = gap.m_box[1] - gap.m_box[0] + 1;
        gap.m_details.clear();
        merged.push_back(gap);
    }

    segments.swap(merged);
}
Example #2
0
// aka "stitch holes"
// Joins exons segments[p1] and segments[p2] into a single exon.
// Everything in between is dropped and represented in the joined exon's
// transcript as a run of 'D' (sized by the distance between the exons in
// m_box[0..1]) followed by a run of 'I' (sized by the distance in
// m_box[2..3]), i.e. a regular gap in query adjacent to a regular gap in
// subject.
// Does nothing when either index is out of range, either segment is not an
// exon, or the two exons touch/overlap in either coordinate pair.
void CSplignTrim::JoinExons(TSegs& segments, TSeqPos p1, TSeqPos p2)
{
    // sanity checks
    if (p1 >= segments.size() || p2 >= segments.size()) {
        return;
    }
    if (!segments[p1].m_exon || !segments[p2].m_exon) {
        return;
    }

    // order the two indices
    const size_t lo = (p1 < p2) ? p1 : p2;
    const size_t hi = (p1 < p2) ? p2 : p1;

    const TSeg& left  = segments[lo];
    const TSeg& right = segments[hi];

    if (left.m_box[1] >= right.m_box[0] || left.m_box[3] >= right.m_box[2]) {
        return; // segments intersect
    }

    // everything before the left exon is kept as is
    TSegs joined_segments(segments.begin(), segments.begin() + lo);

    // the joint exon: starts where the left one starts, ends where the
    // right one ends
    TSeg joint(left);
    joint.m_box[1] = right.m_box[1];
    joint.m_box[3] = right.m_box[3];
    if (left.m_box[1] + 1 < right.m_box[0]) {
        joint.m_details.append(right.m_box[0] - left.m_box[1] - 1, 'D');
    }
    if (left.m_box[3] + 1 < right.m_box[2]) {
        joint.m_details.append(right.m_box[2] - left.m_box[3] - 1, 'I');
    }
    joint.m_details += right.m_details;
    Update(joint);
    joined_segments.push_back(joint);

    // everything after the right exon is kept as is
    joined_segments.insert(joined_segments.end(),
                           segments.begin() + hi + 1,
                           segments.end());

    segments.swap(joined_segments);
}
Example #3
0
// Checks whether the exon segments[p] abuts another exon on its right side
// in genomic coordinates.
//
// Looks for the nearest segment after p that is flagged as an exon and
// reports whether it starts (m_box[2]) exactly one position past the end
// (m_box[3]) of segments[p].
//
// Returns false when p is not a valid index, when there is no exon to the
// right of p, or when the next exon is not adjacent.
bool CSplignTrim::HasAbuttingExonOnRight(TSegs segments, TSeqPos p)
{
    const size_t count = segments.size();

    // Guard against an out-of-range index: without it both segments[p] and
    // segments[np] below would be read past the end of the container.
    if (p >= count) {
        return false;
    }

    // Do the index arithmetic in size_t so that p + 1 cannot wrap around.
    size_t np = size_t(p) + 1;
    while (np < count && !segments[np].m_exon) {
        ++np;
    }
    if (np >= count) { // no exons on the right found
        return false;
    }

    return segments[p].m_box[3] + 1 == segments[np].m_box[2]; // abutting?
}
Example #4
0
BEGIN_NCBI_SCOPE

// Checks whether the exon segments[p] abuts another exon on its right side
// in genomic coordinates.
//
// Looks for the nearest segment after p that is flagged as an exon and
// reports whether it starts (m_box[2]) exactly one position past the end
// (m_box[3]) of segments[p].
//
// Returns false when p is not a valid index, when there is no exon to the
// right of p, or when the next exon is not adjacent.
bool CSplignTrim::HasAbuttingExonOnRight(TSegs segments, TSeqPos p)
{
    const size_t count = segments.size();

    // Guard against an out-of-range index: without it both segments[p] and
    // segments[np] below would be read past the end of the container.
    if (p >= count) {
        return false;
    }

    // Do the index arithmetic in size_t so that p + 1 cannot wrap around.
    size_t np = size_t(p) + 1;
    while (np < count && !segments[np].m_exon) {
        ++np;
    }
    if (np >= count) { // no exons on the right found
        return false;
    }

    return segments[p].m_box[3] + 1 == segments[np].m_box[2]; // abutting?
}
Example #5
0
//trims exons around internal alignment gaps to complete codons
//if CDS can be retrieved from bioseq
void CSplignTrim::TrimHolesToCodons(TSegs& segments, CBioseq_Handle& mrna_bio_handle, bool mrna_strand, size_t mrna_len)
{

    if( mrna_bio_handle ) {
        //collect CDS intervals (could be more than one in a case of ribosomal slippage)
        vector<TSeqRange> tr;
        for(CFeat_CI ci(mrna_bio_handle, SAnnotSelector(CSeqFeatData::e_Cdregion)); ci; ++ci) {
            for(CSeq_loc_CI slit(ci->GetLocation()); slit; ++slit) {
                TSeqRange r, ori;
                ori = slit.GetRange();
                if( mrna_strand ) {
                    r = ori;
                } else {//reverse
                    r.SetFrom(mrna_len - ori.GetTo() - 1);
                    r.SetTo(mrna_len - ori.GetFrom() - 1);
                }
                tr.push_back(r);
            }
        }

        if(tr.empty()) return;// CDS not found

        //trim
        AdjustGaps(segments);//make sure there is no adjacent gaps
        size_t pos1 = 0, pos2 = 2;
        for(; pos2 < segments.size(); ++pos1, ++pos2) {
            if( segments[pos1].m_exon && !segments[pos1+1].m_exon && segments[pos2].m_exon ) {//candidate for trimming
                
                //trim left exon    
                TSeqPos p1 = segments[pos1].m_box[1];
                ITERATE(vector<TSeqRange>, it, tr) {
                    if( p1 >= it->GetFrom() && p1 <= it->GetTo() ) {
                        TSeqPos cut_mrna_len = (p1 + 1 - it->GetFrom()) % 3, cnt = 0;
                        string transcript = segments[pos1].m_details;
                        int i = (int)transcript.size() - 1;
                        for(; i>=0; --i) {
                            if( cnt%3 == cut_mrna_len &&  transcript[i] == 'M' ) { //cut point  
                                CutFromRight(transcript.size() - i - 1, segments[pos1]);
                                break;
                            }
                            if( transcript[i] != 'I' ) ++cnt;
                        }
                        if( i < 0 ) {// exon should not be so bad   
                            NCBI_THROW(CAlgoAlignException, eInternal, g_msg_InvalidRange);
                        }
                        break;
                    }
                }
                
                //trim right exon   
                TSeqPos p2 =  segments[pos2].m_box[0];
                ITERATE(vector<TSeqRange>, it, tr) {
                    if( p2 >= it->GetFrom() && p2 <= it->GetTo() ) {
                        TSeqPos cut_mrna_len = ( 3 - ( p2 - it->GetFrom()) % 3  ) %3, cnt = 0;
                        string transcript = segments[pos2].m_details;
                        int i = 0;
                        for( ; i < (int)transcript.size(); ++i) {
                            if( cnt%3 == cut_mrna_len && transcript[i] == 'M' ) { //cut point   
                                CutFromLeft(i, segments[pos2]);
                                break;
                            }
                            if( transcript[i] != 'I' ) ++cnt;
                        }
                        if( i == (int)transcript.size() ) {// exon should not be so bad 
                            NCBI_THROW(CAlgoAlignException, eInternal, g_msg_InvalidRange);
                        }
                        break;
                    }
                }
            }
        }
        AdjustGaps(segments);
    }