// Rebuilds `segments` so that no two non-exon (gap) segments are adjacent.
//
// Exon segments are copied through unchanged. Each run of consecutive
// non-exon segments is replaced by a single segment (the first one of the
// run), whose box is stretched to fill the space between its neighbours:
//   - start (m_box[0], m_box[2]): one past the end of the preceding exon,
//     unless the run begins at index 0, in which case the start is kept;
//   - end (m_box[1], m_box[3]): one before the start of the following exon,
//     or, for a run that reaches the end of the list, the end coordinates of
//     the very last segment.
// The merged gap gets m_len recomputed from m_box[0..1] and an empty
// m_details string.
void CSplignTrim::AdjustGaps(TSegs& segments)
{
    const int kNoOpenGap = -1;

    TSegs merged;
    int run_head = kNoOpenGap; // index of the first segment of the open gap run

    for (size_t idx = 0; idx < segments.size(); ++idx) {
        TSeg& cur = segments[idx];

        if (cur.m_exon) {
            if (run_head != kNoOpenGap) {
                // An exon terminates the open gap run: the gap ends right
                // before this exon starts.
                TSeg& gap = segments[run_head];
                gap.m_box[1] = cur.m_box[0] - 1;
                gap.m_box[3] = cur.m_box[2] - 1;
                gap.m_len = gap.m_box[1] - gap.m_box[0] + 1;
                gap.m_details.clear();
                merged.push_back(gap);
                run_head = kNoOpenGap;
            }
            merged.push_back(cur);
            continue;
        }

        if (run_head != kNoOpenGap) {
            continue; // already inside a gap run; this segment is absorbed
        }

        // First segment of a new gap run.
        run_head = int(idx);
        if (idx > 0) {
            // No run was open, so the previous segment is an exon:
            // start the gap immediately after it.
            const TSeg& prev = segments[idx - 1];
            cur.m_box[0] = prev.m_box[1] + 1;
            cur.m_box[2] = prev.m_box[3] + 1;
        }
    }

    if (run_head != kNoOpenGap) {
        // The list ends inside a gap run: extend the gap to the end of the
        // last segment.
        TSeg& gap = segments[run_head];
        const size_t last = segments.size() - 1;
        gap.m_box[1] = segments[last].m_box[1];
        gap.m_box[3] = segments[last].m_box[3];
        gap.m_len = gap.m_box[1] - gap.m_box[0] + 1;
        gap.m_details.clear();
        merged.push_back(gap);
    }

    segments.swap(merged);
}
// aka stich holes //joins exons segments[p1] and segments[p1] into a singe exon //everithing in between becomes a regular gap in query adjacent to a regular gap in subject void CSplignTrim::JoinExons(TSegs& segments, TSeqPos p1, TSeqPos p2) { //sanity check if( p1 >= segments.size() ) return; if( p2 >= segments.size() ) return; if( !segments[p1].m_exon ) return; if( !segments[p2].m_exon ) return; size_t pos1 = min( p1, p2); size_t pos2 = max( p1, p2); if( segments[pos1].m_box[1] >= segments[pos2].m_box[0] || segments[pos1].m_box[3] >= segments[pos2].m_box[2] ) { return; // segments intersect } //join TSegs new_segments; for( size_t pos = 0; pos < pos1; ++pos) { new_segments.push_back(segments[pos]); } //joint exon TSeg s(segments[pos1]); s.m_box[1] = segments[pos2].m_box[1]; s.m_box[3] = segments[pos2].m_box[3]; if( segments[pos1].m_box[1] + 1 < segments[pos2].m_box[0]) { s.m_details.append(segments[pos2].m_box[0] - segments[pos1].m_box[1] - 1, 'D'); } if( segments[pos1].m_box[3] + 1 < segments[pos2].m_box[2]) { s.m_details.append(segments[pos2].m_box[2] - segments[pos1].m_box[3] - 1, 'I'); } s.m_details += segments[pos2].m_details; Update(s); new_segments.push_back(s); //write the rest for( size_t pos = ++pos2; pos < segments.size(); ++pos) { new_segments.push_back(segments[pos]); } segments.swap(new_segments); }
//check if the exon segments[p] abuts another exon in genomic coordinates, right side bool CSplignTrim::HasAbuttingExonOnRight(TSegs segments, TSeqPos p) { TSeqPos len = segments.size(); TSeqPos np = p+1; for( ; np < len; ++np) { if( segments[np].m_exon ) break; } if(np == len) {// no exons on the right found return false; } if( segments[p].m_box[3] + 1 == segments[np].m_box[2] ) { //abutting return true; } return false; }
BEGIN_NCBI_SCOPE

// Checks whether segments[p] abuts another exon on its right side in genomic
// coordinates (m_box[2..3]).
//
// Scans forward from p+1 for the first segment with m_exon set and returns
// true only when that exon's genomic start (m_box[2]) is exactly one past the
// genomic end (m_box[3]) of segments[p]. Returns false when no exon exists to
// the right, or when p is not a valid index into segments.
// segments[p] itself is expected to be an exon; that is not verified here.
//
// NOTE(review): segments is received by value, so every call copies it. The
// parameter type is left as is because the declaration is outside this view.
bool CSplignTrim::HasAbuttingExonOnRight(TSegs segments, TSeqPos p)
{
    const TSeqPos len = segments.size();
    if( p >= len ) {
        // Out-of-range index: without this guard np would start past len,
        // the "not found" test would be skipped and segments[p] would be
        // read out of bounds.
        return false;
    }

    TSeqPos np = p + 1;
    while( np < len && !segments[np].m_exon ) {
        ++np;
    }
    if( np >= len ) { // no exons on the right found
        return false;
    }

    // abutting <=> the next exon starts right after this one ends
    return segments[p].m_box[3] + 1 == segments[np].m_box[2];
}
//trims exons around internal alignment gaps to complete codons //if CDS can be retrieved from bioseq void CSplignTrim::TrimHolesToCodons(TSegs& segments, CBioseq_Handle& mrna_bio_handle, bool mrna_strand, size_t mrna_len) { if( mrna_bio_handle ) { //collect CDS intervals (could be more than one in a case of ribosomal slippage) vector<TSeqRange> tr; for(CFeat_CI ci(mrna_bio_handle, SAnnotSelector(CSeqFeatData::e_Cdregion)); ci; ++ci) { for(CSeq_loc_CI slit(ci->GetLocation()); slit; ++slit) { TSeqRange r, ori; ori = slit.GetRange(); if( mrna_strand ) { r = ori; } else {//reverse r.SetFrom(mrna_len - ori.GetTo() - 1); r.SetTo(mrna_len - ori.GetFrom() - 1); } tr.push_back(r); } } if(tr.empty()) return;// CDS not found //trim AdjustGaps(segments);//make sure there is no adjacent gaps size_t pos1 = 0, pos2 = 2; for(; pos2 < segments.size(); ++pos1, ++pos2) { if( segments[pos1].m_exon && !segments[pos1+1].m_exon && segments[pos2].m_exon ) {//candidate for trimming //trim left exon TSeqPos p1 = segments[pos1].m_box[1]; ITERATE(vector<TSeqRange>, it, tr) { if( p1 >= it->GetFrom() && p1 <= it->GetTo() ) { TSeqPos cut_mrna_len = (p1 + 1 - it->GetFrom()) % 3, cnt = 0; string transcript = segments[pos1].m_details; int i = (int)transcript.size() - 1; for(; i>=0; --i) { if( cnt%3 == cut_mrna_len && transcript[i] == 'M' ) { //cut point CutFromRight(transcript.size() - i - 1, segments[pos1]); break; } if( transcript[i] != 'I' ) ++cnt; } if( i < 0 ) {// exon should not be so bad NCBI_THROW(CAlgoAlignException, eInternal, g_msg_InvalidRange); } break; } } //trim right exon TSeqPos p2 = segments[pos2].m_box[0]; ITERATE(vector<TSeqRange>, it, tr) { if( p2 >= it->GetFrom() && p2 <= it->GetTo() ) { TSeqPos cut_mrna_len = ( 3 - ( p2 - it->GetFrom()) % 3 ) %3, cnt = 0; string transcript = segments[pos2].m_details; int i = 0; for( ; i < (int)transcript.size(); ++i) { if( cnt%3 == cut_mrna_len && transcript[i] == 'M' ) { //cut point CutFromLeft(i, segments[pos2]); break; } if( transcript[i] 
!= 'I' ) ++cnt; } if( i == (int)transcript.size() ) {// exon should not be so bad NCBI_THROW(CAlgoAlignException, eInternal, g_msg_InvalidRange); } break; } } } } AdjustGaps(segments); }