Exemple #1
0
// Build the aligned sequence string of a row for an alignment range.
//
// buffer   - output; cleared first, then filled chunk by chunk.
// row      - the alignment row whose sequence is requested.
// aln_rng  - the alignment range passed on to GetAlnChunks().
//
// Chunks are obtained with fSkipInserts | fSkipUnalignedGaps.  A chunk
// flagged fSeq contributes its sequence data (translated through
// TranslateNAToAA when the row width is 3); any other chunk contributes
// GetAlnRange().GetLength() fill characters: the end character when the
// chunk is flagged fNoSeqOnLeft or fNoSeqOnRight, otherwise the row's
// gap character.
//
// Returns a reference to buffer.
string& CAlnVec::GetAlnSeqString(string& buffer,
                                 TNumrow row,
                                 const TSignedRange& aln_rng) const
{
    string buff;
    buffer.erase();

    CSeqVector& seq_vec      = x_GetSeqVector(row);
    TSeqPos     seq_vec_size = seq_vec.size();
    
    // get the chunks which are aligned to seq on anchor
    CRef<CAlnMap::CAlnChunkVec> chunk_vec = 
        GetAlnChunks(row, aln_rng, fSkipInserts | fSkipUnalignedGaps);
    
    // for each chunk
    for (int i=0; i<chunk_vec->size(); i++) {
        CConstRef<CAlnMap::CAlnChunk> chunk = (*chunk_vec)[i];
                
        if (chunk->GetType() & fSeq) {
            // add the sequence string
            if (IsPositiveStrand(row)) {
                seq_vec.GetSeqData(chunk->GetRange().GetFrom(),
                                   chunk->GetRange().GetTo() + 1,
                                   buff);
            } else {
                // minus strand: the range is mirrored against the
                // sequence vector size before fetching
                seq_vec.GetSeqData(seq_vec_size - chunk->GetRange().GetTo() - 1,
                                   seq_vec_size - chunk->GetRange().GetFrom(),
                                   buff);
            }
            if (GetWidth(row) == 3) {
                TranslateNAToAA(buff, buff, GetGenCode(row));
            }
            buffer += buff;
        } else {
            // add appropriate number of gap/end chars
            const int n = chunk->GetAlnRange().GetLength();
            char fill_ch;
            if (chunk->GetType() & fNoSeqOnLeft  ||
                chunk->GetType() & fNoSeqOnRight) {
                fill_ch = GetEndChar();
            } else {
                fill_ch = GetGapChar(row);
            }
            // Append the fill directly: no temporary heap buffer that
            // could leak if the append throws, and a non-positive length
            // is simply skipped instead of reaching new[].
            if (n > 0) {
                buffer.append(static_cast<string::size_type>(n), fill_ch);
            }
        }
    }
    return buffer;
}
Exemple #2
0
// Fill m_Cache with `count` residues starting at sequence position `start`,
// taken from the current segment m_Seg, and record `start` in m_CachePos.
//
// The cache is first resized via x_ResizeCache(count).  Then, depending on
// the segment type:
//   - eSeqData: the segment's Seq-data is copied into the cache, going
//     through a conversion table when the coding differs, the segment is on
//     the minus strand, or a case conversion is requested.
//   - eSeqGap:  the cache is filled with the gap character.
//   - anything else: CSeqVectorException(eDataError) is thrown.
//
// Throws CSeqVectorException(eCodingError) when no conversion table exists
// for differing codings, for Ncbipna / Ncbipaa data, and for any data coding
// not handled by the inner switch.
//
// The debug assertions require that m_Seg is not the end segment and that
// `start` lies inside [m_Seg.GetPosition(), m_Seg.GetEndPosition()).
void CSeqVector_CI::x_FillCache(TSeqPos start, TSeqPos count)
{
    _ASSERT(m_Seg.GetType() != CSeqMap::eSeqEnd);
    _ASSERT(start >= m_Seg.GetPosition());
    _ASSERT(start < m_Seg.GetEndPosition());

    x_ResizeCache(count);

    switch ( m_Seg.GetType() ) {
    case CSeqMap::eSeqData:
    {
        const CSeq_data& data = m_Seg.GetRefData();
        // NOTE(review): this test sits inside `case CSeqMap::eSeqData` yet
        // also requires GetType() == eSeqGap; if GetType() returns the same
        // value as in the switch above, the branch can never be taken.  If
        // it were taken, the call below re-enters with identical arguments.
        // Confirm what the intended workaround is.
        if ( data.IsGap() && m_Seg.GetType() == CSeqMap::eSeqGap ) {
            // workaround for erroneously split gap Seq-data
            x_FillCache(start, count);
            return;
        }
        
        TCoding dataCoding = data.Which();
        TCoding cacheCoding = x_GetCoding(m_Coding, dataCoding);
        bool reverse = m_Seg.GetRefMinusStrand();

        // When a randomizer is set and the cache would be Ncbi2na converted
        // from a different coding, the data is converted to Ncbi4na instead
        // and handed to the randomizer after the copy (see below).
        bool randomize = false;
        if ( cacheCoding != dataCoding &&
             cacheCoding == CSeq_data::e_Ncbi2na &&
             m_Randomizer) {
            cacheCoding = CSeq_data::e_Ncbi4na;
            randomize = true;
        }

        // A conversion table is looked up only when something has to change:
        // the coding, the strand, or the case.
        const char* table = 0;
        if ( cacheCoding != dataCoding || reverse ||
             m_CaseConversion != eCaseConversion_none ) {
            table = sx_GetConvertTable(dataCoding, cacheCoding,
                                       reverse, m_CaseConversion);
            // A missing table is an error only if the codings really differ.
            if ( !table && cacheCoding != dataCoding ) {
                NCBI_THROW_FMT(CSeqVectorException, eCodingError,
                               "Incompatible sequence codings: "<<
                               dataCoding<<" -> "<<cacheCoding);
            }
        }

        // Offset of the requested range inside the referenced data.
        TSeqPos dataPos;
        if ( reverse ) {
            // Revert segment offset: measure from the end of the referenced
            // range, so the block of `count` residues ends where the forward
            // offset would begin.
            dataPos = m_Seg.GetRefEndPosition() -
                (start - m_Seg.GetPosition()) - count;
        }
        else {
            dataPos = m_Seg.GetRefPosition() +
                (start - m_Seg.GetPosition());
        }

        // Dispatch on the source coding to the copy routine of matching
        // width (copy_8bit_any / copy_4bit_any / copy_2bit_any).
        switch ( dataCoding ) {
        case CSeq_data::e_Iupacna:
            copy_8bit_any(m_Cache, count, data.GetIupacna().Get(), dataPos,
                          table, reverse);
            break;
        case CSeq_data::e_Iupacaa:
            copy_8bit_any(m_Cache, count, data.GetIupacaa().Get(), dataPos,
                          table, reverse);
            break;
        case CSeq_data::e_Ncbi2na:
            copy_2bit_any(m_Cache, count, data.GetNcbi2na().Get(), dataPos,
                            table, reverse);
            break;
        case CSeq_data::e_Ncbi4na:
            copy_4bit_any(m_Cache, count, data.GetNcbi4na().Get(), dataPos,
                          table, reverse);
            break;
        case CSeq_data::e_Ncbi8na:
            copy_8bit_any(m_Cache, count, data.GetNcbi8na().Get(), dataPos,
                          table, reverse);
            break;
        case CSeq_data::e_Ncbipna:
            // no break needed: the macro throws
            NCBI_THROW(CSeqVectorException, eCodingError,
                       "Ncbipna conversion not implemented");
        case CSeq_data::e_Ncbi8aa:
            copy_8bit_any(m_Cache, count, data.GetNcbi8aa().Get(), dataPos,
                          table, reverse);
            break;
        case CSeq_data::e_Ncbieaa:
            copy_8bit_any(m_Cache, count, data.GetNcbieaa().Get(), dataPos,
                          table, reverse);
            break;
        case CSeq_data::e_Ncbipaa:
            // no break needed: the macro throws
            NCBI_THROW(CSeqVectorException, eCodingError,
                       "Ncbipaa conversion not implemented");
        case CSeq_data::e_Ncbistdaa:
            copy_8bit_any(m_Cache, count, data.GetNcbistdaa().Get(), dataPos,
                          table, reverse);
            break;
        default:
            NCBI_THROW_FMT(CSeqVectorException, eCodingError,
                           "Invalid data coding: "<<dataCoding);
        }
        // Post-process the Ncbi4na data when randomization was selected above.
        if ( randomize ) {
            m_Randomizer->RandomizeData(m_Cache, count, start);
        }
        break;
    }
    case CSeqMap::eSeqGap:
        if (m_Coding == CSeq_data::e_Ncbi2na  &&  m_Randomizer) {
            // Fill with the Ncbi4na gap character, then let the randomizer
            // process the cache.
            fill_n(m_Cache, count,
                   sx_GetGapChar(CSeq_data::e_Ncbi4na, eCaseConversion_none));
            m_Randomizer->RandomizeData(m_Cache, count, start);
        }
        else {
            fill_n(m_Cache, count, GetGapChar());
        }
        break;
    default:
        NCBI_THROW_FMT(CSeqVectorException, eDataError,
                       "Invalid segment type: "<<m_Seg.GetType());
    }
    // Reached only when the cache was filled without throwing.
    m_CachePos = start;
}
Exemple #3
0
// Collect one alignment column: for every row, the character found at
// alignment position aln_pos.
//
// buffer         - output; resized to GetNumRows() and written per row.
// aln_pos        - alignment position; values beyond GetAlnStop() are
//                  clamped to GetAlnStop().
// residue_count  - optional; when non-null, the slot selected by
//                  FromIupac(<row character>) is incremented for each
//                  sequence residue.
// gaps_in_count  - when true (and residue_count is non-null), gap/end
//                  characters are counted the same way.
//
// Returns a reference to buffer.
string& CAlnVec::GetColumnVector(string& buffer,
                                 TSeqPos aln_pos,
                                 TResidueCount * residue_count,
                                 bool gaps_in_count) const
{
    buffer.resize(GetNumRows(), GetEndChar());

    // out-of-range positions fall back to the last alignment column
    if (aln_pos > GetAlnStop()) {
        aln_pos = GetAlnStop();
    }

    const TNumseg seg     = GetSeg(aln_pos);
    const TSeqPos offset  = aln_pos - GetAlnStart(seg);
    const TSeqPos seg_len = GetLen(seg);

    for (TNumrow row = 0; row < m_NumRows; row++) {
        TSignedSeqPos seq_pos = GetStart(row, seg);

        if (seq_pos < 0) {
            // no sequence in this segment: gap or end character
            const char gap_ch = GetGapChar(row);
            buffer[row] = gap_ch;

            // The segment type only matters when the end character is
            // distinguishable from the gap character.
            if (GetEndChar() != gap_ch) {
                const TSegTypeFlags seg_type = GetSegType(row, seg);
                const bool outside_seq =
                    (seg_type & fNoSeqOnLeft)  ||  (seg_type & fNoSeqOnRight);
                if (outside_seq) {
                    buffer[row] = GetEndChar();
                }
            }

            if (gaps_in_count  &&  residue_count) {
                (*residue_count)[FromIupac(buffer[row])]++;
            }
            continue;
        }

        // sequence residue: move from the segment start to the column,
        // counting from the far end of the segment on the minus strand
        const bool plus = IsPositiveStrand(row);
        if (plus) {
            seq_pos += offset;
        } else {
            seq_pos += seg_len - 1 - offset;
        }

        CSeqVector& seq_vec = x_GetSeqVector(row);
        if (GetWidth(row) != 3) {
            buffer[row] =
                seq_vec[plus ? seq_pos : seq_vec.size() - seq_pos - 1];
        } else {
            // width 3: fetch three residues and translate them
            string na_buff, aa_buff;
            if (plus) {
                seq_vec.GetSeqData(seq_pos, seq_pos + 3, na_buff);
            } else {
                TSeqPos vec_len = seq_vec.size();
                seq_vec.GetSeqData(vec_len - seq_pos - 3,
                                   vec_len - seq_pos,
                                   na_buff);
            }
            TranslateNAToAA(na_buff, aa_buff, GetGenCode(row));
            buffer[row] = aa_buff[0];
        }

        if (residue_count) {
            (*residue_count)[FromIupac(buffer[row])]++;
        }
    } // for row

    return buffer;
}
Exemple #4
0
// Build the string of a whole row across all segments and, optionally,
// record inserts and per-screen sequence coordinates.
//
// row               - the alignment row to render.
// buffer            - output; assigned once, at the end.
// insert_aln_starts - optional; receives the alignment position
//                     (aln_pos / width) of each recorded insert.
// insert_starts     - optional; receives the sequence start of each insert.
// insert_lens       - optional; receives the length of each insert.
//                     Inserts are recorded only when both insert_starts and
//                     insert_lens are non-null; adjacent inserts are merged.
// scrn_width        - screen width; multiplied by the row width before use.
// scrn_lefts        - optional; receives the left sequence coordinate of
//                     each screen.
// scrn_rights       - optional; receives the right sequence coordinate of
//                     each screen.  Coordinates are recorded only when
//                     scrn_width is non-zero and both lists are non-null.
//
// A segment in which the anchor has no sequence is treated as an insert
// and adds nothing to the output.  Every other segment adds m_Lens[seg]
// characters: sequence data where the row has it (padded if the fetched
// string is shorter), otherwise the end character outside the row's
// left/right sequence segments and the gap character inside them.
//
// Returns a reference to buffer.
string& CAlnVec::GetWholeAlnSeqString(TNumrow       row,
                                      string&       buffer,
                                      TSeqPosList * insert_aln_starts,
                                      TSeqPosList * insert_starts,
                                      TSeqPosList * insert_lens,
                                      unsigned int  scrn_width,
                                      TSeqPosList * scrn_lefts,
                                      TSeqPosList * scrn_rights) const
{
    TSeqPos       aln_pos = 0,
        len = 0,
        curr_pos = 0,
        anchor_pos = 0,
        scrn_pos = 0,
        prev_len = 0,
        ttl_len = 0;
    TSignedSeqPos start = -1,
        stop = -1,
        scrn_lft_seq_pos = -1,
        scrn_rgt_seq_pos = -1,
        prev_aln_pos = -1,
        prev_start = -1;
    TNumseg       seg;
    int           pos, nscrns, delta;
    
    TSeqPos aln_len = GetAlnStop() + 1;

    bool anchored = IsSetAnchor();
    bool plus     = IsPositiveStrand(row);
    int  width    = GetWidth(row);

    scrn_width *= width;

    const bool record_inserts = insert_starts && insert_lens;
    const bool record_coords  = scrn_width && scrn_lefts && scrn_rights;

    // The row is assembled in a string rather than a raw new[] buffer:
    // nothing leaks if GetSeqString() or a push_back throws, and an append
    // cannot write past the end of the allocation.
    string row_buff;
    row_buff.reserve(aln_len);
    string buff;
    
    const TNumseg& left_seg = x_GetSeqLeftSeg(row);
    const TNumseg& right_seg = x_GetSeqRightSeg(row);

    // loop through all segments
    for (seg = 0, pos = row, aln_pos = 0, anchor_pos = m_Anchor;
         seg < m_NumSegs;
         ++seg, pos += m_NumRows, anchor_pos += m_NumRows) {
        
        const TSeqPos& seg_len = m_Lens[seg];
        start = m_Starts[pos];
        len = seg_len * width;

        if (anchored  &&  m_Starts[anchor_pos] < 0) {
            if (start >= 0) {
                // record the insert if requested
                if (record_inserts) {
                    if (prev_aln_pos == (TSignedSeqPos)(aln_pos / width)  &&
                        start == (TSignedSeqPos)(plus ? prev_start + prev_len :
                                  prev_start - len)) {
                        // consolidate the adjacent inserts
                        ttl_len += len;
                        insert_lens->pop_back();
                        insert_lens->push_back(ttl_len);
                        if (!plus) {
                            insert_starts->pop_back();
                            insert_starts->push_back(start);
                        }
                    } else {
                        prev_aln_pos = aln_pos / width;
                        ttl_len = len;
                        insert_starts->push_back(start);
                        // insert_aln_starts is not part of the
                        // record_inserts test, so check it here instead of
                        // dereferencing a possibly null pointer
                        if (insert_aln_starts) {
                            insert_aln_starts->push_back(prev_aln_pos);
                        }
                        insert_lens->push_back(len);
                    }
                    prev_start = start;
                    prev_len = len;
                }
            }
        } else {
            if (start >= 0) {
                stop = start + len - 1;

                // add regular sequence to buffer
                GetSeqString(buff, row, start, stop);
                TSeqPos buf_len = min<TSeqPos>(buff.size(), seg_len);
                row_buff.append(buff, 0, buf_len);
                if (buf_len < seg_len) {
                    // Not enough chars in the sequence, add gap
                    char fill_ch;

                    if (seg < left_seg  ||  seg > right_seg) {
                        fill_ch = GetEndChar();
                    } else {
                        fill_ch = GetGapChar(row);
                    }

                    row_buff.append(
                        static_cast<string::size_type>(seg_len - buf_len),
                        fill_ch);
                }

                // take care of coords if necessary
                if (record_coords) {
                    if (scrn_lft_seq_pos < 0) {
                        scrn_lft_seq_pos = plus ? start : stop;
                        if (scrn_rgt_seq_pos < 0) {
                            scrn_rgt_seq_pos = scrn_lft_seq_pos;
                        }
                    }
                    // previous scrns
                    nscrns = (aln_pos - scrn_pos) / scrn_width;
                    for (int i = 0; i < nscrns; i++) {
                        scrn_lefts->push_back(scrn_lft_seq_pos);
                        scrn_rights->push_back(scrn_rgt_seq_pos);
                        if (i == 0) {
                            scrn_lft_seq_pos = plus ? start : stop;
                        }
                        scrn_pos += scrn_width;
                    }
                    if (nscrns > 0) {
                        scrn_lft_seq_pos = plus ? start : stop;
                    }
                    // current scrns
                    nscrns = (aln_pos + len - scrn_pos) / scrn_width;
                    curr_pos = aln_pos;
                    for (int i = 0; i < nscrns; i++) {
                        delta = (plus ?
                                 scrn_width - (curr_pos - scrn_pos) :
                                 curr_pos - scrn_pos - scrn_width);
                        
                        scrn_lefts->push_back(scrn_lft_seq_pos);
                        if (plus ?
                            scrn_lft_seq_pos < start :
                            scrn_lft_seq_pos > stop) {
                            scrn_lft_seq_pos = (plus ? start : stop) +
                                delta;
                            scrn_rgt_seq_pos = scrn_lft_seq_pos +
                                (plus ? -1 : 1);
                        } else {
                            scrn_rgt_seq_pos = scrn_lft_seq_pos + (plus ? -1 : 1)
                                + delta;
                            scrn_lft_seq_pos += delta;
                        }
                        if (seg == left_seg  &&
                            scrn_lft_seq_pos == scrn_rgt_seq_pos) {
                            if (plus) {
                                scrn_rgt_seq_pos--;
                            } else {
                                scrn_rgt_seq_pos++;
                            }
                        }
                        scrn_rights->push_back(scrn_rgt_seq_pos);
                        curr_pos = scrn_pos += scrn_width;
                    }
                    if (aln_pos + len <= scrn_pos) {
                        scrn_lft_seq_pos = -1; // reset
                    }
                    scrn_rgt_seq_pos = plus ? stop : start;
                }
            } else {
                // add appropriate number of gap/end chars
                char fill_ch;
                
                if (seg < left_seg  ||  seg > right_seg) {
                    fill_ch = GetEndChar();
                } else {
                    fill_ch = GetGapChar(row);
                }
                
                // fill the destination directly; no scratch buffer needed
                row_buff.append(static_cast<string::size_type>(seg_len),
                                fill_ch);
            }
            aln_pos += len;
        }

    }

    // take care of the remaining coords if necessary
    if (record_coords) {
        // previous scrns
        TSeqPos pos_diff = aln_pos - scrn_pos;
        if (pos_diff > 0) {
            nscrns = pos_diff / scrn_width;
            if (pos_diff % scrn_width) {
                nscrns++;
            }
            for (int i = 0; i < nscrns; i++) {
                scrn_lefts->push_back(scrn_lft_seq_pos);
                scrn_rights->push_back(scrn_rgt_seq_pos);
                if (i == 0) {
                    scrn_lft_seq_pos = scrn_rgt_seq_pos;
                }
                scrn_pos += scrn_width;
            }
        }
    }

    // hand the assembled row to the caller; buffer is untouched until here
    buffer.swap(row_buff);
    return buffer;
}