示例#1
0
/// Fill the dynamic-programming matrix for the sub-problem described by
/// `data` and reconstruct the alignment transcript.
///
/// Two score rows (V: best score, F: best score ending with a gap in the
/// second sequence) are kept in memory; per-cell decisions are stored in a
/// CBacktraceMatrix4 and later consumed by x_SWDoBackTrace().
///
/// @param data
///   Offsets/lengths of the two segments to align and the end-space-free
///   flags; the resulting transcript is written to data->m_transcript by
///   the back-trace step.
/// @return
///   The best score found anywhere in the matrix when m_SmithWaterman is
///   set, otherwise the score of the last computed cell. Returns 0 if the
///   progress callback asks to terminate before any work is done.
/// @throws CAlgoAlignException
///   eBadParameter when Smith-Waterman mode is combined with offsets or
///   with non-free end gaps; eInternal when the score recomputed from the
///   transcript disagrees with the matrix score.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    // check data integrity: in Smith-Waterman mode the segment must cover
    // both sequences completely ...
    if( m_SmithWaterman && ( data->m_offset1 || m_SeqLen1 != data->m_len1 ||
                             data->m_offset2 || m_SeqLen2 != data->m_len2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with offsets provided");
    }

    // ... and all four end-space-free flags must be set
    if( m_SmithWaterman && ( !data->m_esf_L1 || !data->m_esf_R1 ||
                             !data->m_esf_L2 || !data->m_esf_R2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with end gap penalties");
    }

    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    vector<TScore> stl_rowV (N2), stl_rowF(N2);

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // an end gap is free only if requested AND the segment actually touches
    // the corresponding end of the full sequence
    const bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    const bool bFreeGapRight1 = data->m_esf_R1 &&
                                m_SeqLen1 == data->m_offset1 + data->m_len1;

    const bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    const bool bFreeGapRight2 = data->m_esf_R2 &&
                                m_SeqLen2 == data->m_offset2 + data->m_len2;

    const TScore wgleft1 = bFreeGapLeft1? 0: m_Wg;
    const TScore wsleft1 = bFreeGapLeft1? 0: m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // first row
    // note that stl_rowF[0] is not used in the main cycle
    // (rowF is pre-incremented before its first read)
    size_t k;
    stl_rowV[0] = wgleft1;
    for (k = 1; k < N2; ++k) {
        stl_rowV[k] = stl_rowV[k-1] + wsleft1;
        stl_rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    // the gap-opening seed was needed only to build the row; the corner
    // cell itself scores 0
    stl_rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // gap penalties for the first column
    const TScore wgleft2 (bFreeGapLeft2? 0: m_Wg);
    const TScore wsleft2 (bFreeGapLeft2? 0: m_Ws);

    const char * seq1 = m_Seq1 + data->m_offset1;
    const char * seq1_end = seq1 + data->m_len1;

    TScore V0 = wgleft2;
    TScore V = 0;      // best score in the current cell; equals the NW score at the end
    TScore best_V = 0; // best score in the whole matrix, i.e. the SW score

    // step back to the last cell of the first row; every SetAt() below
    // pre-increments k
    --k;

    for(;  seq1 != seq1_end && !m_terminate;  ++seq1) {

        // first column of the row
        backtrace_matrix.SetAt(++k, kMaskFc);

        // last row: trailing gaps are free if so requested
        if( seq1 + 1 == seq1_end && bFreeGapRight1) {
                wg1 = ws1 = 0;
        }

        unsigned char tracer;
        // Index through unsigned char: converting a plain (possibly signed)
        // char with the high bit set directly to size_t would sign-extend it
        // into a huge index.
        // NOTE(review): assumes residue codes are < NCBI_FSM_DIM - confirm.
        const TNCBIScore * row_sc = sm[static_cast<unsigned char>(*seq1)];

        const char * seq2 = m_Seq2 + data->m_offset2;
        const char * seq2_end = seq2 + data->m_len2;
        TScore wg2 = m_Wg, ws2 = m_Ws;

        // best ending with gap in seq1: open  seq1 X-  or extended seq1 X--
        //                                     seq2 XX              seq2 XXX
        TScore E = kInfMinus;
        // best ending with gap in seq2
        TScore F;
        // best ending with match
        TScore G;
        // just temporary
        TScore n0;
        // total best: rowV walks the previous row and is overwritten in
        // place with the current one, one cell behind the cursor
        TScore * rowV = &stl_rowV[0];
        V = V0 += wsleft2;       // first cell of the current row
        // per-column best ending with gap in seq2
        TScore * rowF = &stl_rowF[0];

        for (; seq2 != seq2_end;) {

            // diagonal: previous row, previous column (not yet overwritten)
            G = *rowV + row_sc[static_cast<unsigned char>(*seq2++)];
            *rowV = V;

            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;  // open a new gap
                tracer = 0;
            }

            // seq2 has already been advanced, so this is the last column
            if( bFreeGapRight2 && seq2 == seq2_end ) {
                wg2 = ws2 = 0;
            }

            F = *++rowF;
            n0 = *++rowV + wg2;
            if(F >= n0) {
                F += ws2;      // continue the gap
                tracer |= kMaskFc;
            }
            else {
                F = n0 + ws2;  // open a new gap
            }
            *rowF = F;

            // best score; ties between a match and a gap are resolved
            // according to m_GapPreference
            if( G < F || ( G == F && m_GapPreference == eLater) ) {
                if( E <= F ) {
                    V = F;
                } else {
                    V = E;
                    tracer |= kMaskE;
                }
            } else if( E > G || ( E == G && m_GapPreference == eLater) ) {
                V = E;
                tracer |= kMaskE;
            } else {
                V = G;
                tracer |= kMaskD;
            }

            // local alignment never goes below zero
            if (m_SmithWaterman && V < 0 ) {
                V = 0;
            }

            backtrace_matrix.SetAt(++k, tracer);

            if (V > best_V) {
                best_V = V;
                backtrace_matrix.SetBestPos(k);
            }
        }
        // store the last cell of the row
        *rowV = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(++k);
    backtrace_matrix.SetBestScore(best_V);

    /*
    //print the matrix out
    {{
    cout<<endl;
    int kk, ind1, ind2, width = 4;
    cout<<setw(width)<<" ";
    cout<<setw(width)<<"-";
    for(ind2 = 0; ind2 < N2-1; ++ind2) {
        cout<<setw(width)<<*(m_Seq2 + data->m_offset2 + ind2);
    }
    cout<<endl;
    for(kk = 0,ind1 = 0; ind1 < N1; ++ind1) {        
        if(ind1) { 
            cout<<setw(width)<<(m_Seq1 + data->m_offset1)[ind1-1];
        } else {
            cout<<setw(width)<<"-";
        }
        for(ind2 = 0; ind2 < N2; ++ind2,++kk) {
            string tstr;
            unsigned char Key (backtrace_matrix[kk]);
            if( Key & kMaskD ) tstr += "D";
            else if ( Key & kMaskE ) tstr += "E";
            else tstr += "F";
            if( Key & kMaskEc )  tstr += "-";            
            if( Key & kMaskFc )  tstr += "|";
            cout<<setw(width)<<tstr;
        }
        cout<<endl<<endl;
    }
    cout<<endl;
    }}
    //end of print the matrix out
    */

    // the score reported to the caller depends on the alignment mode
    const TScore result = m_SmithWaterman? best_V: V;

    if(!m_terminate) {
        x_SWDoBackTrace(backtrace_matrix, data);
        // check back trace: the score recomputed from the (reversed)
        // transcript must match the score found in the matrix
        TTranscript rv (data->m_transcript.size());
        copy(data->m_transcript.rbegin(), data->m_transcript.rend(), rv.begin());
        if( result != ScoreFromTranscript(rv,  data->m_offset1,  data->m_offset2) ) {
            NCBI_THROW(CAlgoAlignException, eInternal,
                       "CNWAligner: error in back trace");
        }
    }

    return result;
}
示例#2
0
/// Fill the dynamic-programming matrix for the sub-problem described by
/// `data` and hand the recorded decisions to x_DoBackTrace().
///
/// Two score rows are kept in memory: rowV (best score per column) and
/// rowF (best score per column ending with a gap in the second sequence).
/// rowV is updated in place, one cell behind the current column, so the
/// not-yet-overwritten cell supplies the diagonal value.
///
/// @param data
///   Offsets/lengths of the two segments to align and the end-space-free
///   flags; passed on to x_DoBackTrace() together with the backtrace matrix.
/// @return
///   The score of the last computed cell. Returns 0 if the progress callback
///   asks to terminate before any work is done.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    vector<TScore> stl_rowV (N2), stl_rowF(N2);

    TScore * rowV    = &stl_rowV[0];
    TScore * rowF    = &stl_rowF[0];

    // Segment starts. Matrix row i / column j correspond to seq1[i-1] /
    // seq2[j-1]; indexing this way avoids forming pointers that point one
    // element before the start of an array, which is undefined behaviour.
    const char * seq1 = m_Seq1 + data->m_offset1;
    const char * seq2 = m_Seq2 + data->m_offset2;

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // an end gap is free only if requested AND the segment actually touches
    // the corresponding end of the full sequence
    const bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    const bool bFreeGapRight1 = data->m_esf_R1 &&
                                m_SeqLen1 == data->m_offset1 + data->m_len1;

    const bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    const bool bFreeGapRight2 = data->m_esf_R2 &&
                                m_SeqLen2 == data->m_offset2 + data->m_len2;

    const TScore wgleft1 = bFreeGapLeft1? 0: m_Wg;
    const TScore wsleft1 = bFreeGapLeft1? 0: m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // first row
    size_t k;
    rowV[0] = wgleft1;
    for (k = 1; k < N2; ++k) {
        rowV[k] = rowV[k - 1] + wsleft1;
        rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    // the gap-opening seed was needed only to build the row; the corner
    // cell itself scores 0
    rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // recurrences
    const TScore wgleft2 (bFreeGapLeft2? 0: m_Wg);
    const TScore wsleft2 (bFreeGapLeft2? 0: m_Ws);
    TScore V  (rowV[N2 - 1]);
    TScore V0 (wgleft2);

    for(size_t i = 1;  i < N1 && !m_terminate;  ++i) {

        // first column of the row
        V = V0 += wsleft2;
        // best score ending with a gap in seq1 (running along the row)
        TScore E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);
        const unsigned char ci = seq1[i - 1];

        // last row: trailing gaps are free if so requested
        if(i == N1 - 1 && bFreeGapRight1) {
                wg1 = ws1 = 0;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (size_t j = 1; j < N2; ++j, ++k) {

            // rowV[j-1] still holds the previous row's value: the diagonal.
            // It is then overwritten with the current row's previous cell.
            const TScore G = rowV[j - 1]
                             + sm[ci][(unsigned char)seq2[j - 1]];
            rowV[j - 1] = V;

            unsigned char tracer;
            TScore n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;  // open a new gap
                tracer = 0;
            }

            // last column: trailing gaps are free if so requested
            if(j == N2 - 1 && bFreeGapRight2) {
                wg2 = ws2 = 0;
            }
            n0 = rowV[j] + wg2;
            if(rowF[j] >= n0) {
                rowF[j] += ws2;      // continue the gap
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + ws2;  // open a new gap
            }

            // pick the best of E, F and G; on ties E wins over F and G,
            // and F wins over G
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }

            backtrace_matrix.SetAt(k, tracer);
        }

        // store the last cell of the row
        rowV[N2 - 1] = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(k);

    if(!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    return V;
}