예제 #1
0
// Top-level driver of the aligner.
//
// Resets the cancellation flag and progress counters, runs x_DoSubmatrix
// over the rectangle covering both whole sequences, and then copies the
// accumulated transcript list (minus its leading eTS_None placeholder) into
// m_Transcript in reverse order.
//
// Returns m_score. If the progress callback requests termination, either
// before or during the computation, m_score is set to 0 and 0 is returned.
CNWAligner::TScore CMMAligner::x_Run()
{
    m_terminate = false;
    if (m_prg_callback) {
        // Publish the total amount of work and give the callback a chance
        // to cancel before anything is computed.
        m_prg_info.m_iter_done = 0;
        m_prg_info.m_iter_total = 2*m_SeqLen1*m_SeqLen2;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    if (m_terminate) {
        m_score = 0;
        return m_score;
    }

    m_score = kMin_Int;

    // The list always starts with one eTS_None element; it is skipped when
    // the transcript is extracted below.
    m_TransList.clear();
    m_TransList.push_back(eTS_None);

    // Top-level call on the full rectangle; the list end is passed as the
    // position argument.
    SCoordRect whole (0, 0, m_SeqLen1 - 1, m_SeqLen2 - 1);
    x_DoSubmatrix(whole, m_TransList.end(), false, false);

    if (m_terminate) {
        m_score = 0;
        return m_score;
    }

    // Hand-written reverse copy (reverse_copy is not supported by some
    // compilers): walk the list forward from its second element while
    // filling the transcript from its last slot towards the first.
    const size_t count = m_TransList.size() - 1;
    m_Transcript.clear();
    m_Transcript.resize(count);

    list<ETranscriptSymbol>::const_iterator src = m_TransList.begin();
    for (size_t dst = count; dst > 0; ) {
        ++src;
        m_Transcript[--dst] = *src;
    }

    return m_score;
}
예제 #2
0
// Aligns a segment of frequency profile 1 (m_Freq1) against a segment of
// frequency profile 2 (m_Freq2) by dynamic programming with separate
// gap-open (wg*) and gap-extend (ws*) penalties.
//
// data  - describes the segment: m_offset1/m_len1 and m_offset2/m_len2 give
//         the start and length in each profile; m_esf_L1/R1/L2/R2 select
//         zero-cost end gaps on the corresponding side. Passed on to
//         x_DoBackTrace together with the filled backtrace matrix.
//
// The score is accumulated in double precision, one row at a time (rowV
// holds the best scores, rowF the scores of alignments ending with a gap of
// the "F" kind), while per-cell direction flags are stored in a
// CBacktraceMatrix4 indexed as i*N2 + j.
//
// Returns the final score rounded to the nearest integer, or 0 if the
// progress callback cancels before any work is done. If the callback
// cancels later, no backtrace is performed and the value of V at that point
// is returned (rounded).
CNWAligner::TScore CPSSMAligner::x_AlignProfile(SAlignInOut* data)
{
    // The DP matrix has one extra boundary row and column.
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    vector<double> stl_rowV (N2), stl_rowF(N2);

    double* rowV    = &stl_rowV[0];
    double* rowF    = &stl_rowF[0];

    // pV[j] aliases rowV[j-1]: the "diagonal / left" neighbour of column j.
    double* pV = rowV - 1;

    // Shifted by one so that DP index i (1-based) maps to profile column
    // offset + i - 1.
    const double** freq1_row = m_Freq1 + data->m_offset1 - 1;
    const double** freq2_row = m_Freq2 + data->m_offset2 - 1;

    m_terminate = false;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // Gap penalties for the four ends; interior values by default.
    TScore wg1L = m_Wg;
    TScore wg1R = m_Wg;
    TScore wg2L = m_Wg;
    TScore wg2R = m_Wg;

    TScore ws1L = m_Ws;
    TScore ws1R = m_Ws;
    TScore ws2L = m_Ws;
    TScore ws2R = m_Ws;

    // If the segment touches a true sequence end, use either free end gaps
    // (m_esf_* set) or the dedicated start/end penalties.
    if (data->m_offset1 == 0) {
        if (data->m_esf_L1) {
            wg1L = ws1L = 0;
        }
        else {
            wg1L = m_StartWg;
            ws1L = m_StartWs;
        }
    }

    if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
        if (data->m_esf_R1) {
            wg1R = ws1R = 0;
        }
        else {
            wg1R = m_EndWg;
            ws1R = m_EndWs;
        }
    }

    if (data->m_offset2 == 0) {
        if (data->m_esf_L2) {
            wg2L = ws2L = 0;
        }
        else {
            wg2L = m_StartWg;
            ws2L = m_StartWs;
        }
    }

    if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
        if (data->m_esf_R2) {
            wg2R = ws2R = 0;
        }
        else {
            wg2R = m_EndWg;
            ws2R = m_EndWs;
        }
    }

    TScore wgleft1   = wg1L;
    TScore wsleft1   = ws1L;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);

    // first row
    // k is the running linear index into the backtrace matrix; it starts at
    // 1 so that it is correct even when the row has a single cell (N2 == 1).
    size_t k = 1;
    if (N2 > 1) {
        // Gap-open cost scaled by (1 - profile2[0]) of the first column of
        // profile 2; the extension costs accumulate along the row.
        rowV[0] = wgleft1 * (1.0 - freq2_row[1][0]);
        for (k = 1; k < N2; k++) {
            rowV[k] = pV[k] + wsleft1;
            rowF[k] = kInfMinus;
            backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
        }
        backtrace_matrix.Purge(k);
    }
    // The corner cell itself scores 0 (it was only used as a seed above).
    rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // recurrences
    TScore wgleft2   = wg2L;
    TScore wsleft2   = ws2L;
    double V  = rowV[N2 - 1];
    double V0 = 0;
    double E, G, n0;
    unsigned char tracer;

    // Seed for the first column, scaled by (1 - profile1[0]) of the first
    // column of profile 1.
    if (N1 > 1)
        V0 = wgleft2 * (1.0 - freq1_row[1][0]);

    size_t i, j;
    for(i = 1;  i < N1 && !m_terminate;  ++i) {

        // First column of row i: extend the leading gap.
        V = V0 += wsleft2;
        E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);

        // The last row uses the right-end penalties of sequence 1.
        if(i == N1 - 1) {
            wg1 = wg1R;
            ws1 = ws1R;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (j = 1; j < N2; ++j, ++k) {

            // The last column uses the right-end penalties of sequence 2.
            if(j == N2 - 1) {
                wg2 = wg2R;
                ws2 = ws2R;
            }
            const double *profile1 = freq1_row[i];
            const double *profile2 = freq2_row[j];
            // Gap-open costs are scaled by (1 - element 0) of the opposite
            // profile column; extension costs are used unscaled.
            const double scaled_wg1 = wg1 * (1.0 - profile2[0]);
            const double scaled_ws1 = ws1;
            const double scaled_wg2 = wg2 * (1.0 - profile1[0]);
            const double scaled_ws2 = ws2;

            double accum = 0.0, sum = 0.0;
            int num_zeros1 = 0, num_zeros2 = 0;
            double diff_freq1[kPSSM_ColumnSize];
            double diff_freq2[kPSSM_ColumnSize];

            // separate the residue frequencies into two components:
            // a component that is the same for both columns, and
            // a component that is different. The all-against-all
            // score computation only takes place on the components
            // that are different, so this will assign a higher score
            // to more similar frequency columns
            //
            // Begin by separating out the common portion of each
            // profile

            for (int m = 1; m < kPSSM_ColumnSize; m++) {
                if (profile1[m] < profile2[m]) {
                    accum += profile1[m] * m_DScoreMatrix[m][m];
                    diff_freq1[m] = 0.0;
                    diff_freq2[m] = profile2[m] - profile1[m];
                    num_zeros1++;
                }
                else {
                    accum += profile2[m] * m_DScoreMatrix[m][m];
                    diff_freq1[m] = profile1[m] - profile2[m];
                    diff_freq2[m] = 0.0;
                    num_zeros2++;
                }
            }

            // normalize difference for profile with smaller gap
            if (profile1[0] <= profile2[0]) {
                for (int m = 1; m < kPSSM_ColumnSize; m++)
                    sum += diff_freq1[m];
            } else {
                for (int m = 1; m < kPSSM_ColumnSize; m++)
                    sum += diff_freq2[m];
            }

            if (sum > 0) {
                sum = 1.0 / sum;
                if (profile1[0] <= profile2[0]) {
                    for (int m = 1; m < kPSSM_ColumnSize; m++)
                        diff_freq1[m] *= sum;
                } else {
                    for (int m = 1; m < kPSSM_ColumnSize; m++)
                        diff_freq2[m] *= sum;
                }

                // Add in the cross terms (not counting gaps).
                // Note that the following assumes a symmetric
                // score matrix
                //
                // The outer loop runs over the difference vector with more
                // zero entries, so more iterations skip the inner loop.

                if (num_zeros1 > num_zeros2) {
                    for (int m = 1; m < kPSSM_ColumnSize; m++) {
                        if (diff_freq1[m] > 0) {
                            sum = 0.0;
                            double *matrix_row = m_DScoreMatrix[m];
                            for (int n = 1; n < kPSSM_ColumnSize; n++) {
                                sum += diff_freq2[n] * matrix_row[n];
                            }
                            accum += diff_freq1[m] * sum;
                        }
                    }
                } else {
                    for (int m = 1; m < kPSSM_ColumnSize; m++) {
                        if (diff_freq2[m] > 0) {
                            sum = 0.0;
                            double *matrix_row = m_DScoreMatrix[m];
                            for (int n = 1; n < kPSSM_ColumnSize; n++) {
                                sum += diff_freq1[n] * matrix_row[n];
                            }
                            accum += diff_freq2[m] * sum;
                        }
                    }
                }
            }

            // Diagonal move: previous diagonal score plus the scaled column
            // similarity plus extension-cost terms weighted by element 0 of
            // each profile column.
            G = pV[j] + accum * m_FreqScale +
                            profile1[0] * m_Ws * (1-profile2[0]) +
                            profile2[0] * m_Ws * (1-profile1[0]);

            // Store the current row's value for column j-1 now that the old
            // (previous-row) value has been consumed by G.
            pV[j] = V;

            n0 = V + scaled_wg1;
            if(E >= n0) {
                E += scaled_ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + scaled_ws1;  // open a new gap
                tracer = 0;
            }

            n0 = rowV[j] + scaled_wg2;
            if(rowF[j] >= n0) {
                rowF[j] += scaled_ws2;
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + scaled_ws2;
            }

            // V = max(E, F, G); ties prefer E over F and either gap over G.
            // Choosing F sets neither kMaskE nor kMaskD.
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }
            backtrace_matrix.SetAt(k, tracer);
        }

        // j == N2 here, so this stores the last cell of the row (rowV[N2-1]).
        pV[j] = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }
    backtrace_matrix.Purge(k);

    if(!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    // Round to the nearest integer score. The cast truncates toward zero,
    // so adding 0.5 is only correct for non-negative values (it would turn
    // -10.0 into -9); negative scores must subtract 0.5 instead.
    return (TScore)(V < 0 ? V - 0.5 : V + 0.5);
}
예제 #3
0
// Aligns a segment of the position-specific score matrix m_Pssm1 against a
// segment of sequence m_Seq2 by dynamic programming with separate gap-open
// and gap-extend penalties.
//
// data  - segment description: m_offset1/m_len1 select the PSSM rows,
//         m_offset2/m_len2 the characters of m_Seq2; m_esf_L1/R1/L2/R2
//         request zero-cost end gaps on the corresponding side. The same
//         object is handed to x_DoBackTrace with the filled matrix.
//
// Returns the score of the last cell, or 0 if the progress callback cancels
// before the computation starts. When the callback cancels later, the
// backtrace is skipped and the most recently computed value is returned.
CNWAligner::TScore CPSSMAligner::x_AlignPSSM(SAlignInOut* data)
{
    // One extra boundary row and column.
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // bestV: best score per column (rolling row); gapF: best score per
    // column among alignments ending with a gap of the "F" kind.
    vector<TScore> bestV (N2);
    vector<TScore> gapF (N2);

    // Zero-based views of the segment; DP index n maps to element n - 1.
    const TScore** pssm = m_Pssm1 + data->m_offset1;
    const char* seq2 = m_Seq2 + data->m_offset2;

    m_terminate = false;

    if (m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        m_terminate = m_prg_callback(&m_prg_info);
        if (m_terminate) {
            return 0;
        }
    }

    // End-specific penalties: interior values unless the segment touches a
    // real sequence end, in which case either a free end gap or the
    // dedicated start/end penalties apply.
    TScore wgL1 = m_Wg, wsL1 = m_Ws;
    TScore wgR1 = m_Wg, wsR1 = m_Ws;
    TScore wgL2 = m_Wg, wsL2 = m_Ws;
    TScore wgR2 = m_Wg, wsR2 = m_Ws;

    if (data->m_offset1 == 0) {
        wgL1 = data->m_esf_L1 ? TScore(0) : m_StartWg;
        wsL1 = data->m_esf_L1 ? TScore(0) : m_StartWs;
    }
    if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
        wgR1 = data->m_esf_R1 ? TScore(0) : m_EndWg;
        wsR1 = data->m_esf_R1 ? TScore(0) : m_EndWs;
    }
    if (data->m_offset2 == 0) {
        wgL2 = data->m_esf_L2 ? TScore(0) : m_StartWg;
        wsL2 = data->m_esf_L2 ? TScore(0) : m_StartWs;
    }
    if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
        wgR2 = data->m_esf_R2 ? TScore(0) : m_EndWg;
        wsR2 = data->m_esf_R2 ? TScore(0) : m_EndWs;
    }

    TScore wg1 = m_Wg, ws1 = m_Ws;

    // Linear cell index: [i,j] = i*N2 + j
    CBacktraceMatrix4 backtrace (N1 * N2);
    backtrace.SetAt(0, 0);

    // First row: a leading gap that grows by one extension per column.
    // bestV[0] temporarily carries the gap-open cost as a seed.
    bestV[0] = wgL1;
    size_t cell = 1;
    while (cell < N2) {
        bestV[cell] = bestV[cell - 1] + wsL1;
        gapF[cell] = kInfMinus;
        backtrace.SetAt(cell, kMaskE | kMaskEc);
        ++cell;
    }
    backtrace.Purge(cell);
    bestV[0] = 0;

    if (m_prg_callback) {
        m_prg_info.m_iter_done = cell;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // Remaining rows.
    TScore V  = bestV[N2 - 1];
    TScore V0 = wgL2;
    TScore E;

    for (size_t row = 1;  row < N1 && !m_terminate;  ++row) {

        // First column of this row: extend the leading gap.
        V0 += wsL2;
        V = V0;
        E = kInfMinus;
        backtrace.SetAt(cell, kMaskFc);
        ++cell;

        // The last row switches to the right-end penalties of sequence 1.
        if (row == N1 - 1) {
            wg1 = wgR1;
            ws1 = wsR1;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (size_t col = 1; col < N2; ++col, ++cell) {

            // Diagonal move; bestV[col - 1] still holds the previous row.
            const TScore G = bestV[col - 1]
                + pssm[row - 1][(unsigned char)seq2[col - 1]];

            // The previous-row value is consumed; store the current row's.
            bestV[col - 1] = V;

            unsigned char tracer;
            const TScore openE = V + wg1;
            if (E >= openE) {
                E += ws1;              // keep extending the gap
                tracer = kMaskEc;
            }
            else {
                E = openE + ws1;       // start a new gap
                tracer = 0;
            }

            // The last column switches to the right-end penalties of
            // sequence 2.
            if (col == N2 - 1) {
                wg2 = wgR2;
                ws2 = wsR2;
            }

            TScore& F = gapF[col];
            const TScore openF = bestV[col] + wg2;
            if (F >= openF) {
                F += ws2;
                tracer |= kMaskFc;
            }
            else {
                F = openF + ws2;
            }

            // V = max(E, F, G); ties prefer E over F and a gap over G.
            // Selecting F sets neither kMaskE nor kMaskD.
            const bool eWins = (E >= F);
            const TScore bestGap = eWins ? E : F;
            if (bestGap >= G) {
                V = bestGap;
                if (eWins) {
                    tracer |= kMaskE;
                }
            }
            else {
                V = G;
                tracer |= kMaskD;
            }
            backtrace.SetAt(cell, tracer);
        }

        // Store the last cell of the row.
        bestV[N2 - 1] = V;

        if (m_prg_callback) {
            m_prg_info.m_iter_done = cell;
            m_terminate = m_prg_callback(&m_prg_info);
            if (m_terminate) {
                break;
            }
        }
    }
    backtrace.Purge(cell);

    if (!m_terminate) {
        x_DoBackTrace(backtrace, data);
    }
    return V;
}
예제 #4
0
void CMMAligner::x_RunBtm(const SCoordRect& rect,
             vector<TScore>& vE, vector<TScore>& vF, vector<TScore>& vG,
             vector<unsigned char>& trace, bool rb) const
{
    if( m_terminate ) {
        return;
    }

    const size_t dim1 = rect.i2 - rect.i1 + 1;
    const size_t dim2 = rect.j2 - rect.j1 + 1;
    const size_t N1   = dim1 + 1;
    const size_t N2   = dim2 + 1;

    vector<TScore> stl_rowV (N2), stl_rowF (N2);
    TScore* rowV    = &stl_rowV [0];
    TScore* rowF    = &stl_rowF [0];

    TScore* pV = rowV + 1;

    const char* seq1 = m_Seq1 + rect.i1;
    const char* seq2 = m_Seq2 + rect.j1;

    const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    bool bFreeGapRight1  = m_esf_R1 && rect.i2 == m_SeqLen1 - 1;
    bool bFreeGapRight2  = m_esf_R2 && rect.j2 == m_SeqLen2 - 1;
    bool bFreeGapLeft2  =  m_esf_L2 && rect.j1 == 0;

    // progress reporting

    const size_t prg_rep_rate = 100;
    const size_t prg_rep_increment = prg_rep_rate*N2;

    // bottom row

    TScore wg = bFreeGapRight1? 0: m_Wg;
    TScore ws = bFreeGapRight1? 0: m_Ws;

    rowV[N2 - 1] = wg;
    int i, j;
    for (j = N2 - 2; j >= 0; --j) {
        rowV[j] = pV[j] + ws;
        rowF[j] = kInfMinus;
    }
    rowV[N2 - 1] = 0;

    // recurrences

    wg = bFreeGapRight2? 0: m_Wg;
    ws = bFreeGapRight2? 0: m_Ws;

    TScore V  = 0;
    TScore V0 = rb? 0: wg;
    TScore E, G, n0;

    for(i = N1 - 2;  i > 0;  --i) {
        
        V = V0 += ws;
        E = kInfMinus;
        unsigned char ci = seq1[i];

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (j = N2 - 2; j >= 0; --j) {

            G = pV[j] + sm[ci][(unsigned char)seq2[j]];
            pV[j] = V;

            n0 = V + m_Wg;
            if(E >= n0)
                E += m_Ws;      // continue the gap
            else
                E = n0 + m_Ws;  // open a new gap

            if(j == 0 && bFreeGapLeft2) {
                wg2 = ws2 = 0;
            }

            n0 = rowV[j] + wg2;
            if (rowF[j] > n0)
                rowF[j] += ws2;
            else
                rowF[j] = n0 + ws2;

            V = (E >= rowF[j])? (E >= G? E: G): (rowF[j] >= G? rowF[j]: G);
        }
        pV[j] = V;

        if( m_prg_callback && (N1 - i) % prg_rep_rate == 0 ) {
#ifdef NCBI_THREADS
            CFastMutexGuard guard (progress_mutex);
#endif
            m_prg_info.m_iter_done += prg_rep_increment;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }

    // the top row (i == 0)
    if(!m_terminate) {

        vF[N2-1] = V = V0 += ws;
        vG[N2-1] = vE[N2-1] = E = kInfMinus;
        trace[N2-1] = kMaskFc;
        unsigned char ci = seq1[i];

        TScore wg2 = m_Wg, ws2 = m_Ws;

        unsigned char tracer;
        for (j = N2 - 2; j >= 0; --j) {

            vG[j] = G = pV[j] + sm[ci][(unsigned char)seq2[j]];
            pV[j] = V;

            n0 = V + m_Wg;
            if(E >= n0) {
                E += m_Ws;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + m_Ws;  // open a new gap
                tracer = 0;
            }
            vE[j] = E;

            if(j == 0 && bFreeGapLeft2) {
                wg2 = ws2 = 0;
            }

            n0 = rowV[j] + wg2;
            if(rowF[j] >= n0) {
                rowF[j] += ws2;
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + ws2;
            }
            vF[j] = rowF[j];

            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }
            trace[j] = tracer;
        }
    }

    if( m_prg_callback ) {
#ifdef NCBI_THREADS
        CFastMutexGuard guard (progress_mutex);
#endif
        m_prg_info.m_iter_done += (N1 - i) % prg_rep_rate;
        m_terminate = m_prg_callback(&m_prg_info);
    }
}
예제 #5
0
// Aligns a segment of m_Seq1 against a segment of m_Seq2 by dynamic
// programming with separate gap-open (m_Wg) and gap-extend (m_Ws)
// penalties, optionally in Smith-Waterman mode (m_SmithWaterman), where
// cell scores are clamped at zero and the best cell of the whole matrix is
// tracked.
//
// data  - segment description (m_offset1/m_len1, m_offset2/m_len2), the
//         end-space-free flags m_esf_L1/R1/L2/R2, and the output transcript
//         (m_transcript), which is filled by x_SWDoBackTrace.
//
// Returns best_V (the best score anywhere in the matrix) in Smith-Waterman
// mode, otherwise the score V of the last cell. Returns 0 if the progress
// callback cancels before the computation starts.
//
// Throws CAlgoAlignException:
//   eBadParameter - Smith-Waterman mode combined with a partial segment or
//                   with any non-free end gap;
//   eInternal     - the score recomputed from the transcript differs from
//                   the score produced by the recurrences.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{


    //check data integrity

    if( m_SmithWaterman && ( data->m_offset1 || m_SeqLen1 != data->m_len1 ||
                             data->m_offset2 || m_SeqLen2 != data->m_len2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with offsets provided");
    }

    if( m_SmithWaterman && ( !data->m_esf_L1 || !data->m_esf_R1 ||
                             !data->m_esf_L2 || !data->m_esf_R2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with end gap penalties");
    }

    // One extra boundary row and column.
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    vector<TScore> stl_rowV (N2), stl_rowF(N2);

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // End gaps are free only when requested AND the segment touches the
    // corresponding end of the whole sequence.
    bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    bool bFreeGapRight1 = data->m_esf_R1 &&
                          m_SeqLen1 == data->m_offset1 + data->m_len1;

    bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    bool bFreeGapRight2 = data->m_esf_R2 &&
                          m_SeqLen2 == data->m_offset2 + data->m_len2;

    TScore wgleft1   = bFreeGapLeft1? 0: m_Wg;
    TScore wsleft1   = bFreeGapLeft1? 0: m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // first row
    // note that stl_rowF[0] is not used in the main cycle,
    size_t k;
    // stl_rowV[0] temporarily carries the gap-open cost as a seed for the
    // running sum, then is reset to 0 below.
    stl_rowV[0] = wgleft1;
    for (k = 1; k < N2; ++k) {
        stl_rowV[k] = stl_rowV[k-1] + wsleft1;
        stl_rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    stl_rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // gap penalties
    TScore wgleft2 (bFreeGapLeft2? 0: m_Wg);
    TScore wsleft2 (bFreeGapLeft2? 0: m_Ws);

    const char * seq1 = m_Seq1 + data->m_offset1;
    const char * seq1_end = seq1 + data->m_len1;

    TScore V0 = wgleft2;
    TScore V = 0;//best score in the current cell. Will be equal to the NW score at the end
    TScore best_V = 0;//best score in the whole matrix aka score for SW

    // From here on k is pre-incremented before each SetAt, so step back to
    // the index of the last cell already written.
    --k;

    for(;  seq1 != seq1_end && !m_terminate;  ++seq1) {

        backtrace_matrix.SetAt(++k, kMaskFc);

        // The last row uses free gaps at the right end of sequence 1.
        if( seq1 + 1 == seq1_end && bFreeGapRight1) {
            wg1 = ws1 = 0;
        }

        unsigned char tracer;
        // Index through unsigned char: a plain char may be signed, and a
        // negative value converted straight to size_t would become a huge
        // out-of-range index.
        const TNCBIScore * row_sc = sm[(unsigned char)*seq1];

        const char * seq2 = m_Seq2 + data->m_offset2;
        const char * seq2_end = seq2 + data->m_len2;
        TScore wg2 = m_Wg, ws2 = m_Ws;

        //best ending with gap in seq1 open  seq1 X- or extended seq1 X--
        //                                   seq2 XX             seq2 XXX
        TScore  E = kInfMinus;
        //best ending with gap in seq2
        TScore F;
        //best ending with match
        TScore G;
        //just temporary
        TScore n0;
        //total best: rowV walks the row of best scores; the cell it points
        //at still holds the previous row's value until it is overwritten
        TScore * rowV    = &stl_rowV[0];//previous row
        V = V0 += wsleft2;       //current row
        //per-column best score ending with a gap in seq2 (see F above)
        TScore * rowF    = &stl_rowF[0];

        for (; seq2 != seq2_end;) {

            // Diagonal move; same unsigned-char indexing as for row_sc.
            G = *rowV + row_sc[(unsigned char)*seq2++];
            // Previous-row value consumed by G; store the current row's.
            *rowV = V;

            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;  // open a new gap
                tracer = 0;
            }

            // seq2 was already advanced, so this detects the last column.
            if( bFreeGapRight2 && seq2 == seq2_end ) {
                wg2 = ws2 = 0;
            }

            F = *++rowF;
            n0 = *++rowV + wg2;
            if(F >= n0) {
                F += ws2;
                tracer |= kMaskFc;
            }
            else {
                F = n0 + ws2;
            }
            *rowF = F;

            //best score
            // m_GapPreference == eLater lets a gap win ties against the
            // diagonal move. Choosing F sets neither kMaskE nor kMaskD.
            if( G < F || ( G == F && m_GapPreference == eLater) ) {
                if( E <= F ) {
                    V = F;
                } else {
                    V = E;
                    tracer |= kMaskE;
                }
            } else if( E > G || ( E == G && m_GapPreference == eLater) ) {
                V = E;
                tracer |= kMaskE;
            } else {
                V = G;
                tracer |= kMaskD;
            }

            if (m_SmithWaterman && V < 0 ) {
                V = 0;
            }

            backtrace_matrix.SetAt(++k, tracer);

            if (V > best_V) {
                best_V = V;
                backtrace_matrix.SetBestPos(k);
            }
        }
        // Store the last cell of the row.
        *rowV = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(++k);
    backtrace_matrix.SetBestScore(best_V);

    /*
    //print the matrix out
    {{
    cout<<endl;
    int kk, ind1, ind2, width = 4;
    cout<<setw(width)<<" ";
    cout<<setw(width)<<"-";
    for(ind2 = 0; ind2 < N2-1; ++ind2) {
        cout<<setw(width)<<*(m_Seq2 + data->m_offset2 + ind2);
    }
    cout<<endl;
    for(kk = 0,ind1 = 0; ind1 < N1; ++ind1) {        
        if(ind1) { 
            cout<<setw(width)<<(m_Seq1 + data->m_offset1)[ind1-1];
        } else {
            cout<<setw(width)<<"-";
        }
        for(ind2 = 0; ind2 < N2; ++ind2,++kk) {
            string tstr;
            unsigned char Key (backtrace_matrix[kk]);
            if( Key & kMaskD ) tstr += "D";
            else if ( Key & kMaskE ) tstr += "E";
            else tstr += "F";
            if( Key & kMaskEc )  tstr += "-";            
            if( Key & kMaskFc )  tstr += "|";
            cout<<setw(width)<<tstr;
        }
        cout<<endl<<endl;
    }
    cout<<endl;
    }}
    //end of print the matrix out
    */

    // The score this call reports: whole-matrix best in Smith-Waterman
    // mode, last-cell score otherwise.
    const TScore result = m_SmithWaterman? best_V: V;

    if(!m_terminate) {
        x_SWDoBackTrace(backtrace_matrix, data);
        //check back trace: the transcript is stored reversed, so flip it
        //and make sure it reproduces the score computed above
        TTranscript rv (data->m_transcript.size());
        copy(data->m_transcript.rbegin(), data->m_transcript.rend(), rv.begin());
        if( result != ScoreFromTranscript(rv,  data->m_offset1,  data->m_offset2) ) {
            NCBI_THROW(CAlgoAlignException, eInternal,
                       "CNWAligner: error in back trace");
        }
    }

    return result;
}
예제 #6
0
// Aligns a segment of m_Seq1 against a segment of m_Seq2 by dynamic
// programming with separate gap-open (m_Wg) and gap-extend (m_Ws)
// penalties, recording per-cell direction flags for the backtrace.
//
// data  - segment description (m_offset1/m_len1, m_offset2/m_len2) and the
//         end-space-free flags m_esf_L1/R1/L2/R2; also handed to
//         x_DoBackTrace together with the filled backtrace matrix.
//
// Returns the score of the last cell, or 0 if the progress callback cancels
// before the computation starts. When termination is requested later, the
// backtrace is skipped and the most recently computed value is returned.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    // One extra boundary row and column.
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // bestV: best score per column (rolling row); gapF: best score per
    // column among alignments ending with a gap of the "F" kind.
    vector<TScore> bestV (N2);
    vector<TScore> gapF (N2);

    // Zero-based views of the segment; DP index n maps to character n - 1.
    const char* seq1 = m_Seq1 + data->m_offset1;
    const char* seq2 = m_Seq2 + data->m_offset2;

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if (m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        m_terminate = m_prg_callback(&m_prg_info);
        if (m_terminate) {
            return 0;
        }
    }

    // An end gap is free only when requested AND the segment touches the
    // corresponding end of the whole sequence.
    const bool freeLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    const bool freeRight1 = data->m_esf_R1 &&
                            m_SeqLen1 == data->m_offset1 + data->m_len1;
    const bool freeLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    const bool freeRight2 = data->m_esf_R2 &&
                            m_SeqLen2 == data->m_offset2 + data->m_len2;

    const TScore wgLeft1 = freeLeft1 ? 0 : m_Wg;
    const TScore wsLeft1 = freeLeft1 ? 0 : m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // Linear cell index: [i,j] = i*N2 + j
    CBacktraceMatrix4 backtrace (N1 * N2);
    backtrace.SetAt(0, 0);

    // First row: a leading gap that grows by one extension per column.
    // bestV[0] temporarily carries the gap-open cost as a seed.
    bestV[0] = wgLeft1;
    size_t cell = 1;
    while (cell < N2) {
        bestV[cell] = bestV[cell - 1] + wsLeft1;
        gapF[cell] = kInfMinus;
        backtrace.SetAt(cell, kMaskE | kMaskEc);
        ++cell;
    }
    backtrace.Purge(cell);
    bestV[0] = 0;

    if (m_prg_callback) {
        m_prg_info.m_iter_done = cell;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // Remaining rows.
    const TScore wgLeft2 = freeLeft2 ? 0 : m_Wg;
    const TScore wsLeft2 = freeLeft2 ? 0 : m_Ws;
    TScore V  = bestV[N2 - 1];
    TScore V0 = wgLeft2;
    TScore E;

    for (size_t row = 1;  row < N1 && !m_terminate;  ++row) {

        // First column of this row: extend the leading gap.
        V0 += wsLeft2;
        V = V0;
        E = kInfMinus;
        backtrace.SetAt(cell, kMaskFc);
        ++cell;

        const unsigned char rowChar = seq1[row - 1];

        // The last row may use free gaps at the right end of sequence 1.
        if (freeRight1 && row == N1 - 1) {
            wg1 = ws1 = 0;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (size_t col = 1; col < N2; ++col, ++cell) {

            // Diagonal move; bestV[col - 1] still holds the previous row.
            const TScore G = bestV[col - 1]
                + sm[rowChar][(unsigned char)seq2[col - 1]];

            // The previous-row value is consumed; store the current row's.
            bestV[col - 1] = V;

            unsigned char tracer;
            const TScore openE = V + wg1;
            if (E >= openE) {
                E += ws1;              // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = openE + ws1;       // open a new gap
                tracer = 0;
            }

            // The last column may use free gaps at the right end of
            // sequence 2.
            if (freeRight2 && col == N2 - 1) {
                wg2 = ws2 = 0;
            }

            TScore& F = gapF[col];
            const TScore openF = bestV[col] + wg2;
            if (F >= openF) {
                F += ws2;
                tracer |= kMaskFc;
            }
            else {
                F = openF + ws2;
            }

            // V = max(E, F, G); ties prefer E over F and a gap over G.
            // Selecting F sets neither kMaskE nor kMaskD.
            const bool eWins = (E >= F);
            const TScore bestGap = eWins ? E : F;
            if (bestGap >= G) {
                V = bestGap;
                if (eWins) {
                    tracer |= kMaskE;
                }
            }
            else {
                V = G;
                tracer |= kMaskD;
            }

            backtrace.SetAt(cell, tracer);
        }

        // Store the last cell of the row.
        bestV[N2 - 1] = V;

        if (m_prg_callback) {
            m_prg_info.m_iter_done = cell;
            m_terminate = m_prg_callback(&m_prg_info);
            if (m_terminate) {
                break;
            }
        }
    }

    backtrace.Purge(cell);

    if (!m_terminate) {
        x_DoBackTrace(backtrace, data);
    }

    return V;
}