Example #1
0
// Fills the dynamic-programming matrix for aligning a PSSM (matrix rows,
// "sequence 1") against a plain residue sequence (matrix columns,
// "sequence 2") with affine gap penalties, stores per-cell backtrace flags
// and finally runs the backtrace.
//
// data - alignment descriptor. m_offset1/m_len1 select the PSSM rows and
//        m_offset2/m_len2 the part of m_Seq2 being aligned; the m_esf_*
//        flags make the corresponding end gaps free. The descriptor is
//        passed on to x_DoBackTrace() together with the backtrace matrix.
//
// Returns the value of the last computed cell (the bottom-right cell when
// the run is not interrupted), or 0 when the progress callback requests
// termination before any cell is computed.
CNWAligner::TScore CPSSMAligner::x_AlignPSSM(SAlignInOut* data)
{
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // Only one row of V and F is kept; it is updated in place while a row
    // of the matrix is being computed.
    vector<TScore> stl_rowV (N2), stl_rowF(N2);

    TScore* rowV    = &stl_rowV[0];
    TScore* rowF    = &stl_rowF[0];

    // Matrix indices i and j are 1-based; residue data is addressed with
    // [i - 1] / [j - 1] so that no pointer before the start of an array
    // is ever formed.
    const TScore** pssm_row = m_Pssm1 + data->m_offset1;
    const char* seq2 = m_Seq2 + data->m_offset2;

    m_terminate = false;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // Gap opening (wg) and extension (ws) penalties for the left (L) and
    // right (R) ends of both sequences; the regular penalties by default.
    TScore wg1L = m_Wg;
    TScore wg1R = m_Wg;
    TScore wg2L = m_Wg;
    TScore wg2R = m_Wg;

    TScore ws1L = m_Ws;
    TScore ws1R = m_Ws;
    TScore ws2L = m_Ws;
    TScore ws2R = m_Ws;

    // When the aligned range touches a true end of a sequence, the end gap
    // is either free (m_esf_* set) or uses the dedicated start/end penalties.
    if (data->m_offset1 == 0) {
        if (data->m_esf_L1) {
            wg1L = ws1L = 0;
        }
        else {
            wg1L = m_StartWg;
            ws1L = m_StartWs;
        }
    }

    if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
        if (data->m_esf_R1) {
            wg1R = ws1R = 0;
        }
        else {
            wg1R = m_EndWg;
            ws1R = m_EndWs;
        }
    }

    if (data->m_offset2 == 0) {
        if (data->m_esf_L2) {
            wg2L = ws2L = 0;
        }
        else {
            wg2L = m_StartWg;
            ws2L = m_StartWs;
        }
    }

    if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
        if (data->m_esf_R2) {
            wg2R = ws2R = 0;
        }
        else {
            wg2R = m_EndWg;
            ws2R = m_EndWs;
        }
    }

    TScore wgleft1   = wg1L;
    TScore wsleft1   = ws1L;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // first row: rowV[k] = wgleft1 + k * wsleft1
    size_t k;
    rowV[0] = wgleft1;
    for (k = 1; k < N2; k++) {
        rowV[k] = rowV[k - 1] + wsleft1;
        rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    // rowV[0] only served as the seed of the running sum above
    rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // recurrences
    TScore wgleft2   = wg2L;
    TScore wsleft2   = ws2L;
    TScore V  = rowV[N2 - 1];
    TScore V0 = wgleft2;
    TScore E, G, n0;
    unsigned char tracer;

    size_t i, j;
    for(i = 1;  i < N1 && !m_terminate;  ++i) {

        // first column of row i: V0 = wgleft2 + i * wsleft2
        V = V0 += wsleft2;
        E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);

        // the last row uses the right-end penalties of sequence 1
        if(i == N1 - 1) {
            wg1 = wg1R;
            ws1 = ws1R;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        // scores of PSSM position i against every residue code
        const TScore* pssm_scores = pssm_row[i - 1];

        for (j = 1; j < N2; ++j, ++k) {

            // rowV[j - 1] still holds the value of cell [i-1, j-1] here;
            // use it for the diagonal move, then overwrite it with the
            // value of cell [i, j-1] computed in the previous step.
            G = rowV[j - 1] + pssm_scores[(unsigned char)seq2[j - 1]];

            rowV[j - 1] = V;

            // horizontal move: extend the running gap or open a new one
            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;
                tracer = 0;
            }

            // the last column uses the right-end penalties of sequence 2
            if(j == N2 - 1) {
                wg2 = wg2R;
                ws2 = ws2R;
            }

            // vertical move: rowV[j] is still the value of cell [i-1, j]
            n0 = rowV[j] + wg2;
            if(rowF[j] >= n0) {
                rowF[j] += ws2;
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + ws2;
            }

            // pick the best of E, F and G; when F wins no direction bit
            // is set in the tracer
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }
            backtrace_matrix.SetAt(k, tracer);
        }

        // j == N2 here: store the last cell of the row
        rowV[j - 1] = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }
    backtrace_matrix.Purge(k);

    if(!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }
    return V;
}
Example #2
0
// Fills the dynamic-programming matrix for aligning two residue-frequency
// profiles (m_Freq1 as matrix rows, m_Freq2 as matrix columns) with affine
// gap penalties, stores per-cell backtrace flags and finally runs the
// backtrace. All intermediate scores are kept as doubles.
//
// data - alignment descriptor. m_offset1/m_len1 and m_offset2/m_len2 select
//        the profile columns being aligned; the m_esf_* flags make the
//        corresponding end gaps free. The descriptor is passed on to
//        x_DoBackTrace() together with the backtrace matrix.
//
// Returns the value of the last computed cell converted to TScore, or 0
// when the progress callback requests termination before any cell is
// computed.
CNWAligner::TScore CPSSMAligner::x_AlignProfile(SAlignInOut* data)
{
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // Only one row of V and F is kept; it is updated in place while a row
    // of the matrix is being computed.
    vector<double> stl_rowV (N2), stl_rowF(N2);

    double* rowV    = &stl_rowV[0];
    double* rowF    = &stl_rowF[0];

    // Matrix indices i and j are 1-based; profile columns are addressed
    // with [i - 1] / [j - 1] so that no pointer before the start of an
    // array is ever formed.
    const double** freq1_row = m_Freq1 + data->m_offset1;
    const double** freq2_row = m_Freq2 + data->m_offset2;

    m_terminate = false;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // Gap opening (wg) and extension (ws) penalties for the left (L) and
    // right (R) ends of both profiles; the regular penalties by default.
    TScore wg1L = m_Wg;
    TScore wg1R = m_Wg;
    TScore wg2L = m_Wg;
    TScore wg2R = m_Wg;

    TScore ws1L = m_Ws;
    TScore ws1R = m_Ws;
    TScore ws2L = m_Ws;
    TScore ws2R = m_Ws;

    // When the aligned range touches a true end of a profile, the end gap
    // is either free (m_esf_* set) or uses the dedicated start/end penalties.
    if (data->m_offset1 == 0) {
        if (data->m_esf_L1) {
            wg1L = ws1L = 0;
        }
        else {
            wg1L = m_StartWg;
            ws1L = m_StartWs;
        }
    }

    if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
        if (data->m_esf_R1) {
            wg1R = ws1R = 0;
        }
        else {
            wg1R = m_EndWg;
            ws1R = m_EndWs;
        }
    }

    if (data->m_offset2 == 0) {
        if (data->m_esf_L2) {
            wg2L = ws2L = 0;
        }
        else {
            wg2L = m_StartWg;
            ws2L = m_StartWs;
        }
    }

    if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
        if (data->m_esf_R2) {
            wg2R = ws2R = 0;
        }
        else {
            wg2R = m_EndWg;
            ws2R = m_EndWs;
        }
    }

    TScore wgleft1   = wg1L;
    TScore wsleft1   = ws1L;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);

    // first row; the opening penalty is scaled by (1 - gap frequency) of
    // the first column of profile 2, which exists only when N2 > 1
    size_t k = 1;
    if (N2 > 1) {
        rowV[0] = wgleft1 * (1.0 - freq2_row[0][0]);
        for (k = 1; k < N2; k++) {
            rowV[k] = rowV[k - 1] + wsleft1;
            rowF[k] = kInfMinus;
            backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
        }
        backtrace_matrix.Purge(k);
    }
    // rowV[0] only served as the seed of the running sum above
    rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // recurrences
    TScore wgleft2   = wg2L;
    TScore wsleft2   = ws2L;
    double V  = rowV[N2 - 1];
    double V0 = 0;
    double E, G, n0;
    unsigned char tracer;

    // first-column opening penalty, scaled by (1 - gap frequency) of the
    // first column of profile 1, which exists only when N1 > 1
    if (N1 > 1)
        V0 = wgleft2 * (1.0 - freq1_row[0][0]);

    size_t i, j;
    for(i = 1;  i < N1 && !m_terminate;  ++i) {

        V = V0 += wsleft2;
        E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);

        // the last row uses the right-end penalties of profile 1
        if(i == N1 - 1) {
            wg1 = wg1R;
            ws1 = ws1R;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        // column of profile 1 for this row; element 0 is used as the gap
        // frequency, elements 1..kPSSM_ColumnSize-1 as residue frequencies
        const double *profile1 = freq1_row[i - 1];

        for (j = 1; j < N2; ++j, ++k) {

            // the last column uses the right-end penalties of profile 2
            if(j == N2 - 1) {
                wg2 = wg2R;
                ws2 = ws2R;
            }
            const double *profile2 = freq2_row[j - 1];

            // gap opening is scaled by the non-gap fraction of the
            // opposite profile column; gap extension is not scaled
            const double scaled_wg1 = wg1 * (1.0 - profile2[0]);
            const double scaled_ws1 = ws1;
            const double scaled_wg2 = wg2 * (1.0 - profile1[0]);
            const double scaled_ws2 = ws2;

            double accum = 0.0, sum = 0.0;
            int num_zeros1 = 0, num_zeros2 = 0;
            // element 0 of both arrays is never written or read
            double diff_freq1[kPSSM_ColumnSize];
            double diff_freq2[kPSSM_ColumnSize];

            // separate the residue frequencies into two components:
            // a component that is the same for both columns, and
            // a component that is different. The all-against-all
            // score computation only takes place on the components
            // that are different, so this will assign a higher score
            // to more similar frequency columns
            //
            // Begin by separating out the common portion of each
            // profile

            for (int m = 1; m < kPSSM_ColumnSize; m++) {
                if (profile1[m] < profile2[m]) {
                    accum += profile1[m] * m_DScoreMatrix[m][m];
                    diff_freq1[m] = 0.0;
                    diff_freq2[m] = profile2[m] - profile1[m];
                    num_zeros1++;
                }
                else {
                    accum += profile2[m] * m_DScoreMatrix[m][m];
                    diff_freq1[m] = profile1[m] - profile2[m];
                    diff_freq2[m] = 0.0;
                    num_zeros2++;
                }
            }

            // normalize difference for profile with smaller gap
            if (profile1[0] <= profile2[0]) {
                for (int m = 1; m < kPSSM_ColumnSize; m++)
                    sum += diff_freq1[m];
            } else {
                for (int m = 1; m < kPSSM_ColumnSize; m++)
                    sum += diff_freq2[m];
            }

            if (sum > 0) {
                sum = 1.0 / sum;
                if (profile1[0] <= profile2[0]) {
                    for (int m = 1; m < kPSSM_ColumnSize; m++)
                        diff_freq1[m] *= sum;
                } else {
                    for (int m = 1; m < kPSSM_ColumnSize; m++)
                        diff_freq2[m] *= sum;
                }

                // Add in the cross terms (not counting gaps).
                // Note that the following assumes a symmetric
                // score matrix.
                // The outer loop runs over the difference vector with
                // more zero entries, so more rows are skipped.

                if (num_zeros1 > num_zeros2) {
                    for (int m = 1; m < kPSSM_ColumnSize; m++) {
                        if (diff_freq1[m] > 0) {
                            sum = 0.0;
                            double *matrix_row = m_DScoreMatrix[m];
                            for (int n = 1; n < kPSSM_ColumnSize; n++) {
                                sum += diff_freq2[n] * matrix_row[n];
                            }
                            accum += diff_freq1[m] * sum;
                        }
                    }
                } else {
                    for (int m = 1; m < kPSSM_ColumnSize; m++) {
                        if (diff_freq2[m] > 0) {
                            sum = 0.0;
                            double *matrix_row = m_DScoreMatrix[m];
                            for (int n = 1; n < kPSSM_ColumnSize; n++) {
                                sum += diff_freq1[n] * matrix_row[n];
                            }
                            accum += diff_freq2[m] * sum;
                        }
                    }
                }
            }

            // rowV[j - 1] still holds the value of cell [i-1, j-1] here;
            // use it for the diagonal move, then overwrite it with the
            // value of cell [i, j-1] computed in the previous step.
            G = rowV[j - 1] + accum * m_FreqScale +
                            profile1[0] * m_Ws * (1-profile2[0]) +
                            profile2[0] * m_Ws * (1-profile1[0]);

            rowV[j - 1] = V;

            n0 = V + scaled_wg1;
            if(E >= n0) {
                E += scaled_ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + scaled_ws1;  // open a new gap
                tracer = 0;
            }

            // vertical move: rowV[j] is still the value of cell [i-1, j]
            n0 = rowV[j] + scaled_wg2;
            if(rowF[j] >= n0) {
                rowF[j] += scaled_ws2;
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + scaled_ws2;
            }

            // pick the best of E, F and G; when F wins no direction bit
            // is set in the tracer
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }
            backtrace_matrix.SetAt(k, tracer);
        }

        // j == N2 here: store the last cell of the row
        rowV[j - 1] = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }
    backtrace_matrix.Purge(k);

    if(!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    // NOTE(review): adding 0.5 before the truncating conversion rounds to
    // nearest only for non-negative V; negative scores are rounded toward
    // zero. Kept as is because other code may rely on the same convention
    // - confirm before changing.
    return (TScore)(V + 0.5);
}
Example #3
0
// Aligns the sub-rectangle `rect` of the two sequences with a full
// (quadratic-memory) dynamic-programming matrix and writes the resulting
// edit transcript into `subpath`.
//
// rect         - inclusive coordinate ranges [i1, i2] on m_Seq1 and
//                [j1, j2] on m_Seq2.
// left_top     - when true, the gap opening penalty is not charged in the
//                first column of the matrix.
// right_bottom - when true, the gap opening penalty is not charged for
//                vertical moves in the last column of the matrix.
// subpath      - output; cleared and filled with eTS_Match / eTS_Insert /
//                eTS_Delete symbols, first alignment position first.
//
// Returns the value of the bottom-right cell, or 0 if termination was
// already requested when the function was entered.
CNWAligner::TScore CMMAligner::x_RunTerm(const SCoordRect& rect,
                                         bool left_top, bool right_bottom,
                                         list<ETranscriptSymbol>& subpath)
{
    if( m_terminate ) {
        return 0;
    }

    const size_t N1 = rect.i2 - rect.i1 + 2;
    const size_t N2 = rect.j2 - rect.j1 + 2;

    // One row of V and F is kept and updated in place.
    vector<TScore> stl_rowV (N2), stl_rowF (N2);
    TScore* rowV    = &stl_rowV [0];
    TScore* rowF    = &stl_rowF [0];

    // index calculation: [i,j] = i*n2 + j
    // (value-initialized, so cell [0,0] carries no flags)
    vector<unsigned char> stl_bm (N1*N2);
    unsigned char* backtrace = &stl_bm[0];

    // Matrix indices i and j are 1-based; residues are addressed with
    // [i - 1] / [j - 1] so that no pointer before the start of a sequence
    // is ever formed.
    const char* seq1 = m_Seq1 + rect.i1;
    const char* seq2 = m_Seq2 + rect.j1;

    const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    // End gaps are free only when the rectangle touches the corresponding
    // true end of a sequence and the matching m_esf_* flag is set.
    bool bFreeGapLeft1  = m_esf_L1 && rect.i1 == 0;
    bool bFreeGapRight1 = m_esf_R1 && rect.i2 == m_SeqLen1 - 1;
    bool bFreeGapLeft2  = m_esf_L2 && rect.j1 == 0;
    bool bFreeGapRight2 = m_esf_R2 && rect.j2 == m_SeqLen2 - 1;

    TScore wgleft1   = bFreeGapLeft1? 0: m_Wg;
    TScore wsleft1   = bFreeGapLeft1? 0: m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // first row: rowV[k] = wgleft1 + k * wsleft1
    size_t k;
    {
        rowV[0] = wgleft1;
        for (k = 1; k < N2; k++) {
            rowV[k] = rowV[k - 1] + wsleft1;
            rowF[k] = kInfMinus;
            backtrace[k] = kMaskE | kMaskEc;
        }
        // rowV[0] only served as the seed of the running sum above
        rowV[0] = 0;
    }

    // recurrences
    TScore wgleft2   = bFreeGapLeft2? 0: m_Wg;
    TScore wsleft2   = bFreeGapLeft2? 0: m_Ws;
    TScore V  = 0;
    TScore V0 = left_top? 0: wgleft2;
    TScore E, G, n0;
    unsigned char tracer;

    size_t i, j;
    for(i = 1;  i < N1;  ++i) {

        // first column of row i
        V = V0 += wsleft2;
        E = kInfMinus;
        backtrace[k++] = kMaskFc;
        unsigned char ci = seq1[i - 1];

        // the last row uses free right-end gaps when enabled
        if(i == N1 - 1 && bFreeGapRight1) {
                wg1 = ws1 = 0;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (j = 1; j < N2; ++j, ++k) {

            // rowV[j - 1] still holds the value of cell [i-1, j-1] here;
            // use it for the diagonal move, then overwrite it with the
            // value of cell [i, j-1] computed in the previous step.
            G = rowV[j - 1] + sm[ci][(unsigned char)seq2[j - 1]];
            rowV[j - 1] = V;

            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;  // open a new gap
                tracer = 0;
            }

            if(j == N2 - 1 && bFreeGapRight2) {
                wg2 = ws2 = 0;
            }
            // vertical move: rowV[j] is still the value of cell [i-1, j]
            n0 = rowV[j] + ((right_bottom && j == N2 - 1)? 0: wg2);
            if(rowF[j] >= n0) {
                rowF[j] += ws2;
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + ws2;
            }

            // pick the best of E, F and G; when F wins no direction bit
            // is set, which the backtrace below treats as a deletion
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }
            backtrace[k] = tracer;
        }

        // j == N2 here: store the last cell of the row
        rowV[j - 1] = V;
    }

    // fill the subpath
    subpath.clear();

    // run backtrace from the bottom-right cell back to cell [0,0]
    k = N1*N2 - 1;
    while (k != 0) {
        unsigned char Key = backtrace[k];
        if (Key & kMaskD) {
            // diagonal move: one row up, one column left
            subpath.push_front(eTS_Match);
            k -= N2 + 1;
        }
        else if (Key & kMaskE) {
            // horizontal move; keep going left while the gap is flagged
            // as a continuation
            subpath.push_front(eTS_Insert); --k;
            while(k > 0 && (Key & kMaskEc)) {
                subpath.push_front(eTS_Insert);
                Key = backtrace[k--];
            }
        }
        else {
            // vertical move; keep going up while the gap is flagged as
            // a continuation
            subpath.push_front(eTS_Delete);
            k -= N2;
            while(k > 0 && (Key & kMaskFc)) {
                subpath.push_front(eTS_Delete);
                Key = backtrace[k];
                k -= N2;
            }
        }
    }

    return V;
}
Example #4
0
// Runs the dynamic programming over the sub-rectangle `rect` backwards:
// rows are processed from the bottom of the rectangle to its top and
// columns from right to left. Only one row is kept in memory; for the top
// row (i == 0) the per-cell results are exported to the caller.
//
// rect       - inclusive coordinate ranges [i1, i2] on m_Seq1 and
//              [j1, j2] on m_Seq2.
// vE, vF, vG - output; receive the E, F and G values of the top row.
//              Indices 0 .. N2-1 are written, where N2 = j2 - j1 + 2.
// trace      - output; receives the backtrace flags of the top row
//              (same index range).
// rb         - when true, the gap opening penalty is not charged in the
//              rightmost column.
//
// Does nothing if termination was already requested. Progress is reported
// through m_prg_callback, which may request termination.
void CMMAligner::x_RunBtm(const SCoordRect& rect,
             vector<TScore>& vE, vector<TScore>& vF, vector<TScore>& vG,
             vector<unsigned char>& trace, bool rb) const
{
    if( m_terminate ) {
        return;
    }

    const size_t dim1 = rect.i2 - rect.i1 + 1;
    const size_t dim2 = rect.j2 - rect.j1 + 1;
    const size_t N1   = dim1 + 1;
    const size_t N2   = dim2 + 1;

    // One row of V and F, updated in place.
    vector<TScore> stl_rowV (N2), stl_rowF (N2);
    TScore* rowV    = &stl_rowV [0];
    TScore* rowF    = &stl_rowF [0];

    // pV[j] aliases rowV[j + 1], i.e. the cell to the right of column j
    TScore* pV = rowV + 1;

    // row i / column j of the matrix correspond to seq1[i] / seq2[j]
    const char* seq1 = m_Seq1 + rect.i1;
    const char* seq2 = m_Seq2 + rect.j1;

    const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    // End gaps are free only when the rectangle touches the corresponding
    // true end of a sequence and the matching m_esf_* flag is set.
    bool bFreeGapRight1  = m_esf_R1 && rect.i2 == m_SeqLen1 - 1;
    bool bFreeGapRight2  = m_esf_R2 && rect.j2 == m_SeqLen2 - 1;
    bool bFreeGapLeft2  =  m_esf_L2 && rect.j1 == 0;

    // progress reporting

    // the callback is invoked once per prg_rep_rate rows and is credited
    // with the number of cells in that many rows
    const size_t prg_rep_rate = 100;
    const size_t prg_rep_increment = prg_rep_rate*N2;

    // bottom row

    TScore wg = bFreeGapRight1? 0: m_Wg;
    TScore ws = bFreeGapRight1? 0: m_Ws;

    // rowV[N2 - 1] temporarily seeds the running sum, so that
    // rowV[j] = wg + (N2 - 1 - j) * ws; it is reset to 0 afterwards
    rowV[N2 - 1] = wg;
    // signed indices: both loops count down and stop below zero
    int i, j;
    for (j = N2 - 2; j >= 0; --j) {
        rowV[j] = pV[j] + ws;
        rowF[j] = kInfMinus;
    }
    rowV[N2 - 1] = 0;

    // recurrences

    // from here on wg/ws are the penalties of the rightmost column
    wg = bFreeGapRight2? 0: m_Wg;
    ws = bFreeGapRight2? 0: m_Ws;

    TScore V  = 0;
    TScore V0 = rb? 0: wg;
    TScore E, G, n0;

    // all rows except the top one; no backtrace flags are recorded here
    for(i = N1 - 2;  i > 0;  --i) {
        
        // rightmost column of row i
        V = V0 += ws;
        E = kInfMinus;
        unsigned char ci = seq1[i];

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (j = N2 - 2; j >= 0; --j) {

            // pV[j] still holds the value of cell [i+1, j+1] here; use it
            // for the diagonal move, then overwrite it with the value of
            // cell [i, j+1] computed in the previous step
            G = pV[j] + sm[ci][(unsigned char)seq2[j]];
            pV[j] = V;

            n0 = V + m_Wg;
            if(E >= n0)
                E += m_Ws;      // continue the gap
            else
                E = n0 + m_Ws;  // open a new gap

            // the leftmost column uses free left-end gaps when enabled
            if(j == 0 && bFreeGapLeft2) {
                wg2 = ws2 = 0;
            }

            // vertical move: rowV[j] is still the value of cell [i+1, j].
            // The strict '>' (the top-row code below uses '>=') does not
            // change the result: on equality both branches give n0 + ws2.
            n0 = rowV[j] + wg2;
            if (rowF[j] > n0)
                rowF[j] += ws2;
            else
                rowF[j] = n0 + ws2;

            // best of E, F and G; ties prefer E, then F
            V = (E >= rowF[j])? (E >= G? E: G): (rowF[j] >= G? rowF[j]: G);
        }
        // j == -1 here, so this stores the leftmost cell into rowV[0]
        pV[j] = V;

        if( m_prg_callback && (N1 - i) % prg_rep_rate == 0 ) {
#ifdef NCBI_THREADS
            CFastMutexGuard guard (progress_mutex);
#endif
            m_prg_info.m_iter_done += prg_rep_increment;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }

    // the top row (i == 0)
    // Same recurrences as above, but E, F, G and the backtrace flags of
    // every cell are exported through vE, vF, vG and trace.
    if(!m_terminate) {

        vF[N2-1] = V = V0 += ws;
        vG[N2-1] = vE[N2-1] = E = kInfMinus;
        trace[N2-1] = kMaskFc;
        unsigned char ci = seq1[i];

        TScore wg2 = m_Wg, ws2 = m_Ws;

        unsigned char tracer;
        for (j = N2 - 2; j >= 0; --j) {

            vG[j] = G = pV[j] + sm[ci][(unsigned char)seq2[j]];
            pV[j] = V;

            n0 = V + m_Wg;
            if(E >= n0) {
                E += m_Ws;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + m_Ws;  // open a new gap
                tracer = 0;
            }
            vE[j] = E;

            if(j == 0 && bFreeGapLeft2) {
                wg2 = ws2 = 0;
            }

            n0 = rowV[j] + wg2;
            if(rowF[j] >= n0) {
                rowF[j] += ws2;
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + ws2;
            }
            vF[j] = rowF[j];

            // pick the best of E, F and G; when F wins no direction bit
            // is set in the tracer
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }
            trace[j] = tracer;
        }
    }

    // final progress report
    // NOTE(review): the periodic report above adds a number of cells
    // (prg_rep_rate * N2) while this remainder is a number of rows and is
    // not multiplied by N2 - confirm whether that is intended.
    if( m_prg_callback ) {
#ifdef NCBI_THREADS
        CFastMutexGuard guard (progress_mutex);
#endif
        m_prg_info.m_iter_done += (N1 - i) % prg_rep_rate;
        m_terminate = m_prg_callback(&m_prg_info);
    }
}
Example #5
0
// Fills the dynamic-programming matrix for aligning the selected parts of
// m_Seq1 (matrix rows) and m_Seq2 (matrix columns) with affine gap
// penalties, stores per-cell backtrace flags, runs the backtrace and
// verifies the result by re-scoring the produced transcript.
//
// data - alignment descriptor. m_offset1/m_len1 and m_offset2/m_len2 select
//        the parts of the sequences being aligned; the m_esf_* flags make
//        the corresponding end gaps free. The transcript is written to
//        data->m_transcript by x_SWDoBackTrace().
//
// Returns the best cell value of the whole matrix in Smith-Waterman mode,
// otherwise the value of the last computed cell; 0 when the progress
// callback requests termination before any cell is computed.
//
// Throws CAlgoAlignException (eBadParameter) when Smith-Waterman mode is
// combined with offsets / partial lengths or with non-free end gaps, and
// CAlgoAlignException (eInternal) when the score recomputed from the
// transcript differs from the score found in the matrix.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    //check data integrity

    if( m_SmithWaterman && ( data->m_offset1 || m_SeqLen1 != data->m_len1 ||
                             data->m_offset2 || m_SeqLen2 != data->m_len2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with offsets provided");
    }

    if( m_SmithWaterman && ( !data->m_esf_L1 || !data->m_esf_R1 ||
                             !data->m_esf_L2 || !data->m_esf_R2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with end gap penalties");
    }

    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // One row of V and F is kept and updated in place.
    vector<TScore> stl_rowV (N2), stl_rowF(N2);

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // End gaps are free only when the aligned range touches the
    // corresponding true end of a sequence and the m_esf_* flag is set.
    bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    bool bFreeGapRight1 = data->m_esf_R1 &&
                          m_SeqLen1 == data->m_offset1 + data->m_len1;

    bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    bool bFreeGapRight2 = data->m_esf_R2 &&
                          m_SeqLen2 == data->m_offset2 + data->m_len2;

    TScore wgleft1   = bFreeGapLeft1? 0: m_Wg;
    TScore wsleft1   = bFreeGapLeft1? 0: m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // first row: stl_rowV[k] = wgleft1 + k * wsleft1
    // note that stl_rowF[0] is not used in the main cycle
    size_t k;
    stl_rowV[0] = wgleft1;
    for (k = 1; k < N2; ++k) {
        stl_rowV[k] = stl_rowV[k-1] + wsleft1;
        stl_rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    // stl_rowV[0] only served as the seed of the running sum above
    stl_rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // gap penalties
    TScore wgleft2 (bFreeGapLeft2? 0: m_Wg);
    TScore wsleft2 (bFreeGapLeft2? 0: m_Ws);

    const char * seq1 = m_Seq1 + data->m_offset1;
    const char * seq1_end = seq1 + data->m_len1;

    TScore V0 = wgleft2;
    // best score in the current cell; equals the NW score at the end
    TScore V = 0;
    // best score in the whole matrix, i.e. the SW score
    TScore best_V = 0;

    // from here on k is pre-incremented before every SetAt()
    --k;

    for(;  seq1 != seq1_end && !m_terminate;  ++seq1) {

        // first column of the row
        backtrace_matrix.SetAt(++k, kMaskFc);

        // the last row uses free right-end gaps when enabled
        if( seq1 + 1 == seq1_end && bFreeGapRight1) {
                wg1 = ws1 = 0;
        }

        unsigned char tracer;

        // Index through unsigned char: a plain char may be signed, and
        // converting a negative value straight to size_t would give a
        // huge index.
        const TNCBIScore * row_sc = sm[(unsigned char)*seq1];

        const char * seq2 = m_Seq2 + data->m_offset2;
        const char * seq2_end = seq2 + data->m_len2;
        TScore wg2 = m_Wg, ws2 = m_Ws;

        // best score ending with a horizontal gap (the cell is reached
        // from its left neighbour), either just opened or extended
        TScore  E = kInfMinus;
        // best score ending with a vertical gap (the cell is reached
        // from the row above)
        TScore F;
        // best score ending with a diagonal move (match / mismatch)
        TScore G;
        // just temporary
        TScore n0;
        // walks the V row; cells not yet overwritten hold the previous row
        TScore * rowV    = &stl_rowV[0];
        // first cell of the current row
        V = V0 += wsleft2;
        // walks the F row
        TScore * rowF    = &stl_rowF[0];

        while (seq2 != seq2_end) {

            // *rowV still holds the diagonal neighbour from the previous
            // row; use it, then overwrite it with the value of the cell
            // computed in the previous step
            G = *rowV + row_sc[(unsigned char)*seq2++];
            *rowV = V;

            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;  // open a new gap
                tracer = 0;
            }

            // seq2 was already advanced, so this is the last column
            if( bFreeGapRight2 && seq2 == seq2_end ) {
                wg2 = ws2 = 0;
            }

            // vertical move: after the increment *rowV is the cell right
            // above the current one
            F = *++rowF;
            n0 = *++rowV + wg2;
            if(F >= n0) {
                F += ws2;
                tracer |= kMaskFc;
            }
            else {
                F = n0 + ws2;
            }
            *rowF = F;

            // best score; m_GapPreference decides how ties between a gap
            // and a match (or between the two gap kinds) are resolved.
            // When F wins no direction bit is set in the tracer.
            if( G < F || ( G == F && m_GapPreference == eLater) ) {
                if( E <= F ) {
                    V = F;
                } else {
                    V = E;
                    tracer |= kMaskE;
                }
            } else if( E > G || ( E == G && m_GapPreference == eLater) ) {
                V = E;
                tracer |= kMaskE;
            } else {
                V = G;
                tracer |= kMaskD;
            }

            // local alignment: scores never drop below zero
            if (m_SmithWaterman && V < 0 ) {
                V = 0;
            }

            backtrace_matrix.SetAt(++k, tracer);

            if (V > best_V) {
                best_V = V;
                backtrace_matrix.SetBestPos(k);
            }
        }
        // store the last cell of the row
        *rowV = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(++k);
    backtrace_matrix.SetBestScore(best_V);

    /*
    //print the matrix out
    {{
    cout<<endl;
    int kk, ind1, ind2, width = 4;
    cout<<setw(width)<<" ";
    cout<<setw(width)<<"-";
    for(ind2 = 0; ind2 < N2-1; ++ind2) {
        cout<<setw(width)<<*(m_Seq2 + data->m_offset2 + ind2);
    }
    cout<<endl;
    for(kk = 0,ind1 = 0; ind1 < N1; ++ind1) {        
        if(ind1) { 
            cout<<setw(width)<<(m_Seq1 + data->m_offset1)[ind1-1];
        } else {
            cout<<setw(width)<<"-";
        }
        for(ind2 = 0; ind2 < N2; ++ind2,++kk) {
            string tstr;
            unsigned char Key (backtrace_matrix[kk]);
            if( Key & kMaskD ) tstr += "D";
            else if ( Key & kMaskE ) tstr += "E";
            else tstr += "F";
            if( Key & kMaskEc )  tstr += "-";            
            if( Key & kMaskFc )  tstr += "|";
            cout<<setw(width)<<tstr;
        }
        cout<<endl<<endl;
    }
    cout<<endl;
    }}
    //end of print the matrix out
    */

    // score reported to the caller: best cell for SW, last cell for NW
    const TScore final_score = m_SmithWaterman? best_V: V;

    if(!m_terminate) {
        x_SWDoBackTrace(backtrace_matrix, data);

        // check back trace: the transcript, copied in reverse order, must
        // reproduce the score found in the matrix
        TTranscript rv (data->m_transcript.size());
        copy(data->m_transcript.rbegin(), data->m_transcript.rend(), rv.begin());
        if( final_score !=
            ScoreFromTranscript(rv,  data->m_offset1,  data->m_offset2) ) {
            NCBI_THROW(CAlgoAlignException, eInternal,
                       "CNWAligner: error in back trace");
        }
    }

    return final_score;
}
// Evaluate dynamic programming matrix. Create transcript.
//
// When the length difference between the two windows is too small to hold
// an intron (len2 - len1 < 2/3 of m_IntronMinSize) the work is delegated to
// CBandAligner::x_Align() with a shift and band derived from that difference.
// Otherwise a full matrix is filled using affine gap scoring plus intron
// (splice) transitions, each cell's decision is recorded in a Uint4
// backtrace matrix, and x_DoBackTrace() is called on it.
//
// Scores are accumulated as double inside this procedure (so that the tiny
// cds_penalty_extra is representable) and converted back to
// CNWAligner::TScore on return.
//
// @param data  alignment window (offsets, lengths, end-space-free flags);
//              passed on to x_DoBackTrace().
// @return      score of the bottom-right cell, converted to
//              CNWAligner::TScore, or the result of CBandAligner::x_Align()
//              when the banded fallback is taken.
CNWAligner::TScore CSplicedAligner32::x_Align (SAlignInOut* data)
{
    // use the banded version if there is no space for introns
    const int len_dif (data->m_len2 - data->m_len1);
    if(len_dif < 2 * int (m_IntronMinSize) / 3) {
        const Uint1 where  (len_dif < 0? 0: 1);
        const size_t shift (abs(len_dif) / 2);
        const size_t band  (abs(len_dif) + 2*(max(data->m_len1,data->m_len2)/20 + 1));
        SetShift(where, shift);
        SetBand(band);
        return CBandAligner::x_Align(data);
    }

    // redefine TScore as a floating-point type for this procedure only
    typedef double TScore;
    const TScore cds_penalty_extra = -2e-6;

    // matrix dimensions: one extra row/column for the empty prefix
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // rolling rows: rowV holds cell scores, rowF the best score ending in a
    // gap that arrives from the row above
    vector<TScore> stl_rowV (N2), stl_rowF (N2);
    TScore* rowV    = &stl_rowV[0];
    TScore* rowF    = &stl_rowF[0];

    // index calculation: [i,j] = i*n2 + j
    SAllocator<Uint4> alloc_bm (N1*N2);
    Uint4* backtrace_matrix (alloc_bm.GetPointer());

    // pV[j] aliases rowV[j-1]: it is read as the diagonal predecessor and
    // then overwritten with the current row's value for column j-1
    TScore* pV = rowV - 1;

    // one-based views of the two windows (seq1[i], seq2[j] with i,j >= 1)
    const char* seq1   = m_Seq1 + data->m_offset1 - 1;
    const char* seq2   = m_Seq2 + data->m_offset2 - 1;

    const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    // an end gap is free only if requested AND the window touches the
    // corresponding end of the whole sequence
    bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    bool bFreeGapRight1 = data->m_esf_R1 &&
                          m_SeqLen1 == data->m_offset1 + data->m_len1;

    // sequence 2 must be tested against its own offset (was m_offset1)
    bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    bool bFreeGapRight2 = data->m_esf_R2 &&
                          m_SeqLen2 == data->m_offset2 + data->m_len2;

    TScore wgleft1   = bFreeGapLeft1? 0: m_Wg;
    TScore wsleft1   = bFreeGapLeft1? 0: m_Ws;
    // row 0 uses the left-end penalties; they are reset to m_Wg/m_Ws once
    // row 0 has been processed (see the end of the outer loop)
    TScore wg1 = wgleft1, ws1 = wsleft1;

    // recurrences
    TScore wgleft2   = bFreeGapLeft2? 0: m_Wg;
    TScore wsleft2   = bFreeGapLeft2? 0: m_Ws;
    TScore V  = 0;
    TScore V0 = 0;
    TScore E, G, n0;
    Uint4 type;

    // store candidate donors
    // per splice type: a queue (jHead..jTail) of donor columns and their
    // scores, backed by one slice of N2 entries each
    size_t* jAllDonors [splice_type_count_32];
    TScore* vAllDonors [splice_type_count_32];
    vector<size_t> stl_jAllDonors (splice_type_count_32 * N2);
    vector<TScore> stl_vAllDonors (splice_type_count_32 * N2);
    for(unsigned char st = 0; st < splice_type_count_32; ++st) {
        jAllDonors[st] = &stl_jAllDonors[st*N2];
        vAllDonors[st] = &stl_vAllDonors[st*N2];
    }
    size_t  jTail[splice_type_count_32], jHead[splice_type_count_32];
    TScore  vBestDonor   [splice_type_count_32];
    size_t  jBestDonor   [splice_type_count_32] = {0};

    // place to store gap opening starts
    size_t ins_start;
    vector<size_t> stl_del_start(N2);
    size_t* del_start = &stl_del_start[0];

    // donor/acceptor matrix
    // indexed by a dinucleotide (c1<<8 | c2); the high nibble is used below
    // as a per-type donor mask and the low nibble as a per-type acceptor mask
    const Uint1 * dnr_acc_matrix = g_dnr_acc_matrix.GetMatrix();

    // fake row (above lambda)
    size_t k;
    for (k = 0; k < N2; k++) {
        rowV[k] = rowF[k] = kInfMinus;
        del_start[k] = k;
    }
    k = 0;

    // translate CDS bounds into window-relative row indices
    size_t cds_start = m_cds_start, cds_stop = m_cds_stop;
    if(cds_start < cds_stop) {
        cds_start -= data->m_offset1;
        cds_stop -= data->m_offset1;
    }

    size_t i, j = 0, k0;
    unsigned char ci;
    for(i = 0;  i < N1;  ++i, j = 0) {

        // first column of the row: running left-end gap along sequence 1
        V = i > 0? (V0 += wsleft2) : 0;
        E = kInfMinus;
        ins_start = k0 = k;    // k0 = linear index of this row's first cell
        backtrace_matrix[k++] = kTypeGap; // | del_start[0]
        ci = i > 0? seq1[i]: 'N';

        // donor queues and best donors are per row
        for(unsigned char st = 0; st < splice_type_count_32; ++st) {
            jTail[st] = jHead[st] = 0;
            vBestDonor[st] = kInfMinus;
        }

        if(i == N1 - 1 && bFreeGapRight1) {
                wg1 = ws1 = 0;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        // detect donor candidate
        // (column 0: the donor signal would be seq2[1..2])
        if(N2 > 2) {
            unsigned char d1 = seq2[1], d2 = seq2[2];
            Uint1 dnr_type = 0xF0 & dnr_acc_matrix[(size_t(d1)<<8)|d2];

            for(Uint1 st = 0; st < splice_type_count_32; ++st ) {
                jAllDonors[st][jTail[st]] = j;
                if(dnr_type & (0x10 << st)) {
                    vAllDonors[st][jTail[st]] =
                        ( d1 == g_nwspl32_donor[st][0] &&
                          d2 == g_nwspl32_donor[st][1] ) ? V: (V + m_Wd1);
                }
                else { // both chars distorted
                    vAllDonors[st][jTail[st]] = V + m_Wd2;
                }
                ++(jTail[st]);
            }
        }

        // inside the CDS, gap extension gets a tiny extra penalty
        // NOTE(review): ws1 is declared outside the row loop and is only
        // reset after row 0, so this increment accumulates over consecutive
        // CDS rows, whereas ws2 is re-initialised every row - confirm that
        // the asymmetry is intended.
        // NOTE(review): j is always 0 at this point, so the second test
        // reduces to !bFreeGapLeft2.
        if(cds_start <= i && i < cds_stop) {

            if(i != 0 || ! bFreeGapLeft1) {
                ws1 += cds_penalty_extra;
            }
            if(j != 0 || ! bFreeGapLeft2) {
                ws2 += cds_penalty_extra;
            }
        }

        for (j = 1; j < N2; ++j, ++k) {

            // diagonal move; then store the current row's V for column j-1
            G = pV[j] + sm[ci][(unsigned char)seq2[j]];
            pV[j] = V;

            // gap arriving from the left (same row)
            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
            }
            else {
                E = n0 + ws1;  // open a new gap
                ins_start = k-1;
            }

            // gap arriving from above (rowV[j] still holds the previous row)
            if(j == N2 - 1 && bFreeGapRight2) {
                wg2 = ws2 = 0;
            }
            n0 = rowV[j] + wg2;
            if(rowF[j] >= n0) {
                rowF[j] += ws2;
            }
            else {
                rowF[j] = n0 + ws2;
                del_start[j] = k-N2;
            }

            // evaluate the score (V)
            // gap records carry the linear index where the gap was opened
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    type = kTypeGap | ins_start;
                }
                else {
                    V = G;
                    type = kTypeDiag;
                }
            } else {
                if(rowF[j] >= G) {
                    V = rowF[j];
                    type = kTypeGap | del_start[j];
                }
                else {
                    V = G;
                    type = kTypeDiag;
                }
            }

            // find out if there are new donors
            // (the queue head is promoted once it is at least
            // m_IntronMinSize columns behind j; at most one per column)
            for(unsigned char st = 0; st < splice_type_count_32; ++st) {

                if(jTail[st] > jHead[st])  {
                    if(j - jAllDonors[st][jHead[st]] >= m_IntronMinSize) {
                        if(vAllDonors[st][jHead[st]] > vBestDonor[st]) {
                            vBestDonor[st] = vAllDonors[st][jHead[st]];
                            jBestDonor[st] = jAllDonors[st][jHead[st]];
                        }
                        ++(jHead[st]);
                    }
                }
            }

            // check splice signal
            // (acceptor dinucleotide is seq2[j-1..j])
            Uint4 dnr_pos = kMax_UI4;
            unsigned char c1 = seq2[j-1], c2 = seq2[j];
            Uint1 acc_mask = 0x0F & dnr_acc_matrix[(size_t(c1)<<8)|c2];
            for(Uint1 st = 0; st < splice_type_count_32; ++st ) {
                if(acc_mask & (0x01 << st)) {
                    TScore vAcc = vBestDonor[st] + m_Wi[st];
                    if( c1 != g_nwspl32_acceptor[st][0] ||
                        c2 != g_nwspl32_acceptor[st][1] ) {

                        vAcc += m_Wd1;
                    }
                    if(vAcc > V) {
                        V = vAcc;
                        dnr_pos = k0 + jBestDonor[st];
                    }
                }
                else {   // try arbitrary splice
                    TScore vAcc = vBestDonor[st] + m_Wi[st] + m_Wd2;
                    if(vAcc > V) {
                        V = vAcc;
                        dnr_pos = k0 + jBestDonor[st];
                    }
                }
            }

            // an intron beat every other move: record the donor cell
            if(dnr_pos != kMax_UI4) {
                type = kTypeIntron | dnr_pos;
            }

            backtrace_matrix[k] = type;

            // detect donor candidates
            // (queued only if they beat the current best donor of the type)
            if(j < N2 - 2) {
                unsigned char d1 = seq2[j+1], d2 = seq2[j+2];
                Uint1 dnr_mask = 0xF0 & dnr_acc_matrix[(size_t(d1)<<8)|d2];
                for(Uint1 st = 0; st < splice_type_count_32; ++st ) {
                    if( dnr_mask & (0x10 << st) ) {
                        if( d1 == g_nwspl32_donor[st][0] &&
                            d2 == g_nwspl32_donor[st][1] ) {

                            if(V > vBestDonor[st]) {
                                jAllDonors[st][jTail[st]] = j;
                                vAllDonors[st][jTail[st]] = V;
                                ++(jTail[st]);
                            }
                        } else {
                            TScore v = V + m_Wd1;
                            if(v > vBestDonor[st]) {
                                jAllDonors[st][jTail[st]] = j;
                                vAllDonors[st][jTail[st]] = v;
                                ++(jTail[st]);
                            }
                        }
                    }
                    else { // both chars distorted
                        TScore v = V + m_Wd2;
                        if(v > vBestDonor[st]) {
                            jAllDonors[st][jTail[st]] = j;
                            vAllDonors[st][jTail[st]] = v;
                            ++(jTail[st]);
                        }
                    }
                }
            }
        }

        // j == N2 here: store the last cell of the row into rowV[N2-1]
        pV[j] = V;

        if(i == 0) {
            // switch from the left-end penalties to the regular ones
            V0 = wgleft2;
            wg1 = m_Wg;
            ws1 = m_Ws;
        }

    }

    try {
        x_DoBackTrace(backtrace_matrix, data);
    }
    catch(exception&) { // GCC hack
      throw;
    }

    return CNWAligner::TScore(V);
}
Example #7
0
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    // Fill the global-alignment matrix for the window described by `data`
    // using affine gap scoring, record every cell's decision bits in a
    // CBacktraceMatrix4 and, unless the progress callback asked to stop,
    // run x_DoBackTrace() on it. Returns the score of the last computed
    // cell (0 if the callback aborts before any work is done).

    // Matrix dimensions: one extra row/column for the empty prefix.
    const size_t nrows = data->m_len1 + 1;
    const size_t ncols = data->m_len2 + 1;

    // Rolling rows: best cell score, and best score ending in a gap that
    // arrives from the row above.
    vector<TScore> best_row (ncols);
    vector<TScore> vgap_row (ncols);

    // Window starts; matrix row i / column j corresponds to s1[i-1] / s2[j-1].
    const char* const s1 = m_Seq1 + data->m_offset1;
    const char* const s2 = m_Seq2 + data->m_offset2;

    const TNCBIScore (*subst) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    // Report the total amount of work and honour an immediate abort.
    if (m_prg_callback) {
        m_prg_info.m_iter_total = nrows * ncols;
        m_prg_info.m_iter_done  = 0;
        m_terminate = m_prg_callback(&m_prg_info);
        if (m_terminate) {
            return 0;
        }
    }

    // An end gap is free only if requested and the window touches the
    // corresponding end of the whole sequence.
    const bool free_left1  = data->m_esf_L1 && data->m_offset1 == 0;
    const bool free_right1 = data->m_esf_R1
                             && m_SeqLen1 == data->m_offset1 + data->m_len1;
    const bool free_left2  = data->m_esf_L2 && data->m_offset2 == 0;
    const bool free_right2 = data->m_esf_R2
                             && m_SeqLen2 == data->m_offset2 + data->m_len2;

    const TScore first_row_open   = free_left1 ? 0 : m_Wg;
    const TScore first_row_extend = free_left1 ? 0 : m_Ws;
    TScore hgap_open   = m_Wg;
    TScore hgap_extend = m_Ws;

    // Linear cell index: [i,j] = i*ncols + j
    CBacktraceMatrix4 backtrace_matrix (nrows * ncols);
    backtrace_matrix.SetAt(0, 0);

    // First row: a single gap running along the top edge.
    size_t cell = 1;
    best_row[0] = first_row_open;
    for ( ; cell < ncols; ++cell) {
        best_row[cell] = best_row[cell - 1] + first_row_extend;
        vgap_row[cell] = kInfMinus;
        backtrace_matrix.SetAt(cell, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(cell);
    best_row[0] = 0;

    if (m_prg_callback) {
        m_prg_info.m_iter_done = cell;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // Recurrences.
    const TScore first_col_open   = free_left2 ? 0 : m_Wg;
    const TScore first_col_extend = free_left2 ? 0 : m_Ws;

    TScore score     = best_row[ncols - 1];
    TScore first_col = first_col_open;

    // The loop condition also ends the scan once the per-row callback
    // below sets m_terminate.
    for (size_t i = 1; i < nrows && !m_terminate; ++i) {

        // First column of the row: running gap along the left edge.
        first_col += first_col_extend;
        score = first_col;
        TScore hgap = kInfMinus;
        backtrace_matrix.SetAt(cell, kMaskFc);
        ++cell;

        const unsigned char c1 = s1[i - 1];

        if (free_right1 && i == nrows - 1) {
            hgap_open = hgap_extend = 0;
        }

        TScore vgap_open   = m_Wg;
        TScore vgap_extend = m_Ws;

        for (size_t j = 1; j < ncols; ++j, ++cell) {

            // best_row[j-1] still holds the previous row's value (the
            // diagonal predecessor); replace it with this row's value.
            const TScore diag = best_row[j - 1]
                + subst[c1][static_cast<unsigned char>(s2[j - 1])];
            best_row[j - 1] = score;

            // Gap arriving from the left: extend or open.
            unsigned char tracer;
            const TScore hopen = score + hgap_open;
            if (hgap >= hopen) {
                hgap += hgap_extend;
                tracer = kMaskEc;
            }
            else {
                hgap = hopen + hgap_extend;
                tracer = 0;
            }

            // Gap arriving from above: extend or open.
            if (free_right2 && j == ncols - 1) {
                vgap_open = vgap_extend = 0;
            }
            const TScore vopen = best_row[j] + vgap_open;
            if (vgap_row[j] >= vopen) {
                vgap_row[j] += vgap_extend;
                tracer |= kMaskFc;
            }
            else {
                vgap_row[j] = vopen + vgap_extend;
            }

            // Pick the better gap (ties go to the left one), then compare
            // it with the diagonal (ties go to the gap).
            const bool   left_gap_wins = hgap >= vgap_row[j];
            const TScore gap_best      = left_gap_wins ? hgap : vgap_row[j];
            if (gap_best >= diag) {
                score = gap_best;
                if (left_gap_wins) {
                    tracer |= kMaskE;
                }
            }
            else {
                score = diag;
                tracer |= kMaskD;
            }

            backtrace_matrix.SetAt(cell, tracer);
        }

        // Store the last cell of the row.
        best_row[ncols - 1] = score;

        if (m_prg_callback) {
            m_prg_info.m_iter_done = cell;
            m_terminate = m_prg_callback(&m_prg_info);
        }
    }

    backtrace_matrix.Purge(cell);

    if (!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    return score;
}