// Profile-vs-profile dynamic programming with affine gap penalties.
//
// Walks the (len1+1) x (len2+1) matrix row by row.  Only one row of cell
// scores (rowV) and one row of "gap carried over from the previous row"
// scores (rowF) are kept; the direction taken in every cell is written to
// a backtrace matrix addressed as [i,j] = i*N2 + j.
//
// Gap-opening penalties are scaled by (1 - gap frequency) of the opposite
// profile column, where element [0] of a column is used as its gap
// frequency and elements [1, kPSSM_ColumnSize) as residue frequencies.
//
// Returns 0 if the progress callback asks to stop before any work is done.
// Otherwise returns the score of the last computed cell, converted with
// (TScore)(V + 0.5); the transcript is produced via x_DoBackTrace() unless
// the callback terminated the run.
CNWAligner::TScore CPSSMAligner::x_AlignProfile(SAlignInOut* data)
{
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    vector<double> stl_rowV (N2), stl_rowF (N2);
    double* rowV = &stl_rowV[0];
    double* rowF = &stl_rowF[0];

    // shifted by one so that the 1-based matrix indices i, j can be used
    // directly to address profile columns
    const double** freq1_row = m_Freq1 + data->m_offset1 - 1;
    const double** freq2_row = m_Freq2 + data->m_offset2 - 1;

    m_terminate = false;

    if (m_prg_callback) {
        m_prg_info.m_iter_total = N1 * N2;
        m_prg_info.m_iter_done  = 0;
        m_terminate = m_prg_callback(&m_prg_info);
        if (m_terminate) {
            return 0;
        }
    }

    // Gap opening (wg) / extension (ws) penalties for the left (L) and
    // right (R) ends of both sequences.  The regular penalties apply unless
    // the aligned segment touches a true end of the sequence; there the
    // penalty is either waived (end-space free) or replaced by the
    // dedicated start/end penalties.
    TScore wg1L = m_Wg, ws1L = m_Ws;
    TScore wg1R = m_Wg, ws1R = m_Ws;
    TScore wg2L = m_Wg, ws2L = m_Ws;
    TScore wg2R = m_Wg, ws2R = m_Ws;

    if (data->m_offset1 == 0) {
        if (data->m_esf_L1) {
            wg1L = 0;
            ws1L = 0;
        } else {
            wg1L = m_StartWg;
            ws1L = m_StartWs;
        }
    }

    if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
        if (data->m_esf_R1) {
            wg1R = 0;
            ws1R = 0;
        } else {
            wg1R = m_EndWg;
            ws1R = m_EndWs;
        }
    }

    if (data->m_offset2 == 0) {
        if (data->m_esf_L2) {
            wg2L = 0;
            ws2L = 0;
        } else {
            wg2L = m_StartWg;
            ws2L = m_StartWs;
        }
    }

    if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
        if (data->m_esf_R2) {
            wg2R = 0;
            ws2R = 0;
        } else {
            wg2R = m_EndWg;
            ws2R = m_EndWs;
        }
    }

    // penalties used along the current row; switched to the right-end
    // values on the last row
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);

    // ---- first row ----
    size_t k = 1;
    if (N2 > 1) {
        // rowV[0] temporarily seeds the running sum of the first row
        rowV[0] = wg1L * (1.0 - freq2_row[1][0]);
        for (k = 1; k < N2; ++k) {
            rowV[k] = rowV[k - 1] + ws1L;
            rowF[k] = kInfMinus;
            backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
        }
        backtrace_matrix.Purge(k);
    }
    rowV[0] = 0;

    if (m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // ---- recurrences ----
    double V  = rowV[N2 - 1];
    double V0 = 0;      // running score of the first column
    if (N1 > 1) {
        V0 = wg2L * (1.0 - freq1_row[1][0]);
    }

    double E, G;
    unsigned char tracer;

    for (size_t i = 1; i < N1 && !m_terminate; ++i) {

        V0 += ws2L;
        V = V0;
        E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);

        if (i == N1 - 1) {
            wg1 = wg1R;
            ws1 = ws1R;
        }

        // the column of profile 1 is the same for the whole row
        const double* profile1 = freq1_row[i];

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (size_t j = 1; j < N2; ++j, ++k) {

            if (j == N2 - 1) {
                wg2 = wg2R;
                ws2 = ws2R;
            }

            const double* profile2 = freq2_row[j];

            const double open_pen1 = wg1 * (1.0 - profile2[0]);
            const double ext_pen1  = ws1;
            const double open_pen2 = wg2 * (1.0 - profile1[0]);
            const double ext_pen2  = ws2;

            // Column-vs-column score.  Each residue frequency is split
            // into the part shared by both columns and the remainder.
            // The shared part is scored on the matrix diagonal; the
            // all-against-all computation is only done on the remainders,
            // so more similar columns receive a higher score.
            double accum = 0.0;
            int num_zeros1 = 0, num_zeros2 = 0;
            double diff_freq1[kPSSM_ColumnSize];
            double diff_freq2[kPSSM_ColumnSize];

            for (int m = 1; m < kPSSM_ColumnSize; ++m) {
                if (profile1[m] < profile2[m]) {
                    accum += profile1[m] * m_DScoreMatrix[m][m];
                    diff_freq1[m] = 0.0;
                    diff_freq2[m] = profile2[m] - profile1[m];
                    ++num_zeros1;
                } else {
                    accum += profile2[m] * m_DScoreMatrix[m][m];
                    diff_freq1[m] = profile1[m] - profile2[m];
                    diff_freq2[m] = 0.0;
                    ++num_zeros2;
                }
            }

            // normalize the remainder of the profile with the smaller gap
            double* const to_normalize = (profile1[0] <= profile2[0])
                ? &diff_freq1[0] : &diff_freq2[0];

            double total = 0.0;
            for (int m = 1; m < kPSSM_ColumnSize; ++m) {
                total += to_normalize[m];
            }

            if (total > 0) {
                const double scale = 1.0 / total;
                for (int m = 1; m < kPSSM_ColumnSize; ++m) {
                    to_normalize[m] *= scale;
                }

                // Add in the cross terms (not counting gaps).  The outer
                // loop runs over the remainder having more zero entries,
                // which are skipped.  Note that this assumes a symmetric
                // score matrix.
                const bool first_is_outer = num_zeros1 > num_zeros2;
                const double* outer = first_is_outer
                    ? &diff_freq1[0] : &diff_freq2[0];
                const double* inner = first_is_outer
                    ? &diff_freq2[0] : &diff_freq1[0];

                for (int m = 1; m < kPSSM_ColumnSize; ++m) {
                    if (outer[m] > 0) {
                        double row_sum = 0.0;
                        double *matrix_row = m_DScoreMatrix[m];
                        for (int n = 1; n < kPSSM_ColumnSize; ++n) {
                            row_sum += inner[n] * matrix_row[n];
                        }
                        accum += outer[m] * row_sum;
                    }
                }
            }

            // diagonal move; rowV[j-1] still holds the previous row here
            G = rowV[j - 1] + accum * m_FreqScale +
                profile1[0] * m_Ws * (1-profile2[0]) +
                profile2[0] * m_Ws * (1-profile1[0]);

            rowV[j - 1] = V;

            // gap extended along the current row
            const double open_E = V + open_pen1;
            if (E >= open_E) {
                E += ext_pen1;              // continue the gap
                tracer = kMaskEc;
            } else {
                E = open_E + ext_pen1;      // open a new gap
                tracer = 0;
            }

            // gap carried over from the previous row
            const double open_F = rowV[j] + open_pen2;
            if (rowF[j] >= open_F) {
                rowF[j] += ext_pen2;
                tracer |= kMaskFc;
            } else {
                rowF[j] = open_F + ext_pen2;
            }

            // pick the best of the three states
            if (E >= rowF[j]) {
                if (E >= G) {
                    V = E;
                    tracer |= kMaskE;
                } else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else if (rowF[j] >= G) {
                V = rowF[j];
            } else {
                V = G;
                tracer |= kMaskD;
            }

            backtrace_matrix.SetAt(k, tracer);
        }

        // the last cell of the row has not been stored yet
        rowV[N2 - 1] = V;

        if (m_prg_callback) {
            m_prg_info.m_iter_done = k;
            m_terminate = m_prg_callback(&m_prg_info);
            if (m_terminate) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(k);

    if (!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    // NOTE(review): adding 0.5 before the truncating cast rounds to nearest
    // only for non-negative V - confirm negative scores are handled as
    // intended.
    return (TScore)(V + 0.5);
}
// PSSM-vs-sequence dynamic programming with affine gap penalties.
//
// Row i of the matrix corresponds to PSSM row (offset1 + i - 1), column j
// to residue (offset2 + j - 1) of the second sequence; the substitution
// score of a cell is looked up as pssm[row][residue].  Only one row of cell
// scores (rowV) and one row of gap scores carried over from the previous
// row (rowF) are kept, while every cell's direction is written to the
// backtrace matrix ([i,j] = i*N2 + j).
//
// Returns 0 if the progress callback asks to stop before any work is done.
// Otherwise returns the score of the last computed cell; the transcript is
// produced via x_DoBackTrace() unless the callback terminated the run.
CNWAligner::TScore CPSSMAligner::x_AlignPSSM(SAlignInOut* data)
{
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    vector<TScore> stl_rowV (N2), stl_rowF (N2);
    TScore* rowV = &stl_rowV[0];
    TScore* rowF = &stl_rowF[0];

    // shifted by one so that the 1-based matrix indices can be used as is
    const TScore** pssm_row = m_Pssm1 + data->m_offset1 - 1;
    const char* seq2 = m_Seq2 + data->m_offset2 - 1;

    m_terminate = false;

    if (m_prg_callback) {
        m_prg_info.m_iter_total = N1 * N2;
        m_prg_info.m_iter_done  = 0;
        m_terminate = m_prg_callback(&m_prg_info);
        if (m_terminate) {
            return 0;
        }
    }

    // Gap opening (wg) / extension (ws) penalties for the left (L) and
    // right (R) ends of both sequences: regular penalties by default,
    // zero or the dedicated start/end penalties when the segment reaches
    // the corresponding end of the full sequence.
    TScore wg1L = m_Wg, ws1L = m_Ws;
    TScore wg1R = m_Wg, ws1R = m_Ws;
    TScore wg2L = m_Wg, ws2L = m_Ws;
    TScore wg2R = m_Wg, ws2R = m_Ws;

    if (data->m_offset1 == 0) {
        if (data->m_esf_L1) {
            wg1L = 0;
            ws1L = 0;
        } else {
            wg1L = m_StartWg;
            ws1L = m_StartWs;
        }
    }

    if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
        if (data->m_esf_R1) {
            wg1R = 0;
            ws1R = 0;
        } else {
            wg1R = m_EndWg;
            ws1R = m_EndWs;
        }
    }

    if (data->m_offset2 == 0) {
        if (data->m_esf_L2) {
            wg2L = 0;
            ws2L = 0;
        } else {
            wg2L = m_StartWg;
            ws2L = m_StartWs;
        }
    }

    if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
        if (data->m_esf_R2) {
            wg2R = 0;
            ws2R = 0;
        } else {
            wg2R = m_EndWg;
            ws2R = m_EndWs;
        }
    }

    // penalties used along the current row; switched to the right-end
    // values on the last row
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // ---- first row ----
    size_t k;
    rowV[0] = wg1L;     // temporary seed for the running sum
    for (k = 1; k < N2; ++k) {
        rowV[k] = rowV[k - 1] + ws1L;
        rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    rowV[0] = 0;

    if (m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // ---- recurrences ----
    TScore V  = rowV[N2 - 1];
    TScore V0 = wg2L;   // running score of the first column
    TScore E, G;
    unsigned char tracer;

    for (size_t i = 1; i < N1 && !m_terminate; ++i) {

        V0 += ws2L;
        V = V0;
        E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);

        if (i == N1 - 1) {
            wg1 = wg1R;
            ws1 = ws1R;
        }

        const TScore* pssm_scores = pssm_row[i];
        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (size_t j = 1; j < N2; ++j, ++k) {

            // diagonal move; rowV[j-1] still holds the previous row here
            G = rowV[j - 1] + pssm_scores[(unsigned char)seq2[j]];
            rowV[j - 1] = V;

            // gap extended along the current row
            const TScore open_E = V + wg1;
            if (E >= open_E) {
                E += ws1;               // continue the gap
                tracer = kMaskEc;
            } else {
                E = open_E + ws1;       // open a new gap
                tracer = 0;
            }

            if (j == N2 - 1) {
                wg2 = wg2R;
                ws2 = ws2R;
            }

            // gap carried over from the previous row
            const TScore open_F = rowV[j] + wg2;
            if (rowF[j] >= open_F) {
                rowF[j] += ws2;
                tracer |= kMaskFc;
            } else {
                rowF[j] = open_F + ws2;
            }

            // pick the best of the three states
            if (E >= rowF[j]) {
                if (E >= G) {
                    V = E;
                    tracer |= kMaskE;
                } else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else if (rowF[j] >= G) {
                V = rowF[j];
            } else {
                V = G;
                tracer |= kMaskD;
            }

            backtrace_matrix.SetAt(k, tracer);
        }

        // the last cell of the row has not been stored yet
        rowV[N2 - 1] = V;

        if (m_prg_callback) {
            m_prg_info.m_iter_done = k;
            m_terminate = m_prg_callback(&m_prg_info);
            if (m_terminate) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(k);

    if (!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    return V;
}
// Evaluate dynamic programming matrix. Create transcript. CNWAligner::TScore CSplicedAligner32::x_Align (SAlignInOut* data) { // use the banded version if there is no space for introns const int len_dif (data->m_len2 - data->m_len1); if(len_dif < 2 * int (m_IntronMinSize) / 3) { const Uint1 where (len_dif < 0? 0: 1); const size_t shift (abs(len_dif) / 2); const size_t band (abs(len_dif) + 2*(max(data->m_len1,data->m_len2)/20 + 1)); SetShift(where, shift); SetBand(band); return CBandAligner::x_Align(data); } // redefine TScore as a floating-point type for this procedure only typedef double TScore; const TScore cds_penalty_extra = -2e-6; const size_t N1 = data->m_len1 + 1; const size_t N2 = data->m_len2 + 1; vector<TScore> stl_rowV (N2), stl_rowF (N2); TScore* rowV = &stl_rowV[0]; TScore* rowF = &stl_rowF[0]; // index calculation: [i,j] = i*n2 + j SAllocator<Uint4> alloc_bm (N1*N2); Uint4* backtrace_matrix (alloc_bm.GetPointer()); TScore* pV = rowV - 1; const char* seq1 = m_Seq1 + data->m_offset1 - 1; const char* seq2 = m_Seq2 + data->m_offset2 - 1; const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s; bool bFreeGapLeft1 = data->m_esf_L1 && data->m_offset1 == 0; bool bFreeGapRight1 = data->m_esf_R1 && m_SeqLen1 == data->m_offset1 + data->m_len1; bool bFreeGapLeft2 = data->m_esf_L2 && data->m_offset1 == 0; bool bFreeGapRight2 = data->m_esf_R2 && m_SeqLen2 == data->m_offset2 + data->m_len2; TScore wgleft1 = bFreeGapLeft1? 0: m_Wg; TScore wsleft1 = bFreeGapLeft1? 0: m_Ws; TScore wg1 = wgleft1, ws1 = wsleft1; // recurrences TScore wgleft2 = bFreeGapLeft2? 0: m_Wg; TScore wsleft2 = bFreeGapLeft2? 
0: m_Ws; TScore V = 0; TScore V0 = 0; TScore E, G, n0; Uint4 type; // store candidate donors size_t* jAllDonors [splice_type_count_32]; TScore* vAllDonors [splice_type_count_32]; vector<size_t> stl_jAllDonors (splice_type_count_32 * N2); vector<TScore> stl_vAllDonors (splice_type_count_32 * N2); for(unsigned char st = 0; st < splice_type_count_32; ++st) { jAllDonors[st] = &stl_jAllDonors[st*N2]; vAllDonors[st] = &stl_vAllDonors[st*N2]; } size_t jTail[splice_type_count_32], jHead[splice_type_count_32]; TScore vBestDonor [splice_type_count_32]; size_t jBestDonor [splice_type_count_32] = {0}; // place to store gap opening starts size_t ins_start; vector<size_t> stl_del_start(N2); size_t* del_start = &stl_del_start[0]; // donor/acceptor matrix const Uint1 * dnr_acc_matrix = g_dnr_acc_matrix.GetMatrix(); // fake row (above lambda) rowV[0] = kInfMinus; size_t k; for (k = 0; k < N2; k++) { rowV[k] = rowF[k] = kInfMinus; del_start[k] = k; } k = 0; size_t cds_start = m_cds_start, cds_stop = m_cds_stop; if(cds_start < cds_stop) { cds_start -= data->m_offset1; cds_stop -= data->m_offset1; } size_t i, j = 0, k0; unsigned char ci; for(i = 0; i < N1; ++i, j = 0) { V = i > 0? (V0 += wsleft2) : 0; E = kInfMinus; ins_start = k0 = k; backtrace_matrix[k++] = kTypeGap; // | del_start[0] ci = i > 0? seq1[i]: 'N'; for(unsigned char st = 0; st < splice_type_count_32; ++st) { jTail[st] = jHead[st] = 0; vBestDonor[st] = kInfMinus; } if(i == N1 - 1 && bFreeGapRight1) { wg1 = ws1 = 0; } TScore wg2 = m_Wg, ws2 = m_Ws; // detect donor candidate if(N2 > 2) { unsigned char d1 = seq2[1], d2 = seq2[2]; Uint1 dnr_type = 0xF0 & dnr_acc_matrix[(size_t(d1)<<8)|d2]; for(Uint1 st = 0; st < splice_type_count_32; ++st ) { jAllDonors[st][jTail[st]] = j; if(dnr_type & (0x10 << st)) { vAllDonors[st][jTail[st]] = ( d1 == g_nwspl32_donor[st][0] && d2 == g_nwspl32_donor[st][1] ) ? 
V: (V + m_Wd1); } else { // both chars distorted vAllDonors[st][jTail[st]] = V + m_Wd2; } ++(jTail[st]); } } if(cds_start <= i && i < cds_stop) { if(i != 0 || ! bFreeGapLeft1) { ws1 += cds_penalty_extra; } if(j != 0 || ! bFreeGapLeft2) { ws2 += cds_penalty_extra; } } for (j = 1; j < N2; ++j, ++k) { G = pV[j] + sm[ci][(unsigned char)seq2[j]]; pV[j] = V; n0 = V + wg1; if(E >= n0) { E += ws1; // continue the gap } else { E = n0 + ws1; // open a new gap ins_start = k-1; } if(j == N2 - 1 && bFreeGapRight2) { wg2 = ws2 = 0; } n0 = rowV[j] + wg2; if(rowF[j] >= n0) { rowF[j] += ws2; } else { rowF[j] = n0 + ws2; del_start[j] = k-N2; } // evaluate the score (V) if (E >= rowF[j]) { if(E >= G) { V = E; type = kTypeGap | ins_start; } else { V = G; type = kTypeDiag; } } else { if(rowF[j] >= G) { V = rowF[j]; type = kTypeGap | del_start[j]; } else { V = G; type = kTypeDiag; } } // find out if there are new donors for(unsigned char st = 0; st < splice_type_count_32; ++st) { if(jTail[st] > jHead[st]) { if(j - jAllDonors[st][jHead[st]] >= m_IntronMinSize) { if(vAllDonors[st][jHead[st]] > vBestDonor[st]) { vBestDonor[st] = vAllDonors[st][jHead[st]]; jBestDonor[st] = jAllDonors[st][jHead[st]]; } ++(jHead[st]); } } } // check splice signal Uint4 dnr_pos = kMax_UI4; unsigned char c1 = seq2[j-1], c2 = seq2[j]; Uint1 acc_mask = 0x0F & dnr_acc_matrix[(size_t(c1)<<8)|c2]; for(Uint1 st = 0; st < splice_type_count_32; ++st ) { if(acc_mask & (0x01 << st)) { TScore vAcc = vBestDonor[st] + m_Wi[st]; if( c1 != g_nwspl32_acceptor[st][0] || c2 != g_nwspl32_acceptor[st][1] ) { vAcc += m_Wd1; } if(vAcc > V) { V = vAcc; dnr_pos = k0 + jBestDonor[st]; } } else { // try arbitrary splice TScore vAcc = vBestDonor[st] + m_Wi[st] + m_Wd2; if(vAcc > V) { V = vAcc; dnr_pos = k0 + jBestDonor[st]; } } } if(dnr_pos != kMax_UI4) { type = kTypeIntron | dnr_pos; } backtrace_matrix[k] = type; // detect donor candidates if(j < N2 - 2) { unsigned char d1 = seq2[j+1], d2 = seq2[j+2]; Uint1 dnr_mask = 0xF0 & 
dnr_acc_matrix[(size_t(d1)<<8)|d2]; for(Uint1 st = 0; st < splice_type_count_32; ++st ) { if( dnr_mask & (0x10 << st) ) { if( d1 == g_nwspl32_donor[st][0] && d2 == g_nwspl32_donor[st][1] ) { if(V > vBestDonor[st]) { jAllDonors[st][jTail[st]] = j; vAllDonors[st][jTail[st]] = V; ++(jTail[st]); } } else { TScore v = V + m_Wd1; if(v > vBestDonor[st]) { jAllDonors[st][jTail[st]] = j; vAllDonors[st][jTail[st]] = v; ++(jTail[st]); } } } else { // both chars distorted TScore v = V + m_Wd2; if(v > vBestDonor[st]) { jAllDonors[st][jTail[st]] = j; vAllDonors[st][jTail[st]] = v; ++(jTail[st]); } } } } } pV[j] = V; if(i == 0) { V0 = wgleft2; wg1 = m_Wg; ws1 = m_Ws; } } try { x_DoBackTrace(backtrace_matrix, data); } catch(exception&) { // GCC hack throw; } return CNWAligner::TScore(V); }
// Sequence-vs-sequence dynamic programming with affine gap penalties,
// usable both as global alignment and, when m_SmithWaterman is set, as
// local alignment (cell scores are then clamped at zero and the best cell
// of the whole matrix is tracked).
//
// Throws CAlgoAlignException(eBadParameter) if Smith-Waterman mode is
// combined with a sub-segment of the sequences or with any non-free end,
// and CAlgoAlignException(eInternal) if the score recomputed from the
// resulting transcript disagrees with the score found by the recurrences.
//
// Returns 0 if the progress callback asks to stop before any work is done;
// otherwise best_V in Smith-Waterman mode and the last cell's score in
// global mode.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    // check data integrity
    if (m_SmithWaterman) {
        const bool is_sub_segment =
            data->m_offset1 || m_SeqLen1 != data->m_len1 ||
            data->m_offset2 || m_SeqLen2 != data->m_len2;
        if (is_sub_segment) {
            NCBI_THROW(CAlgoAlignException, eBadParameter,
                       "Smith-Waterman not compatible with offsets provided");
        }

        const bool has_penalized_end =
            !data->m_esf_L1 || !data->m_esf_R1 ||
            !data->m_esf_L2 || !data->m_esf_R2;
        if (has_penalized_end) {
            NCBI_THROW(CAlgoAlignException, eBadParameter,
                       "Smith-Waterman not compatible with end gap penalties");
        }
    }

    const size_t len1 = data->m_len1;
    const size_t len2 = data->m_len2;
    const size_t N1 = len1 + 1;
    const size_t N2 = len2 + 1;

    // rolling rows: cell scores (V) and scores of gaps carried over from
    // the previous row (F)
    vector<TScore> stl_rowV (N2), stl_rowF (N2);

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if (m_prg_callback) {
        m_prg_info.m_iter_total = N1 * N2;
        m_prg_info.m_iter_done  = 0;
        m_terminate = m_prg_callback(&m_prg_info);
        if (m_terminate) {
            return 0;
        }
    }

    // End gaps are free only where the segment reaches the true end of the
    // corresponding sequence and the end-space-free flag is set.
    const bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    const bool bFreeGapRight1 = data->m_esf_R1 &&
        m_SeqLen1 == data->m_offset1 + data->m_len1;
    const bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    const bool bFreeGapRight2 = data->m_esf_R2 &&
        m_SeqLen2 == data->m_offset2 + data->m_len2;

    const TScore wgleft1 = bFreeGapLeft1 ? 0 : m_Wg;
    const TScore wsleft1 = bFreeGapLeft1 ? 0 : m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // ---- first row ----
    // note that stl_rowF[0] is not used in the main cycle
    size_t k;
    stl_rowV[0] = wgleft1;      // temporary seed for the running sum
    for (k = 1; k < N2; ++k) {
        stl_rowV[k] = stl_rowV[k - 1] + wsleft1;
        stl_rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    stl_rowV[0] = 0;

    if (m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // gap penalties for the first column
    const TScore wgleft2 (bFreeGapLeft2 ? 0 : m_Wg);
    const TScore wsleft2 (bFreeGapLeft2 ? 0 : m_Ws);

    const char* seq1 = m_Seq1 + data->m_offset1;
    const char* seq2 = m_Seq2 + data->m_offset2;

    TScore V0 = wgleft2;    // running score of the first column
    TScore V = 0;           // best score in the current cell; equal to the
                            // NW score at the end
    TScore best_V = 0;      // best score in the whole matrix (SW score)

    // from here on k is advanced *before* every store
    --k;

    for (size_t i = 0; i < len1 && !m_terminate; ++i) {

        backtrace_matrix.SetAt(++k, kMaskFc);

        if (i + 1 == len1 && bFreeGapRight1) {
            wg1 = ws1 = 0;
        }

        unsigned char tracer;
        const TNCBIScore* row_sc = sm[(size_t)seq1[i]];

        TScore wg2 = m_Wg, ws2 = m_Ws;

        // best score ending with a gap extended along the current row
        TScore E = kInfMinus;

        V0 += wsleft2;
        V = V0;

        for (size_t j = 0; j < len2; ++j) {

            // best score ending with a match; stl_rowV[j] still holds the
            // previous row's value here
            const TScore G = stl_rowV[j] + row_sc[(size_t)seq2[j]];
            stl_rowV[j] = V;

            const TScore open_E = V + wg1;
            if (E >= open_E) {
                E += ws1;               // continue the gap
                tracer = kMaskEc;
            } else {
                E = open_E + ws1;       // open a new gap
                tracer = 0;
            }

            // last column
            if (bFreeGapRight2 && j + 1 == len2) {
                wg2 = ws2 = 0;
            }

            // best score ending with a gap carried over from the previous
            // row
            TScore F = stl_rowF[j + 1];
            const TScore open_F = stl_rowV[j + 1] + wg2;
            if (F >= open_F) {
                F += ws2;
                tracer |= kMaskFc;
            } else {
                F = open_F + ws2;
            }
            stl_rowF[j + 1] = F;

            // best score; ties are resolved according to m_GapPreference
            if ( G < F || ( G == F && m_GapPreference == eLater) ) {
                if ( E <= F ) {
                    V = F;
                } else {
                    V = E;
                    tracer |= kMaskE;
                }
            } else if ( E > G || ( E == G && m_GapPreference == eLater) ) {
                V = E;
                tracer |= kMaskE;
            } else {
                V = G;
                tracer |= kMaskD;
            }

            if (m_SmithWaterman && V < 0) {
                V = 0;
            }

            backtrace_matrix.SetAt(++k, tracer);

            if (V > best_V) {
                best_V = V;
                backtrace_matrix.SetBestPos(k);
            }
        }

        // the last cell of the row has not been stored yet
        stl_rowV[len2] = V;

        if (m_prg_callback) {
            m_prg_info.m_iter_done = k;
            m_terminate = m_prg_callback(&m_prg_info);
            if (m_terminate) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(++k);
    backtrace_matrix.SetBestScore(best_V);

    /*
    //print the matrix out
    {{
        cout<<endl;
        int kk, ind1, ind2, width = 4;
        cout<<setw(width)<<" ";
        cout<<setw(width)<<"-";
        for(ind2 = 0; ind2 < N2-1; ++ind2) {
            cout<<setw(width)<<*(m_Seq2 + data->m_offset2 + ind2);
        }
        cout<<endl;
        for(kk = 0,ind1 = 0; ind1 < N1; ++ind1) {
            if(ind1) {
                cout<<setw(width)<<(m_Seq1 + data->m_offset1)[ind1-1];
            } else {
                cout<<setw(width)<<"-";
            }
            for(ind2 = 0; ind2 < N2; ++ind2,++kk) {
                string tstr;
                unsigned char Key (backtrace_matrix[kk]);
                if( Key & kMaskD ) tstr += "D";
                else if ( Key & kMaskE ) tstr += "E";
                else tstr += "F";
                if( Key & kMaskEc ) tstr += "-";
                if( Key & kMaskFc ) tstr += "|";
                cout<<setw(width)<<tstr;
            }
            cout<<endl<<endl;
        }
        cout<<endl;
    }}
    //end of print the matrix out
    */

    const TScore result = m_SmithWaterman ? best_V : V;

    if (!m_terminate) {
        x_SWDoBackTrace(backtrace_matrix, data);

        // check back trace: re-score the transcript (stored in reverse
        // order) and compare with the score found above
        TTranscript rv (data->m_transcript.size());
        copy(data->m_transcript.rbegin(), data->m_transcript.rend(),
             rv.begin());
        if ( result != ScoreFromTranscript(rv, data->m_offset1,
                                           data->m_offset2) ) {
            NCBI_THROW(CAlgoAlignException, eInternal,
                       "CNWAligner: error in back trace");
        }
    }

    return result;
}
// Global sequence-vs-sequence dynamic programming with affine gap
// penalties.
//
// Row i of the matrix corresponds to residue (offset1 + i - 1) of the
// first sequence, column j to residue (offset2 + j - 1) of the second.
// Only one row of cell scores (rowV) and one row of gap scores carried
// over from the previous row (rowF) are kept, while every cell's direction
// is written to the backtrace matrix ([i,j] = i*N2 + j).
//
// Returns 0 if the progress callback asks to stop before any work is done.
// Otherwise returns the score of the last computed cell; the transcript is
// produced via x_DoBackTrace() unless the callback terminated the run.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    vector<TScore> stl_rowV (N2), stl_rowF (N2);
    TScore* rowV = &stl_rowV[0];
    TScore* rowF = &stl_rowF[0];

    // shifted by one so that the 1-based matrix indices can be used as is
    const char* seq1 = m_Seq1 + data->m_offset1 - 1;
    const char* seq2 = m_Seq2 + data->m_offset2 - 1;

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if (m_prg_callback) {
        m_prg_info.m_iter_total = N1 * N2;
        m_prg_info.m_iter_done  = 0;
        m_terminate = m_prg_callback(&m_prg_info);
        if (m_terminate) {
            return 0;
        }
    }

    // End gaps are free only where the segment reaches the true end of the
    // corresponding sequence and the end-space-free flag is set.
    const bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    const bool bFreeGapRight1 = data->m_esf_R1 &&
        m_SeqLen1 == data->m_offset1 + data->m_len1;
    const bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    const bool bFreeGapRight2 = data->m_esf_R2 &&
        m_SeqLen2 == data->m_offset2 + data->m_len2;

    const TScore wgleft1 = bFreeGapLeft1 ? 0 : m_Wg;
    const TScore wsleft1 = bFreeGapLeft1 ? 0 : m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // ---- first row ----
    size_t k;
    rowV[0] = wgleft1;      // temporary seed for the running sum
    for (k = 1; k < N2; ++k) {
        rowV[k] = rowV[k - 1] + wsleft1;
        rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    rowV[0] = 0;

    if (m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // ---- recurrences ----
    const TScore wgleft2 (bFreeGapLeft2 ? 0 : m_Wg);
    const TScore wsleft2 (bFreeGapLeft2 ? 0 : m_Ws);
    TScore V  (rowV[N2 - 1]);
    TScore V0 (wgleft2);    // running score of the first column
    TScore E, G;
    unsigned char tracer;

    for (size_t i = 1; i < N1 && !m_terminate; ++i) {

        V0 += wsleft2;
        V = V0;
        E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);

        const unsigned char ci = seq1[i];

        if (i == N1 - 1 && bFreeGapRight1) {
            wg1 = ws1 = 0;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (size_t j = 1; j < N2; ++j, ++k) {

            // diagonal move; rowV[j-1] still holds the previous row here
            G = rowV[j - 1] + sm[ci][(unsigned char)seq2[j]];
            rowV[j - 1] = V;

            // gap extended along the current row
            const TScore open_E = V + wg1;
            if (E >= open_E) {
                E += ws1;               // continue the gap
                tracer = kMaskEc;
            } else {
                E = open_E + ws1;       // open a new gap
                tracer = 0;
            }

            if (j == N2 - 1 && bFreeGapRight2) {
                wg2 = ws2 = 0;
            }

            // gap carried over from the previous row
            const TScore open_F = rowV[j] + wg2;
            if (rowF[j] >= open_F) {
                rowF[j] += ws2;
                tracer |= kMaskFc;
            } else {
                rowF[j] = open_F + ws2;
            }

            // pick the best of the three states
            if (E >= rowF[j]) {
                if (E >= G) {
                    V = E;
                    tracer |= kMaskE;
                } else {
                    V = G;
                    tracer |= kMaskD;
                }
            } else if (rowF[j] >= G) {
                V = rowF[j];
            } else {
                V = G;
                tracer |= kMaskD;
            }

            backtrace_matrix.SetAt(k, tracer);
        }

        // the last cell of the row has not been stored yet
        rowV[N2 - 1] = V;

        if (m_prg_callback) {
            m_prg_info.m_iter_done = k;
            m_terminate = m_prg_callback(&m_prg_info);
            if (m_terminate) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(k);

    if (!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    return V;
}