// Fills the dynamic-programming matrix for the sub-problem described by
// `data` (offsets/lengths into m_Seq1 / m_Seq2 plus end-space-free flags),
// records one back-trace byte per cell, then runs the back trace.
//
// Returns:
//   - 0 if the progress callback requests termination before any work;
//   - in Smith-Waterman mode, the best cell score found in the whole matrix;
//   - otherwise the score of the last computed cell.
// If the callback requests termination part-way through, the back trace is
// skipped and the score computed so far is returned.
//
// Throws CAlgoAlignException:
//   - eBadParameter when Smith-Waterman mode is combined with a sub-range
//     (non-zero offsets or partial lengths) or with any end-gap flag unset;
//   - eInternal when the score recomputed from the transcript differs from
//     the score obtained while filling the matrix.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    // check data integrity
    if( m_SmithWaterman && ( data->m_offset1 || m_SeqLen1 != data->m_len1 ||
                             data->m_offset2 || m_SeqLen2 != data->m_len2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with offsets provided");
    }

    if( m_SmithWaterman && ( !data->m_esf_L1 || !data->m_esf_R1 ||
                             !data->m_esf_L2 || !data->m_esf_R2 ) ) {
        NCBI_THROW(CAlgoAlignException, eBadParameter,
                   "Smith-Waterman not compatible with end gap penalties");
    }

    // matrix dimensions: one extra row/column for the empty prefix
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // rolling rows: V holds total best scores, F holds the F-state scores
    vector<TScore> stl_rowV (N2), stl_rowF(N2);

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // an end gap is free only if its flag is set AND the sub-range actually
    // touches the corresponding end of the full sequence
    bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    bool bFreeGapRight1 = data->m_esf_R1 &&
                          m_SeqLen1 == data->m_offset1 + data->m_len1;

    bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    bool bFreeGapRight2 = data->m_esf_R2 &&
                          m_SeqLen2 == data->m_offset2 + data->m_len2;

    TScore wgleft1 = bFreeGapLeft1? 0: m_Wg;
    TScore wsleft1 = bFreeGapLeft1? 0: m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // first row
    // note that stl_rowF[0] is not used in the main cycle
    // (rowF is advanced before its first read)
    size_t k;
    stl_rowV[0] = wgleft1;
    for (k = 1; k < N2; ++k) {
        stl_rowV[k] = stl_rowV[k-1] + wsleft1;
        stl_rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    stl_rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // gap penalties for the first column
    TScore wgleft2 (bFreeGapLeft2? 0: m_Wg);
    TScore wsleft2 (bFreeGapLeft2? 0: m_Ws);

    const char * seq1 = m_Seq1 + data->m_offset1;
    const char * seq1_end = seq1 + data->m_len1;

    TScore V0 = wgleft2;
    // best score in the current cell;
    // will be equal to the NW score at the end
    TScore V = 0;
    // best score in the whole matrix, aka the score for SW
    TScore best_V = 0;

    // k currently equals N2; step back so that ++k below lands on the
    // first cell of the next row
    --k;
    for(; seq1 != seq1_end && !m_terminate; ++seq1) {

        // first column of the row
        backtrace_matrix.SetAt(++k, kMaskFc);

        // last row: gaps past the right end of seq1 cost nothing
        if( seq1 + 1 == seq1_end && bFreeGapRight1) {
            wg1 = ws1 = 0;
        }

        unsigned char tracer;

        // Index the score matrix through unsigned char: with a signed plain
        // char, a byte >= 0x80 would sign-extend into a huge size_t index.
        // NOTE(review): this bounds the index to 0..255; whether that is
        // within NCBI_FSM_DIM still relies on the sequences being validated
        // elsewhere -- confirm.
        const TNCBIScore * row_sc = sm[static_cast<unsigned char>(*seq1)];

        const char * seq2 = m_Seq2 + data->m_offset2;
        const char * seq2_end = seq2 + data->m_len2;
        TScore wg2 = m_Wg, ws2 = m_Ws;

        // best ending with gap in seq1:  open  seq1 X-   or extended seq1 X--
        //                                      seq2 XX               seq2 XXX
        TScore E = kInfMinus;
        // best ending with gap in seq2
        TScore F;
        // best ending with match
        TScore G;
        // just temporary
        TScore n0;

        // rowV walks over the previous row while being overwritten with the
        // current row, one cell behind the cursor
        TScore * rowV = &stl_rowV[0];
        V = V0 += wsleft2; // total best for the first column of this row
        TScore * rowF = &stl_rowF[0];

        for (; seq2 != seq2_end;) {

            // diagonal move: previous row, previous column
            G = *rowV + row_sc[static_cast<unsigned char>(*seq2++)];
            *rowV = V; // store the current-row value of the previous column

            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;  // open a new gap
                tracer = 0;
            }

            // last column: gaps past the right end of seq2 cost nothing
            // (seq2 has already been advanced above)
            if( bFreeGapRight2 && seq2 == seq2_end ) {
                wg2 = ws2 = 0;
            }

            F = *++rowF;
            n0 = *++rowV + wg2;
            if(F >= n0) {
                F += ws2;      // continue the gap
                tracer |= kMaskFc;
            }
            else {
                F = n0 + ws2;  // open a new gap
            }
            *rowF = F;

            // best score; ties are resolved according to m_GapPreference
            if( G < F || ( G == F && m_GapPreference == eLater) ) {
                if( E <= F ) {
                    V = F;     // no bit set: F is the default direction
                }
                else {
                    V = E;
                    tracer |= kMaskE;
                }
            }
            else if( E > G || ( E == G && m_GapPreference == eLater) ) {
                V = E;
                tracer |= kMaskE;
            }
            else {
                V = G;
                tracer |= kMaskD;
            }

            // local alignment: scores never drop below zero
            if (m_SmithWaterman && V < 0 ) {
                V = 0;
            }

            backtrace_matrix.SetAt(++k, tracer);

            if (V > best_V) {
                best_V = V;
                backtrace_matrix.SetBestPos(k);
            }
        }
        *rowV = V; // last column of the current row

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }
    backtrace_matrix.Purge(++k);
    backtrace_matrix.SetBestScore(best_V);

    /*
    //print the matrix out
    {{
        cout<<endl;
        int kk, ind1, ind2, width = 4;
        cout<<setw(width)<<" ";
        cout<<setw(width)<<"-";
        for(ind2 = 0; ind2 < N2-1; ++ind2) {
            cout<<setw(width)<<*(m_Seq2 + data->m_offset2 + ind2);
        }
        cout<<endl;
        for(kk = 0,ind1 = 0; ind1 < N1; ++ind1) {
            if(ind1) {
                cout<<setw(width)<<(m_Seq1 + data->m_offset1)[ind1-1];
            } else {
                cout<<setw(width)<<"-";
            }
            for(ind2 = 0; ind2 < N2; ++ind2,++kk) {
                string tstr;
                unsigned char Key (backtrace_matrix[kk]);
                if( Key & kMaskD ) tstr += "D";
                else if ( Key & kMaskE ) tstr += "E";
                else tstr += "F";
                if( Key & kMaskEc ) tstr += "-";
                if( Key & kMaskFc ) tstr += "|";
                cout<<setw(width)<<tstr;
            }
            cout<<endl<<endl;
        }
        cout<<endl;
    }}
    //end of print the matrix out
    */

    if(!m_terminate) {
        x_SWDoBackTrace(backtrace_matrix, data);

        // check back trace: the transcript is reversed before its score is
        // recomputed and compared with the score obtained from the matrix
        TTranscript rv (data->m_transcript.size());
        copy(data->m_transcript.rbegin(), data->m_transcript.rend(),
             rv.begin());
        const TScore matrix_score = m_SmithWaterman ? best_V : V;
        if( matrix_score !=
            ScoreFromTranscript(rv, data->m_offset1, data->m_offset2) ) {
            NCBI_THROW(CAlgoAlignException, eInternal,
                       "CNWAligner: error in back trace");
        }
    }

    if(m_SmithWaterman) {
        return best_V;
    }
    return V;
}
// Fills the dynamic-programming matrix for the sub-problem described by
// `data` (offsets/lengths into m_Seq1 / m_Seq2 plus end-space-free flags),
// records one back-trace byte per cell, then runs the back trace.
//
// Returns 0 if the progress callback requests termination before any work;
// otherwise the score of the last computed cell. If the callback requests
// termination part-way through, the back trace is skipped.
CNWAligner::TScore CNWAligner::x_Align(SAlignInOut* data)
{
    // matrix dimensions: one extra row/column for the empty prefix
    const size_t N1 = data->m_len1 + 1;
    const size_t N2 = data->m_len2 + 1;

    // rolling rows: V holds total best scores, F holds the F-state scores
    vector<TScore> stl_rowV (N2), stl_rowF(N2);
    TScore * rowV = &stl_rowV[0];
    TScore * rowF = &stl_rowF[0];

    // Base pointers address the first residue of each sub-range and are
    // indexed with [i - 1] / [j - 1] below. This avoids forming pointers
    // that lie before the start of an array, which is undefined behaviour
    // even when such a pointer is never dereferenced at that position.
    const char * seq1 = m_Seq1 + data->m_offset1;
    const char * seq2 = m_Seq2 + data->m_offset2;

    const TNCBIScore (* sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;

    if(m_prg_callback) {
        m_prg_info.m_iter_total = N1*N2;
        m_prg_info.m_iter_done = 0;
        if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
            return 0;
        }
    }

    // an end gap is free only if its flag is set AND the sub-range actually
    // touches the corresponding end of the full sequence
    bool bFreeGapLeft1  = data->m_esf_L1 && data->m_offset1 == 0;
    bool bFreeGapRight1 = data->m_esf_R1 &&
                          m_SeqLen1 == data->m_offset1 + data->m_len1;

    bool bFreeGapLeft2  = data->m_esf_L2 && data->m_offset2 == 0;
    bool bFreeGapRight2 = data->m_esf_R2 &&
                          m_SeqLen2 == data->m_offset2 + data->m_len2;

    TScore wgleft1 = bFreeGapLeft1? 0: m_Wg;
    TScore wsleft1 = bFreeGapLeft1? 0: m_Ws;
    TScore wg1 = m_Wg, ws1 = m_Ws;

    // index calculation: [i,j] = i*n2 + j
    CBacktraceMatrix4 backtrace_matrix (N1 * N2);
    backtrace_matrix.SetAt(0, 0);

    // first row
    size_t k;
    rowV[0] = wgleft1;
    for (k = 1; k < N2; ++k) {
        rowV[k] = rowV[k - 1] + wsleft1;
        rowF[k] = kInfMinus;
        backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
    }
    backtrace_matrix.Purge(k);
    rowV[0] = 0;

    if(m_prg_callback) {
        m_prg_info.m_iter_done = k;
        m_terminate = m_prg_callback(&m_prg_info);
    }

    // recurrences
    TScore wgleft2 (bFreeGapLeft2? 0: m_Wg);
    TScore wsleft2 (bFreeGapLeft2? 0: m_Ws);
    TScore V  (rowV[N2 - 1]); // result if there are no further rows
    TScore V0 (wgleft2);
    TScore E, G, n0;
    unsigned char tracer;

    size_t i, j;
    for(i = 1; i < N1 && !m_terminate; ++i) {

        V = V0 += wsleft2; // total best for the first column of this row
        E = kInfMinus;
        backtrace_matrix.SetAt(k++, kMaskFc);

        // residue of seq1 for row i (rows are 1-based, residues 0-based)
        const unsigned char ci = static_cast<unsigned char>(seq1[i - 1]);

        // last row: gaps past the right end of seq1 cost nothing
        if(i == N1 - 1 && bFreeGapRight1) {
            wg1 = ws1 = 0;
        }

        TScore wg2 = m_Wg, ws2 = m_Ws;

        for (j = 1; j < N2; ++j, ++k) {

            // diagonal move: rowV[j - 1] still holds the previous row here
            G = rowV[j - 1]
                + sm[ci][static_cast<unsigned char>(seq2[j - 1])];
            // ... and now receives the current-row value of that column
            rowV[j - 1] = V;

            n0 = V + wg1;
            if(E >= n0) {
                E += ws1;      // continue the gap
                tracer = kMaskEc;
            }
            else {
                E = n0 + ws1;  // open a new gap
                tracer = 0;
            }

            // last column: gaps past the right end of seq2 cost nothing
            if(j == N2 - 1 && bFreeGapRight2) {
                wg2 = ws2 = 0;
            }

            n0 = rowV[j] + wg2;
            if(rowF[j] >= n0) {
                rowF[j] += ws2; // continue the gap
                tracer |= kMaskFc;
            }
            else {
                rowF[j] = n0 + ws2; // open a new gap
            }

            // pick the best of E, F and G; on ties E wins over both,
            // and F wins over G
            if (E >= rowF[j]) {
                if(E >= G) {
                    V = E;
                    tracer |= kMaskE;
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }
            else {
                if(rowF[j] >= G) {
                    V = rowF[j]; // no bit set: F is the default direction
                }
                else {
                    V = G;
                    tracer |= kMaskD;
                }
            }

            backtrace_matrix.SetAt(k, tracer);
        }

        // last column of the current row
        rowV[N2 - 1] = V;

        if(m_prg_callback) {
            m_prg_info.m_iter_done = k;
            if( (m_terminate = m_prg_callback(&m_prg_info)) ) {
                break;
            }
        }
    }

    backtrace_matrix.Purge(k);

    if(!m_terminate) {
        x_DoBackTrace(backtrace_matrix, data);
    }

    return V;
}