bool Realigner::computeSWalignment(vector<CigarOp>& CigarData, vector<MDelement>& MD_data, unsigned int& start_pos_update) { string dummy_string; //AlignmentCell *best_cell; //best_cell = &DP_matrix[0][0]; // Compute boundaries for tubed alignment around previously found alignment if (!ComputeTubedAlignmentBoundaries()) return false; // Path ordering creates left aligned InDels vector<int> insertion_path_ordering(3); vector<int> deletion_path_ordering(3); insertion_path_ordering[0] = FROM_I; insertion_path_ordering[1] = FROM_MATCH; insertion_path_ordering[2] = FROM_MISM; deletion_path_ordering[0] = FROM_D; deletion_path_ordering[1] = FROM_MATCH; deletion_path_ordering[2] = FROM_MISM; // --- Compute first row and column of the matrix // First row: moving horizontally for insertions if (!soft_clip_key_end_) { DP_matrix[0][1].best_path_direction = FROM_I; DP_matrix[0][1].best_score = kGapOpen; DP_matrix[0][1].scores[FROM_I] = kGapOpen; DP_matrix[0][1].scores[FROM_NOWHERE] = kNotApplicable; for (unsigned int q_idx=2; q_idx<q_limit_plus_[0]; q_idx++) { DP_matrix[0][q_idx].in_directions[FROM_I] = FROM_I; DP_matrix[0][q_idx].scores[FROM_NOWHERE] = kNotApplicable; DP_matrix[0][q_idx].scores[FROM_I] = DP_matrix[0][q_idx-1].best_score + kGapExtend; DP_matrix[0][q_idx].best_path_direction = FROM_I; DP_matrix[0][q_idx].best_score = DP_matrix[0][q_idx-1].best_score + kGapExtend; } } if (!start_anywhere_in_ref_) { // First column: moving vertically for deletions DP_matrix[1][0].best_path_direction = FROM_D; DP_matrix[1][0].best_score = kGapOpen; DP_matrix[1][0].scores[FROM_D] = kGapOpen; DP_matrix[1][0].scores[FROM_NOWHERE] = kNotApplicable; unsigned int t = 2; while (t < q_limit_minus_.size() and q_limit_minus_[t] == 0) { DP_matrix[t][0].in_directions[FROM_D] = FROM_D; DP_matrix[t][0].scores[FROM_NOWHERE] = kNotApplicable; DP_matrix[t][0].scores[FROM_D] = DP_matrix[t-1][0].best_score + kGapExtend; DP_matrix[t][0].best_path_direction = FROM_D; DP_matrix[t][0].best_score = DP_matrix[t-1][0].best_score + kGapExtend; t++; } } // ------ Main alignment loop ------ vector<int> temp_scores(FROM_NOWHERE); vector<int> highest_score_cell(2, 0); for (unsigned int t_idx=1; t_idx<t_seq_.size()+1; t_idx++) { for (unsigned int q_idx=q_limit_minus_[t_idx]; q_idx<q_limit_plus_[t_idx]; q_idx++) { if (q_idx == 0) continue; // Scoring for Match; Mismatch / Insertion / Deletion; DP_matrix[t_idx][q_idx].scores.assign(FROM_NOWHERE+1, kNotApplicable); DP_matrix[t_idx][q_idx].in_directions.assign(FROM_NOWHERE, FROM_NOWHERE); if (soft_clip_key_end_) DP_matrix[t_idx][q_idx].scores[FROM_NOWHERE] = 0; // 1) - Match / Mismatch Score DP_matrix[t_idx][q_idx].is_match = isMatch(q_seq_[q_idx-1], t_seq_[t_idx-1]); if (DP_matrix[t_idx][q_idx].is_match) { DP_matrix[t_idx][q_idx].in_directions[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_path_direction; DP_matrix[t_idx][q_idx].scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_score + kMatchScore; } else { DP_matrix[t_idx][q_idx].in_directions[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_path_direction; DP_matrix[t_idx][q_idx].scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_score + kMismatchScore; } // 2) - Insertion Score temp_scores.assign(FROM_NOWHERE, kNotApplicable); temp_scores[FROM_MATCH] = DP_matrix[t_idx][q_idx-1].scores[FROM_MATCH] + kGapOpen; temp_scores[FROM_I] = DP_matrix[t_idx][q_idx-1].scores[FROM_I] + kGapExtend; temp_scores[FROM_MISM] = DP_matrix[t_idx][q_idx-1].scores[FROM_MISM] + kGapOpen; DP_matrix[t_idx][q_idx].scores[FROM_I] = kNotApplicable; DP_matrix[t_idx][q_idx].in_directions[FROM_I] = FROM_NOWHERE; for (int i=0; i<(int)insertion_path_ordering.size(); i++) { if (temp_scores[insertion_path_ordering[i]] > DP_matrix[t_idx][q_idx].scores[FROM_I]) { DP_matrix[t_idx][q_idx].scores[FROM_I] = temp_scores[insertion_path_ordering[i]]; DP_matrix[t_idx][q_idx].in_directions[FROM_I] = insertion_path_ordering[i]; } } // 3) - Deletion Score temp_scores.assign(FROM_NOWHERE, kNotApplicable); temp_scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx].scores[FROM_MATCH] + kGapOpen; temp_scores[FROM_D] = DP_matrix[t_idx-1][q_idx].scores[FROM_D] + kGapExtend; temp_scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx].scores[FROM_MISM] + kGapOpen; DP_matrix[t_idx][q_idx].scores[FROM_D] = kNotApplicable; DP_matrix[t_idx][q_idx].in_directions[FROM_D] = FROM_NOWHERE; for (int i=0; i<(int)deletion_path_ordering.size(); i++) { if (temp_scores[deletion_path_ordering[i]] > DP_matrix[t_idx][q_idx].scores[FROM_D]) { DP_matrix[t_idx][q_idx].scores[FROM_D] = temp_scores[deletion_path_ordering[i]]; DP_matrix[t_idx][q_idx].in_directions[FROM_D] = deletion_path_ordering[i]; } } // Choose best move for this cell DP_matrix[t_idx][q_idx].best_score = kNotApplicable-1; DP_matrix[t_idx][q_idx].best_path_direction = FROM_NOWHERE; unsigned int s_idx; for (unsigned int i=0; i<DP_matrix[t_idx][q_idx].scores.size(); i++) { // Reverse lookup for reverse strand so that InDels are placed at the beginning of HPs if (isForwardStrandRead_) s_idx = i; else s_idx = DP_matrix[t_idx][q_idx].scores.size()-1-i; if (DP_matrix[t_idx][q_idx].scores[s_idx] > DP_matrix[t_idx][q_idx].best_score) { DP_matrix[t_idx][q_idx].best_score = DP_matrix[t_idx][q_idx].scores[s_idx]; DP_matrix[t_idx][q_idx].best_path_direction = s_idx; } } bool investigate_highscore = false; if (soft_clip_bead_end_ or (stop_anywhere_in_ref_ and q_idx == q_seq_.size())) investigate_highscore = true; if (investigate_highscore and DP_matrix[t_idx][q_idx].best_score > DP_matrix[highest_score_cell[0]][highest_score_cell[1]].best_score) { highest_score_cell[0] = t_idx; highest_score_cell[1] = q_idx; } } } // ------- end alignment matrix loop ------ if (!stop_anywhere_in_ref_ and !soft_clip_bead_end_) { highest_score_cell[0] = t_seq_.size(); highest_score_cell[1] = q_seq_.size(); } // Backtrack alignment in dynamic programming matrix, generate cigar string / MD tag backtrackAlignment(highest_score_cell[0], highest_score_cell[1], CigarData, MD_data, start_pos_update); return true; }
bool Realigner::computeSWalignment(vector<CigarOp>& CigarData, vector<MDelement>& MD_data, unsigned int& start_pos_update) { string dummy_string; // Compute boundaries for tubed alignment around previously found alignment if (!ComputeTubedAlignmentBoundaries()) return false; // Path ordering creates left aligned InDels vector<int> insertion_path_ordering(3); vector<int> deletion_path_ordering(3); insertion_path_ordering[0] = FROM_I; insertion_path_ordering[1] = FROM_MATCH; insertion_path_ordering[2] = FROM_MISM; deletion_path_ordering[0] = FROM_D; deletion_path_ordering[1] = FROM_MATCH; deletion_path_ordering[2] = FROM_MISM; // --- Compute first row and column of the matrix // First row: moving horizontally for insertions if (!soft_clip_left_) { DP_matrix[0][1].best_path_direction = FROM_I; DP_matrix[0][1].best_score = kGapOpen; DP_matrix[0][1].scores[FROM_I] = kGapOpen; DP_matrix[0][1].scores[FROM_NOWHERE] = kNotApplicable; for (unsigned int q_idx=2; q_idx<q_limit_plus_[0]; q_idx++) { DP_matrix[0][q_idx].in_directions[FROM_I] = FROM_I; DP_matrix[0][q_idx].scores[FROM_NOWHERE] = kNotApplicable; DP_matrix[0][q_idx].scores[FROM_I] = DP_matrix[0][q_idx-1].best_score + kGapExtend; DP_matrix[0][q_idx].best_path_direction = FROM_I; DP_matrix[0][q_idx].best_score = DP_matrix[0][q_idx-1].best_score + kGapExtend; } } if (!start_anywhere_in_ref_) { // First column: moving vertically for deletions DP_matrix[1][0].best_path_direction = FROM_D; DP_matrix[1][0].best_score = kGapOpen; DP_matrix[1][0].scores[FROM_D] = kGapOpen; DP_matrix[1][0].scores[FROM_NOWHERE] = kNotApplicable; unsigned int t = 2; while (t < q_limit_minus_.size() and q_limit_minus_[t] == 0) { DP_matrix[t][0].in_directions[FROM_D] = FROM_D; DP_matrix[t][0].scores[FROM_NOWHERE] = kNotApplicable; DP_matrix[t][0].scores[FROM_D] = DP_matrix[t-1][0].best_score + kGapExtend; DP_matrix[t][0].best_path_direction = FROM_D; DP_matrix[t][0].best_score = DP_matrix[t-1][0].best_score + kGapExtend; t++; } } // ------ Main alignment loop ------ vector<int> temp_scores(FROM_NOWHERE); vector<int> highest_score_cell(2, 0); for (unsigned int t_idx=1; t_idx<t_seq_.size()+1; t_idx++) { for (unsigned int q_idx=q_limit_minus_[t_idx]; q_idx<q_limit_plus_[t_idx]; q_idx++) { if (q_idx == 0) continue; // Scoring for Match; Mismatch / Insertion / Deletion; DP_matrix[t_idx][q_idx].scores.assign(FROM_NOWHERE+1, kNotApplicable); DP_matrix[t_idx][q_idx].in_directions.assign(FROM_NOWHERE, FROM_NOWHERE); if (soft_clip_left_) DP_matrix[t_idx][q_idx].scores[FROM_NOWHERE] = 0; // 1) - Match / Mismatch Score DP_matrix[t_idx][q_idx].is_match = isMatch(q_seq_[q_idx-1], t_seq_[t_idx-1]); if (DP_matrix[t_idx][q_idx].is_match) { DP_matrix[t_idx][q_idx].in_directions[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_path_direction; DP_matrix[t_idx][q_idx].scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_score + kMatchScore; } else { DP_matrix[t_idx][q_idx].in_directions[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_path_direction; DP_matrix[t_idx][q_idx].scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_score + kMismatchScore; } // 2) - Insertion Score temp_scores.assign(FROM_NOWHERE, kNotApplicable); temp_scores[FROM_MATCH] = DP_matrix[t_idx][q_idx-1].scores[FROM_MATCH] + kGapOpen; temp_scores[FROM_I] = DP_matrix[t_idx][q_idx-1].scores[FROM_I] + kGapExtend; temp_scores[FROM_MISM] = DP_matrix[t_idx][q_idx-1].scores[FROM_MISM] + kGapOpen; DP_matrix[t_idx][q_idx].scores[FROM_I] = kNotApplicable; DP_matrix[t_idx][q_idx].in_directions[FROM_I] = FROM_NOWHERE; for (int i=0; i<(int)insertion_path_ordering.size(); i++) { if (temp_scores[insertion_path_ordering[i]] > DP_matrix[t_idx][q_idx].scores[FROM_I]) { DP_matrix[t_idx][q_idx].scores[FROM_I] = temp_scores[insertion_path_ordering[i]]; DP_matrix[t_idx][q_idx].in_directions[FROM_I] = insertion_path_ordering[i]; } } // 3) - Deletion Score temp_scores.assign(FROM_NOWHERE, kNotApplicable); temp_scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx].scores[FROM_MATCH] + kGapOpen; temp_scores[FROM_D] = DP_matrix[t_idx-1][q_idx].scores[FROM_D] + kGapExtend; temp_scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx].scores[FROM_MISM] + kGapOpen; DP_matrix[t_idx][q_idx].scores[FROM_D] = kNotApplicable; DP_matrix[t_idx][q_idx].in_directions[FROM_D] = FROM_NOWHERE; for (int i=0; i<(int)deletion_path_ordering.size(); i++) { if (temp_scores[deletion_path_ordering[i]] > DP_matrix[t_idx][q_idx].scores[FROM_D]) { DP_matrix[t_idx][q_idx].scores[FROM_D] = temp_scores[deletion_path_ordering[i]]; DP_matrix[t_idx][q_idx].in_directions[FROM_D] = deletion_path_ordering[i]; } } // Choose best move for this cell DP_matrix[t_idx][q_idx].best_score = kNotApplicable-1; DP_matrix[t_idx][q_idx].best_path_direction = FROM_NOWHERE; for (unsigned int iMove=0; iMove<DP_matrix[t_idx][q_idx].scores.size(); iMove++) { if (DP_matrix[t_idx][q_idx].scores[iMove] > DP_matrix[t_idx][q_idx].best_score) { DP_matrix[t_idx][q_idx].best_score = DP_matrix[t_idx][q_idx].scores[iMove]; DP_matrix[t_idx][q_idx].best_path_direction = iMove; } } // Clipping settings determine where we search for the best scoring cell to stop aligning bool valid_t_idx = stop_anywhere_in_ref_ or (t_idx == t_seq_.size()); bool valid_q_idx = soft_clip_right_ or (q_idx == q_seq_.size()); bool investigate_highscore = valid_t_idx and valid_q_idx; if (investigate_highscore and DP_matrix[t_idx][q_idx].best_score > DP_matrix[highest_score_cell[0]][highest_score_cell[1]].best_score) { highest_score_cell[0] = t_idx; highest_score_cell[1] = q_idx; } } } // ------- end alignment matrix loop ------ // Force full string alignment if desired, no matter what the score is. if (!stop_anywhere_in_ref_ and !soft_clip_right_) { highest_score_cell[0] = t_seq_.size(); highest_score_cell[1] = q_seq_.size(); } // Backtrack alignment in dynamic programming matrix, generate cigar string / MD tag backtrackAlignment(highest_score_cell[0], highest_score_cell[1], CigarData, MD_data, start_pos_update); return true; }