Esempio n. 1
0
// Fills DP_matrix for the query q_seq_ (columns) against the target t_seq_ (rows) using
// match / mismatch / gap-open / gap-extend scoring, then backtracks from the selected
// end cell.
//
// Only the "tube" of cells with q_limit_minus_[t] <= q < q_limit_plus_[t] is evaluated
// for each target row t; the limits are produced by ComputeTubedAlignmentBoundaries().
//
// Per cell the following fields are written:
//   scores[move]          score of reaching the cell via `move`
//                         (FROM_MATCH, FROM_MISM, FROM_I, FROM_D, plus a FROM_NOWHERE slot)
//   in_directions[move]   the move that the predecessor cell was entered with
//   best_score / best_path_direction   maximum over scores[] and the move achieving it
//   is_match              result of isMatch() for the two bases of this cell
//
// CigarData, MD_data, start_pos_update are not touched here; they are handed to
//   backtrackAlignment() together with the end cell.
// Returns false when ComputeTubedAlignmentBoundaries() fails (no alignment is computed),
//   true otherwise.
//
// NOTE(review): DP_matrix[0][0] and the cells bordering the tube are read but never
// written in this function; they are assumed to be initialised elsewhere - confirm.
bool Realigner::computeSWalignment(vector<CigarOp>& CigarData, vector<MDelement>& MD_data,
                       unsigned int& start_pos_update) {

  // Compute boundaries for tubed alignment around previously found alignment
  if (!ComputeTubedAlignmentBoundaries())
    return false;

  // Path ordering creates left aligned InDels: candidates are tried in this order and a
  // later candidate only wins with a strictly higher score, so on ties the gap
  // extension is kept over opening a new gap.
  const int num_gap_sources = 3;
  const int insertion_path_ordering[num_gap_sources] = {FROM_I, FROM_MATCH, FROM_MISM};
  const int deletion_path_ordering[num_gap_sources]  = {FROM_D, FROM_MATCH, FROM_MISM};

  // --- Compute first row and column of the matrix
  // First row: moving horizontally for insertions.
  // Skipped when soft_clip_key_end_ is set.
  if (!soft_clip_key_end_) {
    DP_matrix[0][1].best_path_direction = FROM_I;
    DP_matrix[0][1].best_score = kGapOpen;
    DP_matrix[0][1].scores[FROM_I] = kGapOpen;
    DP_matrix[0][1].scores[FROM_NOWHERE] = kNotApplicable;
    for (unsigned int q_idx = 2; q_idx < q_limit_plus_[0]; ++q_idx) {
      DP_matrix[0][q_idx].in_directions[FROM_I] = FROM_I;
      DP_matrix[0][q_idx].scores[FROM_NOWHERE] = kNotApplicable;
      DP_matrix[0][q_idx].scores[FROM_I] = DP_matrix[0][q_idx-1].best_score + kGapExtend;
      DP_matrix[0][q_idx].best_path_direction = FROM_I;
      DP_matrix[0][q_idx].best_score = DP_matrix[0][q_idx-1].best_score + kGapExtend;
    }
  }

  // First column: moving vertically for deletions.
  // Skipped when start_anywhere_in_ref_ is set.
  if (!start_anywhere_in_ref_) {
    DP_matrix[1][0].best_path_direction = FROM_D;
    DP_matrix[1][0].best_score = kGapOpen;
    DP_matrix[1][0].scores[FROM_D] = kGapOpen;
    DP_matrix[1][0].scores[FROM_NOWHERE] = kNotApplicable;
    // Column 0 is only part of the tube while the lower query limit is still 0.
    for (unsigned int t = 2; t < q_limit_minus_.size() and q_limit_minus_[t] == 0; ++t) {
      DP_matrix[t][0].in_directions[FROM_D] = FROM_D;
      DP_matrix[t][0].scores[FROM_NOWHERE] = kNotApplicable;
      DP_matrix[t][0].scores[FROM_D] = DP_matrix[t-1][0].best_score + kGapExtend;
      DP_matrix[t][0].best_path_direction = FROM_D;
      DP_matrix[t][0].best_score = DP_matrix[t-1][0].best_score + kGapExtend;
    }
  }

  // ------ Main alignment loop ------
  vector<int>   temp_scores(FROM_NOWHERE);     // scratch: gap candidates, indexed by source move
  vector<int>   highest_score_cell(2, 0);      // {t_idx, q_idx} of the best end cell so far

  for (unsigned int t_idx = 1; t_idx < t_seq_.size()+1; ++t_idx) {

    for (unsigned int q_idx = q_limit_minus_[t_idx]; q_idx < q_limit_plus_[t_idx]; ++q_idx) {

      // Column 0 belongs to the boundary handled above.
      if (q_idx == 0)
        continue;

      // Reset the cell: every move starts out as not applicable.
      DP_matrix[t_idx][q_idx].scores.assign(FROM_NOWHERE+1, kNotApplicable);
      DP_matrix[t_idx][q_idx].in_directions.assign(FROM_NOWHERE, FROM_NOWHERE);
      // With key-end soft clipping the FROM_NOWHERE slot scores 0, so it can win the
      // best-move selection below.
      // NOTE(review): presumably this lets backtracking stop at this cell - confirm
      // against backtrackAlignment().
      if (soft_clip_key_end_)
        DP_matrix[t_idx][q_idx].scores[FROM_NOWHERE] = 0;

      // 1) - Match / Mismatch Score: diagonal step from (t_idx-1, q_idx-1).
      //      Only one of the two slots is filled, the other stays kNotApplicable.
      DP_matrix[t_idx][q_idx].is_match = isMatch(q_seq_[q_idx-1], t_seq_[t_idx-1]);
      if (DP_matrix[t_idx][q_idx].is_match) {
        DP_matrix[t_idx][q_idx].in_directions[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_path_direction;
        DP_matrix[t_idx][q_idx].scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_score + kMatchScore;
      } else {
        DP_matrix[t_idx][q_idx].in_directions[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_path_direction;
        DP_matrix[t_idx][q_idx].scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_score + kMismatchScore;
      }

      // 2) - Insertion Score: horizontal step from (t_idx, q_idx-1).
      //      Extending an insertion costs kGapExtend, opening one costs kGapOpen.
      temp_scores.assign(FROM_NOWHERE, kNotApplicable);
      temp_scores[FROM_MATCH] = DP_matrix[t_idx][q_idx-1].scores[FROM_MATCH] + kGapOpen;
      temp_scores[FROM_I] = DP_matrix[t_idx][q_idx-1].scores[FROM_I] + kGapExtend;
      temp_scores[FROM_MISM] = DP_matrix[t_idx][q_idx-1].scores[FROM_MISM] + kGapOpen;
      DP_matrix[t_idx][q_idx].scores[FROM_I] = kNotApplicable;
      DP_matrix[t_idx][q_idx].in_directions[FROM_I] = FROM_NOWHERE;
      for (int i = 0; i < num_gap_sources; ++i) {
        const int source = insertion_path_ordering[i];
        if (temp_scores[source] > DP_matrix[t_idx][q_idx].scores[FROM_I]) {
          DP_matrix[t_idx][q_idx].scores[FROM_I] = temp_scores[source];
          DP_matrix[t_idx][q_idx].in_directions[FROM_I] = source;
        }
      }

      // 3) - Deletion Score: vertical step from (t_idx-1, q_idx).
      temp_scores.assign(FROM_NOWHERE, kNotApplicable);
      temp_scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx].scores[FROM_MATCH] + kGapOpen;
      temp_scores[FROM_D] = DP_matrix[t_idx-1][q_idx].scores[FROM_D] + kGapExtend;
      temp_scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx].scores[FROM_MISM] + kGapOpen;
      DP_matrix[t_idx][q_idx].scores[FROM_D] = kNotApplicable;
      DP_matrix[t_idx][q_idx].in_directions[FROM_D] = FROM_NOWHERE;
      for (int i = 0; i < num_gap_sources; ++i) {
        const int source = deletion_path_ordering[i];
        if (temp_scores[source] > DP_matrix[t_idx][q_idx].scores[FROM_D]) {
          DP_matrix[t_idx][q_idx].scores[FROM_D] = temp_scores[source];
          DP_matrix[t_idx][q_idx].in_directions[FROM_D] = source;
        }
      }

      // Choose best move for this cell.
      // Starting one below kNotApplicable guarantees that some slot is always selected,
      // even if every slot still holds kNotApplicable.
      DP_matrix[t_idx][q_idx].best_score = kNotApplicable-1;
      DP_matrix[t_idx][q_idx].best_path_direction = FROM_NOWHERE;

      const unsigned int num_moves = static_cast<unsigned int>(DP_matrix[t_idx][q_idx].scores.size());
      for (unsigned int i = 0; i < num_moves; ++i) {
        // Reverse lookup for reverse strand so that InDels are placed at the beginning of HPs
        const unsigned int s_idx = isForwardStrandRead_ ? i : num_moves-1-i;
        if (DP_matrix[t_idx][q_idx].scores[s_idx] > DP_matrix[t_idx][q_idx].best_score) {
          DP_matrix[t_idx][q_idx].best_score = DP_matrix[t_idx][q_idx].scores[s_idx];
          DP_matrix[t_idx][q_idx].best_path_direction = s_idx;
        }
      }

      // Track the best end cell: any cell with bead-end soft clipping, otherwise only
      // cells in the last query column when the alignment may stop anywhere in the
      // reference.
      const bool investigate_highscore =
          soft_clip_bead_end_ or (stop_anywhere_in_ref_ and q_idx == q_seq_.size());
      if (investigate_highscore and DP_matrix[t_idx][q_idx].best_score
               > DP_matrix[highest_score_cell[0]][highest_score_cell[1]].best_score) {
        highest_score_cell[0] = t_idx;
        highest_score_cell[1] = q_idx;
      }

    }
  }
  // ------- end alignment matrix loop ------

  // Without any end clipping the alignment has to end in the bottom-right corner.
  if (!stop_anywhere_in_ref_ and !soft_clip_bead_end_) {
    highest_score_cell[0] = t_seq_.size();
    highest_score_cell[1] = q_seq_.size();
  }

  // Backtrack alignment in dynamic programming matrix, generate cigar string / MD tag
  backtrackAlignment(highest_score_cell[0], highest_score_cell[1], CigarData, MD_data, start_pos_update);
  return true;
}
Esempio n. 2
0
// Fills DP_matrix for the query q_seq_ (columns) against the target t_seq_ (rows) using
// match / mismatch / gap-open / gap-extend scoring, then backtracks from the selected
// end cell.
//
// Only the "tube" of cells with q_limit_minus_[t] <= q < q_limit_plus_[t] is evaluated
// for each target row t; the limits are produced by ComputeTubedAlignmentBoundaries().
//
// Per cell the following fields are written:
//   scores[move]          score of reaching the cell via `move`
//                         (FROM_MATCH, FROM_MISM, FROM_I, FROM_D, plus a FROM_NOWHERE slot)
//   in_directions[move]   the move that the predecessor cell was entered with
//   best_score / best_path_direction   maximum over scores[] and the move achieving it
//   is_match              result of isMatch() for the two bases of this cell
//
// CigarData, MD_data, start_pos_update are not touched here; they are handed to
//   backtrackAlignment() together with the end cell.
// Returns false when ComputeTubedAlignmentBoundaries() fails (no alignment is computed),
//   true otherwise.
//
// NOTE(review): DP_matrix[0][0] and the cells bordering the tube are read but never
// written in this function; they are assumed to be initialised elsewhere - confirm.
bool Realigner::computeSWalignment(vector<CigarOp>& CigarData, vector<MDelement>& MD_data,
                       unsigned int& start_pos_update) {

  // Compute boundaries for tubed alignment around previously found alignment
  if (!ComputeTubedAlignmentBoundaries())
    return false;

  // Path ordering creates left aligned InDels: candidates are tried in this order and a
  // later candidate only wins with a strictly higher score, so on ties the gap
  // extension is kept over opening a new gap.
  const int num_gap_sources = 3;
  const int insertion_path_ordering[num_gap_sources] = {FROM_I, FROM_MATCH, FROM_MISM};
  const int deletion_path_ordering[num_gap_sources]  = {FROM_D, FROM_MATCH, FROM_MISM};

  // --- Compute first row and column of the matrix
  // First row: moving horizontally for insertions.
  // Skipped when soft_clip_left_ is set.
  if (!soft_clip_left_) {
    DP_matrix[0][1].best_path_direction = FROM_I;
    DP_matrix[0][1].best_score = kGapOpen;
    DP_matrix[0][1].scores[FROM_I] = kGapOpen;
    DP_matrix[0][1].scores[FROM_NOWHERE] = kNotApplicable;
    for (unsigned int q_idx = 2; q_idx < q_limit_plus_[0]; ++q_idx) {
      DP_matrix[0][q_idx].in_directions[FROM_I] = FROM_I;
      DP_matrix[0][q_idx].scores[FROM_NOWHERE] = kNotApplicable;
      DP_matrix[0][q_idx].scores[FROM_I] = DP_matrix[0][q_idx-1].best_score + kGapExtend;
      DP_matrix[0][q_idx].best_path_direction = FROM_I;
      DP_matrix[0][q_idx].best_score = DP_matrix[0][q_idx-1].best_score + kGapExtend;
    }
  }

  // First column: moving vertically for deletions.
  // Skipped when start_anywhere_in_ref_ is set.
  if (!start_anywhere_in_ref_) {
    DP_matrix[1][0].best_path_direction = FROM_D;
    DP_matrix[1][0].best_score = kGapOpen;
    DP_matrix[1][0].scores[FROM_D] = kGapOpen;
    DP_matrix[1][0].scores[FROM_NOWHERE] = kNotApplicable;
    // Column 0 is only part of the tube while the lower query limit is still 0.
    for (unsigned int t = 2; t < q_limit_minus_.size() and q_limit_minus_[t] == 0; ++t) {
      DP_matrix[t][0].in_directions[FROM_D] = FROM_D;
      DP_matrix[t][0].scores[FROM_NOWHERE] = kNotApplicable;
      DP_matrix[t][0].scores[FROM_D] = DP_matrix[t-1][0].best_score + kGapExtend;
      DP_matrix[t][0].best_path_direction = FROM_D;
      DP_matrix[t][0].best_score = DP_matrix[t-1][0].best_score + kGapExtend;
    }
  }

  // ------ Main alignment loop ------
  vector<int>   temp_scores(FROM_NOWHERE);     // scratch: gap candidates, indexed by source move
  vector<int>   highest_score_cell(2, 0);      // {t_idx, q_idx} of the best end cell so far

  for (unsigned int t_idx = 1; t_idx < t_seq_.size()+1; ++t_idx) {

    for (unsigned int q_idx = q_limit_minus_[t_idx]; q_idx < q_limit_plus_[t_idx]; ++q_idx) {

      // Column 0 belongs to the boundary handled above.
      if (q_idx == 0)
        continue;

      // Reset the cell: every move starts out as not applicable.
      DP_matrix[t_idx][q_idx].scores.assign(FROM_NOWHERE+1, kNotApplicable);
      DP_matrix[t_idx][q_idx].in_directions.assign(FROM_NOWHERE, FROM_NOWHERE);
      // With left soft clipping the FROM_NOWHERE slot scores 0, so it can win the
      // best-move selection below.
      // NOTE(review): presumably this lets backtracking stop at this cell - confirm
      // against backtrackAlignment().
      if (soft_clip_left_)
        DP_matrix[t_idx][q_idx].scores[FROM_NOWHERE] = 0;

      // 1) - Match / Mismatch Score: diagonal step from (t_idx-1, q_idx-1).
      //      Only one of the two slots is filled, the other stays kNotApplicable.
      DP_matrix[t_idx][q_idx].is_match = isMatch(q_seq_[q_idx-1], t_seq_[t_idx-1]);
      if (DP_matrix[t_idx][q_idx].is_match) {
        DP_matrix[t_idx][q_idx].in_directions[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_path_direction;
        DP_matrix[t_idx][q_idx].scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx-1].best_score + kMatchScore;
      } else {
        DP_matrix[t_idx][q_idx].in_directions[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_path_direction;
        DP_matrix[t_idx][q_idx].scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx-1].best_score + kMismatchScore;
      }

      // 2) - Insertion Score: horizontal step from (t_idx, q_idx-1).
      //      Extending an insertion costs kGapExtend, opening one costs kGapOpen.
      temp_scores.assign(FROM_NOWHERE, kNotApplicable);
      temp_scores[FROM_MATCH] = DP_matrix[t_idx][q_idx-1].scores[FROM_MATCH] + kGapOpen;
      temp_scores[FROM_I] = DP_matrix[t_idx][q_idx-1].scores[FROM_I] + kGapExtend;
      temp_scores[FROM_MISM] = DP_matrix[t_idx][q_idx-1].scores[FROM_MISM] + kGapOpen;
      DP_matrix[t_idx][q_idx].scores[FROM_I] = kNotApplicable;
      DP_matrix[t_idx][q_idx].in_directions[FROM_I] = FROM_NOWHERE;
      for (int i = 0; i < num_gap_sources; ++i) {
        const int source = insertion_path_ordering[i];
        if (temp_scores[source] > DP_matrix[t_idx][q_idx].scores[FROM_I]) {
          DP_matrix[t_idx][q_idx].scores[FROM_I] = temp_scores[source];
          DP_matrix[t_idx][q_idx].in_directions[FROM_I] = source;
        }
      }

      // 3) - Deletion Score: vertical step from (t_idx-1, q_idx).
      temp_scores.assign(FROM_NOWHERE, kNotApplicable);
      temp_scores[FROM_MATCH] = DP_matrix[t_idx-1][q_idx].scores[FROM_MATCH] + kGapOpen;
      temp_scores[FROM_D] = DP_matrix[t_idx-1][q_idx].scores[FROM_D] + kGapExtend;
      temp_scores[FROM_MISM] = DP_matrix[t_idx-1][q_idx].scores[FROM_MISM] + kGapOpen;
      DP_matrix[t_idx][q_idx].scores[FROM_D] = kNotApplicable;
      DP_matrix[t_idx][q_idx].in_directions[FROM_D] = FROM_NOWHERE;
      for (int i = 0; i < num_gap_sources; ++i) {
        const int source = deletion_path_ordering[i];
        if (temp_scores[source] > DP_matrix[t_idx][q_idx].scores[FROM_D]) {
          DP_matrix[t_idx][q_idx].scores[FROM_D] = temp_scores[source];
          DP_matrix[t_idx][q_idx].in_directions[FROM_D] = source;
        }
      }

      // Choose best move for this cell.
      // Starting one below kNotApplicable guarantees that some slot is always selected,
      // even if every slot still holds kNotApplicable. On ties the lowest move index wins.
      DP_matrix[t_idx][q_idx].best_score = kNotApplicable-1;
      DP_matrix[t_idx][q_idx].best_path_direction = FROM_NOWHERE;
      for (unsigned int iMove = 0; iMove < DP_matrix[t_idx][q_idx].scores.size(); ++iMove) {
        if (DP_matrix[t_idx][q_idx].scores[iMove] > DP_matrix[t_idx][q_idx].best_score) {
          DP_matrix[t_idx][q_idx].best_score = DP_matrix[t_idx][q_idx].scores[iMove];
          DP_matrix[t_idx][q_idx].best_path_direction = iMove;
        }
      }

      // Clipping settings determine where we search for the best scoring cell to stop aligning:
      // the last target row unless stopping anywhere in the reference is allowed, and the
      // last query column unless the right end may be soft clipped.
      const bool valid_t_idx = stop_anywhere_in_ref_ or (t_idx == t_seq_.size());
      const bool valid_q_idx = soft_clip_right_ or (q_idx == q_seq_.size());
      const bool investigate_highscore = valid_t_idx and valid_q_idx;

      if (investigate_highscore and DP_matrix[t_idx][q_idx].best_score
               > DP_matrix[highest_score_cell[0]][highest_score_cell[1]].best_score) {
        highest_score_cell[0] = t_idx;
        highest_score_cell[1] = q_idx;
      }

    }
  }
  // ------- end alignment matrix loop ------

  // Force full string alignment if desired, no matter what the score is.
  if (!stop_anywhere_in_ref_ and !soft_clip_right_) {
    highest_score_cell[0] = t_seq_.size();
    highest_score_cell[1] = q_seq_.size();
  }

  // Backtrack alignment in dynamic programming matrix, generate cigar string / MD tag
  backtrackAlignment(highest_score_cell[0], highest_score_cell[1], CigarData, MD_data, start_pos_update);
  return true;
}