// Backtrack through scoring matrices void alignment_reverse_move(enum Matrix *curr_matrix, score_t *curr_score, size_t *score_x, size_t *score_y, size_t *arr_index, const aligner_t *aligner) { size_t seq_x = (*score_x)-1, seq_y = (*score_y)-1; size_t len_i = aligner->score_width-1, len_j = aligner->score_height-1; bool is_match; int match_penalty; const scoring_t *scoring = aligner->scoring; scoring_lookup(scoring, aligner->seq_a[seq_x], aligner->seq_b[seq_y], &match_penalty, &is_match); int gap_a_open_penalty, gap_b_open_penalty; int gap_a_extend_penalty, gap_b_extend_penalty; gap_a_open_penalty = gap_b_open_penalty = scoring->gap_extend + scoring->gap_open; gap_a_extend_penalty = gap_b_extend_penalty = scoring->gap_extend; // Free gaps at the ends if(scoring->no_end_gap_penalty) { if(*score_x == len_i) gap_a_open_penalty = gap_a_extend_penalty = 0; if(*score_y == len_j) gap_b_open_penalty = gap_b_extend_penalty = 0; } if(scoring->no_start_gap_penalty) { if(*score_x == 0) gap_a_open_penalty = gap_a_extend_penalty = 0; if(*score_y == 0) gap_b_open_penalty = gap_b_extend_penalty = 0; } long prev_match_penalty, prev_gap_a_penalty, prev_gap_b_penalty; switch(*curr_matrix) { case MATCH: prev_match_penalty = match_penalty; prev_gap_a_penalty = match_penalty; prev_gap_b_penalty = match_penalty; (*score_x)--; (*score_y)--; (*arr_index) -= aligner->score_width + 1; break; case GAP_A: prev_match_penalty = gap_a_open_penalty; prev_gap_a_penalty = gap_a_extend_penalty; prev_gap_b_penalty = gap_a_open_penalty; (*score_y)--; (*arr_index) -= aligner->score_width; break; case GAP_B: prev_match_penalty = gap_b_open_penalty; prev_gap_a_penalty = gap_b_open_penalty; prev_gap_b_penalty = gap_b_extend_penalty; (*score_x)--; (*arr_index)--; break; default: fprintf(stderr, "Program error: invalid matrix in get_reverse_move()\n"); fprintf(stderr, "Please submit a bug report to: [email protected]\n"); exit(EXIT_FAILURE); } // *arr_index = ARR_2D_INDEX(aligner->score_width, *score_x, 
*score_y); if((!scoring->no_gaps_in_a || *score_x == 0 || *score_x == len_i) && (long)aligner->gap_a_scores[*arr_index] + prev_gap_a_penalty == *curr_score) { *curr_matrix = GAP_A; *curr_score = aligner->gap_a_scores[*arr_index]; } else if((!scoring->no_gaps_in_b || *score_y == 0 || *score_y == len_j) && (long)aligner->gap_b_scores[*arr_index] + prev_gap_b_penalty == *curr_score) { *curr_matrix = GAP_B; *curr_score = aligner->gap_b_scores[*arr_index]; } else if((long)aligner->match_scores[*arr_index] + prev_match_penalty == *curr_score) { *curr_matrix = MATCH; *curr_score = aligner->match_scores[*arr_index]; } else { alignment_print_matrices(aligner); fprintf(stderr, "[%s:%zu,%zu]: %i [ismatch: %i] '%c' '%c'\n", MATRIX_NAME(*curr_matrix), *score_x, *score_y, *curr_score, is_match, aligner->seq_a[seq_x], aligner->seq_b[seq_y]); fprintf(stderr, " Penalties match: %li gap_open: %li gap_extend: %li\n", prev_match_penalty, prev_gap_a_penalty, prev_gap_b_penalty); fprintf(stderr, " Expected MATCH: %i GAP_A: %i GAP_B: %i\n", aligner->match_scores[*arr_index], aligner->gap_a_scores[*arr_index], aligner->gap_b_scores[*arr_index]); fprintf(stderr, "Program error: traceback fail (get_reverse_move)\n" "This may be due to an integer overflow if your sequences are long or scores\n" "are large. If this is the case using smaller scores or shorter sequences may\n" "work around this problem. \n" " If you think this is a bug, please report it to: [email protected]\n"); exit(EXIT_FAILURE); } }
/**
 * Align sequences a and b and write the gapped alignment into result.
 *
 * Fills the score matrices via aligner_align(), picks the best-scoring matrix
 * at the bottom-right cell, then traces back to the first row or column,
 * writing alignment characters from the end of the result buffers towards the
 * start. Any remaining prefix of either sequence is emitted against gaps, and
 * the finished strings are shifted to offset 0 and NUL-terminated.
 *
 * @param a        first sequence
 * @param b        second sequence
 * @param len_a    length of a
 * @param len_b    length of b
 * @param scoring  scoring scheme passed through to aligner_align()
 * @param nw       aligner whose matrices are filled and then traced back
 * @param result   receives result_a / result_b (aligned strings, '-' marks a
 *                 gap), length and score
 */
void needleman_wunsch_align2(const char *a, const char *b,
                             size_t len_a, size_t len_b,
                             const scoring_t *scoring,
                             nw_aligner_t *nw, alignment_t *result)
{
  // NOTE(review): the final 0 presumably selects global (non-local) mode;
  // confirm against aligner_align().
  aligner_align(nw, a, b, len_a, len_b, scoring, 0);

  // work backwards re-tracing optimal alignment, then shift sequences into place

  // note: longest_alignment = strlen(seq_a) + strlen(seq_b)
  size_t longest_alignment = nw->score_width-1 + nw->score_height-1;
  // NOTE(review): assumes this also reserves room for the terminating NUL
  // written at index alignment_len below; confirm.
  alignment_ensure_capacity(result, longest_alignment);

  // Position of next alignment character in buffer (working backwards).
  // Unsigned: wraps past zero when the buffer is completely filled (or when
  // both sequences are empty); adding 1 below wraps it back to 0.
  size_t next_char = longest_alignment-1;

  size_t arr_size = nw->score_width * nw->score_height;

  // Get max score (and therefore current matrix).
  // On ties GAP_B is preferred over MATCH, and GAP_A over both.
  enum Matrix curr_matrix = MATCH;
  score_t curr_score = nw->match_scores[arr_size-1];

  if(nw->gap_b_scores[arr_size-1] >= curr_score)
  {
    curr_matrix = GAP_B;
    curr_score = nw->gap_b_scores[arr_size-1];
  }

  if(nw->gap_a_scores[arr_size-1] >= curr_score)
  {
    curr_matrix = GAP_A;
    curr_score = nw->gap_a_scores[arr_size-1];
  }

#ifdef DEBUG
  alignment_print_matrices(nw);
#endif

  result->score = curr_score;
  char *alignment_a = result->result_a, *alignment_b = result->result_b;

  // coords in score matrices
  size_t score_x = nw->score_width-1, score_y = nw->score_height-1;
  size_t arr_index = arr_size - 1;

  for(; score_x > 0 && score_y > 0; next_char--)
  {
#ifdef DEBUG
    // %zu is the conversion for size_t
    printf("matrix: %s (%zu,%zu) score: %i\n",
           MATRIX_NAME(curr_matrix), score_x-1, score_y-1, curr_score);
#endif

    switch(curr_matrix)
    {
      case MATCH:
        alignment_a[next_char] = nw->seq_a[score_x-1];
        alignment_b[next_char] = nw->seq_b[score_y-1];
        break;

      case GAP_A:
        alignment_a[next_char] = '-';
        alignment_b[next_char] = nw->seq_b[score_y-1];
        break;

      case GAP_B:
        alignment_a[next_char] = nw->seq_a[score_x-1];
        alignment_b[next_char] = '-';
        break;

      default:
        fprintf(stderr, "Program error: invalid matrix number\n");
        fprintf(stderr, "Please submit a bug report to: [email protected]\n");
        exit(EXIT_FAILURE);
    }

    // The loop condition already guarantees score_x > 0 && score_y > 0 here
    alignment_reverse_move(&curr_matrix, &curr_score,
                           &score_x, &score_y, &arr_index, nw);
  }

  // Gap in A
  while(score_y > 0)
  {
    alignment_a[next_char] = '-';
    alignment_b[next_char] = nw->seq_b[score_y-1];
    next_char--;
    score_y--;
  }

  // Gap in B
  while(score_x > 0)
  {
    alignment_a[next_char] = nw->seq_a[score_x-1];
    alignment_b[next_char] = '-';
    next_char--;
    score_x--;
  }

  // Shift alignment strings back into 0th position in char arrays.
  // Kept as size_t so long alignments are not truncated to int.
  size_t first_char = next_char+1;
  size_t alignment_len = longest_alignment - first_char;

  // Use memmove: source and destination ranges may overlap
  memmove(alignment_a, alignment_a+first_char, alignment_len);
  memmove(alignment_b, alignment_b+first_char, alignment_len);

  alignment_a[alignment_len] = '\0';
  alignment_b[alignment_len] = '\0';

  result->length = alignment_len;
}