void align(char* seq_a, char* seq_b) { // Variables to store alignment result sw_aligner_t *sw = smith_waterman_new(); alignment_t *result = alignment_create(256); // Decide on scoring int match = 1; int mismatch = -2; int gap_open = -4; int gap_extend = -1; // Don't penalise gaps at the start // ACGATTT // ----TTT would score +3 (when match=+1) char no_start_gap_penalty = 1; // ..or gaps at the end e.g. // ACGATTT // ACGA--- would score +4 (when match=+1) char no_end_gap_penalty = 1; char no_gaps_in_a = 0, no_gaps_in_b = 0; char no_mismatches = 0; // Compare character case-sensitively (usually set to 0 for DNA etc) char case_sensitive = 0; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); // Add some special cases // x -> y means x in seq1 changing to y in seq2 scoring_add_mutation(&scoring, 'a', 'c', -2); // a -> c give substitution score -2 scoring_add_mutation(&scoring, 'c', 'a', -1); // c -> a give substitution score -1 // We could also prohibit the aligning of characters not given as special cases // scoring.use_match_mismatch = 0; smith_waterman_align(seq_a, seq_b, &scoring, sw); while(smith_waterman_fetch(sw, result)) { printf("seqA: %s [start:%zu]\n", result->result_a, result->pos_a); printf("seqB: %s [start:%zu]\n", result->result_b, result->pos_b); printf("alignment score: %i\n\n", result->score); } // Free memory for storing alignment results smith_waterman_free(sw); alignment_free(result); }
void sw_test_no_gaps_smith_waterman() { sw_aligner_t *sw = smith_waterman_new(); alignment_t *result = alignment_create(256); const char* seq_a = "gacag"; const char* seq_b = "tgaagt"; int match = 1; int mismatch = -2; int gap_open = -4; int gap_extend = -1; bool no_start_gap_penalty = false, no_end_gap_penalty = false; bool no_gaps_in_a = true, no_gaps_in_b = true; bool no_mismatches = false, case_sensitive = true; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); smith_waterman_align(seq_a, seq_b, &scoring, sw); smith_waterman_fetch(sw, result); ASSERT(strcmp(result->result_a, "ga") == 0 && strcmp(result->result_b, "ga") == 0); smith_waterman_fetch(sw, result); ASSERT(strcmp(result->result_a, "ag") == 0 && strcmp(result->result_b, "ag") == 0); alignment_free(result); smith_waterman_free(sw); }
int main(int argc, char **argv) { if(argc != 2) print_usage(argv); char *seq = argv[1]; size_t seqlen = strlen(seq); // Go int match = 1, mismatch = -1, gap_open = -4, gap_extend = -1; bool no_start_gap_penalty = false, no_end_gap_penalty = false; bool no_gaps_in_a = true, no_gaps_in_b = true; bool no_mismatches = true, case_sensitive = true; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); // Alignment results stored here sw_aligner_t *sw = smith_waterman_new(); alignment_t *aln = alignment_create(seqlen+1); smith_waterman_align(seq, seq, &scoring, sw); // Loop over results while(smith_waterman_fetch(sw, aln)) { if(aln->pos_a < aln->pos_b) { fputs(aln->result_a, stdout); printf(" [%zu,%zu]\n", aln->pos_a, aln->pos_b); } } smith_waterman_free(sw); alignment_free(aln); return EXIT_SUCCESS; }
// Align two sequences against each other to find local alignments between them void align(const char *seq_a, const char *seq_b, const char *seq_a_name, const char *seq_b_name) { if((seq_a_name != NULL || seq_b_name != NULL) && wait_on_keystroke) { fprintf(stderr, "Error: Interactive input takes seq only " "(no FASTA/FASTQ) '%s:%s'\n", seq_a_name, seq_b_name); fflush(stderr); exit(EXIT_FAILURE); } // Check both arguments have length > 0 if(seq_a[0] == '\0' || seq_b[0] == '\0') { fprintf(stderr, "Error: Sequences must have length > 0\n"); fflush(stderr); if(cmd->print_fasta && seq_a_name != NULL && seq_b_name != NULL) { fprintf(stderr, "%s\n%s\n", seq_a_name, seq_b_name); } fflush(stderr); return; } smith_waterman_align(seq_a, seq_b, &scoring, sw); aligner_t *aligner = smith_waterman_get_aligner(sw); size_t len_a = aligner->score_width-1, len_b = aligner->score_height-1; printf("== Alignment %zu lengths (%lu, %lu):\n", alignment_index, len_a, len_b); if(cmd->print_matrices) { alignment_print_matrices(aligner); } // seqA if(cmd->print_fasta && seq_a_name != NULL) { fputs(seq_a_name, stdout); putc('\n', stdout); } if(cmd->print_seq) { fputs(seq_a, stdout); putc('\n', stdout); } // seqB if(cmd->print_fasta && seq_b_name != NULL) { fputs(seq_b_name, stdout); putc('\n', stdout); } if(cmd->print_seq) { fputs(seq_b, stdout); putc('\n', stdout); } putc('\n', stdout); if(!cmd->min_score_set) { // If min_score hasn't been set, set a limit based on the lengths of seqs // or zero if we're running interactively cmd->min_score = wait_on_keystroke ? 0 : scoring.match * MAX2(0.2 * MIN2(len_a, len_b), 2); #ifdef SEQ_ALIGN_VERBOSE printf("min_score: %i\n", cmd->min_score); #endif } fflush(stdout); size_t hit_index = 0; // For print context size_t context_left = 0, context_right = 0; size_t left_spaces_a = 0, left_spaces_b = 0; size_t right_spaces_a = 0, right_spaces_b = 0; while(get_next_hit() && smith_waterman_fetch(sw, result) && result->score >= cmd->min_score && (!cmd->max_hits_per_alignment_set || hit_index < cmd->max_hits_per_alignment)) { printf("hit %zu.%zu score: %i\n", alignment_index, hit_index++, result->score); if(cmd->print_context) { // Calculate number of characters of context to print either side context_left = MAX2(result->pos_a, result->pos_b); context_left = MIN2(context_left, cmd->print_context); size_t rem_a = len_a - (result->pos_a + result->len_a); size_t rem_b = len_b - (result->pos_b + result->len_b); context_right = MAX2(rem_a, rem_b); context_right = MIN2(context_right, cmd->print_context); left_spaces_a = (context_left > result->pos_a) ? context_left - result->pos_a : 0; left_spaces_b = (context_left > result->pos_b) ? context_left - result->pos_b : 0; right_spaces_a = (context_right > rem_a) ? context_right - rem_a : 0; right_spaces_b = (context_right > rem_b) ? context_right - rem_b : 0; } #ifdef SEQ_ALIGN_VERBOSE printf("context left = %lu; right = %lu spacing: [%lu,%lu] [%lu,%lu]\n", context_left, context_right, left_spaces_a, right_spaces_a, left_spaces_b, right_spaces_b); #endif // seq a print_alignment_part(result->result_a, result->result_b, result->pos_a, result->len_a, seq_a, left_spaces_a, right_spaces_a, context_left-left_spaces_a, context_right-right_spaces_a); if(cmd->print_pretty) { fputs(" ", stdout); size_t max_left_spaces = MAX2(left_spaces_a, left_spaces_b); size_t max_right_spaces = MAX2(right_spaces_a, right_spaces_b); size_t spacer; // Print spaces for lefthand spacing for(spacer = 0; spacer < max_left_spaces; spacer++) { putc(' ', stdout); } // Print dots for lefthand context sequence for(spacer = 0; spacer < context_left-max_left_spaces; spacer++) { putc('.', stdout); } alignment_print_spacer(result->result_a, result->result_b, &scoring); // Print dots for righthand context sequence for(spacer = 0; spacer < context_right-max_right_spaces; spacer++) { putc('.', stdout); } // Print spaces for righthand spacing for(spacer = 0; spacer < max_right_spaces; spacer++) { putc(' ', stdout); } putc('\n', stdout); } // seq b print_alignment_part(result->result_b, result->result_a, result->pos_b, result->len_b, seq_b, left_spaces_b, right_spaces_b, context_left-left_spaces_b, context_right-right_spaces_b); printf("\n"); // Flush output here fflush(stdout); } fputs("==\n", stdout); fflush(stdout); // Increment sequence alignment counter alignment_index++; }