void nw_test_no_mismatches() { nw_aligner_t *nw = needleman_wunsch_new(); alignment_t *result = alignment_create(256); int match = 1; int mismatch = -2; int gap_open = -4; int gap_extend = -1; bool no_start_gap_penalty = false, no_end_gap_penalty = false; bool no_gaps_in_a = false, no_gaps_in_b = false; bool no_mismatches = true, case_sensitive = true; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); needleman_wunsch_align("atc", "ac", &scoring, nw, result); ASSERT(strcmp(result->result_a, "atc") == 0 && strcmp(result->result_b, "a-c") == 0); needleman_wunsch_align("cgatcga", "catcctcga", &scoring, nw, result); ASSERT(strcmp(result->result_a, "cgatc---ga") == 0 && strcmp(result->result_b, "c-atcctcga") == 0); alignment_free(result); needleman_wunsch_free(nw); }
/* First sequence is aligned to the corresponding (equal) substring of the second * sequence because both gaps at start and at end are free */ void nw_test_free_gaps_at_ends() { nw_aligner_t *nw = needleman_wunsch_new(); alignment_t *result = alignment_create(256); const char* seq_a = "acg"; const char* seq_b = "tttacgttt"; int match = 1; int mismatch = -1; int gap_open = -4; int gap_extend = -1; bool no_start_gap_penalty = true, no_end_gap_penalty = true; bool no_gaps_in_a = false, no_gaps_in_b = false; bool no_mismatches = false, case_sensitive = true; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); needleman_wunsch_align(seq_a, seq_b, &scoring, nw, result); ASSERT(strcmp(result->result_a, "---acg---") == 0 && strcmp(result->result_b, "tttacgttt") == 0); alignment_free(result); needleman_wunsch_free(nw); }
/* No gap is expected in the longer sequence */ void nw_test_no_gaps_in_longer() { nw_aligner_t *nw = needleman_wunsch_new(); alignment_t *aln = alignment_create(256); const char* seq_a = "aaaaacg"; const char* seq_b = "acgt"; int match = 1; int mismatch = -2; int gap_open = -4; int gap_extend = -1; bool no_start_gap_penalty = false, no_end_gap_penalty = false; bool no_gaps_in_a = true, no_gaps_in_b = false; bool no_mismatches = false, case_sensitive = true; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); needleman_wunsch_align(seq_a, seq_b, &scoring, nw, aln); // ASSERT(strcmp(aln->result_a, "aaaaacg") == 0 && // strcmp(aln->result_b, "acgt---") == 0); ASSERT(strcmp(aln->result_a, "aaaaacg-") == 0 && strcmp(aln->result_b, "a----cgt") == 0); alignment_free(aln); needleman_wunsch_free(nw); }
void align(char* seq_a, char* seq_b) { // Variables to store alignment result nw_aligner_t *nw = needleman_wunsch_new(); alignment_t *result = alignment_create(256); // Decide on scoring int match = 1; int mismatch = -2; int gap_open = -4; int gap_extend = -1; // Don't penalise gaps at the start // ACGATTT // ----TTT would score +3 (when match=+1) char no_start_gap_penalty = 1; // ..or gaps at the end e.g. // ACGATTT // ACGA--- would score +4 (when match=+1) char no_end_gap_penalty = 1; char no_gaps_in_a = 0, no_gaps_in_b = 0; char no_mismatches = 0; // Compare character case-sensitively (usually set to 0 for DNA etc) char case_sensitive = 0; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); // Add some special cases // x -> y means x in seq1 changing to y in seq2 scoring_add_mutation(&scoring, 'a', 'c', -2); // a -> c give substitution score -2 scoring_add_mutation(&scoring, 'c', 'a', -1); // c -> a give substitution score -1 // We could also prohibit the aligning of characters not given as special cases // scoring.use_match_mismatch = 0; needleman_wunsch_align(seq_a, seq_b, &scoring, nw, result); printf("seqA: %s\n", result->result_a); printf("seqB: %s\n", result->result_b); printf("alignment score: %i\n", result->score); // Free memory for storing alignment results needleman_wunsch_free(nw); alignment_free(result); }
void nw_test_no_mismatches_rand() { nw_aligner_t *nw = needleman_wunsch_new(); alignment_t *aln = alignment_create(256); int match = 1; int mismatch = -2; int gap_open = -4; int gap_extend = -1; bool no_start_gap_penalty = false, no_end_gap_penalty = false; bool no_gaps_in_a = false, no_gaps_in_b = false; bool no_mismatches = true, case_sensitive = true; scoring_t scoring; scoring_init(&scoring, match, mismatch, gap_open, gap_extend, no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive); char seqa[100], seqb[100]; size_t i; // Run 50 random alignments for(i = 0; i < 50; i++) { make_rand_seq(seqa, sizeof(seqa)); make_rand_seq(seqb, sizeof(seqb)); needleman_wunsch_align(seqa, seqb, &scoring, nw, aln); // Check no mismatches char *a = aln->result_a, *b = aln->result_b; while(1) { ASSERT(*a == '-' || *b == '-' || *a == *b); // printf("Seq: '%s' '%s'\n", aln->result_a, aln->result_b); // exit(EXIT_FAILURE); if(!*a && !*b) break; a++; b++; } } alignment_free(aln); needleman_wunsch_free(nw); }
/*
 * Python-callable wrapper around needleman_wunsch_align().
 *
 * Positional/keyword arguments (see kwlist):
 *   seq1, seq2   - strings to align (required)
 *   matrix       - optional mapping {(char_a, char_b): int score} of
 *                  substitution scores; each key element must be a string of
 *                  length 1
 *   match, mismatch, gap_open, gap_extend,
 *   no_start_gap_penalty, no_end_gap_penalty, no_gaps_in_a, no_gaps_in_b,
 *   no_mismatches, case_sensitive
 *                - integers forwarded to scoring_init()
 *
 * Returns a new tuple (aligned_seq1, aligned_seq2, score) built with the
 * "ssi" format, or NULL when argument parsing or matrix processing fails.
 */
static PyObject *
nw_align_wrapper(PyObject *self, PyObject *args, PyObject *kw)
{
  const char *seq1, *seq2;

  // Default scoring; every value can be overridden by keyword
  int match = 1;
  int mismatch = -2;
  int gap_open = -4;
  int gap_extend = -1;

  // Don't penalise gaps at the start
  // ACGATTT
  // ----TTT would score +3 (when match=+1)
  int no_start_gap_penalty = 0;

  // ..or gaps at the end e.g.
  // ACGATTT
  // ACGA--- would score +4 (when match=+1)
  int no_end_gap_penalty = 0;

  int no_gaps_in_a = 0, no_gaps_in_b = 0;
  int no_mismatches = 0;

  // Compare character case-sensitively (usually set to 0 for DNA etc)
  int case_sensitive = 0;

  PyObject *matrix = NULL;

  static char *kwlist[] = {"seq1","seq2", "matrix", "match", "mismatch",
                           "gap_open","gap_extend", "no_start_gap_penalty",
                           "no_end_gap_penalty", "no_gaps_in_a",
                           "no_gaps_in_b", "no_mismatches", "case_sensitive",
                           NULL};

  PyObject *res = NULL;

  // Owned references released in the common exit path below
  PyObject *mapping = NULL;
  PyObject *item = NULL;

  if(!PyArg_ParseTupleAndKeywords(args, kw, "ss|Oiiiiiiiiii", kwlist,
                                  &seq1, &seq2, &matrix, &match, &mismatch,
                                  &gap_open, &gap_extend,
                                  &no_start_gap_penalty, &no_end_gap_penalty,
                                  &no_gaps_in_a, &no_gaps_in_b,
                                  &no_mismatches, &case_sensitive))
    return NULL;

  // Variables to store alignment result
  alignment_t *result = alignment_create(256);
  nw_aligner_t *nw = needleman_wunsch_new();

  scoring_t scoring;
  scoring_init(&scoring, match, mismatch, gap_open, gap_extend,
               no_start_gap_penalty, no_end_gap_penalty,
               no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive);

  // Add special cases from the matrix
  // x -> y means x in seq1 changing to y in seq2
  if(matrix != NULL)
  {
    mapping = PyMapping_Items(matrix);
    if(mapping == NULL) goto error;

    int n = PySequence_Size(mapping);
    if(n < 0) goto error; // size query failed, exception already set

    int value;
    PyObject *key; // borrowed from item
    char *char_a;
    char *char_b;
    int i;

    for(i = 0; i < n; i++)
    {
      item = PySequence_GetItem(mapping, i);
      if(item == NULL || !PyTuple_Check(item)) goto error;

      if(!PyArg_ParseTuple(item, "Oi", &key, &value)) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Values of matrix dict should be integers");
        goto error;
      }

      if(!PyTuple_Check(key)) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Keys of matrix dict should be tuples");
        goto error;
      }

      if(!PyArg_ParseTuple(key, "ss", &char_a, &char_b)) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Keys of matrix dict should be tuples with 2 characters as elements.");
        goto error;
      }

      if(strlen(char_a) != 1 || strlen(char_b) != 1) {
        PyErr_SetString(PyExc_RuntimeError, "Character length should be 1");
        goto error;
      }

      // Fold case only when comparing case-insensitively. The cast to
      // unsigned char keeps tolower() defined for bytes >= 0x80.
      int from = case_sensitive ? *char_a : tolower((unsigned char)*char_a);
      int to   = case_sensitive ? *char_b : tolower((unsigned char)*char_b);

      scoring_add_mutation(&scoring, from, to, value);

      Py_DECREF(item);
      item = NULL;
    }
  }

  // We could also prohibit the aligning of characters not given as special cases
  // scoring.use_match_mismatch = 0;

  needleman_wunsch_align(seq1, seq2, &scoring, nw, result);

  res = Py_BuildValue("ssi", result->result_a, result->result_b, result->score);

error:
  // Common exit for success and failure: res stays NULL on failure
  Py_XDECREF(item);
  Py_XDECREF(mapping);

  // Free memory for storing alignment results
  needleman_wunsch_free(nw);
  alignment_free(result);

  return res;
}