Exemple #1
0
/* With no_mismatches enabled, both expected alignments below consist solely
 * of columns that are either an exact match or a gap. */
void nw_test_no_mismatches()
{
  // Scoring scheme: substitutions disabled, case-sensitive comparison
  const int match_score = 1, mismatch_score = -2;
  const int gap_open_score = -4, gap_extend_score = -1;

  const bool free_start_gaps = false, free_end_gaps = false;
  const bool forbid_gaps_in_a = false, forbid_gaps_in_b = false;
  const bool forbid_mismatches = true, match_case = true;

  scoring_t scheme;
  scoring_init(&scheme, match_score, mismatch_score,
               gap_open_score, gap_extend_score,
               free_start_gaps, free_end_gaps,
               forbid_gaps_in_a, forbid_gaps_in_b,
               forbid_mismatches, match_case);

  // Aligner state and the output buffer are shared by both alignments
  nw_aligner_t *aligner = needleman_wunsch_new();
  alignment_t *result = alignment_create(256);

  // Single deletion in the second sequence
  needleman_wunsch_align("atc", "ac", &scheme, aligner, result);
  ASSERT(strcmp(result->result_a, "atc") == 0 &&
         strcmp(result->result_b, "a-c") == 0);

  // Gaps needed on both rows
  needleman_wunsch_align("cgatcga", "catcctcga", &scheme, aligner, result);
  ASSERT(strcmp(result->result_a, "cgatc---ga") == 0 &&
         strcmp(result->result_b, "c-atcctcga") == 0);

  alignment_free(result);
  needleman_wunsch_free(aligner);
}
Exemple #2
0
/* The short sequence should line up with the identical substring in the
 * middle of the long one, because gaps at the start and at the end are both
 * unpenalised */
void nw_test_free_gaps_at_ends()
{
  const char *shorter = "acg";
  const char *longer = "tttacgttt";

  // Scoring scheme: leading and trailing gaps cost nothing
  const int match_score = 1, mismatch_score = -1;
  const int gap_open_score = -4, gap_extend_score = -1;

  const bool free_start_gaps = true, free_end_gaps = true;
  const bool forbid_gaps_in_a = false, forbid_gaps_in_b = false;
  const bool forbid_mismatches = false, match_case = true;

  scoring_t scheme;
  scoring_init(&scheme, match_score, mismatch_score,
               gap_open_score, gap_extend_score,
               free_start_gaps, free_end_gaps,
               forbid_gaps_in_a, forbid_gaps_in_b,
               forbid_mismatches, match_case);

  nw_aligner_t *aligner = needleman_wunsch_new();
  alignment_t *result = alignment_create(256);

  needleman_wunsch_align(shorter, longer, &scheme, aligner, result);
  ASSERT(strcmp(result->result_a, "---acg---") == 0 &&
         strcmp(result->result_b, "tttacgttt") == 0);

  alignment_free(result);
  needleman_wunsch_free(aligner);
}
Exemple #3
0
/* Aligns with no_gaps_in_a set, where sequence a is the longer input.
 * NOTE(review): the expected result_a still ends with a single '-', so the
 * flag presumably only bars gaps inside the sequence; confirm against the
 * aligner. */
void nw_test_no_gaps_in_longer()
{
  const char *longer = "aaaaacg";
  const char *shorter = "acgt";

  // Scoring scheme: gaps disallowed in the first sequence
  const int match_score = 1, mismatch_score = -2;
  const int gap_open_score = -4, gap_extend_score = -1;

  const bool free_start_gaps = false, free_end_gaps = false;
  const bool forbid_gaps_in_a = true, forbid_gaps_in_b = false;
  const bool forbid_mismatches = false, match_case = true;

  scoring_t scheme;
  scoring_init(&scheme, match_score, mismatch_score,
               gap_open_score, gap_extend_score,
               free_start_gaps, free_end_gaps,
               forbid_gaps_in_a, forbid_gaps_in_b,
               forbid_mismatches, match_case);

  nw_aligner_t *aligner = needleman_wunsch_new();
  alignment_t *aln = alignment_create(256);

  needleman_wunsch_align(longer, shorter, &scheme, aligner, aln);

  // Disabled alternative expectation, kept for reference:
  // ASSERT(strcmp(aln->result_a, "aaaaacg") == 0 &&
  //        strcmp(aln->result_b, "acgt---") == 0);

  ASSERT(strcmp(aln->result_a, "aaaaacg-") == 0 &&
         strcmp(aln->result_b, "a----cgt") == 0);

  alignment_free(aln);
  needleman_wunsch_free(aligner);
}
Exemple #4
0
/* Align seq_a against seq_b with Needleman-Wunsch and print the two aligned
 * rows followed by the alignment score on stdout. */
void align(char* seq_a, char* seq_b)
{
  // --- Scoring scheme ---
  int match_score = 1, mismatch_score = -2;
  int gap_open_score = -4, gap_extend_score = -1;

  // Leading gaps are free, e.g.
  //   ACGATTT
  //   ----TTT would score +3 (when match=+1)
  char free_start_gaps = 1;

  // Trailing gaps are free as well, e.g.
  //   ACGATTT
  //   ACGA--- would score +4 (when match=+1)
  char free_end_gaps = 1;

  char forbid_gaps_in_a = 0, forbid_gaps_in_b = 0;
  char forbid_mismatches = 0;

  // Whether characters are compared case-sensitively
  // (usually set to 0 for DNA etc)
  char match_case = 0;

  scoring_t scheme;
  scoring_init(&scheme, match_score, mismatch_score,
               gap_open_score, gap_extend_score,
               free_start_gaps, free_end_gaps,
               forbid_gaps_in_a, forbid_gaps_in_b,
               forbid_mismatches, match_case);

  // Substitution-specific overrides: x -> y means x in seq1 facing y in seq2
  scoring_add_mutation(&scheme, 'a', 'c', -2); // a -> c scores -2
  scoring_add_mutation(&scheme, 'c', 'a', -1); // c -> a scores -1

  // Aligning characters not listed as special cases could be prohibited with
  // scheme.use_match_mismatch = 0;

  // --- Run the alignment ---
  nw_aligner_t *aligner = needleman_wunsch_new();
  alignment_t *aln = alignment_create(256);

  needleman_wunsch_align(seq_a, seq_b, &scheme, aligner, aln);

  printf("seqA: %s\n", aln->result_a);
  printf("seqB: %s\n", aln->result_b);
  printf("alignment score: %i\n", aln->score);

  // --- Release aligner state and the result buffers ---
  needleman_wunsch_free(aligner);
  alignment_free(aln);
}
Exemple #5
0
/* Randomised check of the no_mismatches option: aligns 50 pairs of sequences
 * produced by make_rand_seq() and verifies that every alignment column is
 * either a gap or a pair of identical characters, and that both aligned rows
 * have the same length. */
void nw_test_no_mismatches_rand()
{
  nw_aligner_t *nw = needleman_wunsch_new();
  alignment_t *aln = alignment_create(256);

  int match = 1;
  int mismatch = -2;
  int gap_open = -4;
  int gap_extend = -1;

  bool no_start_gap_penalty = false, no_end_gap_penalty = false;
  bool no_gaps_in_a = false, no_gaps_in_b = false;
  bool no_mismatches = true, case_sensitive = true;

  scoring_t scoring;
  scoring_init(&scoring, match, mismatch, gap_open, gap_extend,
               no_start_gap_penalty, no_end_gap_penalty,
               no_gaps_in_a, no_gaps_in_b,
               no_mismatches, case_sensitive);

  char seqa[100], seqb[100];
  size_t i;

  // Run 50 random alignments
  for(i = 0; i < 50; i++)
  {
    make_rand_seq(seqa, sizeof(seqa));
    make_rand_seq(seqb, sizeof(seqb));
    needleman_wunsch_align(seqa, seqb, &scoring, nw, aln);

    // Check no mismatches, column by column. Stop at the first terminator
    // on either row so a length mismatch can never walk past the end of the
    // shorter string.
    const char *a = aln->result_a, *b = aln->result_b;
    for(; *a != '\0' && *b != '\0'; a++, b++) {
      ASSERT(*a == '-' || *b == '-' || *a == *b);
    }

    // Both rows must end in the same column
    ASSERT(*a == '\0' && *b == '\0');
  }

  alignment_free(aln);
  needleman_wunsch_free(nw);
}
Exemple #6
0
/**
 * Python entry point: Needleman-Wunsch alignment of two strings.
 *
 * Arguments (positional or keyword, in this order):
 *   seq1, seq2            - required strings to align
 *   matrix                - optional mapping {(a, b): score}; a and b are
 *                           one-character strings, score is an integer
 *   match, mismatch, gap_open, gap_extend,
 *   no_start_gap_penalty, no_end_gap_penalty,
 *   no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive
 *                         - optional integers forwarded to scoring_init()
 *
 * Every matrix entry is registered with scoring_add_mutation() as the score
 * for character a (seq1 side) facing character b (seq2 side). When
 * case_sensitive is 0 both characters are lower-cased before registration.
 *
 * Returns a new tuple (aligned_seq1, aligned_seq2, score), or NULL with a
 * Python exception set on failure.
 */
static PyObject * nw_align_wrapper(PyObject *self, PyObject *args, PyObject *kw)
{
    const char *seq1, *seq2;

    // Default scoring; each value can be overridden by a keyword argument
    int match = 1;
    int mismatch = -2;
    int gap_open = -4;
    int gap_extend = -1;

    // Don't penalise gaps at the start
    // ACGATTT
    // ----TTT would score +3 (when match=+1)
    int no_start_gap_penalty = 0;

    // ..or gaps at the end e.g.
    // ACGATTT
    // ACGA--- would score +4 (when match=+1)
    int no_end_gap_penalty = 0;

    int no_gaps_in_a = 0, no_gaps_in_b = 0;
    int no_mismatches = 0;

    // Compare character case-sensitively (usually set to 0 for DNA etc)
    int case_sensitive = 0;

    PyObject *matrix = NULL;   // borrowed from args/kw
    PyObject *mapping = NULL;  // owned: (key, value) items of matrix
    PyObject *item = NULL;     // owned: matrix item currently being parsed
    PyObject *res = NULL;      // return value; stays NULL on any error

    static char *kwlist[] = {"seq1","seq2", "matrix", "match", "mismatch", "gap_open","gap_extend", "no_start_gap_penalty", "no_end_gap_penalty", "no_gaps_in_a", "no_gaps_in_b", "no_mismatches", "case_sensitive", NULL};

    if(!PyArg_ParseTupleAndKeywords(args, kw, "ss|Oiiiiiiiiii", kwlist, &seq1, &seq2, &matrix, &match, &mismatch, &gap_open, &gap_extend,
                                                                 &no_start_gap_penalty, &no_end_gap_penalty, &no_gaps_in_a, &no_gaps_in_b, &no_mismatches, &case_sensitive))
        return NULL;

    // Buffers for the alignment result and aligner working state
    alignment_t *result = alignment_create(256);
    nw_aligner_t *nw = needleman_wunsch_new();

    scoring_t scoring;
    scoring_init(&scoring, match, mismatch, gap_open, gap_extend,
                no_start_gap_penalty, no_end_gap_penalty,
                no_gaps_in_a, no_gaps_in_b, no_mismatches, case_sensitive);

    // Register the per-pair substitution scores
    // x -> y means x in seq1 changing to y in seq2
    if(matrix != NULL)
    {
        mapping = PyMapping_Items(matrix);
        if(mapping == NULL)
            goto error;

        // A negative size signals failure with an exception already set
        int n = PySequence_Size(mapping);
        if(n < 0)
            goto error;

        int i;
        for(i = 0; i < n; i++)
        {
            PyObject *key;          // borrowed from item
            int value;
            const char *char_a;     // both point into objects owned by key
            const char *char_b;

            item = PySequence_GetItem(mapping, i);
            if(item == NULL)
                goto error;
            if(!PyTuple_Check(item))
            {
                // Never return NULL without an exception set
                PyErr_SetString(PyExc_RuntimeError, "Items of matrix should be (key, value) tuples");
                goto error;
            }

            if(!PyArg_ParseTuple(item, "Oi", &key, &value))
            {
                PyErr_SetString(PyExc_RuntimeError, "Values of matrix dict should be integers");
                goto error;
            }
            if(!PyTuple_Check(key))
            {
                PyErr_SetString(PyExc_RuntimeError, "Keys of matrix dict should be tuples");
                goto error;
            }
            if(!PyArg_ParseTuple(key, "ss", &char_a, &char_b))
            {
                PyErr_SetString(PyExc_RuntimeError, "Keys of matrix dict should be tuples with 2 characters as elements.");
                goto error;
            }
            if(strlen(char_a) != 1 || strlen(char_b) != 1)
            {
                PyErr_SetString(PyExc_RuntimeError, "Character length should be 1");
                goto error;
            }

            // tolower() requires a value representable as unsigned char
            scoring_add_mutation(&scoring,
                                 case_sensitive ? *char_a : tolower((unsigned char)*char_a),
                                 case_sensitive ? *char_b : tolower((unsigned char)*char_b),
                                 value);

            // char_a/char_b are not used past this point
            Py_DECREF(item);
            item = NULL;
        }
    }

    // We could also prohibit the aligning of characters not given as special cases
    // scoring.use_match_mismatch = 0;

    needleman_wunsch_align(seq1, seq2, &scoring, nw, result);

    res = Py_BuildValue("ssi", result->result_a, result->result_b, result->score);

error:
    // Shared cleanup for the success and failure paths
    Py_XDECREF(item);
    Py_XDECREF(mapping);

    // Free memory for storing alignment results
    needleman_wunsch_free(nw);

    alignment_free(result);
    return res;
}