示例#1
0
/* Unit test for GtAlignment: builds one fixed alignment of two short DNA
   strings, one edit operation at a time, and checks the value reported by
   gt_alignment_eval() for it.
   Returns <had_err>, which starts out as 0 and is presumably set by
   gt_ensure() when the check fails. */
int gt_alignment_unit_test(GtError *err)
{
  /* Run-length encoded edit script in the order in which the operations are
     added, i.e. the alignment pictured below read from right to left:
     'R' = replacement, 'I' = insertion, 'D' = deletion. */
  static const struct {
    char op;
    unsigned int count;
  } runs[] = {
    { 'R', 3U }, { 'I', 1U }, { 'R', 3U }, { 'D', 1U },
    { 'R', 2U }, { 'I', 2U }, { 'R', 7U }
  };
  static char useq[] = "acgtagatatatagat",
              vseq[] = "agaaagaggtaagaggga";
  GtAlignment *alignment;
  unsigned int run, rep;
  int had_err = 0;
  gt_error_check(err);

  /* construct the following alignment (backwards):

     acgtaga--tatata-gat
     |   |||  || | | |                  [R 7,I 2,R 2,D 1,R 3,I 1,R 3]
     agaaagaggta-agaggga
  */

  alignment = gt_alignment_new_with_seqs((const GtUchar *) useq,
                                         (GtUword) strlen(useq),
                                         (const GtUchar *) vseq,
                                         (GtUword) strlen(vseq));

  for (run = 0; run < (unsigned int) (sizeof runs / sizeof runs[0]); run++) {
    for (rep = 0; rep < runs[run].count; rep++) {
      switch (runs[run].op) {
        case 'I':
          gt_alignment_add_insertion(alignment);
          break;
        case 'D':
          gt_alignment_add_deletion(alignment);
          break;
        default:
          gt_alignment_add_replacement(alignment);
          break;
      }
    }
  }

  /* expected value for the alignment built above */
  gt_ensure(gt_alignment_eval(alignment) == 10UL);

  gt_alignment_delete(alignment);

  return had_err;
}
示例#2
0
/* Evaluates the alignment <ali> and, if it satisfies every constraint given in
   <o>, creates a GtPBSHit for it and appends that hit to <hitlist>.

   A hit is only added if
   - the value of gt_alignment_eval() is at most o->max_edist,
   - the distance between o->radius and the start of the alignment's u-range
     lies within o->offsetlen,
   - the length of the u-range lies within o->alilen, and
   - the start of the v-range lies within o->trnaoffsetlen.

   <hitlist>, <desc> and <o> must not be NULL. A NULL <ali> is accepted and
   makes the function return without doing anything. <trna_seqlen>, <desc>,
   <strand> and <r> are passed on to gt_pbs_score_func() / gt_pbs_hit_new()
   unchanged. */
static void gt_pbs_add_hit(GtArray *hitlist, GtAlignment *ali, GtPBSOptions *o,
                           unsigned long trna_seqlen, const char *desc,
                           GtStrand strand, GtPBSResults *r)
{
  unsigned long dist, offset, alilen, radius;
  GtPBSHit *hit;
  GtRange urange, vrange;
  gt_assert(hitlist && desc);
  gt_assert(o);

  if (!ali) return;

  dist = gt_alignment_eval(ali);
  urange = gt_alignment_get_urange(ali);
  vrange = gt_alignment_get_vrange(ali);

  /* Absolute distance between the radius and the alignment start. This is
     computed directly on unsigned values: abs() takes an int, so feeding it
     an unsigned long difference truncates the value and only yields the
     intended result for small numbers.
     NOTE(review): assumes o->radius is non-negative and that the GtRange
     bounds are unsigned long -- confirm against the type definitions. */
  radius = (unsigned long) o->radius;
  offset = (radius > urange.start) ? radius - urange.start
                                   : urange.start - radius;

  /* length of the aligned region on the u sequence (both bounds inclusive) */
  alilen = urange.end - urange.start + 1;

  if (dist <= o->max_edist
        && offset <= o->offsetlen.end
        && offset >= o->offsetlen.start
        && alilen <= o->alilen.end
        && alilen >= o->alilen.start
        && vrange.start <= o->trnaoffsetlen.end
        && vrange.start >= o->trnaoffsetlen.start)
  {
    hit = gt_pbs_hit_new(alilen,
                         strand,
                         desc,
                         vrange.start,
                         urange.start,
                         urange.end,
                         offset,
                         dist,
                         gt_pbs_score_func(dist,
                                           offset,
                                           alilen,
                                           trna_seqlen,
                                           vrange.start),
                         r);
    gt_array_add(hitlist, hit);
  }
}
/* Advances the iterator <mi> to the next pair of sequences (one from es1, one
   from es2) whose alignment, as computed by gt_swalign(), passes the filters
   of the iterator, and stores a newly created match for it in <*match>.

   Pairs are visited with the sequence number in es2 varying fastest. A pair is
   skipped if gt_swalign() returns NULL, if the alignment is shorter than
   min_len, or if gt_alignment_eval() exceeds max_edist.

   Returns GT_MATCHER_STATUS_OK if a match was stored in <*match> and
   GT_MATCHER_STATUS_END once all sequences of es1 have been processed; in the
   latter case <*match> is not touched. <err> is not used. */
static GtMatchIteratorStatus gt_match_iterator_sw_next(GtMatchIterator *mi,
                                                      GT_UNUSED GtMatch **match,
                                                      GT_UNUSED GtError *err)
{
  GtMatchIteratorSW *mis;
  GtSeq *seq_a, *seq_b;
  char *a, *b;
  const char *adesc, *bdesc;
  GtAlignment *ali = NULL;
  unsigned long seqlen_a, seqlen_b, seqpos;
  GtRange arng, brng;
  gt_assert(mi && match);

  mis = gt_match_iterator_sw_cast(mi);
  while (true) {
    /* every iteration except the very first one moves on to the next pair */
    if (!mis->pvt->firstali)
      mis->pvt->seqno_es2++;
    /* es2 exhausted: continue with the next sequence of es1 and restart es2 */
    if (mis->pvt->seqno_es2 == gt_encseq_num_of_sequences(mis->pvt->es2)) {
      mis->pvt->seqno_es1++;
      if (mis->pvt->seqno_es1 == gt_encseq_num_of_sequences(mis->pvt->es1))
        return GT_MATCHER_STATUS_END;
      mis->pvt->seqno_es2 = 0;
    }
    seqlen_a = gt_encseq_seqlength(mis->pvt->es1, mis->pvt->seqno_es1);
    seqlen_b = gt_encseq_seqlength(mis->pvt->es2, mis->pvt->seqno_es2);
    /* XXX: reuse buffers for performance improvement */
    a = gt_malloc(seqlen_a * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es1, mis->pvt->seqno_es1);
    gt_encseq_extract_decoded(mis->pvt->es1, a, seqpos, seqpos + seqlen_a - 1);
    b = gt_malloc(seqlen_b * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es2, mis->pvt->seqno_es2);
    /* sequence b has to be decoded from es2: its start position, its length
       and its alphabet are all taken from es2 */
    gt_encseq_extract_decoded(mis->pvt->es2, b, seqpos, seqpos + seqlen_b - 1);
    seq_a = gt_seq_new(a, seqlen_a, gt_encseq_alphabet(mis->pvt->es1));
    seq_b = gt_seq_new(b, seqlen_b, gt_encseq_alphabet(mis->pvt->es2));
    ali = gt_swalign(seq_a, seq_b, mis->pvt->sf);
    mis->pvt->firstali = false;
    /* accept the first alignment which passes both filters; the resources of
       the accepted pair are released after the loop */
    if (ali && gt_alignment_get_length(ali) >= mis->pvt->min_len
          && gt_alignment_eval(ali) <= mis->pvt->max_edist) {
      break;
    }
    /* rejected pair: release everything before trying the next one */
    gt_alignment_delete(ali);
    gt_seq_delete(seq_a);
    gt_seq_delete(seq_b);
    gt_free(a);
    gt_free(b);
  }
  arng = gt_alignment_get_urange(ali);
  brng = gt_alignment_get_vrange(ali);
  /* seqlen_a and seqlen_b are reused here: gt_encseq_description() writes to
     them (presumably the length of the returned description, which is passed
     on as the length of the sequence id below) */
  adesc = gt_encseq_description(mis->pvt->es1, &seqlen_a, mis->pvt->seqno_es1);
  bdesc = gt_encseq_description(mis->pvt->es2, &seqlen_b, mis->pvt->seqno_es2);
  *match = gt_match_sw_new("", "",
                           mis->pvt->seqno_es1,
                           mis->pvt->seqno_es2,
                           gt_alignment_get_length(ali),
                           gt_alignment_eval(ali),
                           arng.start, brng.start,
                           arng.end, brng.end,
                           GT_MATCH_DIRECT);
  gt_match_set_seqid1_nt(*match, adesc, seqlen_a);
  gt_match_set_seqid2_nt(*match, bdesc, seqlen_b);
  gt_alignment_delete(ali);
  gt_seq_delete(seq_a);
  gt_seq_delete(seq_b);
  gt_free(a);
  gt_free(b);
  return GT_MATCHER_STATUS_OK;
}
示例#4
0
/* Unit test for the xdrop extension.
   For every scoring scheme in score[] and every ordered pair (i, j) of the
   test strings, the test runs gt_evalxdroparbitscoresextend(), backtracks the
   edit operations with gt_xdrop_backtrack(), builds an alignment over the
   prefixes of length best.ivalue / best.jvalue and compares the result of
   gt_alignment_eval() with the expected value stored in eval_scores[].
   Returns <had_err>, which starts out as 0 and is presumably set by
   gt_ensure() when a check fails. */
int gt_xdrop_unit_test(GT_UNUSED GtError *err)
{
  int had_err = 0, i, j, s;
  /* the test sequences; lengths[k] below is the length of strings[k] */
  const GtUchar *strings[GT_XDROP_NUM_OF_TESTS] =
    {(const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCCCAAAGGGTTTCCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTTTTTGGGGCCCCAAAATTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTNNNNTTTTGGGGCCCCAAAATTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCGCAAAGGGTTTCCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCCAAAGGGTTTCCCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCCTCAAAGGGTTTCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAACAGATCACCCGCTTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAACGGGTTTCTCAAAGGGTTCCCTTTTTTTTTTTTTTT"};
  /* Expected gt_alignment_eval() results, accessed as
     eval_scores[s*64 + i*8 + j]: eight blocks (one per scoring scheme s), each
     holding an 8x8 matrix with row i (first sequence) and column j (second
     sequence). The literal factors 64 and 8 as well as the number of
     initializers rely on GT_XDROP_NUM_OF_TESTS being 8. */
  GtUword lengths[GT_XDROP_NUM_OF_TESTS] =
  {54UL, 46UL, 50UL, 54UL, 54UL, 54UL, 46UL, 54UL},
    eval_scores[GT_XDROP_NUM_OF_TESTS *
      GT_XDROP_NUM_OF_TESTS *
      GT_XDROP_NUM_OF_TESTS] =
      {0, 13UL, 0, 1UL, 4UL, 1UL, 0, 7UL,
        13UL, 0, 0, 14UL, 15UL, 14UL, 0, 15UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 14UL, 0, 0, 1UL, 2UL, 0, 1UL,
        4UL, 15UL, 0, 1UL, 0, 8UL, 0, 1UL,
        1UL, 14UL, 0, 2UL, 8UL, 0, 0, 4UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        7UL, 15UL, 0, 1UL, 1UL, 4UL, 0, 0,

        0, 13UL, 0, 1UL, 4UL, 5UL, 14UL, 7UL,
        13UL, 0, 0, 14UL, 15UL, 14UL, 12UL, 15UL,
        0, 0, 0, 20UL, 0, 19UL, 17UL, 0,
        1UL, 14UL, 20UL, 0, 5UL, 6UL, 15UL, 8UL,
        4UL, 15UL, 0, 5UL, 0, 8UL, 15UL, 10UL,
        5UL, 14UL, 19UL, 6UL, 8UL, 0, 14UL, 4UL,
        14UL, 12UL, 17UL, 15UL, 15UL, 14UL, 0, 14UL,
        7UL, 15UL, 0, 8UL, 10UL, 4UL, 14UL, 0,

        0, 13UL, 19UL, 1UL, 2UL, 2UL, 13UL, 3UL,
        13UL, 0, 9UL, 14UL, 14UL, 13UL, 12UL, 14UL,
        17UL, 4UL, 0, 18UL, 19UL, 16UL, 16UL, 18UL,
        1UL, 14UL, 18UL, 0, 2UL, 3UL, 13UL, 3UL,
        2UL, 14UL, 18UL, 2UL, 0, 4UL, 13UL, 4UL,
        2UL, 13UL, 19UL, 3UL, 4UL, 0, 13UL, 3UL,
        14UL, 12UL, 17UL, 13UL, 13UL, 14UL, 0, 14UL,
        3UL, 14UL, 18UL, 3UL, 4UL, 3UL, 13UL, 0,

        0, 13UL, 17UL, 1UL, 2UL, 2UL, 14UL, 3UL,
        13UL, 0, 4UL, 14UL, 15UL, 13UL, 12UL, 14UL,
        19UL, 9UL, 0, 18UL, 18UL, 19UL, 17UL, 18UL,
        1UL, 14UL, 18UL, 0, 2UL, 3UL, 13UL, 3UL,
        2UL, 14UL, 19UL, 2UL, 0, 4UL, 13UL, 4UL,
        2UL, 13UL, 16UL, 3UL, 4UL, 0, 14UL, 3UL,
        13UL, 12UL, 16UL, 13UL, 13UL, 13UL, 0, 13UL,
        3UL, 14UL, 18UL, 3UL, 4UL, 3UL, 14UL, 0,

        0, 0, 0, 1UL, 1UL, 1UL, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 0, 1UL, 0, 0, 1UL,
        1UL, 0, 0, 1UL, 0, 0, 0, 1UL,
        1UL, 0, 0, 0, 0, 0, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 1UL, 1UL, 1UL, 0, 0,

        0, 0, 0, 1UL, 1UL, 1UL, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 0, 1UL, 2UL, 0, 1UL,
        1UL, 0, 0, 1UL, 0, 0, 0, 1UL,
        1UL, 0, 0, 2UL, 0, 0, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 1UL, 1UL, 1UL, 0, 0,

        0, 13UL, 17UL, 1UL, 2UL, 2UL, 13UL, 3UL,
        13UL, 0, 4UL, 14UL, 14UL, 13UL, 12UL, 14UL,
        17UL, 4UL, 0, 18UL, 19UL, 16UL, 16UL, 19UL,
        1UL, 14UL, 18UL, 0, 2UL, 3UL, 13UL, 3UL,
        2UL, 14UL, 19UL, 2UL, 0, 4UL, 13UL, 4UL,
        2UL, 13UL, 16UL, 3UL, 4UL, 0, 13UL, 3UL,
        13UL, 12UL, 16UL, 13UL, 13UL, 13UL, 0, 13UL,
        3UL, 14UL, 19UL, 3UL, 4UL, 3UL, 13UL, 0,

        0, 13UL, 0, 1UL, 2UL, 2UL, 5UL, 3UL,
        13UL, 0, 0, 14UL, 15UL, 13UL, 0, 14UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 14UL, 0, 0, 2UL, 3UL, 5UL, 3UL,
        2UL, 15UL, 0, 2UL, 0, 4UL, 5UL, 4UL,
        2UL, 13UL, 0, 3UL, 4UL, 0, 6UL, 3UL,
        5UL, 0, 0, 5UL, 5UL, 6UL, 0, 5UL,
        3UL, 14UL, 0, 3UL, 4UL, 3UL, 5UL, 0};
  GtSeqabstract *useq, *vseq;

  /* one scoring scheme per block of eval_scores; the meaning of the four
     fields is defined by GtXdropArbitraryscores (not visible here) */
  GtXdropArbitraryscores score[GT_XDROP_NUM_OF_TESTS] = {{2, -2, -2, -2},
                                                         {2, -1, -1, -1},
                                                         {2, -1, -5, -2},
                                                         {2, -1, -2, -5},
                                                         {3, -2, -3, -3},
                                                         {3, -1, -1, -1},
                                                         {4, -1, -3, -3},
                                                         {10, -3, -8, -8}};
  GtXdropresources *resources;
  GtXdropbest best;
  GtXdropscore dropscore = (GtXdropscore) 12;
  GtMultieoplist *edit_ops = NULL;
  GtAlignment *alignment;
  bool rightextension = true;

  gt_error_check(err);

  /* s selects the scoring scheme, i the first and j the second sequence */
  for (s = 0; s < GT_XDROP_NUM_OF_TESTS; ++s) {
    /* the resources are created once per scoring scheme and shared by all
       sequence pairs tested with it */
    resources = gt_xdrop_resources_new(&score[s]);
    /* only this loop stops on failure; the s and j loops keep running */
    for (i = 0; i < GT_XDROP_NUM_OF_TESTS && !had_err; ++i) {
      for (j = 0; j < GT_XDROP_NUM_OF_TESTS; ++j) {
        useq = gt_seqabstract_new_gtuchar(rightextension,GT_READMODE_FORWARD,
                                          strings[i], lengths[i], 0,lengths[i]);
        vseq = gt_seqabstract_new_gtuchar(rightextension, GT_READMODE_FORWARD,
                                          strings[j], lengths[j], 0,lengths[j]);
        /* extension with the first argument set to true; the result is
           written to <best> */
        gt_evalxdroparbitscoresextend(true, &best, resources, useq, vseq,
                                      dropscore);

        edit_ops = gt_xdrop_backtrack(resources, &best);
        gt_ensure(edit_ops != NULL);
        /* the alignment only covers the prefixes reported in <best> */
        alignment = gt_alignment_new_with_seqs(strings[i], best.ivalue,
                                               strings[j], best.jvalue);
        gt_alignment_set_multieop_list(alignment, edit_ops);
        gt_ensure(eval_scores[s*64+i*8+j] == gt_alignment_eval(alignment));

        gt_multieoplist_delete(edit_ops);
        gt_alignment_delete(alignment);
        /* a sequence paired with itself is additionally extended with the
           first argument set to false (presumably the opposite direction --
           TODO confirm) and checked against the same expected value */
        if (i == j) {
          gt_evalxdroparbitscoresextend(false, &best, resources, useq, vseq,
                                        dropscore);

          edit_ops = gt_xdrop_backtrack(resources, &best);
          alignment = gt_alignment_new_with_seqs(strings[i], best.ivalue,
                                                 strings[j], best.jvalue);
          gt_alignment_set_multieop_list(alignment, edit_ops);
          gt_ensure(eval_scores[s*64+i*8+j] == gt_alignment_eval(alignment));
          gt_multieoplist_delete(edit_ops);
          gt_alignment_delete(alignment);
        }
        gt_seqabstract_delete(useq);
        gt_seqabstract_delete(vseq);
      }
    }
    gt_xdrop_resources_delete(resources);
  }

  return had_err;
}