Пример #1
0
static void locali_freedfsconstinfo (Limdfsconstinfo **lci)
{
  gt_alignment_delete((*lci)->tbs.alignment);
  (*lci)->tbs.alignment = NULL;
  gt_free((*lci)->tbs.spaceGtUchardbsubstring);
  (*lci)->tbs.spaceGtUchardbsubstring = NULL;
  gt_free (*lci);
  *lci = NULL;
}
Пример #2
0
void gt_print_edist_alignment(const GtUchar *useq, GtUword ustart, GtUword ulen,
                              const GtUchar *vseq, GtUword vstart, GtUword vlen)
{
  GtAlignment *align;
  align = gt_alignment_new_with_seqs(useq+ustart, ulen, vseq+vstart, vlen);

  alignment_in_square_space(NULL, align, useq, ustart, ulen,
                            vseq, vstart, vlen, 0, 1, 1);
  gt_alignment_show(align, true, stdout, 80);
  gt_alignment_delete(align);
}
Пример #3
0
void gt_freeSWdpresource(SWdpresource *swdpresource)
{
  gt_alignment_delete(swdpresource->alignment);
  swdpresource->alignment = NULL;
  gt_free(swdpresource->swcol);
  gt_free(swdpresource->swentrycol);
  gt_free(swdpresource->maxedges);
  gt_free(swdpresource->dbsubstring);
  swdpresource->allocatedswcol = 0;
  swdpresource->allocatedmaxedges = 0;
  gt_free(swdpresource);
}
Пример #4
0
int gt_alignment_unit_test(GtError *err)
{
  static char u[] = "acgtagatatatagat",
              v[] = "agaaagaggtaagaggga";
  GtAlignment *alignment;
  int had_err = 0;
  gt_error_check(err);

  /* construct the following alignment (backwards):

     acgtaga--tatata-gat
     |   |||  || | | |                  [R 7,I 2,R 2,D 1,R 3,I 1,R 3]
     agaaagaggta-agaggga
  */

  alignment = gt_alignment_new_with_seqs((const GtUchar *) u,
                                 (GtUword) strlen(u),
                                 (const GtUchar *) v,
                                 (GtUword) strlen(v));
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_insertion(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_deletion(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_insertion(alignment);
  gt_alignment_add_insertion(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);
  gt_alignment_add_replacement(alignment);

  gt_ensure(gt_alignment_eval(alignment) == 10UL);

  gt_alignment_delete(alignment);

  return had_err;
}
Пример #5
0
void gt_checklinearspace_local(GT_UNUSED bool forward,
                               const GtUchar *useq, GtUword ulen,
                               const GtUchar *vseq, GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtUchar *low_useq, *low_vseq;
  LinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  GtAlphabet *alphabet;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new_DNA(matchscore, mismatchscore,
                                         0, gapscore);
  alphabet = gt_scorehandler_get_alphabet(scorehandler);
  low_useq = check_dna_sequence(useq, ulen, alphabet);
  low_vseq = check_dna_sequence(vseq, vlen, alphabet);

  if (low_useq == NULL || low_vseq == NULL)
  {
    low_useq? gt_free(low_useq):0;
    low_vseq? gt_free(low_vseq):0;
    gt_scorehandler_delete(scorehandler);
    return;
  }

  spacemanager = gt_linspaceManagement_new();
  align = gt_alignment_new();
  score1 = gt_computelinearspace_local_generic(spacemanager, scorehandler,
                                               align, useq, 0, ulen,
                                               vseq, 0, vlen);

  score2 = gt_alignment_eval_with_score(align, matchscore,
                                        mismatchscore, gapscore);

  gt_linspaceManagement_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);

  if (score1 != score2)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_reset(align);
  score3 = alignment_in_square_space_local(NULL, align, useq, 0, ulen,
                                           vseq, 0, vlen, matchscore,
                                           mismatchscore, gapscore);

  if (score1 != score3)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = alignment_in_square_space_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  score4 = gt_alignment_eval_with_score(align, matchscore,
                                                mismatchscore, gapscore);
  if (score3 != score4)
  {
    fprintf(stderr,"alignment_in_square_space_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_delete(align);
  gt_free(low_useq);
  gt_free(low_vseq);
}
Пример #6
0
void gt_checklinearspace(GT_UNUSED bool forward,
                         const GtUchar *useq,
                         GtUword ulen,
                         const GtUchar *vseq,
                         GtUword vlen)
{
  GtAlignment *align;
  GtUword edist1, edist2, edist3, edist4,
          matchcost = 0, mismatchcost = 1, gapcost = 1;
  LinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  GtUchar *low_useq, *low_vseq;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new_DNA(matchcost,  mismatchcost, 0, gapcost);
  GtAlphabet *alphabet = gt_scorehandler_get_alphabet(scorehandler);
  low_useq = check_dna_sequence(useq, ulen, alphabet);
  low_vseq = check_dna_sequence(vseq, vlen, alphabet);

  if (low_useq == NULL || low_vseq == NULL)
  {
    low_useq? gt_free(low_useq):0;
    low_vseq? gt_free(low_vseq):0;
    gt_scorehandler_delete(scorehandler);
    return;
  }

  spacemanager = gt_linspaceManagement_new();
  align = gt_alignment_new_with_seqs(low_useq, ulen, low_vseq, vlen);

  edist1 = gt_calc_linearalign(spacemanager, scorehandler, align,
                               low_useq, 0, ulen,
                               low_vseq, 0, vlen);

  edist2 = distance_only_global_alignment(useq, 0, ulen, vseq, 0, vlen,
                                          scorehandler);

  if (edist1 != edist2)
  {
    fprintf(stderr,"gt_calc_linearalign = "GT_WU" != "GT_WU
            " = distance_only_global_alignment\n", edist1,edist2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  edist3 = gt_alignment_eval_with_score(align, matchcost,
                                        mismatchcost, gapcost);

  if (edist2 != edist3)
  {
    fprintf(stderr,"distance_only_global_alignment = "GT_WU" != "GT_WU
            " = gt_alignment_eval_with_score\n", edist2,edist3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  edist4 = gt_calc_linearedist(low_useq, ulen, low_vseq, vlen);
  if (edist3 != edist4)
  {
    fprintf(stderr,"gt_alignment_eval_with_score = "GT_WU" != "GT_WU
            " = gt_calc_linearedist\n", edist3, edist4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_free(low_useq);
  gt_free(low_vseq);
  gt_linspaceManagement_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  gt_alignment_delete(align);
}
Пример #7
0
GtPBSResults* gt_pbs_find(const char *seq,
                          const char *rev_seq,
                          GtLTRElement *element,
                          GtPBSOptions *o,
                          GtError *err)
{
  GtSeq *seq_forward, *seq_rev;
  GtPBSResults *results;
  unsigned long j;
  GtAlignment *ali;
  GtAlphabet *a = gt_alphabet_new_dna();
  GtScoreFunction *sf = gt_dna_scorefunc_new(a,
                                             o->ali_score_match,
                                             o->ali_score_mismatch,
                                             o->ali_score_insertion,
                                             o->ali_score_deletion);

  gt_assert(seq && rev_seq && sf && a && element);

  results = gt_pbs_results_new(element, o);

  seq_forward = gt_seq_new(seq + (gt_ltrelement_leftltrlen(element))
                               - (o->radius),
                           2*o->radius + 1,
                           a);

  seq_rev     = gt_seq_new(rev_seq + (gt_ltrelement_rightltrlen(element))
                                   - (o->radius),
                           2*o->radius + 1,
                           a);

    for (j=0;j<gt_bioseq_number_of_sequences(o->trna_lib);j++)
  {
    GtSeq *trna_seq, *trna_from3;
    char *trna_from3_full;
    unsigned long trna_seqlen;

    trna_seq = gt_bioseq_get_seq(o->trna_lib, j);
    trna_seqlen = gt_seq_length(trna_seq);

    trna_from3_full = gt_calloc(trna_seqlen, sizeof (char));
    memcpy(trna_from3_full, gt_seq_get_orig(trna_seq),
           sizeof (char)*trna_seqlen);
    (void) gt_reverse_complement(trna_from3_full, trna_seqlen, err);
    trna_from3 = gt_seq_new_own(trna_from3_full, trna_seqlen, a);

    ali = gt_swalign(seq_forward, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_FORWARD,
                   results);
    gt_alignment_delete(ali);

    ali = gt_swalign(seq_rev, trna_from3, sf);
    gt_pbs_add_hit(results->hits, ali, o, trna_seqlen,
                   gt_seq_get_description(trna_seq), GT_STRAND_REVERSE,
                   results);
    gt_alignment_delete(ali);

    gt_seq_delete(trna_from3);
  }
  gt_seq_delete(seq_forward);
  gt_seq_delete(seq_rev);
  gt_score_function_delete(sf);
  gt_alphabet_delete(a);
  gt_array_sort(results->hits, gt_pbs_hit_compare);
  return results;
}
Пример #8
0
static void seededmatch2alignment(const GtQuerymatch *querymatch,
                                  GtUword querystartabsolute,
                                  const GtEncseq *encseq,
                                  GtQuerymatchoutoptions *querymatchoutoptions)
{
  GtUword ulen, vlen, rightdistance = 0, leftdistance = 0,
          offset1 = querymatchoutoptions->seedpos1 +
                    querymatchoutoptions->seedlen,
          offset2 = querymatchoutoptions->seedpos2 +
                    querymatchoutoptions->seedlen;
  GtAlignment *alignment = gt_alignment_new();

  gt_assert(querymatch->dbstart + querymatch->dblen >= offset1);
  ulen = querymatch->dbstart + querymatch->dblen - offset1;
  gt_assert(querystartabsolute + querymatch->querylen >= offset2);
  vlen = querystartabsolute + querymatch->querylen - offset2;
  if (ulen > 0 && vlen > 0)
  {
    Polished_point best_polished_point = {0,0,0};
    rightdistance = align_front_prune_edist(true,
                                            &best_polished_point,
                                            querymatchoutoptions->front_trace,
                                            encseq,
                                            querymatchoutoptions->ggemi,
                                            offset1,
                                            ulen,
                                            offset2,
                                            vlen);
    front_trace2alignments(alignment,
                           querymatchoutoptions->front_trace,
                           &best_polished_point,
                           ulen,vlen);
    front_trace_reset(querymatchoutoptions->front_trace,ulen+vlen);
  }
  gt_assert(rightdistance <= querymatch->edist);
  if (querymatchoutoptions->totallength == GT_UWORD_MAX)
  {
    querymatchoutoptions->totallength = gt_encseq_total_length(encseq);
  }
  if (querymatchoutoptions->seedpos1 > querymatch->dbstart &&
      querymatchoutoptions->seedpos2 > querystartabsolute)
  {
    Polished_point best_polished_point = {0,0,0};
    ulen = querymatchoutoptions->seedpos1 - querymatch->dbstart;
    vlen = querymatchoutoptions->seedpos2 - querystartabsolute;
    leftdistance = align_front_prune_edist(false,
                               &best_polished_point,
                               querymatchoutoptions->front_trace,
                               encseq,
                               querymatchoutoptions->ggemi,
                               GT_REVERSEPOS(querymatchoutoptions->totallength,
                                             querymatchoutoptions->seedpos1-1),
                               ulen,
                               GT_REVERSEPOS(querymatchoutoptions->totallength,
                                             querymatchoutoptions->seedpos2-1),
                               vlen);
    front_trace_reset(querymatchoutoptions->front_trace,ulen+vlen);
  }
  if (leftdistance + rightdistance > querymatch->edist)
  {
    fprintf(stderr,"leftdistance + rightdistance = " GT_WU " + " GT_WU " > "
                   GT_WU " = querymatch->edist\n",
                   leftdistance,rightdistance,querymatch->edist);
  }
  gt_assert(leftdistance + rightdistance <= querymatch->edist);
  gt_alignment_delete(alignment);
}
Пример #9
0
static GtMatchIteratorStatus gt_match_iterator_sw_next(GtMatchIterator *mi,
                                                      GT_UNUSED GtMatch **match,
                                                      GT_UNUSED GtError *err)
{
  GtMatchIteratorSW *mis;
  GtSeq *seq_a, *seq_b;
  char *a, *b;
  const char *adesc, *bdesc;
  GtAlignment *ali = NULL;
  unsigned long seqlen_a, seqlen_b, seqpos;
  GtRange arng, brng;
  gt_assert(mi && match);

  mis = gt_match_iterator_sw_cast(mi);
  while (true) {
    if (!mis->pvt->firstali)
      mis->pvt->seqno_es2++;
    if (mis->pvt->seqno_es2 == gt_encseq_num_of_sequences(mis->pvt->es2)) {
      mis->pvt->seqno_es1++;
      if (mis->pvt->seqno_es1 == gt_encseq_num_of_sequences(mis->pvt->es1))
        return GT_MATCHER_STATUS_END;
      mis->pvt->seqno_es2 = 0;
    }
    seqlen_a = gt_encseq_seqlength(mis->pvt->es1, mis->pvt->seqno_es1);
    seqlen_b = gt_encseq_seqlength(mis->pvt->es2, mis->pvt->seqno_es2);
    /* XXX: reuse buffers for performance improvement */
    a = gt_malloc(seqlen_a * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es1, mis->pvt->seqno_es1);
    gt_encseq_extract_decoded(mis->pvt->es1, a, seqpos, seqpos + seqlen_a - 1);
    b = gt_malloc(seqlen_b * sizeof (char));
    seqpos = gt_encseq_seqstartpos(mis->pvt->es2, mis->pvt->seqno_es2);
    gt_encseq_extract_decoded(mis->pvt->es1, b, seqpos, seqpos + seqlen_b - 1);
    seq_a = gt_seq_new(a, seqlen_a, gt_encseq_alphabet(mis->pvt->es1));
    seq_b = gt_seq_new(b, seqlen_b, gt_encseq_alphabet(mis->pvt->es2));
    ali = gt_swalign(seq_a, seq_b, mis->pvt->sf);
    mis->pvt->firstali = false;
    if (ali && gt_alignment_get_length(ali) >= mis->pvt->min_len
          && gt_alignment_eval(ali) <= mis->pvt->max_edist) {
      break;
    }
    gt_alignment_delete(ali);
    gt_seq_delete(seq_a);
    gt_seq_delete(seq_b);
    gt_free(a);
    gt_free(b);
  }
  arng = gt_alignment_get_urange(ali);
  brng = gt_alignment_get_vrange(ali);
  adesc = gt_encseq_description(mis->pvt->es1, &seqlen_a, mis->pvt->seqno_es1);
  bdesc = gt_encseq_description(mis->pvt->es2, &seqlen_b, mis->pvt->seqno_es2);
  *match = gt_match_sw_new("", "",
                           mis->pvt->seqno_es1,
                           mis->pvt->seqno_es2,
                           gt_alignment_get_length(ali),
                           gt_alignment_eval(ali),
                           arng.start, brng.start,
                           arng.end, brng.end,
                           GT_MATCH_DIRECT);
  gt_match_set_seqid1_nt(*match, adesc, seqlen_a);
  gt_match_set_seqid2_nt(*match, bdesc, seqlen_b);
  gt_alignment_delete(ali);
  gt_seq_delete(seq_a);
  gt_seq_delete(seq_b);
  gt_free(a);
  gt_free(b);
  return GT_MATCHER_STATUS_OK;
}
Пример #10
0
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlignment *align;
  GtWord left_dist = 0, right_dist = 0;
  GtSequenceTable *sequence_table1, *sequence_table2;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *linspacetimer = NULL;
  GtAlphabet *alphabet = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  sequence_table1 = gt_sequence_table_new();
  sequence_table2 = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,arguments->timesquarefactor);

  /* get sequences */
  if (gt_str_array_size(arguments->strings) > 0)
  {
    get_onesequence(sequence_table1, arguments->strings, 0);
    sequence_table1->size++;
    get_onesequence(sequence_table2, arguments->strings, 1);
    sequence_table2->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0)
  {
    had_err = get_fastasequences(sequence_table1,
                                 gt_str_array_get_str(arguments->files,0),err);
    if (!had_err)
    {
      had_err = get_fastasequences(sequence_table2,
                                  gt_str_array_get_str(arguments->files,1),err);
    }
  }
  if (arguments->dna)
  {
    alphabet = gt_alphabet_new_dna();
  } else
  {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  gt_encode_sequence_table(alphabet,sequence_table1);
  gt_encode_sequence_table(alphabet,sequence_table2);
  if (!had_err)
  {
    scorehandler = gt_arguments2scorehandler(arguments,err);
    if (scorehandler == NULL)
    {
      had_err = -1;
    } else
    {
      if (arguments->global && arguments->protein && !arguments->has_costmatrix)
      {
        GtScoreHandler *costhandler = gt_scorehandler2costhandler(scorehandler);
        gt_scorehandler_delete(scorehandler);
        scorehandler = costhandler;
      }
    }
  }
  /* get diagonal band */
  if (!had_err && arguments->diagonal)
  {
    if (gt_str_array_size(arguments->diagonalbonds) > 0)
    {
      had_err = gt_parse_score_value(__LINE__,&left_dist,
                                  gt_str_array_get(arguments->diagonalbonds,0),
                                  false, err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&right_dist,
                                  gt_str_array_get(arguments->diagonalbonds,1),
                                  false, err);
      }
    }
  }
  if (!had_err && arguments->spacetime)
  {
    linspacetimer = gt_timer_new();
  }

  /* alignment functions with linear gap costs */
  if (!had_err)
  {
    bool affine;

    if (gt_str_array_size(arguments->linearcosts) > 0)
    {
      affine = false;
    } else
    {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
      affine = true;
    }
    had_err = gt_all_against_all_alignment_check (
                            affine, align, arguments,
                            spacemanager,
                            scorehandler,
                            gt_alphabet_characters(alphabet),
                            gt_alphabet_wildcard_show(alphabet),
                            sequence_table1,
                            sequence_table2,
                            left_dist,
                            right_dist,
                            linspacetimer,err);
  }
  /*spacetime option*/
  if (!had_err && arguments->spacetime)
  {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(linspacetimer,"# TIME overall " GT_WD ".%02ld\n",
                            stdout);
  }
  gt_timer_delete(linspacetimer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(sequence_table1);
  gt_sequence_table_delete(sequence_table2);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);
  return had_err;
}
Пример #11
0
void gt_linearalign_check_local(GT_UNUSED bool forward,
                                const GtUchar *useq, GtUword ulen,
                                const GtUchar *vseq, GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  scorehandler = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  gt_scorehandler_plain(scorehandler);
  spacemanager = gt_linspace_management_new();
  align = gt_alignment_new();
  score1 = gt_linearalign_compute_local_generic(spacemanager, scorehandler,
                                                align, useq, 0, ulen,
                                                vseq, 0, vlen);

  score2 = gt_alignment_eval_with_score(align, true, matchscore,
                                        mismatchscore, gapscore);
  gt_linspace_management_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  if (score1 != score2)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_reset(align);
  score3 = gt_squarealign_calculate_local(NULL, align, useq, 0, ulen,
                                          vseq, 0, vlen, matchscore,
                                          mismatchscore, gapscore);

  if (score1 != score3)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_squarealign_calculate_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  score4 = gt_alignment_eval_with_score(align, true, matchscore,
                                        mismatchscore, gapscore);
  if (score3 != score4)
  {
    fprintf(stderr,"gt_squarealign_calculate_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_alignment_delete(align);
}
Пример #12
0
void gt_linearalign_check(GT_UNUSED bool forward,
                          const GtUchar *useq,
                          GtUword ulen,
                          const GtUchar *vseq,
                          GtUword vlen)
{
  GtAlignment *align;
  GtUword edist1, edist2, edist3, edist4,
          matchcost = 0, mismatchcost = 1, gapcost = 1;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  const bool downcase = true;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new(matchcost,  mismatchcost, 0, gapcost);
  gt_scorehandler_plain(scorehandler);
  gt_scorehandler_downcase(scorehandler);
  spacemanager = gt_linspace_management_new();
  align = gt_alignment_new_with_seqs(useq, ulen, vseq, vlen);
  edist1 = gt_calc_linearalign(spacemanager, scorehandler, align,
                               useq, 0, ulen,
                               vseq, 0, vlen);
  edist2 = gt_squarealign_global_distance_only(useq, 0, ulen, vseq, 0, vlen,
                                               scorehandler);

  if (edist1 != edist2)
  {
    fprintf(stderr,"gt_calc_linearalign = "GT_WU" != "GT_WU
            " = gt_squarealign_global_distance_only\n", edist1,edist2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  edist3 = gt_alignment_eval_with_score(align, true, matchcost,
                                        mismatchcost, gapcost);

  if (edist2 != edist3)
  {
    fprintf(stderr,"gt_squarealign_global_distance_only = "GT_WU" != "GT_WU
            " = gt_alignment_eval_with_score\n", edist2,edist3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  edist4 = gt_calc_linearedist(downcase,useq, ulen, vseq, vlen);
  if (edist3 != edist4)
  {
    fprintf(stderr,"gt_alignment_eval_with_score = "GT_WU" != "GT_WU
            " = gt_calc_linearedist\n", edist3, edist4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_linspace_management_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  gt_alignment_delete(align);
}
Пример #13
0
int gt_xdrop_unit_test(GT_UNUSED GtError *err)
{
  int had_err = 0, i, j, s;
  const GtUchar *strings[GT_XDROP_NUM_OF_TESTS] =
    {(const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCCCAAAGGGTTTCCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTTTTTGGGGCCCCAAAATTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTNNNNTTTTGGGGCCCCAAAATTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCGCAAAGGGTTTCCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCCAAAGGGTTTCCCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAAGGGTTTCCTCAAAGGGTTTCCTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAACAGATCACCCGCTTTTTTTTTTTTTTTT",
     (const GtUchar*) "TTTTTTTTTTTTTTTAAACGGGTTTCTCAAAGGGTTCCCTTTTTTTTTTTTTTT"};
  GtUword lengths[GT_XDROP_NUM_OF_TESTS] =
  {54UL, 46UL, 50UL, 54UL, 54UL, 54UL, 46UL, 54UL},
    eval_scores[GT_XDROP_NUM_OF_TESTS *
      GT_XDROP_NUM_OF_TESTS *
      GT_XDROP_NUM_OF_TESTS] =
      {0, 13UL, 0, 1UL, 4UL, 1UL, 0, 7UL,
        13UL, 0, 0, 14UL, 15UL, 14UL, 0, 15UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 14UL, 0, 0, 1UL, 2UL, 0, 1UL,
        4UL, 15UL, 0, 1UL, 0, 8UL, 0, 1UL,
        1UL, 14UL, 0, 2UL, 8UL, 0, 0, 4UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        7UL, 15UL, 0, 1UL, 1UL, 4UL, 0, 0,

        0, 13UL, 0, 1UL, 4UL, 5UL, 14UL, 7UL,
        13UL, 0, 0, 14UL, 15UL, 14UL, 12UL, 15UL,
        0, 0, 0, 20UL, 0, 19UL, 17UL, 0,
        1UL, 14UL, 20UL, 0, 5UL, 6UL, 15UL, 8UL,
        4UL, 15UL, 0, 5UL, 0, 8UL, 15UL, 10UL,
        5UL, 14UL, 19UL, 6UL, 8UL, 0, 14UL, 4UL,
        14UL, 12UL, 17UL, 15UL, 15UL, 14UL, 0, 14UL,
        7UL, 15UL, 0, 8UL, 10UL, 4UL, 14UL, 0,

        0, 13UL, 19UL, 1UL, 2UL, 2UL, 13UL, 3UL,
        13UL, 0, 9UL, 14UL, 14UL, 13UL, 12UL, 14UL,
        17UL, 4UL, 0, 18UL, 19UL, 16UL, 16UL, 18UL,
        1UL, 14UL, 18UL, 0, 2UL, 3UL, 13UL, 3UL,
        2UL, 14UL, 18UL, 2UL, 0, 4UL, 13UL, 4UL,
        2UL, 13UL, 19UL, 3UL, 4UL, 0, 13UL, 3UL,
        14UL, 12UL, 17UL, 13UL, 13UL, 14UL, 0, 14UL,
        3UL, 14UL, 18UL, 3UL, 4UL, 3UL, 13UL, 0,

        0, 13UL, 17UL, 1UL, 2UL, 2UL, 14UL, 3UL,
        13UL, 0, 4UL, 14UL, 15UL, 13UL, 12UL, 14UL,
        19UL, 9UL, 0, 18UL, 18UL, 19UL, 17UL, 18UL,
        1UL, 14UL, 18UL, 0, 2UL, 3UL, 13UL, 3UL,
        2UL, 14UL, 19UL, 2UL, 0, 4UL, 13UL, 4UL,
        2UL, 13UL, 16UL, 3UL, 4UL, 0, 14UL, 3UL,
        13UL, 12UL, 16UL, 13UL, 13UL, 13UL, 0, 13UL,
        3UL, 14UL, 18UL, 3UL, 4UL, 3UL, 14UL, 0,

        0, 0, 0, 1UL, 1UL, 1UL, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 0, 1UL, 0, 0, 1UL,
        1UL, 0, 0, 1UL, 0, 0, 0, 1UL,
        1UL, 0, 0, 0, 0, 0, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 1UL, 1UL, 1UL, 0, 0,

        0, 0, 0, 1UL, 1UL, 1UL, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 0, 1UL, 2UL, 0, 1UL,
        1UL, 0, 0, 1UL, 0, 0, 0, 1UL,
        1UL, 0, 0, 2UL, 0, 0, 0, 1UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 0, 0, 1UL, 1UL, 1UL, 0, 0,

        0, 13UL, 17UL, 1UL, 2UL, 2UL, 13UL, 3UL,
        13UL, 0, 4UL, 14UL, 14UL, 13UL, 12UL, 14UL,
        17UL, 4UL, 0, 18UL, 19UL, 16UL, 16UL, 19UL,
        1UL, 14UL, 18UL, 0, 2UL, 3UL, 13UL, 3UL,
        2UL, 14UL, 19UL, 2UL, 0, 4UL, 13UL, 4UL,
        2UL, 13UL, 16UL, 3UL, 4UL, 0, 13UL, 3UL,
        13UL, 12UL, 16UL, 13UL, 13UL, 13UL, 0, 13UL,
        3UL, 14UL, 19UL, 3UL, 4UL, 3UL, 13UL, 0,

        0, 13UL, 0, 1UL, 2UL, 2UL, 5UL, 3UL,
        13UL, 0, 0, 14UL, 15UL, 13UL, 0, 14UL,
        0, 0, 0, 0, 0, 0, 0, 0,
        1UL, 14UL, 0, 0, 2UL, 3UL, 5UL, 3UL,
        2UL, 15UL, 0, 2UL, 0, 4UL, 5UL, 4UL,
        2UL, 13UL, 0, 3UL, 4UL, 0, 6UL, 3UL,
        5UL, 0, 0, 5UL, 5UL, 6UL, 0, 5UL,
        3UL, 14UL, 0, 3UL, 4UL, 3UL, 5UL, 0};
  GtSeqabstract *useq, *vseq;

  GtXdropArbitraryscores score[GT_XDROP_NUM_OF_TESTS] = {{2, -2, -2, -2},
                                                         {2, -1, -1, -1},
                                                         {2, -1, -5, -2},
                                                         {2, -1, -2, -5},
                                                         {3, -2, -3, -3},
                                                         {3, -1, -1, -1},
                                                         {4, -1, -3, -3},
                                                         {10, -3, -8, -8}};
  GtXdropresources *resources;
  GtXdropbest best;
  GtXdropscore dropscore = (GtXdropscore) 12;
  GtMultieoplist *edit_ops = NULL;
  GtAlignment *alignment;
  bool rightextension = true;

  gt_error_check(err);

  for (s = 0; s < GT_XDROP_NUM_OF_TESTS; ++s) {
    resources = gt_xdrop_resources_new(&score[s]);
    for (i = 0; i < GT_XDROP_NUM_OF_TESTS && !had_err; ++i) {
      for (j = 0; j < GT_XDROP_NUM_OF_TESTS; ++j) {
        useq = gt_seqabstract_new_gtuchar(rightextension,GT_READMODE_FORWARD,
                                          strings[i], lengths[i], 0,lengths[i]);
        vseq = gt_seqabstract_new_gtuchar(rightextension, GT_READMODE_FORWARD,
                                          strings[j], lengths[j], 0,lengths[j]);
        gt_evalxdroparbitscoresextend(true, &best, resources, useq, vseq,
                                      dropscore);

        edit_ops = gt_xdrop_backtrack(resources, &best);
        gt_ensure(edit_ops != NULL);
        alignment = gt_alignment_new_with_seqs(strings[i], best.ivalue,
                                               strings[j], best.jvalue);
        gt_alignment_set_multieop_list(alignment, edit_ops);
        gt_ensure(eval_scores[s*64+i*8+j] == gt_alignment_eval(alignment));

        gt_multieoplist_delete(edit_ops);
        gt_alignment_delete(alignment);
        if (i == j) {
          gt_evalxdroparbitscoresextend(false, &best, resources, useq, vseq,
                                        dropscore);

          edit_ops = gt_xdrop_backtrack(resources, &best);
          alignment = gt_alignment_new_with_seqs(strings[i], best.ivalue,
                                                 strings[j], best.jvalue);
          gt_alignment_set_multieop_list(alignment, edit_ops);
          gt_ensure(eval_scores[s*64+i*8+j] == gt_alignment_eval(alignment));
          gt_multieoplist_delete(edit_ops);
          gt_alignment_delete(alignment);
        }
        gt_seqabstract_delete(useq);
        gt_seqabstract_delete(vseq);
      }
    }
    gt_xdrop_resources_delete(resources);
  }

  return had_err;
}
Пример #14
0
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  int had_err = 0;
  GtUword alignmentwidth;
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  /* Parse option string in first line of file specified by filename. */
  alignmentwidth = arguments->show_alignment ? 70 : 0;
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename,err);
  if (semi == NULL)
  {
    had_err = -1;
  }
  /* Parse seed extensions. */
  if (!had_err)
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *pol_info = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};

    /* the following are used if seed_extend is set */
    GtGreedyextendmatchinfo *greedyextendmatchinfo = NULL;
    GtProcessinfo_and_querymatchspaceptr processinfo_and_querymatchspaceptr;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *alignment_show_buffer
      = arguments->show_alignment ? gt_alignment_buffer_new(alignmentwidth)
                                  : NULL;
    GtLinspaceManagement *linspace_spacemanager = gt_linspaceManagement_new();
    GtScoreHandler *linspace_scorehandler = gt_scorehandler_new(0,1,0,1);;

    if (!arguments->relax_polish)
    {
      double matchscore_bias = GT_DEFAULT_MATCHSCORE_BIAS;
      if (gt_seedextend_match_iterator_bias_parameters(semi))
      {
        matchscore_bias = gt_greedy_dna_sequence_bias_get(aencseq);
      }
      pol_info = polishing_info_new_with_bias(
                          gt_seedextend_match_iterator_errorpercentage(semi),
                          matchscore_bias,
                          gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(semi,
                                                       true,
                                                       arguments->showeoplist,
                                                       alignmentwidth,
                                                       !arguments->relax_polish,
                                                       arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedyextendmatchinfo
        = gt_greedy_extend_matchinfo_new(70,
                              GT_MAX_ALI_LEN_DIFF,
                              gt_seedextend_match_iterator_history_size(semi),
                              GT_MIN_PERC_MAT_HISTORY,
                              0, /* userdefinedleastlength */
                              GT_EXTEND_CHAR_ACCESS_ANY,
                              100,
                              pol_info);
    }
    if (pol_info != NULL)
    {
      gt_alignment_polished_ends(alignment,pol_info,false);
    }
    processinfo_and_querymatchspaceptr.processinfo = greedyextendmatchinfo;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi,true);
    }
    while (true)
    {
      GtQuerymatch *querymatchptr = gt_seedextend_match_iterator_next(semi);

      if (querymatchptr == NULL)
      {
        break;
      }
      if (gt_seedextend_match_iterator_has_seedline(semi))
      {
        if (arguments->seed_extend)
        {
          if (aencseq == bencseq)
          {
            const GtUword
              seedlen = gt_seedextend_match_iterator_seedlen(semi),
              seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
              seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

            processinfo_and_querymatchspaceptr.querymatchspaceptr
              = querymatchptr;
            had_err = gt_greedy_extend_selfmatch_with_output(
                                  &processinfo_and_querymatchspaceptr,
                                  aencseq,
                                  seedlen,
                                  seedpos1,
                                  seedpos2,
                                  err);
            if (had_err)
            {
              break;
            }
          } else
          {
            gt_assert(false);
          }
        } else
        {
          const GtUword query_totallength
            = gt_encseq_seqlength(bencseq,
                                  gt_querymatch_queryseqnum(querymatchptr));
          gt_show_seed_extend_encseq(querymatchptr,
                                     aencseq,
                                     bencseq,
                                     query_totallength);
        }
      } else
      {
        gt_show_seed_extend_plain(&seqpairbuf,
                                  linspace_spacemanager,
                                  linspace_scorehandler,
                                  alignment,
                                  alignment_show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  querymatchptr);
      }
    }
    polishing_info_delete(pol_info);
    gt_greedy_extend_matchinfo_delete(greedyextendmatchinfo);
    gt_free(alignment_show_buffer);
    gt_scorehandler_delete(linspace_scorehandler);
    gt_linspaceManagement_delete(linspace_spacemanager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}
Пример #15
0
void gt_template_utils_teardown(void) {
  gt_template_delete(source);
  gt_template_delete(target);
  gt_alignment_delete(alignment);
  gt_output_map_attributes_delete(output_attributes);
}