Exemple #1
0
/*
 * Re-initialise the traceback state stored in <lci> before a new traceback.
 *
 * <dbprefixlen> becomes both the stored database prefix length and the
 * current database position; <pprefixlen> becomes both the query end and the
 * current query position. The substring buffer is grown when it cannot hold
 * <dbprefixlen> elements of type GtUchar, and the alignment kept in the state
 * is reset.
 */
void gt_reinitLocaliTracebackstate(Limdfsconstinfo *lci,
                                GtUword dbprefixlen,
                                GtUword pprefixlen)
{
  LocaliTracebackstate *tbs = &lci->tbs;

  tbs->dbprefixlen = tbs->dbcurrent = dbprefixlen;
  tbs->queryend = tbs->querypos = pprefixlen;
  if (dbprefixlen > (GtUword) tbs->allocatedGtUchardbsubstring)
  {
    tbs->spaceGtUchardbsubstring = gt_realloc(tbs->spaceGtUchardbsubstring,
                                            sizeof (GtUchar) * dbprefixlen);
    /* Record the new capacity: without this the bookkeeping stays stale and
       every later call with a larger prefix reallocates again. */
    tbs->allocatedGtUchardbsubstring = dbprefixlen;
  }
  gt_alignment_reset(tbs->alignment);
}
Exemple #2
0
/*
 * Consistency check for the linear-space local alignment.
 *
 * Computes a local alignment of <useq> (length <ulen>) and <vseq> (length
 * <vlen>) with fixed scores (match 2, mismatch -2, gap -1) and verifies that
 *   1. the score returned by gt_computelinearspace_local_generic() equals
 *      the score obtained by re-evaluating the produced alignment,
 *   2. it equals the score of alignment_in_square_space_local(), and
 *   3. the square-space score equals the re-evaluation of its alignment.
 * Any disagreement, or a gap symbol inside an input sequence, is reported on
 * stderr and terminates the process with GT_EXIT_PROGRAMMING_ERROR.
 *
 * If check_dna_sequence() rejects one of the sequences (returns NULL) the
 * function returns silently without performing the comparison.
 *
 * <forward> is unused.
 */
void gt_checklinearspace_local(GT_UNUSED bool forward,
                               const GtUchar *useq, GtUword ulen,
                               const GtUchar *vseq, GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtUchar *low_useq, *low_vseq;
  LinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  GtAlphabet *alphabet;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new_DNA(matchscore, mismatchscore,
                                         0, gapscore);
  alphabet = gt_scorehandler_get_alphabet(scorehandler);
  low_useq = check_dna_sequence(useq, ulen, alphabet);
  low_vseq = check_dna_sequence(vseq, vlen, alphabet);

  if (low_useq == NULL || low_vseq == NULL)
  {
    /* Release whichever copy was produced. Plain if-statements are used
       because a conditional expression with one void and one int operand
       is not valid ISO C. */
    if (low_useq != NULL)
    {
      gt_free(low_useq);
    }
    if (low_vseq != NULL)
    {
      gt_free(low_vseq);
    }
    gt_scorehandler_delete(scorehandler);
    return;
  }

  /* NOTE(review): the alignments below are computed on useq/vseq, not on
     low_useq/low_vseq, which are only used as a validity check here;
     confirm that this is intended. */
  spacemanager = gt_linspaceManagement_new();
  align = gt_alignment_new();
  score1 = gt_computelinearspace_local_generic(spacemanager, scorehandler,
                                               align, useq, 0, ulen,
                                               vseq, 0, vlen);

  score2 = gt_alignment_eval_with_score(align, matchscore,
                                        mismatchscore, gapscore);

  gt_linspaceManagement_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);

  if (score1 != score2)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* Reuse the same alignment object for the square-space reference. */
  gt_alignment_reset(align);
  score3 = alignment_in_square_space_local(NULL, align, useq, 0, ulen,
                                           vseq, 0, vlen, matchscore,
                                           mismatchscore, gapscore);

  if (score1 != score3)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = alignment_in_square_space_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  score4 = gt_alignment_eval_with_score(align, matchscore,
                                                mismatchscore, gapscore);
  if (score3 != score4)
  {
    fprintf(stderr,"alignment_in_square_space_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_delete(align);
  gt_free(low_useq);
  gt_free(low_vseq);
}
/*
 * Align every sequence of <sequence_table1> against every sequence of
 * <sequence_table2> and show each resulting alignment.
 *
 * <affine> selects the affine gap cost variants of the alignment functions,
 * otherwise the linear gap cost variants are used. Depending on <arguments>
 * the alignment is global (optionally restricted to a diagonal band) or
 * local. For a diagonal band without explicit bounds
 * (arguments->diagonalbonds is empty) <left_dist> and <right_dist> are
 * recomputed for each pair from arguments->similarity.
 *
 * Each alignment is written to stdout if arguments->outputfile is "stdout",
 * otherwise it is appended to the file of that name. If <linspacetimer> is
 * not NULL it is started before and stopped after the pairwise loop.
 *
 * Returns 0 on success. On an invalid diagonal band 1 is returned, on a
 * failure to open the output file -1 is returned; in both cases <err> is
 * set and processing stops at the first failing pair.
 */
static int gt_all_against_all_alignment_check(bool affine,
                                        GtAlignment *align,
                                        const GtLinspaceArguments *arguments,
                                        GtLinspaceManagement *spacemanager,
                                        const GtScoreHandler *scorehandler,
                                        const GtUchar *characters,
                                        GtUchar wildcardshow,
                                        const GtSequenceTable *sequence_table1,
                                        const GtSequenceTable *sequence_table2,
                                        GtWord left_dist,
                                        GtWord right_dist,
                                        GtTimer *linspacetimer,
                                        GtError *err)
{
  int had_err = 0;
  const GtUchar *useq, *vseq;
  GtUword i, j, ulen, vlen;

  gt_error_check(err);
  if (linspacetimer != NULL)
  {
    gt_timer_start(linspacetimer);
  }
  for (i = 0; !had_err && i < sequence_table1->size; i++)
  {
    ulen = gt_str_length(sequence_table1->seqarray[i]);
    useq = (const GtUchar*) gt_str_get(sequence_table1->seqarray[i]);
    /* Stop the inner loop on the first error as well: otherwise the
       remaining pairs would still be aligned and a later failure could
       overwrite the message of the first one. */
    for (j = 0; !had_err && j < sequence_table2->size; j++)
    {
      vlen = gt_str_length(sequence_table2->seqarray[j]);
      vseq = (const GtUchar*) gt_str_get(sequence_table2->seqarray[j]);
      gt_alignment_reset(align);
      if (arguments->global)
      {
        if (arguments->diagonal)
        {
          /* no explicit band given: derive it from the similarity */
          if (gt_str_array_size(arguments->diagonalbonds) == 0)
          {
            left_dist = LEFT_DIAGONAL_SHIFT(arguments->similarity, ulen, vlen);
            right_dist = RIGHT_DIAGONAL_SHIFT(arguments->similarity, ulen,
                                              vlen);
          }
          /* the band has to satisfy the bounds stated in the message */
          if ((left_dist > MIN(0, (GtWord)vlen-(GtWord)ulen))||
              (right_dist < MAX(0, (GtWord)vlen-(GtWord)ulen)))
          {
            gt_error_set(err, "ERROR: invalid diagonalband for global "
                              "alignment (ulen: "GT_WU", vlen: "GT_WU")\n"
                              "left_dist <= MIN(0, vlen-ulen) and "
                              "right_dist >= MAX(0, vlen-ulen)", ulen, vlen);
            had_err = 1;
          }
          if (!had_err)
          {
            (affine ? gt_diagonalbandalign_affinegapcost_compute_generic
                    : gt_diagonalbandalign_compute_generic)
                       (spacemanager, scorehandler, align,
                        useq, 0, ulen, vseq, 0, vlen,
                        left_dist, right_dist);
          }
        } else
        {
          (affine ? gt_linearalign_affinegapcost_compute_generic
                  : gt_linearalign_compute_generic)
                             (spacemanager, scorehandler, align,
                              useq, 0, ulen, vseq, 0, vlen);
        }
      }
      else if (arguments->local)
      {
        (affine ? gt_linearalign_affinegapcost_compute_local_generic
                : gt_linearalign_compute_local_generic)
                    (spacemanager, scorehandler, align,
                     useq, 0, ulen, vseq, 0, vlen);
      }
      /* show alignment */
      if (!had_err)
      {
        gt_assert(align != NULL);
        if (!strcmp(gt_str_get(arguments->outputfile),"stdout"))
        {
          alignment_show_with_sequences(useq, ulen, vseq, vlen, align,
                                        characters,
                                        wildcardshow, arguments->showscore,
                                        !arguments->scoreonly,
                                        arguments->showsequences,
                                        arguments->global,
                                        scorehandler, stdout);
        } else
        {
          /* the output file is opened in append mode for each pair */
          FILE *fp = gt_fa_fopen_func(gt_str_get(arguments->outputfile),
                                                 "a", __FILE__,__LINE__,err);
          if (fp == NULL)
          {
            had_err = -1;
          } else
          {
            alignment_show_with_sequences(useq, ulen, vseq, vlen, align,
                                          characters, wildcardshow,
                                          arguments->showscore,
                                          !arguments->scoreonly,
                                          arguments->showsequences,
                                          arguments->global, scorehandler,fp);
            gt_fa_fclose(fp);
          }
        }
      }
    }
  }
  if (linspacetimer != NULL)
  {
    gt_timer_stop(linspacetimer);
  }
  if (!had_err && arguments->wildcardshow)
  {
    printf("# wildcards are represented by %c\n", wildcardshow);
  }
  return had_err;
}
Exemple #4
0
/*
 * Cross-check of the linear-space local alignment against the square-space
 * implementation for the sequences <useq> (length <ulen>) and <vseq>
 * (length <vlen>), using match score 2, mismatch score -2 and gap score -1.
 *
 * The process is terminated with GT_EXIT_PROGRAMMING_ERROR (after a message
 * on stderr) if an input contains LINEAR_EDIST_GAP or if any of the
 * following pairs of scores disagree:
 *   - linear-space score vs. evaluation of the linear-space alignment,
 *   - linear-space score vs. square-space score,
 *   - square-space score vs. evaluation of the square-space alignment.
 *
 * <forward> is unused.
 */
void gt_linearalign_check_local(GT_UNUSED bool forward,
                                const GtUchar *useq, GtUword ulen,
                                const GtUchar *vseq, GtUword vlen)
{
  const GtWord matchscore = 2,
               mismatchscore = -2,
               gapscore = -1;
  GtWord linear_score,
         linear_eval,
         square_score,
         square_eval;
  GtScoreHandler *handler;
  GtLinspaceManagement *linspace;
  GtAlignment *alignment;

  /* neither input may contain the gap symbol */
  if (memchr(useq, LINEAR_EDIST_GAP, ulen) != NULL)
  {
    fprintf(stderr, "%s: sequence u contains gap symbol\n", __func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP, vlen) != NULL)
  {
    fprintf(stderr, "%s: sequence v contains gap symbol\n", __func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* linear-space computation and re-evaluation of its alignment */
  handler = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  gt_scorehandler_plain(handler);
  linspace = gt_linspace_management_new();
  alignment = gt_alignment_new();
  linear_score = gt_linearalign_compute_local_generic(linspace, handler,
                                                      alignment,
                                                      useq, 0, ulen,
                                                      vseq, 0, vlen);
  linear_eval = gt_alignment_eval_with_score(alignment, true, matchscore,
                                             mismatchscore, gapscore);
  gt_linspace_management_delete(linspace);
  gt_scorehandler_delete(handler);
  if (linear_score != linear_eval)
  {
    fprintf(stderr,
            "gt_linearalign_compute_local_generic = " GT_WD " != " GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            linear_score, linear_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* square-space reference, computed into the same alignment object */
  gt_alignment_reset(alignment);
  square_score = gt_squarealign_calculate_local(NULL, alignment,
                                                useq, 0, ulen,
                                                vseq, 0, vlen,
                                                matchscore, mismatchscore,
                                                gapscore);
  if (linear_score != square_score)
  {
    fprintf(stderr,
            "gt_linearalign_compute_local_generic = " GT_WD " != " GT_WD
            " = gt_squarealign_calculate_local\n",
            linear_score, square_score);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  square_eval = gt_alignment_eval_with_score(alignment, true, matchscore,
                                             mismatchscore, gapscore);
  if (square_score != square_eval)
  {
    fprintf(stderr,
            "gt_squarealign_calculate_local = " GT_WD " != " GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            square_score, square_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_alignment_delete(alignment);
}
Exemple #5
0
/*
 * Run the Smith-Waterman computation of <query> (length <querylen>) against
 * the range <startpos>..<endpos> of <encseq> and, if the best score reaches
 * dpresource->scorethreshold, report the best region via
 * dpresource->processmatch.
 *
 * The DP columns and, when dpresource->showalignment is set, the edge table
 * and the database substring buffer kept in <dpresource> are enlarged on
 * demand. <encsequnit> is stored as dbseqnum of the reported match.
 */
static void applysmithwaterman(SWdpresource *dpresource,
                               const GtEncseq *encseq,
                               GtUword encsequnit,
                               GtUword startpos,
                               GtUword endpos,
                               const GtUchar *query,
                               GtUword querylen)
{
  Scoretype score;
  Maxscorecoord maxpair;
  DPregion maxentry;
  GtIdxMatch match;

  /* both columns need querylen + 1 entries */
  if (querylen + 1 > dpresource->allocatedswcol)
  {
    dpresource->allocatedswcol = querylen + 1;
    dpresource->swcol
      = gt_realloc(dpresource->swcol,
                   sizeof *dpresource->swcol * dpresource->allocatedswcol);
    dpresource->swentrycol
      = gt_realloc(dpresource->swentrycol,
                   sizeof *dpresource->swentrycol
                   * dpresource->allocatedswcol);
  }

  score = swlocalsimilarityscore(dpresource->swcol, &maxpair,
                                 &dpresource->scorevalues,
                                 query, querylen, encseq, startpos, endpos);
  if (score < (Scoretype) dpresource->scorethreshold)
  {
    return; /* below threshold: nothing to report */
  }

  /* second pass, restricted to the coordinates of the maximum score */
  swlocalsimilarityregion(dpresource->swentrycol,
                          &maxentry,
                          &dpresource->scorevalues,
                          query, maxpair.umax,
                          encseq, startpos, startpos + maxpair.vmax);
  gt_assert(maxentry.similarity == score);
  gt_assert(maxentry.similarity >= 0);

  match.dbabsolute = false;
  match.dbseqnum = encsequnit;
  match.dbstartpos = maxentry.start2;
  match.dblen = maxentry.len2;
  match.querystartpos = maxentry.start1;
  match.querylen = maxentry.len1;
  match.distance = (GtUword) maxentry.similarity;

  if (!dpresource->showalignment)
  {
    match.alignment = NULL;
    match.dbsubstring = NULL;
  } else
  {
    /* one edge entry per cell of the (len1 + 1) x (len2 + 1) table */
    if ((maxentry.len1 + 1) * (maxentry.len2 + 1) >
        dpresource->allocatedmaxedges)
    {
      dpresource->allocatedmaxedges
        = (maxentry.len1 + 1) * (maxentry.len2 + 1);
      dpresource->maxedges
        = gt_realloc(dpresource->maxedges,
                     sizeof *dpresource->maxedges
                     * dpresource->allocatedmaxedges);
    }
    gt_alignment_reset(dpresource->alignment);
    if ((GtUword) maxentry.len2 > dpresource->allocateddbsubstring)
    {
      dpresource->allocateddbsubstring = (GtUword) maxentry.len2;
      dpresource->dbsubstring
        = gt_realloc(dpresource->dbsubstring,
                     sizeof *dpresource->dbsubstring
                     * dpresource->allocateddbsubstring);
    }
    swproducealignment(dpresource->alignment,
                       dpresource->dbsubstring,
                       dpresource->maxedges,
                       dpresource->swcol,
                       &dpresource->scorevalues,
                       dpresource->scorethreshold,
                       query + maxentry.start1,
                       maxentry.len1,
                       encseq,
                       startpos + maxentry.start2,
                       startpos + maxentry.start2 + maxentry.len2);
    match.alignment = dpresource->alignment;
    match.dbsubstring = dpresource->dbsubstring;
  }
  dpresource->processmatch(dpresource->processmatchinfo, &match);
}
/*
 * Print the coordinates of the match <querymatchptr> and, depending on
 * <alignmentwidth> and <showeoplist>, its alignment and/or edit operation
 * list to stdout.
 *
 * The matching substrings are extracted from <aencseq> and <bencseq> into
 * the buffers of <seqpairbuf> (enlarged when needed); the query substring
 * is reverse complemented in place if the query read mode is not forward.
 * The alignment is computed by gt_computelinearspace_generic() into
 * <alignment>; its result must not exceed the distance stored in the match
 * (asserted), and a line starting with "# edist=" is printed if it is
 * smaller.
 *
 * The alignment is shown if <alignmentwidth> > 0, the edit operation list
 * if <showeoplist> is set and the stored distance is positive. <alignment>
 * is reset afterwards if <alignmentwidth> > 0 or <showeoplist> is set.
 */
static void gt_show_seed_extend_plain(GtSequencepairbuffer *seqpairbuf,
                                      GtLinspaceManagement
                                      *linspace_spacemanager,
                                      GtScoreHandler *linspace_scorehandler,
                                      GtAlignment *alignment,
                                      GtUchar *alignment_show_buffer,
                                      GtUword alignmentwidth,
                                      bool showeoplist,
                                      const GtUchar *characters,
                                      GtUchar wildcardshow,
                                      const GtEncseq *aencseq,
                                      const GtEncseq *bencseq,
                                      const GtQuerymatch *querymatchptr)
{
  GtUword computed_dist;
  GtReadmode readmode = gt_querymatch_query_readmode(querymatchptr);
  const GtUword stored_dist = gt_querymatch_distance(querymatchptr);
  const GtUword db_len = gt_querymatch_dblen(querymatchptr);
  const GtUword query_seqnum = gt_querymatch_queryseqnum(querymatchptr);
  const GtUword query_fwd_start
    = gt_querymatch_querystart_fwdstrand(querymatchptr);
  const GtUword query_len = gt_querymatch_querylen(querymatchptr);
  /* start positions of both substrings in their encoded sequences */
  const GtUword db_first = gt_querymatch_dbstart(querymatchptr);
  const GtUword query_first
    = gt_encseq_seqstartpos(bencseq, query_seqnum) + query_fwd_start;

  gt_querymatch_coordinates_out(querymatchptr);

  /* make room for both substrings */
  if (seqpairbuf->a_allocated <= db_len)
  {
    seqpairbuf->a_sequence
      = gt_realloc(seqpairbuf->a_sequence,
                   sizeof *seqpairbuf->a_sequence * db_len);
    seqpairbuf->a_allocated = db_len;
  }
  if (seqpairbuf->b_allocated <= query_len)
  {
    seqpairbuf->b_sequence
      = gt_realloc(seqpairbuf->b_sequence,
                   sizeof *seqpairbuf->b_sequence * query_len);
    seqpairbuf->b_allocated = query_len;
  }

  gt_encseq_extract_encoded(aencseq, seqpairbuf->a_sequence,
                            db_first, db_first + db_len - 1);
  gt_encseq_extract_encoded(bencseq, seqpairbuf->b_sequence,
                            query_first, query_first + query_len - 1);
  if (readmode != GT_READMODE_FORWARD)
  {
    /* the only other read mode accepted here is reverse complement */
    gt_assert(readmode == GT_READMODE_REVCOMPL);
    gt_inplace_reverse_complement(seqpairbuf->b_sequence, query_len);
  }

  computed_dist = gt_computelinearspace_generic(linspace_spacemanager,
                                                linspace_scorehandler,
                                                alignment,
                                                seqpairbuf->a_sequence,
                                                0, db_len,
                                                seqpairbuf->b_sequence,
                                                0, query_len);
  if (computed_dist < stored_dist)
  {
    printf("# edist=" GT_WU " (smaller by " GT_WU ")\n",
           computed_dist, stored_dist - computed_dist);
  }
  gt_assert(computed_dist <= stored_dist);

  if (alignmentwidth > 0)
  {
    gt_alignment_show_generic(alignment_show_buffer, false, alignment,
                              stdout, alignmentwidth, characters,
                              wildcardshow);
  }
  if (showeoplist && stored_dist > 0)
  {
    gt_alignment_show_multieop_list(alignment, stdout);
  }
  if (showeoplist || alignmentwidth > 0)
  {
    gt_alignment_reset(alignment);
  }
}