Example #1
0
/* Global alignment in square space using constant cost values.
 * <spacemanager> has to be created by the caller beforehand when this is
 * used in a linear space context; in any other case it may be NULL.
 * A temporary score handler holding <matchcost>, <mismatchcost> and
 * <gapcost> (with gap opening 0) is created for the call to
 * alignment_in_square_space_generic() and deleted again afterwards.
 * The value delivered by the generic function is returned. */
GtUword alignment_in_square_space(GtLinspaceManagement *spacemanager,
                                  GtAlignment *align,
                                  const GtUchar *useq,
                                  GtUword ustart,
                                  GtUword ulen,
                                  const GtUchar *vseq,
                                  GtUword vstart,
                                  GtUword vlen,
                                  GtUword matchcost,
                                  GtUword mismatchcost,
                                  GtUword gapcost)
{
  GtScoreHandler *costs;
  GtUword result;

  gt_assert(align);
  /* third argument is the gap opening value: constant costs only */
  costs = gt_scorehandler_new(matchcost, mismatchcost, 0, gapcost);
  result = alignment_in_square_space_generic(spacemanager, align,
                                             useq, ustart, ulen,
                                             vseq, vstart, vlen,
                                             costs);
  gt_scorehandler_delete(costs);
  return result;
}
Example #2
0
/* Local alignment in square space using constant score values.
 * <spacemanager> has to be created by the caller beforehand when this is
 * used in a linear space context; in any other case it may be NULL.
 * A temporary score handler holding <matchscore>, <mismatchscore> and
 * <gapscore> (with gap opening 0) is created for the call to
 * alignment_in_square_space_local_generic() and deleted again afterwards.
 * The value delivered by the generic function is returned. */
GtWord alignment_in_square_space_local(GtLinspaceManagement *spacemanager,
                                       GtAlignment *align,
                                       const GtUchar *useq,
                                       GtUword ustart,
                                       GtUword ulen,
                                       const GtUchar *vseq,
                                       GtUword vstart,
                                       GtUword vlen,
                                       GtWord matchscore,
                                       GtWord mismatchscore,
                                       GtWord gapscore)
{
  GtScoreHandler *scores;
  GtWord result;

  gt_assert(align);
  /* third argument is the gap opening value: constant scores only */
  scores = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  result = alignment_in_square_space_local_generic(spacemanager, align,
                                                   useq, ustart, ulen,
                                                   vseq, vstart, vlen,
                                                   scores);
  gt_scorehandler_delete(scores);
  return result;
}
Example #3
0
/* Derive a new cost handler from the given <scorehandler>.
 * With an offset m chosen from the largest score and the gap extension
 * score, the values are translated as follows:
 *   replacement cost   = 2 * m - replacement score
 *   gap extension cost = m - gap extension score
 *   gap opening cost   = -(gap opening score)
 * If <scorehandler> carries a score matrix, every entry of the matrix is
 * translated into a freshly cloned matrix which is added to the result;
 * otherwise the constant match/mismatch scores are translated.
 * <scorehandler> itself is not modified; the new handler is returned. */
GtScoreHandler *gt_scorehandler2costhandler(const GtScoreHandler *scorehandler)
{
  GtScoreHandler *costhandler;
  GtWord cost_gap_opening;

  gt_assert(scorehandler != NULL);
  /* identical in both cases below: flip the sign of the gap opening */
  cost_gap_opening = -scorehandler->gap_opening;
  if (scorehandler->scorematrix != NULL)
  {
    int offset = 0;
    unsigned int row, col;
    const unsigned int dim
      = gt_score_matrix_get_dimension(scorehandler->scorematrix);
    GtScoreMatrix *costmatrix
      = gt_score_matrix_clone_empty(scorehandler->scorematrix);

    /* first pass: largest matrix entry, but never less than 0 */
    for (row = 0; row < dim; row++)
    {
      for (col = 0; col < dim; col++)
      {
        const int entry
          = gt_score_matrix_get_score(scorehandler->scorematrix, row, col);

        if (offset < entry)
        {
          offset = entry;
        }
      }
    }
    offset = MAX(GT_DIV2(offset+1), 1 + scorehandler->gap_extension);
    /* second pass: store the translated value of each entry */
    for (row = 0; row < dim; row++)
    {
      for (col = 0; col < dim; col++)
      {
        const int entry
          = gt_score_matrix_get_score(scorehandler->scorematrix, row, col);

        gt_score_matrix_set_score(costmatrix, row, col, 2 * offset - entry);
      }
    }
    costhandler = gt_scorehandler_new(0, 0, cost_gap_opening,
                                      offset - scorehandler->gap_extension);
    gt_scorehandler_add_scorematrix(costhandler, costmatrix);
  } else
  {
    const GtWord half_match = GT_DIV2(scorehandler->matchscore+1),
                 half_mismatch = GT_DIV2(scorehandler->mismatchscore+1),
                 gap_bound = MAX(1 + scorehandler->gap_extension,0),
                 offset = MAX(MAX(half_match, half_mismatch), gap_bound);

    costhandler = gt_scorehandler_new(2 * offset - scorehandler->matchscore,
                                      2 * offset - scorehandler->mismatchscore,
                                      cost_gap_opening,
                                      offset - scorehandler->gap_extension);
    /* carry over the "plain" setting of the input handler */
    if (!scorehandler->mappedsequence)
    {
      gt_scorehandler_plain(costhandler);
    }
  }
  return costhandler;
}
Example #4
0
/* Create a score handler from the score/cost words stored in <arguments>.
 *
 * If arguments->linearcosts is not empty, its words are used and the gap
 * opening value is 0; otherwise the words of arguments->affinecosts are
 * used, which additionally contain a gap opening value.
 * The words are consumed in this order:
 *   - if arguments->protein is set: the first word is handed to
 *     gt_score_matrix_new_read_protein() and match/mismatch are set to 0;
 *     otherwise the first two words are parsed as match and mismatch value
 *     (gt_parse_score_value() is called with arguments->global as last
 *     flag for these, and with false for the gap values),
 *   - gap opening value (affine case only),
 *   - gap extension value.
 *
 * Returns the new score handler or NULL if an error occurred; in the latter
 * case <err> is expected to be set by the failing callee. A score matrix
 * that was already read when a later word fails to parse is deleted, so
 * that it does not leak. On success the matrix is handed over to the
 * returned score handler via gt_scorehandler_add_scorematrix(). */
static GtScoreHandler *gt_arguments2scorehandler(
                             const GtLinspaceArguments *arguments,
                             GtError *err)
{
  GtWord matchscore, mismatchscore, gap_open, gap_extension;
  GtScoreHandler *scorehandler = NULL;
  GtScoreMatrix *scorematrix = NULL;
  int had_err = 0;

  gt_error_check(err);
  if (gt_str_array_size(arguments->linearcosts) > 0)
  {
    GtUword wordindex = 0;

    if (arguments->protein)
    {
      scorematrix
        = gt_score_matrix_new_read_protein(
                          gt_str_array_get(arguments->linearcosts,wordindex++),
                          err);
      if (scorematrix == NULL)
      {
        had_err = -1;
      }
      /* replacement values come from the matrix */
      matchscore = 0;
      mismatchscore = 0;
    } else
    {
      had_err = gt_parse_score_value(__LINE__,&matchscore,
                                     gt_str_array_get(arguments->linearcosts,
                                                      wordindex++),
                                     arguments->global,err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&mismatchscore,
                                       gt_str_array_get(arguments->linearcosts,
                                                        wordindex++),
                                       arguments->global,err);
      }
    }
    if (!had_err)
    {
      /* linear gap model: no gap opening value */
      gap_open = 0;
      had_err = gt_parse_score_value(__LINE__,&gap_extension,
                                     gt_str_array_get(arguments->linearcosts,
                                                      wordindex++),
                                     false,err);
    }
  } else /*if (gt_str_array_size(arguments->affinecosts) > 0)*/
  {
    GtUword wordindex = 0;

    if (arguments->protein)
    {
      scorematrix = gt_score_matrix_new_read_protein(
                               gt_str_array_get(arguments->affinecosts,
                                                wordindex++), err);
      if (scorematrix == NULL)
      {
        had_err = -1;
      }
      /* replacement values come from the matrix */
      matchscore = mismatchscore = 0;
    } else
    {
      had_err = gt_parse_score_value(__LINE__,&matchscore,
                                     gt_str_array_get(arguments->affinecosts,
                                                      wordindex++),
                                     arguments->global,err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&mismatchscore,
                                       gt_str_array_get(arguments->affinecosts,
                                                        wordindex++),
                                       arguments->global,err);
      }
    }
    if (!had_err)
    {
      had_err = gt_parse_score_value(__LINE__,&gap_open,
                                     gt_str_array_get(arguments->affinecosts,
                                                      wordindex++),
                                     false,err);
    }
    if (!had_err)
    {
      had_err = gt_parse_score_value(__LINE__,&gap_extension,
                                     gt_str_array_get(arguments->affinecosts,
                                                      wordindex),
                                     false,err);
    }
  }
  if (!had_err)
  {
    scorehandler = gt_scorehandler_new(matchscore, mismatchscore,
                                       gap_open, gap_extension);
    if (scorematrix != NULL)
    {
      gt_scorehandler_add_scorematrix(scorehandler,scorematrix);
    }
  } else
  {
    if (scorematrix != NULL)
    {
      /* the matrix was read successfully, but a later value could not be
         parsed: no handler will take it over, so release it here */
      gt_score_matrix_delete(scorematrix);
    }
  }
  return scorehandler;
}
Example #5
0
/* Consistency check for local alignments of <useq> and <vseq> with the
 * fixed scores match = 2, mismatch = -2, gap = -1.
 * The following values have to agree pairwise, otherwise a message is
 * written to stderr and the program exits with GT_EXIT_PROGRAMMING_ERROR:
 *   - score returned by gt_linearalign_compute_local_generic(),
 *   - evaluation of the alignment delivered by that function,
 *   - score returned by gt_squarealign_calculate_local(),
 *   - evaluation of the alignment delivered by that function.
 * The same exit is taken if one of the sequences contains the symbol
 * LINEAR_EDIST_GAP. <forward> is not used. */
void gt_linearalign_check_local(GT_UNUSED bool forward,
                                const GtUchar *useq, GtUword ulen,
                                const GtUchar *vseq, GtUword vlen)
{
  const GtWord matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtWord linear_score, linear_eval, square_score, square_eval;
  GtScoreHandler *scorehandler;
  GtLinspaceManagement *spacemanager;
  GtAlignment *align;

  /* the gap symbol must not occur in the input sequences */
  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* linear space computation and evaluation of its alignment */
  scorehandler = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  gt_scorehandler_plain(scorehandler);
  spacemanager = gt_linspace_management_new();
  align = gt_alignment_new();
  linear_score = gt_linearalign_compute_local_generic(spacemanager,
                                                      scorehandler,
                                                      align,
                                                      useq, 0, ulen,
                                                      vseq, 0, vlen);
  linear_eval = gt_alignment_eval_with_score(align, true, matchscore,
                                             mismatchscore, gapscore);
  /* neither of the two is needed for the square space computation */
  gt_linspace_management_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  if (linear_score != linear_eval)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            linear_score, linear_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* square space computation, reusing the alignment object */
  gt_alignment_reset(align);
  square_score = gt_squarealign_calculate_local(NULL, align,
                                                useq, 0, ulen,
                                                vseq, 0, vlen,
                                                matchscore, mismatchscore,
                                                gapscore);
  if (linear_score != square_score)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_squarealign_calculate_local\n",
            linear_score, square_score);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  square_eval = gt_alignment_eval_with_score(align, true, matchscore,
                                             mismatchscore, gapscore);
  if (square_score != square_eval)
  {
    fprintf(stderr,"gt_squarealign_calculate_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n",
            square_score, square_eval);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_alignment_delete(align);
}
Example #6
0
/* Consistency check for global alignments of <useq> and <vseq> with the
 * fixed costs match = 0, mismatch = 1, gap = 1 and a score handler set to
 * plain and downcase mode.
 * The following values have to agree pairwise, otherwise a message is
 * written to stderr and the program exits with GT_EXIT_PROGRAMMING_ERROR:
 *   - distance returned by gt_calc_linearalign(),
 *   - distance returned by gt_squarealign_global_distance_only(),
 *   - evaluation of the alignment delivered by gt_calc_linearalign(),
 *   - distance returned by gt_calc_linearedist().
 * The same exit is taken if one of the sequences contains the symbol
 * LINEAR_EDIST_GAP. <forward> is not used. */
void gt_linearalign_check(GT_UNUSED bool forward,
                          const GtUchar *useq,
                          GtUword ulen,
                          const GtUchar *vseq,
                          GtUword vlen)
{
  const GtUword matchcost = 0, mismatchcost = 1, gapcost = 1;
  GtUword linear_dist, square_dist, eval_dist, edist;
  GtScoreHandler *scorehandler;
  GtLinspaceManagement *spacemanager;
  GtAlignment *align;

  /* the gap symbol must not occur in the input sequences */
  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new(matchcost, mismatchcost, 0, gapcost);
  gt_scorehandler_plain(scorehandler);
  gt_scorehandler_downcase(scorehandler);
  spacemanager = gt_linspace_management_new();
  align = gt_alignment_new_with_seqs(useq, ulen, vseq, vlen);

  /* linear space alignment versus square space distance */
  linear_dist = gt_calc_linearalign(spacemanager, scorehandler, align,
                                    useq, 0, ulen,
                                    vseq, 0, vlen);
  square_dist = gt_squarealign_global_distance_only(useq, 0, ulen,
                                                    vseq, 0, vlen,
                                                    scorehandler);
  if (linear_dist != square_dist)
  {
    fprintf(stderr,"gt_calc_linearalign = "GT_WU" != "GT_WU
            " = gt_squarealign_global_distance_only\n",
            linear_dist, square_dist);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* square space distance versus evaluation of the linear space alignment */
  eval_dist = gt_alignment_eval_with_score(align, true, matchcost,
                                           mismatchcost, gapcost);
  if (square_dist != eval_dist)
  {
    fprintf(stderr,"gt_squarealign_global_distance_only = "GT_WU" != "GT_WU
            " = gt_alignment_eval_with_score\n",
            square_dist, eval_dist);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  /* evaluation versus linear space edit distance in downcase mode */
  edist = gt_calc_linearedist(true, useq, ulen, vseq, vlen);
  if (eval_dist != edist)
  {
    fprintf(stderr,"gt_alignment_eval_with_score = "GT_WU" != "GT_WU
            " = gt_calc_linearedist\n",
            eval_dist, edist);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_linspace_management_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  gt_alignment_delete(align);
}
/* Runner of the show_seedext tool.
 * <tool_arguments> points to a GtShowSeedextArguments structure; <argc>,
 * <argv> and <parsed_args> are not used.
 * A match iterator is created for arguments->matchfilename and configured
 * according to the flags in <arguments>. Then every match delivered by the
 * iterator is processed in one of three ways:
 *   - the iterator has a seed line and arguments->seed_extend is set: the
 *     seed is extended again by gt_greedy_extend_selfmatch_with_output()
 *     (only if both encoded sequences are the same object),
 *   - the iterator has a seed line and arguments->seed_extend is not set:
 *     gt_show_seed_extend_encseq() is called for the match,
 *   - otherwise: gt_show_seed_extend_plain() is called for the match.
 * Returns 0 on success, -1 if the iterator could not be created, and the
 * error code of gt_greedy_extend_selfmatch_with_output() if that fails. */
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  int had_err = 0;
  GtUword alignmentwidth;
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  /* Width 0 is used when no alignment is to be shown. */
  alignmentwidth = arguments->show_alignment ? 70 : 0;
  /* Create the iterator over the matches in the given file. */
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename,err);
  if (semi == NULL)
  {
    had_err = -1;
  }
  /* Process the matches. */
  if (!had_err)
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *pol_info = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};

    /* the following are used if seed_extend is set */
    GtGreedyextendmatchinfo *greedyextendmatchinfo = NULL;
    GtProcessinfo_and_querymatchspaceptr processinfo_and_querymatchspaceptr;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    /* the buffer is only allocated if alignments are to be shown */
    GtUchar *alignment_show_buffer
      = arguments->show_alignment ? gt_alignment_buffer_new(alignmentwidth)
                                  : NULL;
    /* the following two are handed to gt_show_seed_extend_plain() */
    GtLinspaceManagement *linspace_spacemanager = gt_linspaceManagement_new();
    GtScoreHandler *linspace_scorehandler = gt_scorehandler_new(0,1,0,1);;

    /* pol_info remains NULL if relax_polish is set */
    if (!arguments->relax_polish)
    {
      double matchscore_bias = GT_DEFAULT_MATCHSCORE_BIAS;
      if (gt_seedextend_match_iterator_bias_parameters(semi))
      {
        matchscore_bias = gt_greedy_dna_sequence_bias_get(aencseq);
      }
      pol_info = polishing_info_new_with_bias(
                          gt_seedextend_match_iterator_errorpercentage(semi),
                          matchscore_bias,
                          gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(semi,
                                                       true,
                                                       arguments->showeoplist,
                                                       alignmentwidth,
                                                       !arguments->relax_polish,
                                                       arguments->seed_display);
    }
    /* greedyextendmatchinfo remains NULL if seed_extend is not set */
    if (arguments->seed_extend)
    {
      greedyextendmatchinfo
        = gt_greedy_extend_matchinfo_new(70,
                              GT_MAX_ALI_LEN_DIFF,
                              gt_seedextend_match_iterator_history_size(semi),
                              GT_MIN_PERC_MAT_HISTORY,
                              0, /* userdefinedleastlength */
                              GT_EXTEND_CHAR_ACCESS_ANY,
                              100,
                              pol_info);
    }
    if (pol_info != NULL)
    {
      gt_alignment_polished_ends(alignment,pol_info,false);
    }
    processinfo_and_querymatchspaceptr.processinfo = greedyextendmatchinfo;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi,true);
    }
    /* iterate until the iterator delivers NULL or an error occurs */
    while (true)
    {
      GtQuerymatch *querymatchptr = gt_seedextend_match_iterator_next(semi);

      if (querymatchptr == NULL)
      {
        break;
      }
      if (gt_seedextend_match_iterator_has_seedline(semi))
      {
        if (arguments->seed_extend)
        {
          if (aencseq == bencseq)
          {
            const GtUword
              seedlen = gt_seedextend_match_iterator_seedlen(semi),
              seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
              seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

            /* the current match is handed over to the extension function
               together with greedyextendmatchinfo set above */
            processinfo_and_querymatchspaceptr.querymatchspaceptr
              = querymatchptr;
            had_err = gt_greedy_extend_selfmatch_with_output(
                                  &processinfo_and_querymatchspaceptr,
                                  aencseq,
                                  seedlen,
                                  seedpos1,
                                  seedpos2,
                                  err);
            if (had_err)
            {
              break;
            }
          } else
          {
            /* extension of seeds for two different encoded sequences is
               not handled here */
            gt_assert(false);
          }
        } else
        {
          const GtUword query_totallength
            = gt_encseq_seqlength(bencseq,
                                  gt_querymatch_queryseqnum(querymatchptr));
          gt_show_seed_extend_encseq(querymatchptr,
                                     aencseq,
                                     bencseq,
                                     query_totallength);
        }
      } else
      {
        /* no seed line available */
        gt_show_seed_extend_plain(&seqpairbuf,
                                  linspace_spacemanager,
                                  linspace_scorehandler,
                                  alignment,
                                  alignment_show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  querymatchptr);
      }
    }
    /* release everything created in this block; pol_info,
       greedyextendmatchinfo and alignment_show_buffer may be NULL here */
    polishing_info_delete(pol_info);
    gt_greedy_extend_matchinfo_delete(greedyextendmatchinfo);
    gt_free(alignment_show_buffer);
    gt_scorehandler_delete(linspace_scorehandler);
    gt_linspaceManagement_delete(linspace_spacemanager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  /* semi is NULL here if its creation failed */
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}