Exemple #1
0
SWdpresource *gt_newSWdpresource(Scoretype matchscore,
                              Scoretype mismatchscore,
                              Scoretype gapextend,
                              GtUword scorethreshold,
                              bool showalignment,
                              ProcessIdxMatch processmatch,
                              void *processmatchinfo)
{
  SWdpresource *swdpresource;

  swdpresource = gt_malloc(sizeof *swdpresource);
  swdpresource->showalignment = showalignment;
  swdpresource->scorevalues.matchscore = matchscore;
  swdpresource->scorevalues.mismatchscore = mismatchscore;
  swdpresource->scorevalues.gapextend = gapextend;
  swdpresource->scorethreshold = scorethreshold;
  swdpresource->alignment = gt_alignment_new();
  swdpresource->swcol = NULL;
  swdpresource->swentrycol = NULL;
  swdpresource->maxedges = NULL;
  swdpresource->allocatedswcol = 0;
  swdpresource->allocatedmaxedges = 0;
  swdpresource->processmatch = processmatch;
  swdpresource->processmatchinfo = processmatchinfo;
  swdpresource->dbsubstring = NULL;
  swdpresource->allocateddbsubstring = 0;
  return swdpresource;
}
Exemple #2
0
GtAlignment* gt_alignment_new_with_seqs(const GtUchar *u, GtUword ulen,
                                        const GtUchar *v, GtUword vlen)
{
  GtAlignment *alignment;
  gt_assert(u && v);
  alignment = gt_alignment_new();
  gt_alignment_set_seqs(alignment, u, ulen, v, vlen);
  return alignment;
}
Exemple #3
0
static Limdfsconstinfo *locali_allocatedfsconstinfo (GT_UNUSED
                                                     unsigned int alphasize)
{
  Limdfsconstinfo *lci;

  lci = gt_malloc (sizeof (Limdfsconstinfo));
  lci->maxcollen = 0;
  lci->tbs.alignment = gt_alignment_new();
  lci->tbs.spaceGtUchardbsubstring = NULL;
  lci->tbs.allocatedGtUchardbsubstring = 0;
  return lci;
}
Exemple #4
0
void gt_checklinearspace_local(GT_UNUSED bool forward,
                               const GtUchar *useq, GtUword ulen,
                               const GtUchar *vseq, GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtUchar *low_useq, *low_vseq;
  LinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;
  GtAlphabet *alphabet;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  scorehandler = gt_scorehandler_new_DNA(matchscore, mismatchscore,
                                         0, gapscore);
  alphabet = gt_scorehandler_get_alphabet(scorehandler);
  low_useq = check_dna_sequence(useq, ulen, alphabet);
  low_vseq = check_dna_sequence(vseq, vlen, alphabet);

  if (low_useq == NULL || low_vseq == NULL)
  {
    low_useq? gt_free(low_useq):0;
    low_vseq? gt_free(low_vseq):0;
    gt_scorehandler_delete(scorehandler);
    return;
  }

  spacemanager = gt_linspaceManagement_new();
  align = gt_alignment_new();
  score1 = gt_computelinearspace_local_generic(spacemanager, scorehandler,
                                               align, useq, 0, ulen,
                                               vseq, 0, vlen);

  score2 = gt_alignment_eval_with_score(align, matchscore,
                                        mismatchscore, gapscore);

  gt_linspaceManagement_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);

  if (score1 != score2)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_reset(align);
  score3 = alignment_in_square_space_local(NULL, align, useq, 0, ulen,
                                           vseq, 0, vlen, matchscore,
                                           mismatchscore, gapscore);

  if (score1 != score3)
  {
    fprintf(stderr,"gt_computelinearspace_local = "GT_WD" != "GT_WD
            " = alignment_in_square_space_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  score4 = gt_alignment_eval_with_score(align, matchscore,
                                                mismatchscore, gapscore);
  if (score3 != score4)
  {
    fprintf(stderr,"alignment_in_square_space_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_delete(align);
  gt_free(low_useq);
  gt_free(low_vseq);
}
static void seededmatch2alignment(const GtQuerymatch *querymatch,
                                  GtUword querystartabsolute,
                                  const GtEncseq *encseq,
                                  GtQuerymatchoutoptions *querymatchoutoptions)
{
  GtUword ulen, vlen, rightdistance = 0, leftdistance = 0,
          offset1 = querymatchoutoptions->seedpos1 +
                    querymatchoutoptions->seedlen,
          offset2 = querymatchoutoptions->seedpos2 +
                    querymatchoutoptions->seedlen;
  GtAlignment *alignment = gt_alignment_new();

  gt_assert(querymatch->dbstart + querymatch->dblen >= offset1);
  ulen = querymatch->dbstart + querymatch->dblen - offset1;
  gt_assert(querystartabsolute + querymatch->querylen >= offset2);
  vlen = querystartabsolute + querymatch->querylen - offset2;
  if (ulen > 0 && vlen > 0)
  {
    Polished_point best_polished_point = {0,0,0};
    rightdistance = align_front_prune_edist(true,
                                            &best_polished_point,
                                            querymatchoutoptions->front_trace,
                                            encseq,
                                            querymatchoutoptions->ggemi,
                                            offset1,
                                            ulen,
                                            offset2,
                                            vlen);
    front_trace2alignments(alignment,
                           querymatchoutoptions->front_trace,
                           &best_polished_point,
                           ulen,vlen);
    front_trace_reset(querymatchoutoptions->front_trace,ulen+vlen);
  }
  gt_assert(rightdistance <= querymatch->edist);
  if (querymatchoutoptions->totallength == GT_UWORD_MAX)
  {
    querymatchoutoptions->totallength = gt_encseq_total_length(encseq);
  }
  if (querymatchoutoptions->seedpos1 > querymatch->dbstart &&
      querymatchoutoptions->seedpos2 > querystartabsolute)
  {
    Polished_point best_polished_point = {0,0,0};
    ulen = querymatchoutoptions->seedpos1 - querymatch->dbstart;
    vlen = querymatchoutoptions->seedpos2 - querystartabsolute;
    leftdistance = align_front_prune_edist(false,
                               &best_polished_point,
                               querymatchoutoptions->front_trace,
                               encseq,
                               querymatchoutoptions->ggemi,
                               GT_REVERSEPOS(querymatchoutoptions->totallength,
                                             querymatchoutoptions->seedpos1-1),
                               ulen,
                               GT_REVERSEPOS(querymatchoutoptions->totallength,
                                             querymatchoutoptions->seedpos2-1),
                               vlen);
    front_trace_reset(querymatchoutoptions->front_trace,ulen+vlen);
  }
  if (leftdistance + rightdistance > querymatch->edist)
  {
    fprintf(stderr,"leftdistance + rightdistance = " GT_WU " + " GT_WU " > "
                   GT_WU " = querymatch->edist\n",
                   leftdistance,rightdistance,querymatch->edist);
  }
  gt_assert(leftdistance + rightdistance <= querymatch->edist);
  gt_alignment_delete(alignment);
}
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlignment *align;
  GtWord left_dist = 0, right_dist = 0;
  GtSequenceTable *sequence_table1, *sequence_table2;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *linspacetimer = NULL;
  GtAlphabet *alphabet = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  sequence_table1 = gt_sequence_table_new();
  sequence_table2 = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,arguments->timesquarefactor);

  /* get sequences */
  if (gt_str_array_size(arguments->strings) > 0)
  {
    get_onesequence(sequence_table1, arguments->strings, 0);
    sequence_table1->size++;
    get_onesequence(sequence_table2, arguments->strings, 1);
    sequence_table2->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0)
  {
    had_err = get_fastasequences(sequence_table1,
                                 gt_str_array_get_str(arguments->files,0),err);
    if (!had_err)
    {
      had_err = get_fastasequences(sequence_table2,
                                  gt_str_array_get_str(arguments->files,1),err);
    }
  }
  if (arguments->dna)
  {
    alphabet = gt_alphabet_new_dna();
  } else
  {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  gt_encode_sequence_table(alphabet,sequence_table1);
  gt_encode_sequence_table(alphabet,sequence_table2);
  if (!had_err)
  {
    scorehandler = gt_arguments2scorehandler(arguments,err);
    if (scorehandler == NULL)
    {
      had_err = -1;
    } else
    {
      if (arguments->global && arguments->protein && !arguments->has_costmatrix)
      {
        GtScoreHandler *costhandler = gt_scorehandler2costhandler(scorehandler);
        gt_scorehandler_delete(scorehandler);
        scorehandler = costhandler;
      }
    }
  }
  /* get diagonal band */
  if (!had_err && arguments->diagonal)
  {
    if (gt_str_array_size(arguments->diagonalbonds) > 0)
    {
      had_err = gt_parse_score_value(__LINE__,&left_dist,
                                  gt_str_array_get(arguments->diagonalbonds,0),
                                  false, err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&right_dist,
                                  gt_str_array_get(arguments->diagonalbonds,1),
                                  false, err);
      }
    }
  }
  if (!had_err && arguments->spacetime)
  {
    linspacetimer = gt_timer_new();
  }

  /* alignment functions with linear gap costs */
  if (!had_err)
  {
    bool affine;

    if (gt_str_array_size(arguments->linearcosts) > 0)
    {
      affine = false;
    } else
    {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
      affine = true;
    }
    had_err = gt_all_against_all_alignment_check (
                            affine, align, arguments,
                            spacemanager,
                            scorehandler,
                            gt_alphabet_characters(alphabet),
                            gt_alphabet_wildcard_show(alphabet),
                            sequence_table1,
                            sequence_table2,
                            left_dist,
                            right_dist,
                            linspacetimer,err);
  }
  /*spacetime option*/
  if (!had_err && arguments->spacetime)
  {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(linspacetimer,"# TIME overall " GT_WD ".%02ld\n",
                            stdout);
  }
  gt_timer_delete(linspacetimer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(sequence_table1);
  gt_sequence_table_delete(sequence_table2);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);
  return had_err;
}
Exemple #7
0
void gt_linearalign_check_local(GT_UNUSED bool forward,
                                const GtUchar *useq, GtUword ulen,
                                const GtUchar *vseq, GtUword vlen)
{
  GtAlignment *align;
  GtWord score1, score2, score3, score4,
         matchscore = 2, mismatchscore = -2, gapscore = -1;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler;

  if (memchr(useq, LINEAR_EDIST_GAP,ulen) != NULL)
  {
    fprintf(stderr,"%s: sequence u contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  if (memchr(vseq, LINEAR_EDIST_GAP,vlen) != NULL)
  {
    fprintf(stderr,"%s: sequence v contains gap symbol\n",__func__);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  scorehandler = gt_scorehandler_new(matchscore, mismatchscore, 0, gapscore);
  gt_scorehandler_plain(scorehandler);
  spacemanager = gt_linspace_management_new();
  align = gt_alignment_new();
  score1 = gt_linearalign_compute_local_generic(spacemanager, scorehandler,
                                                align, useq, 0, ulen,
                                                vseq, 0, vlen);

  score2 = gt_alignment_eval_with_score(align, true, matchscore,
                                        mismatchscore, gapscore);
  gt_linspace_management_delete(spacemanager);
  gt_scorehandler_delete(scorehandler);
  if (score1 != score2)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score1, score2);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  gt_alignment_reset(align);
  score3 = gt_squarealign_calculate_local(NULL, align, useq, 0, ulen,
                                          vseq, 0, vlen, matchscore,
                                          mismatchscore, gapscore);

  if (score1 != score3)
  {
    fprintf(stderr,"gt_linearalign_compute_local_generic = "GT_WD" != "GT_WD
            " = gt_squarealign_calculate_local\n", score1, score3);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }

  score4 = gt_alignment_eval_with_score(align, true, matchscore,
                                        mismatchscore, gapscore);
  if (score3 != score4)
  {
    fprintf(stderr,"gt_squarealign_calculate_local = "GT_WD" != "GT_WD
            " = gt_alignment_eval_generic_with_score\n", score3, score4);
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  gt_alignment_delete(align);
}
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  int had_err = 0;
  GtUword alignmentwidth;
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  /* Parse option string in first line of file specified by filename. */
  alignmentwidth = arguments->show_alignment ? 70 : 0;
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename,err);
  if (semi == NULL)
  {
    had_err = -1;
  }
  /* Parse seed extensions. */
  if (!had_err)
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *pol_info = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};

    /* the following are used if seed_extend is set */
    GtGreedyextendmatchinfo *greedyextendmatchinfo = NULL;
    GtProcessinfo_and_querymatchspaceptr processinfo_and_querymatchspaceptr;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *alignment_show_buffer
      = arguments->show_alignment ? gt_alignment_buffer_new(alignmentwidth)
                                  : NULL;
    GtLinspaceManagement *linspace_spacemanager = gt_linspaceManagement_new();
    GtScoreHandler *linspace_scorehandler = gt_scorehandler_new(0,1,0,1);;

    if (!arguments->relax_polish)
    {
      double matchscore_bias = GT_DEFAULT_MATCHSCORE_BIAS;
      if (gt_seedextend_match_iterator_bias_parameters(semi))
      {
        matchscore_bias = gt_greedy_dna_sequence_bias_get(aencseq);
      }
      pol_info = polishing_info_new_with_bias(
                          gt_seedextend_match_iterator_errorpercentage(semi),
                          matchscore_bias,
                          gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(semi,
                                                       true,
                                                       arguments->showeoplist,
                                                       alignmentwidth,
                                                       !arguments->relax_polish,
                                                       arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedyextendmatchinfo
        = gt_greedy_extend_matchinfo_new(70,
                              GT_MAX_ALI_LEN_DIFF,
                              gt_seedextend_match_iterator_history_size(semi),
                              GT_MIN_PERC_MAT_HISTORY,
                              0, /* userdefinedleastlength */
                              GT_EXTEND_CHAR_ACCESS_ANY,
                              100,
                              pol_info);
    }
    if (pol_info != NULL)
    {
      gt_alignment_polished_ends(alignment,pol_info,false);
    }
    processinfo_and_querymatchspaceptr.processinfo = greedyextendmatchinfo;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi,true);
    }
    while (true)
    {
      GtQuerymatch *querymatchptr = gt_seedextend_match_iterator_next(semi);

      if (querymatchptr == NULL)
      {
        break;
      }
      if (gt_seedextend_match_iterator_has_seedline(semi))
      {
        if (arguments->seed_extend)
        {
          if (aencseq == bencseq)
          {
            const GtUword
              seedlen = gt_seedextend_match_iterator_seedlen(semi),
              seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
              seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

            processinfo_and_querymatchspaceptr.querymatchspaceptr
              = querymatchptr;
            had_err = gt_greedy_extend_selfmatch_with_output(
                                  &processinfo_and_querymatchspaceptr,
                                  aencseq,
                                  seedlen,
                                  seedpos1,
                                  seedpos2,
                                  err);
            if (had_err)
            {
              break;
            }
          } else
          {
            gt_assert(false);
          }
        } else
        {
          const GtUword query_totallength
            = gt_encseq_seqlength(bencseq,
                                  gt_querymatch_queryseqnum(querymatchptr));
          gt_show_seed_extend_encseq(querymatchptr,
                                     aencseq,
                                     bencseq,
                                     query_totallength);
        }
      } else
      {
        gt_show_seed_extend_plain(&seqpairbuf,
                                  linspace_spacemanager,
                                  linspace_scorehandler,
                                  alignment,
                                  alignment_show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  querymatchptr);
      }
    }
    polishing_info_delete(pol_info);
    gt_greedy_extend_matchinfo_delete(greedyextendmatchinfo);
    gt_free(alignment_show_buffer);
    gt_scorehandler_delete(linspace_scorehandler);
    gt_linspaceManagement_delete(linspace_spacemanager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}
void gt_template_utils_setup(void) {
  source = gt_template_new();
  alignment = gt_alignment_new();
  target = gt_template_new();
  output_attributes = gt_output_map_attributes_new();
}