void gt_querymatchoutoptions_delete(
        GtQuerymatchoutoptions *querymatchoutoptions)
{
  if (querymatchoutoptions != NULL)
  {
    front_trace_delete(querymatchoutoptions->front_trace);
    gt_greedy_extend_matchinfo_delete(querymatchoutoptions->ggemi);
    gt_free(querymatchoutoptions);
  }
}
Example #2
0
static int gt_seed_extend_runner(GT_UNUSED int argc,
                                 GT_UNUSED const char **argv,
                                 GT_UNUSED int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  GtSeedExtendArguments *arguments = tool_arguments;
  GtEncseqLoader *encseq_loader = NULL;
  GtEncseq *aencseq = NULL, *bencseq = NULL;
  GtGreedyextendmatchinfo *grextinfo = NULL;
  GtXdropmatchinfo *xdropinfo = NULL;
  GtQuerymatchoutoptions *querymatchoutopt = NULL;
  GtTimer *seedextendtimer = NULL;
  GtExtendCharAccess cam = GT_EXTEND_CHAR_ACCESS_ANY;
  GtUword errorpercentage = 0UL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  gt_assert(arguments->se_minidentity >= GT_EXTEND_MIN_IDENTITY_PERCENTAGE &&
            arguments->se_minidentity <= 100UL);

  /* Calculate error percentage from minidentity */
  errorpercentage = 100UL - arguments->se_minidentity;

  /* Measure whole running time */
  if (arguments->benchmark || arguments->verbose) {
    gt_showtime_enable();
  }
  if (gt_showtime_enabled())
  {
    seedextendtimer = gt_timer_new();
    gt_timer_start(seedextendtimer);
  }

  /* Load encseq A */
  encseq_loader = gt_encseq_loader_new();
  gt_encseq_loader_enable_autosupport(encseq_loader);
  aencseq = gt_encseq_loader_load(encseq_loader,
                                  gt_str_get(arguments->dbs_indexname),
                                  err);
  if (aencseq == NULL)
    had_err = -1;

  /* If there is a 2nd read set: Load encseq B */
  if (!had_err) {
    if (strcmp(gt_str_get(arguments->dbs_queryname), "") != 0) {
      bencseq = gt_encseq_loader_load(encseq_loader,
                                      gt_str_get(arguments->dbs_queryname),
                                      err);
    } else {
      bencseq = gt_encseq_ref(aencseq);
    }
    if (bencseq == NULL) {
      had_err = -1;
      gt_encseq_delete(aencseq);
    }
  }
  gt_encseq_loader_delete(encseq_loader);

  /* set character access method */
  if (!had_err && (gt_option_is_set(arguments->se_option_greedy) ||
                   gt_option_is_set(arguments->se_option_xdrop) ||
                   arguments->se_alignmentwidth > 0))
  {
    cam = gt_greedy_extend_char_access(gt_str_get
                                       (arguments->se_char_access_mode),
                                       err);
    if ((int) cam == -1) {
      had_err = -1;
      gt_encseq_delete(aencseq);
      gt_encseq_delete(bencseq);
    }
  }

  /* Use bias dependent parameters, adapted from E. Myers' DALIGNER */
  if (!had_err && arguments->bias_parameters) {
    const GtAlphabet *alpha = gt_encseq_alphabet(aencseq);
    const double bias_factor[10] = {.690, .690, .690, .690, .780,
                                    .850, .900, .933, .966, 1.000};

    if (gt_alphabet_is_dna(alpha)) {
      GtUword at, cg;
      at = gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'a'));
      at += gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 't'));
      cg = gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'c'));
      cg += gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'g'));
      if (at + cg > 0) {
        const double ratio = (double)MIN(at, cg) / (at + cg);
        int bias_index = (int)MAX(0.0, (ratio + 0.025) * 20.0 - 1.0);
        gt_assert(bias_index < 10);
        arguments->se_maxalilendiff = 30;
        arguments->se_perc_match_hist = (GtUword)(100.0 - errorpercentage *
                                                  bias_factor[bias_index]);
        if (arguments->verbose) {
          printf("# Base ratio = %4.2lf -> percmathistory = "GT_WU"\n",
                 ratio, arguments->se_perc_match_hist);
        }
      } else {
        had_err = -1;
      }
    } else {
      had_err = -1;
    }
    if (had_err) {
      gt_error_set(err, "option \"-bias-parameters\" can only be applied to "
                   "the DNA alphabet");
      gt_encseq_delete(aencseq);
      gt_encseq_delete(bencseq);
    }
  }

  /* Prepare options for greedy extension */
  if (!had_err && gt_option_is_set(arguments->se_option_greedy)) {
    grextinfo = gt_greedy_extend_matchinfo_new(errorpercentage,
                                               arguments->se_maxalilendiff,
                                               arguments->se_historysize,
                                               arguments->se_perc_match_hist,
                                               arguments->se_alignlength,
                                               cam,
                                               arguments->se_extendgreedy);
    if (arguments->benchmark) {
      gt_greedy_extend_matchinfo_silent_set(grextinfo);
    }
  }

  /* Prepare options for xdrop extension */
  if (!had_err && gt_option_is_set(arguments->se_option_xdrop)) {
    xdropinfo = gt_xdrop_matchinfo_new(arguments->se_alignlength,
                                       errorpercentage,
                                       arguments->se_xdropbelowscore,
                                       arguments->se_extendxdrop);
    if (arguments->benchmark) {
      gt_xdrop_matchinfo_silent_set(xdropinfo);
    }
  }

  /* Prepare output options */
  if (!had_err && (arguments->se_alignmentwidth > 0 ||
                   gt_option_is_set(arguments->se_option_xdrop)))
  {
    querymatchoutopt
      = gt_querymatchoutoptions_new(arguments->se_alignmentwidth);

    if (gt_option_is_set(arguments->se_option_xdrop) ||
        gt_option_is_set(arguments->se_option_greedy))
    {
      const GtUword sensitivity = gt_option_is_set(arguments->se_option_greedy)
                                    ? arguments->se_extendgreedy : 100;

      gt_querymatchoutoptions_extend(querymatchoutopt,
                                     errorpercentage,
                                     arguments->se_maxalilendiff,
                                     arguments->se_historysize,
                                     arguments->se_perc_match_hist,
                                     cam,
                                     sensitivity);
    }
  }

  /* Start algorithm */
  if (!had_err) {
    GtDiagbandseed dbsarguments;
    dbsarguments.errorpercentage = errorpercentage;
    dbsarguments.userdefinedleastlength = arguments->se_alignlength;
    dbsarguments.seedlength = arguments->dbs_seedlength;
    dbsarguments.logdiagbandwidth = arguments->dbs_logdiagbandwidth;
    dbsarguments.mincoverage = arguments->dbs_mincoverage;
    dbsarguments.maxfreq = arguments->dbs_maxfreq;
    dbsarguments.memlimit = arguments->dbs_memlimit;
    dbsarguments.mirror = arguments->mirror;
    dbsarguments.overlappingseeds = arguments->overlappingseeds;
    dbsarguments.verify = arguments->dbs_verify;
    dbsarguments.verbose = arguments->verbose;
    dbsarguments.debug_kmer = arguments->dbs_debug_kmer;
    dbsarguments.debug_seedpair = arguments->dbs_debug_seedpair;
    dbsarguments.seed_display = arguments->seed_display;
    dbsarguments.extendgreedyinfo = grextinfo;
    dbsarguments.extendxdropinfo = xdropinfo;
    dbsarguments.querymatchoutopt = querymatchoutopt;

    had_err = gt_diagbandseed_run(aencseq, bencseq, &dbsarguments, err);

    /* clean up */
    gt_encseq_delete(aencseq);
    gt_encseq_delete(bencseq);
    if (gt_option_is_set(arguments->se_option_greedy)) {
      gt_greedy_extend_matchinfo_delete(grextinfo);
    }
    if (gt_option_is_set(arguments->se_option_xdrop)) {
      gt_xdrop_matchinfo_delete(xdropinfo);
    }
    if (arguments->se_alignmentwidth > 0 ||
        gt_option_is_set(arguments->se_option_xdrop)) {
      gt_querymatchoutoptions_delete(querymatchoutopt);
    }
  }

  if (gt_showtime_enabled()) {
    if (!had_err) {
      char *keystring
        = gt_seed_extend_params_keystring(gt_option_is_set(arguments->
                                                           se_option_greedy),
                                          gt_option_is_set(arguments->
                                                           se_option_xdrop),
                                          arguments->dbs_seedlength,
                                          arguments->se_alignlength,
                                          arguments->se_minidentity,
                                          arguments->se_maxalilendiff,
                                          arguments->se_perc_match_hist,
                                          arguments->se_extendgreedy,
                                          arguments->se_extendxdrop,
                                          arguments->se_xdropbelowscore);
      printf("# TIME seedextend-%s", keystring);
      gt_free(keystring);
      gt_timer_show_formatted(seedextendtimer,
                              " overall " GT_WD ".%06ld\n",
                              stdout);
    }
    gt_timer_delete(seedextendtimer);
  }
  return had_err;
}
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  int had_err = 0;
  GtUword alignmentwidth;
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  /* Parse option string in first line of file specified by filename. */
  alignmentwidth = arguments->show_alignment ? 70 : 0;
  semi = gt_seedextend_match_iterator_new(arguments->matchfilename,err);
  if (semi == NULL)
  {
    had_err = -1;
  }
  /* Parse seed extensions. */
  if (!had_err)
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *pol_info = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};

    /* the following are used if seed_extend is set */
    GtGreedyextendmatchinfo *greedyextendmatchinfo = NULL;
    GtProcessinfo_and_querymatchspaceptr processinfo_and_querymatchspaceptr;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *alignment_show_buffer
      = arguments->show_alignment ? gt_alignment_buffer_new(alignmentwidth)
                                  : NULL;
    GtLinspaceManagement *linspace_spacemanager = gt_linspaceManagement_new();
    GtScoreHandler *linspace_scorehandler = gt_scorehandler_new(0,1,0,1);;

    if (!arguments->relax_polish)
    {
      double matchscore_bias = GT_DEFAULT_MATCHSCORE_BIAS;
      if (gt_seedextend_match_iterator_bias_parameters(semi))
      {
        matchscore_bias = gt_greedy_dna_sequence_bias_get(aencseq);
      }
      pol_info = polishing_info_new_with_bias(
                          gt_seedextend_match_iterator_errorpercentage(semi),
                          matchscore_bias,
                          gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(semi,
                                                       true,
                                                       arguments->showeoplist,
                                                       alignmentwidth,
                                                       !arguments->relax_polish,
                                                       arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedyextendmatchinfo
        = gt_greedy_extend_matchinfo_new(70,
                              GT_MAX_ALI_LEN_DIFF,
                              gt_seedextend_match_iterator_history_size(semi),
                              GT_MIN_PERC_MAT_HISTORY,
                              0, /* userdefinedleastlength */
                              GT_EXTEND_CHAR_ACCESS_ANY,
                              100,
                              pol_info);
    }
    if (pol_info != NULL)
    {
      gt_alignment_polished_ends(alignment,pol_info,false);
    }
    processinfo_and_querymatchspaceptr.processinfo = greedyextendmatchinfo;
    if (arguments->sortmatches)
    {
      (void) gt_seedextend_match_iterator_all_sorted(semi,true);
    }
    while (true)
    {
      GtQuerymatch *querymatchptr = gt_seedextend_match_iterator_next(semi);

      if (querymatchptr == NULL)
      {
        break;
      }
      if (gt_seedextend_match_iterator_has_seedline(semi))
      {
        if (arguments->seed_extend)
        {
          if (aencseq == bencseq)
          {
            const GtUword
              seedlen = gt_seedextend_match_iterator_seedlen(semi),
              seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
              seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

            processinfo_and_querymatchspaceptr.querymatchspaceptr
              = querymatchptr;
            had_err = gt_greedy_extend_selfmatch_with_output(
                                  &processinfo_and_querymatchspaceptr,
                                  aencseq,
                                  seedlen,
                                  seedpos1,
                                  seedpos2,
                                  err);
            if (had_err)
            {
              break;
            }
          } else
          {
            gt_assert(false);
          }
        } else
        {
          const GtUword query_totallength
            = gt_encseq_seqlength(bencseq,
                                  gt_querymatch_queryseqnum(querymatchptr));
          gt_show_seed_extend_encseq(querymatchptr,
                                     aencseq,
                                     bencseq,
                                     query_totallength);
        }
      } else
      {
        gt_show_seed_extend_plain(&seqpairbuf,
                                  linspace_spacemanager,
                                  linspace_scorehandler,
                                  alignment,
                                  alignment_show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  querymatchptr);
      }
    }
    polishing_info_delete(pol_info);
    gt_greedy_extend_matchinfo_delete(greedyextendmatchinfo);
    gt_free(alignment_show_buffer);
    gt_scorehandler_delete(linspace_scorehandler);
    gt_linspaceManagement_delete(linspace_spacemanager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}