示例#1
0
GtTimer* gt_timer_new_with_progress_description(const char* desc)
{
  GtTimer *t = gt_timer_new();
  t->statedesc = gt_cstr_dup(desc);
  t->has_desc = true;
  return t;
}
示例#2
0
static int gt_seed_extend_runner(GT_UNUSED int argc,
                                 GT_UNUSED const char **argv,
                                 GT_UNUSED int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  GtSeedExtendArguments *arguments = tool_arguments;
  GtEncseqLoader *encseq_loader = NULL;
  GtEncseq *aencseq = NULL, *bencseq = NULL;
  GtGreedyextendmatchinfo *grextinfo = NULL;
  GtXdropmatchinfo *xdropinfo = NULL;
  GtQuerymatchoutoptions *querymatchoutopt = NULL;
  GtTimer *seedextendtimer = NULL;
  GtExtendCharAccess cam = GT_EXTEND_CHAR_ACCESS_ANY;
  GtUword errorpercentage = 0UL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  gt_assert(arguments->se_minidentity >= GT_EXTEND_MIN_IDENTITY_PERCENTAGE &&
            arguments->se_minidentity <= 100UL);

  /* Calculate error percentage from minidentity */
  errorpercentage = 100UL - arguments->se_minidentity;

  /* Measure whole running time */
  if (arguments->benchmark || arguments->verbose) {
    gt_showtime_enable();
  }
  if (gt_showtime_enabled())
  {
    seedextendtimer = gt_timer_new();
    gt_timer_start(seedextendtimer);
  }

  /* Load encseq A */
  encseq_loader = gt_encseq_loader_new();
  gt_encseq_loader_enable_autosupport(encseq_loader);
  aencseq = gt_encseq_loader_load(encseq_loader,
                                  gt_str_get(arguments->dbs_indexname),
                                  err);
  if (aencseq == NULL)
    had_err = -1;

  /* If there is a 2nd read set: Load encseq B */
  if (!had_err) {
    if (strcmp(gt_str_get(arguments->dbs_queryname), "") != 0) {
      bencseq = gt_encseq_loader_load(encseq_loader,
                                      gt_str_get(arguments->dbs_queryname),
                                      err);
    } else {
      bencseq = gt_encseq_ref(aencseq);
    }
    if (bencseq == NULL) {
      had_err = -1;
      gt_encseq_delete(aencseq);
    }
  }
  gt_encseq_loader_delete(encseq_loader);

  /* set character access method */
  if (!had_err && (gt_option_is_set(arguments->se_option_greedy) ||
                   gt_option_is_set(arguments->se_option_xdrop) ||
                   arguments->se_alignmentwidth > 0))
  {
    cam = gt_greedy_extend_char_access(gt_str_get
                                       (arguments->se_char_access_mode),
                                       err);
    if ((int) cam == -1) {
      had_err = -1;
      gt_encseq_delete(aencseq);
      gt_encseq_delete(bencseq);
    }
  }

  /* Use bias dependent parameters, adapted from E. Myers' DALIGNER */
  if (!had_err && arguments->bias_parameters) {
    const GtAlphabet *alpha = gt_encseq_alphabet(aencseq);
    const double bias_factor[10] = {.690, .690, .690, .690, .780,
                                    .850, .900, .933, .966, 1.000};

    if (gt_alphabet_is_dna(alpha)) {
      GtUword at, cg;
      at = gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'a'));
      at += gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 't'));
      cg = gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'c'));
      cg += gt_encseq_charcount(aencseq, gt_alphabet_encode(alpha, 'g'));
      if (at + cg > 0) {
        const double ratio = (double)MIN(at, cg) / (at + cg);
        int bias_index = (int)MAX(0.0, (ratio + 0.025) * 20.0 - 1.0);
        gt_assert(bias_index < 10);
        arguments->se_maxalilendiff = 30;
        arguments->se_perc_match_hist = (GtUword)(100.0 - errorpercentage *
                                                  bias_factor[bias_index]);
        if (arguments->verbose) {
          printf("# Base ratio = %4.2lf -> percmathistory = "GT_WU"\n",
                 ratio, arguments->se_perc_match_hist);
        }
      } else {
        had_err = -1;
      }
    } else {
      had_err = -1;
    }
    if (had_err) {
      gt_error_set(err, "option \"-bias-parameters\" can only be applied to "
                   "the DNA alphabet");
      gt_encseq_delete(aencseq);
      gt_encseq_delete(bencseq);
    }
  }

  /* Prepare options for greedy extension */
  if (!had_err && gt_option_is_set(arguments->se_option_greedy)) {
    grextinfo = gt_greedy_extend_matchinfo_new(errorpercentage,
                                               arguments->se_maxalilendiff,
                                               arguments->se_historysize,
                                               arguments->se_perc_match_hist,
                                               arguments->se_alignlength,
                                               cam,
                                               arguments->se_extendgreedy);
    if (arguments->benchmark) {
      gt_greedy_extend_matchinfo_silent_set(grextinfo);
    }
  }

  /* Prepare options for xdrop extension */
  if (!had_err && gt_option_is_set(arguments->se_option_xdrop)) {
    xdropinfo = gt_xdrop_matchinfo_new(arguments->se_alignlength,
                                       errorpercentage,
                                       arguments->se_xdropbelowscore,
                                       arguments->se_extendxdrop);
    if (arguments->benchmark) {
      gt_xdrop_matchinfo_silent_set(xdropinfo);
    }
  }

  /* Prepare output options */
  if (!had_err && (arguments->se_alignmentwidth > 0 ||
                   gt_option_is_set(arguments->se_option_xdrop)))
  {
    querymatchoutopt
      = gt_querymatchoutoptions_new(arguments->se_alignmentwidth);

    if (gt_option_is_set(arguments->se_option_xdrop) ||
        gt_option_is_set(arguments->se_option_greedy))
    {
      const GtUword sensitivity = gt_option_is_set(arguments->se_option_greedy)
                                    ? arguments->se_extendgreedy : 100;

      gt_querymatchoutoptions_extend(querymatchoutopt,
                                     errorpercentage,
                                     arguments->se_maxalilendiff,
                                     arguments->se_historysize,
                                     arguments->se_perc_match_hist,
                                     cam,
                                     sensitivity);
    }
  }

  /* Start algorithm */
  if (!had_err) {
    GtDiagbandseed dbsarguments;
    dbsarguments.errorpercentage = errorpercentage;
    dbsarguments.userdefinedleastlength = arguments->se_alignlength;
    dbsarguments.seedlength = arguments->dbs_seedlength;
    dbsarguments.logdiagbandwidth = arguments->dbs_logdiagbandwidth;
    dbsarguments.mincoverage = arguments->dbs_mincoverage;
    dbsarguments.maxfreq = arguments->dbs_maxfreq;
    dbsarguments.memlimit = arguments->dbs_memlimit;
    dbsarguments.mirror = arguments->mirror;
    dbsarguments.overlappingseeds = arguments->overlappingseeds;
    dbsarguments.verify = arguments->dbs_verify;
    dbsarguments.verbose = arguments->verbose;
    dbsarguments.debug_kmer = arguments->dbs_debug_kmer;
    dbsarguments.debug_seedpair = arguments->dbs_debug_seedpair;
    dbsarguments.seed_display = arguments->seed_display;
    dbsarguments.extendgreedyinfo = grextinfo;
    dbsarguments.extendxdropinfo = xdropinfo;
    dbsarguments.querymatchoutopt = querymatchoutopt;

    had_err = gt_diagbandseed_run(aencseq, bencseq, &dbsarguments, err);

    /* clean up */
    gt_encseq_delete(aencseq);
    gt_encseq_delete(bencseq);
    if (gt_option_is_set(arguments->se_option_greedy)) {
      gt_greedy_extend_matchinfo_delete(grextinfo);
    }
    if (gt_option_is_set(arguments->se_option_xdrop)) {
      gt_xdrop_matchinfo_delete(xdropinfo);
    }
    if (arguments->se_alignmentwidth > 0 ||
        gt_option_is_set(arguments->se_option_xdrop)) {
      gt_querymatchoutoptions_delete(querymatchoutopt);
    }
  }

  if (gt_showtime_enabled()) {
    if (!had_err) {
      char *keystring
        = gt_seed_extend_params_keystring(gt_option_is_set(arguments->
                                                           se_option_greedy),
                                          gt_option_is_set(arguments->
                                                           se_option_xdrop),
                                          arguments->dbs_seedlength,
                                          arguments->se_alignlength,
                                          arguments->se_minidentity,
                                          arguments->se_maxalilendiff,
                                          arguments->se_perc_match_hist,
                                          arguments->se_extendgreedy,
                                          arguments->se_extendxdrop,
                                          arguments->se_xdropbelowscore);
      printf("# TIME seedextend-%s", keystring);
      gt_free(keystring);
      gt_timer_show_formatted(seedextendtimer,
                              " overall " GT_WD ".%06ld\n",
                              stdout);
    }
    gt_timer_delete(seedextendtimer);
  }
  return had_err;
}
示例#3
0
static int gt_linspace_align_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtLinspaceArguments *arguments = tool_arguments;
  int had_err = 0;
  GtAlignment *align;
  GtWord left_dist = 0, right_dist = 0;
  GtSequenceTable *sequence_table1, *sequence_table2;
  GtLinspaceManagement *spacemanager;
  GtScoreHandler *scorehandler = NULL;
  GtTimer *linspacetimer = NULL;
  GtAlphabet *alphabet = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  sequence_table1 = gt_sequence_table_new();
  sequence_table2 = gt_sequence_table_new();
  align = gt_alignment_new();
  spacemanager = gt_linspace_management_new();
  gt_linspace_management_set_TSfactor(spacemanager,arguments->timesquarefactor);

  /* get sequences */
  if (gt_str_array_size(arguments->strings) > 0)
  {
    get_onesequence(sequence_table1, arguments->strings, 0);
    sequence_table1->size++;
    get_onesequence(sequence_table2, arguments->strings, 1);
    sequence_table2->size++;
  }
  else if (gt_str_array_size(arguments->files) > 0)
  {
    had_err = get_fastasequences(sequence_table1,
                                 gt_str_array_get_str(arguments->files,0),err);
    if (!had_err)
    {
      had_err = get_fastasequences(sequence_table2,
                                  gt_str_array_get_str(arguments->files,1),err);
    }
  }
  if (arguments->dna)
  {
    alphabet = gt_alphabet_new_dna();
  } else
  {
    gt_assert(arguments->protein);
    alphabet = gt_alphabet_new_protein();
  }
  gt_encode_sequence_table(alphabet,sequence_table1);
  gt_encode_sequence_table(alphabet,sequence_table2);
  if (!had_err)
  {
    scorehandler = gt_arguments2scorehandler(arguments,err);
    if (scorehandler == NULL)
    {
      had_err = -1;
    } else
    {
      if (arguments->global && arguments->protein && !arguments->has_costmatrix)
      {
        GtScoreHandler *costhandler = gt_scorehandler2costhandler(scorehandler);
        gt_scorehandler_delete(scorehandler);
        scorehandler = costhandler;
      }
    }
  }
  /* get diagonal band */
  if (!had_err && arguments->diagonal)
  {
    if (gt_str_array_size(arguments->diagonalbonds) > 0)
    {
      had_err = gt_parse_score_value(__LINE__,&left_dist,
                                  gt_str_array_get(arguments->diagonalbonds,0),
                                  false, err);
      if (!had_err)
      {
        had_err = gt_parse_score_value(__LINE__,&right_dist,
                                  gt_str_array_get(arguments->diagonalbonds,1),
                                  false, err);
      }
    }
  }
  if (!had_err && arguments->spacetime)
  {
    linspacetimer = gt_timer_new();
  }

  /* alignment functions with linear gap costs */
  if (!had_err)
  {
    bool affine;

    if (gt_str_array_size(arguments->linearcosts) > 0)
    {
      affine = false;
    } else
    {
      gt_assert(gt_str_array_size(arguments->affinecosts) > 0);
      affine = true;
    }
    had_err = gt_all_against_all_alignment_check (
                            affine, align, arguments,
                            spacemanager,
                            scorehandler,
                            gt_alphabet_characters(alphabet),
                            gt_alphabet_wildcard_show(alphabet),
                            sequence_table1,
                            sequence_table2,
                            left_dist,
                            right_dist,
                            linspacetimer,err);
  }
  /*spacetime option*/
  if (!had_err && arguments->spacetime)
  {
    printf("# combined space peak in kilobytes: %f\n",
           GT_KILOBYTES(gt_linspace_management_get_spacepeak(spacemanager)));
    gt_timer_show_formatted(linspacetimer,"# TIME overall " GT_WD ".%02ld\n",
                            stdout);
  }
  gt_timer_delete(linspacetimer);
  gt_linspace_management_delete(spacemanager);
  gt_sequence_table_delete(sequence_table1);
  gt_sequence_table_delete(sequence_table2);
  gt_alignment_delete(align);
  gt_alphabet_delete(alphabet);
  gt_scorehandler_delete(scorehandler);
  return had_err;
}
示例#4
0
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  SpeccheckArguments *arguments = tool_arguments;

  int had_err = 0;
  gt_error_check(err);

  res = gt_spec_results_new();
  gt_assert(res);
  t = gt_timer_new();
  gt_assert(t);

  spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                     err);
  if (!spec_visitor)
    return -1;

  /* add region mapping if given */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set runtime error behaviour */
  if (arguments->fail_hard)
    gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
  else
    gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

  /* redirect warnings */
  gt_warning_set_handler(gt_speck_record_warning, res);

  last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
  gt_assert(gff3_in_stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

  /* insert sort stream if requested */
  if (arguments->sort) {
    last_stream = sort_stream = gt_sort_stream_new(last_stream);
  }

  /* if -provideindex is given, collect input features and index them first */
  if (arguments->provideindex) {
    fi = gt_feature_index_memory_new();
    gt_assert(fi);

    last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
    gt_assert(feature_stream);

    last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream, arr,
                                                             err);
    if (!a_out_stream)
      had_err = -1;

    gt_timer_start(t);

    if (!had_err)
      had_err = gt_node_stream_pull(last_stream, err);

    if (!had_err) {
      gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                        gt_feature_index_ref(fi));
      last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
      if (!a_in_stream)
        had_err = -1;
    }
  } else {
    gt_timer_start(t);
  }

  if (!had_err) {
    checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
    gt_assert(checker_stream);
  }

  /* perform checking  */
  if (!had_err)
    had_err = gt_node_stream_pull(checker_stream, err);

  gt_timer_stop(t);

  /* reset warnings output */
  gt_warning_set_handler(gt_warning_default_handler, NULL);

  /* output results */
  if (!had_err)
    gt_spec_results_report(res, arguments->outfp,
                           gt_str_get(arguments->specfile),
                           arguments->verbose, arguments->colored,
                           !arguments->allexpects);

  if (!had_err)
    gt_timer_show_formatted(t, "Finished in " GT_WD ".%06ld s.\n", stderr);

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_timer_delete(t);
  gt_array_delete(arr);

  return had_err;
}
示例#5
0
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTypeChecker *type_checker = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  GtStr *prog, *speclib;
  SpeccheckArguments *arguments = tool_arguments;

  int had_err = 0;
  gt_error_check(err);

  res = gt_spec_results_new();
  gt_assert(res);

  if (gt_file_exists(gt_str_get(arguments->format))) {
    speclib = gt_str_ref(arguments->format);
  } else {
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, gt_error_get_progname(err),
                    gt_cstr_length_up_to_char(gt_error_get_progname(err), ' '));
    speclib = gt_get_gtdata_path(gt_str_get(prog), NULL);
    gt_str_delete(prog);
    gt_str_append_cstr(speclib, "/spec/output_drivers/");
    gt_str_append_str(speclib, arguments->format);

    if (!gt_file_exists(gt_str_get(speclib))) {
      gt_error_set(err, "output driver file \"%s\" does not exist",
                   gt_str_get(speclib));
      had_err = -1;
    }
  }

  if (!had_err) {
    spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                       err);
    if (!spec_visitor) {
      gt_spec_results_delete(res);
      return -1;
    }
  }

  t = gt_timer_new();
  gt_assert(t);

  /* add region mapping if given */
  if (!had_err && gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set type checker if necessary */
  if (!had_err && gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_type_checker((GtSpecVisitor*) spec_visitor,
                                       type_checker);
  }

  if (!had_err) {
    /* set runtime error behaviour */
    if (arguments->fail_hard)
      gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
    else
      gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

    /* redirect warnings */
    gt_warning_set_handler(gt_speck_record_warning, res);

    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
    gt_assert(gff3_in_stream);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

    /* insert sort stream if requested */
    if (arguments->sort) {
      last_stream = sort_stream = gt_sort_stream_new(last_stream);
    }

    /* if -provideindex is given, collect input features and index them first */
    if (arguments->provideindex) {
      fi = gt_feature_index_memory_new();
      gt_assert(fi);

      last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
      gt_assert(feature_stream);

      last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream, arr,
                                                               err);
      if (!a_out_stream)
        had_err = -1;

      gt_timer_start(t);

      if (!had_err)
        had_err = gt_node_stream_pull(last_stream, err);

      if (!had_err) {
        gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                          gt_feature_index_ref(fi));
        last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
        if (!a_in_stream)
          had_err = -1;
      }
    } else {
      gt_timer_start(t);
    }

    if (!had_err) {
      checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
      gt_assert(checker_stream);
    }

    /* perform checking  */
    if (!had_err)
      had_err = gt_node_stream_pull(checker_stream, err);

    gt_timer_stop(t);

    /* reset warnings output */
    gt_warning_set_handler(gt_warning_default_handler, NULL);

    /* output results */
    if (!had_err) {
      GtStr *runtime = gt_str_new();
      gt_timer_get_formatted(t, GT_WD ".%06ld", runtime);
      had_err = gt_spec_results_render_template(res, gt_str_get(speclib),
                                                arguments->outfp,
                                                gt_str_get(arguments->specfile),
                                                arguments->verbose,
                                                arguments->colored,
                                                gt_str_get(runtime), err);
      gt_str_delete(runtime);
    }
  }

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_type_checker_delete(type_checker);
  gt_timer_delete(t);
  gt_array_delete(arr);
  gt_str_delete(speclib);

  return had_err;
}