Exemple #1
0
/* Entry point of the chseqids tool.
   After option parsing, argv[parsed_args] names the file handed to the
   chseqids stream and argv[parsed_args + 1] the GFF3 file to read. The nodes
   are pulled through the chain
     GFF3 in (sorted) -> chseqids -> [sort] -> GFF3 out.
   Returns 0 on success or if the option parser requests an exit, -1 on a
   parse error or if the chseqids stream cannot be created, and otherwise
   whatever gt_node_stream_pull() reports. */
int gt_chseqids(int argc, const char **argv, GtError *err)
{
  GtNodeStream *in_stream, *mapped_stream, *sorted_stream = NULL,
               *out_stream = NULL;
  ChseqidsArguments arguments;
  GtStr *mapping_file;
  int parsed_args, status = 0;

  gt_error_check(err);

  /* parse the command line; leave early on error or exit request */
  switch (parse_options(&parsed_args, &arguments, argc, argv, err)) {
    case GT_OPTION_PARSER_OK:
      break;
    case GT_OPTION_PARSER_ERROR:
      return -1;
    case GT_OPTION_PARSER_REQUESTS_EXIT:
      return 0;
  }

  /* input side of the chain */
  in_stream = gt_gff3_in_stream_new_sorted(argv[parsed_args + 1]);
  if (arguments.verbose && arguments.outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);

  /* the string is only needed for the duration of the constructor call */
  mapping_file = gt_str_new_cstr(argv[parsed_args]);
  mapped_stream = gt_chseqids_stream_new(in_stream, mapping_file, err);
  gt_str_delete(mapping_file);

  if (mapped_stream) {
    /* optionally sort before writing, then pull everything through */
    GtNodeStream *tail = mapped_stream;
    if (arguments.sort)
      tail = sorted_stream = gt_sort_stream_new(mapped_stream);
    out_stream = gt_gff3_out_stream_new(tail, arguments.outfp);
    status = gt_node_stream_pull(out_stream, err);
  }
  else
    status = -1;

  /* release streams and the output file */
  gt_node_stream_delete(out_stream);
  gt_node_stream_delete(mapped_stream);
  gt_node_stream_delete(sorted_stream);
  gt_node_stream_delete(in_stream);
  gt_file_delete(arguments.outfp);

  return status;
}
Exemple #2
0
/* Runner of the bed_to_gff3 tool: reads the BED file argv[parsed_args],
   applies the feature, thick-feature and block type names from the tool
   arguments to the BED input stream, and pulls all nodes through a GFF3
   output stream created with a NULL output file.
   Returns the result of gt_node_stream_pull(). */
static int gt_bed_to_gff3_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  BEDToGFF3Arguments *opts = tool_arguments;
  GtNodeStream *source, *sink;
  GtBEDInStream *bed;
  int status;

  gt_error_check(err);

  /* BED input, configured through its concrete type */
  source = gt_bed_in_stream_new(argv[parsed_args]);
  bed = (GtBEDInStream*) source;
  gt_bed_in_stream_set_feature_type(bed, gt_str_get(opts->feature_type));
  gt_bed_in_stream_set_thick_feature_type(bed,
                                       gt_str_get(opts->thick_feature_type));
  gt_bed_in_stream_set_block_type(bed, gt_str_get(opts->block_type));

  /* GFF3 output */
  /* XXX: use proper genfile */
  sink = gt_gff3_out_stream_new(source, NULL);

  /* drain the chain */
  status = gt_node_stream_pull(sink, err);

  gt_node_stream_delete(sink);
  gt_node_stream_delete(source);

  return status;
}
/* Runner of the mergefeat tool: reads the GFF3 files given as
   argv[parsed_args .. argc-1] (unsorted input stream), passes the nodes
   through a merge feature stream and writes them to arguments->outfp.
   Returns the result of gt_node_stream_pull(). */
static int gt_mergefeat_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  InterFeatArguments *opts = tool_arguments;
  GtNodeStream *reader, *merger, *writer;
  int status;

  gt_error_check(err);
  gt_assert(opts);

  /* assemble the chain: GFF3 in -> merge features -> GFF3 out */
  reader = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                          argv + parsed_args);
  merger = gt_merge_feature_stream_new(reader);
  writer = gt_gff3_out_stream_new(merger, opts->outfp);

  /* drain the chain */
  status = gt_node_stream_pull(writer, err);

  gt_node_stream_delete(writer);
  gt_node_stream_delete(merger);
  gt_node_stream_delete(reader);

  return status;
}
Exemple #4
0
/* Runner of the csa tool: reads the sorted GFF3 file argv[parsed_args],
   passes the nodes through a CSA stream parameterized with
   arguments->join_length and writes them to arguments->outfp.
   Returns the result of gt_node_stream_pull(). */
static int gt_csa_runner(GT_UNUSED int argc, const char **argv, int parsed_args,
                         void *tool_arguments, GtError *err)
{
  CSAArguments *opts = tool_arguments;
  GtNodeStream *reader, *csa, *writer;
  int status;

  gt_error_check(err);
  gt_assert(opts);

  /* input; the progress bar is only shown when verbose output was requested
     and an output file is set */
  reader = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (opts->verbose && opts->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) reader);

  /* processing and output stages */
  csa = gt_csa_stream_new(reader, opts->join_length);
  writer = gt_gff3_out_stream_new(csa, opts->outfp);

  /* drain the chain */
  status = gt_node_stream_pull(writer, err);

  gt_node_stream_delete(writer);
  gt_node_stream_delete(csa);
  gt_node_stream_delete(reader);

  return status;
}
/* Runner of the inlineseq_split tool.
   Opens the optional sequence and GFF3 output files named in the tool
   arguments (mode "w+"), then pulls the nodes of the GFF3 files
   argv[parsed_args .. argc-1] through
     GFF3 in (unsorted) -> sequence node out -> GFF3 out.
   A file that was not requested stays NULL and is passed on as such.
   Returns -1 if an output file cannot be opened, otherwise the result of
   gt_node_stream_pull(). */
static int gt_inlineseq_split_runner(int argc, const char **argv,
                                     int parsed_args,
                                     void *tool_arguments, GtError *err)
{
  GtInlineseqSplitArguments *opts = tool_arguments;
  GtNodeStream *reader = NULL,
               *writer = NULL,
               *splitter = NULL;
  GtFile *seq_file = NULL,
         *gff3_file = NULL;
  int status = 0;

  gt_error_check(err);
  gt_assert(opts);

  /* sequence output file (optional) */
  if (gt_str_length(opts->seqoutfile) > 0) {
    seq_file = gt_file_new(gt_str_get(opts->seqoutfile), "w+", err);
    if (!seq_file)
      status = -1;
  }

  /* GFF3 output file (optional) */
  if (!status && gt_str_length(opts->gffoutfile) > 0) {
    gff3_file = gt_file_new(gt_str_get(opts->gffoutfile), "w+", err);
    if (!gff3_file)
      status = -1;
  }

  /* build the chain and drain it; nothing in here can fail before the pull */
  if (!status) {
    reader = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                            argv + parsed_args);
    gt_assert(reader);

    splitter = gt_sequence_node_out_stream_new(reader, seq_file, err);
    gt_assert(splitter);

    writer = gt_gff3_out_stream_new(splitter, gff3_file);
    status = gt_node_stream_pull(writer, err);
  }

  gt_node_stream_delete(reader);
  gt_node_stream_delete(writer);
  gt_node_stream_delete(splitter);
  gt_file_delete(seq_file);
  gt_file_delete(gff3_file);

  return status;
}
/* Lua binding: construct a GFF3 output stream on top of the genome stream
   passed as Lua argument 1.
   The new stream is stored in a fresh userdata which gets the
   GENOME_STREAM_METATABLE metatable and is left on top of the Lua stack.
   The output stream is created with a NULL output file.
   Returns 1, the number of results pushed for Lua. */
static int gff3_out_stream_lua_new(lua_State *L)
{
  GtNodeStream **out_stream, **in_stream;
  /* validate the state before it is used for the first time */
  gt_assert(L);
  in_stream = check_genome_stream(L, 1);
  /* construct object */
  out_stream = lua_newuserdata(L, sizeof (GtNodeStream*));
  *out_stream = gt_gff3_out_stream_new(*in_stream, NULL);
  gt_assert(*out_stream);
  /* attach the shared genome stream metatable to the new userdata */
  luaL_getmetatable(L, GENOME_STREAM_METATABLE);
  lua_setmetatable(L, -2);
  return 1;
}
Exemple #7
0
/* Runner of the tir tool: creates a TIR stream from the index name and the
   search parameters stored in the tool arguments, and pulls its nodes through
   a GFF3 output stream created with a NULL output file.
   Returns -1 if the TIR stream cannot be created, otherwise the result of
   gt_node_stream_pull(). */
static int gt_tir_runner(GT_UNUSED int argc, GT_UNUSED const char **argv,
                         GT_UNUSED int parsed_args, void *tool_arguments,
                         GtError *err)
{
  GtTirArguments *opts = tool_arguments;
  GtNodeStream *finder, *writer;
  int status;

  gt_error_check(err);
  gt_assert(opts);

  finder = gt_tir_stream_new(opts->str_indexname,
                             opts->min_seed_length,
                             opts->min_TIR_length,
                             opts->max_TIR_length,
                             opts->min_TIR_distance,
                             opts->max_TIR_distance,
                             opts->arbit_scores,
                             opts->xdrop_belowscore,
                             opts->similarity_threshold,
                             opts->best_overlaps,
                             opts->no_overlaps,
                             opts->min_TSD_length,
                             opts->max_TSD_length,
                             opts->vicinity,
                             err);
  if (!finder)
    return -1;

  /* GFF3 output on top of the TIR stream */
  writer = gt_gff3_out_stream_new(finder, NULL);

  /* drain the chain */
  status = gt_node_stream_pull(writer, err);

  gt_node_stream_delete(finder);
  gt_node_stream_delete(writer);

  return status;
}
/* Runner of the inlineseq_add tool.
   If a seqid2file option was used, a region mapping is created from it;
   otherwise the mapping stays NULL. The GFF3 files argv[parsed_args ..
   argc-1] are then read in tidy mode and pulled through
     GFF3 in (unsorted) -> sequence node add -> GFF3 out (arguments->outfp).
   Returns -1 if the region mapping or the add stream cannot be created,
   otherwise the result of gt_node_stream_pull(). */
static int gt_inlineseq_add_runner(int argc, const char **argv, int parsed_args,
                                   void *tool_arguments, GtError *err)
{
  InlineseqAddArguments *opts = tool_arguments;
  GtNodeStream *reader = NULL,
               *adder = NULL,
               *writer = NULL;
  GtRegionMapping *mapping = NULL;
  int status = 0;
  gt_error_check(err);

  /* optional region mapping */
  if (gt_seqid2file_option_used(opts->s2fi)) {
    mapping = gt_seqid2file_region_mapping_new(opts->s2fi, err);
    if (!mapping)
      status = -1;
  }

  if (!status) {
    reader = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                            argv + parsed_args);
    gt_assert(reader);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) reader);

    adder = gt_sequence_node_add_stream_new(reader, mapping, err);
    if (adder) {
      /* complete the chain and drain it */
      writer = gt_gff3_out_stream_new(adder, opts->outfp);
      status = gt_node_stream_pull(writer, err);
    }
    else
      status = -1;
  }

  /* free */
  gt_node_stream_delete(reader);
  gt_node_stream_delete(adder);
  gt_node_stream_delete(writer);
  gt_region_mapping_delete(mapping);

  return status;
}
Exemple #9
0
/* Runner of the cds tool: reads the sorted GFF3 file argv[parsed_args],
   creates a region mapping from the seqid2file options and pulls the nodes
   through a CDS stream and a GFF3 output stream (arguments->outfp).
   Returns -1 if the region mapping cannot be created, otherwise the result
   of gt_node_stream_pull().
   NOTE(review): the region mapping is not released here; presumably the CDS
   stream takes ownership of it -- confirm against gt_cds_stream_new(). */
static int gt_cds_runner(GT_UNUSED int argc, const char **argv, int parsed_args,
                         void *tool_arguments, GtError *err)
{
  CDSArguments *opts = tool_arguments;
  GtNodeStream *reader, *cds = NULL, *writer = NULL;
  GtRegionMapping *mapping;
  int status;

  gt_error_check(err);
  gt_assert(opts);

  /* input; the progress bar is only shown when verbose output was requested
     and an output file is set */
  reader = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (opts->verbose && opts->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) reader);

  mapping = gt_seqid2file_region_mapping_new(opts->s2fi, err);
  if (mapping) {
    /* CDS stage followed by GFF3 output, then drain the chain */
    cds = gt_cds_stream_new(reader, mapping,
                            opts->minorflen, GT_CDS_SOURCE_TAG,
                            opts->start_codon,
                            opts->final_stop_codon,
                            opts->generic_start_codons);
    writer = gt_gff3_out_stream_new(cds, opts->outfp);
    status = gt_node_stream_pull(writer, err);
  }
  else
    status = -1;

  gt_node_stream_delete(writer);
  gt_node_stream_delete(cds);
  gt_node_stream_delete(reader);

  return status;
}
Exemple #10
0
/* Runner of the ltrdigest tool (encoded-sequence variant).
   argv[parsed_args] is the sorted GFF3 input file and argv[parsed_args + 1]
   the index name of the encoded sequence. The nodes are pulled through
     GFF3 in -> ltrdigest -> [tabular file out] -> GFF3 out.
   The PPT search is always enabled; the PBS search is enabled when a tRNA
   library is given and (with HAVE_HMMER) the protein domain search when HMM
   files are given. The tabular output stage is attached only when an output
   prefix was specified.
   Returns 0 on success and a non-zero value on failure. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream   = NULL,
               *gff3_out_stream  = NULL,
               *ltrdigest_stream = NULL,
               *tab_out_stream   = NULL,
               *last_stream      = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  const char *indexname = argv[arg+1];
  GtLogger *logger;
  GtEncseqLoader *el;
  GtEncseq *encseq;
  gt_error_check(err);
  gt_assert(arguments);

  /* created only after <arguments> has been validated */
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);

  /* Set sequence encoder options. Defaults are ok. */
  el = gt_encseq_loader_new();
  gt_encseq_loader_set_logger(el, logger);

  /* Open sequence file */
  encseq = gt_encseq_loader_load(el, indexname, err);
  if (!encseq)
    had_err = -1;

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->pbs_opts.trna_lib =
      gt_bioseq_new(gt_str_get(arguments->trna_lib), err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

#ifdef HAVE_HMMER
  /* Open HMMER files if given. */
  if (!had_err && gt_str_array_size(arguments->pdom_opts.hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }
#endif

  if (!had_err)
  {
    /* set up stream flow
     * ------------------*/
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);

    last_stream = ltrdigest_stream = gt_ltrdigest_stream_new(last_stream,
                                                  tests_to_run,
                                                  encseq,
                                                  &arguments->pbs_opts,
                                                  &arguments->ppt_opts,
#ifdef HAVE_HMMER
                                                  &arguments->pdom_opts,
#endif
                                                  err);
    if (!ltrdigest_stream)
      had_err = -1;
  }

  /* attach tabular output stream, if requested */
  if (!had_err && gt_str_length(arguments->prefix) > 0)
  {
    last_stream = tab_out_stream = gt_ltr_fileout_stream_new(last_stream,
                                            tests_to_run,
                                            encseq,
                                            gt_str_get(arguments->prefix),
                                            &arguments->ppt_opts,
                                            &arguments->pbs_opts,
#ifdef HAVE_HMMER
                                            &arguments->pdom_opts,
#endif
                                            gt_str_get(arguments->trna_lib),
                                            argv[arg+1],
                                            argv[arg],
                                            arguments->seqnamelen,
                                            err);
    /* the constructor takes <err>, so treat a NULL result as a failure
       instead of chaining further streams onto it */
    if (!tab_out_stream)
      had_err = -1;
#ifdef HAVE_HMMER
    if (!had_err) {
      /* test the option values themselves; the address of a struct member
         is never NULL, so testing it would enable both outputs always */
      if (arguments->pdom_opts.write_alignments)
        gt_ltr_fileout_stream_enable_pdom_alignment_output(tab_out_stream);
      if (arguments->pdom_opts.write_aaseqs)
        gt_ltr_fileout_stream_enable_aa_sequence_output(tab_out_stream);
    }
#endif
  }

  if (!had_err)
  {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  /* free; streams that were never created are still NULL here */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ltrdigest_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);

  gt_encseq_loader_delete(el);
  gt_encseq_delete(encseq);
  gt_bioseq_delete(arguments->pbs_opts.trna_lib);
  gt_logger_delete(logger);

  return had_err;
}
Exemple #11
0
/* Runner of the gff3 tool.
   Builds a chain of node streams on top of an unsorted GFF3 input stream
   reading the files argv[parsed_args .. argc-1]. Depending on the tool
   arguments the chain is extended by load, sort, merge feature, add introns,
   set source and GFF3 output stages, in that order; <last_stream> always
   points to the current end of the chain. Finally all nodes are pulled
   through <last_stream>.
   Returns 0 on success and a non-zero value if the type checker cannot be
   created, the offset file cannot be set, or pulling fails. */
static int gt_gff3_runner(int argc, const char **argv, int parsed_args,
                          void *tool_arguments, GtError *err)
{
  GFF3Arguments *arguments = tool_arguments;
  GtTypeChecker *type_checker = NULL;
  GtNodeStream *gff3_in_stream,
               *sort_stream = NULL,
               *load_stream = NULL,
               *merge_feature_stream = NULL,
               *add_introns_stream = NULL,
               *set_source_stream = NULL,
               *gff3_out_stream = NULL,
               *last_stream;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  /* the progress bar is only shown when an output file is set */
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);
  if (arguments->checkids)
    gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream);
  if (!arguments->addids)
    gt_gff3_in_stream_disable_add_ids(gff3_in_stream);

  last_stream = gff3_in_stream;

  /* set different type checker if necessary */
  if (gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_gff3_in_stream_set_type_checker(gff3_in_stream, type_checker);
  }

  /* set offset (if necessary) */
  if (!had_err && arguments->offset != GT_UNDEF_WORD)
    gt_gff3_in_stream_set_offset(gff3_in_stream, arguments->offset);

  /* set offsetfile (if necessary) */
  if (!had_err && gt_str_length(arguments->offsetfile)) {
    had_err = gt_gff3_in_stream_set_offsetfile(gff3_in_stream,
                                               arguments->offsetfile, err);
  }

  /* enable strict mode (if necessary) */
  if (!had_err && arguments->strict)
    gt_gff3_in_stream_enable_strict_mode((GtGFF3InStream*) gff3_in_stream);
  /* enable tidy mode (if necessary) */
  if (!had_err && arguments->tidy)
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

  /* fix region boundaries (if necessary) */
  if (!had_err && arguments->fixboundaries)
    gt_gff3_in_stream_fix_region_boundaries((GtGFF3InStream*) gff3_in_stream);

  /* create load stream (if necessary) */
  if (!had_err && arguments->load) {
    load_stream = gt_load_stream_new(last_stream);
    last_stream = load_stream;
  }

  /* create sort stream (if necessary) */
  if (!had_err && arguments->sort) {
    sort_stream = gt_sort_stream_new(last_stream);
    last_stream = sort_stream;
  }

  /* create merge feature stream (if necessary) */
  if (!had_err && arguments->mergefeat) {
    /* the merge stage is attached to the sort stream directly, so a sort
       stream must exist here; presumably the option parser makes the
       mergefeat option require the sort option -- TODO confirm */
    gt_assert(sort_stream);
    merge_feature_stream = gt_merge_feature_stream_new(sort_stream);
    last_stream = merge_feature_stream;
  }

  /* create addintrons stream (if necessary) */
  if (!had_err && arguments->addintrons) {
    gt_assert(last_stream);
    add_introns_stream = gt_add_introns_stream_new(last_stream);
    last_stream = add_introns_stream;
  }

  /* create setsource stream (if necessary) */
  if (!had_err && gt_str_length(arguments->newsource) > 0) {
    gt_assert(last_stream);
    /* NOTE(review): the visitor is not deleted in this function; presumably
       the visitor stream takes ownership of it -- confirm */
    GtNodeVisitor *ssv = gt_set_source_visitor_new(arguments->newsource);
    set_source_stream = gt_visitor_stream_new(last_stream, ssv);
    last_stream = set_source_stream;
  }

  /* create gff3 output stream */
  /* without arguments->show no output stage is added and the chain built so
     far is pulled as it is */
  if (!had_err && arguments->show) {
    gff3_out_stream = gt_gff3_out_stream_new(last_stream, arguments->outfp);
    last_stream = gff3_out_stream;
    gt_gff3_out_stream_set_fasta_width((GtGFF3OutStream*) last_stream,
                                       arguments->width);
    if (arguments->retainids)
      gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream*) last_stream);
  }

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(last_stream, err);

  /* free */
  /* stages that were not created are still NULL at this point */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(sort_stream);
  gt_node_stream_delete(load_stream);
  gt_node_stream_delete(merge_feature_stream);
  gt_node_stream_delete(add_introns_stream);
  gt_node_stream_delete(set_source_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_type_checker_delete(type_checker);

  return had_err;
}
Exemple #12
0
/* Runner of the ltrdigest tool (region-mapping variant).
   The sequence source is either a region mapping created from the seqid2file
   options or, as a legacy fallback, the encoded sequence named by the last
   command line argument (which is then clipped off <argv>).
   argv[parsed_args] is the sorted GFF3 input file. The nodes are pulled
   through
     GFF3 in -> [pdom visitor] -> [PBS visitor] -> PPT visitor
             -> strand assign visitor -> [tabular file out] -> GFF3 out.
   The protein domain stage is added when HMM files are given, the PBS stage
   when a tRNA library is given, and the tabular output stage when an output
   prefix was specified.
   Returns 0 on success and a non-zero value on failure. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream  = NULL,
               *gff3_out_stream = NULL,
               *pdom_stream     = NULL,
               *ppt_stream      = NULL,
               *pbs_stream      = NULL,
               *tab_out_stream  = NULL,
               *sa_stream       = NULL,
               *last_stream     = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  GtRegionMapping *rmap = NULL;
  GtPdomModelSet *ms = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* determine and open sequence source */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    /* create region mapping */
    rmap = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rmap)
      had_err = -1;
  } else {
    GtEncseqLoader *el;
    GtEncseq *encseq;
    /* no new-style sequence source option given, fall back to legacy syntax */
    if (argc < 3) {
      gt_error_set(err, "missing mandatory argument(s)");
      had_err = -1;
    }
    if (!had_err) {
      el = gt_encseq_loader_new();
      gt_encseq_loader_disable_autosupport(el);
      gt_encseq_loader_require_md5_support(el);
      gt_encseq_loader_require_description_support(el);
      encseq = gt_encseq_loader_load(el, argv[argc-1], err);
      /* XXX: clip off terminal argument */
      gt_free((char*) argv[argc-1]);
      argv[argc-1] = NULL;
      argc--;
      gt_encseq_loader_delete(el);
      if (!encseq)
        had_err = -1;
      else {
        /* the local encseq handle is released right after the mapping has
           been created from it */
        rmap = gt_region_mapping_new_encseq_seqno(encseq);
        gt_encseq_delete(encseq);
      }
    }
  }
  gt_assert(had_err || rmap);

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->trna_lib_bs = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                           err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  /* Set HMMER cutoffs. */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  /* start of the chain: sorted GFF3 input */
  if (!had_err) {
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);
  }

  /* protein domain stage (only if HMM files were given) */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->evalue_cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->cutoff, rmap, err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                                 GT_LTRDIGEST_TAG);
        if (arguments->output_all_chains)
          gt_ltrdigest_pdom_visitor_output_all_chains((GtLTRdigestPdomVisitor*)
                                                                        pdom_v);
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
      }
    } else had_err = -1;
  }

  /* PBS stage (only if a tRNA library was loaded) */
  if (!had_err && arguments->trna_lib_bs) {
    GtNodeVisitor *pbs_v;
    pbs_v = gt_ltrdigest_pbs_visitor_new(rmap, arguments->pbs_radius,
                                         arguments->max_edist,
                                         arguments->alilen,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->ali_score_match,
                                         arguments->ali_score_mismatch,
                                         arguments->ali_score_insertion,
                                         arguments->ali_score_deletion,
                                         arguments->trna_lib_bs, err);
    if (pbs_v != NULL)
      last_stream = pbs_stream = gt_visitor_stream_new(last_stream, pbs_v);
    else
      had_err = -1;
  }

  /* PPT stage (always) */
  if (!had_err) {
    GtNodeVisitor *ppt_v;
    ppt_v = gt_ltrdigest_ppt_visitor_new(rmap, arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_pyrimidine_prob,
                                         arguments->ppt_purine_prob,
                                         arguments->bkg_a_prob,
                                         arguments->bkg_g_prob,
                                         arguments->bkg_t_prob,
                                         arguments->bkg_c_prob,
                                         arguments->ubox_u_prob,
                                         arguments->ppt_radius,
                                         arguments->max_ubox_dist, err);
    if (ppt_v != NULL)
      last_stream = ppt_stream = gt_visitor_stream_new(last_stream, ppt_v);
    else
      had_err = -1;
  }

  /* strand assignment stage (always) */
  if (!had_err) {
    GtNodeVisitor *sa_v;
    sa_v = gt_ltrdigest_strand_assign_visitor_new();
    gt_assert(sa_v);
    last_stream = sa_stream = gt_visitor_stream_new(last_stream, sa_v);
  }

  /* attach tabular output stream, if requested */
  if (!had_err && gt_str_length(arguments->prefix) > 0)
  {
    last_stream = tab_out_stream = gt_ltrdigest_file_out_stream_new(
                                                last_stream,
                                                tests_to_run,
                                                rmap,
                                                gt_str_get(arguments->prefix),
                                                arguments->seqnamelen,
                                                err);
    if (!tab_out_stream)
      had_err = -1;
    if (!had_err && arguments->print_metadata)
    {
      had_err = gt_ltrdigest_file_out_stream_write_metadata(
                                         (GtLTRdigestFileOutStream*)
                                                               tab_out_stream,
                                         tests_to_run,
                                         gt_str_get(arguments->trna_lib),
                                         argv[arg],
                                         arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_radius,
                                         arguments->alilen,
                                         arguments->max_edist,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->pbs_radius,
                                         arguments->hmm_files,
                                         arguments->chain_max_gap_length,
                                         arguments->evalue_cutoff,
                                         err);
    }
    if (!had_err)
    {
      if (arguments->write_alignments)
        gt_ltrdigest_file_out_stream_enable_pdom_alignment_output(
                                                              tab_out_stream);
      if (arguments->write_aaseqs)
        gt_ltrdigest_file_out_stream_enable_aa_sequence_output(
                                                              tab_out_stream);
    }
  }

  /* Only build the output stage and pull if everything above succeeded:
     after a failed tabular stage <last_stream> may be NULL, and pulling
     would overwrite the error code recorded so far. */
  if (!had_err)
  {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  /* free; everything that was never created is still NULL here */
  gt_pdom_model_set_delete(ms);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ppt_stream);
  gt_node_stream_delete(pbs_stream);
  gt_node_stream_delete(sa_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_bioseq_delete(arguments->trna_lib_bs);
  gt_region_mapping_delete(rmap);

  return had_err;
}
Exemple #13
0
/* Tool runner: builds a TIR stream from the parsed <tool_arguments>,
   optionally appends a protein domain visitor stream (only if HMM files were
   given), and pulls all nodes through a GFF3 output stream created with a
   NULL output file.
   Returns -1 if the TIR stream, the model set or the domain visitor cannot be
   created or the cutoff name is not one of "GA", "TC", "NONE"; otherwise the
   result of gt_node_stream_pull(). */
static int gt_tir_runner(GT_UNUSED int argc, GT_UNUSED const char **argv,
                         GT_UNUSED int parsed_args, void *tool_arguments,
                         GtError *err)
{
  GtTirArguments *arguments = tool_arguments;
  GtNodeStream *tir_stream = NULL,
               *pdom_stream = NULL,
               *gff3_out_stream = NULL,
               *tail = NULL;
  GtPdomModelSet *models = NULL;
  GtRegionMapping *region_mapping = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  tir_stream = gt_tir_stream_new(arguments->str_indexname,
                                 arguments->min_seed_length,
                                 arguments->min_TIR_length,
                                 arguments->max_TIR_length,
                                 arguments->min_TIR_distance,
                                 arguments->max_TIR_distance,
                                 arguments->arbit_scores,
                                 arguments->xdrop_belowscore,
                                 arguments->similarity_threshold,
                                 arguments->best_overlaps,
                                 arguments->no_overlaps,
                                 arguments->min_TSD_length,
                                 arguments->max_TSD_length,
                                 arguments->vicinity,
                                 err);
  /* nothing else has been allocated yet, so bailing out directly is safe */
  if (!tir_stream)
    return -1;
  tail = tir_stream;

  /* the region mapping is backed by the encoded sequence of the TIR stream */
  region_mapping = gt_region_mapping_new_encseq((GtEncseq*)
                            gt_tir_stream_get_encseq((GtTIRStream*) tir_stream),
                            true, false);
  gt_assert(region_mapping);

  /* protein domain annotation is only set up if HMM files were supplied */
  if (gt_str_array_size(arguments->hmm_files) > 0) {
    const char *cutoff_name = gt_str_get(arguments->cutoffs);

    /* translate the textual cutoff setting into its enum value */
    if (strcmp(cutoff_name, "GA") == 0) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    }
    else if (strcmp(cutoff_name, "TC") == 0) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    }
    else if (strcmp(cutoff_name, "NONE") == 0) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    }
    else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }

    if (!had_err) {
      models = gt_pdom_model_set_new(arguments->hmm_files, err);
      if (models == NULL) {
        had_err = -1;
      }
      else {
        GtNodeVisitor *pdom_v;
        pdom_v = gt_ltrdigest_pdom_visitor_new(models, arguments->cutoff,
                                               arguments->chain_max_gap_length,
                                               arguments->evalue_cutoff,
                                               region_mapping, err);
        if (pdom_v == NULL) {
          had_err = -1;
        }
        else {
          /* hook the visitor into the chain, then configure it */
          pdom_stream = gt_visitor_stream_new(tail, pdom_v);
          tail = pdom_stream;
          gt_ltrdigest_pdom_visitor_set_root_type((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                        gt_ft_terminal_inverted_repeat_element);
          gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                                   "TIRvish");
        }
      }
    }
  }

  /* the output stream is created even on error so cleanup stays uniform */
  gff3_out_stream = gt_gff3_out_stream_new(tail, NULL);
  tail = gff3_out_stream;

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(tail, err);

  gt_node_stream_delete(tir_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(gff3_out_stream);
  gt_region_mapping_delete(region_mapping);
  gt_pdom_model_set_delete(models);

  return had_err;
}
Exemple #14
0
// Main method: reads GFF3 from the files named after the parsed options (see
// optind), runs the nodes through an optional parent-inference stream, a gene
// stream and an optional set-source visitor, and writes the result as GFF3 to
// options.outstream.
// Returns 0 on success and 1 if pulling nodes through the stream chain failed.
int main(int argc, char * const *argv)
{
  GtError *error;
  GtLogger *logger;
  GtQueue *streams;
  GtNodeStream *stream, *last_stream;
  CanonGFF3Options options = { NULL, NULL, false };
  int exit_status = 0;

  gt_lib_init();
  error = gt_error_new();
  canon_gff3_parse_options(argc, argv + 0, &options, error);

  // every stream that gets created is queued so it can be deleted at the end
  streams = gt_queue_new();
  logger = gt_logger_new(true, "", stderr);

  stream = gt_gff3_in_stream_new_unsorted(argc - optind, (const char **)
                                                          argv+optind);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(options.infer)
  {
    // child type -> parent type to infer; keys and values are heap copies
    // released by the hashmap's gt_free_func callbacks
    GtHashmap *type_parents = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                             gt_free_func);
    gt_hashmap_add(type_parents, gt_cstr_dup("mRNA"), gt_cstr_dup("gene"));
    gt_hashmap_add(type_parents, gt_cstr_dup("tRNA"), gt_cstr_dup("gene"));
    stream = agn_infer_parent_stream_new(last_stream,
                                                 type_parents);
    gt_hashmap_delete(type_parents);
    gt_queue_add(streams, stream);
    last_stream = stream;
  }

  stream = agn_gene_stream_new(last_stream, logger);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(options.source != NULL)
  {
    GtNodeVisitor *ssv = gt_set_source_visitor_new(options.source);
    stream = gt_visitor_stream_new(last_stream, ssv);
    gt_queue_add(streams, stream);
    last_stream = stream;
  }

  stream = gt_gff3_out_stream_new(last_stream, options.outstream);
  if(!options.infer)
    gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(gt_node_stream_pull(last_stream, error) == -1)
  {
    // terminate the message with a newline and remember the failure so the
    // exit status reflects it
    fprintf(stderr, "[CanonGFF3] error processing node stream: %s\n",
            gt_error_get(error));
    exit_status = 1;
  }

  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);
  if(options.source != NULL)
    gt_str_delete(options.source);
  if(options.outstream != NULL)
    gt_file_delete(options.outstream);
  gt_error_delete(error);
  gt_logger_delete(logger);
  gt_lib_clean();

  return exit_status;
}
// Entry point. Expects exactly three arguments:
//   argv[1]  passed to gt_gff3_in_stream_new_sorted() as the GFF3 input
//   argv[2]  output file, opened with mode "w+"
//   argv[3]  passed through to CpGI_score_stream_new()
// Exits with status 1 on a usage or setup error; returns 1 if pulling nodes
// through the stream chain failed and 0 otherwise.
int main(int argc, char ** argv)
{
    GtNodeStream * in, * score, * out;
    GtFile * out_file;
    GtError * err;
    int status = 0;

    if (argc != 4)
    {
       usage(argv[0]);
       exit(1);
    }

    // initialize genometools
    gt_lib_init();
    err = gt_error_new();

    if (!(in = gt_gff3_in_stream_new_sorted(argv[1])))
    {
        fprintf(stderr, "Failed to open input stream with arg %s\n", argv[1]);
        exit(1);
    }

    if (!(out_file = gt_file_new(argv[2], "w+", err)))
    {
        gt_node_stream_delete(in);
        fprintf(stderr, "Failed to create output file %s\n", argv[2]);
        exit(1);
    }

    if (!(score = CpGI_score_stream_new(in, argv[3])))
    {

        gt_file_delete(out_file);
        gt_node_stream_delete(in);
        fprintf(stderr, "Failed to create CpGI score stream\n");
        exit(1);
    }

    // Only one output stream is created, on top of the score stream. An
    // additional stream created directly on `in` would be overwritten right
    // away and never deleted.
    if (!(out = gt_gff3_out_stream_new(score, out_file)))
    {
        gt_node_stream_delete(score);
        gt_file_delete(out_file);
        gt_node_stream_delete(in);
        fprintf(stderr, "Failed to create output stream\n");
        exit(1);
    }

    if (gt_node_stream_pull(out, err))
    {
        fprintf(stderr, "Failed to pull through out stream\n");
        status = 1;
    }

    // close genome tools
    gt_node_stream_delete(out);
    gt_node_stream_delete(score);
    gt_file_delete(out_file);
    gt_node_stream_delete(in);
    gt_error_delete(err);
    gt_lib_clean();
    return status;
}
Exemple #16
0
/* Tool runner: reads GFF3 from the files in argv[parsed_args..argc-1], filters
   the nodes with a select stream configured from <tool_arguments> (optionally
   followed by a targetbest select stream) and writes the result as GFF3 to
   arguments->outfp. If arguments->dropped_file is non-empty, that file is
   opened for writing and a GFF3 visitor on it is handed to the drop handler.
   Returns -1 if the select stream or the drop file cannot be created,
   otherwise the result of gt_node_stream_pull(). */
static int gt_select_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  SelectArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream, *select_stream,
               *targetbest_select_stream = NULL, *gff3_out_stream = NULL;
  int had_err = 0;
  GtFile *drop_file = NULL;
  GtNodeVisitor *gff3outvis = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create a filter stream */
  select_stream = gt_select_stream_new(gff3_in_stream, arguments->seqid,
                                       arguments->source,
                                       &arguments->contain_range,
                                       &arguments->overlap_range,
                                       arguments->strand,
                                       arguments->targetstrand,
                                       arguments->has_CDS,
                                       arguments->max_gene_length,
                                       arguments->max_gene_num,
                                       arguments->min_gene_score,
                                       arguments->max_gene_score,
                                       arguments->min_average_splice_site_prob,
                                       arguments->feature_num,
                                       arguments->filter_files,
                                       arguments->filter_logic,
                                       err);

  if (select_stream) {
    GtSelectStream *fs = (GtSelectStream*) select_stream;

    if (gt_str_length(arguments->dropped_file) > 0) {
      drop_file = gt_file_new(gt_str_get(arguments->dropped_file), "w", err);
      if (!drop_file) {
        /* the drop file could not be opened; stop here instead of building a
           visitor on a NULL file and pulling with <err> already set */
        had_err = -1;
      } else {
        gff3outvis = gt_gff3_visitor_new(drop_file);
        gt_select_stream_set_drophandler(fs, print_to_file_drophandler,
                                         (void*) gff3outvis);
      }
    } else {
      gt_select_stream_set_drophandler(fs, default_drophandler, NULL);
    }

    if (!had_err) {
      gt_select_stream_set_single_intron_factor(select_stream,
                                               arguments->single_intron_factor);

      if (arguments->targetbest) {
        targetbest_select_stream =
                                gt_targetbest_select_stream_new(select_stream);
      }

      /* create a gff3 output stream */
      gff3_out_stream = gt_gff3_out_stream_new(arguments->targetbest
                                               ? targetbest_select_stream
                                               : select_stream,
                                               arguments->outfp);

      /* pull the features through the stream and free them afterwards */
      had_err = gt_node_stream_pull(gff3_out_stream, err);
    }

    /* free; streams that were never created are still NULL here */
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(select_stream);
    gt_node_stream_delete(targetbest_select_stream);
  } else {
    had_err = -1;
  }
  gt_file_delete(drop_file);
  gt_node_visitor_delete(gff3outvis);
  gt_node_stream_delete(gff3_in_stream);
  return had_err;
}
/* Tool runner: loads annotations from the input files given after the output
   file name (argv[parsed_args] is the image file to write) into an in-memory
   feature index, then lays out and renders the chosen sequence region to that
   file.
   - arguments->input selects the parser: "gff", "bed" or "gtf".
   - arguments->seqid selects the sequence region; if empty, the first seqid
     of the index is used.
   - arguments->format selects "pdf", "ps" or "svg"; anything else yields PNG.
   Returns 0 on success, a non-zero value (with <err> set by the failing call)
   otherwise. */
static int gt_sketch_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GT_UNUSED GtError *err)
{
  GtSketchArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *add_introns_stream = NULL,
               *gff3_out_stream = NULL,
               *feature_stream = NULL,
               *sort_stream = NULL,
               *last_stream;
  GtFeatureIndex *features = NULL;
  const char *file;
  char *seqid = NULL,        /* seqid in use; may point into arguments->seqid */
       *owned_seqid = NULL;  /* seqid obtained from the index; freed here */
  GtRange qry_range, sequence_region_range;
  GtArray *results = NULL;
  GtStyle *sty = NULL;
  GtStr *prog, *defaultstylefile = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtImageInfo* ii = NULL;
  GtCanvas *canvas = NULL;
  GtUword height;
  bool has_seqid;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(arguments);

  /* derive the path of the default style file from the program name, i.e.
     argv[0] up to the first blank */
  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, argv[0],
                        gt_cstr_length_up_to_char(argv[0], ' '));
  defaultstylefile = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (!defaultstylefile)
    had_err = -1;
  if (!had_err) {
    gt_str_append_cstr(defaultstylefile, "/sketch/default.style");
  }

  /* first remaining argument is the output image file */
  file = argv[parsed_args];
  if (!had_err) {
    /* create feature index */
    features = gt_feature_index_memory_new();
    parsed_args++;

    /* create an input stream */
    if (strcmp(gt_str_get(arguments->input), "gff") == 0)
    {
      in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                 argv + parsed_args);
      if (arguments->verbose)
        gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);
    } else if (strcmp(gt_str_get(arguments->input), "bed") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_bed_in_stream_new(NULL);
      else
        in_stream = gt_bed_in_stream_new(argv[parsed_args]);
    } else if (strcmp(gt_str_get(arguments->input), "gtf") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_gtf_in_stream_new(NULL);
      else
        in_stream = gt_gtf_in_stream_new(argv[parsed_args]);
    }
    last_stream = in_stream;

    /* create add introns stream if -addintrons was used */
    if (arguments->addintrons) {
      sort_stream = gt_sort_stream_new(last_stream);
      add_introns_stream = gt_add_introns_stream_new(sort_stream);
      last_stream = add_introns_stream;
    }

    /* create gff3 output stream if -pipe was used */
    if (arguments->pipe) {
      gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
      last_stream = gff3_out_stream;
    }

    /* create feature stream */
    feature_stream = gt_feature_stream_new(last_stream, features);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(feature_stream, err);

    gt_node_stream_delete(feature_stream);
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(sort_stream);
    gt_node_stream_delete(add_introns_stream);
    gt_node_stream_delete(in_stream);
  }

  if (!had_err) {
    had_err = gt_feature_index_has_seqid(features,
                                         &has_seqid,
                                         gt_str_get(arguments->seqid),
                                         err);
  }

  /* if seqid is empty, take first one added to index */
  if (!had_err && strcmp(gt_str_get(arguments->seqid),"") == 0) {
    owned_seqid = gt_feature_index_get_first_seqid(features, err);
    seqid = owned_seqid;
    if (seqid == NULL) {
      gt_error_set(err, "GFF input file must contain a sequence region!");
      had_err = -1;
    }
  }
  else if (!had_err && !has_seqid) {
    gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                 gt_str_get(arguments->seqid));
    had_err = -1;
  }
  else if (!had_err) {
    /* borrowed from arguments->seqid: must NOT be passed to gt_free() */
    seqid = gt_str_get(arguments->seqid);
  }

  results = gt_array_new(sizeof (GtGenomeNode*));
  if (!had_err) {
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid,
                                                   err);
  }
  if (!had_err) {
    /* undefined start/end default to the bounds of the sequence region */
    qry_range.start = (arguments->start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->start);
    qry_range.end   = (arguments->end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->end);
  }

  if (!had_err) {
    if (arguments->verbose)
      fprintf(stderr, "# of results: "GT_WU"\n", gt_array_size(results));

    /* find and load style file */
    if (!(sty = gt_style_new(err)))
      had_err = -1;
    /* only look at the style file if the style object exists; otherwise the
       error set by gt_style_new() would be replaced by a misleading
       "does not exist" message */
    if (!had_err) {
      if (gt_str_length(arguments->stylefile) == 0) {
        gt_str_append_str(arguments->stylefile, defaultstylefile);
      } else if (gt_file_exists(gt_str_get(arguments->stylefile))) {
        if (arguments->unsafe)
          gt_style_unsafe_mode(sty);
      } else {
        had_err = -1;
        gt_error_set(err, "style file '%s' does not exist!",
                          gt_str_get(arguments->stylefile));
      }
    }
    if (!had_err)
      had_err = gt_style_load_file(sty, gt_str_get(arguments->stylefile), err);
  }

  if (!had_err) {
    /* create and write image file */
    if (!(d = gt_diagram_new(features, seqid, &qry_range, sty, err)))
      had_err = -1;
    if (!had_err && arguments->flattenfiles)
      gt_diagram_set_track_selector_func(d, flattened_file_track_selector,
                                         NULL);
    if (had_err || !(l = gt_layout_new(d, arguments->width, sty, err)))
      had_err = -1;
    if (!had_err)
      had_err = gt_layout_get_height(l, &height, err);
    if (!had_err) {
      ii = gt_image_info_new();

      /* pick the graphics backend from the requested format; PNG is the
         fallback for every other value */
      if (strcmp(gt_str_get(arguments->format),"pdf")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PDF,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"ps")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PS,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"svg")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_SVG,
                                          arguments->width,
                                          height, ii, err);
      }
      else {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PNG,
                                          arguments->width,
                                          height, ii, err);
      }
      if (!canvas)
        had_err = -1;
      if (!had_err) {
        had_err = gt_layout_sketch(l, canvas, err);
      }
      if (!had_err) {
        if (arguments->showrecmaps) {
          /* print image map coordinates and feature type of each rec map */
          GtUword i;
          const GtRecMap *rm;
          for (i = 0; i < gt_image_info_num_of_rec_maps(ii) ;i++) {
            char buf[BUFSIZ];
            rm = gt_image_info_get_rec_map(ii, i);
            (void) gt_rec_map_format_html_imagemap_coords(rm, buf, BUFSIZ);
            printf("%s, %s\n",
                   buf,
                   gt_feature_node_get_type(gt_rec_map_get_genome_feature(rm)));
          }
        }
        if (arguments->use_streams) {
          /* render into a string first, then write it out via GtFile */
          GtFile *outfile;
          GtStr *str = gt_str_new();
          gt_canvas_cairo_file_to_stream((GtCanvasCairoFile*) canvas, str);
          outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, file, "w+", err);
          if (outfile) {
            gt_file_xwrite(outfile, gt_str_get_mem(str), gt_str_length(str));
            gt_file_delete(outfile);
          } else {
            had_err = -1;
          }
          gt_str_delete(str);
        } else {
          had_err = gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas,
                                                 file,
                                                 err);
        }
      }
    }
  }

  /* free; only the seqid obtained from the feature index is owned here */
  gt_free(owned_seqid);
  gt_canvas_delete(canvas);
  gt_layout_delete(l);
  gt_image_info_delete(ii);
  gt_style_delete(sty);
  gt_diagram_delete(d);
  gt_array_delete(results);
  gt_str_delete(defaultstylefile);
  gt_feature_index_delete(features);

  return had_err;
}