Esempio n. 1
0
GtFeatureIndex *agn_import_simple(int numfiles, const char **filenames,
                                  char *type, AgnLogger *logger)
{
  GtFeatureIndex *features = gt_feature_index_memory_new();

  GtNodeStream *gff3 = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3);

  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, type, type);
  GtNodeStream *filterstream = agn_filter_stream_new(gff3, typestokeep);

  GtNodeStream *featstream = gt_feature_out_stream_new(filterstream, features);

  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(error));
  }
  gt_error_delete(error);

  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(features);
    features = NULL;
  }
  gt_node_stream_delete(gff3);
  gt_node_stream_delete(filterstream);
  gt_node_stream_delete(featstream);
  return features;
}
Esempio n. 2
0
int gt_gff3_to_gtf(int argc, const char **argv, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *gtf_out_stream = NULL;
  int parsed_args, had_err = 0;
  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, argc, argv, err)) {
    case OPTIONPARSER_OK: break;
    case OPTIONPARSER_ERROR: return -1;
    case OPTIONPARSER_REQUESTS_EXIT: return 0;
  }

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);

  if (!gff3_in_stream)
    had_err = -1;

  if (!had_err) {
    /* create a gtf output stream */
    gtf_out_stream = gt_gtf_out_stream_new(gff3_in_stream, NULL);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(gtf_out_stream, err);
  }

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(gtf_out_stream);

  return had_err;
}
Esempio n. 3
0
GtFeatureIndex *agn_import_canonical(int numfiles, const char **filenames,
                                     AgnLogger *logger)
{
  GtNodeStream *gff3 = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3);

  GtFeatureIndex *features = gt_feature_index_memory_new();
  GtNodeStream *cgstream = agn_canon_gene_stream_new(gff3, logger);
  GtNodeStream *featstream = gt_feature_out_stream_new(cgstream, features);

  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(error));
  }
  gt_error_delete(error);

  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(features);
    features = NULL;
  }
  gt_node_stream_delete(gff3);
  gt_node_stream_delete(cgstream);
  gt_node_stream_delete(featstream);
  return features;
}
Esempio n. 4
0
static int gt_seqids_runner(GT_UNUSED int argc, const char **argv,
                                  int parsed_args,
                                  GT_UNUSED void *tool_arguments, GtError *err)
{
  GtNodeStream *in_stream, *v_stream;
  GtCstrTable *cst;
  int had_err = 0;
  gt_error_check(err);

  cst = gt_cstr_table_new();
  in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                             argv + parsed_args);
  v_stream = gt_visitor_stream_new(in_stream, gt_collect_ids_visitor_new(cst));

  had_err = gt_node_stream_pull(v_stream, err);
  if (!had_err) {
    GtStrArray *seqids;
    GtUword i;
    seqids = gt_cstr_table_get_all(cst);
    for (i = 0; i < gt_str_array_size(seqids); i++) {
      printf("%s\n", gt_str_array_get(seqids, i));
    }
    gt_str_array_delete(seqids);
  }

  gt_node_stream_delete(v_stream);
  gt_node_stream_delete(in_stream);
  gt_cstr_table_delete(cst);
  return had_err;
}
Esempio n. 5
0
static int gt_bed_to_gff3_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  GtNodeStream *bed_in_stream = NULL, *gff3_out_stream = NULL;
  BEDToGFF3Arguments *arguments = tool_arguments;
  int had_err;

  gt_error_check(err);

  /* create a BED input stream */
  bed_in_stream = gt_bed_in_stream_new(argv[parsed_args]);
  gt_bed_in_stream_set_feature_type((GtBEDInStream*) bed_in_stream,
                                    gt_str_get(arguments->feature_type));
  gt_bed_in_stream_set_thick_feature_type((GtBEDInStream*) bed_in_stream,
                                          gt_str_get(arguments
                                                     ->thick_feature_type));
  gt_bed_in_stream_set_block_type((GtBEDInStream*) bed_in_stream,
                                  gt_str_get(arguments->block_type));

  /* create a GFF3 output stream */
  /* XXX: use proper genfile */
  gff3_out_stream = gt_gff3_out_stream_new(bed_in_stream, NULL);

  /* pull the features through the stream and free them afterwards */
  had_err = gt_node_stream_pull(gff3_out_stream, err);

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(bed_in_stream);

  return had_err;
}
Esempio n. 6
0
static void infer_cds_visitor_test_data(GtQueue *queue)
{
  GtError *error = gt_error_new();
  const char *file = "data/gff3/grape-codons.gff3";
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &file);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3in);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3in);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *icv_stream = agn_infer_cds_stream_new(gff3in, NULL, logger);
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *arraystream = gt_array_out_stream_new(icv_stream, feats, error);
  int pullresult = gt_node_stream_pull(arraystream, error);
  if(pullresult == -1)
  {
    fprintf(stderr, "[AgnInferCDSVisitor::infer_cds_visitor_test_data] error "
            "processing features: %s\n", gt_error_get(error));
  }
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(icv_stream);
  gt_node_stream_delete(arraystream);
  gt_logger_delete(logger);
  gt_array_sort(feats, (GtCompare)agn_genome_node_compare);
  gt_array_reverse(feats);
  while(gt_array_size(feats) > 0)
  {
    GtFeatureNode *fn = *(GtFeatureNode **)gt_array_pop(feats);
    gt_queue_add(queue, fn);
  }
  gt_array_delete(feats);
  gt_error_delete(error);
}
Esempio n. 7
0
static int gt_mergefeat_runner(int argc, const char **argv, int parsed_args,
                          void *tool_arguments, GtError *err)
{
  InterFeatArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream,
               *merge_feature_stream,
               *gff3_out_stream;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);

  /* create merge feature stream */
  merge_feature_stream = gt_merge_feature_stream_new(gff3_in_stream);

  /* create gff3 output stream */
  gff3_out_stream = gt_gff3_out_stream_new(merge_feature_stream,
                                           arguments->outfp);

  /* pull the features through the stream and free them afterwards */
  had_err = gt_node_stream_pull(gff3_out_stream, err);

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(merge_feature_stream);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
Esempio n. 8
0
static int gt_csa_runner(GT_UNUSED int argc, const char **argv, int parsed_args,
                         void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream, *csa_stream, *gff3_out_stream;
  CSAArguments *arguments = tool_arguments;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  /* create the streams */
  gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);
  csa_stream      = gt_csa_stream_new(gff3_in_stream, arguments->join_length);
  gff3_out_stream = gt_gff3_out_stream_new(csa_stream, arguments->outfp);

  /* pull the features through the stream and free them afterwards */
  had_err = gt_node_stream_pull(gff3_out_stream, err);

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(csa_stream);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
Esempio n. 9
0
static int gt_splicesiteinfo_runner(int argc, const char **argv,
                                    int parsed_args, void *tool_arguments,
                                    GtError *err)
{
  SpliceSiteInfoArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream = NULL,
               *add_introns_stream = NULL,
               *splice_site_info_stream = NULL;
  GtRegionMapping *region_mapping;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (!had_err) {
    /* create gff3 input stream */
    gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                    argv + parsed_args);

    /* create region mapping */
    region_mapping = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!region_mapping)
      had_err = -1;
  }

  if (!had_err) {
    /* create addintrons stream (if necessary) */
    if (arguments->addintrons)
      add_introns_stream = gt_add_introns_stream_new(gff3_in_stream);

    /* create extract feature stream */
    splice_site_info_stream = gt_splice_site_info_stream_new(
                                                          arguments->addintrons
                                                          ? add_introns_stream
                                                          : gff3_in_stream,
                                                          region_mapping);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(splice_site_info_stream, err);
  }

  if (!had_err) {
    if (!gt_splice_site_info_stream_show(splice_site_info_stream,
                                         arguments->outfp)) {
      gt_warning("input file(s) contained no intron, use option -addintrons to "
                 "add introns automatically");
    }
  }

  /* free */
  gt_node_stream_delete(splice_site_info_stream);
  gt_node_stream_delete(add_introns_stream);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
Esempio n. 10
0
static void gv_test_calc_integrity(AgnUnitTest *test)
{
  const char *filename = "data/gff3/gaeval-stream-unit-test-2.gff3";
  GtNodeStream *align_in = gt_gff3_in_stream_new_unsorted(1, &filename);
  AgnGaevalParams params = { 0.6, 0.3, 0.05, 0.05, 400, 200, 100 };
  GtNodeVisitor *nv = agn_gaeval_visitor_new(align_in, params);
  AgnGaevalVisitor *gv = gaeval_visitor_cast(nv);
  gt_node_stream_delete(align_in);

  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "mRNA", "mRNA");
  GtNodeStream *filtstream = agn_filter_stream_new(gff3in, typestokeep);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *ics = agn_infer_cds_stream_new(filtstream, NULL, logger);
  GtNodeStream *ies = agn_infer_exons_stream_new(ics, NULL, logger);

  GtError *error = gt_error_new();
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *featstream = gt_array_out_stream_new(ies, feats, error);
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_calc_integrity] error "
            "processing GFF3: %s\n", gt_error_get(error));
    return;
  }
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(filtstream);
  gt_node_stream_delete(featstream);
  gt_node_stream_delete(ics);
  gt_node_stream_delete(ies);
  gt_logger_delete(logger);
  gt_hashmap_delete(typestokeep);

  agn_assert(gt_array_size(feats) == 2);
  GtFeatureNode *g1 = *(GtFeatureNode **)gt_array_get(feats, 0);
  GtFeatureNode *g2 = *(GtFeatureNode **)gt_array_get(feats, 1);

  double cov1 = gaeval_visitor_calculate_coverage(gv,  g1, error);
  double cov2 = gaeval_visitor_calculate_coverage(gv,  g2, error);
  double int1 = gaeval_visitor_calculate_integrity(gv, g1, cov1, NULL, error);
  double int2 = gaeval_visitor_calculate_integrity(gv, g2, cov2, NULL, error);

  bool test1 = fabs(cov1 - 1.000) < 0.001 &&
               fabs(cov2 - 0.997) < 0.001 &&
               fabs(int1 - 0.850) < 0.001 &&
               fabs(int2 - 0.863) < 0.001;
  agn_unit_test_result(test, "calculate integrity", test1);

  gt_error_delete(error);
  gt_array_delete(feats);
  gt_genome_node_delete((GtGenomeNode *)g1);
  gt_genome_node_delete((GtGenomeNode *)g2);
  gt_node_visitor_delete(nv);
}
Esempio n. 11
0
static int gt_inlineseq_split_runner(int argc, const char **argv,
                                     int parsed_args,
                              void *tool_arguments, GtError *err)
{
  GtInlineseqSplitArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream = NULL,
               *gff3_out_stream = NULL,
               *split_stream = NULL,
               *last_stream = NULL;
  GtFile *seq_out_file = NULL,
         *gff3_out_file = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (gt_str_length(arguments->seqoutfile) > 0) {
    seq_out_file = gt_file_new(gt_str_get(arguments->seqoutfile), "w+", err);
    if (!seq_out_file)
      had_err = -1;
  }

  if (!had_err && gt_str_length(arguments->gffoutfile) > 0) {
    gff3_out_file = gt_file_new(gt_str_get(arguments->gffoutfile), "w+", err);
    if (!gff3_out_file)
      had_err = -1;
  }

  if (!had_err) {
    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
    gt_assert(gff3_in_stream);
  }

  if (!had_err) {
    last_stream = split_stream = gt_sequence_node_out_stream_new(last_stream,
                                                                 seq_out_file,
                                                                 err);
    gt_assert(split_stream);
  }

  if (!had_err) {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           gff3_out_file);
    had_err = gt_node_stream_pull(last_stream, err);
  }

  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(split_stream);
  gt_file_delete(seq_out_file);
  gt_file_delete(gff3_out_file);

  return had_err;
}
Esempio n. 12
0
static int gt_gff3validator_runner(int argc, const char **argv, int parsed_args,
                                   void *tool_arguments, GtError *err)
{
    GFF3ValidatorArguments *arguments = tool_arguments;
    GtTypeChecker *type_checker = NULL;
    GtXRFChecker *xrf_checker = NULL;
    GtNodeStream *gff3_in_stream;
    int had_err = 0;

    gt_error_check(err);
    gt_assert(arguments);

    /* create a GFF3 input stream */
    gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                     argv + parsed_args);
    gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream);

    /* set different type checker if necessary */
    if (gt_typecheck_info_option_used(arguments->tci)) {
        type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
        if (!type_checker)
            had_err = -1;
        if (!had_err)
            gt_gff3_in_stream_set_type_checker(gff3_in_stream, type_checker);
    }

    /* set XRF checker */
    if (gt_xrfcheck_info_option_used(arguments->xci)) {
        xrf_checker = gt_xrfcheck_info_create_xrf_checker(arguments->xci, err);
        if (!xrf_checker)
            had_err = -1;
        if (!had_err)
            gt_gff3_in_stream_set_xrf_checker(gff3_in_stream, xrf_checker);
    }
    /* enable strict mode (if necessary) */
    if (!had_err && arguments->strict)
        gt_gff3_in_stream_enable_strict_mode((GtGFF3InStream*) gff3_in_stream);

    /* pull the features through the stream and free them afterwards */
    if (!had_err)
        had_err = gt_node_stream_pull(gff3_in_stream, err);

    if (!had_err)
        printf("input is valid GFF3\n");

    /* free */
    gt_node_stream_delete(gff3_in_stream);
    gt_type_checker_delete(type_checker);
    gt_xrf_checker_delete(xrf_checker);

    return had_err;
}
Esempio n. 13
0
static int gt_tir_runner(GT_UNUSED int argc, GT_UNUSED const char **argv,
                         GT_UNUSED int parsed_args, void *tool_arguments,
                         GtError *err)
{
  GtTirArguments *arguments = tool_arguments;
  GtNodeStream *tir_stream = NULL,
               *gff3_out_stream = NULL,
               *last_stream = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  tir_stream = gt_tir_stream_new(arguments->str_indexname,
                                 arguments->min_seed_length,
                                 arguments->min_TIR_length,
                                 arguments->max_TIR_length,
                                 arguments->min_TIR_distance,
                                 arguments->max_TIR_distance,
                                 arguments->arbit_scores,
                                 arguments->xdrop_belowscore,
                                 arguments->similarity_threshold,
                                 arguments->best_overlaps,
                                 arguments->no_overlaps,
                                 arguments->min_TSD_length,
                                 arguments->max_TSD_length,
                                 arguments->vicinity,
                                 err);

  if (tir_stream == NULL)
    return -1;

  last_stream = tir_stream;

  /* gff3 outstream */
  gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
  last_stream = gff3_out_stream;

  /* output arguments line */
  /* gt_tir_showargsline(argc, argv); */

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(last_stream, err);

  /* free */
  gt_node_stream_delete(tir_stream);
  gt_node_stream_delete(gff3_out_stream);

  return had_err;
}
Esempio n. 14
0
int gt_chseqids(int argc, const char **argv, GtError *err)
{
  GtNodeStream *gff3_in_stream, *chseqids_stream, *sort_stream = NULL,
               *gff3_out_stream = NULL;
  ChseqidsArguments arguments;
  GtStr *chseqids;
  int parsed_args, had_err = 0;

  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, &arguments, argc, argv, err)) {
    case GT_OPTION_PARSER_OK: break;
    case GT_OPTION_PARSER_ERROR: return -1;
    case GT_OPTION_PARSER_REQUESTS_EXIT: return 0;
  }

  /* create the streams */
  gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[parsed_args + 1]);
  if (arguments.verbose && arguments.outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);
  chseqids = gt_str_new_cstr(argv[parsed_args]);
  chseqids_stream = gt_chseqids_stream_new(gff3_in_stream, chseqids, err);
  if (!chseqids_stream)
    had_err = -1;
  gt_str_delete(chseqids);
  if (!had_err) {
    if (arguments.sort) {
      sort_stream = gt_sort_stream_new(chseqids_stream);
      gff3_out_stream = gt_gff3_out_stream_new(sort_stream, arguments.outfp);
    }
    else {
      gff3_out_stream = gt_gff3_out_stream_new(chseqids_stream,
                                               arguments.outfp);
    }
  }

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(gff3_out_stream, err);

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(chseqids_stream);
  gt_node_stream_delete(sort_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_file_delete(arguments.outfp);

  return had_err;
}
Esempio n. 15
0
static int gt_inlineseq_add_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL,
               *add_stream = NULL,
               *gff3_out_stream = NULL,
               *last_stream = NULL;
  GtRegionMapping *rm = NULL;
  InlineseqAddArguments *arguments = tool_arguments;
  int had_err = 0;
  gt_error_check(err);

  /* add region mapping if given */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
  }

  if (!had_err) {
    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
    gt_assert(gff3_in_stream);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

    last_stream = add_stream = gt_sequence_node_add_stream_new(last_stream, rm,
                                                               err);
    if (!add_stream) {
      had_err = -1;
    }
  }

  if (!had_err) {
    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);
  }

  if (!had_err)
    had_err = gt_node_stream_pull(last_stream, err);

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(add_stream);
  gt_node_stream_delete(gff3_out_stream);
  gt_region_mapping_delete(rm);

  return had_err;
}
Esempio n. 16
0
static int gt_extractfeat_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *extract_feature_stream = NULL;
  GtExtractFeatArguments *arguments = tool_arguments;
  GtRegionMapping *region_mapping;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  if (!had_err) {
    /* create gff3 input stream */
    gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
    if (arguments->verbose)
      gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

    /* create region mapping */
    region_mapping = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!region_mapping)
      had_err = -1;
  }

  if (!had_err) {
    /* create extract feature stream */
    extract_feature_stream =
      gt_extract_feature_stream_new(gff3_in_stream,
                                    region_mapping,
                                    gt_str_get(arguments->type),
                                    arguments->join,
                                    arguments->translate,
                                    arguments->seqid,
                                    arguments->target,
                                    arguments->width,
                                    arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(extract_feature_stream, err);
  }

  /* free */
  gt_node_stream_delete(extract_feature_stream);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
Esempio n. 17
0
static int gt_cds_runner(GT_UNUSED int argc, const char **argv, int parsed_args,
                         void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream, *cds_stream = NULL, *gff3_out_stream = NULL;
  CDSArguments *arguments = tool_arguments;
  GtRegionMapping *region_mapping;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* create gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create region mapping */
  region_mapping = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
  if (!region_mapping)
    had_err = -1;

  if (!had_err) {
    /* create CDS stream */
    cds_stream = gt_cds_stream_new(gff3_in_stream, region_mapping,
                                   arguments->minorflen, GT_CDS_SOURCE_TAG,
                                   arguments->start_codon,
                                   arguments->final_stop_codon,
                                   arguments->generic_start_codons);

    /* create gff3 output stream */
    gff3_out_stream = gt_gff3_out_stream_new(cds_stream, arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(gff3_out_stream, err);
  }

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(cds_stream);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
Esempio n. 18
0
static void gv_test_intersect(AgnUnitTest *test)
{
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtError *error = gt_error_new();
  const char *filename = "data/gff3/gaeval-stream-unit-test-1.gff3";
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtNodeStream *fstream = gt_array_out_stream_new(gff3in, feats, error);
  int result = gt_node_stream_pull(fstream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_intersect] error "
            "processing GFF3: %s\n", gt_error_get(error));
    return;
  }
  gt_error_delete(error);
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(fstream);

  agn_assert(gt_array_size(feats) == 9);
  GtGenomeNode *g1 = *(GtGenomeNode **)gt_array_get(feats, 1);
  GtGenomeNode *g2 = *(GtGenomeNode **)gt_array_get(feats, 3);
  GtGenomeNode *g3 = *(GtGenomeNode **)gt_array_get(feats, 7);

  GtGenomeNode *est1 = *(GtGenomeNode **)gt_array_get(feats, 0);
  GtGenomeNode *est2 = *(GtGenomeNode **)gt_array_get(feats, 2);
  GtGenomeNode *est3 = *(GtGenomeNode **)gt_array_get(feats, 4);
  GtGenomeNode *est4 = *(GtGenomeNode **)gt_array_get(feats, 5);
  GtGenomeNode *est5 = *(GtGenomeNode **)gt_array_get(feats, 6);
  GtGenomeNode *est6 = *(GtGenomeNode **)gt_array_get(feats, 8);

  GtArray *cov = gaeval_visitor_intersect(g1, est1);
  bool test1 = cov == NULL;
  cov = gaeval_visitor_intersect(g1, est2);
  test1 = gt_array_size(cov) == 1;
  if(test1)
  {
    GtRange *range01 = gt_array_pop(cov);
    GtRange testrange = { 400, 500 };
    test1 = gt_range_compare(range01, &testrange) == 0;
  }
  agn_unit_test_result(test, "intersect (1)", test1);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est3);
  bool test2 = gt_array_size(cov) == 2;
  if(test2)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 800, 900 };
    GtRange testrange2 = { 1050, 1075 };
    test2 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (2)", test2);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est4);
  bool test3 = gt_array_size(cov) == 2;
  if(test3)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 1070, 1125 };
    GtRange testrange2 = { 1250, 1310 };
    test3 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (3)", test3);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est5);
  bool test4 = gt_array_size(cov) == 2;
  if(test4)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2000, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test4 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (4)", test4);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est6);
  bool test5 = gt_array_size(cov) == 2;
  if(test5)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2500, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test5 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (5)", test5);
  gt_array_delete(cov);

  gt_array_delete(feats);
  gt_genome_node_delete(g1);
  gt_genome_node_delete(g2);
  gt_genome_node_delete(g3);
  gt_genome_node_delete(est1);
  gt_genome_node_delete(est2);
  gt_genome_node_delete(est3);
  gt_genome_node_delete(est4);
  gt_genome_node_delete(est5);
  gt_genome_node_delete(est6);
}
Esempio n. 19
0
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  SpeccheckArguments *arguments = tool_arguments;

  int had_err = 0;
  gt_error_check(err);

  res = gt_spec_results_new();
  gt_assert(res);
  t = gt_timer_new();
  gt_assert(t);

  spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                     err);
  if (!spec_visitor)
    return -1;

  /* add region mapping if given */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set runtime error behaviour */
  if (arguments->fail_hard)
    gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
  else
    gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

  /* redirect warnings */
  gt_warning_set_handler(gt_speck_record_warning, res);

  last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
  gt_assert(gff3_in_stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

  /* insert sort stream if requested */
  if (arguments->sort) {
    last_stream = sort_stream = gt_sort_stream_new(last_stream);
  }

  /* if -provideindex is given, collect input features and index them first */
  if (arguments->provideindex) {
    fi = gt_feature_index_memory_new();
    gt_assert(fi);

    last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
    gt_assert(feature_stream);

    last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream, arr,
                                                             err);
    if (!a_out_stream)
      had_err = -1;

    gt_timer_start(t);

    if (!had_err)
      had_err = gt_node_stream_pull(last_stream, err);

    if (!had_err) {
      gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                        gt_feature_index_ref(fi));
      last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
      if (!a_in_stream)
        had_err = -1;
    }
  } else {
    gt_timer_start(t);
  }

  if (!had_err) {
    checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
    gt_assert(checker_stream);
  }

  /* perform checking  */
  if (!had_err)
    had_err = gt_node_stream_pull(checker_stream, err);

  gt_timer_stop(t);

  /* reset warnings output */
  gt_warning_set_handler(gt_warning_default_handler, NULL);

  /* output results */
  if (!had_err)
    gt_spec_results_report(res, arguments->outfp,
                           gt_str_get(arguments->specfile),
                           arguments->verbose, arguments->colored,
                           !arguments->allexpects);

  if (!had_err)
    gt_timer_show_formatted(t, "Finished in " GT_WD ".%06ld s.\n", stderr);

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_timer_delete(t);
  gt_array_delete(arr);

  return had_err;
}
Esempio n. 20
0
GtNodeVisitor*
agn_gaeval_visitor_new(GtNodeStream *astream, AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta +
                         gparams.gamma + gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }


  // Set up node stream to load alignment features into memory
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");
  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(last_stream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
    gt_node_visitor_delete(nv);
    return NULL;
  }
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  return nv;
}
Esempio n. 21
0
static int gt_select_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  SelectArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream, *select_stream,
               *targetbest_select_stream = NULL, *gff3_out_stream;
  int had_err;
  GtFile *drop_file = NULL;
  GtNodeVisitor *gff3outvis = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create a filter stream */
  select_stream = gt_select_stream_new(gff3_in_stream, arguments->seqid,
                                       arguments->source,
                                       &arguments->contain_range,
                                       &arguments->overlap_range,
                                       arguments->strand,
                                       arguments->targetstrand,
                                       arguments->has_CDS,
                                       arguments->max_gene_length,
                                       arguments->max_gene_num,
                                       arguments->min_gene_score,
                                       arguments->max_gene_score,
                                       arguments->min_average_splice_site_prob,
                                       arguments->feature_num,
                                       arguments->filter_files,
                                       arguments->filter_logic,
                                       err);

  if (select_stream) {
    GtSelectStream *fs = (GtSelectStream*) select_stream;

    if (gt_str_length(arguments->dropped_file) > 0) {
      drop_file = gt_file_new(gt_str_get(arguments->dropped_file), "w", err);
      gff3outvis = gt_gff3_visitor_new(drop_file);
      gt_select_stream_set_drophandler(fs, print_to_file_drophandler,
                                       (void*) gff3outvis);
    } else {
      gt_select_stream_set_drophandler(fs, default_drophandler, NULL);
    }

    gt_select_stream_set_single_intron_factor(select_stream,
                                              arguments->single_intron_factor);

    if (arguments->targetbest)
      targetbest_select_stream = gt_targetbest_select_stream_new(select_stream);

    /* create a gff3 output stream */
    gff3_out_stream = gt_gff3_out_stream_new(arguments->targetbest
                                             ? targetbest_select_stream
                                             : select_stream,
                                             arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(gff3_out_stream, err);

    /* free */
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(select_stream);
    gt_node_stream_delete(targetbest_select_stream);
  } else {
    had_err = -1;
  }
  gt_file_delete(drop_file);
  gt_node_visitor_delete(gff3outvis);
  gt_node_stream_delete(gff3_in_stream);
  return had_err;
}
Esempio n. 22
0
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream   = NULL,
               *gff3_out_stream  = NULL,
               *ltrdigest_stream = NULL,
               *tab_out_stream   = NULL,
               *last_stream      = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  const char *indexname = argv[arg+1];
  GtLogger *logger = gt_logger_new(arguments->verbose,
                                   GT_LOGGER_DEFLT_PREFIX, stdout);
  GtEncseqLoader *el;
  GtEncseq *encseq;
  gt_error_check(err);
  gt_assert(arguments);

  /* Set sequence encoder options. Defaults are ok. */
  el = gt_encseq_loader_new();
  gt_encseq_loader_set_logger(el, logger);

  /* Open sequence file */
  encseq = gt_encseq_loader_load(el, indexname, err);
  if (!encseq)
    had_err = -1;

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
   arguments->pbs_opts.trna_lib = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                                 err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

#ifdef HAVE_HMMER
  /* Open HMMER files if given. */
  if (!had_err && gt_str_array_size(arguments->pdom_opts.hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }
#endif

  if (!had_err)
  {
    /* set up stream flow
     * ------------------*/
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);

    last_stream = ltrdigest_stream = gt_ltrdigest_stream_new(last_stream,
                                                  tests_to_run,
                                                  encseq,
                                                  &arguments->pbs_opts,
                                                  &arguments->ppt_opts,
#ifdef HAVE_HMMER
                                                  &arguments->pdom_opts,
#endif
                                                  err);
    if (!ltrdigest_stream)
      had_err = -1;
  }

  if (!had_err)
  {
    /* attach tabular output stream, if requested */
    if (gt_str_length(arguments->prefix) > 0)
    {
      last_stream = tab_out_stream = gt_ltr_fileout_stream_new(last_stream,
                                              tests_to_run,
                                              encseq,
                                              gt_str_get(arguments->prefix),
                                              &arguments->ppt_opts,
                                              &arguments->pbs_opts,
#ifdef HAVE_HMMER
                                              &arguments->pdom_opts,
#endif
                                              gt_str_get(arguments->trna_lib),
                                              argv[arg+1],
                                              argv[arg],
                                              arguments->seqnamelen,
                                              err);
#ifdef HAVE_HMMER
    if (&arguments->pdom_opts.write_alignments)
      gt_ltr_fileout_stream_enable_pdom_alignment_output(tab_out_stream);
    if (&arguments->pdom_opts.write_aaseqs)
      gt_ltr_fileout_stream_enable_aa_sequence_output(tab_out_stream);
#endif
    }

    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ltrdigest_stream);
  if (tab_out_stream != NULL)
    gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);

  gt_encseq_loader_delete(el);
  gt_encseq_delete(encseq);
  encseq = NULL;
  gt_bioseq_delete(arguments->pbs_opts.trna_lib);
  gt_logger_delete(logger);

  return had_err;
}
Esempio n. 23
0
int gt_feature_in_stream_unit_test(GtError *error)
{
  GtNodeStream *src, *dest;
  GtFeatureIndex *prefeat, *postfeat;
  GtRange range1, range1test, range2, range2test;

  prefeat = in_stream_test_data(error);
  postfeat = gt_feature_index_memory_new();
  src = gt_feature_in_stream_new(prefeat);
  dest = gt_feature_out_stream_new(src, postfeat);
  int result = gt_node_stream_pull(dest, error);
  if (result == -1)
    return -1;

  GtStrArray *seqids = gt_feature_index_get_seqids(postfeat, error);
  if (gt_str_array_size(seqids) != 2)
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: expected 2 "
                 "seqids, found "GT_WU"", gt_str_array_size(seqids));
    return -1;
  }
  gt_str_array_delete(seqids);

  range1test.start = 500;  range1test.end = 75000;
  range2test.start = 4000; range2test.end = 9500;
  gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  gt_feature_index_delete(prefeat);
  gt_feature_index_delete(postfeat);
  gt_node_stream_delete(src);
  gt_node_stream_delete(dest);

  prefeat = in_stream_test_data(error);
  postfeat = gt_feature_index_memory_new();
  src = gt_feature_in_stream_new(prefeat);
  dest = gt_feature_out_stream_new(src, postfeat);
  gt_feature_in_stream_use_orig_ranges((GtFeatureInStream *)src);
  result = gt_node_stream_pull(dest, error);
  if (result == -1)
    return -1;

  range1test.start = 500;  range1test.end = 75000;
  range2test.start = 4000; range2test.end = 9500;
  gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  range1test.start = 1; range1test.end = 100000;
  range2test.start = 1; range2test.end = 10000;
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  gt_feature_index_delete(prefeat);
  gt_feature_index_delete(postfeat);
  gt_node_stream_delete(src);
  gt_node_stream_delete(dest);

  return 0;
}
Esempio n. 24
0
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTypeChecker *type_checker = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  GtStr *prog, *speclib;
  SpeccheckArguments *arguments = tool_arguments;

  int had_err = 0;
  gt_error_check(err);

  res = gt_spec_results_new();
  gt_assert(res);

  if (gt_file_exists(gt_str_get(arguments->format))) {
    speclib = gt_str_ref(arguments->format);
  } else {
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, gt_error_get_progname(err),
                    gt_cstr_length_up_to_char(gt_error_get_progname(err), ' '));
    speclib = gt_get_gtdata_path(gt_str_get(prog), NULL);
    gt_str_delete(prog);
    gt_str_append_cstr(speclib, "/spec/output_drivers/");
    gt_str_append_str(speclib, arguments->format);

    if (!gt_file_exists(gt_str_get(speclib))) {
      gt_error_set(err, "output driver file \"%s\" does not exist",
                   gt_str_get(speclib));
      had_err = -1;
    }
  }

  if (!had_err) {
    spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                       err);
    if (!spec_visitor) {
      gt_spec_results_delete(res);
      return -1;
    }
  }

  t = gt_timer_new();
  gt_assert(t);

  /* add region mapping if given */
  if (!had_err && gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set type checker if necessary */
  if (!had_err && gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_type_checker((GtSpecVisitor*) spec_visitor,
                                       type_checker);
  }

  if (!had_err) {
    /* set runtime error behaviour */
    if (arguments->fail_hard)
      gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
    else
      gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

    /* redirect warnings */
    gt_warning_set_handler(gt_speck_record_warning, res);

    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
    gt_assert(gff3_in_stream);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

    /* insert sort stream if requested */
    if (arguments->sort) {
      last_stream = sort_stream = gt_sort_stream_new(last_stream);
    }

    /* if -provideindex is given, collect input features and index them first */
    if (arguments->provideindex) {
      fi = gt_feature_index_memory_new();
      gt_assert(fi);

      last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
      gt_assert(feature_stream);

      last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream, arr,
                                                               err);
      if (!a_out_stream)
        had_err = -1;

      gt_timer_start(t);

      if (!had_err)
        had_err = gt_node_stream_pull(last_stream, err);

      if (!had_err) {
        gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                          gt_feature_index_ref(fi));
        last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
        if (!a_in_stream)
          had_err = -1;
      }
    } else {
      gt_timer_start(t);
    }

    if (!had_err) {
      checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
      gt_assert(checker_stream);
    }

    /* perform checking  */
    if (!had_err)
      had_err = gt_node_stream_pull(checker_stream, err);

    gt_timer_stop(t);

    /* reset warnings output */
    gt_warning_set_handler(gt_warning_default_handler, NULL);

    /* output results */
    if (!had_err) {
      GtStr *runtime = gt_str_new();
      gt_timer_get_formatted(t, GT_WD ".%06ld", runtime);
      had_err = gt_spec_results_render_template(res, gt_str_get(speclib),
                                                arguments->outfp,
                                                gt_str_get(arguments->specfile),
                                                arguments->verbose,
                                                arguments->colored,
                                                gt_str_get(runtime), err);
      gt_str_delete(runtime);
    }
  }

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_type_checker_delete(type_checker);
  gt_timer_delete(t);
  gt_array_delete(arr);
  gt_str_delete(speclib);

  return had_err;
}
int main(int argc, char ** argv)
{
    GtNodeStream * in, * score, * out;
    GtFile * out_file;
    GtError * err;

    if (argc != 4)
    {
       usage(argv[0]);
       exit(1);
    }

    // initilaize genometools
    gt_lib_init();
    err = gt_error_new();

    if (!(in = gt_gff3_in_stream_new_sorted(argv[1])))
    {
        fprintf(stderr, "Failed to open input stream with arg %s\n", argv[1]);
        exit(1);
    }

    if (!(out_file = gt_file_new(argv[2], "w+", err)))
    {
        gt_node_stream_delete(in);
        fprintf(stderr, "Failed to create output file %s\n", argv[2]);
        exit(1);
    }

    if (!(score = CpGI_score_stream_new(in, argv[3])))
    {

        gt_file_delete(out_file);
        gt_node_stream_delete(in);
        fprintf(stderr, "Failed to create CpGI score stream\n");
        exit(1);
    }
    out = gt_gff3_out_stream_new(in, out_file);
    
    if (!(out = gt_gff3_out_stream_new(score, out_file)))
    {
        gt_node_stream_delete(score);
        gt_file_delete(out_file);
        gt_node_stream_delete(in);
        fprintf(stderr, "Failed to create output stream\n");
        exit(1);
    }

    if (gt_node_stream_pull(out, err))
    {
        fprintf(stderr, "Failed to pull through out stream\n");
    }

    // close genome tools
    gt_node_stream_delete(out);
    gt_node_stream_delete(score);
    gt_file_delete(out_file);
    gt_node_stream_delete(in);
    gt_error_delete(err);
    gt_lib_clean();
    return 0;
}
Esempio n. 26
0
static int gt_gff3_runner(int argc, const char **argv, int parsed_args,
                          void *tool_arguments, GtError *err)
{
  GFF3Arguments *arguments = tool_arguments;
  GtTypeChecker *type_checker = NULL;
  GtNodeStream *gff3_in_stream,
               *sort_stream = NULL,
               *load_stream = NULL,
               *merge_feature_stream = NULL,
               *add_introns_stream = NULL,
               *set_source_stream = NULL,
               *gff3_out_stream = NULL,
               *last_stream;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);
  if (arguments->checkids)
    gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream);
  if (!arguments->addids)
    gt_gff3_in_stream_disable_add_ids(gff3_in_stream);

  last_stream = gff3_in_stream;

  /* set different type checker if necessary */
  if (gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_gff3_in_stream_set_type_checker(gff3_in_stream, type_checker);
  }

  /* set offset (if necessary) */
  if (!had_err && arguments->offset != GT_UNDEF_WORD)
    gt_gff3_in_stream_set_offset(gff3_in_stream, arguments->offset);

  /* set offsetfile (if necessary) */
  if (!had_err && gt_str_length(arguments->offsetfile)) {
    had_err = gt_gff3_in_stream_set_offsetfile(gff3_in_stream,
                                               arguments->offsetfile, err);
  }

  /* enable strict mode (if necessary) */
  if (!had_err && arguments->strict)
    gt_gff3_in_stream_enable_strict_mode((GtGFF3InStream*) gff3_in_stream);
  /* enable tidy mode (if necessary) */
  if (!had_err && arguments->tidy)
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

  if (!had_err && arguments->fixboundaries)
    gt_gff3_in_stream_fix_region_boundaries((GtGFF3InStream*) gff3_in_stream);

  /* create load stream (if necessary) */
  if (!had_err && arguments->load) {
    load_stream = gt_load_stream_new(last_stream);
    last_stream = load_stream;
  }

  /* create sort stream (if necessary) */
  if (!had_err && arguments->sort) {
    sort_stream = gt_sort_stream_new(last_stream);
    last_stream = sort_stream;
  }

  /* create merge feature stream (if necessary) */
  if (!had_err && arguments->mergefeat) {
    gt_assert(sort_stream);
    merge_feature_stream = gt_merge_feature_stream_new(sort_stream);
    last_stream = merge_feature_stream;
  }

  /* create addintrons stream (if necessary) */
  if (!had_err && arguments->addintrons) {
    gt_assert(last_stream);
    add_introns_stream = gt_add_introns_stream_new(last_stream);
    last_stream = add_introns_stream;
  }

  /* create setsource stream (if necessary) */
  if (!had_err && gt_str_length(arguments->newsource) > 0) {
    gt_assert(last_stream);
    GtNodeVisitor *ssv = gt_set_source_visitor_new(arguments->newsource);
    set_source_stream = gt_visitor_stream_new(last_stream, ssv);
    last_stream = set_source_stream;
  }

  /* create gff3 output stream */
  if (!had_err && arguments->show) {
    gff3_out_stream = gt_gff3_out_stream_new(last_stream, arguments->outfp);
    last_stream = gff3_out_stream;
    gt_gff3_out_stream_set_fasta_width((GtGFF3OutStream*) last_stream,
                                       arguments->width);
    if (arguments->retainids)
      gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream*) last_stream);
  }

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(last_stream, err);

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(sort_stream);
  gt_node_stream_delete(load_stream);
  gt_node_stream_delete(merge_feature_stream);
  gt_node_stream_delete(add_introns_stream);
  gt_node_stream_delete(set_source_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_type_checker_delete(type_checker);

  return had_err;
}
Esempio n. 27
0
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream  = NULL,
               *gff3_out_stream = NULL,
               *pdom_stream     = NULL,
               *ppt_stream      = NULL,
               *pbs_stream      = NULL,
               *tab_out_stream  = NULL,
               *sa_stream       = NULL,
               *last_stream     = NULL;
  int had_err      = 0,
      tests_to_run = 0,
      arg = parsed_args;
  GtRegionMapping *rmap = NULL;
  GtPdomModelSet *ms = NULL;
  gt_error_check(err);
  gt_assert(arguments);

  /* determine and open sequence source */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    /* create region mapping */
    rmap = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rmap)
      had_err = -1;
  } else {
    GtEncseqLoader *el;
    GtEncseq *encseq;
    /* no new-style sequence source option given, fall back to legacy syntax */
    if (argc < 3) {
      gt_error_set(err, "missing mandatory argument(s)");
      had_err = -1;
    }
    if (!had_err) {
      el = gt_encseq_loader_new();
      gt_encseq_loader_disable_autosupport(el);
      gt_encseq_loader_require_md5_support(el);
      gt_encseq_loader_require_description_support(el);
      encseq = gt_encseq_loader_load(el, argv[argc-1], err);
      /* XXX: clip off terminal argument */
      gt_free((char*) argv[argc-1]);
      argv[argc-1] = NULL;
      argc--;
      gt_encseq_loader_delete(el);
      if (!encseq)
        had_err = -1;
      else {
        rmap = gt_region_mapping_new_encseq_seqno(encseq);
        gt_encseq_delete(encseq);
      }
    }
  }
  gt_assert(had_err || rmap);

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->trna_lib_bs = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                           err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  /* Set HMMER cutoffs. */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0)
  {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  if (!had_err) {
    last_stream = gff3_in_stream  = gt_gff3_in_stream_new_sorted(argv[arg]);
  }

  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->evalue_cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->cutoff, rmap, err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                                 GT_LTRDIGEST_TAG);
        if (arguments->output_all_chains)
          gt_ltrdigest_pdom_visitor_output_all_chains((GtLTRdigestPdomVisitor*)
                                                                        pdom_v);
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
      }
    } else had_err = -1;
  }

  if (!had_err && arguments->trna_lib_bs) {
    GtNodeVisitor *pbs_v;
    pbs_v = gt_ltrdigest_pbs_visitor_new(rmap, arguments->pbs_radius,
                                         arguments->max_edist,
                                         arguments->alilen,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->ali_score_match,
                                         arguments->ali_score_mismatch,
                                         arguments->ali_score_insertion,
                                         arguments->ali_score_deletion,
                                         arguments->trna_lib_bs, err);
    if (pbs_v != NULL)
      last_stream = pbs_stream = gt_visitor_stream_new(last_stream, pbs_v);
    else
      had_err = -1;
  }

  if (!had_err) {
    GtNodeVisitor *ppt_v;
    ppt_v = gt_ltrdigest_ppt_visitor_new(rmap, arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_pyrimidine_prob,
                                         arguments->ppt_purine_prob,
                                         arguments->bkg_a_prob,
                                         arguments->bkg_g_prob,
                                         arguments->bkg_t_prob,
                                         arguments->bkg_c_prob,
                                         arguments->ubox_u_prob,
                                         arguments->ppt_radius,
                                         arguments->max_ubox_dist, err);
    if (ppt_v != NULL)
      last_stream = ppt_stream = gt_visitor_stream_new(last_stream, ppt_v);
    else
      had_err = -1;
  }

  if (!had_err) {
    GtNodeVisitor *sa_v;
    sa_v = gt_ltrdigest_strand_assign_visitor_new();
    gt_assert(sa_v);
    last_stream = sa_stream = gt_visitor_stream_new(last_stream, sa_v);
  }

  if (!had_err)
  {
    /* attach tabular output stream, if requested */
    if (gt_str_length(arguments->prefix) > 0)
    {
      last_stream = tab_out_stream = gt_ltrdigest_file_out_stream_new(
                                                  last_stream,
                                                  tests_to_run,
                                                  rmap,
                                                  gt_str_get(arguments->prefix),
                                                  arguments->seqnamelen,
                                                  err);
      if (!tab_out_stream)
        had_err = -1;
      if (!had_err && arguments->print_metadata)
      {
        had_err = gt_ltrdigest_file_out_stream_write_metadata(
                                           (GtLTRdigestFileOutStream*)
                                                                 tab_out_stream,
                                           tests_to_run,
                                           gt_str_get(arguments->trna_lib),
                                           argv[arg],
                                           arguments->ppt_len,
                                           arguments->ubox_len,
                                           arguments->ppt_radius,
                                           arguments->alilen,
                                           arguments->max_edist,
                                           arguments->offsetlen,
                                           arguments->trnaoffsetlen,
                                           arguments->pbs_radius,
                                           arguments->hmm_files,
                                           arguments->chain_max_gap_length,
                                           arguments->evalue_cutoff,
                                           err);
      }
      if (!had_err)
      {
        if (arguments->write_alignments)
          gt_ltrdigest_file_out_stream_enable_pdom_alignment_output(
                                                                tab_out_stream);
        if (arguments->write_aaseqs)
          gt_ltrdigest_file_out_stream_enable_aa_sequence_output(
                                                                tab_out_stream);
      }
    }

    last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(last_stream, err);
  }

  gt_pdom_model_set_delete(ms);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ppt_stream);
  gt_node_stream_delete(pbs_stream);
  gt_node_stream_delete(sa_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_bioseq_delete(arguments->trna_lib_bs);
  gt_region_mapping_delete(rmap);

  return had_err;
}
Esempio n. 28
0
static int gt_tir_runner(GT_UNUSED int argc, GT_UNUSED const char **argv,
                         GT_UNUSED int parsed_args, void *tool_arguments,
                         GtError *err)
{
  GtTirArguments *arguments = tool_arguments;
  GtNodeStream *tir_stream = NULL,
               *pdom_stream = NULL,
               *gff3_out_stream = NULL,
               *last_stream = NULL;
  GtPdomModelSet *ms = NULL;
  GtRegionMapping *rmap = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  tir_stream = gt_tir_stream_new(arguments->str_indexname,
                                 arguments->min_seed_length,
                                 arguments->min_TIR_length,
                                 arguments->max_TIR_length,
                                 arguments->min_TIR_distance,
                                 arguments->max_TIR_distance,
                                 arguments->arbit_scores,
                                 arguments->xdrop_belowscore,
                                 arguments->similarity_threshold,
                                 arguments->best_overlaps,
                                 arguments->no_overlaps,
                                 arguments->min_TSD_length,
                                 arguments->max_TSD_length,
                                 arguments->vicinity,
                                 err);

  if (tir_stream == NULL)
    return -1;
  last_stream = tir_stream;

  rmap = gt_region_mapping_new_encseq((GtEncseq*)
                            gt_tir_stream_get_encseq((GtTIRStream*) tir_stream),
                            true, false);
  gt_assert(rmap);

  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0)
  {
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->evalue_cutoff, rmap,
                                             err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
        gt_ltrdigest_pdom_visitor_set_root_type((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                        gt_ft_terminal_inverted_repeat_element);
        gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                                        pdom_v,
                                                 "TIRvish");
      }
    } else had_err = -1;
  }

  gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
  last_stream = gff3_out_stream;

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(last_stream, err);

  gt_node_stream_delete(tir_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(gff3_out_stream);
  gt_region_mapping_delete(rmap);
  gt_pdom_model_set_delete(ms);

  return had_err;
}
Esempio n. 29
0
static int gt_sketch_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GT_UNUSED GtError *err)
{
  GtSketchArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *add_introns_stream = NULL,
               *gff3_out_stream = NULL,
               *feature_stream = NULL,
               *sort_stream = NULL,
               *last_stream;
  GtFeatureIndex *features = NULL;
  const char *file;
  char *seqid = NULL;
  GtRange qry_range, sequence_region_range;
  GtArray *results = NULL;
  GtStyle *sty = NULL;
  GtStr *prog, *defaultstylefile = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtImageInfo* ii = NULL;
  GtCanvas *canvas = NULL;
  GtUword height;
  bool has_seqid;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(arguments);

  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, argv[0],
                        gt_cstr_length_up_to_char(argv[0], ' '));
  defaultstylefile = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (!defaultstylefile)
    had_err = -1;
  if (!had_err) {
    gt_str_append_cstr(defaultstylefile, "/sketch/default.style");
  }

  file = argv[parsed_args];
  if (!had_err) {
    /* create feature index */
    features = gt_feature_index_memory_new();
    parsed_args++;

    /* create an input stream */
    if (strcmp(gt_str_get(arguments->input), "gff") == 0)
    {
      in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                 argv + parsed_args);
      if (arguments->verbose)
        gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);
    } else if (strcmp(gt_str_get(arguments->input), "bed") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_bed_in_stream_new(NULL);
      else
        in_stream = gt_bed_in_stream_new(argv[parsed_args]);
    } else if (strcmp(gt_str_get(arguments->input), "gtf") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_gtf_in_stream_new(NULL);
      else
        in_stream = gt_gtf_in_stream_new(argv[parsed_args]);
    }
    last_stream = in_stream;

    /* create add introns stream if -addintrons was used */
    if (arguments->addintrons) {
      sort_stream = gt_sort_stream_new(last_stream);
      add_introns_stream = gt_add_introns_stream_new(sort_stream);
      last_stream = add_introns_stream;
    }

    /* create gff3 output stream if -pipe was used */
    if (arguments->pipe) {
      gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
      last_stream = gff3_out_stream;
    }

    /* create feature stream */
    feature_stream = gt_feature_stream_new(last_stream, features);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(feature_stream, err);

    gt_node_stream_delete(feature_stream);
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(sort_stream);
    gt_node_stream_delete(add_introns_stream);
    gt_node_stream_delete(in_stream);
  }

  if (!had_err) {
    had_err = gt_feature_index_has_seqid(features,
                                         &has_seqid,
                                         gt_str_get(arguments->seqid),
                                         err);
  }

  /* if seqid is empty, take first one added to index */
  if (!had_err && strcmp(gt_str_get(arguments->seqid),"") == 0) {
    seqid = gt_feature_index_get_first_seqid(features, err);
    if (seqid == NULL) {
      gt_error_set(err, "GFF input file must contain a sequence region!");
      had_err = -1;
    }
  }
  else if (!had_err && !has_seqid) {
    gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                 gt_str_get(arguments->seqid));
    had_err = -1;
  }
  else if (!had_err)
    seqid = gt_str_get(arguments->seqid);

  results = gt_array_new(sizeof (GtGenomeNode*));
  if (!had_err) {
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid,
                                                   err);
  }
  if (!had_err) {
    qry_range.start = (arguments->start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->start);
    qry_range.end   = (arguments->end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->end);
  }

  if (!had_err) {
    if (arguments->verbose)
      fprintf(stderr, "# of results: "GT_WU"\n", gt_array_size(results));

    /* find and load style file */
    if (!(sty = gt_style_new(err)))
      had_err = -1;
    if (gt_str_length(arguments->stylefile) == 0) {
      gt_str_append_str(arguments->stylefile, defaultstylefile);
    } else {
      if (!had_err && gt_file_exists(gt_str_get(arguments->stylefile))) {
        if (arguments->unsafe)
          gt_style_unsafe_mode(sty);
      }
      else
      {
        had_err = -1;
        gt_error_set(err, "style file '%s' does not exist!",
                          gt_str_get(arguments->stylefile));
      }
    }
    if (!had_err)
      had_err = gt_style_load_file(sty, gt_str_get(arguments->stylefile), err);
  }

  if (!had_err) {
    /* create and write image file */
    if (!(d = gt_diagram_new(features, seqid, &qry_range, sty, err)))
      had_err = -1;
    if (!had_err && arguments->flattenfiles)
      gt_diagram_set_track_selector_func(d, flattened_file_track_selector,
                                         NULL);
    if (had_err || !(l = gt_layout_new(d, arguments->width, sty, err)))
      had_err = -1;
    if (!had_err)
      had_err = gt_layout_get_height(l, &height, err);
    if (!had_err) {
      ii = gt_image_info_new();

      if (strcmp(gt_str_get(arguments->format),"pdf")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PDF,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"ps")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PS,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"svg")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_SVG,
                                          arguments->width,
                                          height, ii, err);
      }
      else {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PNG,
                                          arguments->width,
                                          height, ii, err);
      }
      if (!canvas)
        had_err = -1;
      if (!had_err) {
        had_err = gt_layout_sketch(l, canvas, err);
      }
      if (!had_err) {
        if (arguments->showrecmaps) {
          GtUword i;
          const GtRecMap *rm;
          for (i = 0; i < gt_image_info_num_of_rec_maps(ii) ;i++) {
            char buf[BUFSIZ];
            rm = gt_image_info_get_rec_map(ii, i);
            (void) gt_rec_map_format_html_imagemap_coords(rm, buf, BUFSIZ);
            printf("%s, %s\n",
                   buf,
                   gt_feature_node_get_type(gt_rec_map_get_genome_feature(rm)));
          }
        }
        if (arguments->use_streams) {
          GtFile *outfile;
          GtStr *str = gt_str_new();
          gt_canvas_cairo_file_to_stream((GtCanvasCairoFile*) canvas, str);
          outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, file, "w+", err);
          if (outfile) {
            gt_file_xwrite(outfile, gt_str_get_mem(str), gt_str_length(str));
            gt_file_delete(outfile);
          } else {
            had_err = -1;
          }
          gt_str_delete(str);
        } else {
          had_err = gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas,
                                                 file,
                                                 err);
        }
      }
    }
  }

  /* free */
  gt_free(seqid);
  gt_canvas_delete(canvas);
  gt_layout_delete(l);
  gt_image_info_delete(ii);
  gt_style_delete(sty);
  gt_diagram_delete(d);
  gt_array_delete(results);
  gt_str_delete(defaultstylefile);
  gt_feature_index_delete(features);

  return had_err;
}
Esempio n. 30
0
// Main method
int main(int argc, char * const *argv)
{
  GtError *error;
  GtLogger *logger;
  GtQueue *streams;
  GtNodeStream *stream, *last_stream;
  CanonGFF3Options options = { NULL, NULL, false };

  gt_lib_init();
  error = gt_error_new();
  canon_gff3_parse_options(argc, argv + 0, &options, error);

  streams = gt_queue_new();
  logger = gt_logger_new(true, "", stderr);

  stream = gt_gff3_in_stream_new_unsorted(argc - optind, (const char **)
                                                          argv+optind);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(options.infer)
  {
    GtHashmap *type_parents = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                             gt_free_func);
    gt_hashmap_add(type_parents, gt_cstr_dup("mRNA"), gt_cstr_dup("gene"));
    gt_hashmap_add(type_parents, gt_cstr_dup("tRNA"), gt_cstr_dup("gene"));
    stream = agn_infer_parent_stream_new(last_stream,
                                                 type_parents);
    gt_hashmap_delete(type_parents);
    gt_queue_add(streams, stream);
    last_stream = stream;
  }

  stream = agn_gene_stream_new(last_stream, logger);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(options.source != NULL)
  {
    GtNodeVisitor *ssv = gt_set_source_visitor_new(options.source);
    stream = gt_visitor_stream_new(last_stream, ssv);
    gt_queue_add(streams, stream);
    last_stream = stream;
  }

  stream = gt_gff3_out_stream_new(last_stream, options.outstream);
  if(!options.infer)
    gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream *)stream);
  gt_queue_add(streams, stream);
  last_stream = stream;

  if(gt_node_stream_pull(last_stream, error) == -1)
  {
    fprintf(stderr, "[CanonGFF3] error processing node stream: %s",
            gt_error_get(error));
  }

  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);
  if(options.source != NULL)
    gt_str_delete(options.source);
  if(options.outstream != NULL)
    gt_file_delete(options.outstream);
  gt_error_delete(error);
  gt_logger_delete(logger);
  gt_lib_clean();

  return 0;
}