/*
 * Load the features of a single type from one or more GFF3 files into a newly
 * created in-memory feature index.
 *
 * numfiles/filenames: the GFF3 inputs, handed to the unsorted GFF3 in-stream.
 * type:               the only feature type passed on by the filter stream.
 * logger:             receives a message if pulling the node stream fails.
 *
 * Returns the populated index, or NULL (index already freed) if the logger
 * reports an error after processing. The caller owns a non-NULL result.
 */
GtFeatureIndex *agn_import_simple(int numfiles, const char **filenames,
                                  char *type, AgnLogger *logger)
{
  GtFeatureIndex *features = gt_feature_index_memory_new();

  GtNodeStream *gff3 = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3);

  /* The map is used as a set: the type string is both key and value. */
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, type, type);
  GtNodeStream *filterstream = agn_filter_stream_new(gff3, typestokeep);
  GtNodeStream *featstream = gt_feature_out_stream_new(filterstream, features);

  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(error));
  }
  gt_error_delete(error);

  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(features);
    features = NULL;
  }

  gt_node_stream_delete(gff3);
  gt_node_stream_delete(filterstream);
  gt_node_stream_delete(featstream);
  /* Release our reference to the type set; previously it was never freed.
     Done after the streams are gone, as the other users of
     agn_filter_stream_new do. */
  gt_hashmap_delete(typestokeep);
  return features;
}
/*
 * Tool entry point: read GFF3 from the files named after the parsed options
 * and push every node through a GTF output stream (created with a NULL
 * output file argument).
 *
 * Returns 0 on success or when option parsing requests an exit, -1 when
 * option parsing fails or the input stream could not be created, otherwise
 * the result of pulling the stream (error details go to err).
 */
int gt_gff3_to_gtf(int argc, const char **argv, GtError *err)
{
  GtNodeStream *source = NULL;
  GtNodeStream *sink = NULL;
  int parsed_args;
  int status;

  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, argc, argv, err)) {
    case OPTIONPARSER_OK:
      break;
    case OPTIONPARSER_ERROR:
      return -1;
    case OPTIONPARSER_REQUESTS_EXIT:
      return 0;
  }

  /* everything after the parsed options is treated as an input file */
  source = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                          argv + parsed_args);
  if (source) {
    sink = gt_gtf_out_stream_new(source, NULL);
    /* drain the pipeline; nodes are freed as they pass through */
    status = gt_node_stream_pull(sink, err);
  }
  else {
    status = -1;
  }

  gt_node_stream_delete(source);
  gt_node_stream_delete(sink);
  return status;
}
/*
 * Load GFF3 files through a canonical-gene stream into a newly created
 * in-memory feature index.
 *
 * numfiles/filenames: the GFF3 inputs, handed to the unsorted GFF3 in-stream.
 * logger:             passed to the canonical-gene stream and also receives a
 *                     message if pulling the node stream fails.
 *
 * Returns the populated index, or NULL (index already freed) if the logger
 * reports an error after processing.
 */
GtFeatureIndex *agn_import_canonical(int numfiles, const char **filenames,
                                     AgnLogger *logger)
{
  GtNodeStream *instream = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  GtGFF3InStream *gff3reader = (GtGFF3InStream *)instream;
  gt_gff3_in_stream_check_id_attributes(gff3reader);
  gt_gff3_in_stream_enable_tidy_mode(gff3reader);

  GtFeatureIndex *index = gt_feature_index_memory_new();
  GtNodeStream *genestream = agn_canon_gene_stream_new(instream, logger);
  GtNodeStream *outstream = gt_feature_out_stream_new(genestream, index);

  GtError *gterr = gt_error_new();
  if(gt_node_stream_pull(outstream, gterr) == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(gterr));
  }
  gt_error_delete(gterr);

  /* Any error recorded in the logger invalidates the result. */
  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(index);
    index = NULL;
  }

  gt_node_stream_delete(instream);
  gt_node_stream_delete(genestream);
  gt_node_stream_delete(outstream);
  return index;
}
/*
 * Tool runner: feed the GFF3 input files through a collect-ids visitor and,
 * if the pull succeeded, print every entry of the resulting string table to
 * stdout, one per line.
 *
 * Returns the result of pulling the stream (error details go to err).
 */
static int gt_seqids_runner(GT_UNUSED int argc, const char **argv,
                            int parsed_args, GT_UNUSED void *tool_arguments,
                            GtError *err)
{
  GtCstrTable *ids;
  GtNodeStream *source, *collector;
  int status;

  gt_error_check(err);

  ids = gt_cstr_table_new();
  source = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                          argv + parsed_args);
  collector = gt_visitor_stream_new(source, gt_collect_ids_visitor_new(ids));

  status = gt_node_stream_pull(collector, err);
  if (status == 0) {
    GtStrArray *all_ids = gt_cstr_table_get_all(ids);
    GtUword idx = 0;
    while (idx < gt_str_array_size(all_ids)) {
      printf("%s\n", gt_str_array_get(all_ids, idx));
      idx++;
    }
    gt_str_array_delete(all_ids);
  }

  gt_node_stream_delete(collector);
  gt_node_stream_delete(source);
  gt_cstr_table_delete(ids);
  return status;
}
/*
 * Tool runner: read the BED file named by the first non-option argument,
 * configure the feature, thick-feature and block type names from the tool
 * arguments, and write the result through a GFF3 output stream.
 *
 * Returns the result of pulling the stream (error details go to err).
 */
static int gt_bed_to_gff3_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  BEDToGFF3Arguments *opts = tool_arguments;
  GtNodeStream *source = NULL;
  GtNodeStream *sink = NULL;
  GtBEDInStream *bed;
  int status;

  gt_error_check(err);

  /* BED input, with the configured type names applied */
  source = gt_bed_in_stream_new(argv[parsed_args]);
  bed = (GtBEDInStream*) source;
  gt_bed_in_stream_set_feature_type(bed, gt_str_get(opts->feature_type));
  gt_bed_in_stream_set_thick_feature_type(bed,
                                      gt_str_get(opts->thick_feature_type));
  gt_bed_in_stream_set_block_type(bed, gt_str_get(opts->block_type));

  /* GFF3 output */
  /* XXX: use proper genfile */
  sink = gt_gff3_out_stream_new(source, NULL);

  /* drain the pipeline; nodes are freed as they pass through */
  status = gt_node_stream_pull(sink, err);

  gt_node_stream_delete(sink);
  gt_node_stream_delete(source);
  return status;
}
/*
 * Unit-test fixture loader: run data/gff3/grape-codons.gff3 through the
 * infer-CDS stream, collect the resulting feature nodes, sort them with
 * agn_genome_node_compare, and append them to `queue`.
 *
 * The array is reversed after sorting and then emptied by popping, so the
 * nodes are added to the queue in sorted order. A failed pull is reported on
 * stderr but does not stop the function.
 */
static void infer_cds_visitor_test_data(GtQueue *queue)
{
  const char *file = "data/gff3/grape-codons.gff3";
  GtError *error = gt_error_new();

  GtNodeStream *source = gt_gff3_in_stream_new_unsorted(1, &file);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)source);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)source);

  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *cds_stream = agn_infer_cds_stream_new(source, NULL, logger);
  GtArray *collected = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *sink = gt_array_out_stream_new(cds_stream, collected, error);

  if(gt_node_stream_pull(sink, error) == -1)
  {
    fprintf(stderr, "[AgnInferCDSVisitor::infer_cds_visitor_test_data] error "
            "processing features: %s\n", gt_error_get(error));
  }

  gt_node_stream_delete(source);
  gt_node_stream_delete(cds_stream);
  gt_node_stream_delete(sink);
  gt_logger_delete(logger);

  /* Sort, then reverse so that popping from the end yields sorted order. */
  gt_array_sort(collected, (GtCompare)agn_genome_node_compare);
  gt_array_reverse(collected);
  while(gt_array_size(collected) != 0)
  {
    GtFeatureNode **slot = gt_array_pop(collected);
    gt_queue_add(queue, *slot);
  }

  gt_array_delete(collected);
  gt_error_delete(error);
}
/*
 * Tool runner: GFF3 input -> merge-feature stream -> GFF3 output written to
 * arguments->outfp.
 *
 * Returns the result of pulling the stream (error details go to err).
 */
static int gt_mergefeat_runner(int argc, const char **argv, int parsed_args,
                               void *tool_arguments, GtError *err)
{
  InterFeatArguments *arguments = tool_arguments;
  GtNodeStream *source;
  GtNodeStream *merger;
  GtNodeStream *sink;
  int status;

  gt_error_check(err);
  gt_assert(arguments);

  /* build the three-stage pipeline */
  source = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                          argv + parsed_args);
  merger = gt_merge_feature_stream_new(source);
  sink = gt_gff3_out_stream_new(merger, arguments->outfp);

  /* drain it; nodes are freed as they pass through */
  status = gt_node_stream_pull(sink, err);

  /* tear down in reverse order of construction */
  gt_node_stream_delete(sink);
  gt_node_stream_delete(merger);
  gt_node_stream_delete(source);

  return status;
}
/*
 * Tool runner: sorted GFF3 input -> CSA stream (parameterised by
 * arguments->join_length) -> GFF3 output written to arguments->outfp.
 * A progress bar is enabled on the input only when both verbose and an
 * output file are set.
 *
 * Returns the result of pulling the stream (error details go to err).
 */
static int gt_csa_runner(GT_UNUSED int argc, const char **argv,
                         int parsed_args, void *tool_arguments, GtError *err)
{
  CSAArguments *opts = tool_arguments;
  GtNodeStream *source, *csa, *sink;
  int status;

  gt_error_check(err);
  gt_assert(opts);

  /* input */
  source = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (opts->verbose && opts->outfp) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) source);
  }

  /* processing and output */
  csa = gt_csa_stream_new(source, opts->join_length);
  sink = gt_gff3_out_stream_new(csa, opts->outfp);

  /* drain the pipeline; nodes are freed as they pass through */
  status = gt_node_stream_pull(sink, err);

  gt_node_stream_delete(sink);
  gt_node_stream_delete(csa);
  gt_node_stream_delete(source);

  return status;
}
/*
 * Tool runner: read GFF3, optionally add introns, gather splice site
 * information using a region mapping built from arguments->s2fi, and show
 * the result on arguments->outfp.
 *
 * Returns -1 if the region mapping cannot be created, otherwise the result
 * of pulling the stream. A warning is emitted when the show call returns
 * false after a successful pull.
 */
static int gt_splicesiteinfo_runner(int argc, const char **argv,
                                    int parsed_args, void *tool_arguments,
                                    GtError *err)
{
  SpliceSiteInfoArguments *opts = tool_arguments;
  GtNodeStream *source = NULL, *introns = NULL, *collector = NULL;
  GtRegionMapping *mapping;
  int status = 0;

  gt_error_check(err);
  gt_assert(opts);

  /* gff3 input stream */
  source = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                          argv + parsed_args);

  /* region mapping */
  mapping = gt_seqid2file_region_mapping_new(opts->s2fi, err);
  if (!mapping)
    status = -1;

  if (status == 0) {
    GtNodeStream *upstream = source;

    /* insert the add-introns stage only when requested */
    if (opts->addintrons) {
      introns = gt_add_introns_stream_new(source);
      upstream = introns;
    }

    collector = gt_splice_site_info_stream_new(upstream, mapping);

    /* drain the pipeline; nodes are freed as they pass through */
    status = gt_node_stream_pull(collector, err);
  }

  if (status == 0
      && !gt_splice_site_info_stream_show(collector, opts->outfp)) {
    gt_warning("input file(s) contained no intron, use option -addintrons to "
               "add introns automatically");
  }

  gt_node_stream_delete(collector);
  gt_node_stream_delete(introns);
  gt_node_stream_delete(source);

  return status;
}
/*
 * Unit test for coverage/integrity calculation.
 *
 * Builds a GAEVAL visitor from the alignments in
 * data/gff3/gaeval-stream-unit-test-2.gff3, then loads the mRNA features of
 * the same file (with CDS and exons inferred) and checks the coverage and
 * integrity computed for the two resulting features against expected values
 * (tolerance 0.001). The outcome is recorded on `test` as
 * "calculate integrity".
 *
 * If loading the features fails, a message is printed to stderr, everything
 * allocated here is released, and no test result is recorded.
 */
static void gv_test_calc_integrity(AgnUnitTest *test)
{
  const char *filename = "data/gff3/gaeval-stream-unit-test-2.gff3";

  /* Visitor under test, fed with the alignments from the fixture file. */
  GtNodeStream *align_in = gt_gff3_in_stream_new_unsorted(1, &filename);
  AgnGaevalParams params = { 0.6, 0.3, 0.05, 0.05, 400, 200, 100 };
  GtNodeVisitor *nv = agn_gaeval_visitor_new(align_in, params);
  AgnGaevalVisitor *gv = gaeval_visitor_cast(nv);
  gt_node_stream_delete(align_in);

  /* Pipeline that collects the mRNA features into `feats`. */
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "mRNA", "mRNA");
  GtNodeStream *filtstream = agn_filter_stream_new(gff3in, typestokeep);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *ics = agn_infer_cds_stream_new(filtstream, NULL, logger);
  GtNodeStream *ies = agn_infer_exons_stream_new(ics, NULL, logger);
  GtError *error = gt_error_new();
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *featstream = gt_array_out_stream_new(ies, feats, error);
  int result = gt_node_stream_pull(featstream, error);

  /* The pipeline is no longer needed whether or not the pull succeeded.
     Releasing it before the error check keeps the failure path leak-free. */
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(filtstream);
  gt_node_stream_delete(featstream);
  gt_node_stream_delete(ics);
  gt_node_stream_delete(ies);
  gt_logger_delete(logger);
  gt_hashmap_delete(typestokeep);

  if(result == -1)
  {
    unsigned long i;
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_calc_integrity] error "
            "processing GFF3: %s\n", gt_error_get(error));
    /* Free whatever was collected before the failure, then the rest. */
    for(i = 0; i < gt_array_size(feats); i++)
    {
      GtFeatureNode *fn = *(GtFeatureNode **)gt_array_get(feats, i);
      gt_genome_node_delete((GtGenomeNode *)fn);
    }
    gt_array_delete(feats);
    gt_error_delete(error);
    gt_node_visitor_delete(nv);
    return;
  }

  agn_assert(gt_array_size(feats) == 2);
  GtFeatureNode *g1 = *(GtFeatureNode **)gt_array_get(feats, 0);
  GtFeatureNode *g2 = *(GtFeatureNode **)gt_array_get(feats, 1);

  double cov1 = gaeval_visitor_calculate_coverage(gv, g1, error);
  double cov2 = gaeval_visitor_calculate_coverage(gv, g2, error);
  double int1 = gaeval_visitor_calculate_integrity(gv, g1, cov1, NULL, error);
  double int2 = gaeval_visitor_calculate_integrity(gv, g2, cov2, NULL, error);
  bool test1 = fabs(cov1 - 1.000) < 0.001 && fabs(cov2 - 0.997) < 0.001 &&
               fabs(int1 - 0.850) < 0.001 && fabs(int2 - 0.863) < 0.001;
  agn_unit_test_result(test, "calculate integrity", test1);

  gt_error_delete(error);
  gt_array_delete(feats);
  gt_genome_node_delete((GtGenomeNode *)g1);
  gt_genome_node_delete((GtGenomeNode *)g2);
  gt_node_visitor_delete(nv);
}
/*
 * Tool runner: read GFF3, pass it through a sequence-node output stream
 * (writing to arguments->seqoutfile if given), then through a GFF3 output
 * stream (writing to arguments->gffoutfile if given).
 *
 * Either output file is opened only when its name is non-empty; otherwise
 * NULL is passed to the corresponding stream. Returns -1 if an output file
 * cannot be opened, otherwise the result of pulling the stream.
 */
static int gt_inlineseq_split_runner(int argc, const char **argv,
                                     int parsed_args, void *tool_arguments,
                                     GtError *err)
{
  GtInlineseqSplitArguments *opts = tool_arguments;
  GtNodeStream *source = NULL, *sink = NULL, *splitter = NULL;
  GtFile *seq_file = NULL, *gff3_file = NULL;
  int status = 0;

  gt_error_check(err);
  gt_assert(opts);

  /* open the optional sequence output file */
  if (gt_str_length(opts->seqoutfile) > 0) {
    seq_file = gt_file_new(gt_str_get(opts->seqoutfile), "w+", err);
    if (!seq_file)
      status = -1;
  }

  /* open the optional GFF3 output file */
  if (status == 0 && gt_str_length(opts->gffoutfile) > 0) {
    gff3_file = gt_file_new(gt_str_get(opts->gffoutfile), "w+", err);
    if (!gff3_file)
      status = -1;
  }

  if (status == 0) {
    source = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                            argv + parsed_args);
    gt_assert(source);

    splitter = gt_sequence_node_out_stream_new(source, seq_file, err);
    gt_assert(splitter);

    sink = gt_gff3_out_stream_new(splitter, gff3_file);
    status = gt_node_stream_pull(sink, err);
  }

  gt_node_stream_delete(source);
  gt_node_stream_delete(sink);
  gt_node_stream_delete(splitter);
  gt_file_delete(seq_file);
  gt_file_delete(gff3_file);

  return status;
}
/*
 * Tool runner for GFF3 validation: pull the input files through a GFF3
 * in-stream with ID attribute checking enabled and, depending on the tool
 * arguments, a custom type checker, an XRF checker and strict mode.
 *
 * Prints "input is valid GFF3" to stdout on success. Returns -1 if a
 * requested checker cannot be created, otherwise the result of pulling the
 * stream (error details go to err).
 */
static int gt_gff3validator_runner(int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  GFF3ValidatorArguments *arguments = tool_arguments;
  GtTypeChecker *type_checker = NULL;
  GtXRFChecker *xrf_checker = NULL;
  GtNodeStream *gff3_in_stream;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* create a GFF3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream);

  /* set different type checker if necessary */
  if (gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_gff3_in_stream_set_type_checker(gff3_in_stream, type_checker);
  }

  /* set XRF checker; skipped after an earlier failure so that a second
     factory is never called with an error that has already been set */
  if (!had_err && gt_xrfcheck_info_option_used(arguments->xci)) {
    xrf_checker = gt_xrfcheck_info_create_xrf_checker(arguments->xci, err);
    if (!xrf_checker)
      had_err = -1;
    if (!had_err)
      gt_gff3_in_stream_set_xrf_checker(gff3_in_stream, xrf_checker);
  }

  /* enable strict mode (if necessary) */
  if (!had_err && arguments->strict)
    gt_gff3_in_stream_enable_strict_mode((GtGFF3InStream*) gff3_in_stream);

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(gff3_in_stream, err);

  if (!had_err)
    printf("input is valid GFF3\n");

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_type_checker_delete(type_checker);
  gt_xrf_checker_delete(xrf_checker);

  return had_err;
}
/*
 * Tool runner: create a TIR stream from the tool arguments and write its
 * output through a GFF3 output stream (created with a NULL output file
 * argument).
 *
 * Returns -1 if the TIR stream cannot be created, otherwise the result of
 * pulling the stream (error details go to err).
 */
static int gt_tir_runner(GT_UNUSED int argc, GT_UNUSED const char **argv,
                         GT_UNUSED int parsed_args, void *tool_arguments,
                         GtError *err)
{
  GtTirArguments *opts = tool_arguments;
  GtNodeStream *finder = NULL;
  GtNodeStream *sink = NULL;
  int status;

  gt_error_check(err);
  gt_assert(opts);

  finder = gt_tir_stream_new(opts->str_indexname,
                             opts->min_seed_length,
                             opts->min_TIR_length,
                             opts->max_TIR_length,
                             opts->min_TIR_distance,
                             opts->max_TIR_distance,
                             opts->arbit_scores,
                             opts->xdrop_belowscore,
                             opts->similarity_threshold,
                             opts->best_overlaps,
                             opts->no_overlaps,
                             opts->min_TSD_length,
                             opts->max_TSD_length,
                             opts->vicinity,
                             err);
  if (finder == NULL) {
    return -1;
  }

  /* gff3 outstream */
  sink = gt_gff3_out_stream_new(finder, NULL);

  /* output arguments line */
  /* gt_tir_showargsline(argc, argv); */

  /* drain the pipeline; nodes are freed as they pass through */
  status = gt_node_stream_pull(sink, err);

  gt_node_stream_delete(finder);
  gt_node_stream_delete(sink);

  return status;
}
/*
 * Tool entry point: read the sorted GFF3 file given as the second
 * non-option argument, run it through a chseqids stream configured from the
 * first non-option argument, optionally sort, and write GFF3 to
 * arguments.outfp.
 *
 * Returns 0 on success or when option parsing requests an exit, -1 when
 * option parsing fails or the chseqids stream cannot be created, otherwise
 * the result of pulling the stream. arguments.outfp is deleted before
 * returning from the main path.
 */
int gt_chseqids(int argc, const char **argv, GtError *err)
{
  GtNodeStream *source, *renamer, *sorter = NULL, *sink = NULL;
  ChseqidsArguments arguments;
  GtStr *mapping_name;
  int parsed_args, status = 0;

  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, &arguments, argc, argv, err)) {
    case GT_OPTION_PARSER_OK:
      break;
    case GT_OPTION_PARSER_ERROR:
      return -1;
    case GT_OPTION_PARSER_REQUESTS_EXIT:
      return 0;
  }

  /* input stream, with optional progress bar */
  source = gt_gff3_in_stream_new_sorted(argv[parsed_args + 1]);
  if (arguments.verbose && arguments.outfp) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) source);
  }

  /* the GtStr is only needed while the chseqids stream is constructed */
  mapping_name = gt_str_new_cstr(argv[parsed_args]);
  renamer = gt_chseqids_stream_new(source, mapping_name, err);
  gt_str_delete(mapping_name);

  if (renamer) {
    GtNodeStream *upstream = renamer;

    if (arguments.sort) {
      sorter = gt_sort_stream_new(renamer);
      upstream = sorter;
    }
    sink = gt_gff3_out_stream_new(upstream, arguments.outfp);

    /* drain the pipeline; nodes are freed as they pass through */
    status = gt_node_stream_pull(sink, err);
  }
  else {
    status = -1;
  }

  gt_node_stream_delete(sink);
  gt_node_stream_delete(renamer);
  gt_node_stream_delete(sorter);
  gt_node_stream_delete(source);
  gt_file_delete(arguments.outfp);

  return status;
}
/*
 * Tool runner: read GFF3 in tidy mode, pass it through a sequence-node add
 * stream (with a region mapping if the seqid2file option was used, NULL
 * otherwise), and write GFF3 to arguments->outfp.
 *
 * Returns -1 if the region mapping or the add stream cannot be created,
 * otherwise the result of pulling the stream.
 */
static int gt_inlineseq_add_runner(int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  InlineseqAddArguments *opts = tool_arguments;
  GtNodeStream *source = NULL, *adder = NULL, *sink = NULL;
  GtRegionMapping *mapping = NULL;
  int status = 0;

  gt_error_check(err);

  /* add region mapping if given */
  if (gt_seqid2file_option_used(opts->s2fi)) {
    mapping = gt_seqid2file_region_mapping_new(opts->s2fi, err);
    if (!mapping)
      status = -1;
  }

  if (status == 0) {
    source = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                            argv + parsed_args);
    gt_assert(source);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) source);

    adder = gt_sequence_node_add_stream_new(source, mapping, err);
    if (adder) {
      sink = gt_gff3_out_stream_new(adder, opts->outfp);
      status = gt_node_stream_pull(sink, err);
    }
    else {
      status = -1;
    }
  }

  gt_node_stream_delete(source);
  gt_node_stream_delete(adder);
  gt_node_stream_delete(sink);
  gt_region_mapping_delete(mapping);

  return status;
}
/*
 * Tool runner: read the sorted GFF3 file named by the first non-option
 * argument and pull it through an extract-feature stream configured from
 * the tool arguments (type, join, translate, seqid, target, width, outfp)
 * and a region mapping built from arguments->s2fi.
 *
 * Returns -1 if the region mapping cannot be created, otherwise the result
 * of pulling the stream.
 */
static int gt_extractfeat_runner(GT_UNUSED int argc, const char **argv,
                                 int parsed_args, void *tool_arguments,
                                 GtError *err)
{
  GtExtractFeatArguments *opts = tool_arguments;
  GtNodeStream *source = NULL, *extractor = NULL;
  GtRegionMapping *mapping;
  int status = 0;

  gt_error_check(err);
  gt_assert(opts);

  /* gff3 input stream, with optional progress bar */
  source = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (opts->verbose) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) source);
  }

  /* region mapping */
  mapping = gt_seqid2file_region_mapping_new(opts->s2fi, err);
  if (!mapping)
    status = -1;

  if (status == 0) {
    extractor = gt_extract_feature_stream_new(source, mapping,
                                              gt_str_get(opts->type),
                                              opts->join, opts->translate,
                                              opts->seqid, opts->target,
                                              opts->width, opts->outfp);

    /* drain the pipeline; nodes are freed as they pass through */
    status = gt_node_stream_pull(extractor, err);
  }

  gt_node_stream_delete(extractor);
  gt_node_stream_delete(source);

  return status;
}
/*
 * Tool runner: sorted GFF3 input -> CDS stream (configured from the tool
 * arguments and a region mapping built from arguments->s2fi) -> GFF3 output
 * written to arguments->outfp. A progress bar is enabled on the input only
 * when both verbose and an output file are set.
 *
 * Returns -1 if the region mapping cannot be created, otherwise the result
 * of pulling the stream.
 */
static int gt_cds_runner(GT_UNUSED int argc, const char **argv,
                         int parsed_args, void *tool_arguments, GtError *err)
{
  CDSArguments *opts = tool_arguments;
  GtNodeStream *source, *cds = NULL, *sink = NULL;
  GtRegionMapping *mapping;
  int status = 0;

  gt_error_check(err);
  gt_assert(opts);

  /* gff3 input stream */
  source = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (opts->verbose && opts->outfp) {
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) source);
  }

  /* region mapping */
  mapping = gt_seqid2file_region_mapping_new(opts->s2fi, err);

  if (mapping) {
    cds = gt_cds_stream_new(source, mapping, opts->minorflen,
                            GT_CDS_SOURCE_TAG, opts->start_codon,
                            opts->final_stop_codon,
                            opts->generic_start_codons);
    sink = gt_gff3_out_stream_new(cds, opts->outfp);

    /* drain the pipeline; nodes are freed as they pass through */
    status = gt_node_stream_pull(sink, err);
  }
  else {
    status = -1;
  }

  gt_node_stream_delete(sink);
  gt_node_stream_delete(cds);
  gt_node_stream_delete(source);

  return status;
}
/*
 * Unit test for gaeval_visitor_intersect().
 *
 * Loads the nine features of data/gff3/gaeval-stream-unit-test-1.gff3 and
 * checks the ranges returned when intersecting three gene features with six
 * alignment features. Five results ("intersect (1)" .. "intersect (5)") are
 * recorded on `test`.
 *
 * "intersect (1)" passes only if BOTH the first call (g1, est1) returns NULL
 * and the second call (g1, est2) returns the single range 400-500.
 *
 * If loading the features fails, a message is printed to stderr, everything
 * allocated here is released, and no test result is recorded.
 */
static void gv_test_intersect(AgnUnitTest *test)
{
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtError *error = gt_error_new();
  const char *filename = "data/gff3/gaeval-stream-unit-test-1.gff3";
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtNodeStream *fstream = gt_array_out_stream_new(gff3in, feats, error);
  int result = gt_node_stream_pull(fstream, error);

  /* The streams are done either way; free them before the error check so
     the failure path does not leak them. */
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(fstream);

  if(result == -1)
  {
    unsigned long i;
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_intersect] error "
            "processing GFF3: %s\n", gt_error_get(error));
    gt_error_delete(error);
    /* Free whatever was collected before the failure. */
    for(i = 0; i < gt_array_size(feats); i++)
    {
      gt_genome_node_delete(*(GtGenomeNode **)gt_array_get(feats, i));
    }
    gt_array_delete(feats);
    return;
  }
  gt_error_delete(error);

  agn_assert(gt_array_size(feats) == 9);
  GtGenomeNode *g1   = *(GtGenomeNode **)gt_array_get(feats, 1);
  GtGenomeNode *g2   = *(GtGenomeNode **)gt_array_get(feats, 3);
  GtGenomeNode *g3   = *(GtGenomeNode **)gt_array_get(feats, 7);
  GtGenomeNode *est1 = *(GtGenomeNode **)gt_array_get(feats, 0);
  GtGenomeNode *est2 = *(GtGenomeNode **)gt_array_get(feats, 2);
  GtGenomeNode *est3 = *(GtGenomeNode **)gt_array_get(feats, 4);
  GtGenomeNode *est4 = *(GtGenomeNode **)gt_array_get(feats, 5);
  GtGenomeNode *est5 = *(GtGenomeNode **)gt_array_get(feats, 6);
  GtGenomeNode *est6 = *(GtGenomeNode **)gt_array_get(feats, 8);

  /* Test 1, part one: no overlap expected, so the result should be NULL. */
  GtArray *cov = gaeval_visitor_intersect(g1, est1);
  bool test1 = cov == NULL;
  if(cov != NULL)
  {
    /* Unexpected result: release it before `cov` is reused. */
    gt_array_delete(cov);
  }

  /* Test 1, part two: combined with part one instead of overwriting it. */
  cov = gaeval_visitor_intersect(g1, est2);
  test1 = test1 && gt_array_size(cov) == 1;
  if(test1)
  {
    GtRange *range01 = gt_array_pop(cov);
    GtRange testrange = { 400, 500 };
    test1 = gt_range_compare(range01, &testrange) == 0;
  }
  agn_unit_test_result(test, "intersect (1)", test1);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est3);
  bool test2 = gt_array_size(cov) == 2;
  if(test2)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 800, 900 };
    GtRange testrange2 = { 1050, 1075 };
    test2 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (2)", test2);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est4);
  bool test3 = gt_array_size(cov) == 2;
  if(test3)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 1070, 1125 };
    GtRange testrange2 = { 1250, 1310 };
    test3 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (3)", test3);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est5);
  bool test4 = gt_array_size(cov) == 2;
  if(test4)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2000, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test4 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (4)", test4);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est6);
  bool test5 = gt_array_size(cov) == 2;
  if(test5)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2500, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test5 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (5)", test5);
  gt_array_delete(cov);

  gt_array_delete(feats);
  gt_genome_node_delete(g1);
  gt_genome_node_delete(g2);
  gt_genome_node_delete(g3);
  gt_genome_node_delete(est1);
  gt_genome_node_delete(est2);
  gt_genome_node_delete(est3);
  gt_genome_node_delete(est4);
  gt_genome_node_delete(est5);
  gt_genome_node_delete(est6);
}
/*
 * Tool runner for specification checking.
 *
 * Builds a spec visitor from arguments->specfile, reads the GFF3 input in
 * tidy mode (optionally sorted), and pulls it through a visitor stream that
 * applies the spec. With -provideindex the input is first collected into an
 * array and a feature index, the index is handed to the visitor, and the
 * check then runs over the collected array. Warnings are redirected to
 * gt_speck_record_warning for the duration of the run and the default
 * handler is restored afterwards.
 *
 * On success the results are reported to arguments->outfp and the elapsed
 * time is printed to stderr. Returns -1 if the spec visitor, region mapping
 * or an array stream cannot be created, otherwise the result of the pull(s).
 */
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                           void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  SpeccheckArguments *arguments = tool_arguments;
  int had_err = 0;

  gt_error_check(err);

  res = gt_spec_results_new();
  gt_assert(res);
  t = gt_timer_new();
  gt_assert(t);

  spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                     err);
  if (!spec_visitor) {
    /* Release what was allocated above; the early return used to leak it. */
    gt_spec_results_delete(res);
    gt_timer_delete(t);
    gt_array_delete(arr);
    return -1;
  }

  /* add region mapping if given */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set runtime error behaviour */
  if (arguments->fail_hard)
    gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
  else
    gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

  /* redirect warnings */
  gt_warning_set_handler(gt_speck_record_warning, res);

  last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                          argc - parsed_args,
                                                          argv + parsed_args);
  gt_assert(gff3_in_stream);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

  /* insert sort stream if requested */
  if (arguments->sort) {
    last_stream = sort_stream = gt_sort_stream_new(last_stream);
  }

  /* if -provideindex is given, collect input features and index them first */
  if (arguments->provideindex) {
    fi = gt_feature_index_memory_new();
    gt_assert(fi);

    last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
    gt_assert(feature_stream);

    last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream, arr,
                                                             err);
    if (!a_out_stream)
      had_err = -1;

    gt_timer_start(t);

    /* first pass: fill the array and the index */
    if (!had_err)
      had_err = gt_node_stream_pull(last_stream, err);

    if (!had_err) {
      gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                        gt_feature_index_ref(fi));
      /* second pass will read back from the collected array */
      last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
      if (!a_in_stream)
        had_err = -1;
    }
  } else {
    gt_timer_start(t);
  }

  if (!had_err) {
    checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
    gt_assert(checker_stream);
  }

  /* perform checking */
  if (!had_err)
    had_err = gt_node_stream_pull(checker_stream, err);

  gt_timer_stop(t);

  /* reset warnings output */
  gt_warning_set_handler(gt_warning_default_handler, NULL);

  /* output results */
  if (!had_err)
    gt_spec_results_report(res, arguments->outfp,
                           gt_str_get(arguments->specfile),
                           arguments->verbose, arguments->colored,
                           !arguments->allexpects);

  if (!had_err)
    gt_timer_show_formatted(t, "Finished in " GT_WD ".%06ld s.\n", stderr);

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_timer_delete(t);
  gt_array_delete(arr);

  return had_err;
}
/*
 * Create a GAEVAL node visitor and load alignment features into it.
 *
 * astream: stream of alignment features; must not be NULL. It is pulled to
 *          completion here but not deleted.
 * gparams: parameters stored in the visitor. A warning is printed to stderr
 *          if alpha + beta + gamma + epsilon differs from 1.0 by more than
 *          0.0001.
 *
 * Only cDNA_match, EST_match and nucleotide_match features are kept. They
 * are stored in the visitor's in-memory feature index and passed through
 * inter-feature streams that add "match_gap" features between them.
 *
 * Returns the new visitor, or NULL if pulling the stream fails (a message is
 * printed to stderr). All temporary objects are released on both paths.
 */
GtNodeVisitor*
agn_gaeval_visitor_new(GtNodeStream *astream, AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta + gparams.gamma +
                         gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }

  // Set up node stream to load alignment features into memory; every stage
  // is recorded in a queue so that all of them can be deleted afterwards
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");
  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(last_stream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
  }

  // Release all temporaries before deciding what to return, so the failure
  // path no longer leaks the error object, the type set, or the streams
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  if(result == -1)
  {
    gt_node_visitor_delete(nv);
    return NULL;
  }

  return nv;
}
/*
 * Tool runner: GFF3 input -> select stream (configured from the tool
 * arguments) -> optional targetbest select stream -> GFF3 output written to
 * arguments->outfp.
 *
 * Dropped features go to print_to_file_drophandler, writing GFF3 to
 * arguments->dropped_file, when that name is non-empty; otherwise
 * default_drophandler is installed.
 *
 * Returns -1 if the select stream cannot be created or the drop file cannot
 * be opened, otherwise the result of pulling the stream (error details go to
 * err).
 */
static int gt_select_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GtError *err)
{
  SelectArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream, *select_stream,
               *targetbest_select_stream = NULL, *gff3_out_stream = NULL;
  int had_err = 0;
  GtFile *drop_file = NULL;
  GtNodeVisitor *gff3outvis = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* create a gff3 input stream */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create a filter stream */
  select_stream = gt_select_stream_new(gff3_in_stream, arguments->seqid,
                                       arguments->source,
                                       &arguments->contain_range,
                                       &arguments->overlap_range,
                                       arguments->strand,
                                       arguments->targetstrand,
                                       arguments->has_CDS,
                                       arguments->max_gene_length,
                                       arguments->max_gene_num,
                                       arguments->min_gene_score,
                                       arguments->max_gene_score,
                                       arguments->min_average_splice_site_prob,
                                       arguments->feature_num,
                                       arguments->filter_files,
                                       arguments->filter_logic,
                                       err);
  if (!select_stream)
    had_err = -1;

  /* install the drop handler */
  if (!had_err) {
    GtSelectStream *fs = (GtSelectStream*) select_stream;
    if (gt_str_length(arguments->dropped_file) > 0) {
      drop_file = gt_file_new(gt_str_get(arguments->dropped_file), "w", err);
      if (!drop_file) {
        /* Stop here rather than continue with a NULL file and an error that
           has already been set. */
        had_err = -1;
      }
      else {
        gff3outvis = gt_gff3_visitor_new(drop_file);
        gt_select_stream_set_drophandler(fs, print_to_file_drophandler,
                                         (void*) gff3outvis);
      }
    }
    else {
      gt_select_stream_set_drophandler(fs, default_drophandler, NULL);
    }
  }

  if (!had_err) {
    gt_select_stream_set_single_intron_factor(select_stream,
                                              arguments->single_intron_factor);

    if (arguments->targetbest)
      targetbest_select_stream = gt_targetbest_select_stream_new(select_stream);

    /* create a gff3 output stream */
    gff3_out_stream = gt_gff3_out_stream_new(arguments->targetbest
                                             ? targetbest_select_stream
                                             : select_stream,
                                             arguments->outfp);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(gff3_out_stream, err);
  }

  /* free; streams that were never created are still NULL here */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(select_stream);
  gt_node_stream_delete(targetbest_select_stream);
  gt_file_delete(drop_file);
  gt_node_visitor_delete(gff3outvis);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
/* Runner of the LTRdigest tool (encoded-sequence variant).

   Two positional arguments are read after the parsed options:
   argv[parsed_args] is opened as a sorted GFF3 input stream and
   argv[parsed_args+1] is loaded as an encoded sequence. The input is passed
   through an LTRdigest stream, optionally through a tabular file-out stream
   (if <arguments->prefix> is non-empty), and finally written as GFF3 to
   <arguments->outfp>.

   Returns 0 if everything succeeded, -1 otherwise. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream   = NULL,
               *gff3_out_stream  = NULL,
               *ltrdigest_stream = NULL,
               *tab_out_stream   = NULL,
               *last_stream      = NULL;
  int had_err = 0,
      tests_to_run = 0,
      arg = parsed_args;
  const char *indexname = argv[arg+1];
  GtLogger *logger;
  GtEncseqLoader *el;
  GtEncseq *encseq;

  gt_error_check(err);
  gt_assert(arguments);

  /* only touch <arguments> after it has been asserted to be non-NULL */
  logger = gt_logger_new(arguments->verbose, GT_LOGGER_DEFLT_PREFIX, stdout);

  /* Set sequence encoder options. Defaults are ok. */
  el = gt_encseq_loader_new();
  gt_encseq_loader_set_logger(el, logger);

  /* Open sequence file */
  encseq = gt_encseq_loader_load(el, indexname, err);
  if (!encseq)
    had_err = -1;

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->pbs_opts.trna_lib =
      gt_bioseq_new(gt_str_get(arguments->trna_lib), err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

#ifdef HAVE_HMMER
  /* Open HMMER files if given. */
  if (!had_err && gt_str_array_size(arguments->pdom_opts.hmm_files) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->pdom_opts.cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }
#endif

  if (!had_err) {
    /* set up stream flow
     * ------------------*/
    last_stream = gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[arg]);

    last_stream = ltrdigest_stream = gt_ltrdigest_stream_new(last_stream,
                                                  tests_to_run,
                                                  encseq,
                                                  &arguments->pbs_opts,
                                                  &arguments->ppt_opts,
#ifdef HAVE_HMMER
                                                  &arguments->pdom_opts,
#endif
                                                  err);
    if (!ltrdigest_stream)
      had_err = -1;
  }

  if (!had_err) {
    /* attach tabular output stream, if requested */
    if (gt_str_length(arguments->prefix) > 0) {
      last_stream = tab_out_stream = gt_ltr_fileout_stream_new(last_stream,
                                              tests_to_run,
                                              encseq,
                                              gt_str_get(arguments->prefix),
                                              &arguments->ppt_opts,
                                              &arguments->pbs_opts,
#ifdef HAVE_HMMER
                                              &arguments->pdom_opts,
#endif
                                              gt_str_get(arguments->trna_lib),
                                              argv[arg+1],
                                              argv[arg],
                                              arguments->seqnamelen,
                                              err);
      /* the constructor takes an error object, so a NULL result is treated
         as a failure rather than being passed on down the stream chain */
      if (!tab_out_stream)
        had_err = -1;
#ifdef HAVE_HMMER
      if (!had_err) {
        /* test the option values themselves; testing their addresses (as the
           code did before) is always true and enabled both outputs
           unconditionally */
        if (arguments->pdom_opts.write_alignments)
          gt_ltr_fileout_stream_enable_pdom_alignment_output(tab_out_stream);
        if (arguments->pdom_opts.write_aaseqs)
          gt_ltr_fileout_stream_enable_aa_sequence_output(tab_out_stream);
      }
#endif
    }

    if (!had_err) {
      last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

      /* pull the features through the stream and free them afterwards */
      had_err = gt_node_stream_pull(last_stream, err);
    }
  }

  /* free; streams that were never created are still NULL */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ltrdigest_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);

  gt_encseq_loader_delete(el);
  gt_encseq_delete(encseq);
  gt_bioseq_delete(arguments->pbs_opts.trna_lib);
  gt_logger_delete(logger);

  return had_err;
}
int gt_feature_in_stream_unit_test(GtError *error) { GtNodeStream *src, *dest; GtFeatureIndex *prefeat, *postfeat; GtRange range1, range1test, range2, range2test; prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); int result = gt_node_stream_pull(dest, error); if (result == -1) return -1; GtStrArray *seqids = gt_feature_index_get_seqids(postfeat, error); if (gt_str_array_size(seqids) != 2) { gt_error_set(error, "error in feature_in_stream unit test 1: expected 2 " "seqids, found "GT_WU"", gt_str_array_size(seqids)); return -1; } gt_str_array_delete(seqids); range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); gt_feature_in_stream_use_orig_ranges((GtFeatureInStream *)src); result = gt_node_stream_pull(dest, error); if (result == -1) return -1; range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; 
gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } range1test.start = 1; range1test.end = 100000; range2test.start = 1; range2test.end = 10000; gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); return 0; }
/* Runner of the speck tool.

   Locates the output driver named by <arguments->format> (either an existing
   file, or a file below "/spec/output_drivers/" in the gtdata path), creates
   a spec visitor for <arguments->specfile>, and runs the GFF3 input given
   after the parsed options through that visitor. With -provideindex the input
   is first collected into a feature index and an array, and the visitor is
   then run over the array contents. Warnings raised while checking are
   redirected to gt_speck_record_warning. On success the collected results are
   rendered with the output driver to <arguments->outfp>.

   Returns 0 on success and -1 on failure. */
static int gt_speck_runner(int argc, const char **argv, int parsed_args,
                           void *tool_arguments, GtError *err)
{
  GtNodeStream *gff3_in_stream = NULL, *checker_stream = NULL,
               *a_in_stream = NULL, *a_out_stream = NULL,
               *feature_stream = NULL, *sort_stream = NULL,
               *last_stream = NULL;
  GtNodeVisitor *spec_visitor = NULL;
  GtSpecResults *res = NULL;
  GtFeatureIndex *fi = NULL;
  GtTypeChecker *type_checker = NULL;
  GtTimer *t = NULL;
  GtRegionMapping *rm = NULL;
  GtArray *arr = gt_array_new(sizeof (GtFeatureNode*));
  GtStr *prog, *speclib;
  SpeccheckArguments *arguments = tool_arguments;
  int had_err = 0;

  gt_error_check(err);
  res = gt_spec_results_new();
  gt_assert(res);

  /* determine the output driver file */
  if (gt_file_exists(gt_str_get(arguments->format))) {
    speclib = gt_str_ref(arguments->format);
  } else {
    prog = gt_str_new();
    gt_str_append_cstr_nt(prog, gt_error_get_progname(err),
                    gt_cstr_length_up_to_char(gt_error_get_progname(err), ' '));
    speclib = gt_get_gtdata_path(gt_str_get(prog), NULL);
    gt_str_delete(prog);
    gt_str_append_cstr(speclib, "/spec/output_drivers/");
    gt_str_append_str(speclib, arguments->format);

    if (!gt_file_exists(gt_str_get(speclib))) {
      gt_error_set(err, "output driver file \"%s\" does not exist",
                   gt_str_get(speclib));
      had_err = -1;
    }
  }

  if (!had_err) {
    spec_visitor = gt_spec_visitor_new(gt_str_get(arguments->specfile), res,
                                       err);
    /* fall through to the common cleanup below instead of returning early,
       which used to leak <arr> and <speclib> */
    if (!spec_visitor)
      had_err = -1;
  }

  t = gt_timer_new();
  gt_assert(t);

  /* add region mapping if given */
  if (!had_err && gt_seqid2file_option_used(arguments->s2fi)) {
    rm = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rm)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_region_mapping((GtSpecVisitor*) spec_visitor, rm);
  }

  /* set type checker if necessary */
  if (!had_err && gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (!type_checker)
      had_err = -1;
    if (!had_err)
      gt_spec_visitor_add_type_checker((GtSpecVisitor*) spec_visitor,
                                       type_checker);
  }

  if (!had_err) {
    /* set runtime error behaviour */
    if (arguments->fail_hard)
      gt_spec_visitor_fail_on_runtime_error((GtSpecVisitor*) spec_visitor);
    else
      gt_spec_visitor_report_runtime_errors((GtSpecVisitor*) spec_visitor);

    /* redirect warnings */
    gt_warning_set_handler(gt_speck_record_warning, res);

    last_stream = gff3_in_stream = gt_gff3_in_stream_new_unsorted(
                                                            argc - parsed_args,
                                                            argv + parsed_args);
    gt_assert(gff3_in_stream);
    gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);

    /* insert sort stream if requested */
    if (arguments->sort) {
      last_stream = sort_stream = gt_sort_stream_new(last_stream);
    }

    /* if -provideindex is given, collect input features and index them first */
    if (arguments->provideindex) {
      fi = gt_feature_index_memory_new();
      gt_assert(fi);

      last_stream = feature_stream = gt_feature_stream_new(last_stream, fi);
      gt_assert(feature_stream);

      last_stream = a_out_stream = gt_array_out_stream_all_new(last_stream,
                                                               arr, err);
      if (!a_out_stream)
        had_err = -1;

      gt_timer_start(t);

      if (!had_err)
        had_err = gt_node_stream_pull(last_stream, err);

      if (!had_err) {
        gt_spec_visitor_add_feature_index((GtSpecVisitor*) spec_visitor,
                                          gt_feature_index_ref(fi));
        last_stream = a_in_stream = gt_array_in_stream_new(arr, NULL, err);
        if (!a_in_stream)
          had_err = -1;
      }
    } else {
      gt_timer_start(t);
    }

    if (!had_err) {
      checker_stream = gt_visitor_stream_new(last_stream, spec_visitor);
      gt_assert(checker_stream);
    }

    /* perform checking */
    if (!had_err)
      had_err = gt_node_stream_pull(checker_stream, err);

    gt_timer_stop(t);

    /* reset warnings output */
    gt_warning_set_handler(gt_warning_default_handler, NULL);

    /* output results */
    if (!had_err) {
      GtStr *runtime = gt_str_new();
      gt_timer_get_formatted(t, GT_WD ".%06ld", runtime);
      had_err = gt_spec_results_render_template(res, gt_str_get(speclib),
                                              arguments->outfp,
                                              gt_str_get(arguments->specfile),
                                              arguments->verbose,
                                              arguments->colored,
                                              gt_str_get(runtime), err);
      gt_str_delete(runtime);
    }
  }

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_node_stream_delete(a_in_stream);
  gt_node_stream_delete(a_out_stream);
  gt_node_stream_delete(checker_stream);
  gt_node_stream_delete(feature_stream);
  gt_node_stream_delete(sort_stream);
  /* The visitor is never deleted explicitly once a checker stream wraps it
     (the stream is expected to own it -- NOTE(review): confirm against the
     gt_visitor_stream_new() contract). If an error prevented the checker
     stream from being created, nobody owns the visitor, so release it here. */
  if (!checker_stream)
    gt_node_visitor_delete(spec_visitor);
  gt_spec_results_delete(res);
  gt_feature_index_delete(fi);
  gt_type_checker_delete(type_checker);
  gt_timer_delete(t);
  gt_array_delete(arr);
  gt_str_delete(speclib);

  return had_err;
}
// Entry point: reads sorted GFF3 from argv[1], passes it through a CpGI score
// stream constructed with argv[3], and writes the result as GFF3 to the file
// argv[2]. Exits with status 1 on a usage error, if any stream or the output
// file cannot be created, or if pulling the stream fails; 0 otherwise.
int main(int argc, char ** argv)
{
  GtNodeStream *in, *score, *out;
  GtFile *out_file;
  GtError *err;
  int status = 0;

  if (argc != 4) {
    usage(argv[0]);
    exit(1);
  }

  // initialize genometools
  gt_lib_init();
  err = gt_error_new();

  if (!(in = gt_gff3_in_stream_new_sorted(argv[1]))) {
    fprintf(stderr, "Failed to open input stream with arg %s\n", argv[1]);
    exit(1);
  }

  if (!(out_file = gt_file_new(argv[2], "w+", err))) {
    gt_node_stream_delete(in);
    fprintf(stderr, "Failed to create output file %s\n", argv[2]);
    exit(1);
  }

  if (!(score = CpGI_score_stream_new(in, argv[3]))) {
    gt_file_delete(out_file);
    gt_node_stream_delete(in);
    fprintf(stderr, "Failed to create CpGI score stream\n");
    exit(1);
  }

  // Only one output stream is created, on top of the score stream. An extra
  // stream used to be created directly on <in> and was overwritten (and thus
  // leaked) by this assignment.
  if (!(out = gt_gff3_out_stream_new(score, out_file))) {
    gt_node_stream_delete(score);
    gt_file_delete(out_file);
    gt_node_stream_delete(in);
    fprintf(stderr, "Failed to create output stream\n");
    exit(1);
  }

  if (gt_node_stream_pull(out, err)) {
    fprintf(stderr, "Failed to pull through out stream\n");
    status = 1;  // report the failure to the caller via the exit status
  }

  // close genome tools
  gt_node_stream_delete(out);
  gt_node_stream_delete(score);
  gt_file_delete(out_file);
  gt_node_stream_delete(in);
  gt_error_delete(err);
  gt_lib_clean();

  return status;
}
/* Runner of the gff3 tool.

   Builds a GFF3 input stream over the files given after the parsed options,
   configures it from the option values in <tool_arguments>, chains the
   optional load, sort, merge-feature, add-introns, set-source and GFF3 output
   streams onto it (each only if its option is set) and pulls all nodes
   through the end of that chain.

   Returns 0 on success and a non-zero value if configuring the input stream
   or pulling the nodes failed. */
static int gt_gff3_runner(int argc, const char **argv, int parsed_args,
                          void *tool_arguments, GtError *err)
{
  GFF3Arguments *arguments = tool_arguments;
  GtTypeChecker *type_checker = NULL;
  GtNodeStream *gff3_in_stream,
               *sort_stream = NULL,
               *load_stream = NULL,
               *merge_feature_stream = NULL,
               *add_introns_stream = NULL,
               *set_source_stream = NULL,
               *gff3_out_stream = NULL,
               *tail; /* current end of the stream chain */
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* the input stream is the start of the chain */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                  argv + parsed_args);
  tail = gff3_in_stream;

  /* options that cannot fail */
  if (arguments->verbose && arguments->outfp)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);
  if (arguments->checkids)
    gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream);
  if (!arguments->addids)
    gt_gff3_in_stream_disable_add_ids(gff3_in_stream);

  /* a different type checker, if one was requested */
  if (gt_typecheck_info_option_used(arguments->tci)) {
    type_checker = gt_typecheck_info_create_type_checker(arguments->tci, err);
    if (type_checker)
      gt_gff3_in_stream_set_type_checker(gff3_in_stream, type_checker);
    else
      had_err = -1;
  }

  /* offset and offset file, if requested */
  if (!had_err) {
    if (arguments->offset != GT_UNDEF_WORD)
      gt_gff3_in_stream_set_offset(gff3_in_stream, arguments->offset);
    if (gt_str_length(arguments->offsetfile)) {
      had_err = gt_gff3_in_stream_set_offsetfile(gff3_in_stream,
                                                 arguments->offsetfile, err);
    }
  }

  if (!had_err) {
    /* remaining input stream modes */
    if (arguments->strict)
      gt_gff3_in_stream_enable_strict_mode((GtGFF3InStream*) gff3_in_stream);
    if (arguments->tidy)
      gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream*) gff3_in_stream);
    if (arguments->fixboundaries) {
      gt_gff3_in_stream_fix_region_boundaries((GtGFF3InStream*)
                                              gff3_in_stream);
    }

    /* optional intermediate streams, each appended to the chain */
    if (arguments->load) {
      load_stream = gt_load_stream_new(tail);
      tail = load_stream;
    }
    if (arguments->sort) {
      sort_stream = gt_sort_stream_new(tail);
      tail = sort_stream;
    }
    if (arguments->mergefeat) {
      /* merging is built directly on the sort stream */
      gt_assert(sort_stream);
      merge_feature_stream = gt_merge_feature_stream_new(sort_stream);
      tail = merge_feature_stream;
    }
    if (arguments->addintrons) {
      gt_assert(tail);
      add_introns_stream = gt_add_introns_stream_new(tail);
      tail = add_introns_stream;
    }
    if (gt_str_length(arguments->newsource) > 0) {
      GtNodeVisitor *source_visitor;
      gt_assert(tail);
      source_visitor = gt_set_source_visitor_new(arguments->newsource);
      set_source_stream = gt_visitor_stream_new(tail, source_visitor);
      tail = set_source_stream;
    }

    /* optional GFF3 output */
    if (arguments->show) {
      gff3_out_stream = gt_gff3_out_stream_new(tail, arguments->outfp);
      tail = gff3_out_stream;
      gt_gff3_out_stream_set_fasta_width((GtGFF3OutStream*) tail,
                                         arguments->width);
      if (arguments->retainids)
        gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream*) tail);
    }

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(tail, err);
  }

  /* free */
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(sort_stream);
  gt_node_stream_delete(load_stream);
  gt_node_stream_delete(merge_feature_stream);
  gt_node_stream_delete(add_introns_stream);
  gt_node_stream_delete(set_source_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_type_checker_delete(type_checker);

  return had_err;
}
/* Runner of the LTRdigest tool (region-mapping variant).

   The sequence source is either the seqid2file option set in
   <arguments->s2fi> or, as a legacy fallback, an encoded sequence named by
   the last command line argument. argv[parsed_args] is opened as a sorted
   GFF3 input stream; visitor streams for protein domains (if HMM files were
   given), PBS (if a tRNA library was given), PPT and strand assignment are
   chained onto it, optionally followed by a tabular file-out stream (if
   <arguments->prefix> is non-empty) and finally a GFF3 output stream writing
   to <arguments->outfp>.

   Returns 0 on success and a non-zero value on failure. */
static int gt_ltrdigest_runner(GT_UNUSED int argc, const char **argv,
                               int parsed_args, void *tool_arguments,
                               GtError *err)
{
  GtLTRdigestOptions *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream  = NULL,
               *gff3_out_stream = NULL,
               *pdom_stream     = NULL,
               *ppt_stream      = NULL,
               *pbs_stream      = NULL,
               *tab_out_stream  = NULL,
               *sa_stream       = NULL,
               *last_stream     = NULL;
  int had_err = 0,
      tests_to_run = 0,
      arg = parsed_args;
  GtRegionMapping *rmap = NULL;
  GtPdomModelSet *ms = NULL;

  gt_error_check(err);
  gt_assert(arguments);

  /* determine and open sequence source */
  if (gt_seqid2file_option_used(arguments->s2fi)) {
    /* create region mapping */
    rmap = gt_seqid2file_region_mapping_new(arguments->s2fi, err);
    if (!rmap)
      had_err = -1;
  } else {
    GtEncseqLoader *el;
    GtEncseq *encseq;
    /* no new-style sequence source option given, fall back to legacy syntax */
    if (argc < 3) {
      gt_error_set(err, "missing mandatory argument(s)");
      had_err = -1;
    }
    if (!had_err) {
      el = gt_encseq_loader_new();
      gt_encseq_loader_disable_autosupport(el);
      gt_encseq_loader_require_md5_support(el);
      gt_encseq_loader_require_description_support(el);
      encseq = gt_encseq_loader_load(el, argv[argc-1], err);
      /* XXX: clip off terminal argument */
      gt_free((char*) argv[argc-1]);
      argv[argc-1] = NULL;
      argc--;
      gt_encseq_loader_delete(el);
      if (!encseq)
        had_err = -1;
      else {
        rmap = gt_region_mapping_new_encseq_seqno(encseq);
        gt_encseq_delete(encseq);
      }
    }
  }
  gt_assert(had_err || rmap);

  /* Always search for PPT. */
  tests_to_run |= GT_LTRDIGEST_RUN_PPT;

  /* Open tRNA library if given. */
  if (!had_err && arguments->trna_lib
        && gt_str_length(arguments->trna_lib) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PBS;
    arguments->trna_lib_bs = gt_bioseq_new(gt_str_get(arguments->trna_lib),
                                           err);
    if (gt_error_is_set(err))
      had_err = -1;
  }

  /* Set HMMER cutoffs. */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    tests_to_run |= GT_LTRDIGEST_RUN_PDOM;
    if (!strcmp(gt_str_get(arguments->cutoffs), "GA")) {
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "TC")) {
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    } else if (!strcmp(gt_str_get(arguments->cutoffs), "NONE")) {
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    } else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }
  }

  if (!had_err) {
    last_stream = gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[arg]);
  }

  /* protein domain search */
  if (!had_err && gt_str_array_size(arguments->hmm_files) > 0) {
    GtNodeVisitor *pdom_v;
    ms = gt_pdom_model_set_new(arguments->hmm_files, err);
    if (ms != NULL) {
      pdom_v = gt_ltrdigest_pdom_visitor_new(ms, arguments->evalue_cutoff,
                                             arguments->chain_max_gap_length,
                                             arguments->cutoff, rmap, err);
      if (pdom_v == NULL)
        had_err = -1;
      if (!had_err) {
        gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                   pdom_v, GT_LTRDIGEST_TAG);
        if (arguments->output_all_chains)
          gt_ltrdigest_pdom_visitor_output_all_chains((GtLTRdigestPdomVisitor*)
                                                        pdom_v);
        last_stream = pdom_stream = gt_visitor_stream_new(last_stream, pdom_v);
      }
    } else
      had_err = -1;
  }

  /* PBS search */
  if (!had_err && arguments->trna_lib_bs) {
    GtNodeVisitor *pbs_v;
    pbs_v = gt_ltrdigest_pbs_visitor_new(rmap, arguments->pbs_radius,
                                         arguments->max_edist,
                                         arguments->alilen,
                                         arguments->offsetlen,
                                         arguments->trnaoffsetlen,
                                         arguments->ali_score_match,
                                         arguments->ali_score_mismatch,
                                         arguments->ali_score_insertion,
                                         arguments->ali_score_deletion,
                                         arguments->trna_lib_bs, err);
    if (pbs_v != NULL)
      last_stream = pbs_stream = gt_visitor_stream_new(last_stream, pbs_v);
    else
      had_err = -1;
  }

  /* PPT search */
  if (!had_err) {
    GtNodeVisitor *ppt_v;
    ppt_v = gt_ltrdigest_ppt_visitor_new(rmap, arguments->ppt_len,
                                         arguments->ubox_len,
                                         arguments->ppt_pyrimidine_prob,
                                         arguments->ppt_purine_prob,
                                         arguments->bkg_a_prob,
                                         arguments->bkg_g_prob,
                                         arguments->bkg_t_prob,
                                         arguments->bkg_c_prob,
                                         arguments->ubox_u_prob,
                                         arguments->ppt_radius,
                                         arguments->max_ubox_dist, err);
    if (ppt_v != NULL)
      last_stream = ppt_stream = gt_visitor_stream_new(last_stream, ppt_v);
    else
      had_err = -1;
  }

  /* strand assignment */
  if (!had_err) {
    GtNodeVisitor *sa_v;
    sa_v = gt_ltrdigest_strand_assign_visitor_new();
    gt_assert(sa_v);
    last_stream = sa_stream = gt_visitor_stream_new(last_stream, sa_v);
  }

  if (!had_err) {
    /* attach tabular output stream, if requested */
    if (gt_str_length(arguments->prefix) > 0) {
      last_stream = tab_out_stream = gt_ltrdigest_file_out_stream_new(
                                                  last_stream,
                                                  tests_to_run,
                                                  rmap,
                                                  gt_str_get(arguments->prefix),
                                                  arguments->seqnamelen,
                                                  err);
      if (!tab_out_stream)
        had_err = -1;
      if (!had_err && arguments->print_metadata) {
        had_err = gt_ltrdigest_file_out_stream_write_metadata(
                                    (GtLTRdigestFileOutStream*) tab_out_stream,
                                    tests_to_run,
                                    gt_str_get(arguments->trna_lib),
                                    argv[arg],
                                    arguments->ppt_len,
                                    arguments->ubox_len,
                                    arguments->ppt_radius,
                                    arguments->alilen,
                                    arguments->max_edist,
                                    arguments->offsetlen,
                                    arguments->trnaoffsetlen,
                                    arguments->pbs_radius,
                                    arguments->hmm_files,
                                    arguments->chain_max_gap_length,
                                    arguments->evalue_cutoff,
                                    err);
      }
      if (!had_err) {
        if (arguments->write_alignments)
          gt_ltrdigest_file_out_stream_enable_pdom_alignment_output(
                                                               tab_out_stream);
        if (arguments->write_aaseqs)
          gt_ltrdigest_file_out_stream_enable_aa_sequence_output(
                                                               tab_out_stream);
      }
    }

    /* Only build the output stream and pull if the tabular stream setup
       succeeded. Previously a failure above was ignored: the output stream
       was created on a possibly NULL stream and the pull result overwrote
       the recorded error. */
    if (!had_err) {
      last_stream = gff3_out_stream = gt_gff3_out_stream_new(last_stream,
                                                           arguments->outfp);

      /* pull the features through the stream and free them afterwards */
      had_err = gt_node_stream_pull(last_stream, err);
    }
  }

  /* free */
  gt_pdom_model_set_delete(ms);
  gt_node_stream_delete(gff3_out_stream);
  gt_node_stream_delete(ppt_stream);
  gt_node_stream_delete(pbs_stream);
  gt_node_stream_delete(sa_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(tab_out_stream);
  gt_node_stream_delete(gff3_in_stream);
  gt_bioseq_delete(arguments->trna_lib_bs);
  gt_region_mapping_delete(rmap);

  return had_err;
}
/* Runner of the TIR prediction tool.

   Creates a TIR stream from the option values in <tool_arguments>, optionally
   chains a protein domain visitor stream onto it (if HMM files were given),
   and writes the resulting nodes as GFF3 through an output stream created
   with a NULL file.

   Returns -1 if the TIR stream cannot be created, if the cutoff name is not
   one of "GA", "TC" or "NONE", or if the protein domain models or visitor
   cannot be set up; otherwise the result of pulling the output stream. */
static int gt_tir_runner(GT_UNUSED int argc, GT_UNUSED const char **argv,
                         GT_UNUSED int parsed_args, void *tool_arguments,
                         GtError *err)
{
  GtTirArguments *arguments = tool_arguments;
  GtNodeStream *tir_stream = NULL,
               *pdom_stream = NULL,
               *gff3_out_stream = NULL,
               *tail = NULL; /* current end of the stream chain */
  GtPdomModelSet *ms = NULL;
  GtRegionMapping *rmap = NULL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  tir_stream = gt_tir_stream_new(arguments->str_indexname,
                                 arguments->min_seed_length,
                                 arguments->min_TIR_length,
                                 arguments->max_TIR_length,
                                 arguments->min_TIR_distance,
                                 arguments->max_TIR_distance,
                                 arguments->arbit_scores,
                                 arguments->xdrop_belowscore,
                                 arguments->similarity_threshold,
                                 arguments->best_overlaps,
                                 arguments->no_overlaps,
                                 arguments->min_TSD_length,
                                 arguments->max_TSD_length,
                                 arguments->vicinity,
                                 err);
  if (tir_stream == NULL)
    return -1;
  tail = tir_stream;

  /* region mapping over the encoded sequence used by the TIR stream */
  rmap = gt_region_mapping_new_encseq((GtEncseq*)
                        gt_tir_stream_get_encseq((GtTIRStream*) tir_stream),
                        true, false);
  gt_assert(rmap);

  if (gt_str_array_size(arguments->hmm_files) > 0) {
    /* translate the cutoff name into its constant */
    const char *cutoff_name = gt_str_get(arguments->cutoffs);

    if (strcmp(cutoff_name, "GA") == 0)
      arguments->cutoff = GT_PHMM_CUTOFF_GA;
    else if (strcmp(cutoff_name, "TC") == 0)
      arguments->cutoff = GT_PHMM_CUTOFF_TC;
    else if (strcmp(cutoff_name, "NONE") == 0)
      arguments->cutoff = GT_PHMM_CUTOFF_NONE;
    else {
      gt_error_set(err, "invalid cutoff setting!");
      had_err = -1;
    }

    /* set up the protein domain search */
    if (!had_err) {
      ms = gt_pdom_model_set_new(arguments->hmm_files, err);
      if (ms == NULL)
        had_err = -1;
      else {
        /* NOTE(review): the second and fourth arguments (cutoff,
           evalue_cutoff) are in the opposite order to the call made by the
           LTRdigest runner -- confirm against the declaration of
           gt_ltrdigest_pdom_visitor_new() */
        GtNodeVisitor *pdom_v =
          gt_ltrdigest_pdom_visitor_new(ms, arguments->cutoff,
                                        arguments->chain_max_gap_length,
                                        arguments->evalue_cutoff, rmap, err);
        if (pdom_v == NULL)
          had_err = -1;
        else {
          pdom_stream = gt_visitor_stream_new(tail, pdom_v);
          tail = pdom_stream;
          gt_ltrdigest_pdom_visitor_set_root_type((GtLTRdigestPdomVisitor*)
                                       pdom_v,
                                       gt_ft_terminal_inverted_repeat_element);
          gt_ltrdigest_pdom_visitor_set_source_tag((GtLTRdigestPdomVisitor*)
                                                   pdom_v, "TIRvish");
        }
      }
    }
  }

  /* the output stream is created even after an error; it is just not pulled */
  gff3_out_stream = gt_gff3_out_stream_new(tail, NULL);
  tail = gff3_out_stream;

  /* pull the features through the stream and free them afterwards */
  if (!had_err)
    had_err = gt_node_stream_pull(tail, err);

  gt_node_stream_delete(tir_stream);
  gt_node_stream_delete(pdom_stream);
  gt_node_stream_delete(gff3_out_stream);
  gt_region_mapping_delete(rmap);
  gt_pdom_model_set_delete(ms);

  return had_err;
}
/* Runner of the sketch tool.

   argv[parsed_args] names the image file to write; the remaining arguments
   name the annotation input, which is read as GFF3, BED or GTF depending on
   <arguments->input>. All features are collected in an in-memory feature
   index (optionally after adding introns and/or echoing them as GFF3), a
   sequence region is selected (<arguments->seqid>, or the first one in the
   index if that option is empty), a style file is loaded, and a diagram of
   the query range is laid out and rendered in the format named by
   <arguments->format> ("pdf", "ps", "svg", anything else: PNG).

   Returns 0 on success and a non-zero value on failure. */
static int gt_sketch_runner(int argc, const char **argv, int parsed_args,
                            void *tool_arguments, GT_UNUSED GtError *err)
{
  GtSketchArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *add_introns_stream = NULL,
               *gff3_out_stream = NULL,
               *feature_stream = NULL,
               *sort_stream = NULL,
               *last_stream;
  GtFeatureIndex *features = NULL;
  const char *file;
  char *seqid = NULL; /* always heap-allocated when set; freed at the end */
  GtRange qry_range, sequence_region_range;
  GtArray *results = NULL;
  GtStyle *sty = NULL;
  GtStr *prog, *defaultstylefile = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtImageInfo* ii = NULL;
  GtCanvas *canvas = NULL;
  GtUword height;
  bool has_seqid;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  /* locate the default style file below the gtdata path */
  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, argv[0],
                        gt_cstr_length_up_to_char(argv[0], ' '));
  defaultstylefile = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (!defaultstylefile)
    had_err = -1;
  if (!had_err) {
    gt_str_append_cstr(defaultstylefile, "/sketch/default.style");
  }

  /* first positional argument: the image file */
  file = argv[parsed_args];

  if (!had_err) {
    /* create feature index */
    features = gt_feature_index_memory_new();
    parsed_args++;

    /* create an input stream */
    if (strcmp(gt_str_get(arguments->input), "gff") == 0) {
      in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                 argv + parsed_args);
      if (arguments->verbose)
        gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);
    } else if (strcmp(gt_str_get(arguments->input), "bed") == 0) {
      if (argc - parsed_args == 0)
        in_stream = gt_bed_in_stream_new(NULL);
      else
        in_stream = gt_bed_in_stream_new(argv[parsed_args]);
    } else if (strcmp(gt_str_get(arguments->input), "gtf") == 0) {
      if (argc - parsed_args == 0)
        in_stream = gt_gtf_in_stream_new(NULL);
      else
        in_stream = gt_gtf_in_stream_new(argv[parsed_args]);
    }
    last_stream = in_stream;

    /* create add introns stream if -addintrons was used */
    if (arguments->addintrons) {
      sort_stream = gt_sort_stream_new(last_stream);
      add_introns_stream = gt_add_introns_stream_new(sort_stream);
      last_stream = add_introns_stream;
    }

    /* create gff3 output stream if -pipe was used */
    if (arguments->pipe) {
      gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
      last_stream = gff3_out_stream;
    }

    /* create feature stream */
    feature_stream = gt_feature_stream_new(last_stream, features);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(feature_stream, err);

    gt_node_stream_delete(feature_stream);
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(sort_stream);
    gt_node_stream_delete(add_introns_stream);
    gt_node_stream_delete(in_stream);
  }

  if (!had_err) {
    had_err = gt_feature_index_has_seqid(features,
                                         &has_seqid,
                                         gt_str_get(arguments->seqid),
                                         err);
  }

  /* if seqid is empty, take first one added to index */
  if (!had_err && strcmp(gt_str_get(arguments->seqid),"") == 0) {
    seqid = gt_feature_index_get_first_seqid(features, err);
    if (seqid == NULL) {
      gt_error_set(err, "GFF input file must contain a sequence region!");
      had_err = -1;
    }
  }
  else if (!had_err && !has_seqid) {
    gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                 gt_str_get(arguments->seqid));
    had_err = -1;
  }
  else if (!had_err) {
    /* Duplicate the option string: <seqid> is released with gt_free() at the
       end of this function, which must never be applied to the pointer
       returned by gt_str_get(), since that memory belongs to
       arguments->seqid. */
    seqid = gt_cstr_dup(gt_str_get(arguments->seqid));
  }

  results = gt_array_new(sizeof (GtGenomeNode*));

  if (!had_err) {
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid, err);
  }

  if (!had_err) {
    /* undefined start/end options default to the sequence region borders */
    qry_range.start = (arguments->start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->start);
    qry_range.end   = (arguments->end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->end);
  }

  if (!had_err) {
    if (arguments->verbose)
      fprintf(stderr, "# of results: "GT_WU"\n", gt_array_size(results));

    /* find and load style file */
    if (!(sty = gt_style_new(err)))
      had_err = -1;
    if (gt_str_length(arguments->stylefile) == 0) {
      gt_str_append_str(arguments->stylefile, defaultstylefile);
    } else {
      if (!had_err && gt_file_exists(gt_str_get(arguments->stylefile))) {
        if (arguments->unsafe)
          gt_style_unsafe_mode(sty);
      }
      else
      {
        had_err = -1;
        gt_error_set(err, "style file '%s' does not exist!",
                          gt_str_get(arguments->stylefile));
      }
    }
    if (!had_err)
      had_err = gt_style_load_file(sty, gt_str_get(arguments->stylefile), err);
  }

  if (!had_err) {
    /* create and write image file */
    if (!(d = gt_diagram_new(features, seqid, &qry_range, sty, err)))
      had_err = -1;
    if (!had_err && arguments->flattenfiles)
      gt_diagram_set_track_selector_func(d, flattened_file_track_selector,
                                         NULL);
    if (had_err || !(l = gt_layout_new(d, arguments->width, sty, err)))
      had_err = -1;
    if (!had_err)
      had_err = gt_layout_get_height(l, &height, err);
    if (!had_err) {
      ii = gt_image_info_new();

      if (strcmp(gt_str_get(arguments->format),"pdf")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PDF,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"ps")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PS,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"svg")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_SVG,
                                          arguments->width,
                                          height, ii, err);
      }
      else {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PNG,
                                          arguments->width,
                                          height, ii, err);
      }
      if (!canvas)
        had_err = -1;
      if (!had_err) {
        had_err = gt_layout_sketch(l, canvas, err);
      }
      if (!had_err) {
        if (arguments->showrecmaps) {
          GtUword i;
          const GtRecMap *rm;
          for (i = 0; i < gt_image_info_num_of_rec_maps(ii) ;i++) {
            char buf[BUFSIZ];
            rm = gt_image_info_get_rec_map(ii, i);
            (void) gt_rec_map_format_html_imagemap_coords(rm, buf, BUFSIZ);
            printf("%s, %s\n",
                   buf,
                   gt_feature_node_get_type(gt_rec_map_get_genome_feature(rm)));
          }
        }
        if (arguments->use_streams) {
          /* render into a string first, then write that string to <file> */
          GtFile *outfile;
          GtStr *str = gt_str_new();
          gt_canvas_cairo_file_to_stream((GtCanvasCairoFile*) canvas, str);
          outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, file, "w+", err);
          if (outfile) {
            gt_file_xwrite(outfile, gt_str_get_mem(str), gt_str_length(str));
            gt_file_delete(outfile);
          } else {
            had_err = -1;
          }
          gt_str_delete(str);
        } else {
          had_err = gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas,
                                                 file,
                                                 err);
        }
      }
    }
  }

  /* free */
  gt_free(seqid);
  gt_canvas_delete(canvas);
  gt_layout_delete(l);
  gt_image_info_delete(ii);
  gt_style_delete(sty);
  gt_diagram_delete(d);
  gt_array_delete(results);
  gt_str_delete(defaultstylefile);
  gt_feature_index_delete(features);

  return had_err;
}
// Main method int main(int argc, char * const *argv) { GtError *error; GtLogger *logger; GtQueue *streams; GtNodeStream *stream, *last_stream; CanonGFF3Options options = { NULL, NULL, false }; gt_lib_init(); error = gt_error_new(); canon_gff3_parse_options(argc, argv + 0, &options, error); streams = gt_queue_new(); logger = gt_logger_new(true, "", stderr); stream = gt_gff3_in_stream_new_unsorted(argc - optind, (const char **) argv+optind); gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream); gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream); gt_queue_add(streams, stream); last_stream = stream; if(options.infer) { GtHashmap *type_parents = gt_hashmap_new(GT_HASH_STRING, gt_free_func, gt_free_func); gt_hashmap_add(type_parents, gt_cstr_dup("mRNA"), gt_cstr_dup("gene")); gt_hashmap_add(type_parents, gt_cstr_dup("tRNA"), gt_cstr_dup("gene")); stream = agn_infer_parent_stream_new(last_stream, type_parents); gt_hashmap_delete(type_parents); gt_queue_add(streams, stream); last_stream = stream; } stream = agn_gene_stream_new(last_stream, logger); gt_queue_add(streams, stream); last_stream = stream; if(options.source != NULL) { GtNodeVisitor *ssv = gt_set_source_visitor_new(options.source); stream = gt_visitor_stream_new(last_stream, ssv); gt_queue_add(streams, stream); last_stream = stream; } stream = gt_gff3_out_stream_new(last_stream, options.outstream); if(!options.infer) gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream *)stream); gt_queue_add(streams, stream); last_stream = stream; if(gt_node_stream_pull(last_stream, error) == -1) { fprintf(stderr, "[CanonGFF3] error processing node stream: %s", gt_error_get(error)); } while(gt_queue_size(streams) > 0) { stream = gt_queue_get(streams); gt_node_stream_delete(stream); } gt_queue_delete(streams); if(options.source != NULL) gt_str_delete(options.source); if(options.outstream != NULL) gt_file_delete(options.outstream); gt_error_delete(error); gt_logger_delete(logger); gt_lib_clean(); return 
0; }