/* Runner of the seqids tool.
   The arguments following the already parsed ones (argv + parsed_args) are
   handed to an unsorted GFF3 input stream, which is wrapped in a visitor
   stream whose collect-ids visitor fills a string table. If pulling the
   stream succeeds, every string in that table is printed on its own line to
   stdout. Returns the status reported by gt_node_stream_pull(). */
static int gt_seqids_runner(GT_UNUSED int argc, const char **argv,
                            int parsed_args, GT_UNUSED void *tool_arguments,
                            GtError *err)
{
  GtCstrTable *id_table;
  GtNodeStream *gff3_stream, *collect_stream;
  int had_err;

  gt_error_check(err);

  /* set up: GFF3 input -> collecting visitor */
  id_table = gt_cstr_table_new();
  gff3_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                               argv + parsed_args);
  collect_stream = gt_visitor_stream_new(gff3_stream,
                                         gt_collect_ids_visitor_new(id_table));

  /* drain the whole input so the table is complete */
  had_err = gt_node_stream_pull(collect_stream, err);

  /* output happens only if the complete input was processed without error */
  if (!had_err) {
    GtStrArray *ids = gt_cstr_table_get_all(id_table);
    GtUword idx = 0;
    while (idx < gt_str_array_size(ids)) {
      printf("%s\n", gt_str_array_get(ids, idx));
      idx++;
    }
    gt_str_array_delete(ids);
  }

  /* release everything created above, regardless of the outcome */
  gt_node_stream_delete(collect_stream);
  gt_node_stream_delete(gff3_stream);
  gt_cstr_table_delete(id_table);

  return had_err;
}
/* Deliver the next node of the sequence-node-add stream <ns> in <*gn>.
   As long as the input stream yields nodes, each node is visited by the
   collecting visitor (which records seen seqids) and handed on. Once the
   input stream yields no node, each call emits one sequence node for the
   next entry of the seqid array, using the sequence obtained from the region
   mapping. <*gn> is set to NULL when all entries have been handled.
   Returns 0 on success; otherwise the error status of the failing call. */
static int sequence_node_add_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                         GtError *err)
{
  GtSequenceNodeAddStream *stream;
  GtStr *id, *sequence;
  GtUword seq_len;
  char *raw_seq = NULL;
  int had_err;

  gt_error_check(err);
  stream = gt_sequence_node_add_stream_cast(ns);

  /* stream nodes as long as we have some, record seen seqids */
  had_err = gt_node_stream_next(stream->in_stream, gn, err);
  if (!had_err && *gn)
    had_err = gt_genome_node_accept(*gn, stream->collect_vis, err);

  /* an error occurred or a regular node is available: nothing more to do */
  if (had_err || *gn)
    return had_err;

  /* the input is exhausted: fetch the collected seqids on first use */
  if (!stream->seqids)
    stream->seqids = gt_cstr_table_get_all(stream->seqid_table);
  gt_assert(stream->seqids);

  /* every seqid has been handled already: signal the end of the stream */
  if (stream->cur_seqid >= gt_str_array_size(stream->seqids)) {
    *gn = NULL;
    return 0;
  }

  /* build a sequence node for the current seqid */
  id = gt_str_new();
  sequence = gt_str_new();
  gt_str_append_cstr(id, gt_str_array_get(stream->seqids, stream->cur_seqid));

  had_err = gt_region_mapping_get_sequence_length(stream->rm, &seq_len, id,
                                                  err);
  if (!had_err) {
    /* request the full sequence, i.e. positions 1 to seq_len */
    had_err = gt_region_mapping_get_sequence(stream->rm, &raw_seq, id, 1,
                                             seq_len, err);
  }
  if (!had_err) {
    gt_str_append_cstr_nt(sequence, raw_seq, seq_len);
    *gn = gt_sequence_node_new(gt_str_get(id), sequence);
  }

  /* the seqid counts as handled even if its sequence could not be fetched */
  stream->cur_seqid++;

  gt_free(raw_seq);
  gt_str_delete(id);
  gt_str_delete(sequence);

  return had_err;
}
/* Print the statistics gathered by the stat visitor <nv> to <outfp>.
   Counters are shown only if they are non-zero, distributions only if they
   exist, and the list of used source tags only if a source table exists.
   The order of the output is: counters, distributions, source tags. */
void gt_stat_visitor_show_stats(GtNodeVisitor *nv, GtFile *outfp)
{
  GtStatVisitor *sv = stat_visitor_cast(nv);

  /* counters */
  if (sv->number_of_sequence_regions) {
    gt_file_xprintf(outfp, "sequence regions: %lu (total length: %llu)\n",
                    sv->number_of_sequence_regions,
                    sv->total_length_of_sequence_regions);
  }
  if (sv->number_of_multi_features) {
    gt_file_xprintf(outfp, "multi-features: %lu\n",
                    sv->number_of_multi_features);
  }
  if (sv->number_of_genes) {
    gt_file_xprintf(outfp, "genes: %lu\n", sv->number_of_genes);
  }
  if (sv->number_of_protein_coding_genes) {
    gt_file_xprintf(outfp, "protein-coding genes: %lu\n",
                    sv->number_of_protein_coding_genes);
  }
  if (sv->number_of_mRNAs) {
    gt_file_xprintf(outfp, "mRNAs: %lu\n", sv->number_of_mRNAs);
  }
  if (sv->number_of_protein_coding_mRNAs) {
    gt_file_xprintf(outfp, "protein-coding mRNAs: %lu\n",
                    sv->number_of_protein_coding_mRNAs);
  }
  if (sv->number_of_exons) {
    gt_file_xprintf(outfp, "exons: %lu\n", sv->number_of_exons);
  }
  if (sv->number_of_CDSs) {
    gt_file_xprintf(outfp, "CDSs: %lu\n", sv->number_of_CDSs);
  }
  if (sv->number_of_LTR_retrotransposons) {
    gt_file_xprintf(outfp, "LTR_retrotransposons: %lu\n",
                    sv->number_of_LTR_retrotransposons);
  }

  /* distributions: a heading line followed by the distribution itself */
  if (sv->gene_length_distribution) {
    gt_file_xprintf(outfp, "gene length distribution:\n");
    gt_disc_distri_show(sv->gene_length_distribution, outfp);
  }
  if (sv->gene_score_distribution) {
    gt_file_xprintf(outfp, "gene score distribution:\n");
    gt_disc_distri_show(sv->gene_score_distribution, outfp);
  }
  if (sv->exon_length_distribution) {
    gt_file_xprintf(outfp, "exon length distribution:\n");
    gt_disc_distri_show(sv->exon_length_distribution, outfp);
  }
  if (sv->exon_number_distribution) {
    gt_file_xprintf(outfp, "exon number distribution:\n");
    gt_disc_distri_show(sv->exon_number_distribution, outfp);
  }
  if (sv->intron_length_distribution) {
    gt_file_xprintf(outfp, "intron length distribution:\n");
    gt_disc_distri_show(sv->intron_length_distribution, outfp);
  }
  if (sv->cds_length_distribution) {
    gt_file_xprintf(outfp, "CDS length distribution:\n");
    gt_disc_distri_show(sv->cds_length_distribution, outfp);
  }

  /* source tags: one tag per line */
  if (sv->used_sources) {
    GtStrArray *sources;
    unsigned long i;
    gt_file_xprintf(outfp, "used source tags:\n");
    sources = gt_cstr_table_get_all(sv->used_sources);
    for (i = 0; i < gt_str_array_size(sources); i++) {
      gt_file_xprintf(outfp, "%s\n", gt_str_array_get(sources, i));
    }
    /* the array obtained from the table is released here */
    gt_str_array_delete(sources);
  }
}
/* Return the contents of the used_types table of the plain GFF3 input stream
   <ns> as a string array, as produced by gt_cstr_table_get_all().
   NOTE(review): the caller presumably has to delete the returned array
   -- confirm against the documentation of gt_cstr_table_get_all(). */
GtStrArray* gt_gff3_in_stream_plain_get_used_types(GtNodeStream *ns)
{
  GtStrArray *used_types;
  GtGFF3InStreamPlain *stream = gff3_in_stream_plain_cast(ns);

  gt_assert(stream);
  used_types = gt_cstr_table_get_all(stream->used_types);
  return used_types;
}