GtNodeStream* gt_gtf_in_stream_new(const char *filename) { GtGTFInStream *gtf_in_stream; GtNodeStream *ns = gt_node_stream_create(gt_gtf_in_stream_class(), false); gtf_in_stream = gtf_in_stream_cast(ns); gtf_in_stream->genome_node_buffer = gt_queue_new(); gtf_in_stream->type_checker = gt_type_checker_builtin_new(); gtf_in_stream->filename = filename ? gt_cstr_dup(filename) : NULL; return ns; }
GtBEDParser* gt_bed_parser_new(void) { GtBEDParser *bed_parser = gt_calloc(1, sizeof *bed_parser); bed_parser->region_node_builder = gt_region_node_builder_new(); bed_parser->feature_nodes = gt_queue_new(); bed_parser->seqid_to_str_mapping = gt_hashmap_new(GT_HASH_STRING, NULL, (GtFree) gt_str_delete); bed_parser->word = gt_str_new(); bed_parser->another_word = gt_str_new(); return bed_parser; }
GtNodeVisitor* gt_add_ids_visitor_new(bool ensure_sorting) { GtNodeVisitor *nv = gt_node_visitor_create(gt_add_ids_visitor_class()); GtAddIDsVisitor *add_ids_visitor = add_ids_visitor_cast(nv); add_ids_visitor->node_buffer = gt_queue_new(); add_ids_visitor->defined_seqids = gt_cstr_table_new(); add_ids_visitor->undefined_sequence_regions = gt_hashmap_new(GT_HASH_STRING, NULL, (GtFree) automatic_sequence_region_delete); add_ids_visitor->ensure_sorting = ensure_sorting; return nv; }
/* Create a buffer stream on top of <in_stream> (must not be NULL).
   The new stream takes its own reference to <in_stream>, starts with an
   empty node buffer and has buffering switched on. */
GtNodeStream* gt_buffer_stream_new(GtNodeStream *in_stream)
{
  GtNodeStream *stream;
  GtBufferStream *buffer_stream;

  gt_assert(in_stream);
  stream = gt_node_stream_create(gt_buffer_stream_class(), false);
  buffer_stream = buffer_stream_cast(stream);
  buffer_stream->buffering = true;
  buffer_stream->in_stream = gt_node_stream_ref(in_stream);
  buffer_stream->node_buffer = gt_queue_new();
  return stream;
}
// Create a filter stream reading from `in_stream` and configured with the
// `typestokeep` hashmap. Both arguments must be non-NULL. The new stream takes
// its own reference to each of them and starts with an empty cache queue.
GtNodeStream* agn_filter_stream_new(GtNodeStream *in_stream,
                                    GtHashmap *typestokeep)
{
  gt_assert(in_stream && typestokeep);

  GtNodeStream *filter = gt_node_stream_create(filter_stream_class(), false);
  AgnFilterStream *fs = filter_stream_cast(filter);

  fs->in_stream = gt_node_stream_ref(in_stream);
  fs->cache = gt_queue_new();
  fs->typestokeep = gt_hashmap_ref(typestokeep);

  return filter;
}
/* Create a feature input stream backed by the feature index <fi> (must not
   be NULL). The pointer to <fi> is stored as-is; no reference is taken here.
   The stream starts with an empty region cache, no feature cache, and the
   <useorig> and <init> flags cleared. */
GtNodeStream* gt_feature_in_stream_new(GtFeatureIndex *fi)
{
  GtNodeStream *result;
  GtFeatureInStream *fis;

  gt_assert(fi);
  result = gt_node_stream_create(gt_feature_in_stream_class(), true);
  fis = feature_in_stream_cast(result);

  fis->fi = fi;
  fis->featurecache = NULL;
  fis->useorig = false;
  fis->init = false;
  fis->regioncache = gt_queue_new();
  return result;
}
GtDescBuffer* gt_desc_buffer_new(void) { GtDescBuffer *db = gt_malloc(sizeof *db); db->buf = gt_calloc(GT_DESC_BUFFER_INIT_SIZE, sizeof (char)); db->length = 0; db->maxlength = db->curlength = 0; db->allocated = GT_DESC_BUFFER_INIT_SIZE; db->finished = false; db->dirty = true; db->shorten = false; db->seen_whitespace = false; db->reference_count = 0; db->startqueue = gt_queue_new(); gt_queue_add(db->startqueue, (void*) 0); return db; }
/* Create a new select visitor and store all given selection criteria in it.
   New references to <seqid> and <source> are taken. <contain_range> and
   <overlap_range> are copied by value; if one of them is NULL, both its start
   and end are recorded as GT_UNDEF_ULONG instead. All remaining arguments are
   stored unchanged, and the visitor's gene counter starts at 0. */
GtNodeVisitor* gt_select_visitor_new(GtStr *seqid,
                                     GtStr *source,
                                     const GtRange *contain_range,
                                     const GtRange *overlap_range,
                                     GtStrand strand,
                                     GtStrand targetstrand,
                                     bool has_CDS,
                                     unsigned long max_gene_length,
                                     unsigned long max_gene_num,
                                     double min_gene_score,
                                     double max_gene_score,
                                     double min_average_splice_site_prob,
                                     unsigned long feature_num)
{
  GtNodeVisitor *visitor = gt_node_visitor_create(gt_select_visitor_class());
  GtSelectVisitor *sv = select_visitor_cast(visitor);

  sv->node_buffer = gt_queue_new();
  sv->seqid = gt_str_ref(seqid);
  sv->source = gt_str_ref(source);

  /* range filters: a missing range is marked as undefined */
  if (!contain_range) {
    sv->contain_range.start = GT_UNDEF_ULONG;
    sv->contain_range.end = GT_UNDEF_ULONG;
  }
  else
    sv->contain_range = *contain_range;

  if (!overlap_range) {
    sv->overlap_range.start = GT_UNDEF_ULONG;
    sv->overlap_range.end = GT_UNDEF_ULONG;
  }
  else
    sv->overlap_range = *overlap_range;

  /* strand filters */
  sv->strand = strand;
  sv->targetstrand = targetstrand;

  /* gene filters */
  sv->has_CDS = has_CDS;
  sv->max_gene_length = max_gene_length;
  sv->gene_num = 0;
  sv->max_gene_num = max_gene_num;
  sv->min_gene_score = min_gene_score;
  sv->max_gene_score = max_gene_score;
  sv->min_average_splice_site_prob = min_average_splice_site_prob;

  sv->feature_num = feature_num;
  return visitor;
}
/* Create a plain GFF3 input stream over <files>; takes ownership of <files>.
   <ensure_sorting> is handed to gt_node_stream_create() and also stored in
   the stream. The stream starts at file index 0 with no open file, line
   number 0, an empty genome node buffer, a fresh GFF3 parser (created with a
   NULL argument), an empty table of used types, and with ID checking, region
   checking and the progress bar switched off. */
static GtNodeStream* gff3_in_stream_plain_new(GtStrArray *files,
                                              bool ensure_sorting)
{
  GtNodeStream *stream = gt_node_stream_create(gt_gff3_in_stream_plain_class(),
                                               ensure_sorting);
  GtGFF3InStreamPlain *is = gff3_in_stream_plain_cast(stream);

  /* input files */
  is->files = files;
  is->next_file = 0;
  is->stdinstr = gt_str_new_cstr("stdin");
  is->stdin_argument = false;

  /* current file state */
  is->file_is_open = false;
  is->fpin = NULL;
  is->line_number = 0;

  /* parsing state */
  is->ensure_sorting = ensure_sorting;
  is->genome_node_buffer = gt_queue_new();
  is->checkids = false;
  is->checkregions = false;
  is->gff3_parser = gt_gff3_parser_new(NULL);
  is->used_types = gt_cstr_table_new();
  is->progress_bar = false;

  return stream;
}
/* Deliver the next node of a numerically sorted GFF3 output stream.
   On the first call (no output queue yet) the whole input stream is read into
   the stream's buffer, the buffer is sorted stably with
   gt_genome_node_compare_numeric_seqids and its nodes are moved, in order,
   into a newly created output queue. Every call then takes one node from the
   queue (if any is left), passes it to the GFF3 visitor and stores it in
   <*gn>. Returns 0 on success or the error code of the failing call.
   When the queue is empty, <*gn> is not written by this function.
   NOTE(review): if gt_genome_node_accept() fails, the dequeued node is
   neither stored in <*gn> nor put back -- confirm who releases it. */
static int gff3_numsorted_out_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                          GtError *err)
{
  GtGFF3NumsortedOutStream *os;
  GtGenomeNode *node;
  GtUword idx;
  int had_err = 0;

  gt_error_check(err);
  os = gff3_numsorted_out_stream_cast(ns);

  if (!os->outqueue) {
    /* first call: slurp the complete input */
    os->outqueue = gt_queue_new();
    for (;;) {
      had_err = gt_node_stream_next(os->in_stream, gn, err);
      if (had_err || !*gn)
        break;
      gt_array_add(os->buffer, *gn);
    }
    if (had_err)
      return had_err;

    /* sort and hand the nodes over to the output queue */
    gt_genome_nodes_sort_stable_with_func(os->buffer,
                           (GtCompare) gt_genome_node_compare_numeric_seqids);
    for (idx = 0; idx < gt_array_size(os->buffer); idx++) {
      node = *(GtGenomeNode**) gt_array_get(os->buffer, idx);
      gt_queue_add(os->outqueue, node);
    }
  }

  /* nothing (left) to deliver */
  if (!os->outqueue || gt_queue_size(os->outqueue) == 0)
    return had_err;

  node = (GtGenomeNode*) gt_queue_get(os->outqueue);
  gt_assert(node);
  had_err = gt_genome_node_accept(node, os->gff3_visitor, err);
  if (!had_err)
    *gn = node;
  return had_err;
}
// Create a GAEVAL visitor.
//
// `astream` (must not be NULL) supplies alignment features; `gparams` is
// copied into the visitor. The alignment stream is filtered down to
// cDNA_match, EST_match and nucleotide_match features, written into the
// visitor's in-memory feature index, and then run through three inter-feature
// streams that use the type "match_gap". The whole pipeline is pulled to
// completion inside this constructor.
//
// A warning is printed to stderr if the four weights in `gparams` do not sum
// to 1.0 (tolerance 0.0001); construction continues regardless.
//
// Returns the new visitor, or NULL if pulling the pipeline reported an error
// (the message is printed to stderr and the visitor is deleted). All temporary
// objects (error object, type hashmap, streams, stream queue) are released on
// both the success and the failure path.
GtNodeVisitor* agn_gaeval_visitor_new(GtNodeStream *astream,
                                      AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta +
                         gparams.gamma + gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }

  // Set up node stream to load alignment features into memory
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");
  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(last_stream, error);
  if(result == -1)
  {
    // Report while the error object is still alive
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
  }

  // Release the temporaries on every path; previously the failure path
  // returned early and leaked all of them.
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  if(result == -1)
  {
    gt_node_visitor_delete(nv);
    return NULL;
  }
  return nv;
}
// Unit test for the infer-CDS visitor.
//
// infer_cds_visitor_test_data() fills a queue that is expected to hold exactly
// four feature nodes. For each of them, in queue order, the CDS features are
// selected with agn_typecheck_select() and the test checks the number of CDS
// segments plus the coordinates of one particular segment (and, for the second
// node, its strand). Each outcome is recorded with agn_unit_test_result().
// Returns the overall verdict from agn_unit_test_success().
bool agn_infer_cds_visitor_unit_test(AgnUnitTest *test)
{
  GtQueue *queue = gt_queue_new();
  infer_cds_visitor_test_data(queue);
  agn_assert(gt_queue_size(queue) == 4);

  GtFeatureNode *gene;
  GtArray *segments;
  bool pass;

  // Node 1: 4 CDS segments, second one at 349-522
  gene = gt_queue_get(queue);
  segments = agn_typecheck_select(gene, agn_typecheck_cds);
  pass = false;
  if(gt_array_size(segments) == 4)
  {
    GtGenomeNode *seg = *(GtGenomeNode **)gt_array_get(segments, 1);
    GtRange span = gt_genome_node_get_range(seg);
    pass = (span.start == 349 && span.end == 522);
  }
  agn_unit_test_result(test, "grape test sans UTRs", pass);
  gt_genome_node_delete((GtGenomeNode *)gene);
  gt_array_delete(segments);

  // Node 2: single CDS segment at 10747-11577 on the reverse strand
  gene = gt_queue_get(queue);
  segments = agn_typecheck_select(gene, agn_typecheck_cds);
  pass = false;
  if(gt_array_size(segments) == 1)
  {
    GtGenomeNode *seg = *(GtGenomeNode **)gt_array_get(segments, 0);
    GtRange span = gt_genome_node_get_range(seg);
    GtStrand orientation = gt_feature_node_get_strand((GtFeatureNode *)seg);
    pass = (span.start == 10747 && span.end == 11577 &&
            orientation == GT_STRAND_REVERSE);
  }
  agn_unit_test_result(test, "grape test with UTRs, strand check", pass);
  gt_genome_node_delete((GtGenomeNode *)gene);
  gt_array_delete(segments);

  // Node 3: 2 CDS segments, second one at 22651-23022
  gene = gt_queue_get(queue);
  segments = agn_typecheck_select(gene, agn_typecheck_cds);
  pass = false;
  if(gt_array_size(segments) == 2)
  {
    GtGenomeNode *seg = *(GtGenomeNode **)gt_array_get(segments, 1);
    GtRange span = gt_genome_node_get_range(seg);
    pass = (span.start == 22651 && span.end == 23022);
  }
  agn_unit_test_result(test, "grape test 3", pass);
  gt_genome_node_delete((GtGenomeNode *)gene);
  gt_array_delete(segments);

  // Node 4: 12 CDS segments, seventh one at 27956-27996
  gene = gt_queue_get(queue);
  segments = agn_typecheck_select(gene, agn_typecheck_cds);
  pass = false;
  if(gt_array_size(segments) == 12)
  {
    GtGenomeNode *seg = *(GtGenomeNode **)gt_array_get(segments, 6);
    GtRange span = gt_genome_node_get_range(seg);
    pass = (span.start == 27956 && span.end == 27996);
  }
  agn_unit_test_result(test, "grape test 4", pass);
  gt_genome_node_delete((GtGenomeNode *)gene);
  gt_array_delete(segments);

  // Drain and free anything still left in the queue
  while(gt_queue_size(queue) > 0)
  {
    GtGenomeNode *leftover = gt_queue_get(queue);
    gt_genome_node_delete(leftover);
  }
  gt_queue_delete(queue);

  return agn_unit_test_success(test);
}
// Main method int main(int argc, char * const *argv) { GtError *error; GtLogger *logger; GtQueue *streams; GtNodeStream *stream, *last_stream; CanonGFF3Options options = { NULL, NULL, false }; gt_lib_init(); error = gt_error_new(); canon_gff3_parse_options(argc, argv + 0, &options, error); streams = gt_queue_new(); logger = gt_logger_new(true, "", stderr); stream = gt_gff3_in_stream_new_unsorted(argc - optind, (const char **) argv+optind); gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)stream); gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)stream); gt_queue_add(streams, stream); last_stream = stream; if(options.infer) { GtHashmap *type_parents = gt_hashmap_new(GT_HASH_STRING, gt_free_func, gt_free_func); gt_hashmap_add(type_parents, gt_cstr_dup("mRNA"), gt_cstr_dup("gene")); gt_hashmap_add(type_parents, gt_cstr_dup("tRNA"), gt_cstr_dup("gene")); stream = agn_infer_parent_stream_new(last_stream, type_parents); gt_hashmap_delete(type_parents); gt_queue_add(streams, stream); last_stream = stream; } stream = agn_gene_stream_new(last_stream, logger); gt_queue_add(streams, stream); last_stream = stream; if(options.source != NULL) { GtNodeVisitor *ssv = gt_set_source_visitor_new(options.source); stream = gt_visitor_stream_new(last_stream, ssv); gt_queue_add(streams, stream); last_stream = stream; } stream = gt_gff3_out_stream_new(last_stream, options.outstream); if(!options.infer) gt_gff3_out_stream_retain_id_attributes((GtGFF3OutStream *)stream); gt_queue_add(streams, stream); last_stream = stream; if(gt_node_stream_pull(last_stream, error) == -1) { fprintf(stderr, "[CanonGFF3] error processing node stream: %s", gt_error_get(error)); } while(gt_queue_size(streams) > 0) { stream = gt_queue_get(streams); gt_node_stream_delete(stream); } gt_queue_delete(streams); if(options.source != NULL) gt_str_delete(options.source); if(options.outstream != NULL) gt_file_delete(options.outstream); gt_error_delete(error); gt_logger_delete(logger); gt_lib_clean(); return 
0; }
/* Unit test for GtQueue.
   Exercises add/get/head/size, forward and reverse iteration, removal of
   individual elements and deletion with contents, using small integers cast
   to pointers as queue elements. The scenarios run one after another; each
   later scenario is skipped once <had_err> is set.
   Returns <had_err>: 0 if every check passed, non-zero otherwise.
   NOTE(review): gt_ensure() is presumably a macro that sets <had_err> when the
   condition is false (and skips the check if <had_err> is already set);
   check_queue, check_queue_reverse and fail_func are iteration callbacks
   defined elsewhere in the file -- confirm their exact semantics there. */
int gt_queue_unit_test(GtError *err)
{
  /* expected values handed to the forward/reverse iteration callbacks */
  long check_counter = 0, check_counter_reverse = 1023;
  unsigned long i;
  int had_err = 0;
  GtQueue *q;

  gt_error_check(err);

  /* without wraparound */
  q = gt_queue_new();
  gt_ensure(had_err, !gt_queue_size(q));
  /* fill with 0..1023, checking the size after every add */
  for (i = 0; !had_err && i < 1024; i++) {
    gt_queue_add(q, (void*) i);
    gt_ensure(had_err, gt_queue_size(q) == i + 1);
  }
  if (!had_err)
    had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
  if (!had_err) {
    had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                       &check_counter_reverse, err);
  }
  /* iterating with fail_func is expected to return non-zero */
  gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
  gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
  /* remove the element 0, leaving 1..1023 */
  if (!had_err) {
    gt_queue_remove(q, (void*) 0);
    gt_ensure(had_err, gt_queue_size(q) == 1023);
  }
  /* drain the rest in order, checking head, get and size each time */
  for (i = 1; !had_err && i < 1024; i++) {
    gt_ensure(had_err, gt_queue_head(q) == (void*) i);
    gt_ensure(had_err, gt_queue_get(q) == (void*) i);
    gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
  }
  gt_ensure(had_err, !gt_queue_size(q));
  gt_queue_delete(q);

  /* with wraparound (without full queue) */
  if (!had_err) {
    q = gt_queue_new();
    gt_ensure(had_err, !gt_queue_size(q));
    /* fill with 0..1023 */
    for (i = 0; !had_err && i < 1024; i++) {
      gt_queue_add(q, (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == i + 1);
    }
    check_counter = 0;
    check_counter_reverse = 1023;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                         &check_counter_reverse, err);
    }
    /* take out the first half (0..511) ... */
    for (i = 0; !had_err && i < 512; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) i);
      gt_ensure(had_err, gt_queue_get(q) == (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
    }
    /* ... and append 1024..1535, so the queue holds 512..1535 */
    for (i = 0; !had_err && i < 512; i++) {
      gt_queue_add(q, (void*) (i + 1024));
      gt_ensure(had_err, gt_queue_size(q) == 512 + i + 1);
    }
    check_counter = 512;
    check_counter_reverse = 1535;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                         &check_counter_reverse, err);
    }
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    /* remove the current first element (512), leaving 513..1535 */
    if (!had_err) {
      gt_queue_remove(q, (void*) 512);
      gt_ensure(had_err, gt_queue_size(q) == 1023);
    }
    /* drain the rest in order */
    for (i = 1; !had_err && i < 1024; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_get(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
    }
    gt_ensure(had_err, !gt_queue_size(q));
    gt_queue_delete(q);
  }

  /* with wraparound (with full queue) */
  if (!had_err) {
    q = gt_queue_new();
    gt_ensure(had_err, !gt_queue_size(q));
    /* fill with 0..1023 */
    for (i = 0; !had_err && i < 1024; i++) {
      gt_queue_add(q, (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == i + 1);
    }
    check_counter = 0;
    check_counter_reverse = 1023;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                         &check_counter_reverse, err);
    }
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    /* take out the first half (0..511) ... */
    for (i = 0; !had_err && i < 512; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) i);
      gt_ensure(had_err, gt_queue_get(q) == (void*) i);
      gt_ensure(had_err, gt_queue_size(q) == 1024 - i - 1);
    }
    /* ... and append 1024 more elements (1024..2047), giving 1536 in total */
    for (i = 0; !had_err && i < 1024; i++) {
      gt_queue_add(q, (void*) (i + 1024));
      gt_ensure(had_err, gt_queue_size(q) == 512 + i + 1);
    }
    check_counter = 512;
    check_counter_reverse = 2047;
    if (!had_err)
      had_err = gt_queue_iterate(q, check_queue, &check_counter, err);
    if (!had_err) {
      had_err = gt_queue_iterate_reverse(q, check_queue_reverse,
                                         &check_counter_reverse, err);
    }
    gt_ensure(had_err, gt_queue_iterate(q, fail_func, NULL, NULL));
    gt_ensure(had_err, gt_queue_iterate_reverse(q, fail_func, NULL, NULL));
    /* remove the current first element (512), leaving 513..2047 */
    if (!had_err) {
      gt_queue_remove(q, (void*) 512);
      gt_ensure(had_err, gt_queue_size(q) == 1535);
    }
    /* drain the rest in order */
    for (i = 1; !had_err && i < 1536; i++) {
      gt_ensure(had_err, gt_queue_head(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_get(q) == (void*) (512 + i));
      gt_ensure(had_err, gt_queue_size(q) == 1536 - i - 1);
    }
    gt_ensure(had_err, !gt_queue_size(q));
    gt_queue_delete(q);
  }

  /* test a corner case */
  /* alternating adds and gets on a queue that never holds more than two
     elements, including going down to empty and refilling */
  if (!had_err) {
    q = gt_queue_new();
    gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 0);
    if (!had_err)
      gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 1);
    gt_ensure(had_err, gt_queue_get(q));
    gt_ensure(had_err, gt_queue_size(q) == 0);
    gt_queue_delete(q);
  }

  /* gt_queue_remove() corner case */
  /* removing the only element leaves an empty queue */
  if (!had_err) {
    q = gt_queue_new();
    gt_queue_add(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 1);
    gt_queue_remove(q, (void*) 1);
    gt_ensure(had_err, gt_queue_size(q) == 0);
    gt_queue_delete(q);
  }

  /* gt_queue_remove() corner case */
  /* after two gets and two more adds the queue holds 2, 3, 4, 5; these are
     then removed in the order 4, 2, 5, 3 */
  if (!had_err) {
    q = gt_queue_new();
    gt_queue_add(q, (void*) 0);
    gt_queue_add(q, (void*) 1);
    gt_queue_add(q, (void*) 2);
    gt_queue_add(q, (void*) 3);
    gt_ensure(had_err, gt_queue_get(q) == (void*) 0);
    gt_ensure(had_err, gt_queue_get(q) == (void*) 1);
    gt_queue_add(q, (void*) 4);
    gt_queue_add(q, (void*) 5);
    gt_queue_remove(q, (void*) 4);
    gt_queue_remove(q, (void*) 2);
    gt_queue_remove(q, (void*) 5);
    gt_queue_remove(q, (void*) 3);
    gt_ensure(had_err, gt_queue_size(q) == 0);
    gt_queue_delete(q);
  }

  /* delete with contents */
  /* the two heap blocks added here are handed to
     gt_queue_delete_with_contents() together with the queue */
  if (!had_err) {
    q = gt_queue_new();
    gt_ensure(had_err, !gt_queue_size(q));
    if (!had_err)
      gt_queue_add(q, gt_calloc(1, 16));
    gt_ensure(had_err, gt_queue_size(q) == 1);
    if (!had_err)
      gt_queue_add(q, gt_calloc(1, 32));
    gt_ensure(had_err, gt_queue_size(q) == 2);
    gt_queue_delete_with_contents(q);
  }

  return had_err;
}