/**
 * Load features of a single type from one or more GFF3 files into memory.
 *
 * The files are read through an unsorted GFF3 input stream (ID attribute
 * checking and tidy mode enabled), passed through a filter stream whose type
 * map contains only ``type``, and collected in a new in-memory feature index.
 *
 * @param numfiles   number of entries in ``filenames``
 * @param filenames  file names handed to the GFF3 input stream
 * @param type       feature type to keep; the sole key (and value) of the
 *                   filter's type map
 * @param logger     receives an error message if pulling the stream fails
 * @returns          the populated feature index, or NULL if ``logger`` reports
 *                   an error after processing (the index is deleted then)
 */
GtFeatureIndex *agn_import_simple(int numfiles, const char **filenames,
                                  char *type, AgnLogger *logger)
{
  GtFeatureIndex *features = gt_feature_index_memory_new();

  // Input stream
  GtNodeStream *gff3 = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3);

  // Filter on the requested type, then store what survives in the index
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, type, type);
  GtNodeStream *filterstream = agn_filter_stream_new(gff3, typestokeep);
  GtNodeStream *featstream = gt_feature_out_stream_new(filterstream, features);

  // Drive the whole pipeline
  GtError *error = gt_error_new();
  int result = gt_node_stream_pull(featstream, error);
  if(result == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(error));
  }
  gt_error_delete(error);

  // Any logged error invalidates the result
  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(features);
    features = NULL;
  }

  gt_node_stream_delete(gff3);
  gt_node_stream_delete(filterstream);
  gt_node_stream_delete(featstream);

  // The type map was created here and was previously never released.
  // NOTE(review): assumes the filter stream keeps its own reference to the
  // map rather than taking over ours -- confirm in agn_filter_stream_new.
  gt_hashmap_delete(typestokeep);

  return features;
}
/**
 * Load gene features from one or more GFF3 files into memory, routing them
 * through a canonical gene stream on the way.
 *
 * Pipeline: unsorted GFF3 input stream (ID attribute checking and tidy mode
 * enabled) -> canonical gene stream -> feature output stream writing into a
 * new in-memory feature index.
 *
 * @param numfiles   number of entries in ``filenames``
 * @param filenames  file names handed to the GFF3 input stream
 * @param logger     passed to the canonical gene stream; also receives an
 *                   error message if pulling the stream fails
 * @returns          the populated feature index, or NULL if ``logger`` reports
 *                   an error after processing (the index is deleted then)
 */
GtFeatureIndex *agn_import_canonical(int numfiles, const char **filenames,
                                     AgnLogger *logger)
{
  // Head of the pipeline: raw GFF3 input
  GtNodeStream *instream = gt_gff3_in_stream_new_unsorted(numfiles, filenames);
  GtGFF3InStream *gff3in = (GtGFF3InStream *)instream;
  gt_gff3_in_stream_check_id_attributes(gff3in);
  gt_gff3_in_stream_enable_tidy_mode(gff3in);

  // Remaining stages and the index that collects their output
  GtFeatureIndex *index = gt_feature_index_memory_new();
  GtNodeStream *canonstream = agn_canon_gene_stream_new(instream, logger);
  GtNodeStream *outstream = gt_feature_out_stream_new(canonstream, index);

  // Run the pipeline to completion, reporting a failed pull via the logger
  GtError *err = gt_error_new();
  if(gt_node_stream_pull(outstream, err) == -1)
  {
    agn_logger_log_error(logger, "error processing node stream: %s",
                         gt_error_get(err));
  }
  gt_error_delete(err);

  // A logger in the error state means the caller gets nothing back
  GtFeatureIndex *loaded = index;
  if(agn_logger_has_error(logger))
  {
    gt_feature_index_delete(index);
    loaded = NULL;
  }

  gt_node_stream_delete(instream);
  gt_node_stream_delete(canonstream);
  gt_node_stream_delete(outstream);

  return loaded;
}
/**
 * Create a GAEVAL node visitor and preload it with alignment features.
 *
 * Nodes pulled from ``astream`` are filtered down to the types cDNA_match,
 * EST_match and nucleotide_match, stored in the visitor's in-memory alignment
 * index, and then passed through three inter-feature streams (one per type)
 * that are configured with the type name "match_gap".
 *
 * A warning is printed to stderr if the four weights in ``gparams`` (alpha,
 * beta, gamma, epsilon) do not sum to 1.0 within a tolerance of 0.0001; the
 * visitor is still created in that case.
 *
 * @param astream  node stream supplying alignment features; must not be NULL
 * @param gparams  parameters copied into the visitor
 * @returns        the new visitor, or NULL if pulling the alignment stream
 *                 fails (a message is printed to stderr)
 */
GtNodeVisitor* agn_gaeval_visitor_new(GtNodeStream *astream,
                                      AgnGaevalParams gparams)
{
  agn_assert(astream);

  // Create the node visitor
  GtNodeVisitor *nv = gt_node_visitor_create(gaeval_visitor_class());
  AgnGaevalVisitor *v = gaeval_visitor_cast(nv);
  v->alignments = gt_feature_index_memory_new();
  v->tsvout = NULL;
  v->params = gparams;

  // Check that sum of weights is 1.0
  double weights_total = gparams.alpha + gparams.beta + gparams.gamma +
                         gparams.epsilon;
  if(fabs(weights_total - 1.0) > 0.0001)
  {
    fprintf(stderr, "[AgnGaevalVisitor::agn_gaeval_visitor_new] warning: "
            "sum of weights is not 1.0 %.3lf; integrity calculations will be "
            "incorrect\n", weights_total);
  }

  // Set up node stream to load alignment features into memory. Every stage is
  // also recorded in a queue so that all of them can be released afterwards.
  GtQueue *streams = gt_queue_new();
  GtNodeStream *stream, *last_stream;
  GtHashmap *typestokeep = gt_hashmap_new(GT_HASH_STRING, NULL, NULL);
  gt_hashmap_add(typestokeep, "cDNA_match", "cDNA_match");
  gt_hashmap_add(typestokeep, "EST_match", "EST_match");
  gt_hashmap_add(typestokeep, "nucleotide_match", "nucleotide_match");
  stream = agn_filter_stream_new(astream, typestokeep);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_feature_out_stream_new(last_stream, v->alignments);
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "cDNA_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "EST_match", "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  stream = gt_inter_feature_stream_new(last_stream, "nucleotide_match",
                                       "match_gap");
  gt_queue_add(streams, stream);
  last_stream = stream;

  // Process the node stream
  GtError *error = gt_error_new();
  int failed = (gt_node_stream_pull(last_stream, error) == -1);
  if(failed)
  {
    fprintf(stderr, "[AEGeAn::AgnGaevalStream] error parsing alignments: %s\n",
            gt_error_get(error));
  }

  // Shared teardown. The failure branch used to return before reaching this
  // point, leaking the error object, the type map, the queue and every stream.
  gt_error_delete(error);
  gt_hashmap_delete(typestokeep);
  while(gt_queue_size(streams) > 0)
  {
    stream = gt_queue_get(streams);
    gt_node_stream_delete(stream);
  }
  gt_queue_delete(streams);

  // Release the visitor only after the streams are gone, so the streams are
  // torn down while the alignment index still exists, as on the success path.
  if(failed)
  {
    gt_node_visitor_delete(nv);
    return NULL;
  }

  return nv;
}
int gt_feature_in_stream_unit_test(GtError *error) { GtNodeStream *src, *dest; GtFeatureIndex *prefeat, *postfeat; GtRange range1, range1test, range2, range2test; prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); int result = gt_node_stream_pull(dest, error); if (result == -1) return -1; GtStrArray *seqids = gt_feature_index_get_seqids(postfeat, error); if (gt_str_array_size(seqids) != 2) { gt_error_set(error, "error in feature_in_stream unit test 1: expected 2 " "seqids, found "GT_WU"", gt_str_array_size(seqids)); return -1; } gt_str_array_delete(seqids); range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); gt_feature_in_stream_use_orig_ranges((GtFeatureInStream *)src); result = gt_node_stream_pull(dest, error); if (result == -1) return -1; range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; 
gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } range1test.start = 1; range1test.end = 100000; range2test.start = 1; range2test.end = 10000; gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); return 0; }