// Build the union of the sequence IDs held by two feature indexes.
//
// refrfeats: feature index queried for the reference seqids
// predfeats: feature index queried for the prediction seqids
// logger:    receives one error message per failed seqid lookup
//
// Returns the result of agn_gt_str_array_union() on the two seqid lists, or
// NULL when agn_logger_has_error(logger) is true after both lookups.
GtStrArray* agn_seq_union(GtFeatureIndex *refrfeats, GtFeatureIndex *predfeats,
                          AgnLogger *logger)
{
  GtStrArray *refr_ids, *pred_ids;
  GtStrArray *merged = NULL;
  GtError *error = gt_error_new();

  // Reference seqids; the error object is cleared so it can be reused below
  refr_ids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }

  // Prediction seqids
  pred_ids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  // Only compute the union when the logger reports no error
  if(!agn_logger_has_error(logger))
    merged = agn_gt_str_array_union(refr_ids, pred_ids);

  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return merged;
}
/* Lua binding: fetches the sequence ids of the feature index found at stack
   position 1 and pushes them onto the Lua stack as a table.
   Returns 1 (the number of values pushed). */
static int feature_index_lua_get_seqids(lua_State *L)
{
  GtFeatureIndex **fi = check_feature_index(L, 1);
  GtStrArray *ids = gt_feature_index_get_seqids(*fi);

  gt_assert(ids);

  /* the table is built from the array; the array is released afterwards */
  gt_lua_push_strarray_as_table(L, ids);
  gt_str_array_delete(ids);

  return 1;
}
/* Lua binding: fetches the sequence ids of the feature index found at stack
   position 1 and pushes them onto the Lua stack as a table.
   Returns 1 (the number of values pushed) on success; when the lookup yields
   no array, the result of gt_lua_error() is returned instead. */
static int feature_index_lua_get_seqids(lua_State *L)
{
  GtFeatureIndex **fi = check_feature_index(L, 1);
  GtError *lookup_err = gt_error_new();
  GtStrArray *ids = gt_feature_index_get_seqids(*fi, lookup_err);

  if (ids == NULL) {
    /* NOTE(review): the error object is not deleted on this path; presumably
       gt_lua_error() takes ownership of it -- confirm */
    return gt_lua_error(L, lookup_err);
  }
  gt_error_delete(lookup_err);

  /* the table is built from the array; the array is released afterwards */
  gt_lua_push_strarray_as_table(L, ids);
  gt_str_array_delete(ids);

  return 1;
}
/* Prepares <stream> for use: stores the seqids of the stream's feature index,
   resets the seqid cursor to 0, and queues one region node per seqid in the
   stream's region cache. Depending on stream->useorig, the region bounds come
   from the "orig range" lookup or from the regular range lookup.

   NOTE(review): neither the seqid lookup nor the range lookups are checked
   for failure; the local error object is simply deleted at the end. */
void feature_in_stream_init(GtFeatureInStream *stream)
{
  GtError *err = gt_error_new();
  GtUword idx = 0;

  stream->seqids = gt_feature_index_get_seqids(stream->fi, err);
  stream->seqindex = 0;

  while (idx < gt_str_array_size(stream->seqids)) {
    const char *id = gt_str_array_get(stream->seqids, idx);
    GtRange bounds;
    GtStr *id_str;
    GtGenomeNode *region;

    if (stream->useorig) {
      gt_feature_index_get_orig_range_for_seqid(stream->fi, &bounds, id, err);
    }
    else {
      gt_feature_index_get_range_for_seqid(stream->fi, &bounds, id, err);
    }

    /* the temporary GtStr is released once the region node is queued */
    id_str = gt_str_new_cstr(id);
    region = gt_region_node_new(id_str, bounds.start, bounds.end);
    gt_queue_add(stream->regioncache, region);
    gt_str_delete(id_str);

    idx++;
  }

  gt_error_delete(err);
}
int gt_feature_in_stream_unit_test(GtError *error) { GtNodeStream *src, *dest; GtFeatureIndex *prefeat, *postfeat; GtRange range1, range1test, range2, range2test; prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); int result = gt_node_stream_pull(dest, error); if (result == -1) return -1; GtStrArray *seqids = gt_feature_index_get_seqids(postfeat, error); if (gt_str_array_size(seqids) != 2) { gt_error_set(error, "error in feature_in_stream unit test 1: expected 2 " "seqids, found "GT_WU"", gt_str_array_size(seqids)); return -1; } gt_str_array_delete(seqids); range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); prefeat = in_stream_test_data(error); postfeat = gt_feature_index_memory_new(); src = gt_feature_in_stream_new(prefeat); dest = gt_feature_out_stream_new(src, postfeat); gt_feature_in_stream_use_orig_ranges((GtFeatureInStream *)src); result = gt_node_stream_pull(dest, error); if (result == -1) return -1; range1test.start = 500; range1test.end = 75000; range2test.start = 4000; range2test.end = 9500; 
gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } range1test.start = 1; range1test.end = 100000; range2test.start = 1; range2test.end = 10000; gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error); gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error); if (gt_range_compare(&range1, &range1test) || gt_range_compare(&range2, &range2test)) { gt_error_set(error, "error in feature_in_stream unit test 1: incorrect " "sequence regions"); return -1; } gt_feature_index_delete(prefeat); gt_feature_index_delete(postfeat); gt_node_stream_delete(src); gt_node_stream_delete(dest); return 0; }
/* to be called from implementing class! */
/* Generic unit test for a feature index implementation.

   <fi> is the feature index under test, <err> receives error information.

   The test adds a region node for GT_FI_TEST_SEQID, verifies the seqid and
   its range, creates GT_FI_TEST_FEATURES_PER_THREAD*gt_jobs feature nodes
   with random coordinates, and then runs gt_feature_index_unit_test_add and
   gt_feature_index_unit_test_query through gt_multithread() with a shared
   GtFeatureIndexTestShared context.

   Returns had_err, which starts at 0.
   NOTE(review): gt_ensure() is defined elsewhere; it presumably records a
   failed check in the local had_err/err -- confirm against its definition. */
int gt_feature_index_unit_test(GtFeatureIndex *fi, GtError *err)
{
  int had_err = 0, i, rval;
  GtFeatureIndexTestShared sh;
  GtStrArray *seqids;
  GtStr *seqid;
  GtRange check_range;
  GtRegionNode *rn;
  bool has_seqid;
  gt_error_check(err);

  /* set up the context handed to both gt_multithread() callbacks below; it
     carries its own error object (sh.err), separate from <err> */
  sh.mutex = gt_mutex_new();
  sh.nodes = gt_array_new(sizeof (GtFeatureNode*));
  sh.error_count = 0;
  sh.next_node_idx = 0;
  sh.fi = fi;
  sh.err = gt_error_new();

  /* create region */
  seqid = gt_str_new_cstr(GT_FI_TEST_SEQID);
  rn = (GtRegionNode*) gt_region_node_new(seqid, GT_FI_TEST_START, GT_FI_TEST_END);

  /* test seqid is not supposed to exist */
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid, GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(!has_seqid);

  /* add a sequence region directly and check if it has been added */
  rval = gt_feature_index_add_region_node(sh.fi, rn, err);
  gt_ensure(rval == 0);
  /* the local region node is released right after it has been added */
  gt_genome_node_delete((GtGenomeNode*) rn);
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid, GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(has_seqid);

  /* the range reported for the seqid must match the region node's bounds;
     the return value of the range lookup itself is not checked */
  gt_feature_index_get_range_for_seqid(sh.fi, &check_range, GT_FI_TEST_SEQID, err);
  gt_ensure(check_range.start == GT_FI_TEST_START && check_range.end == GT_FI_TEST_END);

  /* set up nodes to store */
  for (i=0;i<GT_FI_TEST_FEATURES_PER_THREAD*gt_jobs;i++) {
    GtUword start, end;
    GtFeatureNode *fn;
    /* start is below GT_FI_TEST_END - GT_FI_TEST_FEATURE_WIDTH and the
       feature spans fewer than GT_FI_TEST_FEATURE_WIDTH positions, so end
       stays below GT_FI_TEST_END */
    start = random() % (GT_FI_TEST_END - GT_FI_TEST_FEATURE_WIDTH);
    end = start + random() % (GT_FI_TEST_FEATURE_WIDTH);
    fn = gt_feature_node_cast(gt_feature_node_new(seqid, "gene", start, end, GT_STRAND_FORWARD));
    gt_array_add(sh.nodes, fn);
  }

  /* test parallel addition */
  /* note: unlike the query step below, this runs regardless of had_err */
  gt_multithread(gt_feature_index_unit_test_add, &sh, err);

  /* after the additions the index must still report exactly one seqid, the
     test seqid */
  seqids = gt_feature_index_get_seqids(fi, err);
  gt_ensure(seqids);
  gt_ensure(gt_feature_index_has_seqid(fi, &has_seqid,GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(has_seqid);
  gt_ensure(gt_str_array_size(seqids) == 1);

  /* test parallel query */
  if (!had_err)
    gt_multithread(gt_feature_index_unit_test_query, &sh, err);
  gt_ensure(sh.error_count == 0);

  /* release everything created by this function; the feature nodes stored in
     sh.nodes are not deleted individually here */
  gt_mutex_delete(sh.mutex);
  gt_error_delete(sh.err);
  gt_str_array_delete(seqids);
  gt_array_delete(sh.nodes);
  gt_str_delete(seqid);
  return had_err;
}
// Build the intersection of the sequence IDs held by two feature indexes and
// report every sequence that appears on one side only.
//
// refrfeats: feature index queried for the reference seqids
// predfeats: feature index queried for the prediction seqids
// logger:    receives lookup errors, one warning per unshared sequence, and
//            an error when the intersection is empty
//
// Returns the result of agn_gt_str_array_intersection() on the two seqid
// lists (also when it is empty), or NULL when agn_logger_has_error(logger)
// is true after both lookups.
GtStrArray* agn_seq_intersection(GtFeatureIndex *refrfeats,
                                 GtFeatureIndex *predfeats, AgnLogger *logger)
{
  GtStrArray *refr_ids, *pred_ids, *shared;
  GtUword outer, inner;
  GtError *error = gt_error_new();

  // Reference seqids; the error object is cleared so it can be reused below
  refr_ids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }

  // Prediction seqids
  pred_ids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  if(agn_logger_has_error(logger))
  {
    gt_str_array_delete(refr_ids);
    gt_str_array_delete(pred_ids);
    return NULL;
  }

  shared = agn_gt_str_array_intersection(refr_ids, pred_ids);

  // Warn about reference sequences that are absent from the intersection
  for(outer = 0; outer < gt_str_array_size(refr_ids); outer++)
  {
    const char *candidate = gt_str_array_get(refr_ids, outer);
    int found = 0;
    for(inner = 0; !found && inner < gt_str_array_size(shared); inner++)
      found = (strcmp(candidate, gt_str_array_get(shared, inner)) == 0);

    if(!found)
    {
      agn_logger_log_warning(logger, "no prediction annotations found for "
                             "sequence '%s'", candidate);
    }
  }

  // Warn about prediction sequences that are absent from the intersection
  for(outer = 0; outer < gt_str_array_size(pred_ids); outer++)
  {
    const char *candidate = gt_str_array_get(pred_ids, outer);
    int found = 0;
    for(inner = 0; !found && inner < gt_str_array_size(shared); inner++)
      found = (strcmp(candidate, gt_str_array_get(shared, inner)) == 0);

    if(!found)
    {
      agn_logger_log_warning(logger, "no reference annotations found for "
                             "sequence '%s'", candidate);
    }
  }

  // An empty intersection is logged as an error but still returned
  if(gt_str_array_size(shared) == 0)
  {
    agn_logger_log_error(logger, "no sequences in common between reference and "
                         "prediction");
  }

  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return shared;
}