コード例 #1
0
ファイル: AgnUtils.c プロジェクト: jfdenton/AEGeAn
/**
 * Build the union of the sequence IDs found in the reference and prediction
 * feature indexes.
 *
 * Any failure while fetching seqids is reported through the logger. If the
 * logger holds an error once both lookups are done, NULL is returned;
 * otherwise the result of agn_gt_str_array_union() on the two ID lists is
 * returned. The two intermediate ID lists are always released before
 * returning.
 */
GtStrArray* agn_seq_union(GtFeatureIndex *refrfeats, GtFeatureIndex *predfeats,
                          AgnLogger *logger)
{
  GtError *error = gt_error_new();
  GtStrArray *refr_ids;
  GtStrArray *pred_ids;
  GtStrArray *merged = NULL;

  // Sequence IDs annotated in the reference
  refr_ids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    // Reset so the same error object can be reused for the next lookup
    gt_error_unset(error);
  }

  // Sequence IDs annotated in the prediction
  pred_ids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  // Only compute the union when the logger is free of errors
  if(!agn_logger_has_error(logger))
    merged = agn_gt_str_array_union(refr_ids, pred_ids);

  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return merged;
}
コード例 #2
0
/* Lua binding: fetches the sequence ids of the feature index passed as the
   first Lua argument and pushes them onto the Lua stack as a table.
   Returns 1 (a single value is pushed for the Lua caller). */
static int feature_index_lua_get_seqids(lua_State *L)
{
  GtFeatureIndex **fi = check_feature_index(L, 1);
  GtStrArray *ids = gt_feature_index_get_seqids(*fi);

  gt_assert(ids);
  /* hand the ids to Lua as a table, then drop our own reference */
  gt_lua_push_strarray_as_table(L, ids);
  gt_str_array_delete(ids);
  return 1;
}
コード例 #3
0
/* Lua binding: fetches the sequence ids of the feature index passed as the
   first Lua argument and pushes them onto the Lua stack as a table.
   Returns 1 on success (a single value is pushed); if no ids could be
   fetched, the result of gt_lua_error() is returned instead. */
static int feature_index_lua_get_seqids(lua_State *L)
{
  GtFeatureIndex **fi = check_feature_index(L, 1);
  GtError *error = gt_error_new();
  GtStrArray *ids = gt_feature_index_get_seqids(*fi, error);

  if (ids == NULL) {
    /* NOTE(review): the error object is not deleted on this path;
       presumably gt_lua_error() takes ownership of it -- confirm */
    return gt_lua_error(L, error);
  }
  gt_error_delete(error);

  /* hand the ids to Lua as a table, then drop our own reference */
  gt_lua_push_strarray_as_table(L, ids);
  gt_str_array_delete(ids);
  return 1;
}
コード例 #4
0
/* Prepare <stream> for iteration: fetch the sequence ids of its feature
   index, reset the sequence cursor, and queue one region node per sequence id
   in the stream's region cache. Depending on stream->useorig, the region
   node covers either the original range or the computed range of the
   sequence.

   The function cannot report errors to its caller (it returns void), so
   failures are handled defensively:
   - if no sequence ids can be fetched, no region nodes are queued;
   - if the range of a sequence id cannot be determined, that sequence id is
     skipped instead of building a region node from an uninitialized range. */
void feature_in_stream_init(GtFeatureInStream *stream)
{
  GtUword i;
  GtError *error = gt_error_new();

  stream->seqids = gt_feature_index_get_seqids(stream->fi, error);
  stream->seqindex = 0;
  if (stream->seqids == NULL)
  {
    /* nothing to iterate over; avoid handing NULL to gt_str_array_size() */
    gt_error_delete(error);
    return;
  }

  for (i = 0; i < gt_str_array_size(stream->seqids); i++)
  {
    const char *seqid = gt_str_array_get(stream->seqids, i);
    GtRange seqrange;
    GtStr *seqstr;
    GtGenomeNode *rn;
    int had_err;

    if (stream->useorig)
      had_err = gt_feature_index_get_orig_range_for_seqid(stream->fi, &seqrange,
                                                          seqid, error);
    else
      had_err = gt_feature_index_get_range_for_seqid(stream->fi, &seqrange,
                                                     seqid, error);
    if (had_err)
    {
      /* seqrange was not filled in; reset the error object so it can be
         reused for the next sequence id and move on */
      gt_error_unset(error);
      continue;
    }

    seqstr = gt_str_new_cstr(seqid);
    rn = gt_region_node_new(seqstr, seqrange.start, seqrange.end);
    gt_queue_add(stream->regioncache, rn);
    gt_str_delete(seqstr);
  }
  gt_error_delete(error);
}
コード例 #5
0
int gt_feature_in_stream_unit_test(GtError *error)
{
  GtNodeStream *src, *dest;
  GtFeatureIndex *prefeat, *postfeat;
  GtRange range1, range1test, range2, range2test;

  prefeat = in_stream_test_data(error);
  postfeat = gt_feature_index_memory_new();
  src = gt_feature_in_stream_new(prefeat);
  dest = gt_feature_out_stream_new(src, postfeat);
  int result = gt_node_stream_pull(dest, error);
  if (result == -1)
    return -1;

  GtStrArray *seqids = gt_feature_index_get_seqids(postfeat, error);
  if (gt_str_array_size(seqids) != 2)
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: expected 2 "
                 "seqids, found "GT_WU"", gt_str_array_size(seqids));
    return -1;
  }
  gt_str_array_delete(seqids);

  range1test.start = 500;  range1test.end = 75000;
  range2test.start = 4000; range2test.end = 9500;
  gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  gt_feature_index_delete(prefeat);
  gt_feature_index_delete(postfeat);
  gt_node_stream_delete(src);
  gt_node_stream_delete(dest);

  prefeat = in_stream_test_data(error);
  postfeat = gt_feature_index_memory_new();
  src = gt_feature_in_stream_new(prefeat);
  dest = gt_feature_out_stream_new(src, postfeat);
  gt_feature_in_stream_use_orig_ranges((GtFeatureInStream *)src);
  result = gt_node_stream_pull(dest, error);
  if (result == -1)
    return -1;

  range1test.start = 500;  range1test.end = 75000;
  range2test.start = 4000; range2test.end = 9500;
  gt_feature_index_get_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  range1test.start = 1; range1test.end = 100000;
  range2test.start = 1; range2test.end = 10000;
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range1, "chr1", error);
  gt_feature_index_get_orig_range_for_seqid(postfeat, &range2, "scf0001",error);
  if (gt_range_compare(&range1, &range1test) ||
      gt_range_compare(&range2, &range2test))
  {
    gt_error_set(error, "error in feature_in_stream unit test 1: incorrect "
                 "sequence regions");
    return -1;
  }
  gt_feature_index_delete(prefeat);
  gt_feature_index_delete(postfeat);
  gt_node_stream_delete(src);
  gt_node_stream_delete(dest);

  return 0;
}
コード例 #6
0
/* to be called from implementing class! */
/* Generic unit test for a feature index implementation <fi>.
   Steps, in order:
   - verify that the test seqid is absent, add a region node for it, then
     verify that it is present and that its range matches the test range;
   - create feature nodes with random coordinates and store them in the
     shared state <sh>;
   - run gt_feature_index_unit_test_add() via gt_multithread() and check that
     the index reports exactly one seqid;
   - run gt_feature_index_unit_test_query() via gt_multithread() and check
     that no errors were counted in the shared state.
   Returns <had_err>, which is initialized to 0.
   NOTE(review): <had_err> is never assigned explicitly in this function;
   presumably the gt_ensure() macro sets it (and <err>) when a check fails --
   confirm against the macro definition. */
int gt_feature_index_unit_test(GtFeatureIndex *fi, GtError *err)
{
  int had_err = 0, i, rval;
  GtFeatureIndexTestShared sh;
  GtStrArray *seqids;
  GtStr *seqid;
  GtRange check_range;
  GtRegionNode *rn;
  bool has_seqid;
  gt_error_check(err);

  /* state shared with the helper functions run via gt_multithread() */
  sh.mutex = gt_mutex_new();
  sh.nodes = gt_array_new(sizeof (GtFeatureNode*));
  sh.error_count = 0;
  sh.next_node_idx = 0;
  sh.fi = fi;
  sh.err = gt_error_new();

  /* create region */
  seqid = gt_str_new_cstr(GT_FI_TEST_SEQID);
  rn = (GtRegionNode*) gt_region_node_new(seqid, GT_FI_TEST_START,
                                          GT_FI_TEST_END);

  /* test seqid is not supposed to exist */
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid,
                                                 GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(!has_seqid);

  /* add a sequence region directly and check if it has been added */
  rval = gt_feature_index_add_region_node(sh.fi, rn, err);
  gt_ensure(rval == 0);
  gt_genome_node_delete((GtGenomeNode*) rn);
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid,
                                                GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(has_seqid);

  /* the range reported for the test seqid must equal the region just added;
     note that the return value of the range lookup itself is not checked */
  gt_feature_index_get_range_for_seqid(sh.fi, &check_range, GT_FI_TEST_SEQID,
                                       err);
  gt_ensure(check_range.start == GT_FI_TEST_START
                    && check_range.end == GT_FI_TEST_END);

  /* set up nodes to store */
  /* GT_FI_TEST_FEATURES_PER_THREAD*gt_jobs "gene" features are created
     (gt_jobs is presumably the number of worker threads -- confirm); each
     starts at a random offset below GT_FI_TEST_END - GT_FI_TEST_FEATURE_WIDTH
     and spans fewer than GT_FI_TEST_FEATURE_WIDTH positions */
  for (i=0;i<GT_FI_TEST_FEATURES_PER_THREAD*gt_jobs;i++) {
    GtUword start, end;
    GtFeatureNode *fn;
    start = random() % (GT_FI_TEST_END - GT_FI_TEST_FEATURE_WIDTH);
    end = start + random() % (GT_FI_TEST_FEATURE_WIDTH);
    fn = gt_feature_node_cast(gt_feature_node_new(seqid, "gene", start, end,
                                                  GT_STRAND_FORWARD));
    gt_array_add(sh.nodes, fn);
  }
  /* test parallel addition */
  /* note: this step and the seqid lookup below run regardless of <had_err> */
  gt_multithread(gt_feature_index_unit_test_add, &sh, err);
  seqids = gt_feature_index_get_seqids(fi, err);
  gt_ensure(seqids);
  gt_ensure(gt_feature_index_has_seqid(fi, &has_seqid,GT_FI_TEST_SEQID,
                                                err) == 0);
  gt_ensure(has_seqid);
  gt_ensure(gt_str_array_size(seqids) == 1);

  /* test parallel query */
  if (!had_err)
    gt_multithread(gt_feature_index_unit_test_query, &sh, err);
  gt_ensure(sh.error_count == 0);

  /* release everything allocated in this function; the feature nodes stored
     in sh.nodes are not deleted here (NOTE(review): presumably they are owned
     by the feature index after the parallel addition -- confirm) */
  gt_mutex_delete(sh.mutex);
  gt_error_delete(sh.err);
  gt_str_array_delete(seqids);
  gt_array_delete(sh.nodes);
  gt_str_delete(seqid);
  return had_err;
}
コード例 #7
0
ファイル: AgnUtils.c プロジェクト: jfdenton/AEGeAn
// Return 1 if the string `query` occurs in `seqids`, 0 otherwise
static int agn_seqid_in_array(GtStrArray *seqids, const char *query)
{
  GtUword k;
  for(k = 0; k < gt_str_array_size(seqids); k++)
  {
    if(strcmp(query, gt_str_array_get(seqids, k)) == 0)
      return 1;
  }
  return 0;
}

/**
 * Build the intersection of the sequence IDs found in the reference and
 * prediction feature indexes.
 *
 * Any failure while fetching seqids is reported through the logger; if the
 * logger holds an error once both lookups are done, NULL is returned.
 * Otherwise the result of agn_gt_str_array_intersection() is returned. A
 * warning is logged for every sequence ID that is present in only one of the
 * two inputs, and an error is logged (the possibly empty list is still
 * returned) when the intersection contains no sequence IDs.
 */
GtStrArray* agn_seq_intersection(GtFeatureIndex *refrfeats,
                                 GtFeatureIndex *predfeats, AgnLogger *logger)
{
  GtError *error = gt_error_new();
  GtStrArray *refr_ids;
  GtStrArray *pred_ids;
  GtStrArray *shared;
  GtUword idx;

  // Sequence IDs annotated in the reference
  refr_ids = gt_feature_index_get_seqids(refrfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for reference: %s",
                         gt_error_get(error));
    // Reset so the same error object can be reused for the next lookup
    gt_error_unset(error);
  }

  // Sequence IDs annotated in the prediction
  pred_ids = gt_feature_index_get_seqids(predfeats, error);
  if(gt_error_is_set(error))
  {
    agn_logger_log_error(logger, "error fetching seqids for prediction: %s",
                         gt_error_get(error));
    gt_error_unset(error);
  }
  gt_error_delete(error);

  // Give up if the logger holds an error at this point
  if(agn_logger_has_error(logger))
  {
    gt_str_array_delete(refr_ids);
    gt_str_array_delete(pred_ids);
    return NULL;
  }
  shared = agn_gt_str_array_intersection(refr_ids, pred_ids);

  // Warn about reference sequences that are missing from the intersection
  for(idx = 0; idx < gt_str_array_size(refr_ids); idx++)
  {
    const char *refr_id = gt_str_array_get(refr_ids, idx);
    if(!agn_seqid_in_array(shared, refr_id))
    {
      agn_logger_log_warning(logger, "no prediction annotations found for "
                             "sequence '%s'", refr_id);
    }
  }

  // Warn about prediction sequences that are missing from the intersection
  for(idx = 0; idx < gt_str_array_size(pred_ids); idx++)
  {
    const char *pred_id = gt_str_array_get(pred_ids, idx);
    if(!agn_seqid_in_array(shared, pred_id))
    {
      agn_logger_log_warning(logger, "no reference annotations found for "
                             "sequence '%s'", pred_id);
    }
  }

  // An empty intersection is reported as an error
  if(gt_str_array_size(shared) == 0)
  {
    agn_logger_log_error(logger, "no sequences in common between reference and "
                         "prediction");
  }

  gt_str_array_delete(refr_ids);
  gt_str_array_delete(pred_ids);
  return shared;
}