Ejemplo n.º 1
0
/* Unit test for gaeval_visitor_introns_confirmed(). Five reverse-strand
   introns on "chr" are first scored against an empty gap list (expected
   result 0.0) and then against five match gaps, three of which have
   coordinates identical to an intron (expected result 0.6). Both outcomes are
   recorded on <test> via agn_unit_test_result(). */
static void gv_test_introns_confirmed(AgnUnitTest *test)
{
  static const unsigned long intron_coords[][2] = {
    { 1000, 1170 }, { 1225, 1305 }, { 1950, 2110 },
    { 2545, 2655 }, { 2800, 2950 }
  };
  static const unsigned long gap_coords[][2] = {
    { 1000, 1170 }, { 1225, 1302 }, { 1950, 2110 },
    { 2575, 2655 }, { 2800, 2950 }
  };
  const unsigned long num_introns =
      sizeof intron_coords / sizeof intron_coords[0];
  const unsigned long num_gaps = sizeof gap_coords / sizeof gap_coords[0];
  const double tolerance = 0.0001;
  unsigned long k;
  double confirmed;

  GtStr *seqid = gt_str_new_cstr("chr");

  /* Intron features under evaluation. */
  GtArray *introns = gt_array_new( sizeof(GtGenomeNode *) );
  for(k = 0; k < num_introns; k++)
  {
    GtGenomeNode *node = gt_feature_node_new(seqid, "intron",
                                             intron_coords[k][0],
                                             intron_coords[k][1],
                                             GT_STRAND_REVERSE);
    gt_array_add(introns, node);
  }

  /* First pass: the gap list is still empty. */
  GtArray *gaps = gt_array_new( sizeof(GtGenomeNode *) );
  confirmed = gaeval_visitor_introns_confirmed(introns, gaps);
  agn_unit_test_result(test, "introns confirmed (no gaps)",
                       fabs(confirmed - 0.0) < tolerance);

  /* Second pass: populate the gap list and score again. */
  for(k = 0; k < num_gaps; k++)
  {
    GtGenomeNode *node = gt_feature_node_new(seqid, "match_gap",
                                             gap_coords[k][0],
                                             gap_coords[k][1],
                                             GT_STRAND_REVERSE);
    gt_array_add(gaps, node);
  }
  confirmed = gaeval_visitor_introns_confirmed(introns, gaps);
  agn_unit_test_result(test, "introns confirmed (gaps)",
                       fabs(confirmed - 0.6) < tolerance);

  /* Release every node created above, then the containers and the seqid. */
  for(; gt_array_size(introns) > 0; )
    gt_genome_node_delete(*(GtGenomeNode **)gt_array_pop(introns));
  gt_array_delete(introns);

  for(; gt_array_size(gaps) > 0; )
    gt_genome_node_delete(*(GtGenomeNode **)gt_array_pop(gaps));
  gt_array_delete(gaps);

  gt_str_delete(seqid);
}
Ejemplo n.º 2
0
/* Recursively walks the is_a out-edges starting at <node>. <node_stack> holds
   the nodes on the current recursion path, i.e. the nodes from which <node>
   was reached. For every column set in row <node->num> of <part_of_in_edges>
   and every node on the stack, the corresponding entries are set in both
   <part_of_out_edges> and <part_of_in_edges>. <node> is pushed onto the stack
   while its is_a out-edges are visited and popped again before returning. */
static void create_transitive_part_of_edges(GtTypeNode *node,
                                            GtBoolMatrix *part_of_out_edges,
                                            GtBoolMatrix *part_of_in_edges,
                                            GtArray *node_stack)
{
  unsigned long target, depth, edge;
  if (gt_array_size(node_stack) > 0) {
    target = gt_bool_matrix_get_first_column(part_of_in_edges, node->num);
    while (target !=
           gt_bool_matrix_get_last_column(part_of_in_edges, node->num)) {
      /* record the edge for every node currently on the recursion path */
      for (depth = 0; depth < gt_array_size(node_stack); depth++) {
        GtTypeNode *descendant = *(GtTypeNode**) gt_array_get(node_stack,
                                                              depth);
        gt_bool_matrix_set(part_of_out_edges, target, descendant->num, true);
        gt_bool_matrix_set(part_of_in_edges, descendant->num, target, true);
      }
      target = gt_bool_matrix_get_next_column(part_of_in_edges, node->num,
                                              target);
    }
  }
  /* push, recurse into all is_a out-edges, pop */
  gt_array_add(node_stack, node);
  for (edge = 0; edge < gt_array_size(node->is_a_out_edges); edge++) {
    GtTypeNode *is_a_target = *(GtTypeNode**) gt_array_get(node->is_a_out_edges,
                                                           edge);
    create_transitive_part_of_edges(is_a_target, part_of_out_edges,
                                    part_of_in_edges, node_stack);
  }
  gt_array_pop(node_stack);
}
Ejemplo n.º 3
0
/* Loads the test annotation "data/gff3/grape-codons.gff3" through a GFF3
   input stream (ID attribute checking and tidy mode enabled), an
   infer-CDS stream and an array output stream, then moves the collected
   feature nodes into <queue>. The nodes are sorted with
   agn_genome_node_compare before being queued. A pull failure is reported on
   stderr; whatever was collected is still queued. */
static void infer_cds_visitor_test_data(GtQueue *queue)
{
  const char *file = "data/gff3/grape-codons.gff3";
  GtError *error = gt_error_new();

  /* Assemble the stream pipeline: GFF3 input -> infer CDS -> array sink. */
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &file);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream *)gff3in);
  gt_gff3_in_stream_enable_tidy_mode((GtGFF3InStream *)gff3in);
  GtLogger *logger = gt_logger_new(true, "", stderr);
  GtNodeStream *icv_stream = agn_infer_cds_stream_new(gff3in, NULL, logger);
  GtArray *collected = gt_array_new( sizeof(GtFeatureNode *) );
  GtNodeStream *arraystream = gt_array_out_stream_new(icv_stream, collected,
                                                      error);

  if(gt_node_stream_pull(arraystream, error) == -1)
  {
    fprintf(stderr, "[AgnInferCDSVisitor::infer_cds_visitor_test_data] error "
            "processing features: %s\n", gt_error_get(error));
  }

  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(icv_stream);
  gt_node_stream_delete(arraystream);
  gt_logger_delete(logger);

  /* Sort, then reverse, so that popping from the end of the array hands the
     nodes to the queue in sorted order. */
  gt_array_sort(collected, (GtCompare)agn_genome_node_compare);
  gt_array_reverse(collected);
  for(; gt_array_size(collected) > 0; )
  {
    GtFeatureNode *feature = *(GtFeatureNode **)gt_array_pop(collected);
    gt_queue_add(queue, feature);
  }

  gt_array_delete(collected);
  gt_error_delete(error);
}
Ejemplo n.º 4
0
/* Releases a unit test object: frees the test label, frees the label of every
   stored result, deletes the results array and finally frees <test> itself. */
void agn_unit_test_delete(AgnUnitTest *test)
{
    GtArray *results = test->results;

    gt_free(test->label);
    for(; gt_array_size(results) > 0; )
    {
        /* only the label of each result is freed individually */
        UnitTestResult *entry = gt_array_pop(results);
        gt_free(entry->label);
    }
    gt_array_delete(results);
    gt_free(test);
}
Ejemplo n.º 5
0
/* Returns the next feature node of the iteration, or NULL once the iterator's
   feature stack is empty. Unless the iterator is in direct mode, the children
   of the returned node (if it has any) are pushed onto the stack so that they
   are visited by subsequent calls. */
GtFeatureNode* gt_feature_node_iterator_next(GtFeatureNodeIterator *fni)
{
  GtFeatureNode *next;
  gt_assert(fni);
  if (gt_array_size(fni->feature_stack) == 0)
    return NULL;
  /* take the top of the stack */
  next = *(GtFeatureNode**) gt_array_pop(fni->feature_stack);
  /* direct iteration or a childless node: nothing to push */
  if (fni->direct || !next->children)
    return next;
  add_children_to_stack(fni->feature_stack, next);
  return next;
}
Ejemplo n.º 6
0
/* Node stream "next" callback for GtFeatureInStream.
 *
 * On the first call the stream is initialized via feature_in_stream_init().
 * Cached region nodes are delivered first. Afterwards the features of each
 * sequence ID in <stream->seqids> are fetched from the feature index, sorted
 * with gt_genome_node_compare and delivered one per call; every delivered
 * feature is handed out as a new reference.
 *
 * Parameters:
 *   ns     the node stream (must be a GtFeatureInStream)
 *   gn     receives the next node, or NULL when the stream is exhausted
 *   error  error object passed on to the feature index
 *
 * Returns 0 on success (including end of stream) and -1 if the feature index
 * failed to provide the features of a sequence ID.
 */
static int feature_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                   GtError *error)
{
  GtFeatureInStream *stream = feature_in_stream_cast(ns);
  gt_error_check(error);

  if (!stream->init)
  {
    feature_in_stream_init(stream);
    stream->init = true;
  }

  if (gt_queue_size(stream->regioncache) > 0)
  {
    GtGenomeNode *region = gt_queue_get(stream->regioncache);
    *gn = region;
    return 0;
  }

  /* Refill the feature cache until it holds at least one feature. Looping
     (rather than refilling once) skips sequence IDs that have no features
     instead of popping from an empty array. */
  while (stream->featurecache == NULL ||
         gt_array_size(stream->featurecache) == 0)
  {
    if (stream->featurecache != NULL)
    {
      gt_array_delete(stream->featurecache);
      stream->featurecache = NULL;
    }

    if (stream->seqindex == gt_str_array_size(stream->seqids))
    {
      /* all sequence IDs consumed: end of stream */
      *gn = NULL;
      return 0;
    }

    const char *seqid = gt_str_array_get(stream->seqids, stream->seqindex++);
    stream->featurecache = gt_feature_index_get_features_for_seqid(stream->fi,
                                                                   seqid,
                                                                   error);
    if (stream->featurecache == NULL)
    {
      /* the index could not deliver the features; propagate the failure
         instead of dereferencing a NULL array below */
      *gn = NULL;
      return -1;
    }

    if (gt_array_size(stream->featurecache) > 1)
    {
      /* sorted and reversed so that popping from the end yields the features
         in sorted order */
      gt_array_sort(stream->featurecache, (GtCompare)gt_genome_node_compare);
      gt_array_reverse(stream->featurecache);
    }
  }

  GtGenomeNode *feat = *(GtGenomeNode **)gt_array_pop(stream->featurecache);
  *gn = gt_genome_node_ref(feat);
  return 0;
}
Ejemplo n.º 7
0
/* Advances the tool iterator. Returns false if the tool stack is empty.
   Otherwise the top entry is popped, its name and tool are stored in <name>
   and <tool>, and true is returned. If a prefix pointer is registered on the
   iterator it is reset and, when the entry has a prefix, filled with that
   prefix followed by the prefix separator. If the popped tool is a toolbox,
   its tools are collected via add_tool_to_stack and pushed onto the stack;
   otherwise the entry's prefix is deleted. */
bool gt_tool_iterator_next(GtToolIterator *tool_iterator, const char **name,
                           GtTool **tool)
{
  ToolIterationInfo collect;
  ToolEntry *current;
  GtToolbox *box;
  GtArray *subtools;
  GtStr *subprefix;
  gt_assert(tool_iterator && name && tool);
  if (!gt_array_size(tool_iterator->tool_stack))
    return false;

  /* NOTE(review): <current> is obtained from gt_array_pop() and is only read
     before new entries are appended to the stack below -- keep it that way. */
  current = gt_array_pop(tool_iterator->tool_stack);
  *name = current->name;
  *tool = current->tool;
  if (tool_iterator->prefixptr) {
    gt_str_reset(tool_iterator->prefixptr);
    if (current->prefix) {
      gt_str_append_str(tool_iterator->prefixptr, current->prefix);
      gt_str_append_char(tool_iterator->prefixptr, tool_iterator->prefixsep);
    }
  }

  /* plain tool: nothing to expand */
  if (!gt_tool_is_toolbox(current->tool)) {
    gt_str_delete(current->prefix);
    return true;
  }

  /* toolbox: the prefix handed to the collector is the entry's own prefix
     (or the empty string) with the entry's name appended */
  subprefix = gt_str_new_cstr(current->prefix ? gt_str_get(current->prefix)
                                              : "");
  gt_str_append_cstr(subprefix, current->name);
  box = gt_tool_get_toolbox(current->tool);
  subtools = gt_array_new(sizeof (ToolEntry));
  collect.arr = subtools;
  collect.str = subprefix;
  gt_toolbox_iterate(box, add_tool_to_stack, &collect);
  if (gt_array_size(subtools)) {
    gt_array_reverse(subtools); /* alphabetical order */
    gt_array_add_array(tool_iterator->tool_stack, subtools);
  }
  gt_array_delete(subtools);
  gt_str_delete(subprefix);
  return true;
}
Ejemplo n.º 8
0
/* Unit test for gaeval_visitor_intersect(). Loads nine features from
   "data/gff3/gaeval-stream-unit-test-1.gff3", treats the features at indices
   1, 3 and 7 as gene models and the remaining ones as alignments, and checks
   the ranges returned for selected gene model/alignment pairs. Five results
   ("intersect (1)" .. "intersect (5)") are recorded on <test>.

   "intersect (1)" passes only if the g1/est1 pair yields NULL AND the g1/est2
   pair yields exactly the range 400-500.

   If the input file cannot be processed, a message is printed to stderr, all
   resources acquired so far are released and no results are recorded. */
static void gv_test_intersect(AgnUnitTest *test)
{
  GtArray *feats = gt_array_new( sizeof(GtFeatureNode *) );
  GtError *error = gt_error_new();
  const char *filename = "data/gff3/gaeval-stream-unit-test-1.gff3";
  GtNodeStream *gff3in = gt_gff3_in_stream_new_unsorted(1, &filename);
  GtNodeStream *fstream = gt_array_out_stream_new(gff3in, feats, error);
  int result = gt_node_stream_pull(fstream, error);
  if(result == -1)
  {
    fprintf(stderr, "[AgnGaevalVisitor::gv_test_intersect] error "
            "processing GFF3: %s\n", gt_error_get(error));
  }
  gt_error_delete(error);
  gt_node_stream_delete(gff3in);
  gt_node_stream_delete(fstream);
  if(result == -1)
  {
    /* Release whatever was pulled before the failure; the nodes collected in
       <feats> have to be deleted individually, as on the success path. */
    while(gt_array_size(feats) > 0)
    {
      GtGenomeNode *partial = *(GtGenomeNode **)gt_array_pop(feats);
      gt_genome_node_delete(partial);
    }
    gt_array_delete(feats);
    return;
  }

  agn_assert(gt_array_size(feats) == 9);
  GtGenomeNode *g1 = *(GtGenomeNode **)gt_array_get(feats, 1);
  GtGenomeNode *g2 = *(GtGenomeNode **)gt_array_get(feats, 3);
  GtGenomeNode *g3 = *(GtGenomeNode **)gt_array_get(feats, 7);

  GtGenomeNode *est1 = *(GtGenomeNode **)gt_array_get(feats, 0);
  GtGenomeNode *est2 = *(GtGenomeNode **)gt_array_get(feats, 2);
  GtGenomeNode *est3 = *(GtGenomeNode **)gt_array_get(feats, 4);
  GtGenomeNode *est4 = *(GtGenomeNode **)gt_array_get(feats, 5);
  GtGenomeNode *est5 = *(GtGenomeNode **)gt_array_get(feats, 6);
  GtGenomeNode *est6 = *(GtGenomeNode **)gt_array_get(feats, 8);

  /* g1 vs. est1: no intersection expected, signalled by a NULL result. */
  GtArray *cov = gaeval_visitor_intersect(g1, est1);
  bool test1 = cov == NULL;
  if(cov != NULL)
  {
    /* unexpected result: release it so it is not leaked when <cov> is
       reassigned below */
    gt_array_delete(cov);
  }

  /* g1 vs. est2: a single range expected. The outcome of the NULL check above
     is kept, so both conditions must hold for "intersect (1)" to pass. */
  cov = gaeval_visitor_intersect(g1, est2);
  test1 = test1 && gt_array_size(cov) == 1;
  if(test1)
  {
    GtRange *range01 = gt_array_pop(cov);
    GtRange testrange = { 400, 500 };
    test1 = gt_range_compare(range01, &testrange) == 0;
  }
  agn_unit_test_result(test, "intersect (1)", test1);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est3);
  bool test2 = gt_array_size(cov) == 2;
  if(test2)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 800, 900 };
    GtRange testrange2 = { 1050, 1075 };
    test2 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (2)", test2);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g2, est4);
  bool test3 = gt_array_size(cov) == 2;
  if(test3)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 1070, 1125 };
    GtRange testrange2 = { 1250, 1310 };
    test3 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (3)", test3);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est5);
  bool test4 = gt_array_size(cov) == 2;
  if(test4)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2000, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test4 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (4)", test4);
  gt_array_delete(cov);

  cov = gaeval_visitor_intersect(g3, est6);
  bool test5 = gt_array_size(cov) == 2;
  if(test5)
  {
    GtRange *range01 = gt_array_get(cov, 0);
    GtRange *range02 = gt_array_get(cov, 1);
    GtRange testrange1 = { 2500, 3000 };
    GtRange testrange2 = { 4000, 5000 };
    test5 = gt_range_compare(range01, &testrange1) == 0 &&
            gt_range_compare(range02, &testrange2) == 0;
  }
  agn_unit_test_result(test, "intersect (5)", test5);
  gt_array_delete(cov);

  /* The array only holds the node pointers; each node is released
     individually. */
  gt_array_delete(feats);
  gt_genome_node_delete(g1);
  gt_genome_node_delete(g2);
  gt_genome_node_delete(g3);
  gt_genome_node_delete(est1);
  gt_genome_node_delete(est2);
  gt_genome_node_delete(est3);
  gt_genome_node_delete(est4);
  gt_genome_node_delete(est5);
  gt_genome_node_delete(est6);
}
Ejemplo n.º 9
0
/* Computes the integrity score of a gene model.
 *
 * The score is the weighted sum
 *   alpha * structure + beta * coverage + gamma * 5'UTR + epsilon * 3'UTR
 * with the weights taken from <v->params>.
 *
 *  - 5'UTR / 3'UTR score: 1.0 if the UTR is at least as long as the expected
 *    length from <v->params>, otherwise observed length / expected length.
 *  - structure score: for a gene model without introns, the CDS length
 *    relative to the expected CDS length (capped at 1.0); otherwise the value
 *    of gaeval_visitor_introns_confirmed() for the model's introns and the
 *    gap features selected from all alignments overlapping the model's range.
 *
 * Parameters:
 *   v           visitor providing the alignment index and the parameters
 *   genemodel   gene model to score
 *   coverage    precomputed coverage value, used as is
 *   components  if not NULL, receives the four component scores in the order
 *               structure, coverage, 5'UTR, 3'UTR (needs room for 4 doubles)
 *   error       error object passed on to the feature index queries
 *
 * Returns the integrity score.
 */
static double gaeval_visitor_calculate_integrity(AgnGaevalVisitor *v,
                                                 GtFeatureNode *genemodel,
                                                 double coverage,
                                                 double *components,
                                                 GtError *error)
{
  agn_assert(v && genemodel);

  GtStr *seqid = gt_genome_node_get_seqid((GtGenomeNode *)genemodel);
  GtRange mrna_range = gt_genome_node_get_range((GtGenomeNode *)genemodel);
  GtArray *overlapping = gt_array_new( sizeof(GtFeatureNode *) );

  /* Initialized so that a failing lookup, which may leave the flag untouched,
     is treated as "no alignments on this sequence" rather than reading an
     indeterminate value. */
  bool hasseqid = false;
  int lookup = gt_feature_index_has_seqid(v->alignments, &hasseqid,
                                          gt_str_get(seqid), error);
  if(lookup == 0 && hasseqid)
  {
    gt_feature_index_get_features_for_range(v->alignments, overlapping,
                                            gt_str_get(seqid), &mrna_range,
                                            error);
  }

  /* Collect the gap features of every overlapping alignment. */
  GtArray *gaps = gt_array_new( sizeof(GtFeatureNode *) );
  while(gt_array_size(overlapping) > 0)
  {
    GtFeatureNode *alignment = *(GtFeatureNode **)gt_array_pop(overlapping);
    GtArray *agaps = agn_typecheck_select(alignment,
                                          gaeval_visitor_typecheck_gap);
    gt_array_add_array(gaps, agaps);
    gt_array_delete(agaps);
  }
  gt_array_delete(overlapping);

  GtUword utr5p_len = agn_mrna_5putr_length(genemodel);
  double utr5p_score = 0.0;
  if(utr5p_len >= v->params.exp_5putr_len)
    utr5p_score = 1.0;
  else
    utr5p_score = (double)utr5p_len / (double)v->params.exp_5putr_len;

  GtUword utr3p_len = agn_mrna_3putr_length(genemodel);
  double utr3p_score = 0.0;
  if(utr3p_len >= v->params.exp_3putr_len)
    utr3p_score = 1.0;
  else
    utr3p_score = (double)utr3p_len / (double)v->params.exp_3putr_len;

  GtArray *introns = agn_typecheck_select(genemodel, agn_typecheck_intron);
  GtUword exoncount = agn_typecheck_count(genemodel, agn_typecheck_exon);
  agn_assert(gt_array_size(introns) == exoncount - 1);
  double structure_score = 0.0;
  if(gt_array_size(introns) == 0)
  {
    /* no introns: score the structure by CDS length instead */
    GtUword cdslen = agn_mrna_cds_length(genemodel);
    if(cdslen >= v->params.exp_cds_len)
      structure_score = 1.0;
    else
      structure_score = (double)cdslen / (double)v->params.exp_cds_len;
  }
  else
  {
    structure_score = gaeval_visitor_introns_confirmed(introns, gaps);
  }
  gt_array_delete(gaps);
  gt_array_delete(introns);

  double integrity = (v->params.alpha   * structure_score) +
                     (v->params.beta    * coverage)        +
                     (v->params.gamma   * utr5p_score)     +
                     (v->params.epsilon * utr3p_score);
  if(components != NULL)
  {
    components[0] = structure_score;
    components[1] = coverage;
    components[2] = utr5p_score;
    components[3] = utr3p_score;
  }

  return integrity;
}