Ejemplo n.º 1
0
static GtArray*
gaeval_visitor_intersect(GtGenomeNode *genemodel, GtGenomeNode *alignment)
{
  agn_assert(genemodel && alignment);

  GtFeatureNode *genefn = gt_feature_node_cast(genemodel);
  GtFeatureNode *algnfn = gt_feature_node_cast(alignment);
  agn_assert(gt_feature_node_has_type(genefn, "mRNA"));
  GtStrand genestrand = gt_feature_node_get_strand(genefn);
  GtStrand algnstrand = gt_feature_node_get_strand(algnfn);
  if(genestrand != algnstrand)
    return NULL;

  GtArray *covered_parts = gt_array_new( sizeof(GtRange) );
  GtArray *exons = agn_typecheck_select(genefn, agn_typecheck_exon);
  GtWord i;
  for(i = 0; i < gt_array_size(exons); i++)
  {
    GtGenomeNode *exon = *(GtGenomeNode **)gt_array_get(exons, i);
    GtRange exonrange = gt_genome_node_get_range(exon);

    GtFeatureNodeIterator *aniter = gt_feature_node_iterator_new(algnfn);
    GtFeatureNode *tempaln;
    GtRange nullrange = {0, 0};
    for(tempaln  = gt_feature_node_iterator_next(aniter);
        tempaln != NULL;
        tempaln  = gt_feature_node_iterator_next(aniter))
    {
      if(gt_feature_node_has_type(tempaln, "match_gap"))
        continue;

      GtRange alnrange = gt_genome_node_get_range((GtGenomeNode *) tempaln);
      GtRange intr = gaeval_visitor_range_intersect(&exonrange, &alnrange);
      if(gt_range_compare(&intr, &nullrange) != 0)
        gt_array_add(covered_parts, intr);
    }
    gt_feature_node_iterator_delete(aniter);
  }
  gt_array_delete(exons);

  for(i = 0; i < gt_array_size(covered_parts); i++)
  {
    GtRange *r1 = gt_array_get(covered_parts, i);
    GtUword j;
    for(j = i+1; j < gt_array_size(covered_parts); j++)
    {
      GtRange *r2 = gt_array_get(covered_parts, j);
      agn_assert(gt_range_overlap(r1, r2) == false);
    }
  }

  return covered_parts;
}
Ejemplo n.º 2
0
static void feature_node_change_seqid(GtGenomeNode *gn, GtStr *seqid)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);
  gt_assert(fn && seqid);
  gt_str_delete(fn->seqid);
  fn->seqid = gt_str_ref(seqid);
}
Ejemplo n.º 3
0
GtGenomeNode* gt_feature_node_new(GtStr *seqid, const char *type,
                                  GtUword start, GtUword end,
                                  GtStrand strand)
{
  GtGenomeNode *gn;
  GtFeatureNode *fn;
  gt_assert(seqid && type);
  gt_assert(start <= end);
  gn = gt_genome_node_create(gt_feature_node_class());
  fn = gt_feature_node_cast(gn);
  fn->seqid       = gt_str_ref(seqid);
  fn->source      = NULL;
  fn->type        = gt_symbol(type);
  fn->score       = GT_UNDEF_FLOAT;
  fn->range.start = start;
  fn->range.end   = end;
  fn->representative = NULL;
  fn->attributes  = NULL;
  fn->bit_field   = 0;
  fn->bit_field  |= strand << STRAND_OFFSET;
  fn->children    = NULL; /* the children list is create on demand */
  fn->observer    = NULL;
  gt_feature_node_set_phase(fn, GT_PHASE_UNDEFINED);
  set_transcriptfeaturetype(fn, TRANSCRIPT_FEATURE_TYPE_UNDETERMINED);
  set_tree_status(&fn->bit_field, IS_TREE);
  /* the DFS status is set to DFS_WHITE already */
  fn->representative = NULL;
  return gn;
}
static int feature_index_lua_add_feature_node(lua_State *L)
{
  GtFeatureIndex **fi;
  GtGenomeNode **gn;
  GtFeatureNode *fn;
  GtStr *seqid;
  GtError *err;
  bool has_seqid;
  gt_assert(L);
  fi = check_feature_index(L, 1);
  gn = check_genome_node(L, 2);
  fn = gt_feature_node_cast(*gn);
  luaL_argcheck(L, fn, 2, "not a feature node");
  seqid = gt_genome_node_get_seqid(*gn);
  luaL_argcheck(L, seqid, 2, "feature does not have a sequence id");
  err = gt_error_new();
  if (gt_feature_index_has_seqid(*fi, &has_seqid, gt_str_get(seqid), err))
    return gt_lua_error(L, err);
  gt_error_delete(err);
  luaL_argcheck(L, has_seqid, 2,
                "feature index does not contain corresponding sequence region");
  err = gt_error_new();
  if (gt_feature_index_add_feature_node(*fi, fn, err))
    return gt_lua_error(L, err);
  gt_error_delete(err);
  return 0;
}
Ejemplo n.º 5
0
int gt_diagram_unit_test(GtError *err)
{
  int had_err = 0;
  GtGenomeNode *gn;
  GtDiagramTestShared sh;
  GtRange testrng = {100, 10000};
  gt_error_check(err);

  gn = gt_feature_node_new_standard_gene();
  sh.fi = gt_feature_index_memory_new();
  sh.sty = gt_style_new(err);
  sh.err = err;
  sh.errstatus = 0;
  gt_feature_index_add_feature_node(sh.fi, gt_feature_node_cast(gn), err);
  gt_genome_node_delete(gn);
  sh.d = gt_diagram_new(sh.fi, "ctg123", &testrng, sh.sty, err);

  /* removed the multithreading test for now until it is fixed */
  gt_diagram_unit_test_sketch_func(&sh);
  gt_ensure(sh.errstatus == 0);

  gt_style_delete(sh.sty);
  gt_diagram_delete(sh.d);
  gt_feature_index_delete(sh.fi);

  return had_err;
}
Ejemplo n.º 6
0
static void feature_node_set_range(GtGenomeNode *gn, const GtRange *range)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);
  fn->range = *range;
  if (fn->observer && fn->observer->range_changed)
    fn->observer->range_changed(fn, &(fn->range), fn->observer->data);
}
Ejemplo n.º 7
0
static GtFeatureIndex *in_stream_test_data(GtError *error)
{
  GtFeatureIndex *fi;
  GtFeatureNode *fn;
  GtGenomeNode *gn;
  GtRegionNode *rn;
  GtStr *seqid;

  fi = gt_feature_index_memory_new();

  seqid = gt_str_new_cstr("chr1");
  gn = gt_region_node_new(seqid, 1, 100000);
  rn = gt_region_node_cast(gn);
  gt_feature_index_add_region_node(fi, rn, error);
  gt_genome_node_delete(gn);

  gn = gt_feature_node_new(seqid, "region", 500, 5000, GT_STRAND_BOTH);
  fn = gt_feature_node_cast(gn);
  gt_feature_index_add_feature_node(fi, fn, error);
  gt_genome_node_delete(gn);

  gn = gt_feature_node_new(seqid, "region", 50000, 75000, GT_STRAND_BOTH);
  fn = gt_feature_node_cast(gn);
  gt_feature_index_add_feature_node(fi, fn, error);
  gt_genome_node_delete(gn);

  gt_str_delete(seqid);
  seqid = gt_str_new_cstr("scf0001");
  gn = gt_region_node_new(seqid, 1, 10000);
  rn = gt_region_node_cast(gn);
  gt_feature_index_add_region_node(fi, rn, error);
  gt_genome_node_delete(gn);

  gn = gt_feature_node_new(seqid, "mRNA", 4000, 6000, GT_STRAND_REVERSE);
  fn = gt_feature_node_cast(gn);
  gt_feature_index_add_feature_node(fi, fn, error);
  gt_genome_node_delete(gn);

  gn = gt_feature_node_new(seqid, "mRNA", 7000, 9500, GT_STRAND_FORWARD);
  fn = gt_feature_node_cast(gn);
  gt_feature_index_add_feature_node(fi, fn, error);
  gt_genome_node_delete(gn);

  gt_str_delete(seqid);

  return fi;
}
Ejemplo n.º 8
0
static int feature_node_accept(GtGenomeNode *gn, GtNodeVisitor *nv,
                               GtError *err)
{
  GtFeatureNode *fn;
  gt_error_check(err);
  fn = gt_feature_node_cast(gn);
  return gt_node_visitor_visit_feature_node(nv, fn, err);
}
Ejemplo n.º 9
0
static int genome_node_lua_contains_marked(lua_State *L)
{
  GtGenomeNode **gn;
  gn = check_genome_node(L, 1);
  lua_pushboolean(L,
                  gt_feature_node_contains_marked(gt_feature_node_cast(*gn)));
  return 1;
}
Ejemplo n.º 10
0
GtGenomeNode* gt_feature_node_new_pseudo(GtStr *seqid, GtUword start,
                                         GtUword end, GtStrand strand)
{
  GtFeatureNode *pf;
  GtGenomeNode *pn;
  gt_assert(seqid);
  gt_assert(start <= end);
  pn = gt_feature_node_new(seqid, "pseudo", start, end, strand);
  pf = gt_feature_node_cast(pn);
  pf->type = NULL; /* pseudo features do not have a type */
  pf->bit_field |= 1 << PSEUDO_FEATURE_OFFSET;
  return pn;
}
Ejemplo n.º 11
0
GtGenomeNode* gt_feature_node_new_pseudo_template(GtFeatureNode *fn)
{
  GtFeatureNode *pf;
  GtGenomeNode *pn;
  GtRange range;
  gt_assert(fn);
  range = feature_node_get_range((GtGenomeNode*) fn),
  pn = gt_feature_node_new_pseudo(feature_node_get_seqid((GtGenomeNode*) fn),
                                  range.start, range.end,
                                  gt_feature_node_get_strand(fn));
  pf = gt_feature_node_cast(pn);
  gt_feature_node_set_source(pf, fn->source);
  return pn;
}
Ejemplo n.º 12
0
static int extracttarget_from_node(GtGenomeNode *gn, GtStrArray *seqfiles,
                                   GtError *err)
{
  GtFeatureNodeIterator *fni;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(gn && seqfiles);
  if (gt_genome_node_cast(gt_feature_node_class(), gn)) {
    const char *target;
    GtFeatureNode *child;
    fni = gt_feature_node_iterator_new(gt_feature_node_cast(gn));
    while (!had_err && /* XXX remove cast */
           (child = (GtFeatureNode*) gt_feature_node_iterator_next(fni))) {
      if ((target = gt_feature_node_get_attribute(child, "Target")))
        had_err = extracttarget_from_seqfiles(target, seqfiles, err);
    }
    gt_feature_node_iterator_delete(fni);
  }
  return had_err;
}
Ejemplo n.º 13
0
static void feature_node_free(GtGenomeNode *gn)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);
  gt_str_delete(fn->seqid);
  gt_str_delete(fn->source);
  gt_tag_value_map_delete(fn->attributes);
  if (fn->children) {
    GtDlistelem *dlistelem;
    for (dlistelem = gt_dlist_first(fn->children);
         dlistelem != NULL;
         dlistelem = gt_dlistelem_next(dlistelem)) {
      gt_genome_node_delete(gt_dlistelem_get_data(dlistelem));
    }
  }
  gt_dlist_delete(fn->children);
  if (fn->observer && fn->observer->deleted)
    fn->observer->deleted(fn, fn->observer->data);
  if (fn->observer)
    gt_feature_node_observer_delete(fn->observer);
}
static int extract_join_feature(GtGenomeNode *gn, const char *type,
                                GtRegionMapping *region_mapping,
                                GtStr *sequence, bool *reverse_strand,
                                bool *first_child_of_type_seen, GtPhase *phase,
                                GtError *err)
{
  char *outsequence;
  GtFeatureNode *fn;
  GtRange range;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_feature_node_cast(gn);
  gt_assert(fn);

  if (gt_feature_node_has_type(fn, type)) {
    if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
      *reverse_strand = true;
      *phase = gt_feature_node_get_phase(fn);
    } else {
      if (!(*first_child_of_type_seen)) {
        *first_child_of_type_seen = true;
        *phase = gt_feature_node_get_phase(fn);
      } else *phase = GT_PHASE_UNDEFINED;
    }
    range = gt_genome_node_get_range(gn);
    had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                             gt_genome_node_get_seqid(gn),
                                             range.start, range.end, err);
    if (!had_err) {
      gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
      gt_free(outsequence);
    }
  }
  return had_err;
}
Ejemplo n.º 15
0
/* to be called from implementing class! */
int gt_feature_index_unit_test(GtFeatureIndex *fi, GtError *err)
{
  int had_err = 0, i, rval;
  GtFeatureIndexTestShared sh;
  GtStrArray *seqids;
  GtStr *seqid;
  GtRange check_range;
  GtRegionNode *rn;
  bool has_seqid;
  gt_error_check(err);

  sh.mutex = gt_mutex_new();
  sh.nodes = gt_array_new(sizeof (GtFeatureNode*));
  sh.error_count = 0;
  sh.next_node_idx = 0;
  sh.fi = fi;
  sh.err = gt_error_new();

  /* create region */
  seqid = gt_str_new_cstr(GT_FI_TEST_SEQID);
  rn = (GtRegionNode*) gt_region_node_new(seqid, GT_FI_TEST_START,
                                          GT_FI_TEST_END);

  /* test seqid is not supposed to exist */
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid,
                                                 GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(!has_seqid);

  /* add a sequence region directly and check if it has been added */
  rval = gt_feature_index_add_region_node(sh.fi, rn, err);
  gt_ensure(rval == 0);
  gt_genome_node_delete((GtGenomeNode*) rn);
  gt_ensure(gt_feature_index_has_seqid(sh.fi, &has_seqid,
                                                GT_FI_TEST_SEQID, err) == 0);
  gt_ensure(has_seqid);

  gt_feature_index_get_range_for_seqid(sh.fi, &check_range, GT_FI_TEST_SEQID,
                                       err);
  gt_ensure(check_range.start == GT_FI_TEST_START
                    && check_range.end == GT_FI_TEST_END);

  /* set up nodes to store */
  for (i=0;i<GT_FI_TEST_FEATURES_PER_THREAD*gt_jobs;i++) {
    GtUword start, end;
    GtFeatureNode *fn;
    start = random() % (GT_FI_TEST_END - GT_FI_TEST_FEATURE_WIDTH);
    end = start + random() % (GT_FI_TEST_FEATURE_WIDTH);
    fn = gt_feature_node_cast(gt_feature_node_new(seqid, "gene", start, end,
                                                  GT_STRAND_FORWARD));
    gt_array_add(sh.nodes, fn);
  }
  /* test parallel addition */
  gt_multithread(gt_feature_index_unit_test_add, &sh, err);
  seqids = gt_feature_index_get_seqids(fi, err);
  gt_ensure(seqids);
  gt_ensure(gt_feature_index_has_seqid(fi, &has_seqid,GT_FI_TEST_SEQID,
                                                err) == 0);
  gt_ensure(has_seqid);
  gt_ensure(gt_str_array_size(seqids) == 1);

  /* test parallel query */
  if (!had_err)
    gt_multithread(gt_feature_index_unit_test_query, &sh, err);
  gt_ensure(sh.error_count == 0);

  gt_mutex_delete(sh.mutex);
  gt_error_delete(sh.err);
  gt_str_array_delete(seqids);
  gt_array_delete(sh.nodes);
  gt_str_delete(seqid);
  return had_err;
}
Ejemplo n.º 16
0
static int genome_node_lua_mark(lua_State *L)
{
  GtGenomeNode **gn = check_genome_node(L, 1);
  gt_feature_node_mark(gt_feature_node_cast(*gn));
  return 0;
}
static int gt_extract_feature_sequence_generic(GtStr *sequence,
                                GtGenomeNode *gn,
                                const char *type, bool join, GtStr *seqid,
                                GtStrArray *target_ids,
                                unsigned int *out_phase_offset,
                                GtRegionMapping *region_mapping, GtError *err)
{
  GtFeatureNode *fn;
  GtRange range;
  unsigned int phase_offset = 0;
  char *outsequence;
  const char *target;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_genome_node_cast(gt_feature_node_class(), gn);
  gt_assert(fn);

  if (seqid)
    gt_str_append_str(seqid, gt_genome_node_get_seqid(gn));
  if (target_ids &&
      (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) {
    had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                         target_ids, NULL,
                                                         NULL, "", 0, err);
  }
  if (!had_err) {
    if (join) {
      GtFeatureNodeIterator *fni;
      GtFeatureNode *child;
      bool reverse_strand = false,
           first_child = true,
           first_child_of_type_seen = false;
      GtPhase phase = GT_PHASE_UNDEFINED;
      /* in this case we have to traverse the children */
      fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn));
      while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
        if (first_child) {
          if (target_ids &&
               (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) {
            gt_str_array_reset(target_ids);
            had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                                 target_ids,
                                                                 NULL,
                                                                 NULL, "", 0,
                                                                 err);
          }
          first_child = false;
        }
        if (!had_err) {
          if (extract_join_feature((GtGenomeNode*) child, type, region_mapping,
                                   sequence, &reverse_strand,
                                   &first_child_of_type_seen,
                                   &phase, err)) {
            had_err = -1;
          }
          if (phase != GT_PHASE_UNDEFINED) {
            phase_offset = (int) phase;
          }
        }
      }
      gt_feature_node_iterator_delete(fni);
      gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED);
      if (!had_err && gt_str_length(sequence)) {
        if (reverse_strand) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
    else if (gt_feature_node_get_type(fn) == type) {
      GtPhase phase = gt_feature_node_get_phase(fn);
      gt_assert(!had_err);
      if (phase != GT_PHASE_UNDEFINED)
        phase_offset = (unsigned int) phase;
      /* otherwise we only have to look at this feature */
      range = gt_genome_node_get_range(gn);
      gt_assert(range.start); /* 1-based coordinates */
      had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                               gt_genome_node_get_seqid(gn),
                                               range.start, range.end, err);
      if (!had_err) {
        gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
        gt_free(outsequence);
        if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
  }
  if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) {
    *out_phase_offset = phase_offset;
  }
  return had_err;
}
Ejemplo n.º 18
0
static GtStr* feature_node_get_seqid(GtGenomeNode *gn)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);
  return fn->seqid;
}
Ejemplo n.º 19
0
static GtRange feature_node_get_range(GtGenomeNode *gn)
{
  GtFeatureNode *fn = gt_feature_node_cast(gn);
  return fn->range;
}