/* Recursively checks that the range of <child> lies within the range of
   <parent>, then descends into the direct children of <child> and checks each
   of them against <child>. A range violation is only reported through
   gt_warning(); it does not change the return value. Returns the first
   non-zero status produced by a recursive call, or 0 if there was none. */
static int check_boundaries_visitor_check_rec(GtFeatureNode *parent,
                                              GtFeatureNode *child,
                                              GtError *err)
{
  GtFeatureNodeIterator *fni;
  GtFeatureNode *node;
  GtRange range,
          p_range;
  int had_err = 0;

  range = gt_genome_node_get_range((GtGenomeNode*) child);
  p_range = gt_genome_node_get_range((GtGenomeNode*) parent);

  if (range.start < p_range.start || range.end > p_range.end) {
    gt_warning("%s child range " GT_WU "-" GT_WU " (file %s, line %u) not "
               "contained in %s parent range " GT_WU "-" GT_WU " (file %s, "
               "line %u)",
               gt_feature_node_get_type(child),
               range.start, range.end,
               gt_genome_node_get_filename((GtGenomeNode*) child),
               gt_genome_node_get_line_number((GtGenomeNode*) child),
               gt_feature_node_get_type(parent),
               p_range.start, p_range.end,
               gt_genome_node_get_filename((GtGenomeNode*) parent),
               gt_genome_node_get_line_number((GtGenomeNode*) parent));
  }

  /* all direct children are visited so that every violation gets reported */
  fni = gt_feature_node_iterator_new_direct(child);
  while ((node = gt_feature_node_iterator_next(fni))) {
    int rec_err = check_boundaries_visitor_check_rec(child, node, err);
    /* remember the first failure; a later sibling must not reset it */
    if (!had_err)
      had_err = rec_err;
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
Ejemplo n.º 2
0
/* Files <node> into the block of its type that is registered with <parent>,
   creating and captioning that block first if it does not exist yet. Nothing
   is done when <parent> is NULL. Returns 0 on success and -1 if
   assign_block_caption() failed. */
static int add_to_parent(GtDiagram *d, GtFeatureNode *node,
                         GtFeatureNode *parent, GtError *err)
{
  NodeInfoElement *parent_info, *node_info;
  GtBlock *target;
  gt_assert(d && node);

  if (parent == NULL)
    return 0;

  parent_info = nodeinfo_get(d, parent);
  node_info = nodeinfo_get(d, node);
  gt_log_log("adding %s to parent %p", gt_feature_node_get_type(node), parent);
  /* record the reverse link from the node to its parent */
  node_info->parent = parent;

  target = nodeinfo_find_block(parent_info, gt_feature_node_get_type(node),
                               parent);
  if (target == NULL) {
    /* no block for this type yet: build one from the parent node */
    target = gt_block_new_from_node(parent);
    gt_block_set_type(target, gt_feature_node_get_type(node));
    if (assign_block_caption(d, node, parent, target, err) < 0) {
      gt_block_delete(target);
      return -1;
    }
    nodeinfo_add_block(parent_info, gt_feature_node_get_type(node), parent,
                       target);
  }
  gt_assert(target);
  gt_block_insert_element(target, node);
  return 0;
}
/* Validates the LTR structure of the annotation subgraph rooted at <fn>.
   The subgraph is searched for an LTR_retrotransposon node and for
   long_terminal_repeat nodes (the first one found is taken as the left LTR,
   any later one as the right LTR). Returns -1 and sets <err> if
   - the visitor's only_ltrs flag is set and no LTR_retrotransposon was found,
   - an LTR_retrotransposon was found but fewer than two long_terminal_repeat
     nodes were seen;
   otherwise returns 0. */
static int gt_ltr_input_check_visitor_feature_node(GtNodeVisitor *nv,
                                                   GtFeatureNode *fn,
                                                   GtError *err)
{
  GtLTRInputCheckVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *cur = NULL,
                *retrotransposon = NULL,
                *left_ltr = NULL,
                *right_ltr = NULL;
  bool left_found = false;
  lv = gt_ltr_input_check_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* collect the LTR components of the subgraph */
  fni = gt_feature_node_iterator_new(fn);
  while ((cur = gt_feature_node_iterator_next(fni))) {
    const char *cur_type = gt_feature_node_get_type(cur);
    if (strcmp(cur_type, gt_ft_LTR_retrotransposon) == 0)
      retrotransposon = cur;
    if (strcmp(cur_type, gt_ft_long_terminal_repeat) == 0) {
      if (!left_found) {
        left_ltr = cur;
        left_found = true;
      }
      else
        right_ltr = cur;
    }
  }
  gt_feature_node_iterator_delete(fni);

  /* in LTR-only mode every component must hold a retrotransposon */
  if (lv->only_ltrs && !retrotransposon) {
    gt_error_set(err, "connected component with %s entry node (%s, line %u) "
                      "does not contain a '%s' node, which is required",
                 gt_feature_node_get_type(fn),
                 gt_genome_node_get_filename((GtGenomeNode*) fn),
                 gt_genome_node_get_line_number((GtGenomeNode*) fn),
                 gt_ft_LTR_retrotransposon);
    return -1;
  }

  /* a retrotransposon needs both of its LTRs */
  if (retrotransposon && !(left_ltr && right_ltr)) {
    gt_error_set(err, "LTR_retrotransposon feature (%s, line %u) "
                      "does not contain two %s child features, both of which "
                      "are required",
                 gt_genome_node_get_filename((GtGenomeNode*) retrotransposon),
                 gt_genome_node_get_line_number((GtGenomeNode*)
                                                retrotransposon),
                 gt_ft_long_terminal_repeat);
    return -1;
  }

  return 0;
}
Ejemplo n.º 4
0
/* Creates a new block for <node>, registers <node> as its own parent in the
   node info and stores the block there under GT_UNDEF_REPR.
   The caption comes from the "block_caption" style entry of the node type;
   if that entry is not set, a caption of the form "<parent>/<node>" is built
   from the names/IDs of <parent> and <node>, or no caption is set at all when
   neither is available or the caption display status is false.
   Returns 0 on success and -1 if querying the display status or the style
   failed. */
static int add_to_current(GtDiagram *d, GtFeatureNode *node,
                          GtFeatureNode *parent, GtError *err)
{
  GtBlock *new_block;
  NodeInfoElement *info;
  GtStyleQueryStatus query;
  GtStr *caption_str;
  bool show_caption = true;
  const char *parent_label = NULL,
             *node_label = NULL,
             *type;
  gt_assert(d && node);

  type = gt_feature_node_get_type(node);
  if (get_caption_display_status(d, type, &show_caption, err) < 0)
    return -1;

  /* the node becomes its own parent in the node info */
  info = nodeinfo_get(d, node);
  gt_log_log("adding %s to self", type);
  info->parent = node;

  new_block = gt_block_new_from_node(node);
  caption_str = gt_str_new();
  query = gt_style_get_str(d->style, type, "block_caption", caption_str, node,
                           err);
  if (query == GT_STYLE_QUERY_ERROR) {
    gt_str_delete(caption_str);
    gt_block_delete(new_block);
    return -1;
  }
  if (query == GT_STYLE_QUERY_NOT_SET) {
    /* no caption in the style: fall back to names/IDs */
    parent_label = get_node_name_or_id(parent);
    node_label = get_node_name_or_id(node);
    if (!(parent_label || node_label) || !show_caption) {
      gt_str_delete(caption_str);
      caption_str = NULL;
    } else {
      if (parent) {
        if (parent_label && gt_feature_node_has_children(parent))
          gt_str_append_cstr(caption_str, parent_label);
        else
          gt_str_append_cstr(caption_str, "-");
        gt_str_append_cstr(caption_str, "/");
      }
      if (node_label)
        gt_str_append_cstr(caption_str, node_label);
    }
  }

  gt_block_set_caption(new_block, caption_str);
  gt_block_insert_element(new_block, node);
  nodeinfo_add_block(info, gt_feature_node_get_type(node), GT_UNDEF_REPR,
                     new_block);
  return 0;
}
Ejemplo n.º 5
0
/* Runs the ORF finder on nodes of the subgraph rooted at <gf>.
   A node is processed if its type is a key in the visitor's type map, or if
   the map holds the value 1 under the key "all". In the "all" case only the
   first node delivered by the iterator is processed and the traversal stops.
   The node range is handed to run_orffinder() with both coordinates
   decremented by one.
   Returns 0 on success or the non-zero status of run_orffinder(), in which
   case the traversal stops as well.
   NOTE(review): an earlier version walked the children of each processed node
   here without acting on them (apparently an unfinished attempt to skip
   already attached ORF features); that no-op traversal was removed. */
static int gt_orf_finder_visitor_feature_node(GtNodeVisitor *gv,
                                              GtFeatureNode *gf,
                                              GtError *err)
{
  GtORFFinderVisitor *lv;
  GtFeatureNodeIterator *gfi;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  bool search_all;

  lv = gt_orf_finder_visitor_cast(gv);
  gt_assert(lv);
  gt_error_check(err);

  /* the "all" setting does not change during the traversal, look it up once */
  search_all = gt_hashmap_get(lv->types, (void*) "all") == (void*) 1;

  gfi = gt_feature_node_iterator_new(gf);

  while (!had_err && (curnode = gt_feature_node_iterator_next(gfi))) {
    const char *gft = gt_feature_node_get_type(curnode);
    GtRange rng;

    if (!search_all && gt_hashmap_get(lv->types, (void*) gft) == NULL)
      continue;

    rng = gt_genome_node_get_range((GtGenomeNode*) curnode);
    had_err = run_orffinder(lv->rmap, curnode, rng.start - 1, rng.end - 1,
                            lv->min, lv->max, lv->all, err);
    if (search_all)
      break;
  }

  gt_feature_node_iterator_delete(gfi);

  return had_err;
}
/* Creates a SNP annotator visitor for <gene>, which must be of type gene.
   The visitor takes new references to <gene> and <rmap>. If <trans_table> is
   NULL a standard translation table is created and flagged as owned by the
   visitor; otherwise the given table is used and flagged as not owned.
   Returns NULL (after deleting the visitor) if no translation table is
   available or if preparing the gene fails. */
GtNodeVisitor* gt_snp_annotator_visitor_new(GtFeatureNode *gene,
                                            GtTransTable *trans_table,
                                            GtRegionMapping *rmap,
                                            GtError *err)
{
  GtNodeVisitor *visitor;
  GtSNPAnnotatorVisitor *self;
  gt_assert(gene && gt_feature_node_get_type(gene) == gt_symbol(gt_ft_gene));

  visitor = gt_node_visitor_create(gt_snp_annotator_visitor_class());
  self = snp_annotator_visitor_cast(visitor);

  self->gene = (GtFeatureNode*) gt_genome_node_ref((GtGenomeNode*) gene);
  self->rmap = gt_region_mapping_ref(rmap);

  /* cache the type symbols the visitor compares against */
  self->mRNA_type = gt_symbol(gt_ft_mRNA);
  self->CDS_type = gt_symbol(gt_ft_CDS);
  self->SNV_type = gt_symbol(gt_ft_SNV);
  self->SNP_type = gt_symbol(gt_ft_SNP);

  self->rnaseqs = gt_hashmap_new(GT_HASH_DIRECT, NULL, gt_free_func);

  if (trans_table == NULL) {
    self->tt = gt_trans_table_new_standard(err);
    self->own_tt = true;
  } else {
    self->tt = trans_table;
    self->own_tt = false;
  }

  if (self->tt == NULL) {
    gt_node_visitor_delete(visitor);
    return NULL;
  }
  if (gt_snp_annotator_visitor_prepare_gene(self, err) != 0) {
    gt_node_visitor_delete(visitor);
    return NULL;
  }
  return visitor;
}
Ejemplo n.º 7
0
/* Appends the tab-separated leading columns of <fn> to <outstr>: seqid,
   source, type, start, end, score ('.' if undefined, otherwise formatted with
   "%.3g"), strand and phase. Every column, including the last one, is followed
   by a tab character. */
void gt_gff3_output_leading_str(GtFeatureNode *fn, GtStr *outstr)
{
  GtGenomeNode *genome_node;
  char strand_char, phase_char;
  gt_assert(fn && outstr);
  genome_node = (GtGenomeNode*) fn;

  /* seqid, source, type */
  gt_str_append_str(outstr, gt_genome_node_get_seqid(genome_node));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_source(fn));
  gt_str_append_char(outstr, '\t');
  gt_str_append_cstr(outstr, gt_feature_node_get_type(fn));
  gt_str_append_char(outstr, '\t');

  /* start, end */
  gt_str_append_uword(outstr, gt_genome_node_get_start(genome_node));
  gt_str_append_char(outstr, '\t');
  gt_str_append_uword(outstr, gt_genome_node_get_end(genome_node));
  gt_str_append_char(outstr, '\t');

  /* score */
  if (!gt_feature_node_score_is_defined(fn))
    gt_str_append_char(outstr, '.');
  else {
    char score[BUFSIZ];
    (void) snprintf(score, sizeof score, "%.3g",
                    gt_feature_node_get_score(fn));
    gt_str_append_cstr(outstr, score);
  }
  gt_str_append_char(outstr, '\t');

  /* strand, phase */
  strand_char = GT_STRAND_CHARS[gt_feature_node_get_strand(fn)];
  gt_str_append_char(outstr, strand_char);
  gt_str_append_char(outstr, '\t');
  phase_char = GT_PHASE_CHARS[gt_feature_node_get_phase(fn)];
  gt_str_append_char(outstr, phase_char);
  gt_str_append_char(outstr, '\t');
}
Ejemplo n.º 8
0
/* Follows the recorded parent links upwards, starting at <node>, until a node
   is reached that is its own parent; <original_node> is then inserted into
   that node's block (keyed by node type and by the multi-feature
   representative of <original_node>, or GT_UNDEF_REPR if it is not a
   multi-feature). The block is created if it is missing. On the way up the
   link <node> -> <parent> is stored in the node info. Nothing happens if
   <parent> is NULL; the walk ends silently if <parent> has no node info. */
static void add_recursive(GtDiagram *d, GtFeatureNode *node,
                          GtFeatureNode* parent,
                          GtFeatureNode *original_node)
{
  NodeInfoElement *node_info;
  GtFeatureNode *representative = GT_UNDEF_REPR;
  GtBlock *target;
  gt_assert(d && node && original_node);

  if (parent == NULL)
    return;

  node_info = nodeinfo_get(d, node);
  if (gt_feature_node_is_multi(original_node))
    representative = gt_feature_node_get_multi_representative(original_node);

  if (parent != node) {
    /* not at the target yet: store the reverse entry and climb further */
    NodeInfoElement *parent_info;
    node_info->parent = parent;
    parent_info = gt_hashmap_get(d->nodeinfo, parent);
    if (parent_info != NULL) {
      gt_log_log("recursion: %s -> %s", gt_feature_node_get_type(node),
                                        gt_feature_node_get_type(parent));
      add_recursive(d, parent, parent_info->parent, original_node);
    }
    return;
  }

  /* recursion ends here: put the original node into the target block */
  target = nodeinfo_find_block(node_info, gt_feature_node_get_type(node),
                               representative);
  if (target == NULL) {
    target = gt_block_new_from_node(node);
    nodeinfo_add_block(node_info, gt_feature_node_get_type(node),
                       representative, target);
  }
  gt_block_insert_element(target, original_node);
  gt_log_log("add %s to target %s", gt_feature_node_get_type(original_node),
                                    gt_block_get_type(target));
}
Ejemplo n.º 9
0
/* Delivers the next node of the filter stream in <*gn>.
   Nodes waiting in the cache are handed out first. Otherwise nodes are pulled
   from the input stream: a NULL node or a node that is not a feature node is
   passed through unchanged; for a feature node, every node of its subgraph
   whose type is a key of the typestokeep map is referenced and queued, the
   pulled node itself is then deleted, and the first queued node (if any) is
   returned. If nothing was queued the next input node is tried.
   Returns 0 on success or the non-zero status of the input stream. */
static int filter_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *error)
{
  AgnFilterStream *stream;
  gt_error_check(error);
  stream = filter_stream_cast(ns);

  /* serve leftovers from an earlier call first */
  if(gt_queue_size(stream->cache) > 0)
  {
    *gn = gt_queue_get(stream->cache);
    return 0;
  }

  for(;;)
  {
    GtFeatureNode *top, *sub;
    GtFeatureNodeIterator *walker;
    int status = gt_node_stream_next(stream->in_stream, gn, error);
    if(status)
      return status;
    if(*gn == NULL)
      return 0;

    top = gt_feature_node_try_cast(*gn);
    if(top == NULL)
      return 0;

    walker = gt_feature_node_iterator_new(top);
    while((sub = gt_feature_node_iterator_next(walker)) != NULL)
    {
      const char *type = gt_feature_node_get_type(sub);
      if(gt_hashmap_get(stream->typestokeep, type) == NULL)
        continue;

      /* take a reference so the node outlives the deletion of its root */
      gt_genome_node_ref((GtGenomeNode *)sub);
      gt_queue_add(stream->cache, sub);
    }
    gt_feature_node_iterator_delete(walker);
    gt_genome_node_delete((GtGenomeNode *)top);

    if(gt_queue_size(stream->cache) > 0)
    {
      *gn = gt_queue_get(stream->cache);
      return 0;
    }
  }
}
Ejemplo n.º 10
0
/* Lua binding: pushes the type string of the feature node given as argument 1.
   Raises a Lua argument error ("not a feature node") if the genome node is not
   a feature node. Returns the number of pushed results (1). */
static int feature_node_lua_get_type(lua_State *L)
{
  GtGenomeNode **node_arg = check_genome_node(L, 1);
  /* only feature nodes carry a type */
  GtFeatureNode *feature = gt_feature_node_try_cast(*node_arg);
  luaL_argcheck(L, feature, 1, "not a feature node");
  lua_pushstring(L, gt_feature_node_get_type(feature));
  return 1;
}
Ejemplo n.º 11
0
/* Files the multi-feature <node> into the block that belongs to its
   representative. If no block for the node type exists yet under the
   representative, one is created from <parent>, typed like <node>, captioned
   and registered. Returns 0 on success and -1 if assign_block_caption()
   failed. */
static int add_to_rep(GtDiagram *d, GtFeatureNode *node, GtFeatureNode* parent,
                      GtError *err)
{
  GtBlock *target;
  GtFeatureNode *representative;
  NodeInfoElement *rep_info;
  gt_assert(d && node && gt_feature_node_is_multi(node));

  representative = gt_feature_node_get_multi_representative(node);
  gt_log_log("adding %s to representative %p", gt_feature_node_get_type(node),
                                               representative);
  rep_info = nodeinfo_get(d, representative);

  target = nodeinfo_find_block(rep_info, gt_feature_node_get_type(node),
                               representative);
  if (target == NULL) {
    GtFeatureNode *caption_parent;
    target = gt_block_new_from_node(parent);
    gt_block_set_type(target, gt_feature_node_get_type(node));
    /* a pseudo parent means a multiline feature without a real parent; the
       parent must not be accessed for the caption in that case */
    caption_parent = gt_feature_node_is_pseudo(parent) ? NULL : parent;
    if (assign_block_caption(d, node, caption_parent, target, err) < 0) {
      gt_block_delete(target);
      return -1;
    }
    nodeinfo_add_block(rep_info, gt_feature_node_get_type(node),
                       representative, target);
  }
  gt_assert(target);
  gt_block_insert_element(target, node);
  return 0;
}
Ejemplo n.º 12
0
/* Creates a new block whose range, strand and type are copied from <node>.
   Unless <node> is a pseudo node, the block also stores a new reference to it
   as its top-level feature. */
GtBlock* gt_block_new_from_node(GtFeatureNode *node)
{
  GtBlock *new_block;
  gt_assert(node);

  new_block = gt_block_new();
  new_block->range = gt_genome_node_get_range((GtGenomeNode*) node);
  new_block->strand = gt_feature_node_get_strand(node);
  new_block->type = gt_feature_node_get_type(node);

  /* pseudo nodes are not kept as top-level feature */
  if (gt_feature_node_is_pseudo(node))
    return new_block;

  new_block->top_level_feature = (GtFeatureNode*)
                                 gt_genome_node_ref((GtGenomeNode*) node);
  return new_block;
}
Ejemplo n.º 13
0
/* Writes the tab-separated leading columns of <fn> to <outfp>: seqid, source,
   type, start, end, score ('.' if undefined, otherwise formatted with "%.3g"),
   strand and phase. Every column, including the last one, is followed by a
   tab character. */
void gt_gff3_output_leading(GtFeatureNode *fn, GtFile *outfp)
{
  GtGenomeNode *genome_node;
  const char *seqid, *source, *type;
  char strand_char, phase_char;
  gt_assert(fn);
  genome_node = (GtGenomeNode*) fn;

  /* seqid, source, type, start, end */
  seqid = gt_str_get(gt_genome_node_get_seqid(genome_node));
  source = gt_feature_node_get_source(fn);
  type = gt_feature_node_get_type(fn);
  gt_file_xprintf(outfp, "%s\t%s\t%s\t"GT_WU"\t"GT_WU"\t",
                     seqid, source, type,
                     gt_genome_node_get_start(genome_node),
                     gt_genome_node_get_end(genome_node));

  /* score */
  if (!gt_feature_node_score_is_defined(fn))
    gt_file_xfputc('.', outfp);
  else
    gt_file_xprintf(outfp, "%.3g", gt_feature_node_get_score(fn));

  /* strand, phase */
  strand_char = GT_STRAND_CHARS[gt_feature_node_get_strand(fn)];
  phase_char = GT_PHASE_CHARS[gt_feature_node_get_phase(fn)];
  gt_file_xprintf(outfp, "\t%c\t%c\t", strand_char, phase_char);
}
Ejemplo n.º 14
0
/* Builds an ID for <fn> consisting of its type followed by the running count
   of IDs created for that type, records it in the visitor's
   feature_node_to_unique_id_str map under <fn>, and returns it.
   NOTE(review): the returned string is the same object that is stored in the
   map; presumably the map owns it -- confirm before freeing it. */
static GtStr* create_unique_id(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  const char *feature_type;
  GtStr *unique_id;
  gt_assert(gff3_visitor && fn);

  feature_type = gt_feature_node_get_type(fn);

  /* bump the per-type counter before reading it */
  gt_string_distri_add(gff3_visitor->id_counter, feature_type);

  /* <type><count> */
  unique_id = gt_str_new_cstr(feature_type);
  gt_str_append_ulong(unique_id,
                      gt_string_distri_get(gff3_visitor->id_counter,
                                           feature_type));

  /* remember which ID belongs to this node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, unique_id);

  return unique_id;
}
Ejemplo n.º 15
0
/* Advances to the next feature whose type equals the classifier's
   specified_ft.
   On a hit, <*range> receives the feature range with both coordinates
   decremented by one, <*end_of_annotation> is set to false, the classifier's
   hit counter is incremented and 0 is returned.
   When the current feature node iterator is exhausted (or does not exist),
   gt_seqpos_classifier_next_fn() is called to move on; if that fails or
   leaves no current feature node, <*end_of_annotation> is set to true and its
   status is returned. */
static int gt_seqpos_classifier_next_specified_ft(
    GtSeqposClassifier *seqpos_classifier, GtRange *range,
    bool *end_of_annotation, GtError *err)
{
  int had_err = 0;
  GtFeatureNode *cfn;
  bool fni_exhausted;
  /* validate the arguments before the first dereference */
  gt_assert(seqpos_classifier != NULL);
  gt_assert(range != NULL);
  gt_assert(end_of_annotation != NULL);
  fni_exhausted = (seqpos_classifier->fni == NULL) ? true : false;
  while (true)
  {
    if (fni_exhausted)
    {
      had_err = gt_seqpos_classifier_next_fn(seqpos_classifier, err);
      if (had_err != 0 || seqpos_classifier->fn == NULL)
      {
        *end_of_annotation = true;
        return had_err;
      }
      fni_exhausted = false;
    }
    gt_assert(seqpos_classifier->fni != NULL);
    cfn = gt_feature_node_iterator_next(seqpos_classifier->fni);
    if (cfn == NULL)
    {
      fni_exhausted = true;
    }
    else if (strcmp(gt_feature_node_get_type(cfn),
          seqpos_classifier->specified_ft) == 0)
    {
      seqpos_classifier->nof_specified_ft_found++;
      *range = gt_genome_node_get_range((GtGenomeNode*)cfn);
      /* both coordinates must be positive so the decrement cannot wrap */
      gt_assert(range->start > 0);
      gt_assert(range->end > 0);
      range->start--;
      range->end--;
      *end_of_annotation = false;
      return had_err;
    }
  }
}
Ejemplo n.º 16
0
/* Sets the caption of <block>.
   The caption is taken from the "block_caption" style entry of the type of
   <node>. If that entry is not set, a caption of the form "<parent>/<node>"
   is composed from the names/IDs of <parent> and <node> ("-" stands in for a
   parent without name/ID or without children); if neither name/ID is
   available the block gets a NULL caption.
   Returns 0 on success and -1 if the style query reported an error. */
static int assign_block_caption(GtDiagram *d,
                                GtFeatureNode *node,
                                GtFeatureNode *parent,
                                GtBlock *block,
                                GtError *err)
{
  const char *parent_label, *node_label;
  GtStr *caption_str;
  int query;

  caption_str = gt_str_new();
  query = gt_style_get_str(d->style,
                           gt_feature_node_get_type(node), "block_caption",
                           caption_str, node, err);
  if (query == GT_STYLE_QUERY_ERROR) {
    gt_str_delete(caption_str);
    return -1;
  }

  if (query == GT_STYLE_QUERY_NOT_SET) {
    /* no caption in the style: fall back to names/IDs */
    parent_label = get_node_name_or_id(parent);
    node_label = get_node_name_or_id(node);
    if (!parent_label && !node_label) {
      gt_str_delete(caption_str);
      caption_str = NULL;
    } else {
      if (parent) {
        if (parent_label && gt_feature_node_has_children(parent))
          gt_str_append_cstr(caption_str, parent_label);
        else
          gt_str_append_cstr(caption_str, "-");
        gt_str_append_cstr(caption_str, "/");
      }
      if (node_label)
        gt_str_append_cstr(caption_str, node_label);
    }
  }

  gt_block_set_caption(block, caption_str);
  return 0;
}
Ejemplo n.º 17
0
/* Attaches an ORF feature for <orf_rng> to the subgraph rooted at <gf>.
   Both coordinates of <orf_rng> are incremented by one first (presumably to
   get back to 1-based annotation coordinates -- confirm against the caller).
   The new feature of type GT_ORF_TYPE gets GT_ORF_FINDER_TAG as source and
   <orf_frame> as "frame" attribute, and becomes a child of the last non-ORF
   node delivered by the iterator whose range contains the ORF range. If no
   such node exists nothing is attached. <err> is unused. */
static void orf_attach_results_to_gff3(GtFeatureNode *gf,
                                       GtRange orf_rng, unsigned int orf_frame,
                                       GtStrand strand, GT_UNUSED GtError *err)
{
  GtFeatureNodeIterator *gfi;
  GtFeatureNode *curnode = NULL, *parent_node = NULL;
  GtRange gfi_range;
  /* large enough for any unsigned int; the former 3-byte buffer written with
     sprintf() overflowed for frame values >= 100 */
  char frame_buf[32];

  orf_rng.start++; orf_rng.end++;

  (void) snprintf(frame_buf, sizeof frame_buf, "%u", orf_frame);

  gfi = gt_feature_node_iterator_new(gf);

  /* the last matching node wins */
  while ((curnode = gt_feature_node_iterator_next(gfi))) {
    if (strcmp(gt_feature_node_get_type(curnode),
                                              (const char*) GT_ORF_TYPE) != 0) {
      gfi_range = gt_genome_node_get_range((GtGenomeNode*) curnode);
      if (gt_range_contains(&gfi_range, &orf_rng)) {
        parent_node = curnode;
      }
    }
  }
  if (parent_node) {
    GtStr *tag = gt_str_new_cstr(GT_ORF_FINDER_TAG);
    GtGenomeNode *child;
    child = gt_feature_node_new(gt_genome_node_get_seqid((GtGenomeNode*) gf),
                                GT_ORF_TYPE,
                                orf_rng.start,
                                orf_rng.end,
                                strand);
    gt_feature_node_set_source((GtFeatureNode*) child, tag);
    gt_feature_node_set_attribute((GtFeatureNode*) child, "frame", frame_buf);
    gt_feature_node_add_child(parent_node,(GtFeatureNode*) child);
    gt_str_delete(tag);
  }
  gt_feature_node_iterator_delete(gfi);
}
Ejemplo n.º 18
0
/* Callback that outputs one feature node for the GTF visitor passed in <data>.
   gene: increments the visitor's gene id, resets its transcript id and shows
          the transcript;
   mRNA: shows the transcript;
   CDS and exon: silently ignored here;
   anything else: a warning about the skipped feature is issued.
   Returns the status of gtf_show_transcript() or 0. */
static int gtf_show_feature_node(GtFeatureNode *fn, void *data, GtError *err)
{
  GtGTFVisitor *visitor = (GtGTFVisitor*) data;

  if (gt_feature_node_has_type(fn, gt_ft_gene)) {
    /* a new gene starts a fresh transcript numbering */
    visitor->gene_id++;
    visitor->transcript_id = 0;
    return gtf_show_transcript(fn, visitor, err);
  }

  if (gt_feature_node_has_type(fn, gt_ft_mRNA))
    return gtf_show_transcript(fn, visitor, err);

  if (!gt_feature_node_has_type(fn, gt_ft_CDS) &&
      !gt_feature_node_has_type(fn, gt_ft_exon)) {
    gt_warning("skipping GFF3 feature of type \"%s\" (from line %u in file "
               "\"%s\")",
               gt_feature_node_get_type(fn),
               gt_genome_node_get_line_number((GtGenomeNode*) fn),
               gt_genome_node_get_filename((GtGenomeNode*) fn));
  }
  return 0;
}
Ejemplo n.º 19
0
/* Annotates features with the number of the cluster they belong to.
   Phase 1 walks all feature nodes in <nodes> and builds a map from a
   description string "<seqid>_<repeat region number>_<start>_<end>" to the
   feature node. The "<seqid>_<number>" prefix is taken from the most recently
   seen repeat_region feature (its ID attribute must have the form
   "repeat_region<number>"). Mapped are protein_match features whose "name"
   attribute equals <feature>, and features of type <feature> ("lLTR" and
   "rLTR" both stand for long_terminal_repeat); features spanning fewer than
   10 positions are ignored, as are features for which no valid prefix is
   known.
   Phase 2 iterates over all clusters of <cs>, looks up the description of
   each cluster element in <encseq> and, if a feature is mapped to it, sets
   that feature's "clid" attribute to the cluster number. Elements without a
   mapped feature are skipped.
   Returns had_err, which is never changed from 0 in this function. */
static int cluster_annotate_nodes(GtClusteredSet *cs, GtEncseq *encseq,
                                  const char *feature, GtArray *nodes,
                                  GtError *err)
{
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL, *tmp;
  GtClusteredSetIterator *csi = NULL;
  GtGenomeNode *gn;
  GtHashmap *desc2node;
  GtStr *seqid = NULL;
  int had_err = 0;
  unsigned long num_of_clusters, i, elm;
  const char *fnt = NULL;
  char buffer[BUFSIZ], *real_feature;
  /* true while <buffer> holds a prefix derived from a repeat_region */
  bool have_prefix = false;
  gt_error_check(err);

  buffer[0] = '\0';

  if ((strcmp(feature, "lLTR") == 0) || (strcmp(feature, "rLTR") == 0))
    real_feature = gt_cstr_dup(gt_ft_long_terminal_repeat);
  else
    real_feature = gt_cstr_dup(feature);

  desc2node = gt_hashmap_new(GT_HASH_STRING, free_hash, NULL);
  for (i = 0; i < gt_array_size(nodes); i++) {
    gn = *(GtGenomeNode**) gt_array_get(nodes, i);
    if (gt_feature_node_try_cast(gn) == NULL)
      continue;
    fni = gt_feature_node_iterator_new((GtFeatureNode*) gn);
    while ((curnode = gt_feature_node_iterator_next(fni)) != NULL) {
      char header[BUFSIZ];
      fnt = gt_feature_node_get_type(curnode);
      if (strcmp(fnt, gt_ft_repeat_region) == 0) {
        const char *rid;
        unsigned long id = 0;
        seqid = gt_genome_node_get_seqid((GtGenomeNode*) curnode);
        rid = gt_feature_node_get_attribute(curnode, "ID");
        /* a missing or malformed ID invalidates the prefix rather than
           producing one from an uninitialised number */
        have_prefix = (rid != NULL &&
                       sscanf(rid, "repeat_region%lu", &id) == 1);
        if (have_prefix)
          (void) snprintf(buffer, BUFSIZ, "%s_%lu", gt_str_get(seqid), id);
      } else if (strcmp(fnt, gt_ft_protein_match) == 0) {
        GtRange range;
        const char *attr;
        attr = gt_feature_node_get_attribute(curnode, "name");
        if (!attr)
          continue;
        if (strcmp(feature, attr) != 0)
          continue;
        range = gt_genome_node_get_range((GtGenomeNode*) curnode);
        if ((range.end - range.start + 1) < 10UL)
          continue;
        if (!have_prefix)
          continue;
        (void) snprintf(header, BUFSIZ, "%s_%lu_%lu", buffer, range.start,
                        range.end);
        gt_hashmap_add(desc2node, (void*) gt_cstr_dup(header), (void*) curnode);
      } else if (strcmp(fnt, real_feature) == 0) {
        GtRange range;
        range = gt_genome_node_get_range((GtGenomeNode*) curnode);
        if ((range.end - range.start + 1) < 10UL)
          continue;
        if (!have_prefix)
          continue;
        (void) snprintf(header, BUFSIZ, "%s_%lu_%lu", buffer, range.start,
                        range.end);
        gt_hashmap_add(desc2node, (void*) gt_cstr_dup(header), (void*) curnode);
      }
    }
    gt_feature_node_iterator_delete(fni);
  }
  gt_free(real_feature);

  num_of_clusters = gt_clustered_set_num_of_clusters(cs, err);
  for (i = 0; i < num_of_clusters; i++) {
    csi = gt_clustered_set_get_iterator(cs, i ,err);
    if (csi == NULL)
      continue;
    while (!had_err && (gt_clustered_set_iterator_next(csi, &elm, err)
           != GT_CLUSTERED_SET_ITERATOR_STATUS_END)) {
      char clid[BUFSIZ];
      const char *encseqdesc;
      char *encseqid;
      unsigned long desclen;
      encseqdesc = gt_encseq_description(encseq, &desclen, elm);
      /* the description is length-delimited; make a terminated copy to use
         as lookup key */
      encseqid = gt_calloc((size_t) (desclen + 1), sizeof (char));
      (void) strncpy(encseqid, encseqdesc, (size_t) desclen);
      encseqid[desclen] = '\0';
      tmp = (GtFeatureNode*) gt_hashmap_get(desc2node, (void*) encseqid);
      /* not every cluster element has a feature mapped to it */
      if (tmp != NULL) {
        (void) snprintf(clid, BUFSIZ, "%lu", i);
        gt_feature_node_set_attribute(tmp, "clid", clid);
      }
      gt_free(encseqid);
    }
    gt_clustered_set_iterator_delete(csi, err);
    csi = NULL;
  }
  gt_hashmap_delete(desc2node);
  return had_err;
}
/* Extracts the sequence belonging to the feature node <gn> into <sequence>.
   - If <seqid> is given, the sequence ID of <gn> is appended to it.
   - If <target_ids> is given and <gn> has a Target attribute, the target IDs
     parsed from it are stored in <target_ids>.
   - With <join> set, the direct children of <gn> are handed to
     extract_join_feature() one by one (presumably appending the sequence of
     each child of type <type> -- see that function); the Target attribute of
     the first child, if present, replaces previously collected target IDs.
     The joined sequence is reverse-complemented if extract_join_feature()
     reported the reverse strand.
   - Without <join>, the sequence is only extracted if the type of <gn> is
     identical (pointer comparison) to <type>; it is reverse-complemented for
     reverse-strand features.
   - If <out_phase_offset> is given and a defined phase was seen, that phase
     is written to it.
   Returns 0 on success and a non-zero value on error (with <err> set by the
   failing callee). */
static int gt_extract_feature_sequence_generic(GtStr *sequence,
                                GtGenomeNode *gn,
                                const char *type, bool join, GtStr *seqid,
                                GtStrArray *target_ids,
                                unsigned int *out_phase_offset,
                                GtRegionMapping *region_mapping, GtError *err)
{
  GtFeatureNode *fn;
  GtRange range;
  unsigned int phase_offset = 0;
  char *outsequence;
  const char *target;
  int had_err = 0;

  gt_error_check(err);
  fn = gt_genome_node_cast(gt_feature_node_class(), gn);
  gt_assert(fn);

  if (seqid)
    gt_str_append_str(seqid, gt_genome_node_get_seqid(gn));
  /* collect target IDs from the feature itself */
  if (target_ids &&
      (target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET))) {
    had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                         target_ids, NULL,
                                                         NULL, "", 0, err);
  }
  if (!had_err) {
    if (join) {
      GtFeatureNodeIterator *fni;
      GtFeatureNode *child;
      bool reverse_strand = false,
           first_child = true,
           first_child_of_type_seen = false;
      GtPhase phase = GT_PHASE_UNDEFINED;
      /* in this case we have to traverse the children */
      fni = gt_feature_node_iterator_new_direct(gt_feature_node_cast(gn));
      while (!had_err && (child = gt_feature_node_iterator_next(fni))) {
        if (first_child) {
          /* target IDs of the first child override those of the parent */
          if (target_ids &&
               (target = gt_feature_node_get_attribute(child, GT_GFF_TARGET))) {
            gt_str_array_reset(target_ids);
            had_err = gt_gff3_parser_parse_all_target_attributes(target, false,
                                                                 target_ids,
                                                                 NULL,
                                                                 NULL, "", 0,
                                                                 err);
          }
          first_child = false;
        }
        if (!had_err) {
          if (extract_join_feature((GtGenomeNode*) child, type, region_mapping,
                                   sequence, &reverse_strand,
                                   &first_child_of_type_seen,
                                   &phase, err)) {
            had_err = -1;
          }
          /* keep the phase reported by extract_join_feature(), if any */
          if (phase != GT_PHASE_UNDEFINED) {
            phase_offset = (int) phase;
          }
        }
      }
      gt_feature_node_iterator_delete(fni);
      gt_assert(phase_offset <= (unsigned int) GT_PHASE_UNDEFINED);
      /* the joined sequence is reverse-complemented as a whole */
      if (!had_err && gt_str_length(sequence)) {
        if (reverse_strand) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
    else if (gt_feature_node_get_type(fn) == type) {
      GtPhase phase = gt_feature_node_get_phase(fn);
      gt_assert(!had_err);
      if (phase != GT_PHASE_UNDEFINED)
        phase_offset = (unsigned int) phase;
      /* otherwise we only have to look at this feature */
      range = gt_genome_node_get_range(gn);
      gt_assert(range.start); /* 1-based coordinates */
      had_err = gt_region_mapping_get_sequence(region_mapping, &outsequence,
                                               gt_genome_node_get_seqid(gn),
                                               range.start, range.end, err);
      if (!had_err) {
        gt_str_append_cstr_nt(sequence, outsequence, gt_range_length(&range));
        gt_free(outsequence);
        if (gt_feature_node_get_strand(fn) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(gt_str_get(sequence),
                                          gt_str_length(sequence), err);
        }
      }
    }
  }
  /* report the phase offset only if the caller asked for it */
  if (out_phase_offset && phase_offset != GT_PHASE_UNDEFINED) {
    *out_phase_offset = phase_offset;
  }
  return had_err;
}
Ejemplo n.º 21
0
static int run_orffinder(GtRegionMapping *rmap,
                         GtFeatureNode *gf,
                         unsigned long start,
                         GT_UNUSED unsigned long end,
                         unsigned int min,
                         unsigned int max,
                         bool all,
                         GtError *err)
{
  int had_err = 0, i;
  unsigned long sum;
  GtCodonIterator* ci = NULL;
  GtTranslator* translator = NULL;
  GtORFIterator* orfi = NULL;
  GtORFIteratorStatus state;
  GtRange orf_rng, tmp_orf_rng[3];
  GtStr *seq;
  unsigned int orf_frame;

  /* forward strand */
  seq = gt_str_new();
  had_err = gt_extract_feature_sequence(seq,
                                        (GtGenomeNode*) gf,
                                        gt_feature_node_get_type(gf),
                                        false, NULL, NULL, rmap, err);

  ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq), err);
  gt_assert(ci);
  translator = gt_translator_new(ci);
  gt_assert(translator);

  orfi = gt_orf_iterator_new(ci, translator);
  gt_assert(orfi);

  for (i = 0; i < 3; i++) {
    tmp_orf_rng[i].start = GT_UNDEF_ULONG;
    tmp_orf_rng[i].end = GT_UNDEF_ULONG;
  }

  while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                              err)) == GT_ORF_ITERATOR_OK) {
      if (all) {
        process_orf(orf_rng, orf_frame, GT_STRAND_FORWARD, gf,
                    start, min, max, err);
      } else {
        if (gt_range_length(&orf_rng) >
            gt_range_length(&tmp_orf_rng[orf_frame])) {
          tmp_orf_rng[orf_frame].start = orf_rng.start;
          tmp_orf_rng[orf_frame].end = orf_rng.end;
        }
      }
  }
  if (state == GT_ORF_ITERATOR_ERROR)
    had_err = -1;

  if (!had_err) {
    if (!all) {
      for (i = 0; i < 3; i++) {
        if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
          process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_FORWARD, gf,
                      start, min, max, err);
        }
      }
    }
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
    orfi = NULL;
    ci = NULL;
    translator = NULL;

    for (i = 0; i < 3; i++) {
      tmp_orf_rng[i].start = GT_UNDEF_ULONG;
      tmp_orf_rng[i].end = GT_UNDEF_ULONG;
    }

    /* reverse strand */
    if (!had_err) {
      GT_UNUSED int rval = 0;
      unsigned long length = gt_str_length(seq);
      char *strp = (char*) gt_str_get_mem(seq);
      rval = gt_reverse_complement(strp, gt_str_length(seq), err);
      gt_assert(!rval); /* XXX */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), gt_str_length(seq),
                                        err);
      gt_assert(ci);
      translator = gt_translator_new(ci);
      gt_assert(translator);
      orfi = gt_orf_iterator_new(ci, translator);
      gt_assert(orfi);

      sum = start + length - 1;

      while ((state = gt_orf_iterator_next(orfi, &orf_rng, &orf_frame,
                                                  err)) == GT_ORF_ITERATOR_OK) {
          if (all) {
            process_orf(orf_rng, orf_frame, GT_STRAND_REVERSE, gf,
                        sum, min, max, err);
          } else {
            if (gt_range_length(&orf_rng) >
                gt_range_length(&tmp_orf_rng[orf_frame])) {
              tmp_orf_rng[orf_frame].start = orf_rng.start;
              tmp_orf_rng[orf_frame].end = orf_rng.end;
            }
          }
      }
      if (state == GT_ORF_ITERATOR_ERROR)
        had_err = -1;
      if (!had_err) {
        if (!all) {
          for (i = 0; i < 3; i++) {
            if (tmp_orf_rng[i].start != GT_UNDEF_ULONG) {
              process_orf(tmp_orf_rng[i], (unsigned int) i, GT_STRAND_REVERSE,
                          gf, sum, min, max, err);
            }
          }
        }
      }
    }
    gt_str_delete(seq);
    gt_codon_iterator_delete(ci);
    gt_translator_delete(translator);
    gt_orf_iterator_delete(orfi);
  }
  return had_err;
}
Ejemplo n.º 22
0
/* Places <node> into the block structure of diagram <d>.
   Pseudo nodes, nodes outside the diagram's visible range and nodes whose
   type has a "max_show_width" style value smaller than the visible range are
   skipped. If the parent's type is hidden by its own "max_show_width", the
   node is treated as if it had no parent. The per-type style settings
   "collapse_to_parent" and "group_by_parent" are looked up once and cached in
   d->collapsingtypes and d->groupedtypes.
   Returns 0 if the node was placed or skipped, -1 if a style query or one of
   the add_to_*() helpers reported an error. */
static int process_node(GtDiagram *d, GtFeatureNode *node,
                        GtFeatureNode *parent, GtError *err)
{
  GtRange node_rng;
  bool *collapses;
  GtShouldGroupByParent *grouping;
  const char *type = NULL,
             *parent_type = NULL;
  double width_val;
  GtStyleQueryStatus status;
  GtUword type_limit = GT_UNDEF_UWORD,
          parent_limit = GT_UNDEF_UWORD;
  int add_rval = 0;

  gt_assert(d && node);

  gt_log_log(">> getting '%s'", gt_feature_node_get_type(node));

  /* pseudo nodes are never drawn themselves */
  if (gt_feature_node_is_pseudo(node))
    return 0;

  type = gt_feature_node_get_type(node);
  gt_assert(type);

  /* nothing to do for features outside of the visible range */
  node_rng = gt_genome_node_get_range((GtGenomeNode*) node);
  if (!gt_range_overlap(&d->range, &node_rng))
    return 0;

  /* maximal view width (in nucleotides) up to which this type is shown;
     stays undefined if the style does not set it */
  status = gt_style_get_num(d->style, type, "max_show_width", &width_val,
                            NULL, err);
  if (status == GT_STYLE_QUERY_ERROR)
    return -1;
  if (status == GT_STYLE_QUERY_OK)
    type_limit = width_val;

  /* same lookup for the type of a real (non-pseudo) parent */
  if (parent && !gt_feature_node_is_pseudo(parent)) {
    parent_type = gt_feature_node_get_type(parent);
    status = gt_style_get_num(d->style, parent_type, "max_show_width",
                              &width_val, NULL, err);
    if (status == GT_STYLE_QUERY_ERROR)
      return -1;
    if (status == GT_STYLE_QUERY_OK)
      parent_limit = width_val;
  }

  /* this type is not shown at the current zoom level */
  if (type_limit != GT_UNDEF_UWORD
        && gt_range_length(&d->range) > type_limit) {
    return 0;
  }

  /* a parent which is not shown is disregarded for placement */
  if (parent
        && parent_limit != GT_UNDEF_UWORD
        && gt_range_length(&d->range) > parent_limit) {
    parent = NULL;
  }

  /* is this a collapsing type? (cached per type) */
  collapses = (bool*) gt_hashmap_get(d->collapsingtypes, type);
  if (collapses == NULL) {
    collapses = gt_malloc(sizeof (bool));
    *collapses = false;
    if (gt_style_get_bool(d->style, type, "collapse_to_parent",
                          collapses, NULL, err) == GT_STYLE_QUERY_ERROR) {
      gt_free(collapses);
      return -1;
    }
    gt_hashmap_add(d->collapsingtypes, (void*) type, collapses);
  }

  /* should this type be grouped by parent? (cached per type) */
  grouping = (GtShouldGroupByParent*) gt_hashmap_get(d->groupedtypes, type);
  if (grouping == NULL) {
    bool by_parent;
    grouping = gt_malloc(sizeof (GtShouldGroupByParent));
    status = gt_style_get_bool(d->style, type, "group_by_parent",
                               &by_parent, NULL, err);
    switch (status) {
      case GT_STYLE_QUERY_ERROR:
        gt_free(grouping);
        return -1;
      case GT_STYLE_QUERY_NOT_SET:
        *grouping = GT_UNDEFINED_GROUPING;
        break;
      case GT_STYLE_QUERY_OK:
        *grouping = by_parent ? GT_GROUP_BY_PARENT
                              : GT_DO_NOT_GROUP_BY_PARENT;
        break;
    }
    gt_hashmap_add(d->groupedtypes, (void*) type, grouping);
  }

  /* decide where to place this feature: */
  if (*collapses && parent && !gt_feature_node_is_pseudo(parent)) {
    /* collapsing types are merged into the blocks of their real parent,
       but never into pseudo nodes */
    add_recursive(d, node, parent, node);
  } else if (*collapses || !parent) {
    /* a collapsing type without a real parent, or a root node:
       the feature gets its own block */
    add_rval = add_to_current(d, node, parent, err);
  } else {
    bool no_overlap, grouped;
    no_overlap = gt_feature_node_direct_children_do_not_overlap_st(parent,
                                                                   node);
    grouped = (*grouping == GT_GROUP_BY_PARENT)
                || (no_overlap && *grouping == GT_UNDEFINED_GROUPING);
    if (gt_feature_node_is_pseudo(parent) && gt_feature_node_is_multi(node)) {
      /* multi-features below a pseudo node go with their representative */
      add_rval = add_to_rep(d, node, parent, err);
    } else if (grouped && gt_feature_node_number_of_children(parent) > 1) {
      add_rval = add_to_parent(d, node, parent, err);
    } else {
      add_rval = add_to_current(d, node, parent, err);
    }
  }
  if (add_rval < 0)
    return -1;

  /* we can now assume that this node (or its representative)
     has been processed into the reverse lookup structure */
#ifndef NDEBUG
  if (gt_feature_node_is_multi(node)) {
    GtFeatureNode *rep =
                  gt_feature_node_get_multi_representative((GtFeatureNode*) node);
    gt_assert(gt_hashmap_get(d->nodeinfo, rep));
  } else {
    gt_assert(gt_hashmap_get(d->nodeinfo, node));
  }
#endif

  return 0;
}
/* Example no. 23 -- 0 */
/* Runner of the sketch tool.
   argv[parsed_args] is the output image file, all following arguments are
   annotation input files. The input (GFF3, BED or GTF, chosen via
   arguments->input) is pulled into a memory feature index, optionally through
   an add-introns stream and a GFF3 output stream. The features of one
   sequence region (arguments->seqid, or the first one in the index if that
   is empty) are then laid out using the style file and rendered as
   PDF, PS, SVG or PNG (the default).
   Returns 0 on success and -1 on failure, in which case <err> is set. */
static int gt_sketch_runner(int argc, const char **argv, int parsed_args,
                              void *tool_arguments, GT_UNUSED GtError *err)
{
  GtSketchArguments *arguments = tool_arguments;
  GtNodeStream *in_stream = NULL,
               *add_introns_stream = NULL,
               *gff3_out_stream = NULL,
               *feature_stream = NULL,
               *sort_stream = NULL,
               *last_stream;
  GtFeatureIndex *features = NULL;
  const char *file;
  char *seqid = NULL;
  GtRange qry_range, sequence_region_range;
  GtArray *results = NULL;
  GtStyle *sty = NULL;
  GtStr *prog, *defaultstylefile = NULL;
  GtDiagram *d = NULL;
  GtLayout *l = NULL;
  GtImageInfo* ii = NULL;
  GtCanvas *canvas = NULL;
  GtUword height;
  bool has_seqid;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(arguments);

  /* locate the default style file relative to the gtdata directory;
     argv[0] is cut at the first blank to obtain the program name */
  prog = gt_str_new();
  gt_str_append_cstr_nt(prog, argv[0],
                        gt_cstr_length_up_to_char(argv[0], ' '));
  defaultstylefile = gt_get_gtdata_path(gt_str_get(prog), err);
  gt_str_delete(prog);
  if (!defaultstylefile)
    had_err = -1;
  if (!had_err) {
    gt_str_append_cstr(defaultstylefile, "/sketch/default.style");
  }

  /* first remaining argument is the output file */
  file = argv[parsed_args];
  if (!had_err) {
    /* create feature index */
    features = gt_feature_index_memory_new();
    parsed_args++;

    /* create an input stream */
    if (strcmp(gt_str_get(arguments->input), "gff") == 0)
    {
      in_stream = gt_gff3_in_stream_new_unsorted(argc - parsed_args,
                                                 argv + parsed_args);
      if (arguments->verbose)
        gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) in_stream);
    } else if (strcmp(gt_str_get(arguments->input), "bed") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_bed_in_stream_new(NULL);
      else
        in_stream = gt_bed_in_stream_new(argv[parsed_args]);
    } else if (strcmp(gt_str_get(arguments->input), "gtf") == 0)
    {
      if (argc - parsed_args == 0)
        in_stream = gt_gtf_in_stream_new(NULL);
      else
        in_stream = gt_gtf_in_stream_new(argv[parsed_args]);
    }
    last_stream = in_stream;

    /* create add introns stream if -addintrons was used */
    if (arguments->addintrons) {
      sort_stream = gt_sort_stream_new(last_stream);
      add_introns_stream = gt_add_introns_stream_new(sort_stream);
      last_stream = add_introns_stream;
    }

    /* create gff3 output stream if -pipe was used */
    if (arguments->pipe) {
      gff3_out_stream = gt_gff3_out_stream_new(last_stream, NULL);
      last_stream = gff3_out_stream;
    }

    /* create feature stream */
    feature_stream = gt_feature_stream_new(last_stream, features);

    /* pull the features through the stream and free them afterwards */
    had_err = gt_node_stream_pull(feature_stream, err);

    gt_node_stream_delete(feature_stream);
    gt_node_stream_delete(gff3_out_stream);
    gt_node_stream_delete(sort_stream);
    gt_node_stream_delete(add_introns_stream);
    gt_node_stream_delete(in_stream);
  }

  if (!had_err) {
    had_err = gt_feature_index_has_seqid(features,
                                         &has_seqid,
                                         gt_str_get(arguments->seqid),
                                         err);
  }

  /* if seqid is empty, take first one added to index */
  if (!had_err && strcmp(gt_str_get(arguments->seqid),"") == 0) {
    seqid = gt_feature_index_get_first_seqid(features, err);
    if (seqid == NULL) {
      gt_error_set(err, "GFF input file must contain a sequence region!");
      had_err = -1;
    }
  }
  else if (!had_err && !has_seqid) {
    gt_error_set(err, "sequence region '%s' does not exist in GFF input file",
                 gt_str_get(arguments->seqid));
    had_err = -1;
  }
  else if (!had_err) {
    /* seqid is released with gt_free() below, so it must not alias the
       buffer owned by arguments->seqid: store a private copy instead */
    seqid = gt_cstr_dup(gt_str_get(arguments->seqid));
  }

  results = gt_array_new(sizeof (GtGenomeNode*));
  if (!had_err) {
    had_err = gt_feature_index_get_range_for_seqid(features,
                                                   &sequence_region_range,
                                                   seqid,
                                                   err);
  }
  if (!had_err) {
    /* unspecified start/end default to the borders of the sequence region */
    qry_range.start = (arguments->start == GT_UNDEF_UWORD ?
                         sequence_region_range.start :
                         arguments->start);
    qry_range.end   = (arguments->end == GT_UNDEF_UWORD ?
                         sequence_region_range.end :
                         arguments->end);
  }

  if (!had_err) {
    if (arguments->verbose)
      fprintf(stderr, "# of results: "GT_WU"\n", gt_array_size(results));

    /* find and load style file */
    if (!(sty = gt_style_new(err)))
      had_err = -1;
    /* only look at the style file if the style object exists, so that an
       error from gt_style_new() is not replaced by a misleading
       'does not exist' message */
    if (!had_err) {
      if (gt_str_length(arguments->stylefile) == 0) {
        gt_str_append_str(arguments->stylefile, defaultstylefile);
      } else if (gt_file_exists(gt_str_get(arguments->stylefile))) {
        if (arguments->unsafe)
          gt_style_unsafe_mode(sty);
      } else {
        gt_error_set(err, "style file '%s' does not exist!",
                          gt_str_get(arguments->stylefile));
        had_err = -1;
      }
    }
    if (!had_err)
      had_err = gt_style_load_file(sty, gt_str_get(arguments->stylefile), err);
  }

  if (!had_err) {
    /* create and write image file */
    if (!(d = gt_diagram_new(features, seqid, &qry_range, sty, err)))
      had_err = -1;
    if (!had_err && arguments->flattenfiles)
      gt_diagram_set_track_selector_func(d, flattened_file_track_selector,
                                         NULL);
    if (had_err || !(l = gt_layout_new(d, arguments->width, sty, err)))
      had_err = -1;
    if (!had_err)
      had_err = gt_layout_get_height(l, &height, err);
    if (!had_err) {
      ii = gt_image_info_new();

      /* any format other than pdf, ps or svg is rendered as PNG */
      if (strcmp(gt_str_get(arguments->format),"pdf")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PDF,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"ps")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PS,
                                          arguments->width,
                                          height, ii, err);
      }
      else if (strcmp(gt_str_get(arguments->format),"svg")==0) {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_SVG,
                                          arguments->width,
                                          height, ii, err);
      }
      else {
        canvas = gt_canvas_cairo_file_new(sty, GT_GRAPHICS_PNG,
                                          arguments->width,
                                          height, ii, err);
      }
      if (!canvas)
        had_err = -1;
      if (!had_err) {
        had_err = gt_layout_sketch(l, canvas, err);
      }
      if (!had_err) {
        if (arguments->showrecmaps) {
          /* print image map coordinates and feature type of each element */
          GtUword i;
          const GtRecMap *rm;
          for (i = 0; i < gt_image_info_num_of_rec_maps(ii) ;i++) {
            char buf[BUFSIZ];
            rm = gt_image_info_get_rec_map(ii, i);
            (void) gt_rec_map_format_html_imagemap_coords(rm, buf, BUFSIZ);
            printf("%s, %s\n",
                   buf,
                   gt_feature_node_get_type(gt_rec_map_get_genome_feature(rm)));
          }
        }
        if (arguments->use_streams) {
          /* render into a string first, then write that string to <file> */
          GtFile *outfile;
          GtStr *str = gt_str_new();
          gt_canvas_cairo_file_to_stream((GtCanvasCairoFile*) canvas, str);
          outfile = gt_file_open(GT_FILE_MODE_UNCOMPRESSED, file, "w+", err);
          if (outfile) {
            gt_file_xwrite(outfile, gt_str_get_mem(str), gt_str_length(str));
            gt_file_delete(outfile);
          } else {
            had_err = -1;
          }
          gt_str_delete(str);
        } else {
          had_err = gt_canvas_cairo_file_to_file((GtCanvasCairoFile*) canvas,
                                                 file,
                                                 err);
        }
      }
    }
  }

  /* free */
  gt_free(seqid);
  gt_canvas_delete(canvas);
  gt_layout_delete(l);
  gt_image_info_delete(ii);
  gt_style_delete(sty);
  gt_diagram_delete(d);
  gt_array_delete(results);
  gt_str_delete(defaultstylefile);
  gt_feature_index_delete(features);

  return had_err;
}
/* Example no. 24 -- 0 */
/* Sets the strand of the current LTR element (lv->ltr_retrotrans) according
   to the strands of the protein match features found below it.
   If matches exist on one strand only, that strand is used. If both strands
   have matches, the logarithms of the match scores are summed per strand, the
   forward strand is chosen when gt_double_compare() reports the forward sum
   as smaller (otherwise the reverse strand), and all protein matches on the
   other strand are removed from the element. Without any strand-specific
   protein match, or without a current element, nothing is changed.
   Always returns 0. */
static int gt_ltrdigest_pdom_visitor_choose_strand(GtLTRdigestPdomVisitor *lv)
{
  int had_err = 0;
  double log_eval_fwd = 0.0,
         log_eval_rev = 0.0;
  GtFeatureNodeIterator *fni;
  GtStrand strand;
  double score;
  bool seen_fwd = false,
       seen_rev = false;
  GtFeatureNode *curnode = NULL;
  GtUword i;
  GtArray *to_delete;

  gt_assert(lv);

  /* the feature node visitor calls this function even if no LTR element
     has been found; there is nothing to decide in that case */
  if (lv->ltr_retrotrans == NULL)
    return had_err;

  /* first pass: sum up log scores of the protein matches per strand */
  fni = gt_feature_node_iterator_new(lv->ltr_retrotrans);
  while ((curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode),
               gt_ft_protein_match) == 0) {
      strand = gt_feature_node_get_strand(curnode);
      score = (double) gt_feature_node_get_score(curnode);
      if (strand == GT_STRAND_FORWARD) {
        log_eval_fwd += log(score);
        seen_fwd = true;
      } else if (strand == GT_STRAND_REVERSE) {
        log_eval_rev += log(score);
        seen_rev = true;
      }
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (seen_rev && !seen_fwd)
    gt_feature_node_set_strand(lv->ltr_retrotrans, GT_STRAND_REVERSE);
  else if (!seen_rev && seen_fwd)
    gt_feature_node_set_strand(lv->ltr_retrotrans, GT_STRAND_FORWARD);
  else if (!seen_rev && !seen_fwd)
    return had_err;
  else {
    gt_assert(seen_rev && seen_fwd);
    if (gt_double_compare(log_eval_fwd, log_eval_rev) < 0)
      strand = GT_STRAND_FORWARD;
    else
      strand = GT_STRAND_REVERSE;
    gt_feature_node_set_strand(lv->ltr_retrotrans, strand);

    /* second pass: collect the matches on the losing strand first and
       remove them only after the iteration is finished */
    to_delete = gt_array_new(sizeof (GtFeatureNode*));
    fni = gt_feature_node_iterator_new(lv->ltr_retrotrans);
    while ((curnode = gt_feature_node_iterator_next(fni))) {
      if (strcmp(gt_feature_node_get_type(curnode),
                 gt_ft_protein_match) == 0) {
        if (strand != gt_feature_node_get_strand(curnode)) {
          gt_array_add(to_delete, curnode);
        }
      }
    }
    gt_feature_node_iterator_delete(fni);
    gt_assert(gt_array_size(to_delete) > 0);
    for (i = 0; i < gt_array_size(to_delete); i++) {
      gt_feature_node_remove_leaf(lv->ltr_retrotrans,
                                  *(GtFeatureNode**) gt_array_get(to_delete,
                                                                  i));
    }
    gt_array_delete(to_delete);
  }
  return had_err;
}
/* Classifies the effect of replacing the base at position <variant_pos> of
   the cached coding sequence of <mRNA> by <variant_char> and records it in
   the GT_GVF_VARIANT_EFFECT attribute of <snp>.
   The affected codon and its variant are compared using the translation
   table sav->tt; the resulting effect is one of: synonymous stop, stop lost,
   nonsense, synonymous amino acid or missense. The attribute value is
   "<effect> <variant_idx> <mRNA type> <mRNA ID>", appended with a comma to a
   value which is already present.
   In builds with assertions, <reference_char> is checked against the base
   found at <variant_pos>.
   A variant whose codon is not completely contained in the sequence (e.g. in
   a trailing partial codon) cannot be classified and is skipped.
   Returns 0 on success or if the variant was skipped, and the error code of
   gt_trans_table_translate_codon() if a codon cannot be translated. */
static int snp_annotator_classify_snp(GtSNPAnnotatorVisitor *sav,
                                      GtFeatureNode *mRNA,
                                      GtFeatureNode *snp,
                                      GtUword variant_pos,
                                      GtUword variant_idx,
                                      char variant_char,
#ifndef NDEBUG
                                      GT_UNUSED char reference_char,
#endif
                                      GT_UNUSED GtError *err)
{
  int had_err = 0;
  char *mrnaseq;
  const char *variant_effect = NULL;
  gt_assert(mRNA && snp && sav);
  gt_log_log("processing variant char %c for SNP %s\n",
               variant_char, gt_feature_node_get_attribute(snp, "Dbxref"));
  mrnaseq = gt_hashmap_get(sav->rnaseqs, mRNA);
  gt_assert(mrnaseq);
  if (mrnaseq) {
    char codon[3],
         variant_codon[3];
    GtStr *effect_string;
    char oldamino,
         newamino;
    GtUword mrnalen;
    GtUword startpos = variant_pos / GT_CODON_LENGTH,
                  variantoffset = variant_pos % GT_CODON_LENGTH;
    mrnalen = strlen(mrnaseq);
    gt_assert(variant_pos < mrnalen);
    /* all three codon positions must lie inside the sequence, otherwise
       the reads below would run past its end */
    if (3*startpos+2 >= mrnalen) {
      gt_log_log("skipping variant at mRNA position " GT_WU
                 ": codon is not completely contained in sequence",
                 variant_pos);
      return 0;
    }
    variant_codon[0] = codon[0] = mrnaseq[3*startpos];
    variant_codon[1] = codon[1] = mrnaseq[3*startpos+1];
    variant_codon[2] = codon[2] = mrnaseq[3*startpos+2];
    variant_codon[variantoffset] = variant_char;
#ifndef NDEBUG
    gt_assert(toupper((unsigned char) codon[variantoffset])
                == toupper((unsigned char) reference_char));
#endif
    if (gt_trans_table_is_stop_codon(sav->tt, codon[0], codon[1], codon[2])) {
      /* original codon is a stop codon */
      if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0],
                                       variant_codon[1], variant_codon[2])) {
        variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_STOP_EFFECT);
      } else {
        variant_effect = gt_symbol(GT_SNP_STOP_LOST_EFFECT);
      }
    } else {
      if (gt_trans_table_is_stop_codon(sav->tt, variant_codon[0],
                                       variant_codon[1], variant_codon[2])) {
        /* variant introduces a stop codon */
        variant_effect = gt_symbol(GT_SNP_NONSENSE_EFFECT);
      } else {
        /* neither is a stop codon: compare the encoded amino acids */
        had_err = gt_trans_table_translate_codon(sav->tt, codon[0], codon[1],
                                                 codon[2], &oldamino, err);
        if (!had_err) {
          had_err = gt_trans_table_translate_codon(sav->tt, variant_codon[0],
                                                   variant_codon[1],
                                                   variant_codon[2],
                                                   &newamino, err);
        }
        if (!had_err) {
          if (newamino == oldamino) {
            variant_effect = gt_symbol(GT_SNP_SYNONYMOUS_AMINO_EFFECT);
          } else {
            variant_effect = gt_symbol(GT_SNP_MISSENSE_EFFECT);
          }
        }
      }
    }
    if (!had_err) {
      const char *var_attrib;
      gt_assert(variant_effect != NULL);
      /* extend an existing attribute value instead of replacing it */
      if ((var_attrib = gt_feature_node_get_attribute(snp,
                                                      GT_GVF_VARIANT_EFFECT))) {
        effect_string = gt_str_new_cstr(var_attrib);
        gt_str_append_cstr(effect_string, ",");
        gt_str_append_cstr(effect_string, variant_effect);
      } else {
        effect_string = gt_str_new_cstr(variant_effect);
      }
      gt_str_append_cstr(effect_string, " ");
      gt_str_append_ulong(effect_string, variant_idx);
      gt_str_append_cstr(effect_string, " ");
      gt_str_append_cstr(effect_string, gt_feature_node_get_type(mRNA));
      gt_str_append_cstr(effect_string, " ");
      gt_str_append_cstr(effect_string,
                         gt_feature_node_get_attribute(mRNA, GT_GFF_ID));
      gt_feature_node_set_attribute(snp, GT_GVF_VARIANT_EFFECT,
                                    gt_str_get(effect_string));
      gt_str_delete(effect_string);
    }
  }

  return had_err;
}
static int gt_snp_annotator_visitor_prepare_gene(GtSNPAnnotatorVisitor *sav,
                                                 GtError *err)
{
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *last_mRNA = NULL;
  GtStr *mrnaseq,
        *seqid;
  int had_err = 0;

  mrnaseq = gt_str_new();
  seqid = gt_genome_node_get_seqid((GtGenomeNode*) sav->gene);
  fni = gt_feature_node_iterator_new(sav->gene);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtFeatureNode *curnode2;
      if (last_mRNA) {
        char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
        (void) strncpy(mrna_charseq, gt_str_get(mrnaseq),
                       gt_str_length(mrnaseq));
        if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
          had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                          err);
        }
        if (!had_err) {
          gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
          last_mRNA = curnode;
          gt_str_reset(mrnaseq);
        }
      } else last_mRNA = curnode;
      if (!had_err) {
        mrnafni = gt_feature_node_iterator_new(curnode);
        while (!had_err && (curnode2 =
                                      gt_feature_node_iterator_next(mrnafni))) {
          if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
            char *tmp;
            GtRange rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
            had_err = gt_region_mapping_get_sequence(sav->rmap, &tmp, seqid,
                                                     rng.start, rng.end, err);
            if (!had_err) {
              gt_str_append_cstr_nt(mrnaseq, tmp, gt_range_length(&rng));
              gt_free(tmp);
            }
          }
        }
        gt_feature_node_iterator_delete(mrnafni);
      }
    }
  }
  if (!had_err && last_mRNA) {
    char *mrna_charseq = gt_calloc(gt_str_length(mrnaseq)+1, sizeof (char));
    (void) strncpy(mrna_charseq, gt_str_get(mrnaseq), gt_str_length(mrnaseq));
    if (gt_feature_node_get_strand(sav->gene) == GT_STRAND_REVERSE) {
      had_err = gt_reverse_complement(mrna_charseq, gt_str_length(mrnaseq),
                                      err);
    }
    if (!had_err) {
      gt_hashmap_add(sav->rnaseqs, last_mRNA, mrna_charseq);
    }
  }
  gt_feature_node_iterator_delete(fni);
  gt_str_delete(mrnaseq);
  return had_err;
}
/* Visits feature node <fn> and, if it is a SNV or SNP, annotates the effect
   of each of its variant alleles on every mRNA directly below sav->gene.
   For each such mRNA, the CDS features below it are traversed; the lengths
   of the CDS features not overlapping the SNP are summed up to obtain the
   position of the SNP in the cached coding sequence (sav->rnaseqs), counted
   from the other end for mRNAs on the reverse strand. Each character of the
   GT_GVF_VARIANT_SEQ attribute up to ';' or the end of the string is
   considered: ',' is skipped, alleles equal to the reference base are only
   counted, all others are passed to snp_annotator_classify_snp().
   The reference base is complemented for mRNAs on the reverse strand before
   it is compared with the variant alleles. This is done in all builds, so
   that builds with and without assertions classify the same alleles.
   Returns 0 on success (or if <fn> is not a SNV/SNP), otherwise the error
   code of the failed operation. */
static int snp_annotator_visitor_feature_node(GtNodeVisitor *nv,
                                              GtFeatureNode *fn,
                                              GtError *err)
{
  int had_err = 0;
  GtSNPAnnotatorVisitor *sav;
  GtFeatureNodeIterator *fni,
                        *mrnafni;
  GtFeatureNode *curnode,
                *curnode2;
  GtRange snp_rng;
  gt_error_check(err);
  sav = snp_annotator_visitor_cast(nv);

  /* ignore non-nodes */
  if (!fn) return 0;

  /* only process SNPs */
  if (!(gt_feature_node_get_type(fn) == sav->SNV_type ||
        gt_feature_node_get_type(fn) == sav->SNP_type)) {
    return 0;
  }

  fni = gt_feature_node_iterator_new_direct(sav->gene);
  snp_rng = gt_genome_node_get_range((GtGenomeNode*) fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (gt_feature_node_get_type(curnode) == sav->mRNA_type) {
      GtStrand mrna_strand = gt_feature_node_get_strand(curnode);
      /* number of coding bases seen before the SNP in this mRNA */
      GtUword mrnasnppos = 0;
      mrnafni = gt_feature_node_iterator_new(curnode);
      while (!had_err && (curnode2 = gt_feature_node_iterator_next(mrnafni))) {
        if (gt_feature_node_get_type(curnode2) == sav->CDS_type) {
          GtRange cds_rng = gt_genome_node_get_range((GtGenomeNode*) curnode2);
          if (gt_range_overlap(&snp_rng, &cds_rng)) {
            char *mRNA,
                 origchar;
            char *variantchars, *variantptr = NULL;
#ifndef NDEBUG
            char *refstr = NULL;
#endif
            mRNA = (char*) gt_hashmap_get(sav->rnaseqs, curnode);
            gt_assert(mRNA);
            gt_assert(snp_rng.start >= cds_rng.start);
            mrnasnppos += (snp_rng.start - cds_rng.start);
            /* the cached sequence runs in the opposite direction here */
            if (mrna_strand == GT_STRAND_REVERSE)
              mrnasnppos = strlen(mRNA) - mrnasnppos - 1;
            gt_assert(mrnasnppos < strlen(mRNA));
            origchar = mRNA[mrnasnppos];
            /* the alleles below are compared with origchar before they are
               complemented themselves, hence origchar must be complemented
               for reverse strand mRNAs (and not only if assertions are
               enabled) */
            if (mrna_strand == GT_STRAND_REVERSE)
              had_err = gt_complement(&origchar, origchar, err);
#ifndef NDEBUG
            refstr = gt_cstr_dup(gt_feature_node_get_attribute(fn,
                                                         GT_GVF_REFERENCE_SEQ));
            if (!had_err && refstr) {
              gt_assert(toupper((unsigned char) origchar)
                          == toupper((unsigned char) refstr[0]));
            }
#endif
            variantchars = variantptr = gt_cstr_dup(
                         gt_feature_node_get_attribute(fn, GT_GVF_VARIANT_SEQ));
            if (!had_err && variantchars) {
              /* index of the current allele, also counting those alleles
                 which are equal to the reference */
              GtUword i = 0;

              while (!had_err &&
                              (*variantchars != ';' && *variantchars != '\0')) {
                if (*variantchars != ',' && *variantchars != origchar) {
                  char variantchar = *variantchars;
#ifndef NDEBUG
                  char refchar = refstr ? refstr[0] : '-';  /* XXX */
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&refchar, refchar, err);
#endif
                  if (!had_err && mrna_strand == GT_STRAND_REVERSE)
                    had_err = gt_complement(&variantchar, variantchar, err);
                  if (!had_err) {
                    had_err = snp_annotator_classify_snp(sav, curnode, fn,
                                                         mrnasnppos,
                                                         i++,
                                                         variantchar,
#ifndef NDEBUG
                                                         refchar,
#endif
                                                         err);
                  }
                } else if (*variantchars == origchar) {
                  i++;
                }
                variantchars++;
              }
            }
            /* release the attribute copies on every path */
            gt_free(variantptr);
#ifndef NDEBUG
            gt_free(refstr);
#endif
          } else {
            mrnasnppos += gt_range_length(&cds_rng);
          }
        }
      }
      gt_feature_node_iterator_delete(mrnafni);
    }
  }
  gt_feature_node_iterator_delete(fni);

  return had_err;
}
/* Example no. 28 -- 0 */
/* Visits feature node <fn>, searches the feature of type lv->root_type in
   its subgraph and annotates protein domain hits for it.
   The sequence of the element is extracted and translated in all three
   reading frames of both strands (lv->fwd, lv->rev). On non-Windows systems
   the six translations are then written in FASTA format (with the headers
   '>0+', '>0-', ... '>2-') to the standard input of a forked 'hmmscan'
   process started with lv->args; its standard output is parsed and the hits
   are processed. Finally the strand of the element is chosen based on the
   hits.
   Returns 0 on success and a negative value on failure. */
static int gt_ltrdigest_pdom_visitor_feature_node(GtNodeVisitor *nv,
                                                  GtFeatureNode *fn,
                                                  GtError *err)
{
  GtLTRdigestPdomVisitor *lv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *curnode = NULL;
  int had_err = 0;
  GtRange rng;
  GtUword i;
  lv = gt_ltrdigest_pdom_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  /* traverse annotation subgraph and find LTR element */
  /* NOTE(review): lv->ltr_retrotrans is not reset here, so if this subgraph
     contains no feature of the root type, the value of an earlier visit (if
     any) is used again -- confirm that this is intended */
  fni = gt_feature_node_iterator_new(fn);
  while (!had_err && (curnode = gt_feature_node_iterator_next(fni))) {
    if (strcmp(gt_feature_node_get_type(curnode), lv->root_type) == 0) {
      lv->ltr_retrotrans = curnode;
    }
  }
  gt_feature_node_iterator_delete(fni);

  if (!had_err && lv->ltr_retrotrans != NULL) {
    GtCodonIterator *ci;
    GtTranslator *tr;
    GtTranslatorStatus status;
    GtUword seqlen;
    char translated, *rev_seq;
#ifndef _WIN32
    FILE *instream;
    GtHMMERParseStatus *pstatus;
#endif
    unsigned int frame;
    GtStr *seq;

    seq = gt_str_new();
    rng = gt_genome_node_get_range((GtGenomeNode*) lv->ltr_retrotrans);
    /* element borders, shifted by one w.r.t. the node coordinates */
    lv->leftLTR_5 = rng.start - 1;
    lv->rightLTR_3 = rng.end - 1;
    seqlen = gt_range_length(&rng);

    had_err = gt_extract_feature_sequence(seq,
                                          (GtGenomeNode*) lv->ltr_retrotrans,
                                          lv->root_type,
                                          false, NULL, NULL, lv->rmap, err);

    if (!had_err) {
      for (i = 0UL; i < 3UL; i++) {
        gt_str_reset(lv->fwd[i]);
        gt_str_reset(lv->rev[i]);
      }

      /* create translations */
      ci = gt_codon_iterator_simple_new(gt_str_get(seq), seqlen, NULL);
      gt_assert(ci);
      tr = gt_translator_new(ci);
      /* <err> is passed to every call, so that an error status is always
         accompanied by an error message */
      status = gt_translator_next(tr, &translated, &frame, err);
      while (status == GT_TRANSLATOR_OK && translated) {
        gt_str_append_char(lv->fwd[frame], translated);
        status = gt_translator_next(tr, &translated, &frame, err);
      }
      if (status == GT_TRANSLATOR_ERROR) had_err = -1;
      if (!had_err) {
        /* same for the reverse complement of the sequence */
        rev_seq = gt_malloc((size_t) seqlen * sizeof (char));
        strncpy(rev_seq, gt_str_get(seq), (size_t) seqlen * sizeof (char));
        (void) gt_reverse_complement(rev_seq, seqlen, NULL);
        gt_codon_iterator_delete(ci);
        ci = gt_codon_iterator_simple_new(rev_seq, seqlen, NULL);
        gt_translator_set_codon_iterator(tr, ci);
        status = gt_translator_next(tr, &translated, &frame, err);
        while (status == GT_TRANSLATOR_OK && translated) {
          gt_str_append_char(lv->rev[frame], translated);
          status = gt_translator_next(tr, &translated, &frame, err);
        }
        if (status == GT_TRANSLATOR_ERROR) had_err = -1;
        gt_free(rev_seq);
      }
      gt_codon_iterator_delete(ci);
      gt_translator_delete(tr);
    }

    /* run HMMER and handle results */
    if (!had_err) {
#ifndef _WIN32
      /* pc: parent -> child (stdin of hmmscan),
         cp: child -> parent (stdout of hmmscan) */
      int pid, pc[2], cp[2];
      GT_UNUSED int rval;

      (void) signal(SIGCHLD, SIG_IGN); /* XXX: for now, ignore child's
                                               exit status */
      rval = pipe(pc);
      gt_assert(rval == 0);
      rval = pipe(cp);
      gt_assert(rval == 0);

      switch ((pid = (int) fork())) {
        case -1:
          perror("Can't fork");
          exit(1);   /* XXX: error handling */
        case 0:    /* child */
          (void) close(1);    /* close current stdout. */
          rval = dup(cp[1]);  /* make stdout go to write end of pipe. */
          (void) close(0);    /* close current stdin. */
          rval = dup(pc[0]);  /* make stdin come from read end of pipe. */
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[0]);
          (void) close(cp[1]);
          (void) execvp("hmmscan", lv->args); /* XXX: read path from env */
          perror("couldn't execute hmmscan!");
          /* leave the forked child without running the exit handlers and
             flushing the stdio buffers inherited from the parent */
          _exit(1);
        default:    /* parent */
          for (i = 0UL; i < 3UL; i++) {
            /* header is '>', frame number, strand and newline: 4 chars */
            char buf[5];
            GT_UNUSED ssize_t written;
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '+');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->fwd[i]),
                            (size_t) gt_str_length(lv->fwd[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
            (void) sprintf(buf, ">"GT_WU"%c\n", i, '-');
            written = write(pc[1], buf, 4 * sizeof (char));
            written = write(pc[1], gt_str_get(lv->rev[i]),
                            (size_t) gt_str_length(lv->rev[i]) * sizeof (char));
            written = write(pc[1], "\n", 1 * sizeof (char));
          }
          (void) close(pc[0]);
          (void) close(pc[1]);
          (void) close(cp[1]);
          instream = fdopen(cp[0], "r");
          if (!instream) {
            /* no stream, hence the descriptor must be closed directly */
            (void) close(cp[0]);
            gt_error_set(err, "could not open stream to read hmmscan output");
            had_err = -1;
          } else {
            pstatus = gt_hmmer_parse_status_new();
            had_err = gt_ltrdigest_pdom_visitor_parse_output(lv, pstatus,
                                                             instream, err);
            (void) fclose(instream);
            if (!had_err) {
              had_err = gt_ltrdigest_pdom_visitor_process_hits(lv, pstatus,
                                                               err);
            }
            gt_hmmer_parse_status_delete(pstatus);
          }
      }
#else
      /* XXX */
      gt_error_set(err, "HMMER call not implemented on Windows\n");
      had_err = -1;
#endif
    }
    gt_str_delete(seq);

    /* a strand can only be chosen if there is an element */
    if (!had_err)
      had_err = gt_ltrdigest_pdom_visitor_choose_strand(lv);
  }
  return had_err;
}
/* Node visitor callback: files the feature node <fn> into the matching slot
   of the LTR element currently held by the visitor (<lv->element>),
   depending on the feature type of <fn>:

   - LTR_retrotransposon:     stored as the element's main node
   - long_terminal_repeat:    first one seen becomes the left LTR, every later
                              one overwrites the right LTR; the boundaries are
                              stored shifted by -1
   - target_site_duplication: first one seen becomes the left TSD, every later
                              one overwrites the right TSD
   - RR_tract / primer_binding_site: only the first one seen is kept
   - protein_match:           appended to a per-domain-name array kept in the
                              <pdoms> hashmap (created on first use)

   Nodes of any other type are ignored. Always returns 0; <err> is only
   passed to gt_error_check(). */
static int gt_ltr_visitor_feature_node(GtNodeVisitor *nv, GtFeatureNode *fn,
                                       GT_UNUSED GtError *err)
{
  GtLTRVisitor *lv;
  const char *type;

  lv = gt_ltr_visitor_cast(nv);
  gt_assert(lv);
  gt_error_check(err);

  type = gt_feature_node_get_type(fn);

  if (strcmp(type, gt_ft_LTR_retrotransposon) == 0) {
    lv->element->mainnode = fn;
    return 0;
  }

  if (strcmp(type, gt_ft_long_terminal_repeat) == 0) {
    GtRange rng = gt_genome_node_get_range((GtGenomeNode*) fn);
    /* compensate for 1-based node coords */
    if (lv->element->leftLTR == NULL) {
      lv->element->leftLTR = fn;
      lv->element->leftLTR_5 = rng.start - 1;
      lv->element->leftLTR_3 = rng.end - 1;
    } else {
      lv->element->rightLTR = fn;
      lv->element->rightLTR_5 = rng.start - 1;
      lv->element->rightLTR_3 = rng.end - 1;
    }
    return 0;
  }

  if (strcmp(type, gt_ft_target_site_duplication) == 0) {
    if (lv->element->leftTSD == NULL)
      lv->element->leftTSD = fn;
    else
      lv->element->rightTSD = fn;
    return 0;
  }

  if (strcmp(type, gt_ft_RR_tract) == 0) {
    /* keep only the first tract encountered */
    if (lv->element->ppt == NULL)
      lv->element->ppt = fn;
    return 0;
  }

  if (strcmp(type, gt_ft_primer_binding_site) == 0) {
    /* keep only the first binding site encountered */
    if (lv->element->pbs == NULL)
      lv->element->pbs = fn;
    return 0;
  }

  if (strcmp(type, gt_ft_protein_match) == 0) {
    char domain_name[BUFSIZ];
    const char *attr_name;
    GtArray *hits;

    /* lazily create the name -> array-of-nodes map; it is set up with
       gt_free_func and gt_array_delete as its cleanup functions */
    if (lv->element->pdoms == NULL) {
      lv->element->pdoms = gt_hashmap_new(GT_HASH_STRING, gt_free_func,
                                          (GtFree) gt_array_delete);
    }

    /* NOTE(review): the "name" attribute is used without a NULL check --
       confirm that protein_match nodes always carry it */
    attr_name = gt_feature_node_get_attribute(fn, "name");
    (void) snprintf(domain_name, BUFSIZ-1, "%s", attr_name);
    /* the lookup key has every '/' replaced by '_' */
    gt_cstr_rep(domain_name, '/', '_');

    hits = (GtArray*) gt_hashmap_get(lv->element->pdoms, domain_name);
    if (hits == NULL) {
      /* first hit for this domain: register a fresh array under a heap copy
         of the name and, if an order list exists, record the same copy */
      char *name_copy = gt_cstr_dup(domain_name);
      hits = gt_array_new(sizeof (GtFeatureNode*));
      gt_hashmap_add(lv->element->pdoms, name_copy, hits);
      if (lv->element->pdomorder != NULL)
        gt_array_add(lv->element->pdomorder, name_copy);
    }
    gt_array_add(hits, fn);
  }

  return 0;
}
Ejemplo n.º 30
0
/* Hashmap iteration callback which builds one mRNA feature node out of the
   features collected for a single transcript.

   <key>   transcript ID (C string); becomes the "ID" and "transcript_id"
           attributes of the new mRNA node
   <value> GtArray of GtGenomeNode* with the features of the transcript
           (must contain at least one node)
   <data>  ConstructionInfo*; the new mRNA node is appended to its <mRNAs>
           array, <tidy> selects warnings instead of errors for stop codon
           problems
   <err>   receives the error message on failure

   Processing happens in three steps:
   1. Every node flagged as stop codon is merged into a directly adjacent
      CDS (or dropped if it already lies inside a CDS in tidy mode) and is
      then removed from the array.
   2. Range, strand and sequence ID of the mRNA are derived from the
      remaining nodes; differing strands or sequence IDs are errors.
   3. The mRNA node is created and all remaining nodes become its children.

   Returns 0 on success and -1 (with <err> set) on failure. */
static int construct_mRNAs(GT_UNUSED void *key, void *value, void *data,
                           GtError *err)
{
  ConstructionInfo *cinfo = (ConstructionInfo*) data;
  GtArray *gt_genome_node_array = (GtArray*) value,
          *mRNAs = (GtArray*) cinfo->mRNAs;
  GtGenomeNode *mRNA_node, *first_node, *gn;
  const char *tname;
  GtStrand mRNA_strand;
  GtRange mRNA_range;
  GtStr *mRNA_seqid;
  GtUword i;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(key && value && data);
   /* at least one node in array */
  gt_assert(gt_array_size(gt_genome_node_array));

  /* TODO: support discontinuous start/stop codons */
  /* The index is advanced manually: after a removal the following element
     has moved into slot <i> and must be inspected as well. */
  i = 0;
  while (!had_err && i < gt_array_size(gt_genome_node_array)) {
    GtUword j;
    GtRange stop_codon_rng;
    bool found_cds = false;

    gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
    if (!gt_feature_node_get_attribute((GtFeatureNode*) gn,
                                       GTF_PARSER_STOP_CODON_FLAG)) {
      i++;
      continue;
    }

    stop_codon_rng = gt_genome_node_get_range(gn);
    for (j = 0; !had_err && j < gt_array_size(gt_genome_node_array); j++) {
      GtGenomeNode* gn2;
      GtRange this_rng;
      const char *this_type;
      gn2 = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, j);
      if (gn == gn2) continue;
      this_rng = gt_genome_node_get_range(gn2);
      this_type = gt_feature_node_get_type((GtFeatureNode*) gn2);
      if (this_type == gt_symbol(gt_ft_CDS)) {
        if (gt_range_contains(&this_rng, &stop_codon_rng)) {
          if (cinfo->tidy) {
            gt_warning("stop codon on line %u in file %s is contained in "
                       "CDS in line %u",
                       gt_genome_node_get_line_number(gn),
                       gt_genome_node_get_filename(gn),
                       gt_genome_node_get_line_number(gn2));
            found_cds = true;
          } else {
            gt_error_set(err, "stop codon on line %u in file %s is "
                              "contained in CDS in line %u",
                         gt_genome_node_get_line_number(gn),
                         gt_genome_node_get_filename(gn),
                         gt_genome_node_get_line_number(gn2));
            had_err = -1;
          }
          break;
        }
        /* stop codon directly follows the CDS: extend the CDS end */
        if (this_rng.end + 1 == stop_codon_rng.start) {
          this_rng.end = stop_codon_rng.end;
          gt_genome_node_set_range(gn2, &this_rng);
          found_cds = true;
          break;
        }
        /* stop codon directly precedes the CDS: extend the CDS start */
        if (this_rng.start == stop_codon_rng.end + 1) {
          this_rng.start = stop_codon_rng.start;
          gt_genome_node_set_range(gn2, &this_rng);
          found_cds = true;
          break;
        }
      }
    }

    if (had_err)
      break;

    if (found_cds) {
      /* the stop codon is now represented by a CDS; drop the node and stay
         at the same index */
      gt_array_rem(gt_genome_node_array, i);
      gt_genome_node_delete(gn);
    } else if (cinfo->tidy) {
      gt_warning("found stop codon on line %u in file %s with no "
                 "flanking CDS, ignoring it",
                 gt_genome_node_get_line_number(gn),
                 gt_genome_node_get_filename(gn));
      i++;
    } else {
      gt_error_set(err, "found stop codon on line %u in file %s with no "
                        "flanking CDS",
                   gt_genome_node_get_line_number(gn),
                   gt_genome_node_get_filename(gn));
      had_err = -1;
    }
  }

  if (had_err)
    return had_err;

  /* A stop codon is only removed if a different CDS node was found, hence
     the array cannot have become empty. */
  gt_assert(gt_array_size(gt_genome_node_array));

  /* determine the range and the strand of the mRNA; this is done after the
     stop codon pass so that <first_node> is a node which is still part of
     the array and its (possibly extended) range is taken into account */
  first_node = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, 0);
  mRNA_range = gt_genome_node_get_range(first_node);
  mRNA_strand = gt_feature_node_get_strand((GtFeatureNode*) first_node);
  mRNA_seqid = gt_genome_node_get_seqid(first_node);

  for (i = 1; !had_err && i < gt_array_size(gt_genome_node_array); i++) {
    GtRange range;
    GtStrand strand;
    gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
    range = gt_genome_node_get_range(gn);
    mRNA_range = gt_range_join(&mRNA_range, &range);
    strand = gt_feature_node_get_strand((GtFeatureNode*) gn);
    if (strand != mRNA_strand) {
      gt_error_set(err, "feature %s on line %u has strand %c, but the "
                        "parent transcript has strand %c",
                   (const char*) key,
                   gt_genome_node_get_line_number(gn),
                   GT_STRAND_CHARS[strand],
                   GT_STRAND_CHARS[mRNA_strand]);
      had_err = -1;
      break;
    } else {
      mRNA_strand = gt_strand_join(mRNA_strand, strand);
    }
    if (gt_str_cmp(mRNA_seqid, gt_genome_node_get_seqid(gn))) {
      gt_error_set(err, "The features on lines %u and %u refer to different "
                "genomic sequences (``seqname''), although they have the same "
                "gene IDs (``gene_id'') which must be globally unique",
                gt_genome_node_get_line_number(first_node),
                gt_genome_node_get_line_number(gn));
      had_err = -1;
      break;
    }
  }

  if (!had_err) {
    mRNA_node = gt_feature_node_new(mRNA_seqid, gt_ft_mRNA, mRNA_range.start,
                                    mRNA_range.end, mRNA_strand);
    gt_feature_node_add_attribute(((GtFeatureNode*) mRNA_node), "ID", key);
    gt_feature_node_add_attribute(((GtFeatureNode*) mRNA_node), "transcript_id",
                                  key);

    /* attach the transcript name if a non-empty one has been recorded */
    if ((tname = gt_hashmap_get(cinfo->transcript_id_to_name_mapping,
                              (const char*) key)) && strlen(tname) > 0) {
      gt_feature_node_add_attribute((GtFeatureNode*) mRNA_node, GT_GFF_NAME,
                                      tname);
    }

    /* register children */
    for (i = 0; i < gt_array_size(gt_genome_node_array); i++) {
      gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
      gt_feature_node_add_child((GtFeatureNode*) mRNA_node,
                                (GtFeatureNode*) gt_genome_node_ref(gn));
    }

    /* store the mRNA */
    gt_array_add(mRNAs, mRNA_node);
  }

  return had_err;
}