Ejemplo n.º 1
0
static GtArray *gaeval_visitor_union(GtArray *cov1, GtArray *cov2)
{
  agn_assert(cov1 && cov2);
  gt_array_add_array(cov1, cov2);
  if(gt_array_size(cov1) > 1)
    gt_array_sort(cov1, (GtCompare)gt_range_compare);

  GtArray *runion = gt_array_new(sizeof(GtRange));
  if(gt_array_size(cov1) == 0)
    return runion;
  GtRange *rng = gt_array_get(cov1, 0);
  gt_array_add(runion, *rng);
  GtRange *prev = gt_array_get(runion, 0);
  if(gt_array_size(cov1) == 1)
    return runion;

  GtUword i;
  for(i = 1; i < gt_array_size(cov1); i++)
  {
    rng = gt_array_get(cov1, i);
    if(gt_range_overlap(rng, prev))
      *prev = gt_range_join(rng, prev);
    else
    {
      gt_array_add(runion, *rng);
      prev = gt_array_get(runion, gt_array_size(runion) - 1);
    }
  }

  return runion;
}
Ejemplo n.º 2
0
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *sort_stream;
  GtGenomeNode *node, *eofn;
  int had_err = 0;
  gt_error_check(err);
  sort_stream = gt_sort_stream_cast(ns);

  if (!sort_stream->sorted) {
    while (!(had_err = gt_node_stream_next(sort_stream->in_stream, &node,
                                           err)) && node) {
      if ((eofn = gt_eof_node_try_cast(node)))
        gt_genome_node_delete(eofn); /* get rid of EOF nodes */
      else
        gt_array_add(sort_stream->nodes, node);
    }
    if (!had_err) {
      gt_genome_nodes_sort_stable(sort_stream->nodes);
      sort_stream->sorted = true;
    }
  }

  if (!had_err) {
    gt_assert(sort_stream->sorted);
    if (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
      *gn = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                           sort_stream->idx);
      sort_stream->idx++;
      /* join region nodes with the same sequence ID */
      if (gt_region_node_try_cast(*gn)) {
        GtRange range_a, range_b;
        while (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
          node = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                                sort_stream->idx);
          if (!gt_region_node_try_cast(node) ||
              gt_str_cmp(gt_genome_node_get_seqid(*gn),
                         gt_genome_node_get_seqid(node))) {
            /* the next node is not a region node with the same ID */
            break;
          }
          range_a = gt_genome_node_get_range(*gn);
          range_b = gt_genome_node_get_range(node);
          range_a = gt_range_join(&range_a, &range_b);
          gt_genome_node_set_range(*gn, &range_a);
          gt_genome_node_delete(node);
          sort_stream->idx++;
        }
      }
      return 0;
    }
  }

  if (!had_err) {
    gt_array_reset(sort_stream->nodes);
    *gn = NULL;
  }

  return had_err;
}
Ejemplo n.º 3
0
void gt_region_node_consolidate(GtRegionNode *rn_a, GtRegionNode *rn_b)
{
  GtRange range_a, range_b;
  gt_assert(rn_a);
  gt_assert(rn_b);
  gt_assert(!gt_str_cmp(gt_genome_node_get_seqid((GtGenomeNode*) rn_a),
                        gt_genome_node_get_seqid((GtGenomeNode*) rn_b)));
  range_a = gt_genome_node_get_range((GtGenomeNode*) rn_a);
  range_b = gt_genome_node_get_range((GtGenomeNode*) rn_b);
  range_a = gt_range_join(&range_a, &range_b);
  gt_genome_node_set_range((GtGenomeNode*) rn_a, &range_a);
}
Ejemplo n.º 4
0
int gt_block_unit_test(GtError *err)
{
  GtRange r1, r2, r_temp, b_range;
  GtStrand s;
  GtGenomeNode *gn1, *gn2;
  GtElement *e1, *e2;
  double height;
  GtBlock *b;
  GtStr *seqid, *caption1, *caption2;
  int had_err = 0;
  GtStyle *sty;
  GtError *testerr;
  gt_error_check(err);

  seqid = gt_str_new_cstr("seqid");
  caption1 = gt_str_new_cstr("foo");
  caption2 = gt_str_new_cstr("bar");
  testerr = gt_error_new();

  r1.start = 10UL;
  r1.end = 50UL;

  r2.start = 40UL;
  r2.end = 50UL;

  gn1 = gt_feature_node_new(seqid, gt_ft_gene, r1.start, r1.end,
                            GT_STRAND_FORWARD);
  gn2 = gt_feature_node_new(seqid, gt_ft_exon, r2.start, r2.end,
                            GT_STRAND_FORWARD);

  e1 = gt_element_new((GtFeatureNode*) gn1);
  e2 = gt_element_new((GtFeatureNode*) gn2);

  b = gt_block_new();

  /* test gt_block_insert_elements */
  gt_ensure((0UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn1);
  gt_ensure((1UL == gt_block_get_size(b)));
  gt_block_insert_element(b, (GtFeatureNode*) gn2);
  gt_ensure((2UL == gt_block_get_size(b)));

  /* test gt_block_set_range & gt_block_get_range */
  r_temp = gt_range_join(&r1, &r2);
  gt_block_set_range(b, r_temp);
  b_range = gt_block_get_range(b);
  gt_ensure((0 == gt_range_compare(&b_range, &r_temp)));
  gt_ensure((1 == gt_range_compare(&r2, &r_temp)));

  /* tests gt_block_set_caption & gt_block_get_caption */
  gt_block_set_caption(b, caption1);
  gt_ensure((0 == gt_str_cmp(gt_block_get_caption(b), caption1)));
  gt_ensure((0 != gt_str_cmp(gt_block_get_caption(b), caption2)));

  /* tests gt_block_set_strand & gt_block_get_range */
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_UNKNOWN == s));
  gt_block_set_strand(b, GT_STRAND_FORWARD);
  s = gt_block_get_strand(b);
  gt_ensure((GT_STRAND_FORWARD == s));

  /* test gt_block_get_max_height() */
  sty = gt_style_new(err);
  gt_ensure(gt_block_get_max_height(b, &height, sty, err) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == BAR_HEIGHT_DEFAULT);
  gt_style_set_num(sty, "exon", "bar_height", 42);
  gt_ensure(gt_block_get_max_height(b, &height, sty, err) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_set_num(sty, "gene", "bar_height", 23);
  gt_ensure(gt_block_get_max_height(b, &height, sty, err) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 42);
  gt_style_unset(sty, "exon", "bar_height");
  gt_ensure(gt_block_get_max_height(b, &height, sty, err) == 0);
  gt_ensure(!gt_error_is_set(testerr));
  gt_ensure(height == 23);

  gt_str_delete(caption2);
  gt_str_delete(seqid);
  gt_element_delete(e1);
  gt_element_delete(e2);
  gt_block_delete(b);
  gt_style_delete(sty);
  gt_error_delete(testerr);
  gt_genome_node_delete(gn1);
  gt_genome_node_delete(gn2);

  return had_err;
}
Ejemplo n.º 5
0
static void make_sequence_region(GtHashmap *sequence_regions,
                                 GtStr *sequenceid,
                                 GthRegionFactory *srf,
                                 GthInput *input,
                                 GtUword filenum,
                                 GtUword seqnum)
{
    GtUword offset_is_defined = false;
    GtRange range, descrange;
    GtGenomeNode *sr = NULL;
    gt_assert(sequence_regions && sequenceid && srf && input);
    if (gth_input_use_substring_spec(input)) {
        range.start = gth_input_genomic_substring_from(input);
        range.end   = gth_input_genomic_substring_to(input);
    }
    else {
        range = gth_input_get_relative_genomic_range(input, filenum, seqnum);
    }
    if (srf->use_desc_ranges) {
        GtStr *description = gt_str_new();
        gth_input_get_genomic_description(input, description, filenum, seqnum);
        if (!gt_parse_description_range(gt_str_get(description), &descrange))
            offset_is_defined = true;
        gt_str_delete(description);
    }
    if (offset_is_defined)
        range = gt_range_offset(&range, descrange.start);
    else
        range = gt_range_offset(&range, 1); /* 1-based */
    if (!gt_str_length(sequenceid) ||
            (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) &&
             !offset_is_defined)) {
        /* sequenceid is empty or exists already (and no offset has been parsed)
           -> make one up */
        GtStr *seqid;
        char *base;
        base = gt_basename(gth_input_get_genomic_filename(input, filenum));
        seqid = gt_str_new_cstr(base);
        gt_free(base);
        gt_str_append_char(seqid, '|');
        gt_str_append_uword(seqid, seqnum + 1); /* 1-based */
        seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_UWORD);
        gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)));
        gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid));
        sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum),
                                range.start, range.end);
        gt_hashmap_add(sequence_regions,
                       (void*) gt_cstr_table_get(srf->used_seqids,
                               gt_str_get(seqid)),
                       sr);
        gt_str_delete(seqid);
    }
    else {
        /* sequenceid does not exists already (or an offset has been parsed)
           -> use this one */
        if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) {
            /* no sequence region with this id exists -> create one */
            gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid));
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
            sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum,
                                                    seqnum), range.start, range.end);
            gt_hashmap_add(sequence_regions,
                           (void*) gt_cstr_table_get(srf->used_seqids,
                                   gt_str_get(sequenceid)),
                           sr);
        }
        else {
            GtRange prev_range, new_range;
            /* sequence region with this id exists already -> modify range */
            sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid));
            gt_assert(sr);
            prev_range = gt_genome_node_get_range(sr);
            new_range = gt_range_join(&prev_range, &range);
            gt_genome_node_set_range(sr, &new_range);
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
        }
    }
    gt_assert(sr);
}
Ejemplo n.º 6
0
static int add_ids_visitor_feature_node(GtNodeVisitor *nv, GtFeatureNode *fn,
                                        GtError *err)
{
  AutomaticSequenceRegion *auto_sr;
  GtAddIDsVisitor *aiv;
  const char *seqid;
  bool is_circular;
  aiv = add_ids_visitor_cast(nv);
  seqid = gt_str_get(gt_genome_node_get_seqid((GtGenomeNode*) fn));
  if (aiv->ensure_sorting && !gt_cstr_table_get(aiv->defined_seqids, seqid)) {
    gt_error_set(err, "the file %s is not sorted (seqid \"%s\" on line %u has "
                 "not been previously introduced with a \"%s\" line)",
                 gt_genome_node_get_filename((GtGenomeNode*) fn), seqid,
                 gt_genome_node_get_line_number((GtGenomeNode*) fn),
                 GT_GFF_SEQUENCE_REGION);
    return -1;
  }
  if (!gt_cstr_table_get(aiv->defined_seqids, seqid)) {
    GtFeatureNodeIterator *fni;
    GtFeatureNode *node;
    GtRange range = gt_genome_node_get_range((GtGenomeNode*) fn);
    is_circular = gt_feature_node_get_attribute(fn, GT_GFF_IS_CIRCULAR)
                  ? true : false;
    if (!is_circular) {
      fni = gt_feature_node_iterator_new(fn);
      while ((node = gt_feature_node_iterator_next(fni))) {
        GtRange node_range = gt_genome_node_get_range((GtGenomeNode*) node);
        range = gt_range_join(&range, &node_range);
      }
      gt_feature_node_iterator_delete(fni);
    }
    /* sequence region has not been previously introduced -> check if one has
       already been created automatically */
    auto_sr = gt_hashmap_get(aiv->undefined_sequence_regions, seqid);
    if (!auto_sr) {
      GtStr *seqid_str;
      /* sequence region has not been createad automatically -> do it now */
      gt_warning("seqid \"%s\" on line %u in file \"%s\" has not been "
                 "previously introduced with a \"%s\" line, create such a line "
                 "automatically", seqid,
                 gt_genome_node_get_line_number((GtGenomeNode*) fn),
                 gt_genome_node_get_filename((GtGenomeNode*) fn),
                 GT_GFF_SEQUENCE_REGION);
      auto_sr = automatic_sequence_region_new(is_circular);
      seqid_str = gt_genome_node_get_seqid((GtGenomeNode*) fn);
      auto_sr->sequence_region = gt_region_node_new(seqid_str, range.start,
                                                               range.end);
      gt_hashmap_add(aiv->undefined_sequence_regions, gt_str_get(seqid_str),
                     auto_sr);
    }
    else {
      if (auto_sr->is_circular) {
        gt_assert(!is_circular); /* XXX */
      }
      else if (is_circular) {
        gt_assert(!auto_sr->is_circular); /* XXX */
        auto_sr->is_circular = true;
        gt_genome_node_set_range(auto_sr->sequence_region, &range);
      }
      else {
        GtRange joined_range,
                sr_range = gt_genome_node_get_range(auto_sr->sequence_region);
        /* update the range of the sequence region */
        joined_range = gt_range_join(&range, &sr_range);
        gt_genome_node_set_range(auto_sr->sequence_region, &joined_range);
      }
    }
    gt_array_add(auto_sr->feature_nodes, fn);
  }
  else
    gt_queue_add(aiv->node_buffer, fn);
  return 0;
}
Ejemplo n.º 7
0
static int construct_genes(GT_UNUSED void *key, void *value, void *data,
                           GtError *err)
{
  GtHashmap *transcript_id_hash = (GtHashmap*) value;
  ConstructionInfo *cinfo = (ConstructionInfo*) data;
  GtQueue *genome_nodes = cinfo->genome_nodes;
  const char *gname;
  GtArray *mRNAs = gt_array_new(sizeof (GtGenomeNode*));
  GtGenomeNode *gene_node, *gn;
  GtStrand gene_strand;
  GtRange gene_range;
  GtStr *gene_seqid;
  GtUword i;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(key && value && data);
  cinfo->mRNAs = mRNAs;
  had_err = gt_hashmap_foreach(transcript_id_hash, construct_mRNAs, cinfo, err);
  if (!had_err) {
    gt_assert(gt_array_size(mRNAs)); /* at least one mRNA constructed */

    /* determine the range and the strand of the gene */
    gn = *(GtGenomeNode**) gt_array_get(mRNAs, 0);
    gene_range = gt_genome_node_get_range(gn);
    gene_strand = gt_feature_node_get_strand((GtFeatureNode*) gn);
    gene_seqid = gt_genome_node_get_seqid(gn);
    for (i = 1; i < gt_array_size(mRNAs); i++) {
      GtRange range;
      gn = *(GtGenomeNode**) gt_array_get(mRNAs, i);
      range = gt_genome_node_get_range(gn);
      gene_range = gt_range_join(&gene_range, &range);
      gene_strand = gt_strand_join(gene_strand,
                          gt_feature_node_get_strand((GtFeatureNode*) gn));
      gt_assert(gt_str_cmp(gene_seqid, gt_genome_node_get_seqid(gn)) == 0);
    }

    gene_node = gt_feature_node_new(gene_seqid, gt_ft_gene, gene_range.start,
                                    gene_range.end, gene_strand);

    if ((gname = gt_hashmap_get(cinfo->gene_id_to_name_mapping,
                              (const char*) key))) {
      gt_feature_node_add_attribute((GtFeatureNode*) gene_node, GT_GFF_NAME,
                                      gname);
    }

    /* register children */
    for (i = 0; i < gt_array_size(mRNAs); i++) {
      gn = *(GtGenomeNode**) gt_array_get(mRNAs, i);
      gt_feature_node_add_child((GtFeatureNode*) gene_node,
                                (GtFeatureNode*) gn);
    }

    /* store the gene */
    gt_queue_add(genome_nodes, gene_node);

    /* free */
    gt_array_delete(mRNAs);
  }

  return had_err;
}
Ejemplo n.º 8
0
static int construct_mRNAs(GT_UNUSED void *key, void *value, void *data,
                           GtError *err)
{
  ConstructionInfo *cinfo = (ConstructionInfo*) data;
  GtArray *gt_genome_node_array = (GtArray*) value,
          *mRNAs = (GtArray*) cinfo->mRNAs;
  GtGenomeNode *mRNA_node, *first_node, *gn;
  const char *tname;
  GtStrand mRNA_strand;
  GtRange mRNA_range;
  GtStr *mRNA_seqid;
  GtUword i;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(key && value && data);
   /* at least one node in array */
  gt_assert(gt_array_size(gt_genome_node_array));

  /* determine the range and the strand of the mRNA */
  first_node = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, 0);
  mRNA_range = gt_genome_node_get_range(first_node);
  mRNA_strand = gt_feature_node_get_strand((GtFeatureNode*) first_node);
  mRNA_seqid = gt_genome_node_get_seqid(first_node);
  for (i = 1; i < gt_array_size(gt_genome_node_array); i++) {
    GtRange range;
    gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
    range = gt_genome_node_get_range(gn);
    mRNA_range = gt_range_join(&mRNA_range, &range);
    /* XXX: an error check is necessary here, otherwise gt_strand_join() can
       cause a failed assertion */
    mRNA_strand = gt_strand_join(mRNA_strand,
                          gt_feature_node_get_strand((GtFeatureNode*) gn));
    if (gt_str_cmp(mRNA_seqid, gt_genome_node_get_seqid(gn))) {
      gt_error_set(err, "The features on lines %u and %u refer to different "
                "genomic sequences (``seqname''), although they have the same "
                "gene IDs (``gene_id'') which must be globally unique",
                gt_genome_node_get_line_number(first_node),
                gt_genome_node_get_line_number(gn));
      had_err = -1;
      break;
    }
  }

  if (!had_err) {
    mRNA_node = gt_feature_node_new(mRNA_seqid, gt_ft_mRNA, mRNA_range.start,
                                    mRNA_range.end, mRNA_strand);

    if ((tname = gt_hashmap_get(cinfo->transcript_id_to_name_mapping,
                              (const char*) key))) {
      gt_feature_node_add_attribute((GtFeatureNode*) mRNA_node, GT_GFF_NAME,
                                      tname);
    }

    /* register children */
    for (i = 0; i < gt_array_size(gt_genome_node_array); i++) {
      gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
      gt_feature_node_add_child((GtFeatureNode*) mRNA_node,
                                (GtFeatureNode*) gn);
    }

    /* store the mRNA */
    gt_array_add(mRNAs, mRNA_node);
  }

  return had_err;
}
Ejemplo n.º 9
0
static int construct_mRNAs(GT_UNUSED void *key, void *value, void *data,
                           GtError *err)
{
  ConstructionInfo *cinfo = (ConstructionInfo*) data;
  GtArray *gt_genome_node_array = (GtArray*) value,
          *mRNAs = (GtArray*) cinfo->mRNAs;
  GtGenomeNode *mRNA_node, *first_node, *gn;
  const char *tname;
  GtStrand mRNA_strand;
  GtRange mRNA_range;
  GtStr *mRNA_seqid;
  GtUword i;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(key && value && data);
   /* at least one node in array */
  gt_assert(gt_array_size(gt_genome_node_array));

  /* determine the range and the strand of the mRNA */
  first_node = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, 0);
  mRNA_range = gt_genome_node_get_range(first_node);
  mRNA_strand = gt_feature_node_get_strand((GtFeatureNode*) first_node);
  mRNA_seqid = gt_genome_node_get_seqid(first_node);

  /* TODO: support discontinuous start/stop codons */
  for (i = 0; !had_err && i < gt_array_size(gt_genome_node_array); i++) {
    gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
    if (gt_feature_node_get_attribute((GtFeatureNode*) gn,
        GTF_PARSER_STOP_CODON_FLAG)) {
      GtUword j;
      GtRange stop_codon_rng = gt_genome_node_get_range(gn);
      bool found_cds = false;
      for (j = 0; !had_err && j < gt_array_size(gt_genome_node_array); j++) {
        GtGenomeNode* gn2;
        GtRange this_rng;
        const char *this_type;
        gn2 = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, j);
        if (gn == gn2) continue;
        this_rng = gt_genome_node_get_range(gn2);
        this_type = gt_feature_node_get_type((GtFeatureNode*) gn2);
        if (this_type == gt_symbol(gt_ft_CDS)) {
          if (gt_range_contains(&this_rng, &stop_codon_rng)) {
            if (cinfo->tidy) {
              gt_warning("stop codon on line %u in file %s is contained in "
                         "CDS in line %u",
                         gt_genome_node_get_line_number(gn),
                         gt_genome_node_get_filename(gn),
                         gt_genome_node_get_line_number(gn2));
              found_cds = true;
            } else {
              gt_error_set(err, "stop codon on line %u in file %s is "
                                "contained in CDS in line %u",
                           gt_genome_node_get_line_number(gn),
                           gt_genome_node_get_filename(gn),
                           gt_genome_node_get_line_number(gn2));
              had_err = -1;
            }
            break;
          }
          if (this_rng.end + 1 == stop_codon_rng.start) {
            this_rng.end = stop_codon_rng.end;
            gt_genome_node_set_range(gn2, &this_rng);
            found_cds = true;
            break;
          }
          if (this_rng.start == stop_codon_rng.end + 1) {
            this_rng.start = stop_codon_rng.start;
            gt_genome_node_set_range(gn2, &this_rng);
            found_cds = true;
            break;
          }
        }
      }
      if (!found_cds) {
        if (!had_err) {
          if (cinfo->tidy) {
            gt_warning("found stop codon on line %u in file %s with no "
                       "flanking CDS, ignoring it",
                       gt_genome_node_get_line_number(gn),
                       gt_genome_node_get_filename(gn));
          } else {
            gt_error_set(err, "found stop codon on line %u in file %s with no "
                              "flanking CDS",
                         gt_genome_node_get_line_number(gn),
                         gt_genome_node_get_filename(gn));
            had_err = -1;
            break;
          }
        }
      } else {
        gt_array_rem(gt_genome_node_array, i);
        gt_genome_node_delete(gn);
      }
    }
  }

  for (i = 1; !had_err && i < gt_array_size(gt_genome_node_array); i++) {
    GtRange range;
    GtStrand strand;
    gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
    range = gt_genome_node_get_range(gn);
    mRNA_range = gt_range_join(&mRNA_range, &range);
    strand = gt_feature_node_get_strand((GtFeatureNode*) gn);
    if (strand != mRNA_strand) {
      gt_error_set(err, "feature %s on line %u has strand %c, but the "
                        "parent transcript has strand %c",
                   (const char*) key,
                   gt_genome_node_get_line_number(gn),
                   GT_STRAND_CHARS[strand],
                   GT_STRAND_CHARS[mRNA_strand]);
      had_err = -1;
      break;
    } else {
      mRNA_strand = gt_strand_join(mRNA_strand, strand);
    }
    if (!had_err && gt_str_cmp(mRNA_seqid, gt_genome_node_get_seqid(gn))) {
      gt_error_set(err, "The features on lines %u and %u refer to different "
                "genomic sequences (``seqname''), although they have the same "
                "gene IDs (``gene_id'') which must be globally unique",
                gt_genome_node_get_line_number(first_node),
                gt_genome_node_get_line_number(gn));
      had_err = -1;
      break;
    }
  }

  if (!had_err) {
    mRNA_node = gt_feature_node_new(mRNA_seqid, gt_ft_mRNA, mRNA_range.start,
                                    mRNA_range.end, mRNA_strand);
    gt_feature_node_add_attribute(((GtFeatureNode*) mRNA_node), "ID", key);
    gt_feature_node_add_attribute(((GtFeatureNode*) mRNA_node), "transcript_id",
                                  key);

    if ((tname = gt_hashmap_get(cinfo->transcript_id_to_name_mapping,
                              (const char*) key)) && strlen(tname) > 0) {
      gt_feature_node_add_attribute((GtFeatureNode*) mRNA_node, GT_GFF_NAME,
                                      tname);
    }

    /* register children */
    for (i = 0; i < gt_array_size(gt_genome_node_array); i++) {
      gn = *(GtGenomeNode**) gt_array_get(gt_genome_node_array, i);
      gt_feature_node_add_child((GtFeatureNode*) mRNA_node,
                                (GtFeatureNode*) gt_genome_node_ref(gn));
    }

    /* store the mRNA */
    gt_array_add(mRNAs, mRNA_node);
  }

  return had_err;
}
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtSNPAnnotatorStream *sas;
  int had_err = 0;
  bool complete_cluster = false;
  GtGenomeNode *mygn = NULL;
  GtFeatureNode *fn = NULL;
  const char *snv_type = gt_symbol(gt_ft_SNV),
             *snp_type = gt_symbol(gt_ft_SNP),
             *gene_type = gt_symbol(gt_ft_gene);
  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* if there are still SNPs left in the buffer, output them */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  } else complete_cluster = false;

  while (!had_err && !complete_cluster) {
    had_err = gt_node_stream_next(sas->merge_stream, &mygn, err);

    /* stop if stream is at the end */
    if (had_err || !mygn) break;

    /* process all feature nodes */
    if ((fn = gt_feature_node_try_cast(mygn))) {
      GtGenomeNode *addgn;
      const char *type = gt_feature_node_get_type(fn);
      GtRange new_rng = gt_genome_node_get_range(mygn);
      if (type == snv_type || type == snp_type) {
        /* -----> this is a SNP <----- */
        if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
          /* it falls into the currently observed range */
          gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) fn));
        } else {
          /* SNP outside a gene, this cluster is done
             add to out queue and start serving */
          gt_assert(gt_queue_size(sas->outqueue) == 0);
          had_err = snp_annotator_stream_process_current_gene(sas, err);
          gt_queue_add(sas->outqueue, mygn);
          if (gt_queue_size(sas->outqueue) > 0) {
            *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
            complete_cluster = true;
          }
        }
      } else if (type == gene_type) {
        /* -----> this is a gene <----- */
        if (gt_array_size(sas->cur_gene_set) == 0UL) {
          /* new overlapping gene cluster */
          addgn = gt_genome_node_ref(mygn);
          gt_array_add(sas->cur_gene_set, addgn);
          sas->cur_gene_range = gt_genome_node_get_range(mygn);
        } else {
          if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
            /* gene overlaps with current one, add to cluster */
            addgn = gt_genome_node_ref(mygn);
            gt_array_add(sas->cur_gene_set, addgn);
            sas->cur_gene_range = gt_range_join(&sas->cur_gene_range, &new_rng);
          } else {
            /* finish current cluster and start a new one */
            had_err = snp_annotator_stream_process_current_gene(sas, err);
            if (!had_err) {
              addgn = gt_genome_node_ref(mygn);
              gt_array_add(sas->cur_gene_set, addgn);
              sas->cur_gene_range = gt_genome_node_get_range(mygn);
            }
            if (gt_queue_size(sas->outqueue) > 0) {
              *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
              complete_cluster = true;
            }
          }
        }
        /* from now on, genes are kept in gene cluster arrays only */
        gt_genome_node_delete(mygn);
      }
    } else {
      /* meta node */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err) {
        gt_queue_add(sas->outqueue, mygn);
      }
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        complete_cluster = true;
      }
    }
  }

  return had_err;
}