Example #1
0
static int select_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtSelectStream *fs;
  int had_err;
  gt_error_check(err);
  fs = gt_select_stream_cast(ns);

  /* we still have nodes in the buffer */
  if (gt_select_visitor_node_buffer_size(fs->select_visitor)) {
    /* return one of them */
    *gn = gt_select_visitor_get_node(fs->select_visitor);
    return 0;
  }

  /* no nodes in the buffer -> get new nodes */
  while (!(had_err = gt_node_stream_next(fs->in_stream, gn, err)) && *gn) {
    gt_assert(*gn && !had_err);
    had_err = gt_genome_node_accept(*gn, fs->select_visitor, err);
    if (had_err) {
      /* we own the node -> delete it */
      gt_genome_node_delete(*gn);
      *gn = NULL;
      break;
    }
    if (gt_select_visitor_node_buffer_size(fs->select_visitor)) {
      *gn = gt_select_visitor_get_node(fs->select_visitor);
      return 0;
    }
  }

  /* either we have an error or no new node */
  gt_assert(had_err || !*gn);
  return had_err;
}
int gt_feature_index_add_gff3file(GtFeatureIndex *feature_index,
                                  const char *gff3file, GtError *err)
{
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  GtArray *tmp;
  int had_err = 0;
  GtUword i;
  gt_error_check(err);
  gt_assert(feature_index && gff3file);
  tmp = gt_array_new(sizeof (GtGenomeNode*));
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(1, &gff3file);
  while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn)
    gt_array_add(tmp, gn);
  if (!had_err) {
    GtNodeVisitor *feature_visitor = gt_feature_visitor_new(feature_index);
    for (i=0;i<gt_array_size(tmp);i++) {
      gn = *(GtGenomeNode**) gt_array_get(tmp, i);
      /* no need to lock, add_*_node() is synchronized */
      had_err = gt_genome_node_accept(gn, feature_visitor, NULL);
      gt_assert(!had_err); /* cannot happen */
    }
    gt_node_visitor_delete(feature_visitor);
  }
  gt_node_stream_delete(gff3_in_stream);
  for (i=0;i<gt_array_size(tmp);i++)
    gt_genome_node_delete(*(GtGenomeNode**) gt_array_get(tmp, i));
  gt_array_delete(tmp);
  return had_err;
}
Example #3
0
static int filter_stream_next(GtNodeStream *gs, GtGenomeNode **gn, GtError *err)
{
    GtFilterStream *fs;
    int had_err;
    gt_error_check(err);
    fs = gt_filter_stream_cast(gs);

    /* we still have nodes in the buffer */
    if (gt_filter_visitor_node_buffer_size(fs->filter_visitor)) {
        /* return one of them */
        *gn = gt_filter_visitor_get_node(fs->filter_visitor);
        return 0;
    }

    /* no nodes in the buffer -> get new nodes */
    while (!(had_err = gt_node_stream_next(fs->in_stream, gn, err)) && *gn) {
        gt_assert(*gn && !had_err);
        had_err = gt_genome_node_accept(*gn, fs->filter_visitor, err);
        if (had_err)
            break;
        if (gt_filter_visitor_node_buffer_size(fs->filter_visitor)) {
            *gn = gt_filter_visitor_get_node(fs->filter_visitor);
            return 0;
        }
    }

    /* either we have an error or no new node */
    gt_assert(had_err || !*gn);
    return had_err;
}
Example #4
0
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *sort_stream;
  GtGenomeNode *node, *eofn;
  int had_err = 0;
  gt_error_check(err);
  sort_stream = gt_sort_stream_cast(ns);

  if (!sort_stream->sorted) {
    while (!(had_err = gt_node_stream_next(sort_stream->in_stream, &node,
                                           err)) && node) {
      if ((eofn = gt_eof_node_try_cast(node)))
        gt_genome_node_delete(eofn); /* get rid of EOF nodes */
      else
        gt_array_add(sort_stream->nodes, node);
    }
    if (!had_err) {
      gt_genome_nodes_sort_stable(sort_stream->nodes);
      sort_stream->sorted = true;
    }
  }

  if (!had_err) {
    gt_assert(sort_stream->sorted);
    if (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
      *gn = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                           sort_stream->idx);
      sort_stream->idx++;
      /* join region nodes with the same sequence ID */
      if (gt_region_node_try_cast(*gn)) {
        GtRange range_a, range_b;
        while (sort_stream->idx < gt_array_size(sort_stream->nodes)) {
          node = *(GtGenomeNode**) gt_array_get(sort_stream->nodes,
                                                sort_stream->idx);
          if (!gt_region_node_try_cast(node) ||
              gt_str_cmp(gt_genome_node_get_seqid(*gn),
                         gt_genome_node_get_seqid(node))) {
            /* the next node is not a region node with the same ID */
            break;
          }
          range_a = gt_genome_node_get_range(*gn);
          range_b = gt_genome_node_get_range(node);
          range_a = gt_range_join(&range_a, &range_b);
          gt_genome_node_set_range(*gn, &range_a);
          gt_genome_node_delete(node);
          sort_stream->idx++;
        }
      }
      return 0;
    }
  }

  if (!had_err) {
    gt_array_reset(sort_stream->nodes);
    *gn = NULL;
  }

  return had_err;
}
static int gff3_in_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtGFF3InStream *is;
  gt_error_check(err);
  is = gff3_in_stream_cast(ns);
  return gt_node_stream_next(is->last_stream, gn, err);
}
Example #6
0
static int filter_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                              GtError *error)
{
  AgnFilterStream *stream;
  GtFeatureNode *fn;
  int had_err;
  gt_error_check(error);
  stream = filter_stream_cast(ns);

  if(gt_queue_size(stream->cache) > 0)
  {
    *gn = gt_queue_get(stream->cache);
    return 0;
  }

  while(1)
  {
    had_err = gt_node_stream_next(stream->in_stream, gn, error);
    if(had_err)
      return had_err;
    if(!*gn)
      return 0;

    fn = gt_feature_node_try_cast(*gn);
    if(!fn)
      return 0;

    GtFeatureNode *current;
    GtFeatureNodeIterator *iter = gt_feature_node_iterator_new(fn);
    for(current  = gt_feature_node_iterator_next(iter);
        current != NULL;
        current  = gt_feature_node_iterator_next(iter))
    {
      const char *type = gt_feature_node_get_type(current);
      bool keepfeature = false;
      if(gt_hashmap_get(stream->typestokeep, type) != NULL)
        keepfeature = true;

      if(keepfeature)
      {
        gt_genome_node_ref((GtGenomeNode *)current);
        gt_queue_add(stream->cache, current);
      }
    }
    gt_feature_node_iterator_delete(iter);
    gt_genome_node_delete((GtGenomeNode *)fn);
    if(gt_queue_size(stream->cache) > 0)
    {
      *gn = gt_queue_get(stream->cache);
      return 0;
    }
  }

  return 0;
}
static int gt_ltr_cluster_stream_next(GtNodeStream *ns,
                                      GtGenomeNode **gn,
                                      GtError *err)
{
  GtLTRClusterStream *lcs;
  GtGenomeNode *ref_gn;
  int had_err = 0;
  unsigned long i = 0;

  gt_error_check(err);
  lcs = gt_ltr_cluster_stream_cast(ns);
  if (lcs->first_next) {
    while (!(had_err = gt_node_stream_next(lcs->in_stream, gn, err)) && *gn) {
      gt_assert(*gn && !had_err);
      ref_gn = gt_genome_node_ref(*gn);
      gt_array_add(lcs->nodes, ref_gn);
      had_err = gt_genome_node_accept(*gn, (GtNodeVisitor*) lcs->lcv, err);
      if (had_err) {
        gt_genome_node_delete(*gn);
        *gn = NULL;
        break;
      }
    }
    lcs->feat_to_encseq =
                       gt_ltr_cluster_prepare_seq_visitor_get_encseqs(lcs->lcv);
    lcs->feat_to_encseq_keys =
                      gt_ltr_cluster_prepare_seq_visitor_get_features(lcs->lcv);
    if (!had_err) {
      for (i = 0; i < gt_str_array_size(lcs->feat_to_encseq_keys); i++) {
        had_err = process_feature(lcs,
                                  gt_str_array_get(lcs->feat_to_encseq_keys, i),
                                  err);
        if (had_err)
          break;
      }
    }
    if (!had_err) {
      *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
      lcs->next_index++;
      lcs->first_next = false;
      return 0;
    }
  } else {
    if (lcs->next_index >= gt_array_size(lcs->nodes))
      *gn = NULL;
    else {
      *gn = *(GtGenomeNode**) gt_array_get(lcs->nodes, lcs->next_index);
      lcs->next_index++;
    }
    return 0;
  }

  return had_err;
}
static int inter_feature_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                        GtError *err)
{
  GtInterFeatureStream *ais;
  int had_err;
  gt_error_check(err);
  ais = gt_inter_feature_stream_cast(ns);
  had_err = gt_node_stream_next(ais->in_stream, gn, err);
  if (!had_err && *gn)
    had_err = gt_genome_node_accept(*gn, ais->inter_feature_visitor, err);
  return had_err;
}
Example #9
0
static int feature_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                               GtError *err)
{
  GtFeatureStream *feature_stream;
  int had_err;
  gt_error_check(err);
  feature_stream = feature_stream_cast(gs);
  had_err = gt_node_stream_next(feature_stream->in_stream, gn, err);
  if (!had_err && *gn)
    had_err = gt_genome_node_accept(*gn, feature_stream->feature_visitor, err);
  return had_err;
}
static int gt_node_stream_lua_next_tree(lua_State *L)
{
  GtNodeStream **gs = check_genome_stream(L, 1);
  GtGenomeNode *gn;
  GtError *err = gt_error_new();
  if (gt_node_stream_next(*gs, &gn, err))
    return gt_lua_error(L, err); /* handle error */
  else if (gn)
    gt_lua_genome_node_push(L, gn);
  else
    lua_pushnil(L);
  gt_error_delete(err);
  return 1;
}
static int sequence_node_add_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                         GtError *err)
{
  GtSequenceNodeAddStream *s;
  int had_err;
  gt_error_check(err);
  s = gt_sequence_node_add_stream_cast(ns);

  /* stream nodes as long as we have some, record seen seqids */
  if (!(had_err = gt_node_stream_next(s->in_stream, gn, err)) && *gn) {
    had_err = gt_genome_node_accept(*gn, s->collect_vis, err);
  }

  /* if there are no more  */
  if (!had_err && !*gn) {
    if (!s->seqids) {
      s->seqids = gt_cstr_table_get_all(s->seqid_table);
    }
    gt_assert(s->seqids);
    if (s->cur_seqid >= gt_str_array_size(s->seqids)) {
      *gn = NULL;
      return 0;
    } else {
      GtGenomeNode *new_sn;
      GtUword len;
      char *seq = NULL;
      GtStr *seqid = gt_str_new(),
            *seqstr = gt_str_new();
      gt_str_append_cstr(seqid, gt_str_array_get(s->seqids, s->cur_seqid));
      had_err = gt_region_mapping_get_sequence_length(s->rm, &len, seqid, err);
      if (!had_err) {
        had_err = gt_region_mapping_get_sequence(s->rm, &seq, seqid, 1, len,
                                                 err);
      }
      if (!had_err) {
        gt_str_append_cstr_nt(seqstr, seq, len);
        new_sn = gt_sequence_node_new(gt_str_get(seqid), seqstr);
        *gn = new_sn;
      }
      s->cur_seqid++;
      gt_free(seq);
      gt_str_delete(seqid);
      gt_str_delete(seqstr);
    }
  }

  return had_err;
}
Example #12
0
static int buffer_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtBufferStream *bs;
  gt_error_check(err);
  bs = buffer_stream_cast(ns);
  if (bs->buffering) {
    int had_err = gt_node_stream_next(bs->in_stream, gn, err);
    if (!had_err && *gn)
      gt_queue_add(bs->node_buffer, gt_genome_node_ref(*gn));
    return had_err;
  }
  else {
    *gn = gt_queue_size(bs->node_buffer) ? gt_queue_get(bs->node_buffer) : NULL;
    return 0;
  }
}
static int visitor_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtVisitorStream *visitor_stream;
  int had_err;
  gt_error_check(err);
  visitor_stream = visitor_stream_cast(ns);
  had_err = gt_node_stream_next(visitor_stream->in_stream, gn, err);
  if (!had_err && *gn)
    had_err = gt_genome_node_accept(*gn, visitor_stream->visitor, err);
  if (had_err) {
    /* we own the node -> delete it */
    gt_genome_node_delete(*gn);
    *gn = NULL;
  }
  return had_err;
}
Example #14
0
static int stat_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtStatStream *stat_stream;
  int had_err;
  gt_error_check(err);
  stat_stream = stat_stream_cast(ns);
  had_err = gt_node_stream_next(stat_stream->in_stream, gn, err);
  if (!had_err) {
    gt_assert(stat_stream->stat_visitor);
    if (*gn) {
      if (!gt_eof_node_try_cast(*gn)) /* do not count EOF nodes */
        stat_stream->number_of_DAGs++;
      had_err = gt_genome_node_accept(*gn, stat_stream->stat_visitor, err);
      gt_assert(!had_err); /* the status visitor is sane */
    }
  }
  return had_err;
}
static int cds_check_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                 GtError *err)
{
  GtCDSCheckStream *cs;
  int had_err;
  gt_error_check(err);
  cs = cds_check_stream_cast(ns);
  gt_assert(cs);
  had_err = gt_node_stream_next(cs->in_stream, gn, err);
  if (!had_err && *gn)
    had_err = gt_genome_node_accept(*gn, cs->cds_check_visitor, err);
  if (had_err) {
    /* we own the node -> delete it */
    gt_genome_node_delete(*gn);
    *gn = NULL;
  }
  return had_err;
}
Example #16
0
int gt_regioncov(int argc, const char **argv, GtError *err)
{
  GtNodeVisitor *regioncov_visitor;
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  RegionCovArguments arguments;
  int parsed_args, had_err = 0;
  gt_error_check(err);

  /* option parsing */
  switch (parse_options(&parsed_args, &arguments, argc, argv, err)) {
    case OPTIONPARSER_OK: break;
    case OPTIONPARSER_ERROR:
      return -1;
    case OPTIONPARSER_REQUESTS_EXIT:
      return 0;
  }

  /* create gff3 input stream */
  gt_assert(parsed_args < argc);
  gff3_in_stream = gt_gff3_in_stream_new_sorted(argv[parsed_args]);
  if (arguments.verbose)
    gt_gff3_in_stream_show_progress_bar((GtGFF3InStream*) gff3_in_stream);

  /* create region coverage visitor */
  regioncov_visitor = gt_regioncov_visitor_new(arguments.max_feature_dist);

  /* pull the features through the stream and free them afterwards */
  while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn) {
      had_err = gt_genome_node_accept(gn, regioncov_visitor, err);
      gt_genome_node_delete(gn);
  }

  /* show region coverage */
  if (!had_err)
    gt_regioncov_visitor_show_coverage(regioncov_visitor);

  /* free */
  gt_node_visitor_delete(regioncov_visitor);
  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
static int bssm_train_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                  GtError *err)
{
  GthBSSMTrainStream *bssm_train_stream;
  int had_err;
  gt_error_check(err);
  bssm_train_stream = bssm_train_stream_cast(ns);
  had_err = gt_node_stream_next(bssm_train_stream->in_stream, gn, err);
  if (!had_err && *gn) {
    had_err = gt_genome_node_accept(*gn, bssm_train_stream->bssm_train_visitor,
                                    err);
  }
  if (had_err) {
    /* we own the node -> delete it */
    gt_genome_node_delete(*gn);
    *gn = NULL;
  }
  return had_err;
}
Example #18
0
int main(int argc, const char *argv[])
{
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  GtError *err;
  int had_err;

  if (gt_version_check(GT_MAJOR_VERSION, GT_MINOR_VERSION, GT_MICRO_VERSION)) {
    fprintf(stderr, "error: %s\n", gt_version_check(GT_MAJOR_VERSION,
                                                    GT_MINOR_VERSION,
                                                    GT_MICRO_VERSION));
    return EXIT_FAILURE;
  }

  /* initialize */
  gt_lib_init();

  /* create error object */
  err = gt_error_new();

  /* create GFF3 input stream (with ID attribute checking) */
  gff3_in_stream = gt_gff3_in_stream_new_unsorted(argc-1, argv+1);
  gt_gff3_in_stream_check_id_attributes((GtGFF3InStream*) gff3_in_stream);

  /* pull the features through the stream and free them afterwards */
  while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn)
    gt_genome_node_delete(gn);

  /* handle error */
  if (had_err)
    fprintf(stderr, "%s: error: %s\n", argv[0], gt_error_get(err));
  else
    printf("input is valid GFF3\n");

  /* free */
  gt_node_stream_delete(gff3_in_stream);
  gt_error_delete(err);

  if (had_err)
    return EXIT_FAILURE;
  return EXIT_SUCCESS;
}
Example #19
0
static int gt_array_out_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                    GtError *err)
{
  GtArrayOutStream *aos;
  GtGenomeNode *node, *gn_ref;
  int had_err = 0;

  gt_error_check(err);
  aos = gt_array_out_stream_cast(gs);
  had_err = gt_node_stream_next(aos->in_stream, gn, err);

  if (!had_err && *gn) {
    if ((node = gt_feature_node_try_cast(*gn))) {
      gn_ref = gt_genome_node_ref(*gn);
      gt_array_add(aos->nodes, gn_ref);
    }
  }

  return had_err;
}
Example #20
0
static int gt_load_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtLoadStream *load_stream;
  GtGenomeNode *node, *eofn;
  int had_err = 0;
  gt_error_check(err);
  load_stream = gt_load_stream_cast(ns);

  if (!load_stream->full) {
    while (!(had_err = gt_node_stream_next(load_stream->in_stream, &node,
                                           err)) && node) {
      if ((eofn = gt_eof_node_try_cast(node)))
        gt_genome_node_delete(eofn); /* get rid of EOF nodes */
      else
        gt_array_add(load_stream->nodes, node);
    }
    if (!had_err) {
      load_stream->full = true;
    }
  }

  if (!had_err) {
    gt_assert(load_stream->full);
    if (load_stream->idx < gt_array_size(load_stream->nodes)) {
      *gn = *(GtGenomeNode**) gt_array_get(load_stream->nodes,
                                           load_stream->idx);
      load_stream->idx++;
      return 0;
    }
  }

  if (!had_err) {
    gt_array_reset(load_stream->nodes);
    *gn = NULL;
  }

  return had_err;
}
Example #21
0
static int gt_extracttarget_runner(GT_UNUSED int argc, const char **argv,
                                   int parsed_args, void *tool_arguments,
                                   GtError *err)
{
  ExtractTargetArguments *arguments = tool_arguments;
  GtNodeStream *gff3_in_stream;
  GtGenomeNode *gn;
  int had_err;

  gt_error_check(err);
  gt_assert(arguments);

  gff3_in_stream = gt_gff3_in_stream_new_unsorted(1, argv + parsed_args);

  while (!(had_err = gt_node_stream_next(gff3_in_stream, &gn, err)) && gn) {
    had_err = extracttarget_from_node(gn, arguments->seqfiles, err);
    gt_genome_node_delete(gn);
  }

  gt_node_stream_delete(gff3_in_stream);

  return had_err;
}
static int targetbest_filter_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                         GtError *err)
{
  GtTargetbestFilterStream *tfs;
  GtGenomeNode *node;
  int had_err = 0;
  gt_error_check(err);
  tfs = targetbest_filter_stream_cast(gs);

  if (!tfs->in_stream_processed) {
    while (!(had_err = gt_node_stream_next(tfs->in_stream, &node, err)) &&
           node) {
      if (gt_feature_node_try_cast(node) &&
          gt_feature_node_get_attribute((GtFeatureNode*) node, "Target")) {
        filter_targetbest((GtFeatureNode*) node, tfs->trees,
                          tfs->target_to_elem);
      }
      else
        gt_dlist_add(tfs->trees, node);
    }
    tfs->next = gt_dlist_first(tfs->trees);
    tfs->in_stream_processed = true;
  }

  if (!had_err) {
    gt_assert(tfs->in_stream_processed);
    if (tfs->next) {
      *gn = gt_dlistelem_get_data(tfs->next);
      tfs->next = gt_dlistelem_next(tfs->next);
    }
    else
      *gn = NULL;
    return 0;
  }

  return had_err;
}
static int gff3_numsorted_out_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                           GtError *err)
{
  GtGFF3NumsortedOutStream *gff3_out_stream;
  int had_err = 0;
  GtUword i = 0;
  gt_error_check(err);
  gff3_out_stream = gff3_numsorted_out_stream_cast(ns);
  if (!gff3_out_stream->outqueue) {
    gff3_out_stream->outqueue = gt_queue_new();
    while (!(had_err =
                    gt_node_stream_next(gff3_out_stream->in_stream, gn, err))) {
      if (!*gn) break;
      gt_array_add(gff3_out_stream->buffer, *gn);
    }
    if (!had_err) {
      gt_genome_nodes_sort_stable_with_func(gff3_out_stream->buffer,
                             (GtCompare) gt_genome_node_compare_numeric_seqids);
      for (i = 0; !had_err && i < gt_array_size(gff3_out_stream->buffer); i++) {
        GtGenomeNode *mygn = *(GtGenomeNode**)
                                       gt_array_get(gff3_out_stream->buffer, i);
        gt_queue_add(gff3_out_stream->outqueue, mygn);
      }
    }
  }
  if (gff3_out_stream->outqueue && !had_err) {
    if (gt_queue_size(gff3_out_stream->outqueue) > 0) {
      GtGenomeNode *mygn = (GtGenomeNode*)
                                        gt_queue_get(gff3_out_stream->outqueue);
      gt_assert(mygn);
      had_err = gt_genome_node_accept(mygn, gff3_out_stream->gff3_visitor, err);
      if (!had_err)
        *gn = mygn;
    }
  }
  return had_err;
}
static int gt_seqpos_classifier_next_fn(GtSeqposClassifier *seqpos_classifier,
    GtError *err)
{
  int had_err = 0;
  gt_assert(seqpos_classifier != NULL);
  if (seqpos_classifier->fni != NULL)
  {
    gt_feature_node_iterator_delete(seqpos_classifier->fni);
    seqpos_classifier->fni = NULL;
  }
  while (true)
  {
    if (seqpos_classifier->gn != NULL)
    {
      gt_genome_node_delete(seqpos_classifier->gn);
    }
    had_err = gt_node_stream_next(seqpos_classifier->annotation_stream,
        &seqpos_classifier->gn, err);
    if (had_err != 0 || seqpos_classifier->gn == NULL)
    {
      seqpos_classifier->fn = NULL;
      seqpos_classifier->gn = NULL;
      return had_err;
    }
    else
    {
      if ((seqpos_classifier->fn =
            gt_feature_node_try_cast(seqpos_classifier->gn)) != NULL)
      {
        seqpos_classifier->fni =
          gt_feature_node_iterator_new(seqpos_classifier->fn);
        return had_err;
      }
    }
  }
}
static int CpGIOverlap_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node, * next_node;
    GtFeatureNodeIterator * iter;
    int err_num = 0;
    *gn = NULL;
    CpGIOverlap_stream * context;
    const char * gene_name = NULL;
    const char * overlap_name = NULL;
    char  chr_str[255];
    int  chr_num;
    unsigned int TSS;

    float CpGIOverlap;


    context = CpGIOverlap_stream_cast(ns);

    // find the genes, determine expression level
     if(!gt_node_stream_next(context->in_stream,
                            &cur_node,
                            err
                           ) && cur_node != NULL
       )
     {
         *gn = cur_node;

         // try casting as a feature node so we can test type
         if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
         {
               return 0;
         }
         else // we found a feature node
         {
              // first check if it is a pseudo node, if so find the gene in it if available
              if (gt_feature_node_is_pseudo(cur_node))
              {
                  iter = gt_feature_node_iterator_new(cur_node);
                  if (iter == NULL)
                      return;
                  while ((next_node = gt_feature_node_iterator_next(iter)) && !gt_feature_node_has_type(next_node, feature_type_gene));
                  gt_feature_node_iterator_delete(iter);
                  if (NULL == (cur_node = next_node))
                     return 0;
              }


              if(!gt_feature_node_has_type(cur_node, feature_type_gene))
                  return 0;

              // find name of gene
              gene_name = gt_feature_node_get_attribute(cur_node, "Name");

              if (gene_name == NULL)
                  return;

              if ( 1 != sscanf(gt_str_get(gt_genome_node_get_seqid(cur_node)), "Chr%d", &chr_num))
                  return 0;

              TSS = (gt_feature_node_get_strand(cur_node) == GT_STRAND_FORWARD) ? gt_genome_node_get_start(cur_node) : gt_genome_node_get_end(cur_node);

              // now figure out the overlapping gene 
              if (! (overlap_name = CpGIOverlap_stream_find_gene_overlap( context, TSS, chr_num)))
                 return 0;

              // save the score into the node
              gt_feature_node_set_attribute(cur_node, "cpgi_at_tss", overlap_name);
              
              return 0;

         }
     }

    return err_num;
}
static int CpGI_score_stream_next(GtNodeStream * ns,
                                   GtGenomeNode ** gn,
                                   GtError * err)
{
    GtGenomeNode * cur_node;
    int err_num = 0;
    *gn = NULL;
    CpGI_score_stream * score_stream;
    unsigned long island_start;
    unsigned long island_end;
    float island_score;
    int chromosome_num;
    GtStr * seqID_gtstr;
    char *  seqID_str;
    char *  num_cg_str;
    unsigned long num_cg = 0;

    score_stream = CpGI_score_stream_cast(ns);

    // find the CpGI's, process methylome score
     if(!gt_node_stream_next(score_stream->in_stream,
                            &cur_node,
                            err
                           ) && cur_node != NULL
       )
     {
         *gn = cur_node;

         // try casting as a feature node so we can test type
         if(!gt_genome_node_try_cast(gt_feature_node_class(), cur_node))
         {
               return 0;
         }
         else // we found a feature node
         {
              if(!gt_feature_node_has_type(cur_node, feature_type_CpGI))
                  return 0;

              #if DEBUG_SCORE
              printf("found CpGI\n");
              #endif 
 
              island_start = gt_genome_node_get_start(cur_node);
              island_end   = gt_genome_node_get_end(cur_node);

              seqID_gtstr = gt_genome_node_get_seqid(cur_node);
              seqID_str   = gt_str_get(seqID_gtstr);
              sscanf(seqID_str, "Chr%d", &chromosome_num);

              num_cg_str = gt_feature_node_get_attribute(cur_node, "sumcg");
              if (!num_cg_str)
                 return 0;
              
              sscanf(num_cg_str, "%d", &num_cg);             

              // now figure out the score
              island_score = CpGI_score_stream_score_island(score_stream ,
                                                            chromosome_num,
                                                            num_cg,
                                                            island_start,
                                                            island_end);
//              gt_str_delete(seqID_gtstr);

              // save the score into the node
              gt_feature_node_set_score(cur_node, island_score);
              
              return 0;

         }
     }

    return err_num;
}
static int snp_annotator_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                                     GtError *err)
{
  GtSNPAnnotatorStream *sas;
  int had_err = 0;
  bool complete_cluster = false;
  GtGenomeNode *mygn = NULL;
  GtFeatureNode *fn = NULL;
  const char *snv_type = gt_symbol(gt_ft_SNV),
             *snp_type = gt_symbol(gt_ft_SNP),
             *gene_type = gt_symbol(gt_ft_gene);
  gt_error_check(err);
  sas = gt_snp_annotator_stream_cast(ns);

  /* if there are still SNPs left in the buffer, output them */
  if (gt_queue_size(sas->outqueue) > 0) {
    *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
    return had_err;
  } else complete_cluster = false;

  while (!had_err && !complete_cluster) {
    had_err = gt_node_stream_next(sas->merge_stream, &mygn, err);

    /* stop if stream is at the end */
    if (had_err || !mygn) break;

    /* process all feature nodes */
    if ((fn = gt_feature_node_try_cast(mygn))) {
      GtGenomeNode *addgn;
      const char *type = gt_feature_node_get_type(fn);
      GtRange new_rng = gt_genome_node_get_range(mygn);
      if (type == snv_type || type == snp_type) {
        /* -----> this is a SNP <----- */
        if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
          /* it falls into the currently observed range */
          gt_queue_add(sas->snps, gt_genome_node_ref((GtGenomeNode*) fn));
        } else {
          /* SNP outside a gene, this cluster is done
             add to out queue and start serving */
          gt_assert(gt_queue_size(sas->outqueue) == 0);
          had_err = snp_annotator_stream_process_current_gene(sas, err);
          gt_queue_add(sas->outqueue, mygn);
          if (gt_queue_size(sas->outqueue) > 0) {
            *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
            complete_cluster = true;
          }
        }
      } else if (type == gene_type) {
        /* -----> this is a gene <----- */
        if (gt_array_size(sas->cur_gene_set) == 0UL) {
          /* new overlapping gene cluster */
          addgn = gt_genome_node_ref(mygn);
          gt_array_add(sas->cur_gene_set, addgn);
          sas->cur_gene_range = gt_genome_node_get_range(mygn);
        } else {
          if (gt_range_overlap(&new_rng, &sas->cur_gene_range)) {
            /* gene overlaps with current one, add to cluster */
            addgn = gt_genome_node_ref(mygn);
            gt_array_add(sas->cur_gene_set, addgn);
            sas->cur_gene_range = gt_range_join(&sas->cur_gene_range, &new_rng);
          } else {
            /* finish current cluster and start a new one */
            had_err = snp_annotator_stream_process_current_gene(sas, err);
            if (!had_err) {
              addgn = gt_genome_node_ref(mygn);
              gt_array_add(sas->cur_gene_set, addgn);
              sas->cur_gene_range = gt_genome_node_get_range(mygn);
            }
            if (gt_queue_size(sas->outqueue) > 0) {
              *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
              complete_cluster = true;
            }
          }
        }
        /* from now on, genes are kept in gene cluster arrays only */
        gt_genome_node_delete(mygn);
      }
    } else {
      /* meta node */
      had_err = snp_annotator_stream_process_current_gene(sas, err);
      if (!had_err) {
        gt_queue_add(sas->outqueue, mygn);
      }
      if (gt_queue_size(sas->outqueue) > 0) {
        *gn = (GtGenomeNode*) gt_queue_get(sas->outqueue);
        complete_cluster = true;
      }
    }
  }

  return had_err;
}
Example #28
0
static int chseqids_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                GtError *err)
{
  GtChseqidsStream *cs;
  GtGenomeNode *node, **gn_a, **gn_b;
  GtFeatureNode *feature_node;
  GtStr *changed_seqid;
  unsigned long i;
  int rval, had_err = 0;
  gt_error_check(err);
  cs = chseqids_stream_cast(gs);

  if (!cs->sequence_regions_processed) {
    while (!had_err) {
      if (!(had_err = gt_node_stream_next(cs->in_stream, &node, err))) {
        if (node)
          gt_array_add(cs->gt_genome_node_buffer, node);
        else
          break;
        if (!gt_region_node_try_cast(node))
          break; /* no more sequence regions */
      }
    }
    /* now the buffer contains only sequence regions (except the last entry)
       -> change sequence ids */
    for (i = 0; !had_err && i < gt_array_size(cs->gt_genome_node_buffer); i++) {
      node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_node_get_seqid(node)) {
        if  ((changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                     gt_str_get(gt_genome_node_get_seqid(node)),
                                                 err))) {
          if ((feature_node = gt_feature_node_try_cast(node))) {
            rval = gt_genome_node_traverse_children(node, changed_seqid,
                                                    change_sequence_id, true,
                                                    err);
            gt_assert(!rval); /* change_sequence_id() is sane */
          }
          else
            gt_genome_node_change_seqid(node, changed_seqid);
          gt_str_delete(changed_seqid);
        }
        else
          had_err = -1;
       }
    }
    /* sort them */
    if (!had_err)
      gt_genome_nodes_sort(cs->gt_genome_node_buffer);
    /* consolidate them */
    for (i = 1; !had_err && i + 1 < gt_array_size(cs->gt_genome_node_buffer);
         i++) {
      gn_a = gt_array_get(cs->gt_genome_node_buffer, i-1);
      gn_b = gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_nodes_are_equal_region_nodes(*gn_a, *gn_b)) {
        gt_region_node_consolidate(gt_region_node_cast(*gn_b),
                                   gt_region_node_cast(*gn_a));
        gt_genome_node_delete(*gn_a);
        *gn_a = NULL;
      }
    }
    cs->sequence_regions_processed = true;
  }

  /* return non-null nodes from buffer */
  while (!had_err &&
         cs->buffer_index < gt_array_size(cs->gt_genome_node_buffer)) {
    node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer,
                                           cs->buffer_index);
    cs->buffer_index++;
    if (node) {
      *gn = node;
      return had_err;
    }
  }

  if (!had_err)
    had_err = gt_node_stream_next(cs->in_stream, gn, err);
  if (!had_err && *gn) {
    if (gt_genome_node_get_seqid(*gn)) {
      changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                      gt_str_get(gt_genome_node_get_seqid(*gn)),
                                         err);
      gt_assert(changed_seqid); /* is always defined, because an undefined
                                   mapping would be catched earlier */
      if ((feature_node = gt_feature_node_try_cast(*gn))) {
        rval = gt_genome_node_traverse_children(*gn, changed_seqid,
                                                change_sequence_id, true, err);
        gt_assert(!rval); /* change_sequence_id() is sane */
      }
      else
        gt_genome_node_change_seqid(*gn, changed_seqid);
      gt_str_delete(changed_seqid);
    }
  }

  return had_err;
}
int gt_ltrfileout_stream_next(GtNodeStream *ns, GtGenomeNode **gn, GtError *err)
{
  GtLTRdigestFileOutStream *ls;
  GtFeatureNode *fn;
  GtRange lltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          rltr_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          ppt_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD},
          pbs_rng = {GT_UNDEF_UWORD, GT_UNDEF_UWORD};
  int had_err;
  GtUword i=0;

  gt_error_check(err);
  ls = gt_ltrdigest_file_out_stream_cast(ns);

  /* initialize this element */
  memset(&ls->element, 0, sizeof (GtLTRElement));

  /* get annotations from parser */
  had_err = gt_node_stream_next(ls->in_stream, gn, err);
  if (!had_err && *gn)
  {
    GtFeatureNodeIterator* gni;
    GtFeatureNode *mygn;

    /* only process feature nodes */
    if (!(fn = gt_feature_node_try_cast(*gn)))
      return 0;

    ls->element.pdomorder = gt_array_new(sizeof (const char*));

    /* fill LTRElement structure from GFF3 subgraph */
    gni = gt_feature_node_iterator_new(fn);
    for (mygn = fn; mygn != NULL; mygn = gt_feature_node_iterator_next(gni))
      (void) gt_genome_node_accept((GtGenomeNode*) mygn,
                                   (GtNodeVisitor*) ls->lv,
                                   err);
    gt_feature_node_iterator_delete(gni);
  }

  if (!had_err && ls->element.mainnode != NULL)
  {
    char desc[GT_MAXFASTAHEADER];
    GtFeatureNode *ltr3, *ltr5;
    GtStr *sdesc, *sreg, *seq;

    /* find sequence in GtEncseq */
    sreg = gt_genome_node_get_seqid((GtGenomeNode*) ls->element.mainnode);

    sdesc = gt_str_new();
    had_err = gt_region_mapping_get_description(ls->rmap, sdesc, sreg, err);

    if (!had_err) {
      GtRange rng;
      ls->element.seqid = gt_calloc((size_t) ls->seqnamelen+1, sizeof (char));
      (void) snprintf(ls->element.seqid,
                      MIN((size_t) gt_str_length(sdesc),
                          (size_t) ls->seqnamelen)+1,
                      "%s", gt_str_get(sdesc));
      gt_cstr_rep(ls->element.seqid, ' ', '_');
      if (gt_str_length(sdesc) > (GtUword) ls->seqnamelen)
        ls->element.seqid[ls->seqnamelen] = '\0';

      (void) gt_ltrelement_format_description(&ls->element,
                                              ls->seqnamelen,
                                              desc,
                                              (size_t) (GT_MAXFASTAHEADER-1));
      gt_str_delete(sdesc);

      /* output basic retrotransposon data */
      lltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftLTR);
      rltr_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightLTR);
      rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.mainnode);
      gt_file_xprintf(ls->tabout_file,
                      GT_WU"\t"GT_WU"\t"GT_WU"\t%s\t"GT_WU"\t"GT_WU"\t"GT_WU"\t"
                      GT_WU"\t"GT_WU"\t"GT_WU"\t",
                      rng.start, rng.end, gt_ltrelement_length(&ls->element),
                      ls->element.seqid, lltr_rng.start, lltr_rng.end,
                      gt_ltrelement_leftltrlen(&ls->element), rltr_rng.start,
                      rltr_rng.end, gt_ltrelement_rightltrlen(&ls->element));
    }
    seq = gt_str_new();

    /* output TSDs */
    if (!had_err && ls->element.leftTSD != NULL)
    {
      GtRange tsd_rng;
      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.leftTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.leftTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%s\t",
                         tsd_rng.start,
                         tsd_rng.end,
                         gt_str_get(seq));
      }
    gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    if (!had_err && ls->element.rightTSD != NULL)
    {
      GtRange tsd_rng;

      tsd_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.rightTSD);
      had_err = gt_extract_feature_sequence(seq,
                                       (GtGenomeNode*) ls->element.rightTSD,
                                       gt_symbol(gt_ft_target_site_duplication),
                                       false,
                                       NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t",
                           tsd_rng.start,
                           tsd_rng.end,
                           gt_str_get(seq));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t");

    /* output PPT */
    if (!had_err && ls->element.ppt != NULL)
    {
      GtStrand ppt_strand = gt_feature_node_get_strand(ls->element.ppt);

      ppt_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.ppt);
      had_err = gt_extract_feature_sequence(seq,
                                            (GtGenomeNode*) ls->element.ppt,
                                            gt_symbol(gt_ft_RR_tract), false,
                                            NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&ppt_rng),
                            GT_FSWIDTH, ls->pptout_file);
        gt_file_xprintf(ls->tabout_file,
                           ""GT_WU"\t"GT_WU"\t%s\t%c\t%d\t",
                           ppt_rng.start,
                           ppt_rng.end,
                           gt_str_get(seq),
                           GT_STRAND_CHARS[ppt_strand],
                           (ppt_strand == GT_STRAND_FORWARD ?
                               abs((int) (rltr_rng.start - ppt_rng.end)) :
                               abs((int) (lltr_rng.end - ppt_rng.start))));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t");

    /* output PBS */
    if (!had_err && ls->element.pbs != NULL)
    {
      GtStrand pbs_strand;

      pbs_strand = gt_feature_node_get_strand(ls->element.pbs);
      pbs_rng = gt_genome_node_get_range((GtGenomeNode*) ls->element.pbs);
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.pbs,
                                           gt_symbol(gt_ft_primer_binding_site),
                                           false, NULL, NULL, ls->rmap, err);
      if (!had_err) {
        gt_fasta_show_entry(desc, gt_str_get(seq), gt_range_length(&pbs_rng),
                            GT_FSWIDTH, ls->pbsout_file);
        gt_file_xprintf(ls->tabout_file,
                         ""GT_WU"\t"GT_WU"\t%c\t%s\t%s\t%s\t%s\t%s\t",
                         pbs_rng.start,
                         pbs_rng.end,
                         GT_STRAND_CHARS[pbs_strand],
                         gt_feature_node_get_attribute(ls->element.pbs, "trna"),
                         gt_str_get(seq),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "pbsoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "trnaoffset"),
                         gt_feature_node_get_attribute(ls->element.pbs,
                                                       "edist"));
      }
      gt_str_reset(seq);
    } else gt_file_xprintf(ls->tabout_file, "\t\t\t\t\t\t\t\t");

    /* output protein domains */
    if (!had_err && ls->element.pdoms != NULL)
    {
      GtStr *pdomorderstr = gt_str_new();
      for (i=0; !had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* key = *(const char**) gt_array_get(ls->element.pdomorder,
                                                       i);
        GtArray *entry = (GtArray*) gt_hashmap_get(ls->element.pdoms, key);
        had_err = write_pdom(ls, entry, key, ls->rmap, desc, err);
      }

      if (GT_STRAND_REVERSE == gt_feature_node_get_strand(ls->element.mainnode))
        gt_array_reverse(ls->element.pdomorder);

      for (i=0 ;!had_err && i<gt_array_size(ls->element.pdomorder); i++)
      {
        const char* name = *(const char**) gt_array_get(ls->element.pdomorder,
                                                        i);
        gt_str_append_cstr(pdomorderstr, name);
        if (i != gt_array_size(ls->element.pdomorder)-1)
          gt_str_append_cstr(pdomorderstr, "/");
      }
      gt_file_xprintf(ls->tabout_file, "%s", gt_str_get(pdomorderstr));
      gt_str_delete(pdomorderstr);
    }

    /* output LTRs (we just expect them to exist) */
    switch (gt_feature_node_get_strand(ls->element.mainnode))
    {
      case GT_STRAND_REVERSE:
        ltr5 = ls->element.rightLTR;
        ltr3 = ls->element.leftLTR;
        break;
      case GT_STRAND_FORWARD:
      default:
        ltr5 = ls->element.leftLTR;
        ltr3 = ls->element.rightLTR;
        break;
    }

    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr5,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr5out_file);
      gt_str_reset(seq);
    }
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq, (GtGenomeNode*) ltr3,
                                          gt_symbol(gt_ft_long_terminal_repeat),
                                          false,
                                          NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc, gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->ltr3out_file);
      gt_str_reset(seq);
    }

    /* output complete oriented element */
    if (!had_err) {
      had_err = gt_extract_feature_sequence(seq,
                                           (GtGenomeNode*) ls->element.mainnode,
                                           gt_symbol(gt_ft_LTR_retrotransposon),
                                           false,
                                           NULL, NULL, ls->rmap, err);
    }
    if (!had_err) {
      gt_fasta_show_entry(desc,gt_str_get(seq), gt_str_length(seq),
                          GT_FSWIDTH, ls->elemout_file);
      gt_str_reset(seq);
    }
    gt_file_xprintf(ls->tabout_file, "\n");
    gt_str_delete(seq);
  }
  gt_hashmap_delete(ls->element.pdoms);
  gt_array_delete(ls->element.pdomorder);
  gt_free(ls->element.seqid);
  return had_err;
}