Beispiel #1
0
/* Node stream callback of the sort stream.
   On the first call the complete input stream is read into the node buffer
   (EOF nodes are deleted instead of buffered) and the buffer is sorted with a
   stable sort. Every call then hands out the next buffered node via <gn>.
   Consecutive region nodes with the same sequence ID are merged into the
   first one: its range becomes the join of all ranges and the merged nodes
   are deleted. When the buffer is exhausted it is reset and <*gn> is set to
   NULL. Returns 0 on success and the error code of the input stream
   otherwise (in which case <*gn> is not written). */
static int gt_sort_stream_next(GtNodeStream *ns, GtGenomeNode **gn,
                               GtError *err)
{
  GtSortStream *ss;
  GtGenomeNode *cur, *follower;
  GtRange joined, other;
  int rc = 0;
  gt_error_check(err);
  ss = gt_sort_stream_cast(ns);

  /* first call: drain the input stream, then sort what was collected */
  if (!ss->sorted) {
    for (;;) {
      rc = gt_node_stream_next(ss->in_stream, &cur, err);
      if (rc || !cur)
        break;
      if (gt_eof_node_try_cast(cur))
        gt_genome_node_delete(cur); /* EOF nodes are not passed on */
      else
        gt_array_add(ss->nodes, cur);
    }
    if (rc)
      return rc;
    gt_genome_nodes_sort_stable(ss->nodes);
    ss->sorted = true;
  }
  gt_assert(ss->sorted);

  /* nothing left to hand out */
  if (ss->idx >= gt_array_size(ss->nodes)) {
    gt_array_reset(ss->nodes);
    *gn = NULL;
    return 0;
  }

  cur = *(GtGenomeNode**) gt_array_get(ss->nodes, ss->idx);
  *gn = cur;
  ss->idx++;

  /* only region nodes get merged with their successors */
  if (!gt_region_node_try_cast(cur))
    return 0;

  for (; ss->idx < gt_array_size(ss->nodes); ss->idx++) {
    follower = *(GtGenomeNode**) gt_array_get(ss->nodes, ss->idx);
    /* stop at the first node which is not a region node ... */
    if (!gt_region_node_try_cast(follower))
      break;
    /* ... or which belongs to another sequence ID */
    if (gt_str_cmp(gt_genome_node_get_seqid(cur),
                   gt_genome_node_get_seqid(follower)))
      break;
    /* absorb the follower: widen the range of the returned node */
    joined = gt_genome_node_get_range(cur);
    other = gt_genome_node_get_range(follower);
    joined = gt_range_join(&joined, &other);
    gt_genome_node_set_range(cur, &joined);
    gt_genome_node_delete(follower);
  }
  return 0;
}
Beispiel #2
0
/* Lua binding: adds a region node to a feature index.
   Lua argument 1 is checked with check_feature_index(), argument 2 with
   check_genome_node(). If argument 2 cannot be cast to a region node, a Lua
   argument error ("not a region node") is raised. Pushes no results onto the
   Lua stack. */
static int feature_index_lua_add_region_node(lua_State *L)
{
  GtFeatureIndex **index_arg;
  GtGenomeNode **node_arg;
  GtRegionNode *region;
  gt_assert(L);
  index_arg = check_feature_index(L, 1);
  node_arg = check_genome_node(L, 2);
  /* the generic genome node must actually be a region node */
  region = gt_region_node_try_cast(*node_arg);
  luaL_argcheck(L, region, 2, "not a region node");
  gt_feature_index_add_region_node(*index_arg, region);
  return 0; /* number of Lua return values */
}
/* Lua binding: adds a region node to a feature index, reporting failures.
   Lua argument 1 is checked with check_feature_index(), argument 2 with
   check_genome_node(). If argument 2 cannot be cast to a region node, a Lua
   argument error ("not a region node") is raised. If adding the node fails,
   the error object is handed to gt_lua_error() and its result is returned.
   Pushes no results onto the Lua stack on success. */
static int feature_index_lua_add_region_node(lua_State *L)
{
  GtFeatureIndex **index_arg;
  GtGenomeNode **node_arg;
  GtRegionNode *region;
  GtError *add_err;
  gt_assert(L);
  index_arg = check_feature_index(L, 1);
  node_arg = check_genome_node(L, 2);
  /* the generic genome node must actually be a region node */
  region = gt_region_node_try_cast(*node_arg);
  luaL_argcheck(L, region, 2, "not a region node");
  /* the error object is created only after the argument checks passed */
  add_err = gt_error_new();
  if (!gt_feature_index_add_region_node(*index_arg, region, add_err)) {
    gt_error_delete(add_err);
    return 0; /* number of Lua return values */
  }
  /* NOTE(review): the error object is not deleted on this path; presumably
     gt_lua_error() takes ownership of it -- confirm */
  return gt_lua_error(L, add_err);
}
static int chseqids_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                GtError *err)
{
  GtChseqidsStream *cs;
  GtGenomeNode *node, **gn_a, **gn_b;
  GtFeatureNode *feature_node;
  GtStr *changed_seqid;
  unsigned long i;
  int rval, had_err = 0;
  gt_error_check(err);
  cs = chseqids_stream_cast(gs);

  if (!cs->sequence_regions_processed) {
    while (!had_err) {
      if (!(had_err = gt_node_stream_next(cs->in_stream, &node, err))) {
        if (node)
          gt_array_add(cs->gt_genome_node_buffer, node);
        else
          break;
        if (!gt_region_node_try_cast(node))
          break; /* no more sequence regions */
      }
    }
    /* now the buffer contains only sequence regions (except the last entry)
       -> change sequence ids */
    for (i = 0; !had_err && i < gt_array_size(cs->gt_genome_node_buffer); i++) {
      node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_node_get_seqid(node)) {
        if  ((changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                     gt_str_get(gt_genome_node_get_seqid(node)),
                                                 err))) {
          if ((feature_node = gt_feature_node_try_cast(node))) {
            rval = gt_genome_node_traverse_children(node, changed_seqid,
                                                    change_sequence_id, true,
                                                    err);
            gt_assert(!rval); /* change_sequence_id() is sane */
          }
          else
            gt_genome_node_change_seqid(node, changed_seqid);
          gt_str_delete(changed_seqid);
        }
        else
          had_err = -1;
       }
    }
    /* sort them */
    if (!had_err)
      gt_genome_nodes_sort(cs->gt_genome_node_buffer);
    /* consolidate them */
    for (i = 1; !had_err && i + 1 < gt_array_size(cs->gt_genome_node_buffer);
         i++) {
      gn_a = gt_array_get(cs->gt_genome_node_buffer, i-1);
      gn_b = gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_nodes_are_equal_region_nodes(*gn_a, *gn_b)) {
        gt_region_node_consolidate(gt_region_node_cast(*gn_b),
                                   gt_region_node_cast(*gn_a));
        gt_genome_node_delete(*gn_a);
        *gn_a = NULL;
      }
    }
    cs->sequence_regions_processed = true;
  }

  /* return non-null nodes from buffer */
  while (!had_err &&
         cs->buffer_index < gt_array_size(cs->gt_genome_node_buffer)) {
    node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer,
                                           cs->buffer_index);
    cs->buffer_index++;
    if (node) {
      *gn = node;
      return had_err;
    }
  }

  if (!had_err)
    had_err = gt_node_stream_next(cs->in_stream, gn, err);
  if (!had_err && *gn) {
    if (gt_genome_node_get_seqid(*gn)) {
      changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                      gt_str_get(gt_genome_node_get_seqid(*gn)),
                                         err);
      gt_assert(changed_seqid); /* is always defined, because an undefined
                                   mapping would be catched earlier */
      if ((feature_node = gt_feature_node_try_cast(*gn))) {
        rval = gt_genome_node_traverse_children(*gn, changed_seqid,
                                                change_sequence_id, true, err);
        gt_assert(!rval); /* change_sequence_id() is sane */
      }
      else
        gt_genome_node_change_seqid(*gn, changed_seqid);
      gt_str_delete(changed_seqid);
    }
  }

  return had_err;
}