static int md5_to_seqid(GtGenomeNode *gn, GtRegionMapping *region_mapping,
                        GtError *err)
{
  GtStr *seqid;
  int had_err = 0;
  gt_error_check(err);
  gt_assert(gn && region_mapping);
  seqid = gt_genome_node_get_seqid(gn);
  if (gt_md5_seqid_has_prefix(gt_str_get(seqid))) {
    /* seqid is a MD5 seqid -> change id */
    GtStr *desc = gt_str_new();
    had_err = gt_region_mapping_get_description(region_mapping, desc, seqid,
                                                err);
    if (!had_err) {
      GtStr *new_seqid = gt_str_new();
      gt_regular_seqid_save(new_seqid, desc);
      if (gt_feature_node_try_cast(gn)) {
        M2IChangeSeqidInfo info;
        info.new_seqid = new_seqid;
        info.region_mapping = region_mapping;
        had_err = gt_feature_node_traverse_children((GtFeatureNode*) gn, &info,
                                                    m2i_change_seqid, true,
                                                    err);
      }
      else
        gt_genome_node_change_seqid(gn, new_seqid);
      gt_str_delete(new_seqid);
    }
    gt_str_delete(desc);
  }
  return had_err;
}
Ejemplo n.º 2
0
static int change_sequence_id(GtGenomeNode *gn, void *data,
                              GT_UNUSED GtError *err)
{
  GtStr *changed_seqid = data;
  gt_error_check(err);
  gt_assert(changed_seqid);
  gt_genome_node_change_seqid(gn, changed_seqid);
  return 0;
}
static int m2i_change_seqid(GtFeatureNode *fn, void *data, GtError *err)
{
  const char *target;
  M2IChangeSeqidInfo *info = (M2IChangeSeqidInfo*) data;
  gt_error_check(err);
  gt_assert(fn && info);
  gt_genome_node_change_seqid((GtGenomeNode*) fn, info->new_seqid);
  if ((target = gt_feature_node_get_attribute(fn, GT_GFF_TARGET)))
    return m2i_change_target_seqids(fn, target, info->region_mapping, err);
  return 0;
}
Ejemplo n.º 4
0
static int chseqids_stream_next(GtNodeStream *gs, GtGenomeNode **gn,
                                GtError *err)
{
  GtChseqidsStream *cs;
  GtGenomeNode *node, **gn_a, **gn_b;
  GtFeatureNode *feature_node;
  GtStr *changed_seqid;
  unsigned long i;
  int rval, had_err = 0;
  gt_error_check(err);
  cs = chseqids_stream_cast(gs);

  if (!cs->sequence_regions_processed) {
    while (!had_err) {
      if (!(had_err = gt_node_stream_next(cs->in_stream, &node, err))) {
        if (node)
          gt_array_add(cs->gt_genome_node_buffer, node);
        else
          break;
        if (!gt_region_node_try_cast(node))
          break; /* no more sequence regions */
      }
    }
    /* now the buffer contains only sequence regions (except the last entry)
       -> change sequence ids */
    for (i = 0; !had_err && i < gt_array_size(cs->gt_genome_node_buffer); i++) {
      node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_node_get_seqid(node)) {
        if  ((changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                     gt_str_get(gt_genome_node_get_seqid(node)),
                                                 err))) {
          if ((feature_node = gt_feature_node_try_cast(node))) {
            rval = gt_genome_node_traverse_children(node, changed_seqid,
                                                    change_sequence_id, true,
                                                    err);
            gt_assert(!rval); /* change_sequence_id() is sane */
          }
          else
            gt_genome_node_change_seqid(node, changed_seqid);
          gt_str_delete(changed_seqid);
        }
        else
          had_err = -1;
       }
    }
    /* sort them */
    if (!had_err)
      gt_genome_nodes_sort(cs->gt_genome_node_buffer);
    /* consolidate them */
    for (i = 1; !had_err && i + 1 < gt_array_size(cs->gt_genome_node_buffer);
         i++) {
      gn_a = gt_array_get(cs->gt_genome_node_buffer, i-1);
      gn_b = gt_array_get(cs->gt_genome_node_buffer, i);
      if (gt_genome_nodes_are_equal_region_nodes(*gn_a, *gn_b)) {
        gt_region_node_consolidate(gt_region_node_cast(*gn_b),
                                   gt_region_node_cast(*gn_a));
        gt_genome_node_delete(*gn_a);
        *gn_a = NULL;
      }
    }
    cs->sequence_regions_processed = true;
  }

  /* return non-null nodes from buffer */
  while (!had_err &&
         cs->buffer_index < gt_array_size(cs->gt_genome_node_buffer)) {
    node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer,
                                           cs->buffer_index);
    cs->buffer_index++;
    if (node) {
      *gn = node;
      return had_err;
    }
  }

  if (!had_err)
    had_err = gt_node_stream_next(cs->in_stream, gn, err);
  if (!had_err && *gn) {
    if (gt_genome_node_get_seqid(*gn)) {
      changed_seqid = gt_mapping_map_string(cs->chseqids_mapping,
                                      gt_str_get(gt_genome_node_get_seqid(*gn)),
                                         err);
      gt_assert(changed_seqid); /* is always defined, because an undefined
                                   mapping would be catched earlier */
      if ((feature_node = gt_feature_node_try_cast(*gn))) {
        rval = gt_genome_node_traverse_children(*gn, changed_seqid,
                                                change_sequence_id, true, err);
        gt_assert(!rval); /* change_sequence_id() is sane */
      }
      else
        gt_genome_node_change_seqid(*gn, changed_seqid);
      gt_str_delete(changed_seqid);
    }
  }

  return had_err;
}