/* If the sequence id of <gn> carries the MD5 prefix, replace it with the
   regular sequence id derived from the description which <region_mapping>
   returns for that id. For feature nodes the replacement is applied through
   gt_feature_node_traverse_children() with m2i_change_seqid() as callback;
   any other node has its seqid changed directly.
   Returns 0 on success (including the case that nothing had to be changed);
   otherwise the error code of the failing call is returned. */
static int md5_to_seqid(GtGenomeNode *gn, GtRegionMapping *region_mapping,
                        GtError *err)
{
  GtStr *old_id, *description, *replacement;
  int status;
  gt_error_check(err);
  gt_assert(gn && region_mapping);

  old_id = gt_genome_node_get_seqid(gn);
  if (!gt_md5_seqid_has_prefix(gt_str_get(old_id)))
    return 0; /* not an MD5 seqid -> nothing to do */

  /* look up the description belonging to the MD5 seqid */
  description = gt_str_new();
  status = gt_region_mapping_get_description(region_mapping, description,
                                             old_id, err);
  if (!status) {
    replacement = gt_str_new();
    gt_regular_seqid_save(replacement, description);
    if (gt_feature_node_try_cast(gn)) {
      M2IChangeSeqidInfo info;
      info.new_seqid = replacement;
      info.region_mapping = region_mapping;
      status = gt_feature_node_traverse_children((GtFeatureNode*) gn, &info,
                                                 m2i_change_seqid, true, err);
    }
    else
      gt_genome_node_change_seqid(gn, replacement);
    gt_str_delete(replacement);
  }
  gt_str_delete(description);
  return status;
}
/* Traversal callback: sets the sequence id of <gn> to the GtStr passed via
   <data>. Always returns 0. */
static int change_sequence_id(GtGenomeNode *gn, void *data,
                              GT_UNUSED GtError *err)
{
  GtStr *replacement = (GtStr*) data;
  gt_error_check(err);
  gt_assert(replacement);
  gt_genome_node_change_seqid(gn, replacement);
  return 0;
}
/* Traversal callback: <data> must point to an M2IChangeSeqidInfo. Sets the
   sequence id of <fn> to info->new_seqid. If <fn> has a GT_GFF_TARGET
   attribute, its value is additionally handed to m2i_change_target_seqids()
   and the result of that call is returned; otherwise 0 is returned. */
static int m2i_change_seqid(GtFeatureNode *fn, void *data, GtError *err)
{
  M2IChangeSeqidInfo *ctx = data;
  const char *target_attr;
  gt_error_check(err);
  gt_assert(fn && ctx);

  gt_genome_node_change_seqid((GtGenomeNode*) fn, ctx->new_seqid);

  target_attr = gt_feature_node_get_attribute(fn, GT_GFF_TARGET);
  if (!target_attr)
    return 0; /* no Target attribute -> done */
  return m2i_change_target_seqids(fn, target_attr, ctx->region_mapping, err);
}
static int chseqids_stream_next(GtNodeStream *gs, GtGenomeNode **gn, GtError *err) { GtChseqidsStream *cs; GtGenomeNode *node, **gn_a, **gn_b; GtFeatureNode *feature_node; GtStr *changed_seqid; unsigned long i; int rval, had_err = 0; gt_error_check(err); cs = chseqids_stream_cast(gs); if (!cs->sequence_regions_processed) { while (!had_err) { if (!(had_err = gt_node_stream_next(cs->in_stream, &node, err))) { if (node) gt_array_add(cs->gt_genome_node_buffer, node); else break; if (!gt_region_node_try_cast(node)) break; /* no more sequence regions */ } } /* now the buffer contains only sequence regions (except the last entry) -> change sequence ids */ for (i = 0; !had_err && i < gt_array_size(cs->gt_genome_node_buffer); i++) { node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, i); if (gt_genome_node_get_seqid(node)) { if ((changed_seqid = gt_mapping_map_string(cs->chseqids_mapping, gt_str_get(gt_genome_node_get_seqid(node)), err))) { if ((feature_node = gt_feature_node_try_cast(node))) { rval = gt_genome_node_traverse_children(node, changed_seqid, change_sequence_id, true, err); gt_assert(!rval); /* change_sequence_id() is sane */ } else gt_genome_node_change_seqid(node, changed_seqid); gt_str_delete(changed_seqid); } else had_err = -1; } } /* sort them */ if (!had_err) gt_genome_nodes_sort(cs->gt_genome_node_buffer); /* consolidate them */ for (i = 1; !had_err && i + 1 < gt_array_size(cs->gt_genome_node_buffer); i++) { gn_a = gt_array_get(cs->gt_genome_node_buffer, i-1); gn_b = gt_array_get(cs->gt_genome_node_buffer, i); if (gt_genome_nodes_are_equal_region_nodes(*gn_a, *gn_b)) { gt_region_node_consolidate(gt_region_node_cast(*gn_b), gt_region_node_cast(*gn_a)); gt_genome_node_delete(*gn_a); *gn_a = NULL; } } cs->sequence_regions_processed = true; } /* return non-null nodes from buffer */ while (!had_err && cs->buffer_index < gt_array_size(cs->gt_genome_node_buffer)) { node = *(GtGenomeNode**) gt_array_get(cs->gt_genome_node_buffer, cs->buffer_index); 
cs->buffer_index++; if (node) { *gn = node; return had_err; } } if (!had_err) had_err = gt_node_stream_next(cs->in_stream, gn, err); if (!had_err && *gn) { if (gt_genome_node_get_seqid(*gn)) { changed_seqid = gt_mapping_map_string(cs->chseqids_mapping, gt_str_get(gt_genome_node_get_seqid(*gn)), err); gt_assert(changed_seqid); /* is always defined, because an undefined mapping would be catched earlier */ if ((feature_node = gt_feature_node_try_cast(*gn))) { rval = gt_genome_node_traverse_children(*gn, changed_seqid, change_sequence_id, true, err); gt_assert(!rval); /* change_sequence_id() is sane */ } else gt_genome_node_change_seqid(*gn, changed_seqid); gt_str_delete(changed_seqid); } } return had_err; }