const char* gt_symbol(const char *cstr) { const char *symbol; if (!cstr) return NULL; gt_mutex_lock(symbol_mutex); if (!(symbol = gt_cstr_table_get(symbols, cstr))) { gt_cstr_table_add(symbols, cstr); symbol = gt_cstr_table_get(symbols, cstr); } gt_mutex_unlock(symbol_mutex); return symbol; }
/* Return true if <parent_id> has an entry in the <missing_parents> table of
   orphanage <o>, false otherwise. Neither argument may be NULL. */
bool gt_orphanage_parent_is_missing(GtOrphanage *o, const char *parent_id)
{
  gt_assert(o && parent_id);
  return gt_cstr_table_get(o->missing_parents, parent_id) != NULL;
}
/* Return true if <id> has an entry in the <orphan_ids> table of orphanage
   <o>, false otherwise. Neither argument may be NULL. */
bool gt_orphanage_is_orphan(GtOrphanage *o, const char *id)
{
  gt_assert(o && id);
  return gt_cstr_table_get(o->orphan_ids, id) != NULL;
}
/* Build a candidate ID in <buf> from <id> and the counter <i> (via
   make_unique_id_string()) and report whether that candidate is absent from
   <tab>. The previous content of <buf> is discarded; on return <buf> holds
   the candidate that was tested. */
static bool id_string_is_unique(GtStr *id, GtStr *buf, GtCstrTable *tab,
                                GtUword i)
{
  const char *candidate;

  gt_str_reset(buf);
  gt_str_append_str(buf, id);
  make_unique_id_string(buf, i);

  candidate = gt_str_get(buf);
  return gt_cstr_table_get(tab, candidate) == NULL;
}
/* Record the source of feature node <fn> in <used_sources>, unless the table
   already contains it. */
static void compute_source_statistics(GtFeatureNode *fn,
                                      GtCstrTable *used_sources)
{
  const char *src;

  gt_assert(fn && used_sources);
  src = gt_feature_node_get_source(fn);

  if (gt_cstr_table_get(used_sources, src) != NULL)
    return; /* already recorded */

  gt_cstr_table_add(used_sources, src);
}
/* Add <orphan> to the orphan queue of <o>. The node is asserted to carry a
   GT_GFF_PARENT attribute.
   If <orphan_id> is given and not yet in the <orphan_ids> table, it is added
   there. If <missing_parents> is given, each of its strings that is not yet
   in the <missing_parents> table of <o> is added to that table.
   Both <orphan_id> and <missing_parents> may be NULL. */
void gt_orphanage_add(GtOrphanage *o, GtGenomeNode *orphan,
                      const char *orphan_id, GtStrArray *missing_parents)
{
  GtUword idx;

  gt_assert(o && orphan);
  gt_assert(gt_feature_node_get_attribute((GtFeatureNode*) orphan,
                                          GT_GFF_PARENT));

  gt_queue_add(o->orphans, orphan);

  if (orphan_id != NULL &&
      gt_cstr_table_get(o->orphan_ids, orphan_id) == NULL) {
    gt_cstr_table_add(o->orphan_ids, orphan_id);
  }

  if (missing_parents == NULL)
    return;

  for (idx = 0; idx < gt_str_array_size(missing_parents); idx++) {
    const char *parent = gt_str_array_get(missing_parents, idx);
    if (gt_cstr_table_get(o->missing_parents, parent) != NULL)
      continue; /* already recorded as missing */
    gt_cstr_table_add(o->missing_parents, parent);
  }
}
/* Return a GtStr holding an ID for <fn> that does not collide with any ID in
   <gff3_visitor->used_ids>.
   The starting point is the "ID" attribute of <fn>. If that value is already
   in the table, candidates are generated with increasing counters (starting
   at 1) until id_string_is_unique() accepts one; a warning is emitted and the
   candidate replaces the original value.
   The resulting ID is added to <used_ids>, and the returned GtStr is also
   stored in <feature_node_to_unique_id_str> under the key <fn>. */
static GtStr* make_id_unique(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  GtCstrTable *used = gff3_visitor->used_ids;
  GtStr *id = gt_str_new_cstr(gt_feature_node_get_attribute(fn, "ID"));

  if (gt_cstr_table_get(used, gt_str_get(id)) != NULL) {
    GtStr *candidate = gt_str_new();
    GtUword counter = 1;
    bool unique;

    /* try successive counters until an unused candidate is found */
    do {
      unique = id_string_is_unique(id, candidate, used, counter);
      counter++;
    } while (!unique);

    gt_warning("feature ID \"%s\" not unique: changing to %s",
               gt_str_get(id), gt_str_get(candidate));
    gt_str_set(id, gt_str_get(candidate));
    gt_str_delete(candidate);
  }

  /* update table with the new id */
  gt_cstr_table_add(used, gt_str_get(id));
  /* store (unique) id */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, id);

  return id;
}
/* Region-node callback of the add-IDs visitor.
   If the seqid of <rn> has an entry in <undefined_sequence_regions> (i.e. a
   feature with that seqid was handled before this region definition), <err>
   is set and -1 is returned.
   Otherwise the seqid is added to <defined_seqids> (if not yet present), <rn>
   is appended to the node buffer and 0 is returned. */
static int add_ids_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                       GT_UNUSED GtError *err)
{
  GtAddIDsVisitor *aiv;
  GtGenomeNode *gn = (GtGenomeNode*) rn;
  const char *seqid;

  gt_error_check(err);
  aiv = add_ids_visitor_cast(nv);
  seqid = gt_str_get(gt_genome_node_get_seqid(gn));

  if (gt_hashmap_get(aiv->undefined_sequence_regions, seqid)) {
    gt_error_set(err, "genome feature with id \"%s\" has been defined before "
                 "the corresponding \"%s\" definition on line %u in file "
                 "\"%s\"", seqid, GT_GFF_SEQUENCE_REGION,
                 gt_genome_node_get_line_number(gn),
                 gt_genome_node_get_filename(gn));
    return -1;
  }

  if (gt_cstr_table_get(aiv->defined_seqids, seqid) == NULL)
    gt_cstr_table_add(aiv->defined_seqids, seqid);
  gt_queue_add(aiv->node_buffer, rn);

  return 0;
}
static void make_sequence_region(GtHashmap *sequence_regions, GtStr *sequenceid, GthRegionFactory *srf, GthInput *input, GtUword filenum, GtUword seqnum) { GtUword offset_is_defined = false; GtRange range, descrange; GtGenomeNode *sr = NULL; gt_assert(sequence_regions && sequenceid && srf && input); if (gth_input_use_substring_spec(input)) { range.start = gth_input_genomic_substring_from(input); range.end = gth_input_genomic_substring_to(input); } else { range = gth_input_get_relative_genomic_range(input, filenum, seqnum); } if (srf->use_desc_ranges) { GtStr *description = gt_str_new(); gth_input_get_genomic_description(input, description, filenum, seqnum); if (!gt_parse_description_range(gt_str_get(description), &descrange)) offset_is_defined = true; gt_str_delete(description); } if (offset_is_defined) range = gt_range_offset(&range, descrange.start); else range = gt_range_offset(&range, 1); /* 1-based */ if (!gt_str_length(sequenceid) || (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) && !offset_is_defined)) { /* sequenceid is empty or exists already (and no offset has been parsed) -> make one up */ GtStr *seqid; char *base; base = gt_basename(gth_input_get_genomic_filename(input, filenum)); seqid = gt_str_new_cstr(base); gt_free(base); gt_str_append_char(seqid, '|'); gt_str_append_uword(seqid, seqnum + 1); /* 1-based */ seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_UWORD); gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid))); gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid)); sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum), range.start, range.end); gt_hashmap_add(sequence_regions, (void*) gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)), sr); gt_str_delete(seqid); } else { /* sequenceid does not exists already (or an offset has been parsed) -> use this one */ if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) { /* no sequence region with this id exists -> create 
one */ gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid)); seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid, offset_is_defined ? descrange.start : GT_UNDEF_UWORD); sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum), range.start, range.end); gt_hashmap_add(sequence_regions, (void*) gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)), sr); } else { GtRange prev_range, new_range; /* sequence region with this id exists already -> modify range */ sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid)); gt_assert(sr); prev_range = gt_genome_node_get_range(sr); new_range = gt_range_join(&prev_range, &range); gt_genome_node_set_range(sr, &new_range); seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid, offset_is_defined ? descrange.start : GT_UNDEF_UWORD); } } gt_assert(sr); }
/* Feature-node callback of the add-IDs visitor.

   - If the seqid of <fn> is in <defined_seqids>, <fn> is appended to the node
     buffer and 0 is returned.
   - If it is not and <ensure_sorting> is set, <err> is set and -1 is
     returned.
   - Otherwise <fn> is attached to an automatically created sequence region
     kept in <undefined_sequence_regions> under the seqid: the region is
     created (with a warning) if there is none yet, or its range/circularity
     is updated, and <fn> is added to the region's feature node array.
     0 is returned. */
static int add_ids_visitor_feature_node(GtNodeVisitor *nv, GtFeatureNode *fn,
                                        GtError *err)
{
  AutomaticSequenceRegion *auto_sr;
  GtAddIDsVisitor *aiv;
  GtFeatureNodeIterator *fni;
  GtFeatureNode *node;
  GtRange range;
  const char *seqid;
  bool is_circular, seqid_is_defined;

  aiv = add_ids_visitor_cast(nv);
  seqid = gt_str_get(gt_genome_node_get_seqid((GtGenomeNode*) fn));
  seqid_is_defined = gt_cstr_table_get(aiv->defined_seqids, seqid) != NULL;

  if (seqid_is_defined) {
    /* the sequence region is known -> just pass the node on */
    gt_queue_add(aiv->node_buffer, fn);
    return 0;
  }

  if (aiv->ensure_sorting) {
    gt_error_set(err, "the file %s is not sorted (seqid \"%s\" on line %u has "
                 "not been previously introduced with a \"%s\" line)",
                 gt_genome_node_get_filename((GtGenomeNode*) fn), seqid,
                 gt_genome_node_get_line_number((GtGenomeNode*) fn),
                 GT_GFF_SEQUENCE_REGION);
    return -1;
  }

  /* determine the range to use for the sequence region: for non-circular
     features, the join of the ranges of all nodes delivered by the feature
     node iterator */
  range = gt_genome_node_get_range((GtGenomeNode*) fn);
  is_circular = gt_feature_node_get_attribute(fn, GT_GFF_IS_CIRCULAR) != NULL;
  if (!is_circular) {
    fni = gt_feature_node_iterator_new(fn);
    while ((node = gt_feature_node_iterator_next(fni))) {
      GtRange node_range = gt_genome_node_get_range((GtGenomeNode*) node);
      range = gt_range_join(&range, &node_range);
    }
    gt_feature_node_iterator_delete(fni);
  }

  /* sequence region has not been previously introduced -> check if one has
     already been created automatically */
  auto_sr = gt_hashmap_get(aiv->undefined_sequence_regions, seqid);
  if (auto_sr == NULL) {
    GtStr *seqid_str;
    /* sequence region has not been created automatically -> do it now */
    gt_warning("seqid \"%s\" on line %u in file \"%s\" has not been "
               "previously introduced with a \"%s\" line, create such a line "
               "automatically", seqid,
               gt_genome_node_get_line_number((GtGenomeNode*) fn),
               gt_genome_node_get_filename((GtGenomeNode*) fn),
               GT_GFF_SEQUENCE_REGION);
    auto_sr = automatic_sequence_region_new(is_circular);
    seqid_str = gt_genome_node_get_seqid((GtGenomeNode*) fn);
    auto_sr->sequence_region = gt_region_node_new(seqid_str, range.start,
                                                  range.end);
    gt_hashmap_add(aiv->undefined_sequence_regions, gt_str_get(seqid_str),
                   auto_sr);
  }
  else if (auto_sr->is_circular) {
    /* existing region is circular: its range is left untouched */
    gt_assert(!is_circular); /* XXX */
  }
  else if (is_circular) {
    /* the region becomes circular and takes over the range of <fn> */
    gt_assert(!auto_sr->is_circular); /* XXX */
    auto_sr->is_circular = true;
    gt_genome_node_set_range(auto_sr->sequence_region, &range);
  }
  else {
    GtRange joined_range,
            sr_range = gt_genome_node_get_range(auto_sr->sequence_region);
    /* update the range of the sequence region */
    joined_range = gt_range_join(&range, &sr_range);
    gt_genome_node_set_range(auto_sr->sequence_region, &joined_range);
  }

  gt_array_add(auto_sr->feature_nodes, fn);
  return 0;
}