Ejemplo n.º 1
0
const char* gt_symbol(const char *cstr)
{
  const char *symbol;
  if (!cstr)
    return NULL;
  gt_mutex_lock(symbol_mutex);
  if (!(symbol = gt_cstr_table_get(symbols, cstr))) {
    gt_cstr_table_add(symbols, cstr);
    symbol = gt_cstr_table_get(symbols, cstr);
  }
  gt_mutex_unlock(symbol_mutex);
  return symbol;
}
Ejemplo n.º 2
0
/* Returns true if <parent_id> has an entry in the missing-parents table of
   <o>, false otherwise. Both arguments must be non-NULL. */
bool gt_orphanage_parent_is_missing(GtOrphanage *o, const char *parent_id)
{
  gt_assert(o && parent_id);
  return gt_cstr_table_get(o->missing_parents, parent_id) ? true : false;
}
Ejemplo n.º 3
0
/* Returns true if <id> has an entry in the orphan-ID table of <o>, false
   otherwise. Both arguments must be non-NULL. */
bool gt_orphanage_is_orphan(GtOrphanage *o, const char *id)
{
  gt_assert(o && id);
  return gt_cstr_table_get(o->orphan_ids, id) ? true : false;
}
Ejemplo n.º 4
0
/* Builds a candidate ID in <buf> from <id> and the counter <i> (via
   make_unique_id_string()) and returns true if that candidate has no entry
   in <tab>. The previous content of <buf> is discarded; on return <buf>
   holds the candidate. */
static bool id_string_is_unique(GtStr *id, GtStr *buf, GtCstrTable *tab,
                                GtUword i)
{
  const char *candidate;
  gt_str_reset(buf);
  gt_str_append_str(buf, id);
  make_unique_id_string(buf, i);
  candidate = gt_str_get(buf);
  if (gt_cstr_table_get(tab, candidate))
    return false; /* candidate is already taken */
  return true;
}
Ejemplo n.º 5
0
/* Records the source of feature node <fn> in <used_sources> unless the table
   already has an entry for it. Both arguments must be non-NULL. */
static void compute_source_statistics(GtFeatureNode *fn,
                                      GtCstrTable *used_sources)
{
    const char *src;
    gt_assert(fn && used_sources);
    src = gt_feature_node_get_source(fn);
    if (gt_cstr_table_get(used_sources, src))
        return; /* source already recorded */
    gt_cstr_table_add(used_sources, src);
}
Ejemplo n.º 6
0
/* Registers <orphan> with the orphanage <o>.
   The node is appended to the orphan queue. If <orphan_id> is given, it is
   recorded in the orphan-ID table (once). If <missing_parents> is given,
   each of its strings is recorded in the missing-parents table (once).
   <o> and <orphan> must be non-NULL and <orphan> must carry a
   GT_GFF_PARENT attribute. */
void gt_orphanage_add(GtOrphanage *o, GtGenomeNode *orphan,
                      const char *orphan_id, GtStrArray *missing_parents)
{
  GtUword idx;
  gt_assert(o && orphan);
  gt_assert(gt_feature_node_get_attribute((GtFeatureNode*) orphan,
                                          GT_GFF_PARENT));
  gt_queue_add(o->orphans, orphan);
  if (orphan_id) {
    if (!gt_cstr_table_get(o->orphan_ids, orphan_id))
      gt_cstr_table_add(o->orphan_ids, orphan_id);
  }
  if (!missing_parents)
    return;
  for (idx = 0; idx < gt_str_array_size(missing_parents); idx++) {
    const char *parent_id = gt_str_array_get(missing_parents, idx);
    if (gt_cstr_table_get(o->missing_parents, parent_id))
      continue; /* already known as missing */
    gt_cstr_table_add(o->missing_parents, parent_id);
  }
}
Ejemplo n.º 7
0
/* Returns a new GtStr holding an ID for <fn> that is not yet present in the
   visitor's table of used IDs.
   Starts from the "ID" attribute of <fn>. If that ID is already in use,
   candidates are generated with an increasing counter (starting at 1) until
   an unused one is found, and a warning naming the old and new ID is issued.
   The resulting ID is added to the used-ID table and stored in the
   feature_node_to_unique_id_str map under the key <fn>. */
static GtStr* make_id_unique(GtGFF3Visitor *gff3_visitor, GtFeatureNode *fn)
{
  GtUword counter = 1;
  GtStr *id = gt_str_new_cstr(gt_feature_node_get_attribute(fn, "ID"));

  if (gt_cstr_table_get(gff3_visitor->used_ids, gt_str_get(id))) {
    GtStr *candidate = gt_str_new();
    /* try successive counters until the candidate is unused */
    for (;;) {
      if (id_string_is_unique(id, candidate, gff3_visitor->used_ids,
                              counter++))
        break;
    }
    gt_warning("feature ID \"%s\" not unique: changing to %s", gt_str_get(id),
               gt_str_get(candidate));
    gt_str_set(id, gt_str_get(candidate));
    gt_str_delete(candidate);
  }

  /* remember the (possibly changed) ID as used */
  gt_cstr_table_add(gff3_visitor->used_ids, gt_str_get(id));
  /* associate the unique ID with its feature node */
  gt_hashmap_add(gff3_visitor->feature_node_to_unique_id_str, fn, id);

  return id;
}
Ejemplo n.º 8
0
/* Visitor callback for region nodes.
   Fails (returns -1 and sets <err>) if the seqid of <rn> has an entry in the
   visitor's undefined_sequence_regions map, i.e. a feature with this seqid
   was seen before this region definition. Otherwise the seqid is recorded in
   defined_seqids (once), <rn> is appended to the node buffer and 0 is
   returned. */
static int add_ids_visitor_region_node(GtNodeVisitor *nv, GtRegionNode *rn,
                                       GT_UNUSED GtError *err)
{
  GtAddIDsVisitor *visitor;
  GtGenomeNode *gn = (GtGenomeNode*) rn;
  const char *seqid;
  gt_error_check(err);
  visitor = add_ids_visitor_cast(nv);
  seqid = gt_str_get(gt_genome_node_get_seqid(gn));

  if (gt_hashmap_get(visitor->undefined_sequence_regions, seqid)) {
    gt_error_set(err, "genome feature with id \"%s\" has been defined before "
                 "the corresponding \"%s\" definition on line %u in file "
                 "\"%s\"", seqid, GT_GFF_SEQUENCE_REGION,
                 gt_genome_node_get_line_number(gn),
                 gt_genome_node_get_filename(gn));
    return -1;
  }

  /* mark the seqid as defined and pass the region node on */
  if (!gt_cstr_table_get(visitor->defined_seqids, seqid))
    gt_cstr_table_add(visitor->defined_seqids, seqid);
  gt_queue_add(visitor->node_buffer, rn);
  return 0;
}
Ejemplo n.º 9
0
/* Creates, or extends, the sequence region node for genomic sequence
   (<filenum>, <seqnum>) of <input> and registers it in <sequence_regions>.

   The region's range is either the genomic substring given in <input> or the
   relative genomic range of the sequence. It is shifted by the start of the
   range parsed from the sequence description (if <srf>->use_desc_ranges is
   set and parsing succeeds) or by 1 otherwise.

   Three cases are distinguished:
   - <sequenceid> is empty, or already used while no description offset was
     parsed: a seqid "<basename of genomic file>|<seqnum + 1>" is made up and
     a new region node is created for it.
   - <sequenceid> is not yet used: a new region node is created for it.
   - <sequenceid> is already used (and an offset was parsed): the range of
     the existing region node is joined with the new range.
   In every case an entry is added to <srf>->seqid_store for
   (<filenum>, <seqnum>).

   All pointer arguments must be non-NULL. */
static void make_sequence_region(GtHashmap *sequence_regions,
                                 GtStr *sequenceid,
                                 GthRegionFactory *srf,
                                 GthInput *input,
                                 GtUword filenum,
                                 GtUword seqnum)
{
    /* a flag, hence bool rather than an unsigned word */
    bool offset_is_defined = false;
    GtRange range, descrange;
    GtGenomeNode *sr = NULL;
    gt_assert(sequence_regions && sequenceid && srf && input);
    if (gth_input_use_substring_spec(input)) {
        range.start = gth_input_genomic_substring_from(input);
        range.end   = gth_input_genomic_substring_to(input);
    }
    else {
        range = gth_input_get_relative_genomic_range(input, filenum, seqnum);
    }
    if (srf->use_desc_ranges) {
        GtStr *description = gt_str_new();
        gth_input_get_genomic_description(input, description, filenum, seqnum);
        /* a zero return value is treated as "range parsed into descrange" */
        if (!gt_parse_description_range(gt_str_get(description), &descrange))
            offset_is_defined = true;
        gt_str_delete(description);
    }
    /* descrange is only read when offset_is_defined is set */
    if (offset_is_defined)
        range = gt_range_offset(&range, descrange.start);
    else
        range = gt_range_offset(&range, 1); /* 1-based */
    if (!gt_str_length(sequenceid) ||
            (gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid)) &&
             !offset_is_defined)) {
        /* sequenceid is empty or exists already (and no offset has been parsed)
           -> make one up */
        GtStr *seqid;
        char *base;
        base = gt_basename(gth_input_get_genomic_filename(input, filenum));
        seqid = gt_str_new_cstr(base);
        gt_free(base);
        gt_str_append_char(seqid, '|');
        gt_str_append_uword(seqid, seqnum + 1); /* 1-based */
        seqid_store_add(srf->seqid_store, filenum, seqnum, seqid, GT_UNDEF_UWORD);
        gt_assert(!gt_cstr_table_get(srf->used_seqids, gt_str_get(seqid)));
        gt_cstr_table_add(srf->used_seqids, gt_str_get(seqid));
        sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum, seqnum),
                                range.start, range.end);
        /* the hashmap key is the string returned by the used_seqids table,
           not the local seqid, which is deleted below */
        gt_hashmap_add(sequence_regions,
                       (void*) gt_cstr_table_get(srf->used_seqids,
                               gt_str_get(seqid)),
                       sr);
        gt_str_delete(seqid);
    }
    else {
        /* sequenceid does not exist already (or an offset has been parsed)
           -> use this one */
        if (!gt_cstr_table_get(srf->used_seqids, gt_str_get(sequenceid))) {
            /* no sequence region with this id exists -> create one */
            gt_cstr_table_add(srf->used_seqids, gt_str_get(sequenceid));
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
            sr = gt_region_node_new(seqid_store_get(srf->seqid_store, filenum,
                                                    seqnum), range.start, range.end);
            gt_hashmap_add(sequence_regions,
                           (void*) gt_cstr_table_get(srf->used_seqids,
                                   gt_str_get(sequenceid)),
                           sr);
        }
        else {
            GtRange prev_range, new_range;
            /* sequence region with this id exists already -> modify range */
            sr = gt_hashmap_get(sequence_regions, gt_str_get(sequenceid));
            gt_assert(sr);
            prev_range = gt_genome_node_get_range(sr);
            new_range = gt_range_join(&prev_range, &range);
            gt_genome_node_set_range(sr, &new_range);
            seqid_store_add(srf->seqid_store, filenum, seqnum, sequenceid,
                            offset_is_defined ? descrange.start : GT_UNDEF_UWORD);
        }
    }
    gt_assert(sr);
}
Ejemplo n.º 10
0
/* Visitor callback for feature nodes.
   - If the seqid of <fn> is already in defined_seqids, <fn> is appended to
     the node buffer and 0 is returned.
   - Otherwise, if the visitor enforces sorting, <err> is set and -1 is
     returned.
   - Otherwise <fn> is attached to an automatically created sequence region
     for its seqid (kept in undefined_sequence_regions). The region is
     created on first use (with a warning) or its range is updated.
     For non-circular features the range covers <fn> and all nodes delivered
     by a feature node iterator over <fn>. Returns 0. */
static int add_ids_visitor_feature_node(GtNodeVisitor *nv, GtFeatureNode *fn,
                                        GtError *err)
{
  AutomaticSequenceRegion *auto_sr;
  GtAddIDsVisitor *aiv = add_ids_visitor_cast(nv);
  GtGenomeNode *gn = (GtGenomeNode*) fn;
  const char *seqid = gt_str_get(gt_genome_node_get_seqid(gn));
  GtRange range;
  bool is_circular;

  /* seqid already introduced -> simply buffer the node */
  if (gt_cstr_table_get(aiv->defined_seqids, seqid)) {
    gt_queue_add(aiv->node_buffer, fn);
    return 0;
  }

  /* seqid unknown: this is an error if sorted input is required */
  if (aiv->ensure_sorting) {
    gt_error_set(err, "the file %s is not sorted (seqid \"%s\" on line %u has "
                 "not been previously introduced with a \"%s\" line)",
                 gt_genome_node_get_filename(gn), seqid,
                 gt_genome_node_get_line_number(gn),
                 GT_GFF_SEQUENCE_REGION);
    return -1;
  }

  /* determine the range the sequence region has to cover */
  range = gt_genome_node_get_range(gn);
  is_circular = gt_feature_node_get_attribute(fn, GT_GFF_IS_CIRCULAR)
                ? true : false;
  if (!is_circular) {
    GtFeatureNode *node;
    GtFeatureNodeIterator *fni = gt_feature_node_iterator_new(fn);
    while ((node = gt_feature_node_iterator_next(fni))) {
      GtRange node_range = gt_genome_node_get_range((GtGenomeNode*) node);
      range = gt_range_join(&range, &node_range);
    }
    gt_feature_node_iterator_delete(fni);
  }

  /* sequence region has not been previously introduced -> check if one has
     already been created automatically */
  auto_sr = gt_hashmap_get(aiv->undefined_sequence_regions, seqid);
  if (!auto_sr) {
    GtStr *seqid_str;
    /* sequence region has not been created automatically -> do it now */
    gt_warning("seqid \"%s\" on line %u in file \"%s\" has not been "
               "previously introduced with a \"%s\" line, create such a line "
               "automatically", seqid,
               gt_genome_node_get_line_number(gn),
               gt_genome_node_get_filename(gn),
               GT_GFF_SEQUENCE_REGION);
    auto_sr = automatic_sequence_region_new(is_circular);
    seqid_str = gt_genome_node_get_seqid(gn);
    auto_sr->sequence_region = gt_region_node_new(seqid_str, range.start,
                                                  range.end);
    gt_hashmap_add(aiv->undefined_sequence_regions, gt_str_get(seqid_str),
                   auto_sr);
  }
  else if (auto_sr->is_circular) {
    gt_assert(!is_circular); /* XXX */
  }
  else if (is_circular) {
    gt_assert(!auto_sr->is_circular); /* XXX */
    /* region becomes circular: take over the range of this feature */
    auto_sr->is_circular = true;
    gt_genome_node_set_range(auto_sr->sequence_region, &range);
  }
  else {
    /* update the range of the sequence region */
    GtRange sr_range = gt_genome_node_get_range(auto_sr->sequence_region);
    GtRange joined_range = gt_range_join(&range, &sr_range);
    gt_genome_node_set_range(auto_sr->sequence_region, &joined_range);
  }

  gt_array_add(auto_sr->feature_nodes, fn);
  return 0;
}